X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/9bccf70c0258c7cac2dcb80011b2a964d884c552..ff6e181ae92fc6f1e89841290f461d1f2f9badd9:/osfmk/vm/vm_object.c diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c index 3e2579dbf..2ed4261d8 100644 --- a/osfmk/vm/vm_object.c +++ b/osfmk/vm/vm_object.c @@ -1,21 +1,22 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ @@ -56,11 +57,6 @@ * Virtual memory object module. */ -#ifdef MACH_BSD -/* remove as part of compoenent support merge */ -extern int vnode_pager_workaround; -#endif - #include #include @@ -69,7 +65,11 @@ extern int vnode_pager_workaround; #include #include #include + +#include #include + +#include #include #include #include @@ -78,15 +78,15 @@ extern int vnode_pager_workaround; #include #include #include +#include + #include #include #include #include #include #include -#include - - +#include /* * Virtual memory objects maintain the actual data @@ -162,10 +162,6 @@ extern int vnode_pager_workaround; */ /* Forward declarations for internal functions. */ -static void _vm_object_allocate( - vm_object_size_t size, - vm_object_t object); - static kern_return_t vm_object_terminate( vm_object_t object); @@ -178,9 +174,6 @@ static vm_object_t vm_object_cache_trim( static void vm_object_deactivate_all_pages( vm_object_t object); -static void vm_object_abort_activity( - vm_object_t object); - static kern_return_t vm_object_copy_call( vm_object_t src_object, vm_object_offset_t src_offset, @@ -286,6 +279,9 @@ typedef struct vm_object_hash_entry *vm_object_hash_entry_t; #define vm_object_hash(pager) \ ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT) +void vm_object_hash_entry_free( + vm_object_hash_entry_t entry); + /* * vm_object_hash_lookup looks up a pager in the hashtable * and returns the corresponding entry, with optional removal. 
@@ -351,7 +347,7 @@ void vm_object_hash_entry_free( vm_object_hash_entry_t entry) { - zfree(vm_object_hash_zone, (vm_offset_t)entry); + zfree(vm_object_hash_zone, entry); } /* @@ -360,7 +356,7 @@ vm_object_hash_entry_free( * Returns a new object with the given size. */ -static void +__private_extern__ void _vm_object_allocate( vm_object_size_t size, vm_object_t object) @@ -372,9 +368,9 @@ _vm_object_allocate( *object = vm_object_template; queue_init(&object->memq); queue_init(&object->msr_q); -#ifdef UBC_DEBUG +#ifdef UPL_DEBUG queue_init(&object->uplq); -#endif /* UBC_DEBUG */ +#endif /* UPL_DEBUG */ vm_object_lock_init(object); object->size = size; } @@ -403,20 +399,20 @@ vm_object_allocate( __private_extern__ void vm_object_bootstrap(void) { - register i; + register int i; vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object), - round_page(512*1024), - round_page(12*1024), + round_page_32(512*1024), + round_page_32(12*1024), "vm objects"); queue_init(&vm_object_cached_list); - mutex_init(&vm_object_cached_lock_data, ETAP_VM_OBJ_CACHE); + mutex_init(&vm_object_cached_lock_data, 0); vm_object_hash_zone = zinit((vm_size_t) sizeof (struct vm_object_hash_entry), - round_page(512*1024), - round_page(12*1024), + round_page_32(512*1024), + round_page_32(12*1024), "vm object hash entries"); for (i = 0; i < VM_OBJECT_HASH_COUNT; i++) @@ -428,7 +424,7 @@ vm_object_bootstrap(void) /* memq; Lock; init after allocation */ vm_object_template.size = 0; - vm_object_template.frozen_size = 0; + vm_object_template.memq_hint = VM_PAGE_NULL; vm_object_template.ref_count = 1; #if TASK_SWAPPER vm_object_template.res_count = 1; @@ -437,12 +433,12 @@ vm_object_bootstrap(void) vm_object_template.copy = VM_OBJECT_NULL; vm_object_template.shadow = VM_OBJECT_NULL; vm_object_template.shadow_offset = (vm_object_offset_t) 0; - vm_object_template.cow_hint = 0; + vm_object_template.cow_hint = ~(vm_offset_t)0; vm_object_template.true_share = FALSE; vm_object_template.pager = MEMORY_OBJECT_NULL; vm_object_template.paging_offset = 0; - vm_object_template.pager_request = PAGER_REQUEST_NULL; + vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL; /* msr_q; init after allocation */ vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC; @@ -461,8 +457,7 @@ vm_object_bootstrap(void) vm_object_template.private = FALSE; vm_object_template.pageout = FALSE; vm_object_template.alive = TRUE; - vm_object_template.lock_in_progress = FALSE; - vm_object_template.lock_restart = FALSE; + vm_object_template.purgable = VM_OBJECT_NONPURGABLE; vm_object_template.silent_overwrite = FALSE; vm_object_template.advisory_pageout = FALSE; vm_object_template.shadowed = FALSE; @@ -493,10 +488,17 @@ vm_object_bootstrap(void) /* * Note that in the following size specifications, we need to add 1 because - * VM_MAX_KERNEL_ADDRESS is a maximum address, not a size. + * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size. */ + +#ifdef ppc + _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1, + kernel_object); +#else _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1, kernel_object); +#endif + kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; /* * Initialize the "submap object". 
Make it as large as the @@ -504,8 +506,15 @@ vm_object_bootstrap(void) */ vm_submap_object = &vm_submap_object_store; +#ifdef ppc + _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1, + vm_submap_object); +#else _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1, vm_submap_object); +#endif + vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + /* * Create an "extra" reference to this object so that we never * try to deallocate it; zfree doesn't like to be called with @@ -555,7 +564,7 @@ vm_object_deallocate( register vm_object_t object) { boolean_t retry_cache_trim = FALSE; - vm_object_t shadow; + vm_object_t shadow = VM_OBJECT_NULL; // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */ // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */ @@ -568,10 +577,21 @@ vm_object_deallocate( * the object; we must lock it before removing * the object. */ + for (;;) { + vm_object_cache_lock(); - vm_object_cache_lock(); - vm_object_lock(object); - + /* + * if we try to take a regular lock here + * we risk deadlocking against someone + * holding a lock on this object while + * trying to vm_object_deallocate a different + * object + */ + if (vm_object_lock_try(object)) + break; + vm_object_cache_unlock(); + mutex_pause(); /* wait a bit */ + } assert(object->ref_count > 0); /* @@ -591,8 +611,21 @@ vm_object_deallocate( memory_object_unmap(pager); - vm_object_cache_lock(); - vm_object_lock(object); + for (;;) { + vm_object_cache_lock(); + + /* + * if we try to take a regular lock here + * we risk deadlocking against someone + * holding a lock on this object while + * trying to vm_object_deallocate a different + * object + */ + if (vm_object_lock_try(object)) + break; + vm_object_cache_unlock(); + mutex_pause(); /* wait a bit */ + } assert(object->ref_count > 0); } } @@ -612,8 +645,23 @@ vm_object_deallocate( if ((object->ref_count > 1) || object->terminating) { object->ref_count--; vm_object_res_deallocate(object); - vm_object_unlock(object); vm_object_cache_unlock(); + + if (object->ref_count == 1 && + object->shadow != VM_OBJECT_NULL) { + /* + * We don't use this VM object anymore. We + * would like to collapse it into its parent(s), + * but we don't have any pointers back to these + * parent object(s). + * But we can try and collapse this object with + * its own shadows, in case these are useless + * too... + */ + vm_object_collapse(object, 0); + } + + vm_object_unlock(object); if (retry_cache_trim && ((object = vm_object_cache_trim(TRUE)) != VM_OBJECT_NULL)) { @@ -730,10 +778,10 @@ vm_object_deallocate( * This object is not cachable; terminate it. */ XPR(XPR_VM_OBJECT, - "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%lX ref %d\n", - (integer_t)object, object->resident_page_count, - object->paging_in_progress, - (natural_t)current_thread(),object->ref_count); + "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n", + (integer_t)object, object->resident_page_count, + object->paging_in_progress, + (void *)current_thread(),object->ref_count); VM_OBJ_RES_DECR(object); /* XXX ? 
*/ /* @@ -901,6 +949,7 @@ vm_object_terminate( } vm_page_lock_queues(); + p->busy = TRUE; VM_PAGE_QUEUES_REMOVE(p); vm_page_unlock_queues(); @@ -921,16 +970,10 @@ vm_object_terminate( panic("vm_object_terminate.4 0x%x 0x%x", object, p); if (!p->dirty) - p->dirty = pmap_is_modified(p->phys_addr); + p->dirty = pmap_is_modified(p->phys_page); if ((p->dirty || p->precious) && !p->error && object->alive) { - p->busy = TRUE; - vm_object_paging_begin(object); - /* protect the object from re-use/caching while it */ - /* is unlocked */ - vm_object_unlock(object); vm_pageout_cluster(p); /* flush page */ - vm_object_lock(object); vm_object_paging_wait(object, THREAD_UNINT); XPR(XPR_VM_OBJECT, "vm_object_terminate restart, object 0x%X ref %d\n", @@ -979,14 +1022,14 @@ vm_object_terminate( /* * Detach the object from its shadow if we are the shadow's - * copy. + * copy. The reference we hold on the shadow must be dropped + * by our caller. */ if (((shadow_object = object->shadow) != VM_OBJECT_NULL) && !(object->pageout)) { vm_object_lock(shadow_object); - assert((shadow_object->copy == object) || - (shadow_object->copy == VM_OBJECT_NULL)); - shadow_object->copy = VM_OBJECT_NULL; + if (shadow_object->copy == object) + shadow_object->copy = VM_OBJECT_NULL; vm_object_unlock(shadow_object); } @@ -1010,7 +1053,7 @@ vm_object_terminate( object->pager = MEMORY_OBJECT_NULL; if (pager != MEMORY_OBJECT_NULL) - memory_object_control_disable(object->pager_request); + memory_object_control_disable(object->pager_control); vm_object_cache_unlock(); object->ref_count--; @@ -1070,7 +1113,7 @@ vm_object_terminate( /* * Free the space for the object. */ - zfree(vm_object_zone, (vm_offset_t) object); + zfree(vm_object_zone, object); return KERN_SUCCESS; } @@ -1133,63 +1176,6 @@ vm_object_release_pager( memory_object_deallocate(pager); } -/* - * Routine: vm_object_abort_activity [internal use only] - * Purpose: - * Abort paging requests pending on this object. - * In/out conditions: - * The object is locked on entry and exit. - */ -static void -vm_object_abort_activity( - vm_object_t object) -{ - register - vm_page_t p; - vm_page_t next; - - XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n", - (integer_t)object, 0, 0, 0, 0); - - /* - * Abort all activity that would be waiting - * for a result on this memory object. - * - * We could also choose to destroy all pages - * that we have in memory for this object, but - * we don't. - */ - - p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { - next = (vm_page_t) queue_next(&p->listq); - - /* - * If it's being paged in, destroy it. - * If an unlock has been requested, start it again. - */ - - if (p->busy && p->absent) { - VM_PAGE_FREE(p); - } - else { - if (p->unlock_request != VM_PROT_NONE) - p->unlock_request = VM_PROT_NONE; - PAGE_WAKEUP(p); - } - - p = next; - } - - /* - * Wake up threads waiting for the memory object to - * become ready. 
- */ - - object->pager_ready = TRUE; - vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY); -} - /* * Routine: vm_object_destroy * Purpose: @@ -1200,7 +1186,7 @@ vm_object_abort_activity( kern_return_t vm_object_destroy( vm_object_t object, - kern_return_t reason) + __unused kern_return_t reason) { memory_object_t old_pager; @@ -1230,7 +1216,7 @@ vm_object_destroy( old_pager = object->pager; object->pager = MEMORY_OBJECT_NULL; if (old_pager != MEMORY_OBJECT_NULL) - memory_object_control_disable(object->pager_request); + memory_object_control_disable(object->pager_control); vm_object_cache_unlock(); /* @@ -1316,17 +1302,23 @@ vm_object_deactivate_pages( if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) { + assert(!m->laundry); + m->reference = FALSE; - pmap_clear_reference(m->phys_addr); + pmap_clear_reference(m->phys_page); if ((kill_page) && (object->internal)) { m->precious = FALSE; m->dirty = FALSE; - pmap_clear_modify(m->phys_addr); + pmap_clear_modify(m->phys_page); vm_external_state_clr(object->existence_map, offset); } VM_PAGE_QUEUES_REMOVE(m); + assert(!m->laundry); + assert(m->object != kernel_object); + assert(m->pageq.next == NULL && + m->pageq.prev == NULL); if(m->zero_fill) { queue_enter_first( &vm_page_queue_zf, @@ -1396,23 +1388,22 @@ __private_extern__ void vm_object_pmap_protect( register vm_object_t object, register vm_object_offset_t offset, - vm_size_t size, + vm_object_size_t size, pmap_t pmap, - vm_offset_t pmap_start, + vm_map_offset_t pmap_start, vm_prot_t prot) { if (object == VM_OBJECT_NULL) return; - size = round_page_64(size); - offset = trunc_page_64(offset); + size = vm_object_round_page(size); + offset = vm_object_trunc_page(offset); vm_object_lock(object); - assert(object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC); + assert(object->internal); while (TRUE) { - if (object->resident_page_count > atop(size) / 2 && - pmap != PMAP_NULL) { + if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) { vm_object_unlock(object); pmap_protect(pmap, pmap_start, pmap_start + size, prot); return; @@ -1421,7 +1412,7 @@ vm_object_pmap_protect( /* if we are doing large ranges with respect to resident */ /* page count then we should interate over pages otherwise */ /* inverse page look-up will be faster */ - if ((object->resident_page_count / 4) < atop(size)) { + if (ptoa_64(object->resident_page_count / 4) < size) { vm_page_t p; vm_object_offset_t end; @@ -1431,11 +1422,10 @@ vm_object_pmap_protect( queue_iterate(&object->memq, p, vm_page_t, listq) { if (!p->fictitious && (offset <= p->offset) && (p->offset < end)) { + vm_map_offset_t start; - vm_offset_t start = pmap_start + - (vm_offset_t)(p->offset - offset); - - pmap_protect(pmap, start, start + PAGE_SIZE, prot); + start = pmap_start + p->offset - offset; + pmap_protect(pmap, start, start + PAGE_SIZE_64, prot); } } } else { @@ -1443,7 +1433,7 @@ vm_object_pmap_protect( if (!p->fictitious && (offset <= p->offset) && (p->offset < end)) { - pmap_page_protect(p->phys_addr, + pmap_page_protect(p->phys_page, prot & ~p->page_lock); } } @@ -1457,9 +1447,12 @@ vm_object_pmap_protect( if (pmap != PMAP_NULL) { for(target_off = offset; - target_off < end; target_off += PAGE_SIZE) { - if(p = vm_page_lookup(object, target_off)) { - vm_offset_t start = pmap_start + + target_off < end; + target_off += PAGE_SIZE) { + p = vm_page_lookup(object, target_off); + if (p != VM_PAGE_NULL) { + vm_offset_t start; + start = pmap_start + (vm_offset_t)(p->offset - offset); pmap_protect(pmap, start, start + 
PAGE_SIZE, prot); @@ -1468,8 +1461,9 @@ vm_object_pmap_protect( } else { for(target_off = offset; target_off < end; target_off += PAGE_SIZE) { - if(p = vm_page_lookup(object, target_off)) { - pmap_page_protect(p->phys_addr, + p = vm_page_lookup(object, target_off); + if (p != VM_PAGE_NULL) { + pmap_page_protect(p->phys_page, prot & ~p->page_lock); } } @@ -1582,6 +1576,7 @@ vm_object_copy_slowly( new_object = vm_object_allocate(size); new_offset = 0; + vm_object_lock(new_object); assert(size == trunc_page_64(size)); /* Will the loop terminate? */ @@ -1596,7 +1591,9 @@ vm_object_copy_slowly( while ((new_page = vm_page_alloc(new_object, new_offset)) == VM_PAGE_NULL) { if (!vm_page_wait(interruptible)) { + vm_object_unlock(new_object); vm_object_deallocate(new_object); + vm_object_deallocate(src_object); *_result_object = VM_OBJECT_NULL; return(MACH_SEND_INTERRUPTED); } @@ -1684,6 +1681,7 @@ vm_object_copy_slowly( case VM_FAULT_INTERRUPTED: vm_page_free(new_page); + vm_object_unlock(new_object); vm_object_deallocate(new_object); vm_object_deallocate(src_object); *_result_object = VM_OBJECT_NULL; @@ -1701,6 +1699,7 @@ vm_object_copy_slowly( vm_page_lock_queues(); vm_page_free(new_page); vm_page_unlock_queues(); + vm_object_unlock(new_object); vm_object_deallocate(new_object); vm_object_deallocate(src_object); *_result_object = VM_OBJECT_NULL; @@ -1714,6 +1713,7 @@ vm_object_copy_slowly( * Lose the extra reference, and return our object. */ + vm_object_unlock(new_object); vm_object_deallocate(src_object); *_result_object = new_object; return(KERN_SUCCESS); @@ -1740,8 +1740,8 @@ vm_object_copy_slowly( __private_extern__ boolean_t vm_object_copy_quickly( vm_object_t *_object, /* INOUT */ - vm_object_offset_t offset, /* IN */ - vm_object_size_t size, /* IN */ + __unused vm_object_offset_t offset, /* IN */ + __unused vm_object_size_t size, /* IN */ boolean_t *_src_needs_copy, /* OUT */ boolean_t *_dst_needs_copy) /* OUT */ { @@ -1921,8 +1921,6 @@ static int copy_delayed_lock_collisions = 0; static int copy_delayed_max_collisions = 0; static int copy_delayed_lock_contention = 0; static int copy_delayed_protect_iterate = 0; -static int copy_delayed_protect_lookup = 0; -static int copy_delayed_protect_lookup_wait = 0; /* * Routine: vm_object_copy_delayed [internal] @@ -1932,7 +1930,8 @@ static int copy_delayed_protect_lookup_wait = 0; * the asymmetric copy-on-write algorithm. * * In/out conditions: - * The object must be unlocked on entry. + * The src_object must be locked on entry. It will be unlocked + * on exit - so the caller must also hold a reference to it. * * This routine will not block waiting for user-generated * events. It is not interruptible. @@ -1946,7 +1945,7 @@ vm_object_copy_delayed( vm_object_t new_copy = VM_OBJECT_NULL; vm_object_t old_copy; vm_page_t p; - vm_object_size_t copy_size; + vm_object_size_t copy_size = src_offset + size; int collisions = 0; /* @@ -1989,8 +1988,13 @@ vm_object_copy_delayed( */ Retry: - vm_object_lock(src_object); + /* + * Wait for paging in progress. + */ + if (!src_object->true_share) + vm_object_paging_wait(src_object, THREAD_UNINT); + /* * See whether we can reuse the result of a previous * copy operation. @@ -2013,6 +2017,7 @@ vm_object_copy_delayed( if (collisions > copy_delayed_max_collisions) copy_delayed_max_collisions = collisions; + vm_object_lock(src_object); goto Retry; } @@ -2027,27 +2032,45 @@ vm_object_copy_delayed( * It has not been modified. * * Return another reference to - * the existing copy-object. 
+ * the existing copy-object if + * we can safely grow it (if + * needed). */ - assert(old_copy->ref_count > 0); - old_copy->ref_count++; - - if (old_copy->size < src_offset+size) - old_copy->size = src_offset+size; -#if TASK_SWAPPER - /* - * We have to reproduce some of the code from - * vm_object_res_reference because we've taken - * the locks out of order here, and deadlock - * would result if we simply called that function. - */ - if (++old_copy->res_count == 1) { - assert(old_copy->shadow == src_object); - vm_object_res_reference(src_object); + if (old_copy->size < copy_size) { + /* + * We can't perform a delayed copy if any of the + * pages in the extended range are wired (because + * we can't safely take write permission away from + * wired pages). If the pages aren't wired, then + * go ahead and protect them. + */ + copy_delayed_protect_iterate++; + queue_iterate(&src_object->memq, p, vm_page_t, listq) { + if (!p->fictitious && + p->offset >= old_copy->size && + p->offset < copy_size) { + if (p->wire_count > 0) { + vm_object_unlock(old_copy); + vm_object_unlock(src_object); + + if (new_copy != VM_OBJECT_NULL) { + vm_object_unlock(new_copy); + vm_object_deallocate(new_copy); + } + + return VM_OBJECT_NULL; + } else { + pmap_page_protect(p->phys_page, + (VM_PROT_ALL & ~VM_PROT_WRITE & + ~p->page_lock)); + } + } + } + old_copy->size = copy_size; } -#endif /* TASK_SWAPPER */ - + + vm_object_reference_locked(old_copy); vm_object_unlock(old_copy); vm_object_unlock(src_object); @@ -2058,21 +2081,24 @@ vm_object_copy_delayed( return(old_copy); } + + /* + * Adjust the size argument so that the newly-created + * copy object will be large enough to back either the + * old copy object or the new mapping. + */ + if (old_copy->size > copy_size) + copy_size = old_copy->size; + if (new_copy == VM_OBJECT_NULL) { vm_object_unlock(old_copy); vm_object_unlock(src_object); - new_copy = vm_object_allocate(src_offset + size); + new_copy = vm_object_allocate(copy_size); + vm_object_lock(src_object); vm_object_lock(new_copy); goto Retry; } - - /* - * Adjust the size argument so that the newly-created - * copy object will be large enough to back either the - * new old copy object or the new mapping. - */ - if (old_copy->size > src_offset+size) - size = old_copy->size - src_offset; + new_copy->size = copy_size; /* * The copy-object is always made large enough to @@ -2084,6 +2110,44 @@ vm_object_copy_delayed( assert((old_copy->shadow == src_object) && (old_copy->shadow_offset == (vm_object_offset_t) 0)); + } else if (new_copy == VM_OBJECT_NULL) { + vm_object_unlock(src_object); + new_copy = vm_object_allocate(copy_size); + vm_object_lock(src_object); + vm_object_lock(new_copy); + goto Retry; + } + + /* + * We now have the src object locked, and the new copy object + * allocated and locked (and potentially the old copy locked). + * Before we go any further, make sure we can still perform + * a delayed copy, as the situation may have changed. + * + * Specifically, we can't perform a delayed copy if any of the + * pages in the range are wired (because we can't safely take + * write permission away from wired pages). If the pages aren't + * wired, then go ahead and protect them. 
+ */ + copy_delayed_protect_iterate++; + queue_iterate(&src_object->memq, p, vm_page_t, listq) { + if (!p->fictitious && p->offset < copy_size) { + if (p->wire_count > 0) { + if (old_copy) + vm_object_unlock(old_copy); + vm_object_unlock(src_object); + vm_object_unlock(new_copy); + vm_object_deallocate(new_copy); + return VM_OBJECT_NULL; + } else { + pmap_page_protect(p->phys_page, + (VM_PROT_ALL & ~VM_PROT_WRITE & + ~p->page_lock)); + } + } + } + + if (old_copy != VM_OBJECT_NULL) { /* * Make the old copy-object shadow the new one. * It will receive no more pages from the original @@ -2104,26 +2168,11 @@ vm_object_copy_delayed( #endif vm_object_unlock(old_copy); /* done with old_copy */ - } else if (new_copy == VM_OBJECT_NULL) { - vm_object_unlock(src_object); - new_copy = vm_object_allocate(src_offset + size); - vm_object_lock(new_copy); - goto Retry; - } - - /* - * Readjust the copy-object size if necessary. - */ - copy_size = new_copy->size; - if (copy_size < src_offset+size) { - copy_size = src_offset+size; - new_copy->size = copy_size; } /* * Point the new copy at the existing object. */ - new_copy->shadow = src_object; new_copy->shadow_offset = 0; new_copy->shadowed = TRUE; /* caller must set needs_copy */ @@ -2131,23 +2180,9 @@ vm_object_copy_delayed( src_object->ref_count++; VM_OBJ_RES_INCR(src_object); src_object->copy = new_copy; + vm_object_unlock(src_object); vm_object_unlock(new_copy); - /* - * Mark all (current) pages of the existing object copy-on-write. - * This object may have a shadow chain below it, but - * those pages will already be marked copy-on-write. - */ - - vm_object_paging_wait(src_object, THREAD_UNINT); - copy_delayed_protect_iterate++; - queue_iterate(&src_object->memq, p, vm_page_t, listq) { - if (!p->fictitious) - pmap_page_protect(p->phys_addr, - (VM_PROT_ALL & ~VM_PROT_WRITE & - ~p->page_lock)); - } - vm_object_unlock(src_object); XPR(XPR_VM_OBJECT, "vm_object_copy_delayed: used copy object %X for source %X\n", (integer_t)new_copy, (integer_t)src_object, 0, 0, 0); @@ -2207,6 +2242,18 @@ vm_object_copy_strategically( */ switch (copy_strategy) { + case MEMORY_OBJECT_COPY_DELAY: + *dst_object = vm_object_copy_delayed(src_object, + src_offset, size); + if (*dst_object != VM_OBJECT_NULL) { + *dst_offset = src_offset; + *dst_needs_copy = TRUE; + result = KERN_SUCCESS; + break; + } + vm_object_lock(src_object); + /* fall thru when delayed copy not allowed */ + case MEMORY_OBJECT_COPY_NONE: result = vm_object_copy_slowly(src_object, src_offset, size, interruptible, dst_object); @@ -2225,15 +2272,6 @@ vm_object_copy_strategically( } break; - case MEMORY_OBJECT_COPY_DELAY: - vm_object_unlock(src_object); - *dst_object = vm_object_copy_delayed(src_object, - src_offset, size); - *dst_offset = src_offset; - *dst_needs_copy = TRUE; - result = KERN_SUCCESS; - break; - case MEMORY_OBJECT_COPY_SYMMETRIC: XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0); vm_object_unlock(src_object); @@ -2395,6 +2433,66 @@ vm_object_shadow( */ #if 0 +static void vm_object_abort_activity( + vm_object_t object); + +/* + * Routine: vm_object_abort_activity [internal use only] + * Purpose: + * Abort paging requests pending on this object. + * In/out conditions: + * The object is locked on entry and exit. 
+ */ +static void +vm_object_abort_activity( + vm_object_t object) +{ + register + vm_page_t p; + vm_page_t next; + + XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n", + (integer_t)object, 0, 0, 0, 0); + + /* + * Abort all activity that would be waiting + * for a result on this memory object. + * + * We could also choose to destroy all pages + * that we have in memory for this object, but + * we don't. + */ + + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + next = (vm_page_t) queue_next(&p->listq); + + /* + * If it's being paged in, destroy it. + * If an unlock has been requested, start it again. + */ + + if (p->busy && p->absent) { + VM_PAGE_FREE(p); + } + else { + if (p->unlock_request != VM_PROT_NONE) + p->unlock_request = VM_PROT_NONE; + PAGE_WAKEUP(p); + } + + p = next; + } + + /* + * Wake up threads waiting for the memory object to + * become ready. + */ + + object->pager_ready = TRUE; + vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY); +} + /* * Routine: vm_object_pager_dead * @@ -2522,69 +2620,53 @@ vm_object_enter( * Look for an object associated with this port. */ -restart: vm_object_cache_lock(); - for (;;) { + do { entry = vm_object_hash_lookup(pager, FALSE); - /* - * If a previous object is being terminated, - * we must wait for the termination message - * to be queued. - * - * We set kobject to a non-null value to let the - * terminator know that someone is waiting. - * Among the possibilities is that the port - * could die while we're waiting. Must restart - * instead of continuing the loop. - */ - - if (entry != VM_OBJECT_HASH_ENTRY_NULL) { - if (entry->object != VM_OBJECT_NULL) - break; - - entry->waiting = TRUE; - assert_wait((event_t) pager, THREAD_UNINT); - vm_object_cache_unlock(); - thread_block((void (*)(void))0); - goto restart; - } - - /* - * We must unlock to create a new object; - * if we do so, we must try the lookup again. - */ - - if (new_object == VM_OBJECT_NULL) { - vm_object_cache_unlock(); - assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL); - new_entry = vm_object_hash_entry_alloc(pager); - new_object = vm_object_allocate(size); - vm_object_cache_lock(); - } else { - /* - * Lookup failed twice, and we have something - * to insert; set the object. - */ - - if (entry == VM_OBJECT_HASH_ENTRY_NULL) { + if (entry == VM_OBJECT_HASH_ENTRY_NULL) { + if (new_object == VM_OBJECT_NULL) { + /* + * We must unlock to create a new object; + * if we do so, we must try the lookup again. + */ + vm_object_cache_unlock(); + assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL); + new_entry = vm_object_hash_entry_alloc(pager); + new_object = vm_object_allocate(size); + vm_object_cache_lock(); + } else { + /* + * Lookup failed twice, and we have something + * to insert; set the object. + */ vm_object_hash_insert(new_entry); entry = new_entry; + entry->object = new_object; new_entry = VM_OBJECT_HASH_ENTRY_NULL; + new_object = VM_OBJECT_NULL; + must_init = TRUE; } - - entry->object = new_object; - new_object = VM_OBJECT_NULL; - must_init = TRUE; + } else if (entry->object == VM_OBJECT_NULL) { + /* + * If a previous object is being terminated, + * we must wait for the termination message + * to be queued (and lookup the entry again). 
+ */ + entry->waiting = TRUE; + entry = VM_OBJECT_HASH_ENTRY_NULL; + assert_wait((event_t) pager, THREAD_UNINT); + vm_object_cache_unlock(); + thread_block(THREAD_CONTINUE_NULL); + vm_object_cache_lock(); } - } + } while (entry == VM_OBJECT_HASH_ENTRY_NULL); object = entry->object; assert(object != VM_OBJECT_NULL); if (!must_init) { vm_object_lock(object); - assert(object->pager_created); assert(!internal || object->internal); if (named) { assert(!object->named); @@ -2628,16 +2710,17 @@ restart: vm_object_hash_entry_free(new_entry); if (must_init) { - pager_request_t pager_request; + memory_object_control_t control; /* * Allocate request port. */ - pager_request = memory_object_control_allocate(object); - assert (pager_request != PAGER_REQUEST_NULL); + control = memory_object_control_allocate(object); + assert (control != MEMORY_OBJECT_CONTROL_NULL); vm_object_lock(object); + assert(object != kernel_object); /* * Copy the reference we were given. @@ -2652,7 +2735,7 @@ restart: /* copy strategy invalid until set by memory manager */ object->copy_strategy = MEMORY_OBJECT_COPY_INVALID; } - object->pager_request = pager_request; + object->pager_control = control; object->pager_ready = FALSE; vm_object_unlock(object); @@ -2662,7 +2745,7 @@ restart: */ (void) memory_object_init(pager, - object->pager_request, + object->pager_control, PAGE_SIZE); vm_object_lock(object); @@ -2729,6 +2812,8 @@ vm_object_pager_create( XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n", (integer_t)object, 0,0,0,0); + assert(object != kernel_object); + if (memory_manager_default_check() != KERN_SUCCESS) return; @@ -2839,7 +2924,6 @@ vm_object_remove( vm_object_t object) { memory_object_t pager; - pager_request_t pager_request; if ((pager = object->pager) != MEMORY_OBJECT_NULL) { vm_object_hash_entry_t entry; @@ -2866,6 +2950,8 @@ static boolean_t vm_object_bypass_allowed = TRUE; static int vm_external_discarded; static int vm_external_collapsed; +unsigned long vm_object_collapse_encrypted = 0; + /* * Routine: vm_object_do_collapse * Purpose: @@ -2903,7 +2989,7 @@ vm_object_do_collapse( new_offset = (p->offset - backing_offset); assert(!p->busy || p->absent); - + /* * If the parent has a page here, or if * this page falls outside the parent, @@ -2915,6 +3001,19 @@ vm_object_do_collapse( if (p->offset < backing_offset || new_offset >= size) { VM_PAGE_FREE(p); } else { + /* + * ENCRYPTED SWAP: + * The encryption key includes the "pager" and the + * "paging_offset". These might not be the same in + * the new object, so we can't just move an encrypted + * page from one object to the other. We can't just + * decrypt the page here either, because that would drop + * the object lock. + * The caller should check for encrypted pages before + * attempting to collapse. + */ + ASSERT_PAGE_DECRYPTED(p); + pp = vm_page_lookup(object, new_offset); if (pp == VM_PAGE_NULL) { @@ -2955,8 +3054,13 @@ vm_object_do_collapse( } } - assert(object->pager == MEMORY_OBJECT_NULL || - backing_object->pager == MEMORY_OBJECT_NULL); +#if !MACH_PAGEMAP + assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL + || (!backing_object->pager_created + && backing_object->pager == MEMORY_OBJECT_NULL)); +#else + assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL); +#endif /* !MACH_PAGEMAP */ if (backing_object->pager != MEMORY_OBJECT_NULL) { vm_object_hash_entry_t entry; @@ -2969,27 +3073,26 @@ vm_object_do_collapse( * unused portion. 
*/ + assert(!object->paging_in_progress); object->pager = backing_object->pager; entry = vm_object_hash_lookup(object->pager, FALSE); assert(entry != VM_OBJECT_HASH_ENTRY_NULL); entry->object = object; object->pager_created = backing_object->pager_created; - object->pager_request = backing_object->pager_request; + object->pager_control = backing_object->pager_control; object->pager_ready = backing_object->pager_ready; object->pager_initialized = backing_object->pager_initialized; object->cluster_size = backing_object->cluster_size; object->paging_offset = backing_object->paging_offset + backing_offset; - if (object->pager_request != PAGER_REQUEST_NULL) { - memory_object_control_collapse(object->pager_request, + if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) { + memory_object_control_collapse(object->pager_control, object); } } vm_object_cache_unlock(); - object->paging_offset = backing_object->paging_offset + backing_offset; - #if MACH_PAGEMAP /* * If the shadow offset is 0, the use the existence map from @@ -3022,10 +3125,17 @@ vm_object_do_collapse( * moves from within backing_object to within object. */ + assert(!object->phys_contiguous); + assert(!backing_object->phys_contiguous); object->shadow = backing_object->shadow; - object->shadow_offset += backing_object->shadow_offset; + if (object->shadow) { + object->shadow_offset += backing_object->shadow_offset; + } else { + /* no shadow, therefore no shadow offset... */ + object->shadow_offset = 0; + } assert((object->shadow == VM_OBJECT_NULL) || - (object->shadow->copy == VM_OBJECT_NULL)); + (object->shadow->copy != backing_object)); /* * Discard backing_object. @@ -3045,7 +3155,7 @@ vm_object_do_collapse( XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n", (integer_t)backing_object, 0,0,0,0); - zfree(vm_object_zone, (vm_offset_t) backing_object); + zfree(vm_object_zone, backing_object); object_collapses++; } @@ -3079,8 +3189,15 @@ vm_object_do_bypass( vm_object_reference(backing_object->shadow); #endif /* TASK_SWAPPER */ + assert(!object->phys_contiguous); + assert(!backing_object->phys_contiguous); object->shadow = backing_object->shadow; - object->shadow_offset += backing_object->shadow_offset; + if (object->shadow) { + object->shadow_offset += backing_object->shadow_offset; + } else { + /* no shadow, therefore no shadow offset... */ + object->shadow_offset = 0; + } /* * Backing object might have had a copy pointer @@ -3163,17 +3280,24 @@ vm_object_do_bypass( * Requires that the object be locked and the page queues be unlocked. * */ +static unsigned long vm_object_collapse_calls = 0; +static unsigned long vm_object_collapse_objects = 0; +static unsigned long vm_object_collapse_do_collapse = 0; +static unsigned long vm_object_collapse_do_bypass = 0; __private_extern__ void vm_object_collapse( - register vm_object_t object) + register vm_object_t object, + register vm_object_offset_t hint_offset) { register vm_object_t backing_object; - register vm_object_offset_t backing_offset; - register vm_object_size_t size; - register vm_object_offset_t new_offset; - register vm_page_t p; + register unsigned int rcount; + register unsigned int size; + vm_object_offset_t collapse_min_offset; + vm_object_offset_t collapse_max_offset; + vm_page_t page; + vm_object_t original_object; - vm_offset_t current_offset; + vm_object_collapse_calls++; if (! vm_object_collapse_allowed && ! 
vm_object_bypass_allowed) { return; @@ -3182,26 +3306,45 @@ vm_object_collapse( XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n", (integer_t)object, 0,0,0,0); + if (object == VM_OBJECT_NULL) + return; + + original_object = object; + while (TRUE) { + vm_object_collapse_objects++; /* * Verify that the conditions are right for either * collapse or bypass: - * - * The object exists and no pages in it are currently - * being paged out, and */ - if (object == VM_OBJECT_NULL || - object->paging_in_progress != 0 || - object->absent_count != 0) - return; /* * There is a backing object, and */ - if ((backing_object = object->shadow) == VM_OBJECT_NULL) + backing_object = object->shadow; + if (backing_object == VM_OBJECT_NULL) { + if (object != original_object) { + vm_object_unlock(object); + } return; + } + /* + * No pages in the object are currently + * being paged out, and + */ + if (object->paging_in_progress != 0 || + object->absent_count != 0) { + /* try and collapse the rest of the shadow chain */ + vm_object_lock(backing_object); + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; + } + vm_object_lock(backing_object); /* @@ -3215,8 +3358,12 @@ vm_object_collapse( if (!backing_object->internal || backing_object->paging_in_progress != 0) { - vm_object_unlock(backing_object); - return; + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } /* @@ -3230,9 +3377,13 @@ vm_object_collapse( * parent object. */ if (backing_object->shadow != VM_OBJECT_NULL && - backing_object->shadow->copy != VM_OBJECT_NULL) { - vm_object_unlock(backing_object); - return; + backing_object->shadow->copy == backing_object) { + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } /* @@ -3245,22 +3396,28 @@ vm_object_collapse( * object, we may be able to collapse it into the * parent. * - * The backing object must not have a pager - * created for it, since collapsing an object - * into a backing_object dumps new pages into - * the backing_object that its pager doesn't - * know about. + * If MACH_PAGEMAP is defined: + * The parent must not have a pager created for it, + * since collapsing a backing_object dumps new pages + * into the parent that its pager doesn't know about + * (and the collapse code can't merge the existence + * maps). + * Otherwise: + * As long as one of the objects is still not known + * to the pager, we can collapse them. */ - if (backing_object->ref_count == 1 && - ! object->pager_created && - vm_object_collapse_allowed) { + (!object->pager_created +#if !MACH_PAGEMAP + || !backing_object->pager_created +#endif /*!MACH_PAGEMAP */ + ) && vm_object_collapse_allowed) { XPR(XPR_VM_OBJECT, - "vm_object_collapse: %x to %x, pager %x, pager_request %x\n", + "vm_object_collapse: %x to %x, pager %x, pager_control %x\n", (integer_t)backing_object, (integer_t)object, (integer_t)backing_object->pager, - (integer_t)backing_object->pager_request, 0); + (integer_t)backing_object->pager_control, 0); /* * We need the cache lock for collapsing, @@ -3268,10 +3425,46 @@ vm_object_collapse( */ if (! 
vm_object_cache_lock_try()) { + if (object != original_object) { + vm_object_unlock(object); + } vm_object_unlock(backing_object); return; } + /* + * ENCRYPTED SWAP + * We can't collapse the object if it contains + * any encypted page, because the encryption key + * includes the info. We can't + * drop the object lock in vm_object_do_collapse() + * so we can't decrypt the page there either. + */ + if (vm_pages_encrypted) { + collapse_min_offset = object->shadow_offset; + collapse_max_offset = + object->shadow_offset + object->size; + queue_iterate(&backing_object->memq, + page, vm_page_t, listq) { + if (page->encrypted && + (page->offset >= + collapse_min_offset) && + (page->offset < + collapse_max_offset)) { + /* + * We found an encrypted page + * in the backing object, + * within the range covered + * by the parent object: we can + * not collapse them. + */ + vm_object_collapse_encrypted++; + vm_object_cache_unlock(); + goto try_bypass; + } + } + } + /* * Collapse the object with its backing * object, and try again with the object's @@ -3279,112 +3472,207 @@ vm_object_collapse( */ vm_object_do_collapse(object, backing_object); + vm_object_collapse_do_collapse++; continue; } - + try_bypass: /* * Collapsing the backing object was not possible * or permitted, so let's try bypassing it. */ if (! vm_object_bypass_allowed) { - vm_object_unlock(backing_object); - return; + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } /* - * If the backing object has a pager but no pagemap, - * then we cannot bypass it, because we don't know - * what pages it has. + * If the object doesn't have all its pages present, + * we have to make sure no pages in the backing object + * "show through" before bypassing it. */ - if (backing_object->pager_created + size = atop(object->size); + rcount = object->resident_page_count; + if (rcount != size) { + vm_object_offset_t offset; + vm_object_offset_t backing_offset; + unsigned int backing_rcount; + unsigned int lookups = 0; + + /* + * If the backing object has a pager but no pagemap, + * then we cannot bypass it, because we don't know + * what pages it has. + */ + if (backing_object->pager_created #if MACH_PAGEMAP - && (backing_object->existence_map == VM_EXTERNAL_NULL) + && (backing_object->existence_map == VM_EXTERNAL_NULL) #endif /* MACH_PAGEMAP */ - ) { - vm_object_unlock(backing_object); - return; - } + ) { + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; + } - /* - * If the object has a pager but no pagemap, - * then we cannot bypass it, because we don't know - * what pages it has. - */ - if (object->pager_created + /* + * If the object has a pager but no pagemap, + * then we cannot bypass it, because we don't know + * what pages it has. 
+ */ + if (object->pager_created #if MACH_PAGEMAP - && (object->existence_map == VM_EXTERNAL_NULL) + && (object->existence_map == VM_EXTERNAL_NULL) #endif /* MACH_PAGEMAP */ - ) { - vm_object_unlock(backing_object); - return; - } + ) { + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; + } - backing_offset = object->shadow_offset; - size = object->size; + /* + * If all of the pages in the backing object are + * shadowed by the parent object, the parent + * object no longer has to shadow the backing + * object; it can shadow the next one in the + * chain. + * + * If the backing object has existence info, + * we must check examine its existence info + * as well. + * + */ - /* - * If all of the pages in the backing object are - * shadowed by the parent object, the parent - * object no longer has to shadow the backing - * object; it can shadow the next one in the - * chain. - * - * If the backing object has existence info, - * we must check examine its existence info - * as well. - * - */ + backing_offset = object->shadow_offset; + backing_rcount = backing_object->resident_page_count; - if(object->cow_hint >= size) - object->cow_hint = 0; - current_offset = object->cow_hint; - while(TRUE) { - if (vm_page_lookup(object, - (vm_object_offset_t)current_offset) - != VM_PAGE_NULL) { - current_offset+=PAGE_SIZE; - } else if ((object->pager_created) && - (object->existence_map != NULL) && - (vm_external_state_get(object->existence_map, - current_offset) - != VM_EXTERNAL_STATE_ABSENT)) { - current_offset+=PAGE_SIZE; - } else if (vm_page_lookup(backing_object, - (vm_object_offset_t)current_offset - + backing_offset)!= VM_PAGE_NULL) { - /* found a dependency */ - object->cow_hint = current_offset; - vm_object_unlock(backing_object); - return; - } else if ((backing_object->pager_created) && - (backing_object->existence_map != NULL) && - (vm_external_state_get( - backing_object->existence_map, - current_offset + backing_offset) - != VM_EXTERNAL_STATE_ABSENT)) { - /* found a dependency */ - object->cow_hint = current_offset; - vm_object_unlock(backing_object); - return; - } else { - current_offset+=PAGE_SIZE; +#define EXISTS_IN_OBJECT(obj, off, rc) \ + (vm_external_state_get((obj)->existence_map, \ + (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \ + ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) + + /* + * Check the hint location first + * (since it is often the quickest way out of here). + */ + if (object->cow_hint != ~(vm_offset_t)0) + hint_offset = (vm_object_offset_t)object->cow_hint; + else + hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ? + (hint_offset - 8 * PAGE_SIZE_64) : 0; + + if (EXISTS_IN_OBJECT(backing_object, hint_offset + + backing_offset, backing_rcount) && + !EXISTS_IN_OBJECT(object, hint_offset, rcount)) { + /* dependency right at the hint */ + object->cow_hint = (vm_offset_t)hint_offset; + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } - if(current_offset >= size) { - /* wrap at end of object */ - current_offset = 0; + + /* + * If the object's window onto the backing_object + * is large compared to the number of resident + * pages in the backing object, it makes sense to + * walk the backing_object's resident pages first. + * + * NOTE: Pages may be in both the existence map and + * resident. 
So, we can't permanently decrement + * the rcount here because the second loop may + * find the same pages in the backing object' + * existence map that we found here and we would + * double-decrement the rcount. We also may or + * may not have found the + */ + if (backing_rcount && size > + ((backing_object->existence_map) ? + backing_rcount : (backing_rcount >> 1))) { + unsigned int rc = rcount; + vm_page_t p; + + backing_rcount = backing_object->resident_page_count; + p = (vm_page_t)queue_first(&backing_object->memq); + do { + /* Until we get more than one lookup lock */ + if (lookups > 256) { + lookups = 0; + delay(1); + } + + offset = (p->offset - backing_offset); + if (offset < object->size && + offset != hint_offset && + !EXISTS_IN_OBJECT(object, offset, rc)) { + /* found a dependency */ + object->cow_hint = (vm_offset_t)offset; + break; + } + p = (vm_page_t) queue_next(&p->listq); + + } while (--backing_rcount); + if (backing_rcount != 0 ) { + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; + } } - if(current_offset == object->cow_hint) { - /* we are free of shadow influence */ - break; + + /* + * Walk through the offsets looking for pages in the + * backing object that show through to the object. + */ + if (backing_rcount || backing_object->existence_map) { + offset = hint_offset; + + while((offset = + (offset + PAGE_SIZE_64 < object->size) ? + (offset + PAGE_SIZE_64) : 0) != hint_offset) { + + /* Until we get more than one lookup lock */ + if (lookups > 256) { + lookups = 0; + delay(1); + } + + if (EXISTS_IN_OBJECT(backing_object, offset + + backing_offset, backing_rcount) && + !EXISTS_IN_OBJECT(object, offset, rcount)) { + /* found a dependency */ + object->cow_hint = (vm_offset_t)offset; + break; + } + } + if (offset != hint_offset) { + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; + } } } - /* reset the cow_hint for any objects deeper in the chain */ - object->cow_hint = 0; - + /* reset the offset hint for any objects deeper in the chain */ + object->cow_hint = (vm_offset_t)0; /* * All interesting pages in the backing object @@ -3393,6 +3681,7 @@ vm_object_collapse( */ vm_object_do_bypass(object, backing_object); + vm_object_collapse_do_bypass++; /* * Try again with this object's new backing object. @@ -3400,6 +3689,10 @@ vm_object_collapse( continue; } + + if (object != original_object) { + vm_object_unlock(object); + } } /* @@ -3430,7 +3723,7 @@ vm_object_page_remove( * It balances vm_object_lookup vs iteration. 
*/ - if (atop(end - start) < (unsigned)object->resident_page_count/16) { + if (atop_64(end - start) < (unsigned)object->resident_page_count/16) { vm_object_page_remove_lookup++; for (; start < end; start += PAGE_SIZE_64) { @@ -3438,8 +3731,7 @@ vm_object_page_remove( if (p != VM_PAGE_NULL) { assert(!p->cleaning && !p->pageout); if (!p->fictitious) - pmap_page_protect(p->phys_addr, - VM_PROT_NONE); + pmap_disconnect(p->phys_page); VM_PAGE_FREE(p); } } @@ -3452,8 +3744,7 @@ vm_object_page_remove( if ((start <= p->offset) && (p->offset < end)) { assert(!p->cleaning && !p->pageout); if (!p->fictitious) - pmap_page_protect(p->phys_addr, - VM_PROT_NONE); + pmap_disconnect(p->phys_page); VM_PAGE_FREE(p); } p = next; @@ -3492,7 +3783,7 @@ vm_object_coalesce( register vm_object_t prev_object, vm_object_t next_object, vm_object_offset_t prev_offset, - vm_object_offset_t next_offset, + __unused vm_object_offset_t next_offset, vm_object_size_t prev_size, vm_object_size_t next_size) { @@ -3519,7 +3810,7 @@ vm_object_coalesce( /* * Try to collapse the object first */ - vm_object_collapse(prev_object); + vm_object_collapse(prev_object, prev_offset); /* * Can't coalesce if pages not mapped to @@ -3528,6 +3819,7 @@ vm_object_coalesce( * . paged out * . shadows another object * . has a copy elsewhere + * . is purgable * . paging references (pages might be in page-list) */ @@ -3536,6 +3828,7 @@ vm_object_coalesce( (prev_object->shadow != VM_OBJECT_NULL) || (prev_object->copy != VM_OBJECT_NULL) || (prev_object->true_share != FALSE) || + (prev_object->purgable != VM_OBJECT_NONPURGABLE) || (prev_object->paging_in_progress != 0)) { vm_object_unlock(prev_object); return(FALSE); @@ -3597,7 +3890,7 @@ vm_object_page_map( vm_page_t old_page; vm_object_offset_t addr; - num_pages = atop(size); + num_pages = atop_64(size); for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) { @@ -3678,23 +3971,23 @@ vm_object_cached( */ void vm_external_print( - vm_external_map_t map, - vm_size_t size) + vm_external_map_t emap, + vm_size_t size) { - if (map == VM_EXTERNAL_NULL) { + if (emap == VM_EXTERNAL_NULL) { printf("0 "); } else { vm_size_t existence_size = stob(size); printf("{ size=%d, map=[", existence_size); if (existence_size > 0) { - print_bitstring(map[0]); + print_bitstring(emap[0]); } if (existence_size > 1) { - print_bitstring(map[1]); + print_bitstring(emap[1]); } if (existence_size > 2) { printf("..."); - print_bitstring(map[existence_size-1]); + print_bitstring(emap[existence_size-1]); } printf("] }\n"); } @@ -3706,8 +3999,6 @@ int vm_follow_object( vm_object_t object) { - extern db_indent; - int count = 0; int orig_db_indent = db_indent; @@ -3736,17 +4027,18 @@ vm_follow_object( */ void vm_object_print( - vm_object_t object, - boolean_t have_addr, - int arg_count, - char *modif) + db_addr_t db_addr, + __unused boolean_t have_addr, + __unused int arg_count, + __unused char *modif) { + vm_object_t object; register vm_page_t p; - extern db_indent; - char *s; + const char *s; register int count; + object = (vm_object_t) (long) db_addr; if (object == VM_OBJECT_NULL) return; @@ -3756,7 +4048,7 @@ vm_object_print( iprintf("size=0x%x", object->size); printf(", cluster=0x%x", object->cluster_size); - printf(", frozen=0x%x", object->frozen_size); + printf(", memq_hint=%p", object->memq_hint); printf(", ref_count=%d\n", object->ref_count); iprintf(""); #if TASK_SWAPPER @@ -3768,7 +4060,7 @@ vm_object_print( if (object->shadow) { register int i = 0; vm_object_t shadow = object; - while(shadow = shadow->shadow) + 
while((shadow = shadow->shadow)) i++; printf(" (depth %d)", i); } @@ -3778,7 +4070,7 @@ vm_object_print( iprintf("pager=0x%x", object->pager); printf(", paging_offset=0x%x", object->paging_offset); - printf(", pager_request=0x%x\n", object->pager_request); + printf(", pager_control=0x%x\n", object->pager_control); iprintf("copy_strategy=%d[", object->copy_strategy); switch (object->copy_strategy) { @@ -3854,10 +4146,11 @@ vm_object_print( (object->pageout ? "" : "!"), (object->internal ? "internal" : "external"), (object->temporary ? "temporary" : "permanent")); - iprintf("%salive, %slock_in_progress, %slock_restart, %sshadowed, %scached, %sprivate\n", + iprintf("%salive, %spurgable, %spurgable_volatile, %spurgable_empty, %sshadowed, %scached, %sprivate\n", (object->alive ? "" : "!"), - (object->lock_in_progress ? "" : "!"), - (object->lock_restart ? "" : "!"), + ((object->purgable != VM_OBJECT_NONPURGABLE) ? "" : "!"), + ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE) ? "" : "!"), + ((object->purgable == VM_OBJECT_PURGABLE_EMPTY) ? "" : "!"), (object->shadowed ? "" : "!"), (vm_object_cached(object) ? "" : "!"), (object->private ? "" : "!")); @@ -3888,7 +4181,7 @@ vm_object_print( } count++; - printf("(off=0x%X,page=0x%X)", p->offset, (integer_t) p); + printf("(off=0x%llX,page=%p)", p->offset, p); p = (vm_page_t) queue_next(&p->listq); } if (count != 0) { @@ -3962,27 +4255,24 @@ vm_object_find( kern_return_t vm_object_populate_with_private( - vm_object_t object, + vm_object_t object, vm_object_offset_t offset, - vm_offset_t phys_addr, - vm_size_t size) + ppnum_t phys_page, + vm_size_t size) { - vm_offset_t base_addr; + ppnum_t base_page; vm_object_offset_t base_offset; if(!object->private) return KERN_FAILURE; - if((base_addr = trunc_page(phys_addr)) != phys_addr) { - return KERN_FAILURE; - } - + base_page = phys_page; vm_object_lock(object); if(!object->phys_contiguous) { vm_page_t m; - if((base_offset = trunc_page(offset)) != offset) { + if((base_offset = trunc_page_64(offset)) != offset) { vm_object_unlock(object); return KERN_FAILURE; } @@ -3994,7 +4284,7 @@ vm_object_populate_with_private( vm_page_lock_queues(); m->fictitious = FALSE; m->private = TRUE; - m->phys_addr = base_addr; + m->phys_page = base_page; if(!m->busy) { m->busy = TRUE; } @@ -4004,12 +4294,23 @@ vm_object_populate_with_private( } m->list_req_pending = TRUE; vm_page_unlock_queues(); - } else if (m->phys_addr != base_addr) { + } else if (m->phys_page != base_page) { /* pmap call to clear old mapping */ - pmap_page_protect(m->phys_addr, - VM_PROT_NONE); - m->phys_addr = base_addr; + pmap_disconnect(m->phys_page); + m->phys_page = base_page; } + + /* + * ENCRYPTED SWAP: + * We're not pointing to the same + * physical page any longer and the + * contents of the new one are not + * supposed to be encrypted. + * XXX What happens to the original + * physical page. Is it lost ? 
+ */ + m->encrypted = FALSE; + } else { while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) @@ -4017,7 +4318,7 @@ vm_object_populate_with_private( vm_page_lock_queues(); m->fictitious = FALSE; m->private = TRUE; - m->phys_addr = base_addr; + m->phys_page = base_page; m->list_req_pending = TRUE; m->absent = TRUE; m->unusual = TRUE; @@ -4025,7 +4326,7 @@ vm_object_populate_with_private( vm_page_unlock_queues(); vm_page_insert(m, object, base_offset); } - base_addr += PAGE_SIZE; + base_page++; /* Go to the next physical page */ base_offset += PAGE_SIZE; size -= PAGE_SIZE; } @@ -4038,7 +4339,7 @@ vm_object_populate_with_private( /* shadows on contiguous memory are not allowed */ /* we therefore can use the offset field */ - object->shadow_offset = (vm_object_offset_t)phys_addr; + object->shadow_offset = (vm_object_offset_t)(phys_page << 12); object->size = size; } vm_object_unlock(object); @@ -4059,13 +4360,12 @@ vm_object_populate_with_private( __private_extern__ kern_return_t memory_object_free_from_cache( - host_t host, + __unused host_t host, int *pager_id, int *count) { int object_released = 0; - int i; register vm_object_t object = VM_OBJECT_NULL; vm_object_t shadow; @@ -4159,7 +4459,7 @@ memory_object_create_named( VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT); } - *control = object->pager_request; + *control = object->pager_control; vm_object_unlock(object); } return (KERN_SUCCESS); @@ -4350,6 +4650,9 @@ vm_object_release_name( return KERN_SUCCESS; } } + /*NOTREACHED*/ + assert(0); + return KERN_FAILURE; } @@ -4362,8 +4665,9 @@ vm_object_lock_request( int flags, vm_prot_t prot) { - vm_object_offset_t original_offset = offset; - boolean_t should_flush=flags & MEMORY_OBJECT_DATA_FLUSH; + __unused boolean_t should_flush; + + should_flush = flags & MEMORY_OBJECT_DATA_FLUSH; XPR(XPR_MEMORY_OBJECT, "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n", @@ -4379,7 +4683,7 @@ vm_object_lock_request( if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE) return (KERN_INVALID_ARGUMENT); - size = round_page(size); + size = round_page_64(size); /* * Lock the object, and acquire a paging reference to @@ -4387,10 +4691,9 @@ vm_object_lock_request( */ vm_object_lock(object); vm_object_paging_begin(object); - offset -= object->paging_offset; (void)vm_object_update(object, - offset, size, should_return, flags, prot); + offset, size, NULL, NULL, should_return, flags, prot); vm_object_paging_end(object); vm_object_unlock(object); @@ -4398,7 +4701,364 @@ vm_object_lock_request( return (KERN_SUCCESS); } +/* + * Empty a purgable object by grabbing the physical pages assigned to it and + * putting them on the free queue without writing them to backing store, etc. + * When the pages are next touched they will be demand zero-fill pages. We + * skip pages which are busy, being paged in/out, wired, etc. We do _not_ + * skip referenced/dirty pages, pages on the active queue, etc. We're more + * than happy to grab these since this is a purgable object. We mark the + * object as "empty" after reaping its pages. + * + * On entry the object and page queues are locked, the object must be a + * purgable object with no delayed copies pending. 
+ */ +unsigned int +vm_object_purge(vm_object_t object) +{ + vm_page_t p, next; + unsigned int num_purged_pages; + vm_page_t local_freeq; + unsigned long local_freed; + int purge_loop_quota; +/* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */ +#define PURGE_BATCH_FREE_LIMIT 50 +/* release page queues lock every PURGE_LOOP_QUOTA iterations */ +#define PURGE_LOOP_QUOTA 100 + + num_purged_pages = 0; + if (object->purgable == VM_OBJECT_NONPURGABLE) + return num_purged_pages; + + object->purgable = VM_OBJECT_PURGABLE_EMPTY; + + assert(object->copy == VM_OBJECT_NULL); + assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); + purge_loop_quota = PURGE_LOOP_QUOTA; + + local_freeq = VM_PAGE_NULL; + local_freed = 0; + + /* + * Go through the object's resident pages and try and discard them. + */ + next = (vm_page_t)queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t)next)) { + p = next; + next = (vm_page_t)queue_next(&next->listq); + + if (purge_loop_quota-- == 0) { + /* + * Avoid holding the page queues lock for too long. + * Let someone else take it for a while if needed. + * Keep holding the object's lock to guarantee that + * the object's page list doesn't change under us + * while we yield. + */ + if (local_freeq != VM_PAGE_NULL) { + /* + * Flush our queue of pages to free. + */ + vm_page_free_list(local_freeq); + local_freeq = VM_PAGE_NULL; + local_freed = 0; + } + vm_page_unlock_queues(); + mutex_pause(); + vm_page_lock_queues(); + + /* resume with the current page and a new quota */ + purge_loop_quota = PURGE_LOOP_QUOTA; + } + + + if (p->busy || p->cleaning || p->laundry || + p->list_req_pending) { + /* page is being acted upon, so don't mess with it */ + continue; + } + if (p->wire_count) { + /* don't discard a wired page */ + continue; + } + + if (p->tabled) { + /* clean up the object/offset table */ + vm_page_remove(p); + } + if (p->absent) { + /* update the object's count of absent pages */ + vm_object_absent_release(object); + } + + /* we can discard this page */ + + /* advertize that this page is in a transition state */ + p->busy = TRUE; + + if (p->no_isync == TRUE) { + /* the page hasn't been mapped yet */ + /* (optimization to delay the i-cache sync) */ + } else { + /* unmap the page */ + int refmod_state; + + refmod_state = pmap_disconnect(p->phys_page); + if (refmod_state & VM_MEM_MODIFIED) { + p->dirty = TRUE; + } + } + + if (p->dirty || p->precious) { + /* we saved the cost of cleaning this page ! */ + num_purged_pages++; + vm_page_purged_count++; + } + + /* remove page from active or inactive queue... */ + VM_PAGE_QUEUES_REMOVE(p); + + /* ... and put it on our queue of pages to free */ + assert(!p->laundry); + assert(p->object != kernel_object); + assert(p->pageq.next == NULL && + p->pageq.prev == NULL); + p->pageq.next = (queue_entry_t) local_freeq; + local_freeq = p; + if (++local_freed >= PURGE_BATCH_FREE_LIMIT) { + /* flush our queue of pages to free */ + vm_page_free_list(local_freeq); + local_freeq = VM_PAGE_NULL; + local_freed = 0; + } + } + + /* flush our local queue of pages to free one last time */ + if (local_freeq != VM_PAGE_NULL) { + vm_page_free_list(local_freeq); + local_freeq = VM_PAGE_NULL; + local_freed = 0; + } + + return num_purged_pages; +} + +/* + * vm_object_purgable_control() allows the caller to control and investigate the + * state of a purgable object. A purgable object is created via a call to + * vm_allocate() with VM_FLAGS_PURGABLE specified. 
A purgable object will + * never be coalesced with any other object -- even other purgable objects -- + * and will thus always remain a distinct object. A purgable object has + * special semantics when its reference count is exactly 1. If its reference + * count is greater than 1, then a purgable object will behave like a normal + * object and attempts to use this interface will result in an error return + * of KERN_INVALID_ARGUMENT. + * + * A purgable object may be put into a "volatile" state which will make the + * object's pages elligable for being reclaimed without paging to backing + * store if the system runs low on memory. If the pages in a volatile + * purgable object are reclaimed, the purgable object is said to have been + * "emptied." When a purgable object is emptied the system will reclaim as + * many pages from the object as it can in a convenient manner (pages already + * en route to backing store or busy for other reasons are left as is). When + * a purgable object is made volatile, its pages will generally be reclaimed + * before other pages in the application's working set. This semantic is + * generally used by applications which can recreate the data in the object + * faster than it can be paged in. One such example might be media assets + * which can be reread from a much faster RAID volume. + * + * A purgable object may be designated as "non-volatile" which means it will + * behave like all other objects in the system with pages being written to and + * read from backing store as needed to satisfy system memory needs. If the + * object was emptied before the object was made non-volatile, that fact will + * be returned as the old state of the purgable object (see + * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which + * were reclaimed as part of emptying the object will be refaulted in as + * zero-fill on demand. It is up to the application to note that an object + * was emptied and recreate the objects contents if necessary. When a + * purgable object is made non-volatile, its pages will generally not be paged + * out to backing store in the immediate future. A purgable object may also + * be manually emptied. + * + * Finally, the current state (non-volatile, volatile, volatile & empty) of a + * volatile purgable object may be queried at any time. This information may + * be used as a control input to let the application know when the system is + * experiencing memory pressure and is reclaiming memory. + * + * The specified address may be any address within the purgable object. If + * the specified address does not represent any object in the target task's + * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the + * object containing the specified address is not a purgable object, then + * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be + * returned. + * + * The control parameter may be any one of VM_PURGABLE_SET_STATE or + * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter + * state is used to set the new state of the purgable object and return its + * old state. For VM_PURGABLE_GET_STATE, the current state of the purgable + * object is returned in the parameter state. + * + * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE, + * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent + * the non-volatile, volatile and volatile/empty states described above. 
+ * Setting the state of a purgable object to VM_PURGABLE_EMPTY will + * immediately reclaim as many pages in the object as can be conveniently + * collected (some may have already been written to backing store or be + * otherwise busy). + * + * The process of making a purgable object non-volatile and determining its + * previous state is atomic. Thus, if a purgable object is made + * VM_PURGABLE_NONVOLATILE and the old state is returned as + * VM_PURGABLE_VOLATILE, then the purgable object's previous contents are + * completely intact and will remain so until the object is made volatile + * again. If the old state is returned as VM_PURGABLE_EMPTY then the object + * was reclaimed while it was in a volatile state and its previous contents + * have been lost. + */ +/* + * The object must be locked. + */ +kern_return_t +vm_object_purgable_control( + vm_object_t object, + vm_purgable_t control, + int *state) +{ + int old_state; + vm_page_t p; + if (object == VM_OBJECT_NULL) { + /* + * Object must already be present or it can't be purgable. + */ + return KERN_INVALID_ARGUMENT; + } + + /* + * Get current state of the purgable object. + */ + switch (object->purgable) { + case VM_OBJECT_NONPURGABLE: + return KERN_INVALID_ARGUMENT; + + case VM_OBJECT_PURGABLE_NONVOLATILE: + old_state = VM_PURGABLE_NONVOLATILE; + break; + + case VM_OBJECT_PURGABLE_VOLATILE: + old_state = VM_PURGABLE_VOLATILE; + break; + + case VM_OBJECT_PURGABLE_EMPTY: + old_state = VM_PURGABLE_EMPTY; + break; + + default: + old_state = VM_PURGABLE_NONVOLATILE; + panic("Bad state (%d) for purgable object!\n", + object->purgable); + /*NOTREACHED*/ + } + + /* purgable cant have delayed copies - now or in the future */ + assert(object->copy == VM_OBJECT_NULL); + assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); + + /* + * Execute the desired operation. + */ + if (control == VM_PURGABLE_GET_STATE) { + *state = old_state; + return KERN_SUCCESS; + } + + switch (*state) { + case VM_PURGABLE_NONVOLATILE: + vm_page_lock_queues(); + if (object->purgable != VM_OBJECT_PURGABLE_NONVOLATILE) { + assert(vm_page_purgeable_count >= + object->resident_page_count); + vm_page_purgeable_count -= object->resident_page_count; + } + + object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE; + + /* + * If the object wasn't emptied, then mark all pages of the + * object as referenced in order to give them a complete turn + * of the virtual memory "clock" before becoming candidates + * for paging out (if the system is suffering from memory + * pressure). We don't really need to set the pmap reference + * bits (which would be expensive) since the software copies + * are believed if they're set to true ... + */ + if (old_state != VM_PURGABLE_EMPTY) { + for (p = (vm_page_t)queue_first(&object->memq); + !queue_end(&object->memq, (queue_entry_t)p); + p = (vm_page_t)queue_next(&p->listq)) + p->reference = TRUE; + } + + vm_page_unlock_queues(); + + break; + + case VM_PURGABLE_VOLATILE: + vm_page_lock_queues(); + + if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE && + object->purgable != VM_OBJECT_PURGABLE_EMPTY) { + vm_page_purgeable_count += object->resident_page_count; + } + + object->purgable = VM_OBJECT_PURGABLE_VOLATILE; + + /* + * We want the newly volatile purgable object to be a + * candidate for the pageout scan before other pages in the + * application if the system is suffering from memory + * pressure. To do this, we move a page of the object from + * the active queue onto the inactive queue in order to + * promote the object for early reclaim. 
We only need to move + * a single page since the pageout scan will reap the entire + * purgable object if it finds a single page in a volatile + * state. Obviously we don't do this if there are no pages + * associated with the object or we find a page of the object + * already on the inactive queue. + */ + for (p = (vm_page_t)queue_first(&object->memq); + !queue_end(&object->memq, (queue_entry_t)p); + p = (vm_page_t)queue_next(&p->listq)) { + if (p->inactive) { + /* already a page on the inactive queue */ + break; + } + if (p->active && !p->busy) { + /* found one we can move */ + vm_page_deactivate(p); + break; + } + } + vm_page_unlock_queues(); + + break; + + + case VM_PURGABLE_EMPTY: + vm_page_lock_queues(); + if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE && + object->purgable != VM_OBJECT_PURGABLE_EMPTY) { + vm_page_purgeable_count += object->resident_page_count; + } + (void) vm_object_purge(object); + vm_page_unlock_queues(); + break; + + } + *state = old_state; + + return KERN_SUCCESS; +} #if TASK_SWAPPER /* @@ -4515,7 +5175,9 @@ vm_object_reference( * This is also needed as number of vnodes can be dynamically scaled. */ kern_return_t -adjust_vm_object_cache(vm_size_t oval, vm_size_t nval) +adjust_vm_object_cache( + __unused vm_size_t oval, + vm_size_t nval) { vm_object_cached_max = nval; vm_object_cache_trim(FALSE); @@ -4523,3 +5185,269 @@ adjust_vm_object_cache(vm_size_t oval, vm_size_t nval) } #endif /* MACH_BSD */ + +/* + * vm_object_transpose + * + * This routine takes two VM objects of the same size and exchanges + * their backing store. + * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE + * and UPL_BLOCK_ACCESS if they are referenced anywhere. + * + * The VM objects must not be locked by caller. + */ +kern_return_t +vm_object_transpose( + vm_object_t object1, + vm_object_t object2, + vm_object_size_t transpose_size) +{ + vm_object_t tmp_object; + kern_return_t retval; + boolean_t object1_locked, object2_locked; + boolean_t object1_paging, object2_paging; + vm_page_t page; + vm_object_offset_t page_offset; + + tmp_object = VM_OBJECT_NULL; + object1_locked = FALSE; object2_locked = FALSE; + object1_paging = FALSE; object2_paging = FALSE; + + if (object1 == object2 || + object1 == VM_OBJECT_NULL || + object2 == VM_OBJECT_NULL) { + /* + * If the 2 VM objects are the same, there's + * no point in exchanging their backing store. + */ + retval = KERN_INVALID_VALUE; + goto done; + } + + vm_object_lock(object1); + object1_locked = TRUE; + if (object1->copy || object1->shadow || object1->shadowed || + object1->purgable != VM_OBJECT_NONPURGABLE) { + /* + * We don't deal with copy or shadow objects (yet). + */ + retval = KERN_INVALID_VALUE; + goto done; + } + /* + * Since we're about to mess with the object's backing store, + * mark it as "paging_in_progress". Note that this is not enough + * to prevent any paging activity on this object, so the caller should + * have "quiesced" the objects beforehand, via a UPL operation with + * UPL_SET_IO_WIRE (to make sure all the pages are there and wired) + * and UPL_BLOCK_ACCESS (to mark the pages "busy"). + */ + vm_object_paging_begin(object1); + object1_paging = TRUE; + vm_object_unlock(object1); + object1_locked = FALSE; + + /* + * Same as above for the 2nd object... 
+ */ + vm_object_lock(object2); + object2_locked = TRUE; + if (object2->copy || object2->shadow || object2->shadowed || + object2->purgable != VM_OBJECT_NONPURGABLE) { + retval = KERN_INVALID_VALUE; + goto done; + } + vm_object_paging_begin(object2); + object2_paging = TRUE; + vm_object_unlock(object2); + object2_locked = FALSE; + + /* + * Allocate a temporary VM object to hold object1's contents + * while we copy object2 to object1. + */ + tmp_object = vm_object_allocate(transpose_size); + vm_object_lock(tmp_object); + vm_object_paging_begin(tmp_object); + tmp_object->can_persist = FALSE; + + /* + * Since we need to lock both objects at the same time, + * make sure we always lock them in the same order to + * avoid deadlocks. + */ + if (object1 < object2) { + vm_object_lock(object1); + vm_object_lock(object2); + } else { + vm_object_lock(object2); + vm_object_lock(object1); + } + object1_locked = TRUE; + object2_locked = TRUE; + + if (object1->size != object2->size || + object1->size != transpose_size) { + /* + * If the 2 objects don't have the same size, we can't + * exchange their backing stores or one would overflow. + * If their size doesn't match the caller's + * "transpose_size", we can't do it either because the + * transpose operation will affect the entire span of + * the objects. + */ + retval = KERN_INVALID_VALUE; + goto done; + } + + + /* + * Transpose the lists of resident pages. + */ + if (object1->phys_contiguous || queue_empty(&object1->memq)) { + /* + * No pages in object1, just transfer pages + * from object2 to object1. No need to go through + * an intermediate object. + */ + while (!queue_empty(&object2->memq)) { + page = (vm_page_t) queue_first(&object2->memq); + vm_page_rename(page, object1, page->offset); + } + assert(queue_empty(&object2->memq)); + } else if (object2->phys_contiguous || queue_empty(&object2->memq)) { + /* + * No pages in object2, just transfer pages + * from object1 to object2. No need to go through + * an intermediate object. 
+ */ + while (!queue_empty(&object1->memq)) { + page = (vm_page_t) queue_first(&object1->memq); + vm_page_rename(page, object2, page->offset); + } + assert(queue_empty(&object1->memq)); + } else { + /* transfer object1's pages to tmp_object */ + vm_page_lock_queues(); + while (!queue_empty(&object1->memq)) { + page = (vm_page_t) queue_first(&object1->memq); + page_offset = page->offset; + vm_page_remove(page); + page->offset = page_offset; + queue_enter(&tmp_object->memq, page, vm_page_t, listq); + } + vm_page_unlock_queues(); + assert(queue_empty(&object1->memq)); + /* transfer object2's pages to object1 */ + while (!queue_empty(&object2->memq)) { + page = (vm_page_t) queue_first(&object2->memq); + vm_page_rename(page, object1, page->offset); + } + assert(queue_empty(&object2->memq)); + /* transfer tmp_object's pages to object1 */ + while (!queue_empty(&tmp_object->memq)) { + page = (vm_page_t) queue_first(&tmp_object->memq); + queue_remove(&tmp_object->memq, page, + vm_page_t, listq); + vm_page_insert(page, object2, page->offset); + } + assert(queue_empty(&tmp_object->memq)); + } + + /* no need to transpose the size: they should be identical */ + assert(object1->size == object2->size); + +#define __TRANSPOSE_FIELD(field) \ +MACRO_BEGIN \ + tmp_object->field = object1->field; \ + object1->field = object2->field; \ + object2->field = tmp_object->field; \ +MACRO_END + + assert(!object1->copy); + assert(!object2->copy); + + assert(!object1->shadow); + assert(!object2->shadow); + + __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */ + __TRANSPOSE_FIELD(pager); + __TRANSPOSE_FIELD(paging_offset); + + __TRANSPOSE_FIELD(pager_control); + /* update the memory_objects' pointers back to the VM objects */ + if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) { + memory_object_control_collapse(object1->pager_control, + object1); + } + if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) { + memory_object_control_collapse(object2->pager_control, + object2); + } + + __TRANSPOSE_FIELD(absent_count); + + assert(object1->paging_in_progress); + assert(object2->paging_in_progress); + + __TRANSPOSE_FIELD(pager_created); + __TRANSPOSE_FIELD(pager_initialized); + __TRANSPOSE_FIELD(pager_ready); + __TRANSPOSE_FIELD(pager_trusted); + __TRANSPOSE_FIELD(internal); + __TRANSPOSE_FIELD(temporary); + __TRANSPOSE_FIELD(private); + __TRANSPOSE_FIELD(pageout); + __TRANSPOSE_FIELD(true_share); + __TRANSPOSE_FIELD(phys_contiguous); + __TRANSPOSE_FIELD(nophyscache); + __TRANSPOSE_FIELD(last_alloc); + __TRANSPOSE_FIELD(sequential); + __TRANSPOSE_FIELD(cluster_size); + __TRANSPOSE_FIELD(existence_map); + __TRANSPOSE_FIELD(cow_hint); + __TRANSPOSE_FIELD(wimg_bits); + +#undef __TRANSPOSE_FIELD + + retval = KERN_SUCCESS; + +done: + /* + * Cleanup. + */ + if (tmp_object != VM_OBJECT_NULL) { + vm_object_paging_end(tmp_object); + vm_object_unlock(tmp_object); + /* + * Re-initialize the temporary object to avoid + * deallocating a real pager. 
+ */ + _vm_object_allocate(transpose_size, tmp_object); + vm_object_deallocate(tmp_object); + tmp_object = VM_OBJECT_NULL; + } + + if (object1_locked) { + vm_object_unlock(object1); + object1_locked = FALSE; + } + if (object2_locked) { + vm_object_unlock(object2); + object2_locked = FALSE; + } + if (object1_paging) { + vm_object_lock(object1); + vm_object_paging_end(object1); + vm_object_unlock(object1); + object1_paging = FALSE; + } + if (object2_paging) { + vm_object_lock(object2); + vm_object_paging_end(object2); + vm_object_unlock(object2); + object2_paging = FALSE; + } + + return retval; +}
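
The reclaim loop added in vm_object_purge() above leans on two patterns worth calling out: pages are collected on a private list and freed in batches of PURGE_BATCH_FREE_LIMIT, and the page-queues lock is dropped every PURGE_LOOP_QUOTA iterations so other threads are not starved while a large object is purged. The following minimal stand-alone sketch shows the same shape; free_batch() and cpu_pause() are hypothetical stand-ins for vm_page_free_list() and mutex_pause(), and a simple singly linked list stands in for the object's memq.

#include <pthread.h>
#include <stddef.h>

#define BATCH_FREE_LIMIT 50	/* flush the private free list this often */
#define LOOP_QUOTA	100	/* drop the shared queues lock this often */

struct page {
	struct page	*next;
	int		busy;	/* pages being acted upon are skipped */
};

/* Hypothetical stand-ins for vm_page_free_list() and mutex_pause(). */
void	free_batch(struct page *list);
void	cpu_pause(void);

unsigned int
purge_pages(struct page **resident, pthread_mutex_t *queues_lock)
{
	struct page	*p, **pp, *local_freeq = NULL;
	unsigned int	local_freed = 0, purged = 0;
	int		quota = LOOP_QUOTA;

	pthread_mutex_lock(queues_lock);
	pp = resident;
	while (*pp != NULL) {
		if (quota-- == 0) {
			/* Flush the private batch and let other threads take
			 * the lock for a moment; the real code keeps the
			 * object lock held across this yield so the resident
			 * list cannot change underneath it. */
			if (local_freeq != NULL) {
				free_batch(local_freeq);
				local_freeq = NULL;
				local_freed = 0;
			}
			pthread_mutex_unlock(queues_lock);
			cpu_pause();
			pthread_mutex_lock(queues_lock);
			quota = LOOP_QUOTA;
			continue;	/* re-read *pp after the yield */
		}
		p = *pp;
		if (p->busy) {		/* stands in for busy/cleaning/wired */
			pp = &p->next;
			continue;
		}
		*pp = p->next;		/* unlink from the resident list */
		p->next = local_freeq;	/* chain onto the private batch */
		local_freeq = p;
		purged++;
		if (++local_freed >= BATCH_FREE_LIMIT) {
			free_batch(local_freeq);
			local_freeq = NULL;
			local_freed = 0;
		}
	}
	if (local_freeq != NULL)
		free_batch(local_freeq);
	pthread_mutex_unlock(queues_lock);
	return purged;
}

Batching amortizes the cost of the free-list manipulation, while the periodic yield bounds how long a heavily contended lock is held in one stretch.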
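
The long comment block above vm_object_purgable_control() describes the semantics as seen from user space: a region allocated with VM_FLAGS_PURGABLE is toggled between non-volatile and volatile with vm_purgable_control(), and the old state returned by VM_PURGABLE_SET_STATE tells the caller whether the contents survived. A short user-space sketch, assuming the vm_allocate()/vm_purgable_control() interfaces and VM_PURGABLE_* constants exported through <mach/mach.h> on systems of this vintage:

#include <mach/mach.h>
#include <stdio.h>

int
main(void)
{
	vm_address_t	addr = 0;
	vm_size_t	size = 4 * 1024 * 1024;	/* recreatable cache buffer */
	int		state;
	kern_return_t	kr;

	/* Create a purgable region; it starts out VM_PURGABLE_NONVOLATILE. */
	kr = vm_allocate(mach_task_self(), &addr, size,
			 VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
	if (kr != KERN_SUCCESS)
		return 1;

	/* ... fill the buffer with data that can be recreated on demand ... */

	/* Done with it for now: let the kernel reclaim it under pressure. */
	state = VM_PURGABLE_VOLATILE;
	kr = vm_purgable_control(mach_task_self(), addr,
				 VM_PURGABLE_SET_STATE, &state);
	if (kr != KERN_SUCCESS)
		return 1;

	/* Later: take it back and check whether the contents survived. */
	state = VM_PURGABLE_NONVOLATILE;
	kr = vm_purgable_control(mach_task_self(), addr,
				 VM_PURGABLE_SET_STATE, &state);
	if (kr == KERN_SUCCESS && state == VM_PURGABLE_EMPTY) {
		/* Pages were reclaimed; they refault as zero-fill,
		 * so the contents must be recreated. */
		printf("object was emptied, recreating contents\n");
	}

	vm_deallocate(mach_task_self(), addr, size);
	return 0;
}

Because making the object non-volatile and reading its previous state is a single atomic operation, the VM_PURGABLE_EMPTY check above is race-free: either the full contents are intact, or the caller knows it must recreate them.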
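
vm_object_transpose() avoids an ABBA deadlock when it needs both objects locked at once by always taking the lock of the object at the lower address first, regardless of argument order. The idiom is general; here is a compact sketch using POSIX mutexes, purely illustrative and not the kernel's lock package:

#include <pthread.h>

struct obj {
	pthread_mutex_t	lock;
	/* ... payload ... */
};

/* Lock two objects without risking an ABBA deadlock: every caller takes
 * the lock at the lower address first, so no cycle can form. */
static void
lock_pair(struct obj *a, struct obj *b)
{
	if (a < b) {
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	} else {
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&a->lock);
	}
}

static void
unlock_pair(struct obj *a, struct obj *b)
{
	pthread_mutex_unlock(&a->lock);
	pthread_mutex_unlock(&b->lock);
}

Any total order on the locks works equally well; comparing the object addresses, as the transpose code does, is simply the cheapest order available.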