X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/935ed37a5c468c8a1c07408573c08b8b7ef80e8b..143464d58d2bd6378e74eec636961ceb0d32fb91:/osfmk/vm/vm_object.c diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c index 680c07f12..a16857ec0 100644 --- a/osfmk/vm/vm_object.c +++ b/osfmk/vm/vm_object.c @@ -72,6 +72,8 @@ #include #include +#include + #include #include @@ -80,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +90,7 @@ #include #include +#include #include #include #include @@ -95,6 +99,8 @@ #include #include +#include + /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given @@ -175,12 +181,6 @@ static kern_return_t vm_object_terminate( extern void vm_object_remove( vm_object_t object); -static vm_object_t vm_object_cache_trim( - boolean_t called_from_vm_object_deallocate); - -static void vm_object_deactivate_all_pages( - vm_object_t object); - static kern_return_t vm_object_copy_call( vm_object_t src_object, vm_object_offset_t src_offset, @@ -196,7 +196,8 @@ static void vm_object_do_bypass( vm_object_t backing_object); static void vm_object_release_pager( - memory_object_t pager); + memory_object_t pager, + boolean_t hashed); static zone_t vm_object_zone; /* vm backing store zone */ @@ -207,6 +208,8 @@ static zone_t vm_object_zone; /* vm backing store zone */ static struct vm_object kernel_object_store; vm_object_t kernel_object; +static struct vm_object compressor_object_store; +vm_object_t compressor_object = &compressor_object_store; /* * The submap object is used as a placeholder for vm_map_submap @@ -226,6 +229,11 @@ static struct vm_object vm_submap_object_store; */ static struct vm_object vm_object_template; +unsigned int vm_page_purged_wired = 0; +unsigned int vm_page_purged_busy = 0; +unsigned int vm_page_purged_others = 0; + +#if VM_OBJECT_CACHE /* * Virtual memory objects that are not referenced by * any address maps, but that are allowed to persist @@ -254,23 +262,53 @@ static struct vm_object vm_object_template; * from the reference mechanism, so that the lock need * not be held to make simple references. 
*/ -static queue_head_t vm_object_cached_list; -static int vm_object_cached_count=0; +static vm_object_t vm_object_cache_trim( + boolean_t called_from_vm_object_deallocate); + +static void vm_object_deactivate_all_pages( + vm_object_t object); + static int vm_object_cached_high; /* highest # cached objects */ static int vm_object_cached_max = 512; /* may be patched*/ -static decl_mutex_data(,vm_object_cached_lock_data) - #define vm_object_cache_lock() \ - mutex_lock(&vm_object_cached_lock_data) -#define vm_object_cache_lock_try() \ - mutex_try(&vm_object_cached_lock_data) + lck_mtx_lock(&vm_object_cached_lock_data) +#define vm_object_cache_lock_try() \ + lck_mtx_try_lock(&vm_object_cached_lock_data) + +#endif /* VM_OBJECT_CACHE */ + +static queue_head_t vm_object_cached_list; +static uint32_t vm_object_cache_pages_freed = 0; +static uint32_t vm_object_cache_pages_moved = 0; +static uint32_t vm_object_cache_pages_skipped = 0; +static uint32_t vm_object_cache_adds = 0; +static uint32_t vm_object_cached_count = 0; +static lck_mtx_t vm_object_cached_lock_data; +static lck_mtx_ext_t vm_object_cached_lock_data_ext; + +static uint32_t vm_object_page_grab_failed = 0; +static uint32_t vm_object_page_grab_skipped = 0; +static uint32_t vm_object_page_grab_returned = 0; +static uint32_t vm_object_page_grab_pmapped = 0; +static uint32_t vm_object_page_grab_reactivations = 0; + +#define vm_object_cache_lock_spin() \ + lck_mtx_lock_spin(&vm_object_cached_lock_data) #define vm_object_cache_unlock() \ - mutex_unlock(&vm_object_cached_lock_data) + lck_mtx_unlock(&vm_object_cached_lock_data) + +static void vm_object_cache_remove_locked(vm_object_t); + #define VM_OBJECT_HASH_COUNT 1024 +#define VM_OBJECT_HASH_LOCK_COUNT 512 + +static lck_mtx_t vm_object_hashed_lock_data[VM_OBJECT_HASH_LOCK_COUNT]; +static lck_mtx_ext_t vm_object_hashed_lock_data_ext[VM_OBJECT_HASH_LOCK_COUNT]; + static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT]; -static struct zone *vm_object_hash_zone; +static struct zone *vm_object_hash_zone; struct vm_object_hash_entry { queue_chain_t hash_link; /* hash chain link */ @@ -283,9 +321,12 @@ struct vm_object_hash_entry { typedef struct vm_object_hash_entry *vm_object_hash_entry_t; #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0) -#define VM_OBJECT_HASH_SHIFT 8 +#define VM_OBJECT_HASH_SHIFT 5 #define vm_object_hash(pager) \ - ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT) + ((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)) + +#define vm_object_lock_hash(pager) \ + ((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_LOCK_COUNT)) void vm_object_hash_entry_free( vm_object_hash_entry_t entry); @@ -293,38 +334,72 @@ void vm_object_hash_entry_free( static void vm_object_reap(vm_object_t object); static void vm_object_reap_async(vm_object_t object); static void vm_object_reaper_thread(void); -static queue_head_t vm_object_reaper_queue; /* protected by vm_object_cache_lock() */ + +static lck_mtx_t vm_object_reaper_lock_data; +static lck_mtx_ext_t vm_object_reaper_lock_data_ext; + +static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */ unsigned int vm_object_reap_count = 0; unsigned int vm_object_reap_count_async = 0; +#define vm_object_reaper_lock() \ + lck_mtx_lock(&vm_object_reaper_lock_data) +#define vm_object_reaper_lock_spin() \ + lck_mtx_lock_spin(&vm_object_reaper_lock_data) +#define vm_object_reaper_unlock() \ + lck_mtx_unlock(&vm_object_reaper_lock_data) + +#if 0 +#undef 
KERNEL_DEBUG +#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT +#endif + + +static lck_mtx_t * +vm_object_hash_lock_spin( + memory_object_t pager) +{ + int index; + + index = vm_object_lock_hash(pager); + + lck_mtx_lock_spin(&vm_object_hashed_lock_data[index]); + + return (&vm_object_hashed_lock_data[index]); +} + +static void +vm_object_hash_unlock(lck_mtx_t *lck) +{ + lck_mtx_unlock(lck); +} + + /* * vm_object_hash_lookup looks up a pager in the hashtable * and returns the corresponding entry, with optional removal. */ - static vm_object_hash_entry_t vm_object_hash_lookup( memory_object_t pager, boolean_t remove_entry) { - register queue_t bucket; - register vm_object_hash_entry_t entry; + queue_t bucket; + vm_object_hash_entry_t entry; bucket = &vm_object_hashtable[vm_object_hash(pager)]; entry = (vm_object_hash_entry_t)queue_first(bucket); while (!queue_end(bucket, (queue_entry_t)entry)) { - if (entry->pager == pager && !remove_entry) - return(entry); - else if (entry->pager == pager) { - queue_remove(bucket, entry, - vm_object_hash_entry_t, hash_link); + if (entry->pager == pager) { + if (remove_entry) { + queue_remove(bucket, entry, + vm_object_hash_entry_t, hash_link); + } return(entry); } - entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link); } - return(VM_OBJECT_HASH_ENTRY_NULL); } @@ -335,13 +410,17 @@ vm_object_hash_lookup( static void vm_object_hash_insert( - vm_object_hash_entry_t entry) + vm_object_hash_entry_t entry, + vm_object_t object) { - register queue_t bucket; + queue_t bucket; bucket = &vm_object_hashtable[vm_object_hash(entry->pager)]; queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link); + + entry->object = object; + object->hashed = TRUE; } static vm_object_hash_entry_t @@ -378,16 +457,16 @@ _vm_object_allocate( { XPR(XPR_VM_OBJECT, "vm_object_allocate, object 0x%X size 0x%X\n", - (integer_t)object, size, 0,0,0); + object, size, 0,0,0); *object = vm_object_template; queue_init(&object->memq); queue_init(&object->msr_q); -#ifdef UPL_DEBUG +#if UPL_DEBUG queue_init(&object->uplq); #endif /* UPL_DEBUG */ vm_object_lock_init(object); - object->size = size; + object->vo_size = size; } __private_extern__ vm_object_t @@ -407,10 +486,12 @@ vm_object_allocate( } -lck_grp_t vm_object_lck_grp; -lck_grp_attr_t vm_object_lck_grp_attr; -lck_attr_t vm_object_lck_attr; -lck_attr_t kernel_object_lck_attr; +lck_grp_t vm_object_lck_grp; +lck_grp_t vm_object_cache_lck_grp; +lck_grp_attr_t vm_object_lck_grp_attr; +lck_attr_t vm_object_lck_attr; +lck_attr_t kernel_object_lck_attr; +lck_attr_t compressor_object_lck_attr; /* * vm_object_bootstrap: @@ -423,24 +504,45 @@ vm_object_bootstrap(void) register int i; vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object), - round_page_32(512*1024), - round_page_32(12*1024), + round_page(512*1024), + round_page(12*1024), "vm objects"); + zone_change(vm_object_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ + zone_change(vm_object_zone, Z_NOENCRYPT, TRUE); + + vm_object_init_lck_grp(); - queue_init(&vm_object_reaper_queue); queue_init(&vm_object_cached_list); - mutex_init(&vm_object_cached_lock_data, 0); + + lck_mtx_init_ext(&vm_object_cached_lock_data, + &vm_object_cached_lock_data_ext, + &vm_object_cache_lck_grp, + &vm_object_lck_attr); + + queue_init(&vm_object_reaper_queue); + + for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) { + lck_mtx_init_ext(&vm_object_hashed_lock_data[i], + &vm_object_hashed_lock_data_ext[i], + &vm_object_lck_grp, + &vm_object_lck_attr); + } + lck_mtx_init_ext(&vm_object_reaper_lock_data, + 
&vm_object_reaper_lock_data_ext, + &vm_object_lck_grp, + &vm_object_lck_attr); vm_object_hash_zone = zinit((vm_size_t) sizeof (struct vm_object_hash_entry), - round_page_32(512*1024), - round_page_32(12*1024), + round_page(512*1024), + round_page(12*1024), "vm object hash entries"); + zone_change(vm_object_hash_zone, Z_CALLERACCT, FALSE); + zone_change(vm_object_hash_zone, Z_NOENCRYPT, TRUE); for (i = 0; i < VM_OBJECT_HASH_COUNT; i++) queue_init(&vm_object_hashtable[i]); - vm_object_init_lck_grp(); /* * Fill in a template object, for quick initialization @@ -453,27 +555,30 @@ vm_object_bootstrap(void) /* * We can't call vm_object_lock_init() here because that will * allocate some memory and VM is not fully initialized yet. - * The lock will be initialized for each allocate object in + * The lock will be initialized for each allocated object in * _vm_object_allocate(), so we don't need to initialize it in * the vm_object_template. */ vm_object_lock_init(&vm_object_template); #endif - vm_object_template.size = 0; + vm_object_template.vo_size = 0; vm_object_template.memq_hint = VM_PAGE_NULL; vm_object_template.ref_count = 1; #if TASK_SWAPPER vm_object_template.res_count = 1; #endif /* TASK_SWAPPER */ vm_object_template.resident_page_count = 0; + vm_object_template.wired_page_count = 0; + vm_object_template.reusable_page_count = 0; vm_object_template.copy = VM_OBJECT_NULL; vm_object_template.shadow = VM_OBJECT_NULL; - vm_object_template.shadow_offset = (vm_object_offset_t) 0; + vm_object_template.vo_shadow_offset = (vm_object_offset_t) 0; vm_object_template.pager = MEMORY_OBJECT_NULL; vm_object_template.paging_offset = 0; vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL; vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC; vm_object_template.paging_in_progress = 0; + vm_object_template.activity_in_progress = 0; /* Begin bitfields */ vm_object_template.all_wanted = 0; /* all bits FALSE */ @@ -488,8 +593,8 @@ vm_object_bootstrap(void) vm_object_template.pageout = FALSE; vm_object_template.alive = TRUE; vm_object_template.purgable = VM_PURGABLE_DENY; + vm_object_template.purgeable_when_ripe = FALSE; vm_object_template.shadowed = FALSE; - vm_object_template.silent_overwrite = FALSE; vm_object_template.advisory_pageout = FALSE; vm_object_template.true_share = FALSE; vm_object_template.terminating = FALSE; @@ -508,6 +613,7 @@ vm_object_bootstrap(void) vm_object_template.sequential = (vm_object_offset_t) 0; vm_object_template.pages_created = 0; vm_object_template.pages_used = 0; + vm_object_template.scan_collisions = 0; #if MACH_PAGEMAP vm_object_template.existence_map = VM_EXTERNAL_NULL; @@ -518,10 +624,19 @@ vm_object_bootstrap(void) #endif /* MACH_ASSERT */ /* cache bitfields */ - vm_object_template.wimg_bits = VM_WIMG_DEFAULT; + vm_object_template.wimg_bits = VM_WIMG_USE_DEFAULT; + vm_object_template.set_cache_attr = FALSE; + vm_object_template.object_slid = FALSE; vm_object_template.code_signed = FALSE; - vm_object_template.not_in_use = 0; -#ifdef UPL_DEBUG + vm_object_template.hashed = FALSE; + vm_object_template.transposed = FALSE; + vm_object_template.mapping_in_progress = FALSE; + vm_object_template.volatile_empty = FALSE; + vm_object_template.volatile_fault = FALSE; + vm_object_template.all_reusable = FALSE; + vm_object_template.blocked_access = FALSE; + vm_object_template.__object2_unused_bits = 0; +#if UPL_DEBUG vm_object_template.uplq.prev = NULL; vm_object_template.uplq.next = NULL; #endif /* UPL_DEBUG */ @@ -533,6 +648,10 @@ vm_object_bootstrap(void) 
vm_object_template.objq.next=NULL; vm_object_template.objq.prev=NULL; + vm_object_template.purgeable_queue_type = PURGEABLE_Q_TYPE_MAX; + vm_object_template.purgeable_queue_group = 0; + + vm_object_template.vo_cache_ts = 0; /* * Initialize the "kernel object" @@ -546,13 +665,17 @@ vm_object_bootstrap(void) */ #ifdef ppc - _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1, - kernel_object); + _vm_object_allocate(vm_last_addr + 1, + kernel_object); #else - _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1, - kernel_object); + _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, + kernel_object); + + _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, + compressor_object); #endif kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + compressor_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; /* * Initialize the "submap object". Make it as large as the @@ -561,11 +684,11 @@ vm_object_bootstrap(void) vm_submap_object = &vm_submap_object_store; #ifdef ppc - _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1, - vm_submap_object); + _vm_object_allocate(vm_last_addr + 1, + vm_submap_object); #else - _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1, - vm_submap_object); + _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, + vm_submap_object); #endif vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; @@ -610,21 +733,25 @@ vm_object_init(void) __private_extern__ void vm_object_init_lck_grp(void) { - /* + /* * initialze the vm_object lock world */ - lck_grp_attr_setdefault(&vm_object_lck_grp_attr); + lck_grp_attr_setdefault(&vm_object_lck_grp_attr); lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr); + lck_grp_init(&vm_object_cache_lck_grp, "vm_object_cache", &vm_object_lck_grp_attr); lck_attr_setdefault(&vm_object_lck_attr); lck_attr_setdefault(&kernel_object_lck_attr); lck_attr_cleardebug(&kernel_object_lck_attr); + lck_attr_setdefault(&compressor_object_lck_attr); + lck_attr_cleardebug(&compressor_object_lck_attr); } - +#if VM_OBJECT_CACHE #define MIGHT_NOT_CACHE_SHADOWS 1 #if MIGHT_NOT_CACHE_SHADOWS static int cache_shadows = TRUE; #endif /* MIGHT_NOT_CACHE_SHADOWS */ +#endif /* * vm_object_deallocate: @@ -644,9 +771,11 @@ __private_extern__ void vm_object_deallocate( register vm_object_t object) { +#if VM_OBJECT_CACHE boolean_t retry_cache_trim = FALSE; - vm_object_t shadow = VM_OBJECT_NULL; uint32_t try_failed_count = 0; +#endif + vm_object_t shadow = VM_OBJECT_NULL; // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */ // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */ @@ -654,13 +783,18 @@ vm_object_deallocate( if (object == VM_OBJECT_NULL) return; - if (object == kernel_object) { - vm_object_lock(kernel_object); - kernel_object->ref_count--; - if (kernel_object->ref_count == 0) { - panic("vm_object_deallocate: losing kernel_object\n"); + if (object == kernel_object || object == compressor_object) { + vm_object_lock_shared(object); + + OSAddAtomic(-1, &object->ref_count); + + if (object->ref_count == 0) { + if (object == kernel_object) + panic("vm_object_deallocate: losing kernel_object\n"); + else + panic("vm_object_deallocate: losing compressor_object\n"); } - vm_object_unlock(kernel_object); + vm_object_unlock(object); return; } @@ -702,7 +836,9 @@ vm_object_deallocate( vm_object_unlock(object); if (atomic_swap) { - /* ref_count was updated atomically ! */ + /* + * ref_count was updated atomically ! 
+ */ vm_object_deallocate_shared_successes++; return; } @@ -717,28 +853,8 @@ vm_object_deallocate( while (object != VM_OBJECT_NULL) { - /* - * The cache holds a reference (uncounted) to - * the object; we must lock it before removing - * the object. - */ - for (;;) { - vm_object_cache_lock(); - - /* - * if we try to take a regular lock here - * we risk deadlocking against someone - * holding a lock on this object while - * trying to vm_object_deallocate a different - * object - */ - if (vm_object_lock_try(object)) - break; - vm_object_cache_unlock(); - try_failed_count++; + vm_object_lock(object); - mutex_pause(try_failed_count); /* wait a bit */ - } assert(object->ref_count > 0); /* @@ -753,31 +869,16 @@ vm_object_deallocate( /* more mappers for this object */ if (pager != MEMORY_OBJECT_NULL) { + vm_object_mapping_wait(object, THREAD_UNINT); + vm_object_mapping_begin(object); vm_object_unlock(object); - vm_object_cache_unlock(); - - memory_object_unmap(pager); - - try_failed_count = 0; - for (;;) { - vm_object_cache_lock(); - /* - * if we try to take a regular lock here - * we risk deadlocking against someone - * holding a lock on this object while - * trying to vm_object_deallocate a different - * object - */ - if (vm_object_lock_try(object)) - break; - vm_object_cache_unlock(); - try_failed_count++; + memory_object_last_unmap(pager); - mutex_pause(try_failed_count); /* wait a bit */ - } - assert(object->ref_count > 0); + vm_object_lock(object); + vm_object_mapping_end(object); } + assert(object->ref_count > 0); } /* @@ -796,7 +897,6 @@ vm_object_deallocate( vm_object_lock_assert_exclusive(object); object->ref_count--; vm_object_res_deallocate(object); - vm_object_cache_unlock(); if (object->ref_count == 1 && object->shadow != VM_OBJECT_NULL) { @@ -818,13 +918,14 @@ vm_object_deallocate( */ vm_object_collapse(object, 0, FALSE); } - vm_object_unlock(object); +#if VM_OBJECT_CACHE if (retry_cache_trim && ((object = vm_object_cache_trim(TRUE)) != VM_OBJECT_NULL)) { continue; } +#endif return; } @@ -839,11 +940,12 @@ vm_object_deallocate( VM_OBJECT_EVENT_INITIALIZED, THREAD_UNINT); vm_object_unlock(object); - vm_object_cache_unlock(); + thread_block(THREAD_CONTINUE_NULL); continue; } +#if VM_OBJECT_CACHE /* * If this object can persist, then enter it in * the cache. Otherwise, terminate it. @@ -859,11 +961,12 @@ vm_object_deallocate( * Now it is safe to decrement reference count, * and to return if reference count is > 0. 
*/ + vm_object_lock_assert_exclusive(object); if (--object->ref_count > 0) { vm_object_res_deallocate(object); vm_object_unlock(object); - vm_object_cache_unlock(); + if (retry_cache_trim && ((object = vm_object_cache_trim(TRUE)) != VM_OBJECT_NULL)) { @@ -892,16 +995,38 @@ vm_object_deallocate( VM_OBJ_RES_DECR(object); XPR(XPR_VM_OBJECT, "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n", - (integer_t)object, - (integer_t)vm_object_cached_list.next, - (integer_t)vm_object_cached_list.prev,0,0); + object, + vm_object_cached_list.next, + vm_object_cached_list.prev,0,0); + + + vm_object_unlock(object); + + try_failed_count = 0; + for (;;) { + vm_object_cache_lock(); + + /* + * if we try to take a regular lock here + * we risk deadlocking against someone + * holding a lock on this object while + * trying to vm_object_deallocate a different + * object + */ + if (vm_object_lock_try(object)) + break; + vm_object_cache_unlock(); + try_failed_count++; + mutex_pause(try_failed_count); /* wait a bit */ + } vm_object_cached_count++; if (vm_object_cached_count > vm_object_cached_high) vm_object_cached_high = vm_object_cached_count; queue_enter(&vm_object_cached_list, object, vm_object_t, cached_list); vm_object_cache_unlock(); + vm_object_deactivate_all_pages(object); vm_object_unlock(object); @@ -931,14 +1056,15 @@ vm_object_deallocate( return; } retry_cache_trim = TRUE; - - } else { + } else +#endif /* VM_OBJECT_CACHE */ + { /* * This object is not cachable; terminate it. */ XPR(XPR_VM_OBJECT, "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n", - (integer_t)object, object->resident_page_count, + object, object->resident_page_count, object->paging_in_progress, (void *)current_thread(),object->ref_count); @@ -952,309 +1078,528 @@ vm_object_deallocate( * a normal reference. */ shadow = object->pageout?VM_OBJECT_NULL:object->shadow; - if(vm_object_terminate(object) != KERN_SUCCESS) { + + if (vm_object_terminate(object) != KERN_SUCCESS) { return; } if (shadow != VM_OBJECT_NULL) { object = shadow; continue; } +#if VM_OBJECT_CACHE if (retry_cache_trim && ((object = vm_object_cache_trim(TRUE)) != VM_OBJECT_NULL)) { continue; } +#endif return; } } +#if VM_OBJECT_CACHE assert(! retry_cache_trim); +#endif } -/* - * Check to see whether we really need to trim - * down the cache. If so, remove an object from - * the cache, terminate it, and repeat. - * - * Called with, and returns with, cache lock unlocked. - */ -vm_object_t -vm_object_cache_trim( - boolean_t called_from_vm_object_deallocate) + + +vm_page_t +vm_object_page_grab( + vm_object_t object) { - register vm_object_t object = VM_OBJECT_NULL; - vm_object_t shadow; + vm_page_t p, next_p; + int p_limit = 0; + int p_skipped = 0; - for (;;) { + vm_object_lock_assert_exclusive(object); - /* - * If we no longer need to trim the cache, - * then we are done. - */ + next_p = (vm_page_t)queue_first(&object->memq); + p_limit = MIN(50, object->resident_page_count); - vm_object_cache_lock(); - if (vm_object_cached_count <= vm_object_cached_max) { - vm_object_cache_unlock(); - return VM_OBJECT_NULL; - } + while (!queue_end(&object->memq, (queue_entry_t)next_p) && --p_limit > 0) { - /* - * We must trim down the cache, so remove - * the first object in the cache. 
- */ - XPR(XPR_VM_OBJECT, - "vm_object_cache_trim: removing from front of cache (%x, %x)\n", - (integer_t)vm_object_cached_list.next, - (integer_t)vm_object_cached_list.prev, 0, 0, 0); + p = next_p; + next_p = (vm_page_t)queue_next(&next_p->listq); - object = (vm_object_t) queue_first(&vm_object_cached_list); - if(object == (vm_object_t) &vm_object_cached_list) { - /* something's wrong with the calling parameter or */ - /* the value of vm_object_cached_count, just fix */ - /* and return */ - if(vm_object_cached_max < 0) - vm_object_cached_max = 0; - vm_object_cached_count = 0; - vm_object_cache_unlock(); - return VM_OBJECT_NULL; - } - vm_object_lock(object); - queue_remove(&vm_object_cached_list, object, vm_object_t, - cached_list); - vm_object_cached_count--; + if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry || p->fictitious) + goto move_page_in_obj; - /* - * Since this object is in the cache, we know - * that it is initialized and has no references. - * Take a reference to avoid recursive deallocations. - */ + if (p->pmapped || p->dirty || p->precious) { + vm_page_lockspin_queues(); - assert(object->pager_initialized); - assert(object->ref_count == 0); - vm_object_lock_assert_exclusive(object); - object->ref_count++; + if (p->pmapped) { + int refmod_state; - /* - * Terminate the object. - * If the object had a shadow, we let vm_object_deallocate - * deallocate it. "pageout" objects have a shadow, but - * maintain a "paging reference" rather than a normal - * reference. - * (We are careful here to limit recursion.) - */ - shadow = object->pageout?VM_OBJECT_NULL:object->shadow; - if(vm_object_terminate(object) != KERN_SUCCESS) - continue; - if (shadow != VM_OBJECT_NULL) { - if (called_from_vm_object_deallocate) { - return shadow; - } else { - vm_object_deallocate(shadow); + vm_object_page_grab_pmapped++; + + if (p->reference == FALSE || p->dirty == FALSE) { + + refmod_state = pmap_get_refmod(p->phys_page); + + if (refmod_state & VM_MEM_REFERENCED) + p->reference = TRUE; + if (refmod_state & VM_MEM_MODIFIED) { + SET_PAGE_DIRTY(p, FALSE); + } + } + if (p->dirty == FALSE && p->precious == FALSE) { + + refmod_state = pmap_disconnect(p->phys_page); + + if (refmod_state & VM_MEM_REFERENCED) + p->reference = TRUE; + if (refmod_state & VM_MEM_MODIFIED) { + SET_PAGE_DIRTY(p, FALSE); + } + + if (p->dirty == FALSE) + goto take_page; + } + } + if (p->inactive && p->reference == TRUE) { + vm_page_activate(p); + + VM_STAT_INCR(reactivations); + vm_object_page_grab_reactivations++; } + vm_page_unlock_queues(); +move_page_in_obj: + queue_remove(&object->memq, p, vm_page_t, listq); + queue_enter(&object->memq, p, vm_page_t, listq); + + p_skipped++; + continue; } + vm_page_lockspin_queues(); +take_page: + vm_page_free_prepare_queues(p); + vm_object_page_grab_returned++; + vm_object_page_grab_skipped += p_skipped; + + vm_page_unlock_queues(); + + vm_page_free_prepare_object(p, TRUE); + + return (p); } + vm_object_page_grab_skipped += p_skipped; + vm_object_page_grab_failed++; + + return (NULL); } -#define VM_OBJ_TERM_STATS DEBUG -#if VM_OBJ_TERM_STATS -uint32_t vm_object_terminate_pages_freed = 0; -uint32_t vm_object_terminate_pages_removed = 0; -uint32_t vm_object_terminate_batches = 0; -uint32_t vm_object_terminate_biggest_batch = 0; -#endif /* VM_OBJ_TERM_STATS */ -#define V_O_T_MAX_BATCH 256 -/* - * Routine: vm_object_terminate - * Purpose: - * Free all resources associated with a vm_object. 
- * In/out conditions: - * Upon entry, the object must be locked, - * and the object must have exactly one reference. - * - * The shadow object reference is left alone. - * - * The object must be unlocked if its found that pages - * must be flushed to a backing object. If someone - * manages to map the object while it is being flushed - * the object is returned unlocked and unchanged. Otherwise, - * upon exit, the cache will be unlocked, and the - * object will cease to exist. - */ -static kern_return_t -vm_object_terminate( - register vm_object_t object) +#define EVICT_PREPARE_LIMIT 64 +#define EVICT_AGE 10 + +static clock_sec_t vm_object_cache_aging_ts = 0; + +static void +vm_object_cache_remove_locked( + vm_object_t object) { - register vm_page_t p; - vm_object_t shadow_object; - vm_page_t local_free_q; - int loop_count; -#if VM_OBJ_TERM_STATS - uint32_t local_free_count; - uint32_t pages_removed; -#endif /* VM_OBJ_TERM_STATS */ + queue_remove(&vm_object_cached_list, object, vm_object_t, objq); + object->objq.next = NULL; + object->objq.prev = NULL; -#if VM_OBJ_TERM_STATS -#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count) \ - MACRO_BEGIN \ - if (_pages_removed) { \ - hw_atomic_add(&vm_object_terminate_batches, 1); \ - hw_atomic_add(&vm_object_terminate_pages_removed, \ - _pages_removed); \ - hw_atomic_add(&vm_object_terminate_pages_freed, \ - _local_free_count); \ - if (_local_free_count > \ - vm_object_terminate_biggest_batch) { \ - vm_object_terminate_biggest_batch = \ - _local_free_count; \ - } \ - _local_free_count = 0; \ - } \ - MACRO_END -#else /* VM_OBJ_TERM_STATS */ -#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count) -#endif /* VM_OBJ_TERM_STATS */ + vm_object_cached_count--; +} -#define VM_OBJ_TERM_FREELIST(_pages_removed, _local_free_count, _local_free_q) \ - MACRO_BEGIN \ - VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count); \ - if (_local_free_q) { \ - vm_page_free_list(_local_free_q); \ - _local_free_q = VM_PAGE_NULL; \ - } \ - MACRO_END +void +vm_object_cache_remove( + vm_object_t object) +{ + vm_object_cache_lock_spin(); + if (object->objq.next || object->objq.prev) + vm_object_cache_remove_locked(object); + vm_object_cache_unlock(); +} - XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n", - (integer_t)object, object->ref_count, 0, 0, 0); - - local_free_q = VM_PAGE_NULL; -#if VM_OBJ_TERM_STATS - local_free_count = 0; - pages_removed = 0; -#endif /* VM_OBJ_TERM_STATS */ - - if (!object->pageout && (!object->temporary || object->can_persist) - && (object->pager != NULL || object->shadow_severed)) { - vm_object_cache_unlock(); - loop_count = V_O_T_MAX_BATCH; - vm_page_lock_queues(); - while (!queue_empty(&object->memq)) { - if (--loop_count == 0) { - /* - * Free the pages we've reclaimed so far and - * take a little break to avoid hogging - * the page queues lock too long. 
- */ - VM_OBJ_TERM_FREELIST(pages_removed, - local_free_count, - local_free_q); - mutex_yield(&vm_page_queue_lock); - loop_count = V_O_T_MAX_BATCH; +void +vm_object_cache_add( + vm_object_t object) +{ + clock_sec_t sec; + clock_nsec_t nsec; + + if (object->resident_page_count == 0) + return; + clock_get_system_nanotime(&sec, &nsec); + + vm_object_cache_lock_spin(); + + if (object->objq.next == NULL && object->objq.prev == NULL) { + queue_enter(&vm_object_cached_list, object, vm_object_t, objq); + object->vo_cache_ts = sec + EVICT_AGE; + object->vo_cache_pages_to_scan = object->resident_page_count; + + vm_object_cached_count++; + vm_object_cache_adds++; + } + vm_object_cache_unlock(); +} + +int +vm_object_cache_evict( + int num_to_evict, + int max_objects_to_examine) +{ + vm_object_t object = VM_OBJECT_NULL; + vm_object_t next_obj = VM_OBJECT_NULL; + vm_page_t local_free_q = VM_PAGE_NULL; + vm_page_t p; + vm_page_t next_p; + int object_cnt = 0; + vm_page_t ep_array[EVICT_PREPARE_LIMIT]; + int ep_count; + int ep_limit; + int ep_index; + int ep_freed = 0; + int ep_moved = 0; + uint32_t ep_skipped = 0; + clock_sec_t sec; + clock_nsec_t nsec; + + KERNEL_DEBUG(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0); + /* + * do a couple of quick checks to see if it's + * worthwhile grabbing the lock + */ + if (queue_empty(&vm_object_cached_list)) { + KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0); + return (0); + } + clock_get_system_nanotime(&sec, &nsec); + + /* + * the object on the head of the queue has not + * yet sufficiently aged + */ + if (sec < vm_object_cache_aging_ts) { + KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0); + return (0); + } + /* + * don't need the queue lock to find + * and lock an object on the cached list + */ + vm_page_unlock_queues(); + + vm_object_cache_lock_spin(); + + for (;;) { + next_obj = (vm_object_t)queue_first(&vm_object_cached_list); + + while (!queue_end(&vm_object_cached_list, (queue_entry_t)next_obj) && object_cnt++ < max_objects_to_examine) { + + object = next_obj; + next_obj = (vm_object_t)queue_next(&next_obj->objq); + + if (sec < object->vo_cache_ts) { + KERNEL_DEBUG(0x130020c, object, object->resident_page_count, object->vo_cache_ts, sec, 0); + + vm_object_cache_aging_ts = object->vo_cache_ts; + object = VM_OBJECT_NULL; + break; + } + if (!vm_object_lock_try_scan(object)) { + /* + * just skip over this guy for now... if we find + * an object to steal pages from, we'll revist in a bit... + * hopefully, the lock will have cleared + */ + KERNEL_DEBUG(0x13001f8, object, object->resident_page_count, 0, 0, 0); + + object = VM_OBJECT_NULL; + continue; + } + if (queue_empty(&object->memq) || object->vo_cache_pages_to_scan == 0) { + /* + * this case really shouldn't happen, but it's not fatal + * so deal with it... if we don't remove the object from + * the list, we'll never move past it. + */ + KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0); + + vm_object_cache_remove_locked(object); + vm_object_unlock(object); + object = VM_OBJECT_NULL; + continue; + } + /* + * we have a locked object with pages... + * time to start harvesting + */ + break; } + vm_object_cache_unlock(); + + if (object == VM_OBJECT_NULL) + break; + /* - * Clear pager_trusted bit so that the pages get yanked - * out of the object instead of cleaned in place. This - * prevents a deadlock in XMM and makes more sense anyway. 
+ * object is locked at this point and + * has resident pages */ - object->pager_trusted = FALSE; + next_p = (vm_page_t)queue_first(&object->memq); - p = (vm_page_t) queue_first(&object->memq); + /* + * break the page scan into 2 pieces to minimize the time spent + * behind the page queue lock... + * the list of pages on these unused objects is likely to be cold + * w/r to the cpu cache which increases the time to scan the list + * tenfold... and we may have a 'run' of pages we can't utilize that + * needs to be skipped over... + */ + if ((ep_limit = num_to_evict - (ep_freed + ep_moved)) > EVICT_PREPARE_LIMIT) + ep_limit = EVICT_PREPARE_LIMIT; + ep_count = 0; - VM_PAGE_CHECK(p); + while (!queue_end(&object->memq, (queue_entry_t)next_p) && object->vo_cache_pages_to_scan && ep_count < ep_limit) { - if (p->busy || p->cleaning) { - if(p->cleaning || p->absent) { - /* free the pages reclaimed so far */ - VM_OBJ_TERM_FREELIST(pages_removed, - local_free_count, - local_free_q); - vm_page_unlock_queues(); - vm_object_paging_wait(object, THREAD_UNINT); - vm_page_lock_queues(); + p = next_p; + next_p = (vm_page_t)queue_next(&next_p->listq); + + object->vo_cache_pages_to_scan--; + + if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry) { + queue_remove(&object->memq, p, vm_page_t, listq); + queue_enter(&object->memq, p, vm_page_t, listq); + + ep_skipped++; continue; + } + if (p->wpmapped || p->dirty || p->precious) { + queue_remove(&object->memq, p, vm_page_t, listq); + queue_enter(&object->memq, p, vm_page_t, listq); + + pmap_clear_reference(p->phys_page); + } + ep_array[ep_count++] = p; + } + KERNEL_DEBUG(0x13001f4 | DBG_FUNC_START, object, object->resident_page_count, ep_freed, ep_moved, 0); + + vm_page_lockspin_queues(); + + for (ep_index = 0; ep_index < ep_count; ep_index++) { + + p = ep_array[ep_index]; + + if (p->wpmapped || p->dirty || p->precious) { + p->reference = FALSE; + p->no_cache = FALSE; + + /* + * we've already filtered out pages that are in the laundry + * so if we get here, this page can't be on the pageout queue + */ + assert(!p->pageout_queue); + + VM_PAGE_QUEUES_REMOVE(p); + VM_PAGE_ENQUEUE_INACTIVE(p, TRUE); + + ep_moved++; } else { - panic("vm_object_terminate.3 %p %p", object, p); + vm_page_free_prepare_queues(p); + + assert(p->pageq.next == NULL && p->pageq.prev == NULL); + /* + * Add this page to our list of reclaimed pages, + * to be freed later. + */ + p->pageq.next = (queue_entry_t) local_free_q; + local_free_q = p; + + ep_freed++; } } + vm_page_unlock_queues(); - p->busy = TRUE; - VM_PAGE_QUEUES_REMOVE(p); -#if VM_OBJ_TERM_STATS - pages_removed++; -#endif /* VM_OBJ_TERM_STATS */ + KERNEL_DEBUG(0x13001f4 | DBG_FUNC_END, object, object->resident_page_count, ep_freed, ep_moved, 0); - if (p->absent || p->private) { + if (local_free_q) { + vm_page_free_list(local_free_q, TRUE); + local_free_q = VM_PAGE_NULL; + } + if (object->vo_cache_pages_to_scan == 0) { + KERNEL_DEBUG(0x1300208, object, object->resident_page_count, ep_freed, ep_moved, 0); + + vm_object_cache_remove(object); + KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0); + } + /* + * done with this object + */ + vm_object_unlock(object); + object = VM_OBJECT_NULL; + + /* + * at this point, we are not holding any locks + */ + if ((ep_freed + ep_moved) >= num_to_evict) { /* - * For private pages, VM_PAGE_FREE just - * leaves the page structure around for - * its owner to clean up. For absent - * pages, the structure is returned to - * the appropriate pool. 
+ * we've reached our target for the + * number of pages to evict */ + break; + } + vm_object_cache_lock_spin(); + } + /* + * put the page queues lock back to the caller's + * idea of it + */ + vm_page_lock_queues(); + + vm_object_cache_pages_freed += ep_freed; + vm_object_cache_pages_moved += ep_moved; + vm_object_cache_pages_skipped += ep_skipped; + + KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, ep_freed, 0, 0, 0, 0); + return (ep_freed); +} + + +#if VM_OBJECT_CACHE +/* + * Check to see whether we really need to trim + * down the cache. If so, remove an object from + * the cache, terminate it, and repeat. + * + * Called with, and returns with, cache lock unlocked. + */ +vm_object_t +vm_object_cache_trim( + boolean_t called_from_vm_object_deallocate) +{ + register vm_object_t object = VM_OBJECT_NULL; + vm_object_t shadow; + + for (;;) { - goto free_page; + /* + * If we no longer need to trim the cache, + * then we are done. + */ + if (vm_object_cached_count <= vm_object_cached_max) + return VM_OBJECT_NULL; + + vm_object_cache_lock(); + if (vm_object_cached_count <= vm_object_cached_max) { + vm_object_cache_unlock(); + return VM_OBJECT_NULL; } - if (p->fictitious) { - if (p->phys_page == vm_page_guard_addr) { - goto free_page; - } - panic("vm_object_terminate.4 %p %p", object, p); + /* + * We must trim down the cache, so remove + * the first object in the cache. + */ + XPR(XPR_VM_OBJECT, + "vm_object_cache_trim: removing from front of cache (%x, %x)\n", + vm_object_cached_list.next, + vm_object_cached_list.prev, 0, 0, 0); + + object = (vm_object_t) queue_first(&vm_object_cached_list); + if(object == (vm_object_t) &vm_object_cached_list) { + /* something's wrong with the calling parameter or */ + /* the value of vm_object_cached_count, just fix */ + /* and return */ + if(vm_object_cached_max < 0) + vm_object_cached_max = 0; + vm_object_cached_count = 0; + vm_object_cache_unlock(); + return VM_OBJECT_NULL; } + vm_object_lock(object); + queue_remove(&vm_object_cached_list, object, vm_object_t, + cached_list); + vm_object_cached_count--; + + vm_object_cache_unlock(); + /* + * Since this object is in the cache, we know + * that it is initialized and has no references. + * Take a reference to avoid recursive deallocations. + */ - if (!p->dirty && p->wpmapped) - p->dirty = pmap_is_modified(p->phys_page); + assert(object->pager_initialized); + assert(object->ref_count == 0); + vm_object_lock_assert_exclusive(object); + object->ref_count++; - if ((p->dirty || p->precious) && !p->error && object->alive) { - /* free the pages reclaimed so far */ - VM_OBJ_TERM_FREELIST(pages_removed, - local_free_count, - local_free_q); - vm_page_unlock_queues(); - vm_pageout_cluster(p); /* flush page */ - vm_object_paging_wait(object, THREAD_UNINT); - XPR(XPR_VM_OBJECT, - "vm_object_terminate restart, object 0x%X ref %d\n", - (integer_t)object, object->ref_count, 0, 0, 0); - vm_page_lock_queues(); - } else { - free_page: - /* - * Add this page to our list of reclaimed pages, - * to be freed later. - */ - vm_page_free_prepare(p); - p->pageq.next = (queue_entry_t) local_free_q; - local_free_q = p; -#if VM_OBJ_TERM_STATS - local_free_count++; -#endif /* VM_OBJ_TERM_STATS */ - } - } - - /* - * Free the remaining reclaimed pages. - */ - VM_OBJ_TERM_FREELIST(pages_removed, - local_free_count, - local_free_q); - vm_page_unlock_queues(); - vm_object_unlock(object); - vm_object_cache_lock(); - vm_object_lock(object); + /* + * Terminate the object. + * If the object had a shadow, we let vm_object_deallocate + * deallocate it. 
"pageout" objects have a shadow, but + * maintain a "paging reference" rather than a normal + * reference. + * (We are careful here to limit recursion.) + */ + shadow = object->pageout?VM_OBJECT_NULL:object->shadow; + + if(vm_object_terminate(object) != KERN_SUCCESS) + continue; + + if (shadow != VM_OBJECT_NULL) { + if (called_from_vm_object_deallocate) { + return shadow; + } else { + vm_object_deallocate(shadow); + } + } } +} +#endif + +/* + * Routine: vm_object_terminate + * Purpose: + * Free all resources associated with a vm_object. + * In/out conditions: + * Upon entry, the object must be locked, + * and the object must have exactly one reference. + * + * The shadow object reference is left alone. + * + * The object must be unlocked if its found that pages + * must be flushed to a backing object. If someone + * manages to map the object while it is being flushed + * the object is returned unlocked and unchanged. Otherwise, + * upon exit, the cache will be unlocked, and the + * object will cease to exist. + */ +static kern_return_t +vm_object_terminate( + vm_object_t object) +{ + vm_object_t shadow_object; + + XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n", + object, object->ref_count, 0, 0, 0); + + if (!object->pageout && (!object->temporary || object->can_persist) && + (object->pager != NULL || object->shadow_severed)) { + /* + * Clear pager_trusted bit so that the pages get yanked + * out of the object instead of cleaned in place. This + * prevents a deadlock in XMM and makes more sense anyway. + */ + object->pager_trusted = FALSE; + + vm_object_reap_pages(object, REAP_TERMINATE); + } /* * Make sure the object isn't already being terminated */ - if(object->terminating) { + if (object->terminating) { vm_object_lock_assert_exclusive(object); object->ref_count--; assert(object->ref_count > 0); - vm_object_cache_unlock(); vm_object_unlock(object); return KERN_FAILURE; } @@ -1263,12 +1608,11 @@ vm_object_terminate( * Did somebody get a reference to the object while we were * cleaning it? */ - if(object->ref_count != 1) { + if (object->ref_count != 1) { vm_object_lock_assert_exclusive(object); object->ref_count--; assert(object->ref_count > 0); vm_object_res_deallocate(object); - vm_object_cache_unlock(); vm_object_unlock(object); return KERN_FAILURE; } @@ -1279,8 +1623,17 @@ vm_object_terminate( object->terminating = TRUE; object->alive = FALSE; - vm_object_remove(object); + if ( !object->internal && (object->objq.next || object->objq.prev)) + vm_object_cache_remove(object); + + if (object->hashed) { + lck_mtx_t *lck; + + lck = vm_object_hash_lock_spin(object->pager); + vm_object_remove(object); + vm_object_hash_unlock(lck); + } /* * Detach the object from its shadow if we are the shadow's * copy. The reference we hold on the shadow must be dropped @@ -1294,7 +1647,8 @@ vm_object_terminate( vm_object_unlock(shadow_object); } - if (object->paging_in_progress != 0) { + if (object->paging_in_progress != 0 || + object->activity_in_progress != 0) { /* * There are still some paging_in_progress references * on this object, meaning that there are some paging @@ -1317,7 +1671,6 @@ vm_object_terminate( * VM object is "terminating" and not "alive". 
*/ vm_object_reap_async(object); - vm_object_cache_unlock(); vm_object_unlock(object); /* * Return KERN_FAILURE to let the caller know that we @@ -1328,13 +1681,15 @@ vm_object_terminate( */ return KERN_FAILURE; } - - /* complete the VM object termination */ + /* + * complete the VM object termination + */ vm_object_reap(object); object = VM_OBJECT_NULL; - /* cache lock and object lock were released by vm_object_reap() */ /* + * the object lock was released by vm_object_reap() + * * KERN_SUCCESS means that this object has been terminated * and no longer needs its shadow object but still holds a * reference on it. @@ -1345,48 +1700,34 @@ vm_object_terminate( return KERN_SUCCESS; } + /* * vm_object_reap(): * * Complete the termination of a VM object after it's been marked * as "terminating" and "!alive" by vm_object_terminate(). * - * The VM object cache and the VM object must be locked by caller. - * The locks will be released on return and the VM object is no longer valid. + * The VM object must be locked by caller. + * The lock will be released on return and the VM object is no longer valid. */ void vm_object_reap( vm_object_t object) { memory_object_t pager; - vm_page_t p; - vm_page_t local_free_q; - int loop_count; -#if VM_OBJ_TERM_STATS - uint32_t local_free_count; -#endif /* VM_OBJ_TERM_STATS */ -#if DEBUG - mutex_assert(&vm_object_cached_lock_data, MA_OWNED); -#endif /* DEBUG */ vm_object_lock_assert_exclusive(object); assert(object->paging_in_progress == 0); + assert(object->activity_in_progress == 0); vm_object_reap_count++; - local_free_q = VM_PAGE_NULL; -#if VM_OBJ_TERM_STATS - local_free_count = 0; -#endif /* VM_OBJ_TERM_STATS */ - pager = object->pager; object->pager = MEMORY_OBJECT_NULL; if (pager != MEMORY_OBJECT_NULL) memory_object_control_disable(object->pager_control); - vm_object_cache_unlock(); - vm_object_lock_assert_exclusive(object); object->ref_count--; #if TASK_SWAPPER assert(object->res_count == 0); @@ -1394,17 +1735,24 @@ vm_object_reap( assert (object->ref_count == 0); - /* remove from purgeable queue if it's on */ - if (object->objq.next || object->objq.prev) { + /* + * remove from purgeable queue if it's on + */ + if (object->internal && (object->objq.next || object->objq.prev)) { purgeable_q_t queue = vm_purgeable_object_remove(object); assert(queue); - /* Must take page lock for this - using it to protect token queue */ - vm_page_lock_queues(); - vm_purgeable_token_delete_first(queue); + if (object->purgeable_when_ripe) { + /* + * Must take page lock for this - + * using it to protect token queue + */ + vm_page_lock_queues(); + vm_purgeable_token_delete_first(queue); - assert(queue->debug_count_objects>=0); - vm_page_unlock_queues(); + assert(queue->debug_count_objects>=0); + vm_page_unlock_queues(); + } } /* @@ -1417,110 +1765,329 @@ vm_object_reap( vm_pageout_object_terminate(object); - } else if ((object->temporary && !object->can_persist) || - (pager == MEMORY_OBJECT_NULL)) { - loop_count = V_O_T_MAX_BATCH; - vm_page_lock_queues(); - while (!queue_empty(&object->memq)) { - if (--loop_count == 0) { + } else if (((object->temporary && !object->can_persist) || (pager == MEMORY_OBJECT_NULL))) { + + vm_object_reap_pages(object, REAP_REAP); + } + assert(queue_empty(&object->memq)); + assert(object->paging_in_progress == 0); + assert(object->activity_in_progress == 0); + assert(object->ref_count == 0); + + /* + * If the pager has not already been released by + * vm_object_destroy, we need to terminate it and + * release our reference to it here. 
+ */ + if (pager != MEMORY_OBJECT_NULL) { + vm_object_unlock(object); + vm_object_release_pager(pager, object->hashed); + vm_object_lock(object); + } + + /* kick off anyone waiting on terminating */ + object->terminating = FALSE; + vm_object_paging_begin(object); + vm_object_paging_end(object); + vm_object_unlock(object); + +#if MACH_PAGEMAP + vm_external_destroy(object->existence_map, object->vo_size); +#endif /* MACH_PAGEMAP */ + + object->shadow = VM_OBJECT_NULL; + + vm_object_lock_destroy(object); + /* + * Free the space for the object. + */ + zfree(vm_object_zone, object); + object = VM_OBJECT_NULL; +} + + +unsigned int vm_max_batch = 256; + +#define V_O_R_MAX_BATCH 128 + +#define BATCH_LIMIT(max) (vm_max_batch >= max ? max : vm_max_batch) + + +#define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect) \ + MACRO_BEGIN \ + if (_local_free_q) { \ + if (do_disconnect) { \ + vm_page_t m; \ + for (m = _local_free_q; \ + m != VM_PAGE_NULL; \ + m = (vm_page_t) m->pageq.next) { \ + if (m->pmapped) { \ + pmap_disconnect(m->phys_page); \ + } \ + } \ + } \ + vm_page_free_list(_local_free_q, TRUE); \ + _local_free_q = VM_PAGE_NULL; \ + } \ + MACRO_END + + +void +vm_object_reap_pages( + vm_object_t object, + int reap_type) +{ + vm_page_t p; + vm_page_t next; + vm_page_t local_free_q = VM_PAGE_NULL; + int loop_count; + boolean_t disconnect_on_release; + pmap_flush_context pmap_flush_context_storage; + + if (reap_type == REAP_DATA_FLUSH) { + /* + * We need to disconnect pages from all pmaps before + * releasing them to the free list + */ + disconnect_on_release = TRUE; + } else { + /* + * Either the caller has already disconnected the pages + * from all pmaps, or we disconnect them here as we add + * them to out local list of pages to be released. + * No need to re-disconnect them when we release the pages + * to the free list. + */ + disconnect_on_release = FALSE; + } + +restart_after_sleep: + if (queue_empty(&object->memq)) + return; + loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH); + + if (reap_type == REAP_PURGEABLE) + pmap_flush_context_init(&pmap_flush_context_storage); + + vm_page_lockspin_queues(); + + next = (vm_page_t)queue_first(&object->memq); + + while (!queue_end(&object->memq, (queue_entry_t)next)) { + + p = next; + next = (vm_page_t)queue_next(&next->listq); + + if (--loop_count == 0) { + + vm_page_unlock_queues(); + + if (local_free_q) { + + if (reap_type == REAP_PURGEABLE) { + pmap_flush(&pmap_flush_context_storage); + pmap_flush_context_init(&pmap_flush_context_storage); + } /* * Free the pages we reclaimed so far * and take a little break to avoid * hogging the page queue lock too long */ - VM_OBJ_TERM_FREELIST(local_free_count, - local_free_count, - local_free_q); - mutex_yield(&vm_page_queue_lock); - loop_count = V_O_T_MAX_BATCH; + VM_OBJ_REAP_FREELIST(local_free_q, + disconnect_on_release); + } else + mutex_pause(0); + + loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH); + + vm_page_lockspin_queues(); + } + if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) { + + if (p->busy || p->cleaning) { + + vm_page_unlock_queues(); + /* + * free the pages reclaimed so far + */ + VM_OBJ_REAP_FREELIST(local_free_q, + disconnect_on_release); + + PAGE_SLEEP(object, p, THREAD_UNINT); + + goto restart_after_sleep; + } + if (p->laundry) { + p->pageout = FALSE; + + vm_pageout_steal_laundry(p, TRUE); + } + } + switch (reap_type) { + + case REAP_DATA_FLUSH: + if (VM_PAGE_WIRED(p)) { + /* + * this is an odd case... 
perhaps we should + * zero-fill this page since we're conceptually + * tossing its data at this point, but leaving + * it on the object to honor the 'wire' contract + */ + continue; + } + break; + + case REAP_PURGEABLE: + if (VM_PAGE_WIRED(p)) { + /* + * can't purge a wired page + */ + vm_page_purged_wired++; + continue; + } + if (p->laundry && !p->busy && !p->cleaning) { + p->pageout = FALSE; + + vm_pageout_steal_laundry(p, TRUE); + } + if (p->cleaning || p->laundry) { + /* + * page is being acted upon, + * so don't mess with it + */ + vm_page_purged_others++; + continue; + } + if (p->busy) { + /* + * We can't reclaim a busy page but we can + * make it more likely to be paged (it's not wired) to make + * sure that it gets considered by + * vm_pageout_scan() later. + */ + vm_page_deactivate(p); + vm_page_purged_busy++; + continue; } - p = (vm_page_t) queue_first(&object->memq); - vm_page_free_prepare(p); + assert(p->object != kernel_object); + + /* + * we can discard this page... + */ + if (p->pmapped == TRUE) { + /* + * unmap the page + */ + pmap_disconnect_options(p->phys_page, PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_NOREFMOD, (void *)&pmap_flush_context_storage); + } + vm_page_purged_count++; + + break; + + case REAP_TERMINATE: + if (p->absent || p->private) { + /* + * For private pages, VM_PAGE_FREE just + * leaves the page structure around for + * its owner to clean up. For absent + * pages, the structure is returned to + * the appropriate pool. + */ + break; + } + if (p->fictitious) { + assert (p->phys_page == vm_page_guard_addr); + break; + } + if (!p->dirty && p->wpmapped) + p->dirty = pmap_is_modified(p->phys_page); + + if ((p->dirty || p->precious) && !p->error && object->alive) { + + if (!p->laundry) { + VM_PAGE_QUEUES_REMOVE(p); + /* + * flush page... page will be freed + * upon completion of I/O + */ + vm_pageout_cluster(p, TRUE); + } + vm_page_unlock_queues(); + /* + * free the pages reclaimed so far + */ + VM_OBJ_REAP_FREELIST(local_free_q, + disconnect_on_release); + + vm_object_paging_wait(object, THREAD_UNINT); + + goto restart_after_sleep; + } + break; - assert(p->pageq.next == NULL && p->pageq.prev == NULL); - p->pageq.next = (queue_entry_t) local_free_q; - local_free_q = p; -#if VM_OBJ_TERM_STATS - local_free_count++; -#endif /* VM_OBJ_TERM_STATS */ + case REAP_REAP: + break; } + vm_page_free_prepare_queues(p); + assert(p->pageq.next == NULL && p->pageq.prev == NULL); /* - * Free the remaining reclaimed pages + * Add this page to our list of reclaimed pages, + * to be freed later. */ - VM_OBJ_TERM_FREELIST(local_free_count, - local_free_count, - local_free_q); - vm_page_unlock_queues(); - } else if (!queue_empty(&object->memq)) { - panic("vm_object_reap: queue just emptied isn't"); + p->pageq.next = (queue_entry_t) local_free_q; + local_free_q = p; } - - assert(object->paging_in_progress == 0); - assert(object->ref_count == 0); + vm_page_unlock_queues(); /* - * If the pager has not already been released by - * vm_object_destroy, we need to terminate it and - * release our reference to it here. 
+ * Free the remaining reclaimed pages */ - if (pager != MEMORY_OBJECT_NULL) { - vm_object_unlock(object); - vm_object_release_pager(pager); - vm_object_lock(object); - } - - /* kick off anyone waiting on terminating */ - object->terminating = FALSE; - vm_object_paging_begin(object); - vm_object_paging_end(object); - vm_object_unlock(object); - -#if MACH_PAGEMAP - vm_external_destroy(object->existence_map, object->size); -#endif /* MACH_PAGEMAP */ + if (reap_type == REAP_PURGEABLE) + pmap_flush(&pmap_flush_context_storage); - object->shadow = VM_OBJECT_NULL; - - vm_object_lock_destroy(object); - /* - * Free the space for the object. - */ - zfree(vm_object_zone, object); - object = VM_OBJECT_NULL; + VM_OBJ_REAP_FREELIST(local_free_q, + disconnect_on_release); } + void vm_object_reap_async( vm_object_t object) { -#if DEBUG - mutex_assert(&vm_object_cached_lock_data, MA_OWNED); -#endif /* DEBUG */ vm_object_lock_assert_exclusive(object); + vm_object_reaper_lock_spin(); + vm_object_reap_count_async++; /* enqueue the VM object... */ queue_enter(&vm_object_reaper_queue, object, vm_object_t, cached_list); + + vm_object_reaper_unlock(); + /* ... and wake up the reaper thread */ thread_wakeup((event_t) &vm_object_reaper_queue); } + void vm_object_reaper_thread(void) { vm_object_t object, shadow_object; - vm_object_cache_lock(); + vm_object_reaper_lock_spin(); while (!queue_empty(&vm_object_reaper_queue)) { queue_remove_first(&vm_object_reaper_queue, object, vm_object_t, cached_list); + + vm_object_reaper_unlock(); vm_object_lock(object); + assert(object->terminating); assert(!object->alive); @@ -1532,12 +2099,11 @@ vm_object_reaper_thread(void) * ceased before we break the association with the pager * itself. */ - while (object->paging_in_progress != 0) { - vm_object_cache_unlock(); + while (object->paging_in_progress != 0 || + object->activity_in_progress != 0) { vm_object_wait(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS, THREAD_UNINT); - vm_object_cache_lock(); vm_object_lock(object); } @@ -1556,13 +2122,14 @@ vm_object_reaper_thread(void) vm_object_deallocate(shadow_object); shadow_object = VM_OBJECT_NULL; } - - vm_object_cache_lock(); + vm_object_reaper_lock_spin(); } /* wait for more work... */ assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT); - vm_object_cache_unlock(); + + vm_object_reaper_unlock(); + thread_block((thread_continue_t) vm_object_reaper_thread); /*NOTREACHED*/ } @@ -1578,16 +2145,18 @@ vm_object_pager_wakeup( { vm_object_hash_entry_t entry; boolean_t waiting = FALSE; + lck_mtx_t *lck; /* * If anyone was waiting for the memory_object_terminate * to be queued, wake them up now. */ - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); entry = vm_object_hash_lookup(pager, TRUE); if (entry != VM_OBJECT_HASH_ENTRY_NULL) waiting = entry->waiting; - vm_object_cache_unlock(); + vm_object_hash_unlock(lck); + if (entry != VM_OBJECT_HASH_ENTRY_NULL) { if (waiting) thread_wakeup((event_t) pager); @@ -1606,7 +2175,8 @@ vm_object_pager_wakeup( */ static void vm_object_release_pager( - memory_object_t pager) + memory_object_t pager, + boolean_t hashed) { /* @@ -1615,11 +2185,13 @@ vm_object_release_pager( (void) memory_object_terminate(pager); - /* - * Wakeup anyone waiting for this terminate - */ - vm_object_pager_wakeup(pager); - + if (hashed == TRUE) { + /* + * Wakeup anyone waiting for this terminate + * and remove the entry from the hash + */ + vm_object_pager_wakeup(pager); + } /* * Release reference to pager. 
*/ @@ -1652,22 +2224,24 @@ vm_object_destroy( * the destroy call.] */ - vm_object_cache_lock(); vm_object_lock(object); object->can_persist = FALSE; object->named = FALSE; object->alive = FALSE; - /* - * Rip out the pager from the vm_object now... - */ - - vm_object_remove(object); + if (object->hashed) { + lck_mtx_t *lck; + /* + * Rip out the pager from the vm_object now... + */ + lck = vm_object_hash_lock_spin(object->pager); + vm_object_remove(object); + vm_object_hash_unlock(lck); + } old_pager = object->pager; object->pager = MEMORY_OBJECT_NULL; if (old_pager != MEMORY_OBJECT_NULL) memory_object_control_disable(object->pager_control); - vm_object_cache_unlock(); /* * Wait for the existing paging activity (that got @@ -1681,7 +2255,7 @@ vm_object_destroy( * Terminate the object now. */ if (old_pager != MEMORY_OBJECT_NULL) { - vm_object_release_pager(old_pager); + vm_object_release_pager(old_pager, object->hashed); /* * JMM - Release the caller's reference. This assumes the @@ -1696,13 +2270,16 @@ vm_object_destroy( return(KERN_SUCCESS); } + +#if VM_OBJECT_CACHE + #define VM_OBJ_DEACT_ALL_STATS DEBUG #if VM_OBJ_DEACT_ALL_STATS uint32_t vm_object_deactivate_all_pages_batches = 0; uint32_t vm_object_deactivate_all_pages_pages = 0; #endif /* VM_OBJ_DEACT_ALL_STATS */ /* - * vm_object_deactivate_pages + * vm_object_deactivate_all_pages * * Deactivate all pages in the specified object. (Keep its pages * in memory even though it is no longer referenced.) @@ -1720,7 +2297,7 @@ vm_object_deactivate_all_pages( #endif /* VM_OBJ_DEACT_ALL_STATS */ #define V_O_D_A_P_MAX_BATCH 256 - loop_count = V_O_D_A_P_MAX_BATCH; + loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH); #if VM_OBJ_DEACT_ALL_STATS pages_count = 0; #endif /* VM_OBJ_DEACT_ALL_STATS */ @@ -1734,8 +2311,8 @@ vm_object_deactivate_all_pages( pages_count); pages_count = 0; #endif /* VM_OBJ_DEACT_ALL_STATS */ - mutex_yield(&vm_page_queue_lock); - loop_count = V_O_D_A_P_MAX_BATCH; + lck_mtx_yield(&vm_page_queue_lock); + loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH); } if (!p->busy && !p->throttled) { #if VM_OBJ_DEACT_ALL_STATS @@ -1754,108 +2331,576 @@ vm_object_deactivate_all_pages( #endif /* VM_OBJ_DEACT_ALL_STATS */ vm_page_unlock_queues(); } +#endif /* VM_OBJECT_CACHE */ + + + +/* + * The "chunk" macros are used by routines below when looking for pages to deactivate. These + * exist because of the need to handle shadow chains. When deactivating pages, we only + * want to deactive the ones at the top most level in the object chain. In order to do + * this efficiently, the specified address range is divided up into "chunks" and we use + * a bit map to keep track of which pages have already been processed as we descend down + * the shadow chain. These chunk macros hide the details of the bit map implementation + * as much as we can. + * + * For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is + * set to 64 pages. The bit map is indexed from the low-order end, so that the lowest + * order bit represents page 0 in the current range and highest order bit represents + * page 63. + * + * For further convenience, we also use negative logic for the page state in the bit map. + * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has + * been processed. This way we can simply test the 64-bit long word to see if it's zero + * to easily tell if the whole range has been processed. Therefore, the bit map starts + * out with all the bits set. The macros below hide all these details from the caller. 
+ */ + +#define PAGES_IN_A_CHUNK 64 /* The number of pages in the chunk must */ + /* be the same as the number of bits in */ + /* the chunk_state_t type. We use 64 */ + /* just for convenience. */ + +#define CHUNK_SIZE (PAGES_IN_A_CHUNK * PAGE_SIZE_64) /* Size of a chunk in bytes */ + +typedef uint64_t chunk_state_t; + +/* + * The bit map uses negative logic, so we start out with all 64 bits set to indicate + * that no pages have been processed yet. Also, if len is less than the full CHUNK_SIZE, + * then we mark pages beyond the len as having been "processed" so that we don't waste time + * looking at pages in that range. This can save us from unnecessarily chasing down the + * shadow chain. + */ + +#define CHUNK_INIT(c, len) \ + MACRO_BEGIN \ + uint64_t p; \ + \ + (c) = 0xffffffffffffffffLL; \ + \ + for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++) \ + MARK_PAGE_HANDLED(c, p); \ + MACRO_END + + +/* + * Return true if all pages in the chunk have not yet been processed. + */ + +#define CHUNK_NOT_COMPLETE(c) ((c) != 0) + +/* + * Return true if the page at offset 'p' in the bit map has already been handled + * while processing a higher level object in the shadow chain. + */ + +#define PAGE_ALREADY_HANDLED(c, p) (((c) & (1LL << (p))) == 0) + +/* + * Mark the page at offset 'p' in the bit map as having been processed. + */ + +#define MARK_PAGE_HANDLED(c, p) \ +MACRO_BEGIN \ + (c) = (c) & ~(1LL << (p)); \ +MACRO_END + + +/* + * Return true if the page at the given offset has been paged out. Object is + * locked upon entry and returned locked. + */ + +static boolean_t +page_is_paged_out( + vm_object_t object, + vm_object_offset_t offset) +{ + kern_return_t kr; + memory_object_t pager; + + /* + * Check the existence map for the page if we have one, otherwise + * ask the pager about this page. + */ + +#if MACH_PAGEMAP + if (object->existence_map) { + if (vm_external_state_get(object->existence_map, offset) + == VM_EXTERNAL_STATE_EXISTS) { + /* + * We found the page + */ + + return TRUE; + } + } else +#endif /* MACH_PAGEMAP */ + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) + == VM_EXTERNAL_STATE_EXISTS) { + return TRUE; + } else { + return FALSE; + } + } + + /* + * We're already holding a "paging in progress" reference + * so the object can't disappear when we release the lock. + */ + + assert(object->paging_in_progress); + pager = object->pager; + vm_object_unlock(object); + + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* just poke the pager */ + VM_PROT_READ, + NULL); + + vm_object_lock(object); + + if (kr == KERN_SUCCESS) { + + /* + * We found the page + */ + + return TRUE; + } + } + + return FALSE; +} + + + +/* + * madvise_free_debug + * + * To help debug madvise(MADV_FREE*) mis-usage, this triggers a + * zero-fill as soon as a page is affected by a madvise(MADV_FREE*), to + * simulate the loss of the page's contents as if the page had been + * reclaimed and then re-faulted. + */ +#if DEVELOPMENT || DEBUG +int madvise_free_debug = 1; +#else /* DEBUG */ +int madvise_free_debug = 0; +#endif /* DEBUG */ + +/* + * Deactivate the pages in the specified object and range. If kill_page is set, also discard any + * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify + * a size that is less than or equal to the CHUNK_SIZE. 
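The lookup order in page_is_paged_out() above (existence map if one is present, then the compressor/pager state, and finally a zero-length data_request to poke the pager) can be sketched in isolation. The following is a hypothetical user-space mock, not kernel code: the fields of mock_object stand in for vm_external_state_get(), VM_COMPRESSOR_PAGER_STATE_GET() and memory_object_data_request().

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the three sources consulted by
 * page_is_paged_out(): an existence map, the compressor, and the pager. */
struct mock_object {
    bool has_existence_map;
    bool existence_map_says_exists;
    bool compressor_active;
    bool compressor_says_exists;
    bool pager_says_exists;   /* what a data_request "poke" would report */
};

static bool mock_page_is_paged_out(const struct mock_object *o)
{
    if (o->has_existence_map)           /* MACH_PAGEMAP path */
        return o->existence_map_says_exists;

    if (o->compressor_active)           /* compressed pager path */
        return o->compressor_says_exists;

    /* Fall back to asking the pager itself; in the kernel this is a
     * zero-length memory_object_data_request() issued with the object
     * unlocked but pinned by a paging_in_progress reference. */
    return o->pager_says_exists;
}

int main(void)
{
    struct mock_object o = { .has_existence_map = false,
                             .compressor_active = true,
                             .compressor_says_exists = true };
    printf("paged out: %d\n", mock_page_is_paged_out(&o));   /* 1 */
    return 0;
}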
+ */ + +static void +deactivate_pages_in_object( + vm_object_t object, + vm_object_offset_t offset, + vm_object_size_t size, + boolean_t kill_page, + boolean_t reusable_page, + boolean_t all_reusable, + chunk_state_t *chunk_state, + pmap_flush_context *pfc) +{ + vm_page_t m; + int p; + struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; + struct vm_page_delayed_work *dwp; + int dw_count; + int dw_limit; + unsigned int reusable = 0; + + /* + * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the + * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may + * have pages marked as having been processed already. We stop the loop early if we find we've handled + * all the pages in the chunk. + */ + + dwp = &dw_array[0]; + dw_count = 0; + dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); + + for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) { + + /* + * If this offset has already been found and handled in a higher level object, then don't + * do anything with it in the current shadow object. + */ + + if (PAGE_ALREADY_HANDLED(*chunk_state, p)) + continue; + + /* + * See if the page at this offset is around. First check to see if the page is resident, + * then if not, check the existence map or with the pager. + */ + + if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { + + /* + * We found a page we were looking for. Mark it as "handled" now in the chunk_state + * so that we won't bother looking for a page at this offset again if there are more + * shadow objects. Then deactivate the page. + */ + + MARK_PAGE_HANDLED(*chunk_state, p); + + if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy) && (!m->laundry)) { + int clear_refmod; + + dwp->dw_mask = 0; + + clear_refmod = VM_MEM_REFERENCED; + dwp->dw_mask |= DW_clear_reference; + + if ((kill_page) && (object->internal)) { + if (madvise_free_debug) { + /* + * zero-fill the page now + * to simulate it being + * reclaimed and re-faulted. + */ + pmap_zero_page(m->phys_page); + } + m->precious = FALSE; + m->dirty = FALSE; + + clear_refmod |= VM_MEM_MODIFIED; + if (m->throttled) { + /* + * This page is now clean and + * reclaimable. Move it out + * of the throttled queue, so + * that vm_pageout_scan() can + * find it. + */ + dwp->dw_mask |= DW_move_page; + } +#if MACH_PAGEMAP + vm_external_state_clr(object->existence_map, offset); +#endif /* MACH_PAGEMAP */ + VM_COMPRESSOR_PAGER_STATE_CLR(object, + offset); + + if (reusable_page && !m->reusable) { + assert(!all_reusable); + assert(!object->all_reusable); + m->reusable = TRUE; + object->reusable_page_count++; + assert(object->resident_page_count >= object->reusable_page_count); + reusable++; + } + } + pmap_clear_refmod_options(m->phys_page, clear_refmod, PMAP_OPTIONS_NOFLUSH, (void *)pfc); + + if (!m->throttled && !(reusable_page || all_reusable)) + dwp->dw_mask |= DW_move_page; + + if (dwp->dw_mask) + VM_PAGE_ADD_DELAYED_WORK(dwp, m, + dw_count); + + if (dw_count >= dw_limit) { + if (reusable) { + OSAddAtomic(reusable, + &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reusable += reusable; + reusable = 0; + } + vm_page_do_delayed_work(object, &dw_array[0], dw_count); + + dwp = &dw_array[0]; + dw_count = 0; + } + } + + } else { + + /* + * The page at this offset isn't memory resident, check to see if it's + * been paged out. 
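The dw_array/dw_count/dw_limit pattern in deactivate_pages_in_object() (accumulate per-page work items, then hand the whole batch to vm_page_do_delayed_work() when the limit is reached) is a general batching idiom. Below is a minimal stand-alone sketch with invented names, assuming a flush routine that processes one batch at a time.

#include <stdio.h>

#define BATCH_LIMIT 8   /* stand-in for DEFAULT_DELAYED_WORK_LIMIT */

struct work_item { int page; unsigned mask; };

/* Stand-in for vm_page_do_delayed_work(): process one accumulated batch. */
static void flush_batch(struct work_item *items, int count)
{
    printf("flushing %d item(s)\n", count);
    for (int i = 0; i < count; i++)
        printf("  page %d mask 0x%x\n", items[i].page, items[i].mask);
}

int main(void)
{
    struct work_item batch[BATCH_LIMIT];
    int count = 0;

    for (int page = 0; page < 20; page++) {
        unsigned mask = 0x1;            /* e.g. "clear reference" */
        if (page % 3 == 0)
            mask |= 0x2;                /* e.g. "move to inactive queue" */

        batch[count].page = page;
        batch[count].mask = mask;
        count++;

        if (count >= BATCH_LIMIT) {     /* batch full: flush and reset */
            flush_batch(batch, count);
            count = 0;
        }
    }
    if (count)                          /* flush the partial final batch */
        flush_batch(batch, count);
    return 0;
}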
If so, mark it as handled so we don't bother looking + * for it in the shadow chain. + */ + + if (page_is_paged_out(object, offset)) { + MARK_PAGE_HANDLED(*chunk_state, p); + + /* + * If we're killing a non-resident page, then clear the page in the existence + * map so we don't bother paging it back in if it's touched again in the future. + */ + + if ((kill_page) && (object->internal)) { +#if MACH_PAGEMAP + vm_external_state_clr(object->existence_map, offset); +#endif /* MACH_PAGEMAP */ + VM_COMPRESSOR_PAGER_STATE_CLR(object, + offset); + } + } + } + } + + if (reusable) { + OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reusable += reusable; + reusable = 0; + } + + if (dw_count) + vm_page_do_delayed_work(object, &dw_array[0], dw_count); +} + + +/* + * Deactive a "chunk" of the given range of the object starting at offset. A "chunk" + * will always be less than or equal to the given size. The total range is divided up + * into chunks for efficiency and performance related to the locks and handling the shadow + * chain. This routine returns how much of the given "size" it actually processed. It's + * up to the caler to loop and keep calling this routine until the entire range they want + * to process has been done. + */ + +static vm_object_size_t +deactivate_a_chunk( + vm_object_t orig_object, + vm_object_offset_t offset, + vm_object_size_t size, + boolean_t kill_page, + boolean_t reusable_page, + boolean_t all_reusable, + pmap_flush_context *pfc) +{ + vm_object_t object; + vm_object_t tmp_object; + vm_object_size_t length; + chunk_state_t chunk_state; + + + /* + * Get set to do a chunk. We'll do up to CHUNK_SIZE, but no more than the + * remaining size the caller asked for. + */ + + length = MIN(size, CHUNK_SIZE); + + /* + * The chunk_state keeps track of which pages we've already processed if there's + * a shadow chain on this object. At this point, we haven't done anything with this + * range of pages yet, so initialize the state to indicate no pages processed yet. + */ + + CHUNK_INIT(chunk_state, length); + object = orig_object; + + /* + * Start at the top level object and iterate around the loop once for each object + * in the shadow chain. We stop processing early if we've already found all the pages + * in the range. Otherwise we stop when we run out of shadow objects. + */ + + while (object && CHUNK_NOT_COMPLETE(chunk_state)) { + vm_object_paging_begin(object); + + deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state, pfc); + + vm_object_paging_end(object); + + /* + * We've finished with this object, see if there's a shadow object. If + * there is, update the offset and lock the new object. We also turn off + * kill_page at this point since we only kill pages in the top most object. + */ + + tmp_object = object->shadow; + + if (tmp_object) { + kill_page = FALSE; + reusable_page = FALSE; + all_reusable = FALSE; + offset += object->vo_shadow_offset; + vm_object_lock(tmp_object); + } + + if (object != orig_object) + vm_object_unlock(object); + + object = tmp_object; + } + + if (object && object != orig_object) + vm_object_unlock(object); + + return length; +} + + + +/* + * Move any resident pages in the specified range to the inactive queue. If kill_page is set, + * we also clear the modified status of the page and "forget" any changes that have been made + * to the page. 
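deactivate_a_chunk() above caps each pass at CHUNK_SIZE, walks down the shadow chain with a shared bit map, and drops kill_page once it leaves the top-most object. The following is a simplified user-space sketch of that control flow only (it ignores shadow offsets and locking); the types and names are invented, with a per-level bitmap standing in for resident pages.

#include <stdint.h>
#include <stdio.h>

struct mock_object {
    struct mock_object *shadow;
    uint64_t resident;   /* bit p set => page p is resident at this level */
};

/* One chunk's worth of work: visit each level until every page in the
 * chunk has been claimed by some object or the chain runs out. */
static void process_chunk(struct mock_object *top, unsigned npages, int kill_page)
{
    uint64_t todo = (npages >= 64) ? ~0ULL : ((1ULL << npages) - 1);

    for (struct mock_object *o = top; o && todo; o = o->shadow) {
        for (unsigned p = 0; p < npages; p++) {
            if (!(todo & (1ULL << p)))
                continue;               /* handled at a higher level */
            if (o->resident & (1ULL << p)) {
                todo &= ~(1ULL << p);   /* claim the page here */
                printf("page %u handled at %p (kill=%d)\n",
                       p, (void *)o, kill_page);
            }
        }
        kill_page = 0;   /* only the top-most object is "killed" */
    }
}

int main(void)
{
    struct mock_object backing = { .shadow = NULL,     .resident = 0xF0 };
    struct mock_object top     = { .shadow = &backing, .resident = 0x0F };

    process_chunk(&top, 8, 1);
    return 0;
}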
+ */ __private_extern__ void vm_object_deactivate_pages( vm_object_t object, vm_object_offset_t offset, vm_object_size_t size, - boolean_t kill_page) + boolean_t kill_page, + boolean_t reusable_page) { - vm_object_t orig_object; - int pages_moved = 0; - int pages_found = 0; + vm_object_size_t length; + boolean_t all_reusable; + pmap_flush_context pmap_flush_context_storage; /* - * entered with object lock held, acquire a paging reference to - * prevent the memory_object and control ports from - * being destroyed. + * We break the range up into chunks and do one chunk at a time. This is for + * efficiency and performance while handling the shadow chains and the locks. + * The deactivate_a_chunk() function returns how much of the range it processed. + * We keep calling this routine until the given size is exhausted. */ - orig_object = object; - - for (;;) { - register vm_page_t m; - vm_object_offset_t toffset; - vm_object_size_t tsize; - - vm_object_paging_begin(object); - vm_page_lock_queues(); - for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) { - if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) { + all_reusable = FALSE; + if (reusable_page && + object->internal && + object->vo_size != 0 && + object->vo_size == size && + object->reusable_page_count == 0) { + all_reusable = TRUE; + reusable_page = FALSE; + } - pages_found++; + if ((reusable_page || all_reusable) && object->all_reusable) { + /* This means MADV_FREE_REUSABLE has been called twice, which + * is probably illegal. */ + return; + } - if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) { + pmap_flush_context_init(&pmap_flush_context_storage); - assert(!m->laundry); + while (size) { + length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable, &pmap_flush_context_storage); - m->reference = FALSE; - pmap_clear_reference(m->phys_page); + size -= length; + offset += length; + } + pmap_flush(&pmap_flush_context_storage); + + if (all_reusable) { + if (!object->all_reusable) { + unsigned int reusable; + + object->all_reusable = TRUE; + assert(object->reusable_page_count == 0); + /* update global stats */ + reusable = object->resident_page_count; + OSAddAtomic(reusable, + &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reusable += reusable; + vm_page_stats_reusable.all_reusable_calls++; + } + } else if (reusable_page) { + vm_page_stats_reusable.partial_reusable_calls++; + } +} - if ((kill_page) && (object->internal)) { - m->precious = FALSE; - m->dirty = FALSE; - pmap_clear_modify(m->phys_page); -#if MACH_PAGEMAP - vm_external_state_clr(object->existence_map, offset); -#endif /* MACH_PAGEMAP */ - } +void +vm_object_reuse_pages( + vm_object_t object, + vm_object_offset_t start_offset, + vm_object_offset_t end_offset, + boolean_t allow_partial_reuse) +{ + vm_object_offset_t cur_offset; + vm_page_t m; + unsigned int reused, reusable; - if (!m->throttled) { - VM_PAGE_QUEUES_REMOVE(m); +#define VM_OBJECT_REUSE_PAGE(object, m, reused) \ + MACRO_BEGIN \ + if ((m) != VM_PAGE_NULL && \ + (m)->reusable) { \ + assert((object)->reusable_page_count <= \ + (object)->resident_page_count); \ + assert((object)->reusable_page_count > 0); \ + (object)->reusable_page_count--; \ + (m)->reusable = FALSE; \ + (reused)++; \ + } \ + MACRO_END - assert(!m->laundry); - assert(m->object != kernel_object); - assert(m->pageq.next == NULL && - m->pageq.prev == NULL); - - if(m->zero_fill) { - queue_enter_first( - &vm_page_queue_zf, - m, vm_page_t, pageq); 
- vm_zf_queue_count++; - } else { - queue_enter_first( - &vm_page_queue_inactive, - m, vm_page_t, pageq); - } + reused = 0; + reusable = 0; - m->inactive = TRUE; - if (!m->fictitious) { - vm_page_inactive_count++; - token_new_pagecount++; - } else { - assert(m->phys_page == vm_page_fictitious_addr); - } + vm_object_lock_assert_exclusive(object); - pages_moved++; - } + if (object->all_reusable) { + assert(object->reusable_page_count == 0); + object->all_reusable = FALSE; + if (end_offset - start_offset == object->vo_size || + !allow_partial_reuse) { + vm_page_stats_reusable.all_reuse_calls++; + reused = object->resident_page_count; + } else { + vm_page_stats_reusable.partial_reuse_calls++; + queue_iterate(&object->memq, m, vm_page_t, listq) { + if (m->offset < start_offset || + m->offset >= end_offset) { + m->reusable = TRUE; + object->reusable_page_count++; + assert(object->resident_page_count >= object->reusable_page_count); + continue; + } else { + assert(!m->reusable); + reused++; } } } - vm_page_unlock_queues(); - vm_object_paging_end(object); - - if (object->shadow) { - vm_object_t tmp_object; - - kill_page = 0; - - offset += object->shadow_offset; - - tmp_object = object->shadow; - vm_object_lock(tmp_object); - - if (object != orig_object) - vm_object_unlock(object); - object = tmp_object; - } else - break; + } else if (object->resident_page_count > + ((end_offset - start_offset) >> PAGE_SHIFT)) { + vm_page_stats_reusable.partial_reuse_calls++; + for (cur_offset = start_offset; + cur_offset < end_offset; + cur_offset += PAGE_SIZE_64) { + if (object->reusable_page_count == 0) { + break; + } + m = vm_page_lookup(object, cur_offset); + VM_OBJECT_REUSE_PAGE(object, m, reused); + } + } else { + vm_page_stats_reusable.partial_reuse_calls++; + queue_iterate(&object->memq, m, vm_page_t, listq) { + if (object->reusable_page_count == 0) { + break; + } + if (m->offset < start_offset || + m->offset >= end_offset) { + continue; + } + VM_OBJECT_REUSE_PAGE(object, m, reused); + } } - if (object != orig_object) - vm_object_unlock(object); + + /* update global stats */ + OSAddAtomic(reusable-reused, &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reused += reused; + vm_page_stats_reusable.reusable += reusable; } /* @@ -1891,8 +2936,25 @@ vm_object_pmap_protect( vm_map_offset_t pmap_start, vm_prot_t prot) { + vm_object_pmap_protect_options(object, offset, size, + pmap, pmap_start, prot, 0); +} + +__private_extern__ void +vm_object_pmap_protect_options( + register vm_object_t object, + register vm_object_offset_t offset, + vm_object_size_t size, + pmap_t pmap, + vm_map_offset_t pmap_start, + vm_prot_t prot, + int options) +{ + pmap_flush_context pmap_flush_context_storage; + boolean_t delayed_pmap_flush = FALSE; + if (object == VM_OBJECT_NULL) - return; + return; size = vm_object_round_page(size); offset = vm_object_trunc_page(offset); @@ -1901,21 +2963,36 @@ vm_object_pmap_protect( if (object->phys_contiguous) { if (pmap != NULL) { vm_object_unlock(object); - pmap_protect(pmap, pmap_start, pmap_start + size, prot); + pmap_protect_options(pmap, + pmap_start, + pmap_start + size, + prot, + options & ~PMAP_OPTIONS_NOFLUSH, + NULL); } else { vm_object_offset_t phys_start, phys_end, phys_addr; - phys_start = object->shadow_offset + offset; + phys_start = object->vo_shadow_offset + offset; phys_end = phys_start + size; assert(phys_start <= phys_end); - assert(phys_end <= object->shadow_offset + object->size); + assert(phys_end <= object->vo_shadow_offset + object->vo_size); 
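vm_object_reuse_pages() above picks its scan strategy from page density: when the object has more resident pages than the range has page slots, it walks the offsets and does per-offset lookups; otherwise it iterates the resident-page list once and filters by offset. A small sketch of that choice, with invented types and a trivial linear lookup standing in for vm_page_lookup():

#include <stdio.h>

#define PAGE_SIZE 4096ULL

struct mock_page { unsigned long long offset; };

struct mock_object {
    struct mock_page *pages;    /* "resident" pages, in no particular order */
    int resident_count;
};

static struct mock_page *lookup(struct mock_object *o, unsigned long long off)
{
    for (int i = 0; i < o->resident_count; i++)
        if (o->pages[i].offset == off)
            return &o->pages[i];
    return NULL;
}

static int reuse_range(struct mock_object *o,
                       unsigned long long start, unsigned long long end)
{
    int reused = 0;

    if ((unsigned long long)o->resident_count > (end - start) / PAGE_SIZE) {
        /* Dense object, narrow range: probe each offset directly. */
        for (unsigned long long off = start; off < end; off += PAGE_SIZE)
            if (lookup(o, off))
                reused++;
    } else {
        /* Sparse object or wide range: walk the resident list once. */
        for (int i = 0; i < o->resident_count; i++)
            if (o->pages[i].offset >= start && o->pages[i].offset < end)
                reused++;
    }
    return reused;
}

int main(void)
{
    struct mock_page p[] = { {0}, {PAGE_SIZE}, {10 * PAGE_SIZE} };
    struct mock_object o = { p, 3 };

    printf("reused %d page(s)\n", reuse_range(&o, 0, 4 * PAGE_SIZE)); /* 2 */
    return 0;
}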
vm_object_unlock(object); + pmap_flush_context_init(&pmap_flush_context_storage); + delayed_pmap_flush = FALSE; + for (phys_addr = phys_start; phys_addr < phys_end; phys_addr += PAGE_SIZE_64) { - pmap_page_protect(phys_addr >> PAGE_SHIFT, prot); + pmap_page_protect_options( + (ppnum_t) (phys_addr >> PAGE_SHIFT), + prot, + options | PMAP_OPTIONS_NOFLUSH, + (void *)&pmap_flush_context_storage); + delayed_pmap_flush = TRUE; } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); } return; } @@ -1925,38 +3002,49 @@ vm_object_pmap_protect( while (TRUE) { if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) { vm_object_unlock(object); - pmap_protect(pmap, pmap_start, pmap_start + size, prot); + pmap_protect_options(pmap, pmap_start, pmap_start + size, prot, + options & ~PMAP_OPTIONS_NOFLUSH, NULL); return; } - /* if we are doing large ranges with respect to resident */ - /* page count then we should interate over pages otherwise */ - /* inverse page look-up will be faster */ + pmap_flush_context_init(&pmap_flush_context_storage); + delayed_pmap_flush = FALSE; + + /* + * if we are doing large ranges with respect to resident + * page count then we should interate over pages otherwise + * inverse page look-up will be faster + */ if (ptoa_64(object->resident_page_count / 4) < size) { vm_page_t p; vm_object_offset_t end; end = offset + size; - if (pmap != PMAP_NULL) { - queue_iterate(&object->memq, p, vm_page_t, listq) { - if (!p->fictitious && - (offset <= p->offset) && (p->offset < end)) { - vm_map_offset_t start; - - start = pmap_start + p->offset - offset; - pmap_protect(pmap, start, start + PAGE_SIZE_64, prot); - } - } - } else { - queue_iterate(&object->memq, p, vm_page_t, listq) { - if (!p->fictitious && - (offset <= p->offset) && (p->offset < end)) { - - pmap_page_protect(p->phys_page, prot); - } - } + queue_iterate(&object->memq, p, vm_page_t, listq) { + if (!p->fictitious && (offset <= p->offset) && (p->offset < end)) { + vm_map_offset_t start; + + start = pmap_start + p->offset - offset; + + if (pmap != PMAP_NULL) + pmap_protect_options( + pmap, + start, + start + PAGE_SIZE_64, + prot, + options | PMAP_OPTIONS_NOFLUSH, + &pmap_flush_context_storage); + else + pmap_page_protect_options( + p->phys_page, + prot, + options | PMAP_OPTIONS_NOFLUSH, + &pmap_flush_context_storage); + delayed_pmap_flush = TRUE; + } } + } else { vm_page_t p; vm_object_offset_t end; @@ -1964,29 +3052,36 @@ vm_object_pmap_protect( end = offset + size; - if (pmap != PMAP_NULL) { - for(target_off = offset; - target_off < end; - target_off += PAGE_SIZE) { - p = vm_page_lookup(object, target_off); - if (p != VM_PAGE_NULL) { - vm_offset_t start; - start = pmap_start + - (vm_offset_t)(p->offset - offset); - pmap_protect(pmap, start, - start + PAGE_SIZE, prot); - } - } - } else { - for(target_off = offset; - target_off < end; target_off += PAGE_SIZE) { - p = vm_page_lookup(object, target_off); - if (p != VM_PAGE_NULL) { - pmap_page_protect(p->phys_page, prot); - } + for (target_off = offset; + target_off < end; target_off += PAGE_SIZE) { + + p = vm_page_lookup(object, target_off); + + if (p != VM_PAGE_NULL) { + vm_object_offset_t start; + + start = pmap_start + (p->offset - offset); + + if (pmap != PMAP_NULL) + pmap_protect_options( + pmap, + start, + start + PAGE_SIZE_64, + prot, + options | PMAP_OPTIONS_NOFLUSH, + &pmap_flush_context_storage); + else + pmap_page_protect_options( + p->phys_page, + prot, + options | PMAP_OPTIONS_NOFLUSH, + &pmap_flush_context_storage); + 
delayed_pmap_flush = TRUE; } } - } + } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); if (prot == VM_PROT_NONE) { /* @@ -1997,7 +3092,7 @@ vm_object_pmap_protect( next_object = object->shadow; if (next_object != VM_OBJECT_NULL) { - offset += object->shadow_offset; + offset += object->vo_shadow_offset; vm_object_lock(next_object); vm_object_unlock(object); object = next_object; @@ -2100,6 +3195,11 @@ vm_object_copy_slowly( fault_info.lo_offset = src_offset; fault_info.hi_offset = src_offset + size; fault_info.no_cache = FALSE; + fault_info.stealth = TRUE; + fault_info.io_sync = FALSE; + fault_info.cs_bypass = FALSE; + fault_info.mark_zf_absent = FALSE; + fault_info.batch_pmap_op = FALSE; for ( ; size != 0 ; @@ -2137,103 +3237,114 @@ vm_object_copy_slowly( vm_object_lock(src_object); vm_object_paging_begin(src_object); - fault_info.cluster_size = size; + if (size > (vm_size_t) -1) { + /* 32-bit overflow */ + fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE); + } else { + fault_info.cluster_size = (vm_size_t) size; + assert(fault_info.cluster_size == size); + } XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0); + _result_page = VM_PAGE_NULL; result = vm_fault_page(src_object, src_offset, VM_PROT_READ, FALSE, + FALSE, /* page not looked up */ &prot, &_result_page, &top_page, (int *)0, &error_code, FALSE, FALSE, &fault_info); switch(result) { - case VM_FAULT_SUCCESS: - result_page = _result_page; + case VM_FAULT_SUCCESS: + result_page = _result_page; - /* - * We don't need to hold the object - * lock -- the busy page will be enough. - * [We don't care about picking up any - * new modifications.] - * - * Copy the page to the new object. - * - * POLICY DECISION: - * If result_page is clean, - * we could steal it instead - * of copying. - */ + /* + * Copy the page to the new object. + * + * POLICY DECISION: + * If result_page is clean, + * we could steal it instead + * of copying. + */ - vm_object_unlock(result_page->object); - vm_page_copy(result_page, new_page); + vm_page_copy(result_page, new_page); + vm_object_unlock(result_page->object); - /* - * Let go of both pages (make them - * not busy, perform wakeup, activate). - */ - vm_object_lock(new_object); - new_page->dirty = TRUE; - PAGE_WAKEUP_DONE(new_page); - vm_object_unlock(new_object); - - vm_object_lock(result_page->object); - PAGE_WAKEUP_DONE(result_page); - - vm_page_lockspin_queues(); - if (!result_page->active && - !result_page->inactive && - !result_page->throttled) - vm_page_activate(result_page); - vm_page_activate(new_page); - vm_page_unlock_queues(); + /* + * Let go of both pages (make them + * not busy, perform wakeup, activate). + */ + vm_object_lock(new_object); + SET_PAGE_DIRTY(new_page, FALSE); + PAGE_WAKEUP_DONE(new_page); + vm_object_unlock(new_object); - /* - * Release paging references and - * top-level placeholder page, if any. - */ + vm_object_lock(result_page->object); + PAGE_WAKEUP_DONE(result_page); - vm_fault_cleanup(result_page->object, - top_page); + vm_page_lockspin_queues(); + if (!result_page->active && + !result_page->inactive && + !result_page->throttled) + vm_page_activate(result_page); + vm_page_activate(new_page); + vm_page_unlock_queues(); - break; + /* + * Release paging references and + * top-level placeholder page, if any. 
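The pmap_flush_context pattern used throughout vm_object_pmap_protect_options() above (pass PMAP_OPTIONS_NOFLUSH on every per-page call, remember that at least one call was deferred, then issue a single pmap_flush() at the end) amounts to coalescing TLB shootdowns. Since the real pmap interfaces are machine-dependent, here is only a generic sketch of the shape, with invented names:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for pmap_flush_context: remembers how many per-page
 * operations deferred their TLB flush. */
struct flush_ctx { int deferred; };

static void flush_ctx_init(struct flush_ctx *c) { c->deferred = 0; }

/* Stand-in for pmap_page_protect_options(..., PMAP_OPTIONS_NOFLUSH, ctx):
 * perform the protection change but defer the flush into the context. */
static void protect_page_noflush(int page, struct flush_ctx *c)
{
    printf("protect page %d (flush deferred)\n", page);
    c->deferred++;
}

/* Stand-in for pmap_flush(): one shootdown covering the whole batch. */
static void flush(struct flush_ctx *c)
{
    if (c->deferred) {
        printf("single flush covering %d page(s)\n", c->deferred);
        c->deferred = 0;
    }
}

int main(void)
{
    struct flush_ctx ctx;
    bool delayed = false;

    flush_ctx_init(&ctx);
    for (int page = 0; page < 5; page++) {
        protect_page_noflush(page, &ctx);
        delayed = true;
    }
    if (delayed)        /* mirrors the delayed_pmap_flush check */
        flush(&ctx);
    return 0;
}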
+ */ + + vm_fault_cleanup(result_page->object, + top_page); + + break; - case VM_FAULT_RETRY: - break; + case VM_FAULT_RETRY: + break; - case VM_FAULT_FICTITIOUS_SHORTAGE: - vm_page_more_fictitious(); + case VM_FAULT_MEMORY_SHORTAGE: + if (vm_page_wait(interruptible)) break; + /* fall thru */ - case VM_FAULT_MEMORY_SHORTAGE: - if (vm_page_wait(interruptible)) - break; - /* fall thru */ + case VM_FAULT_INTERRUPTED: + vm_object_lock(new_object); + VM_PAGE_FREE(new_page); + vm_object_unlock(new_object); + + vm_object_deallocate(new_object); + vm_object_deallocate(src_object); + *_result_object = VM_OBJECT_NULL; + return(MACH_SEND_INTERRUPTED); - case VM_FAULT_INTERRUPTED: - vm_page_free(new_page); - vm_object_deallocate(new_object); - vm_object_deallocate(src_object); - *_result_object = VM_OBJECT_NULL; - return(MACH_SEND_INTERRUPTED); + case VM_FAULT_SUCCESS_NO_VM_PAGE: + /* success but no VM page: fail */ + vm_object_paging_end(src_object); + vm_object_unlock(src_object); + /*FALLTHROUGH*/ + case VM_FAULT_MEMORY_ERROR: + /* + * A policy choice: + * (a) ignore pages that we can't + * copy + * (b) return the null object if + * any page fails [chosen] + */ - case VM_FAULT_MEMORY_ERROR: - /* - * A policy choice: - * (a) ignore pages that we can't - * copy - * (b) return the null object if - * any page fails [chosen] - */ + vm_object_lock(new_object); + VM_PAGE_FREE(new_page); + vm_object_unlock(new_object); - vm_page_lock_queues(); - vm_page_free(new_page); - vm_page_unlock_queues(); + vm_object_deallocate(new_object); + vm_object_deallocate(src_object); + *_result_object = VM_OBJECT_NULL; + return(error_code ? error_code: + KERN_MEMORY_ERROR); - vm_object_deallocate(new_object); - vm_object_deallocate(src_object); - *_result_object = VM_OBJECT_NULL; - return(error_code ? error_code: - KERN_MEMORY_ERROR); + default: + panic("vm_object_copy_slowly: unexpected error" + " 0x%x from vm_fault_page()\n", result); } } while (result != VM_FAULT_SUCCESS); } @@ -2421,8 +3532,8 @@ Retry: vm_object_lock(src_object); goto Retry; } - if (copy->size < src_offset+size) - copy->size = src_offset+size; + if (copy->vo_size < src_offset+size) + copy->vo_size = src_offset+size; if (!copy->pager_ready) check_ready = TRUE; @@ -2476,6 +3587,8 @@ vm_object_copy_delayed( vm_object_t old_copy; vm_page_t p; vm_object_size_t copy_size = src_offset + size; + pmap_flush_context pmap_flush_context_storage; + boolean_t delayed_pmap_flush = FALSE; int collisions = 0; @@ -2523,12 +3636,14 @@ vm_object_copy_delayed( /* * Wait for paging in progress. */ - if (!src_object->true_share && src_object->paging_in_progress) { + if (!src_object->true_share && + (src_object->paging_in_progress != 0 || + src_object->activity_in_progress != 0)) { if (src_object_shared == TRUE) { vm_object_unlock(src_object); - vm_object_lock(src_object); src_object_shared = FALSE; + goto Retry; } vm_object_paging_wait(src_object, THREAD_UNINT); } @@ -2586,7 +3701,7 @@ vm_object_copy_delayed( * needed). 
*/ - if (old_copy->size < copy_size) { + if (old_copy->vo_size < copy_size) { if (src_object_shared == TRUE) { vm_object_unlock(old_copy); vm_object_unlock(src_object); @@ -2604,11 +3719,14 @@ vm_object_copy_delayed( */ copy_delayed_protect_iterate++; + pmap_flush_context_init(&pmap_flush_context_storage); + delayed_pmap_flush = FALSE; + queue_iterate(&src_object->memq, p, vm_page_t, listq) { if (!p->fictitious && - p->offset >= old_copy->size && + p->offset >= old_copy->vo_size && p->offset < copy_size) { - if (p->wire_count > 0) { + if (VM_PAGE_WIRED(p)) { vm_object_unlock(old_copy); vm_object_unlock(src_object); @@ -2616,15 +3734,21 @@ vm_object_copy_delayed( vm_object_unlock(new_copy); vm_object_deallocate(new_copy); } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); return VM_OBJECT_NULL; } else { - pmap_page_protect(p->phys_page, - (VM_PROT_ALL & ~VM_PROT_WRITE)); + pmap_page_protect_options(p->phys_page, (VM_PROT_ALL & ~VM_PROT_WRITE), + PMAP_OPTIONS_NOFLUSH, (void *)&pmap_flush_context_storage); + delayed_pmap_flush = TRUE; } } } - old_copy->size = copy_size; + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); + + old_copy->vo_size = copy_size; } if (src_object_shared == TRUE) vm_object_reference_shared(old_copy); @@ -2647,8 +3771,8 @@ vm_object_copy_delayed( * copy object will be large enough to back either the * old copy object or the new mapping. */ - if (old_copy->size > copy_size) - copy_size = old_copy->size; + if (old_copy->vo_size > copy_size) + copy_size = old_copy->vo_size; if (new_copy == VM_OBJECT_NULL) { vm_object_unlock(old_copy); @@ -2660,7 +3784,7 @@ vm_object_copy_delayed( src_object_shared = FALSE; goto Retry; } - new_copy->size = copy_size; + new_copy->vo_size = copy_size; /* * The copy-object is always made large enough to @@ -2670,7 +3794,7 @@ vm_object_copy_delayed( */ assert((old_copy->shadow == src_object) && - (old_copy->shadow_offset == (vm_object_offset_t) 0)); + (old_copy->vo_shadow_offset == (vm_object_offset_t) 0)); } else if (new_copy == VM_OBJECT_NULL) { vm_object_unlock(src_object); @@ -2695,21 +3819,32 @@ vm_object_copy_delayed( */ copy_delayed_protect_iterate++; + pmap_flush_context_init(&pmap_flush_context_storage); + delayed_pmap_flush = FALSE; + queue_iterate(&src_object->memq, p, vm_page_t, listq) { if (!p->fictitious && p->offset < copy_size) { - if (p->wire_count > 0) { + if (VM_PAGE_WIRED(p)) { if (old_copy) vm_object_unlock(old_copy); vm_object_unlock(src_object); vm_object_unlock(new_copy); vm_object_deallocate(new_copy); + + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); + return VM_OBJECT_NULL; } else { - pmap_page_protect(p->phys_page, - (VM_PROT_ALL & ~VM_PROT_WRITE)); + pmap_page_protect_options(p->phys_page, (VM_PROT_ALL & ~VM_PROT_WRITE), + PMAP_OPTIONS_NOFLUSH, (void *)&pmap_flush_context_storage); + delayed_pmap_flush = TRUE; } } } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); + if (old_copy != VM_OBJECT_NULL) { /* * Make the old copy-object shadow the new one. 
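The two write-protect passes in vm_object_copy_delayed() above share one shape: scan the source object's resident pages, give up entirely if a page in the copied range is wired, otherwise revoke write access with the TLB flush deferred. A condensed sketch of that loop with mock types (not the kernel structures):

#include <stdbool.h>
#include <stdio.h>

struct mock_page {
    unsigned long long offset;
    bool wired;
    bool writable;
};

/* Returns true on success, false if a wired page forced a bail-out
 * (the caller then falls back to a full copy instead of copy-on-write). */
static bool write_protect_range(struct mock_page *pages, int npages,
                                unsigned long long copy_size)
{
    for (int i = 0; i < npages; i++) {
        if (pages[i].offset >= copy_size)
            continue;                   /* outside the copied range */
        if (pages[i].wired)
            return false;               /* cannot protect a wired page */
        pages[i].writable = false;      /* revoke write, flush deferred */
    }
    return true;    /* one batched flush would happen here */
}

int main(void)
{
    struct mock_page p[] = {
        { 0x0000, false, true },
        { 0x1000, false, true },
        { 0x5000, false, true },        /* beyond copy_size: untouched */
    };
    printf("protected: %d\n", write_protect_range(p, 3, 0x4000));  /* 1 */
    printf("page0 writable: %d\n", p[0].writable);                 /* 0 */
    return 0;
}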
@@ -2742,7 +3877,7 @@ vm_object_copy_delayed( */ vm_object_lock_assert_exclusive(new_copy); new_copy->shadow = src_object; - new_copy->shadow_offset = 0; + new_copy->vo_shadow_offset = 0; new_copy->shadowed = TRUE; /* caller must set needs_copy */ vm_object_lock_assert_exclusive(src_object); @@ -2753,7 +3888,7 @@ vm_object_copy_delayed( XPR(XPR_VM_OBJECT, "vm_object_copy_delayed: used copy object %X for source %X\n", - (integer_t)new_copy, (integer_t)src_object, 0, 0, 0); + new_copy, src_object, 0, 0, 0); return new_copy; } @@ -2852,7 +3987,7 @@ vm_object_copy_strategically( break; case MEMORY_OBJECT_COPY_SYMMETRIC: - XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0); + XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0); vm_object_unlock(src_object); result = KERN_MEMORY_RESTART_COPY; break; @@ -2874,7 +4009,7 @@ vm_object_copy_strategically( * The new object and offset into that object * are returned in the source parameters. */ -boolean_t vm_object_shadow_check = FALSE; +boolean_t vm_object_shadow_check = TRUE; __private_extern__ boolean_t vm_object_shadow( @@ -2886,6 +4021,10 @@ vm_object_shadow( register vm_object_t result; source = *object; + assert(source != VM_OBJECT_NULL); + if (source == VM_OBJECT_NULL) + return FALSE; + #if 0 /* * XXX FBDP @@ -2905,11 +4044,19 @@ vm_object_shadow( /* * Determine if we really need a shadow. + * + * If the source object is larger than what we are trying + * to create, then force the shadow creation even if the + * ref count is 1. This will allow us to [potentially] + * collapse the underlying object away in the future + * (freeing up the extra data it might contain and that + * we don't need). */ - - if (vm_object_shadow_check && source->ref_count == 1 && + if (vm_object_shadow_check && + source->vo_size == length && + source->ref_count == 1 && (source->shadow == VM_OBJECT_NULL || - source->shadow->copy == VM_OBJECT_NULL)) + source->shadow->copy == VM_OBJECT_NULL) ) { source->shadowed = FALSE; return FALSE; @@ -2936,7 +4083,7 @@ vm_object_shadow( * and fix up the offset into the new object. */ - result->shadow_offset = *offset; + result->vo_shadow_offset = *offset; /* * Return the new things @@ -3006,9 +4153,9 @@ vm_object_shadow( * [Furthermore, each routine must cope with the simultaneous * or previous operations of the others.] * - * In addition to the lock on the object, the vm_object_cache_lock + * In addition to the lock on the object, the vm_object_hash_lock * governs the associations. References gained through the - * association require use of the cache lock. + * association require use of the hash lock. * * Because the pager field may be cleared spontaneously, it * cannot be used to determine whether a memory object has @@ -3047,6 +4194,7 @@ vm_object_enter( boolean_t must_init; vm_object_hash_entry_t entry, new_entry; uint32_t try_failed_count = 0; + lck_mtx_t *lck; if (pager == MEMORY_OBJECT_NULL) return(vm_object_allocate(size)); @@ -3059,7 +4207,7 @@ vm_object_enter( * Look for an object associated with this port. */ Retry: - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); do { entry = vm_object_hash_lookup(pager, FALSE); @@ -3069,19 +4217,18 @@ Retry: * We must unlock to create a new object; * if we do so, we must try the lookup again. 
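vm_object_enter() above follows the classic lookup-or-insert-under-a-lock pattern: look up the pager in the hash, and on a miss drop the lock, allocate the object and hash entry, retake the lock and look again before inserting. A stand-alone pthread sketch of that retry loop (hypothetical names, a single bucket for brevity):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
    struct entry *next;
    int key;
    int value;
};

static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *bucket;            /* single hash bucket for brevity */

static struct entry *lookup_locked(int key)
{
    for (struct entry *e = bucket; e; e = e->next)
        if (e->key == key)
            return e;
    return NULL;
}

/* Find the entry for 'key', creating it if needed.  Allocation happens
 * with the lock dropped, so we must re-check after retaking it. */
static struct entry *lookup_or_insert(int key, int value)
{
    struct entry *fresh = NULL;
    struct entry *e;

    pthread_mutex_lock(&bucket_lock);
    for (;;) {
        e = lookup_locked(key);
        if (e)
            break;                      /* someone (maybe us) inserted it */
        if (fresh) {                    /* second miss: insert our copy */
            fresh->next = bucket;
            bucket = fresh;
            e = fresh;
            fresh = NULL;
            break;
        }
        /* First miss: allocate outside the lock, then retry the lookup. */
        pthread_mutex_unlock(&bucket_lock);
        fresh = malloc(sizeof(*fresh));
        fresh->key = key;
        fresh->value = value;
        fresh->next = NULL;
        pthread_mutex_lock(&bucket_lock);
    }
    pthread_mutex_unlock(&bucket_lock);
    free(fresh);        /* unused if another thread won the race */
    return e;
}

int main(void)
{
    struct entry *e = lookup_or_insert(42, 7);
    printf("key %d -> %d\n", e->key, e->value);
    return 0;
}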
*/ - vm_object_cache_unlock(); + vm_object_hash_unlock(lck); assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL); new_entry = vm_object_hash_entry_alloc(pager); new_object = vm_object_allocate(size); - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); } else { /* * Lookup failed twice, and we have something * to insert; set the object. */ - vm_object_hash_insert(new_entry); + vm_object_hash_insert(new_entry, new_object); entry = new_entry; - entry->object = new_object; new_entry = VM_OBJECT_HASH_ENTRY_NULL; new_object = VM_OBJECT_NULL; must_init = TRUE; @@ -3095,9 +4242,10 @@ Retry: entry->waiting = TRUE; entry = VM_OBJECT_HASH_ENTRY_NULL; assert_wait((event_t) pager, THREAD_UNINT); - vm_object_cache_unlock(); + vm_object_hash_unlock(lck); + thread_block(THREAD_CONTINUE_NULL); - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); } } while (entry == VM_OBJECT_HASH_ENTRY_NULL); @@ -3105,46 +4253,60 @@ Retry: assert(object != VM_OBJECT_NULL); if (!must_init) { - if (!vm_object_lock_try(object)) { + if ( !vm_object_lock_try(object)) { - vm_object_cache_unlock(); + vm_object_hash_unlock(lck); try_failed_count++; mutex_pause(try_failed_count); /* wait a bit */ - goto Retry; } assert(!internal || object->internal); - if (named) { - assert(!object->named); - object->named = TRUE; - } +#if VM_OBJECT_CACHE if (object->ref_count == 0) { + if ( !vm_object_cache_lock_try()) { + + vm_object_hash_unlock(lck); + vm_object_unlock(object); + + try_failed_count++; + mutex_pause(try_failed_count); /* wait a bit */ + goto Retry; + } XPR(XPR_VM_OBJECT_CACHE, - "vm_object_enter: removing %x from cache, head (%x, %x)\n", - (integer_t)object, - (integer_t)vm_object_cached_list.next, - (integer_t)vm_object_cached_list.prev, 0,0); + "vm_object_enter: removing %x from cache, head (%x, %x)\n", + object, + vm_object_cached_list.next, + vm_object_cached_list.prev, 0,0); queue_remove(&vm_object_cached_list, object, vm_object_t, cached_list); vm_object_cached_count--; + + vm_object_cache_unlock(); + } +#endif + if (named) { + assert(!object->named); + object->named = TRUE; } vm_object_lock_assert_exclusive(object); object->ref_count++; vm_object_res_reference(object); + + vm_object_hash_unlock(lck); vm_object_unlock(object); VM_STAT_INCR(hits); - } + } else + vm_object_hash_unlock(lck); + assert(object->ref_count > 0); VM_STAT_INCR(lookups); - vm_object_cache_unlock(); - XPR(XPR_VM_OBJECT, "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n", - (integer_t)pager, (integer_t)object, must_init, 0, 0); + pager, object, must_init, 0, 0); /* * If we raced to create a vm_object but lost, let's @@ -3228,7 +4390,7 @@ Retry: XPR(XPR_VM_OBJECT, "vm_object_enter: vm_object %x, memory_object %x, internal %d\n", - (integer_t)object, (integer_t)object->pager, internal, 0,0); + object, object->pager, internal, 0,0); return(object); } @@ -3247,24 +4409,133 @@ Retry: */ void -vm_object_pager_create( +vm_object_pager_create( + register vm_object_t object) +{ + memory_object_t pager; + vm_object_hash_entry_t entry; + lck_mtx_t *lck; +#if MACH_PAGEMAP + vm_object_size_t size; + vm_external_map_t map; +#endif /* MACH_PAGEMAP */ + + XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n", + object, 0,0,0,0); + + assert(object != kernel_object); + + if (memory_manager_default_check() != KERN_SUCCESS) + return; + + /* + * Prevent collapse or termination by holding a paging reference + */ + + vm_object_paging_begin(object); + if (object->pager_created) { + /* + * Someone else got to it first... 
+ * wait for them to finish initializing the ports + */ + while (!object->pager_initialized) { + vm_object_sleep(object, + VM_OBJECT_EVENT_INITIALIZED, + THREAD_UNINT); + } + vm_object_paging_end(object); + return; + } + + /* + * Indicate that a memory object has been assigned + * before dropping the lock, to prevent a race. + */ + + object->pager_created = TRUE; + object->paging_offset = 0; + +#if MACH_PAGEMAP + size = object->vo_size; +#endif /* MACH_PAGEMAP */ + vm_object_unlock(object); + +#if MACH_PAGEMAP + if (DEFAULT_PAGER_IS_ACTIVE) { + map = vm_external_create(size); + vm_object_lock(object); + assert(object->vo_size == size); + object->existence_map = map; + vm_object_unlock(object); + } +#endif /* MACH_PAGEMAP */ + + if ((uint32_t) object->vo_size != object->vo_size) { + panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n", + (uint64_t) object->vo_size); + } + + /* + * Create the [internal] pager, and associate it with this object. + * + * We make the association here so that vm_object_enter() + * can look up the object to complete initializing it. No + * user will ever map this object. + */ + { + memory_object_default_t dmm; + + /* acquire a reference for the default memory manager */ + dmm = memory_manager_default_reference(); + + assert(object->temporary); + + /* create our new memory object */ + assert((vm_size_t) object->vo_size == object->vo_size); + (void) memory_object_create(dmm, (vm_size_t) object->vo_size, + &pager); + + memory_object_default_deallocate(dmm); + } + + entry = vm_object_hash_entry_alloc(pager); + + lck = vm_object_hash_lock_spin(pager); + vm_object_hash_insert(entry, object); + vm_object_hash_unlock(lck); + + /* + * A reference was returned by + * memory_object_create(), and it is + * copied by vm_object_enter(). + */ + + if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object) + panic("vm_object_pager_create: mismatch"); + + /* + * Drop the reference we were passed. + */ + memory_object_deallocate(pager); + + vm_object_lock(object); + + /* + * Release the paging reference + */ + vm_object_paging_end(object); +} + +void +vm_object_compressor_pager_create( register vm_object_t object) { memory_object_t pager; vm_object_hash_entry_t entry; -#if MACH_PAGEMAP - vm_object_size_t size; - vm_external_map_t map; -#endif /* MACH_PAGEMAP */ - - XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n", - (integer_t)object, 0,0,0,0); + lck_mtx_t *lck; assert(object != kernel_object); - if (memory_manager_default_check() != KERN_SUCCESS) - return; - /* * Prevent collapse or termination by holding a paging reference */ @@ -3292,18 +4563,16 @@ vm_object_pager_create( object->pager_created = TRUE; object->paging_offset = 0; -#if MACH_PAGEMAP - size = object->size; -#endif /* MACH_PAGEMAP */ vm_object_unlock(object); -#if MACH_PAGEMAP - map = vm_external_create(size); - vm_object_lock(object); - assert(object->size == size); - object->existence_map = map; - vm_object_unlock(object); -#endif /* MACH_PAGEMAP */ + if ((uint32_t) (object->vo_size/PAGE_SIZE) != + (object->vo_size/PAGE_SIZE)) { + panic("vm_object_compressor_pager_create(%p): " + "object size 0x%llx >= 0x%llx\n", + object, + (uint64_t) object->vo_size, + 0x0FFFFFFFFULL*PAGE_SIZE); + } /* * Create the [internal] pager, and associate it with this object. @@ -3313,26 +4582,26 @@ vm_object_pager_create( * user will ever map this object. 
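The guard in vm_object_pager_create() above, (uint32_t) object->vo_size != object->vo_size, is a compact way to detect that a 64-bit size would be truncated by a 32-bit pager interface. A tiny illustration of that idiom:

#include <stdint.h>
#include <stdio.h>

/* Returns 1 if 'size' survives a round trip through uint32_t unchanged,
 * i.e. it is safe to hand to a 32-bit interface. */
static int fits_in_32bits(uint64_t size)
{
    return (uint32_t)size == size;
}

int main(void)
{
    uint64_t small = 1ULL << 20;   /* 1 MB */
    uint64_t huge  = 1ULL << 33;   /* 8 GB: would be truncated */

    printf("%llu fits: %d\n", (unsigned long long)small, fits_in_32bits(small)); /* 1 */
    printf("%llu fits: %d\n", (unsigned long long)huge,  fits_in_32bits(huge));  /* 0 */
    return 0;
}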
*/ { - memory_object_default_t dmm; - - /* acquire a reference for the default memory manager */ - dmm = memory_manager_default_reference(); - assert(object->temporary); /* create our new memory object */ - (void) memory_object_create(dmm, object->size, &pager); - - memory_object_default_deallocate(dmm); + assert((uint32_t) (object->vo_size/PAGE_SIZE) == + (object->vo_size/PAGE_SIZE)); + (void) compressor_memory_object_create( + (memory_object_size_t) object->vo_size, + &pager); + if (pager == NULL) { + panic("vm_object_compressor_pager_create(): " + "no pager for object %p size 0x%llx\n", + object, (uint64_t) object->vo_size); + } } entry = vm_object_hash_entry_alloc(pager); - vm_object_cache_lock(); - vm_object_hash_insert(entry); - - entry->object = object; - vm_object_cache_unlock(); + lck = vm_object_hash_lock_spin(pager); + vm_object_hash_insert(entry, object); + vm_object_hash_unlock(lck); /* * A reference was returned by @@ -3340,8 +4609,8 @@ vm_object_pager_create( * copied by vm_object_enter(). */ - if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object) - panic("vm_object_pager_create: mismatch"); + if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object) + panic("vm_object_compressor_pager_create: mismatch"); /* * Drop the reference we were passed. @@ -3419,8 +4688,11 @@ vm_object_do_collapse( vm_object_offset_t new_offset, backing_offset; vm_object_size_t size; - backing_offset = object->shadow_offset; - size = object->size; + vm_object_lock_assert_exclusive(object); + vm_object_lock_assert_exclusive(backing_object); + + backing_offset = object->vo_shadow_offset; + size = object->vo_size; /* * Move all in-memory pages from backing_object @@ -3511,6 +4783,14 @@ vm_object_do_collapse( if (backing_object->pager != MEMORY_OBJECT_NULL) { vm_object_hash_entry_t entry; +#if 00 + if (COMPRESSED_PAGER_IS_ACTIVE) { + panic("vm_object_do_collapse(%p,%p): " + "backing_object has a compressor pager", + object, backing_object); + } +#endif + /* * Move the pager from backing_object to object. * @@ -3520,10 +4800,20 @@ vm_object_do_collapse( */ assert(!object->paging_in_progress); + assert(!object->activity_in_progress); object->pager = backing_object->pager; - entry = vm_object_hash_lookup(object->pager, FALSE); - assert(entry != VM_OBJECT_HASH_ENTRY_NULL); - entry->object = object; + + if (backing_object->hashed) { + lck_mtx_t *lck; + + lck = vm_object_hash_lock_spin(backing_object->pager); + entry = vm_object_hash_lookup(object->pager, FALSE); + assert(entry != VM_OBJECT_HASH_ENTRY_NULL); + entry->object = object; + vm_object_hash_unlock(lck); + + object->hashed = TRUE; + } object->pager_created = backing_object->pager_created; object->pager_control = backing_object->pager_control; object->pager_ready = backing_object->pager_ready; @@ -3536,8 +4826,6 @@ vm_object_do_collapse( } } - vm_object_cache_unlock(); - #if MACH_PAGEMAP /* * If the shadow offset is 0, the use the existence map from @@ -3552,10 +4840,10 @@ vm_object_do_collapse( * this code should be fixed to salvage the map. 
*/ assert(object->existence_map == VM_EXTERNAL_NULL); - if (backing_offset || (size != backing_object->size)) { + if (backing_offset || (size != backing_object->vo_size)) { vm_external_discarded++; vm_external_destroy(backing_object->existence_map, - backing_object->size); + backing_object->vo_size); } else { vm_external_collapsed++; @@ -3574,10 +4862,10 @@ vm_object_do_collapse( assert(!backing_object->phys_contiguous); object->shadow = backing_object->shadow; if (object->shadow) { - object->shadow_offset += backing_object->shadow_offset; + object->vo_shadow_offset += backing_object->vo_shadow_offset; } else { /* no shadow, therefore no shadow offset... */ - object->shadow_offset = 0; + object->vo_shadow_offset = 0; } assert((object->shadow == VM_OBJECT_NULL) || (object->shadow->copy != backing_object)); @@ -3592,13 +4880,14 @@ vm_object_do_collapse( assert((backing_object->ref_count == 1) && (backing_object->resident_page_count == 0) && - (backing_object->paging_in_progress == 0)); + (backing_object->paging_in_progress == 0) && + (backing_object->activity_in_progress == 0)); backing_object->alive = FALSE; vm_object_unlock(backing_object); XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n", - (integer_t)backing_object, 0,0,0,0); + backing_object, 0,0,0,0); vm_object_lock_destroy(backing_object); @@ -3617,6 +4906,7 @@ vm_object_do_bypass( * in the chain. */ + vm_object_lock_assert_exclusive(object); vm_object_lock_assert_exclusive(backing_object); #if TASK_SWAPPER @@ -3643,10 +4933,10 @@ vm_object_do_bypass( assert(!backing_object->phys_contiguous); object->shadow = backing_object->shadow; if (object->shadow) { - object->shadow_offset += backing_object->shadow_offset; + object->vo_shadow_offset += backing_object->vo_shadow_offset; } else { /* no shadow, therefore no shadow offset... */ - object->shadow_offset = 0; + object->vo_shadow_offset = 0; } /* @@ -3663,7 +4953,7 @@ vm_object_do_bypass( * Since its ref_count was at least 2, it * will not vanish; so we don't need to call * vm_object_deallocate. - * [FBDP: that doesn't seem to be true any more] + * [with a caveat for "named" objects] * * The res_count on the backing object is * conditionally decremented. It's possible @@ -3681,7 +4971,8 @@ vm_object_do_bypass( * is temporary and cachable. #endif */ - if (backing_object->ref_count > 1) { + if (backing_object->ref_count > 2 || + (!backing_object->named && backing_object->ref_count > 1)) { vm_object_lock_assert_exclusive(backing_object); backing_object->ref_count--; #if TASK_SWAPPER @@ -3703,7 +4994,19 @@ vm_object_do_bypass( vm_object_res_reference(backing_object); } #endif /* TASK_SWAPPER */ + /* + * vm_object_collapse (the caller of this function) is + * now called from contexts that may not guarantee that a + * valid reference is held on the object... w/o a valid + * reference, it is unsafe and unwise (you will definitely + * regret it) to unlock the object and then retake the lock + * since the object may be terminated and recycled in between. + * The "activity_in_progress" reference will keep the object + * 'stable'. 
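The comment above explains why vm_object_do_bypass() takes an activity_in_progress reference before unlocking the object: without some reference, the object could be terminated and recycled while unlocked. Here is a generic sketch of that keep-alive-across-unlock pattern, using an atomic refcount and invented names rather than the kernel primitives:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct obj {
    pthread_mutex_t lock;
    atomic_int keepalive;   /* stand-in for activity_in_progress */
    int data;
};

static void do_slow_work_unlocked(void) { /* e.g. deallocate another object */ }

static void bypass_like_operation(struct obj *o)
{
    pthread_mutex_lock(&o->lock);

    /* Pin the object before dropping its lock: as long as keepalive is
     * non-zero, a (hypothetical) reaper refuses to recycle it. */
    atomic_fetch_add(&o->keepalive, 1);
    pthread_mutex_unlock(&o->lock);

    do_slow_work_unlocked();

    pthread_mutex_lock(&o->lock);
    atomic_fetch_sub(&o->keepalive, 1);   /* safe: object still valid */
    o->data++;
    pthread_mutex_unlock(&o->lock);
}

int main(void)
{
    struct obj o = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };
    bypass_like_operation(&o);
    printf("data=%d keepalive=%d\n", o.data, atomic_load(&o.keepalive));
    return 0;
}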
+ */ + vm_object_activity_begin(object); vm_object_unlock(object); + vm_object_unlock(backing_object); vm_object_deallocate(backing_object); @@ -3715,6 +5018,7 @@ vm_object_do_bypass( */ vm_object_lock(object); + vm_object_activity_end(object); } object_bypasses++; @@ -3735,7 +5039,7 @@ static unsigned long vm_object_collapse_calls = 0; static unsigned long vm_object_collapse_objects = 0; static unsigned long vm_object_collapse_do_collapse = 0; static unsigned long vm_object_collapse_do_bypass = 0; -static unsigned long vm_object_collapse_delays = 0; + __private_extern__ void vm_object_collapse( register vm_object_t object, @@ -3746,6 +5050,8 @@ vm_object_collapse( register unsigned int rcount; register unsigned int size; vm_object_t original_object; + int object_lock_type; + int backing_object_lock_type; vm_object_collapse_calls++; @@ -3755,13 +5061,26 @@ vm_object_collapse( } XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n", - (integer_t)object, 0,0,0,0); + object, 0,0,0,0); if (object == VM_OBJECT_NULL) return; original_object = object; + /* + * The top object was locked "exclusive" by the caller. + * In the first pass, to determine if we can collapse the shadow chain, + * take a "shared" lock on the shadow objects. If we can collapse, + * we'll have to go down the chain again with exclusive locks. + */ + object_lock_type = OBJECT_LOCK_EXCLUSIVE; + backing_object_lock_type = OBJECT_LOCK_SHARED; + +retry: + object = original_object; + vm_object_lock_assert_exclusive(object); + while (TRUE) { vm_object_collapse_objects++; /* @@ -3780,23 +5099,27 @@ vm_object_collapse( } return; } - + if (backing_object_lock_type == OBJECT_LOCK_SHARED) { + vm_object_lock_shared(backing_object); + } else { + vm_object_lock(backing_object); + } + /* * No pages in the object are currently * being paged out, and */ - if (object->paging_in_progress != 0) { + if (object->paging_in_progress != 0 || + object->activity_in_progress != 0) { /* try and collapse the rest of the shadow chain */ - vm_object_lock(backing_object); if (object != original_object) { vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } - vm_object_lock(backing_object); - /* * ... * The backing object is not read_only, @@ -3807,12 +5130,14 @@ vm_object_collapse( */ if (!backing_object->internal || - backing_object->paging_in_progress != 0) { + backing_object->paging_in_progress != 0 || + backing_object->activity_in_progress != 0) { /* try and collapse the rest of the shadow chain */ if (object != original_object) { vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -3833,6 +5158,7 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -3859,29 +5185,41 @@ vm_object_collapse( if (backing_object->ref_count == 1 && (!object->pager_created #if !MACH_PAGEMAP - || !backing_object->pager_created + || (!backing_object->pager_created) #endif /*!MACH_PAGEMAP */ ) && vm_object_collapse_allowed) { - XPR(XPR_VM_OBJECT, - "vm_object_collapse: %x to %x, pager %x, pager_control %x\n", - (integer_t)backing_object, (integer_t)object, - (integer_t)backing_object->pager, - (integer_t)backing_object->pager_control, 0); - /* - * We need the cache lock for collapsing, - * but we must not deadlock. + * We need the exclusive lock on the VM objects. */ - - if (! 
vm_object_cache_lock_try()) { - if (object != original_object) { - vm_object_unlock(object); - } + if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { + /* + * We have an object and its shadow locked + * "shared". We can't just upgrade the locks + * to "exclusive", as some other thread might + * also have these objects locked "shared" and + * attempt to upgrade one or the other to + * "exclusive". The upgrades would block + * forever waiting for the other "shared" locks + * to get released. + * So we have to release the locks and go + * down the shadow chain again (since it could + * have changed) with "exclusive" locking. + */ vm_object_unlock(backing_object); - return; + if (object != original_object) + vm_object_unlock(object); + object_lock_type = OBJECT_LOCK_EXCLUSIVE; + backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; + goto retry; } + XPR(XPR_VM_OBJECT, + "vm_object_collapse: %x to %x, pager %x, pager_control %x\n", + backing_object, object, + backing_object->pager, + backing_object->pager_control, 0); + /* * Collapse the object with its backing * object, and try again with the object's @@ -3904,6 +5242,7 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -3913,13 +5252,13 @@ vm_object_collapse( * we have to make sure no pages in the backing object * "show through" before bypassing it. */ - size = atop(object->size); + size = (unsigned int)atop(object->vo_size); rcount = object->resident_page_count; + if (rcount != size) { vm_object_offset_t offset; vm_object_offset_t backing_offset; unsigned int backing_rcount; - unsigned int lookups = 0; /* * If the backing object has a pager but no pagemap, @@ -3928,7 +5267,7 @@ vm_object_collapse( */ if (backing_object->pager_created #if MACH_PAGEMAP - && (backing_object->existence_map == VM_EXTERNAL_NULL) + && (backing_object->existence_map == VM_EXTERNAL_NULL) #endif /* MACH_PAGEMAP */ ) { /* try and collapse the rest of the shadow chain */ @@ -3936,6 +5275,7 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -3946,7 +5286,7 @@ vm_object_collapse( */ if (object->pager_created #if MACH_PAGEMAP - && (object->existence_map == VM_EXTERNAL_NULL) + && (object->existence_map == VM_EXTERNAL_NULL) #endif /* MACH_PAGEMAP */ ) { /* try and collapse the rest of the shadow chain */ @@ -3954,6 +5294,25 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; + continue; + } + + backing_offset = object->vo_shadow_offset; + backing_rcount = backing_object->resident_page_count; + + if ( (int)backing_rcount - (int)(atop(backing_object->vo_size) - size) > (int)rcount) { + /* + * we have enough pages in the backing object to guarantee that + * at least 1 of them must be 'uncovered' by a resident page + * in the object we're evaluating, so move on and + * try to collapse the rest of the shadow chain + */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -3970,17 +5329,19 @@ vm_object_collapse( * */ - backing_offset = object->shadow_offset; - backing_rcount = backing_object->resident_page_count; - #if MACH_PAGEMAP #define EXISTS_IN_OBJECT(obj, off, rc) \ - (vm_external_state_get((obj)->existence_map, \ - (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \ - ((rc) && ++lookups && vm_page_lookup((obj), (off)) != 
VM_PAGE_NULL && (rc)--)) -#else -#define EXISTS_IN_OBJECT(obj, off, rc) \ - (((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) + ((vm_external_state_get((obj)->existence_map, \ + (vm_offset_t)(off)) \ + == VM_EXTERNAL_STATE_EXISTS) || \ + (VM_COMPRESSOR_PAGER_STATE_GET((obj), (off)) \ + == VM_EXTERNAL_STATE_EXISTS) || \ + ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) +#else /* MACH_PAGEMAP */ +#define EXISTS_IN_OBJECT(obj, off, rc) \ + ((VM_COMPRESSOR_PAGER_STATE_GET((obj), (off)) \ + == VM_EXTERNAL_STATE_EXISTS) || \ + ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) #endif /* MACH_PAGEMAP */ /* @@ -3997,12 +5358,13 @@ vm_object_collapse( backing_offset, backing_rcount) && !EXISTS_IN_OBJECT(object, hint_offset, rcount)) { /* dependency right at the hint */ - object->cow_hint = (vm_offset_t)hint_offset; + object->cow_hint = (vm_offset_t) hint_offset; /* atomic */ /* try and collapse the rest of the shadow chain */ if (object != original_object) { vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -4012,41 +5374,29 @@ vm_object_collapse( * pages in the backing object, it makes sense to * walk the backing_object's resident pages first. * - * NOTE: Pages may be in both the existence map and - * resident. So, we can't permanently decrement - * the rcount here because the second loop may - * find the same pages in the backing object' - * existence map that we found here and we would - * double-decrement the rcount. We also may or - * may not have found the + * NOTE: Pages may be in both the existence map and/or + * resident, so if we don't find a dependency while + * walking the backing object's resident page list + * directly, and there is an existence map, we'll have + * to run the offset based 2nd pass. Because we may + * have to run both passes, we need to be careful + * not to decrement 'rcount' in the 1st pass */ - if (backing_rcount && -#if MACH_PAGEMAP - size > ((backing_object->existence_map) ? - backing_rcount : (backing_rcount >> 1)) -#else - size > (backing_rcount >> 1) -#endif /* MACH_PAGEMAP */ - ) { + if (backing_rcount && backing_rcount < (size / 8)) { unsigned int rc = rcount; vm_page_t p; backing_rcount = backing_object->resident_page_count; p = (vm_page_t)queue_first(&backing_object->memq); do { - /* Until we get more than one lookup lock */ - if (lookups > 256) { - vm_object_collapse_delays++; - lookups = 0; - mutex_pause(0); - } - offset = (p->offset - backing_offset); - if (offset < object->size && + + if (offset < object->vo_size && offset != hint_offset && !EXISTS_IN_OBJECT(object, offset, rc)) { /* found a dependency */ - object->cow_hint = (vm_offset_t)offset; + object->cow_hint = (vm_offset_t) offset; /* atomic */ + break; } p = (vm_page_t) queue_next(&p->listq); @@ -4058,6 +5408,7 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } } @@ -4066,29 +5417,22 @@ vm_object_collapse( * Walk through the offsets looking for pages in the * backing object that show through to the object. */ -#if MACH_PAGEMAP - if (backing_rcount || backing_object->existence_map) { -#else - if (backing_rcount) { + if (backing_rcount +#if MACH_PAGEMAP + || backing_object->existence_map #endif /* MACH_PAGEMAP */ + ) { offset = hint_offset; while((offset = - (offset + PAGE_SIZE_64 < object->size) ? + (offset + PAGE_SIZE_64 < object->vo_size) ? 
(offset + PAGE_SIZE_64) : 0) != hint_offset) { - /* Until we get more than one lookup lock */ - if (lookups > 256) { - vm_object_collapse_delays++; - lookups = 0; - mutex_pause(0); - } - if (EXISTS_IN_OBJECT(backing_object, offset + backing_offset, backing_rcount) && !EXISTS_IN_OBJECT(object, offset, rcount)) { /* found a dependency */ - object->cow_hint = (vm_offset_t)offset; + object->cow_hint = (vm_offset_t) offset; /* atomic */ break; } } @@ -4098,11 +5442,24 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } } } + /* + * We need "exclusive" locks on the 2 VM objects. + */ + if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { + vm_object_unlock(backing_object); + if (object != original_object) + vm_object_unlock(object); + object_lock_type = OBJECT_LOCK_EXCLUSIVE; + backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; + goto retry; + } + /* reset the offset hint for any objects deeper in the chain */ object->cow_hint = (vm_offset_t)0; @@ -4161,7 +5518,7 @@ vm_object_page_remove( for (; start < end; start += PAGE_SIZE_64) { p = vm_page_lookup(object, start); if (p != VM_PAGE_NULL) { - assert(!p->cleaning && !p->pageout); + assert(!p->cleaning && !p->pageout && !p->laundry); if (!p->fictitious && p->pmapped) pmap_disconnect(p->phys_page); VM_PAGE_FREE(p); @@ -4174,7 +5531,7 @@ vm_object_page_remove( while (!queue_end(&object->memq, (queue_entry_t) p)) { next = (vm_page_t) queue_next(&p->listq); if ((start <= p->offset) && (p->offset < end)) { - assert(!p->cleaning && !p->pageout); + assert(!p->cleaning && !p->pageout && !p->laundry); if (!p->fictitious && p->pmapped) pmap_disconnect(p->phys_page); VM_PAGE_FREE(p); @@ -4235,7 +5592,7 @@ vm_object_coalesce( XPR(XPR_VM_OBJECT, "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n", - (integer_t)prev_object, prev_offset, prev_size, next_size, 0); + prev_object, prev_offset, prev_size, next_size, 0); vm_object_lock(prev_object); @@ -4261,7 +5618,8 @@ vm_object_coalesce( (prev_object->copy != VM_OBJECT_NULL) || (prev_object->true_share != FALSE) || (prev_object->purgable != VM_PURGABLE_DENY) || - (prev_object->paging_in_progress != 0)) { + (prev_object->paging_in_progress != 0) || + (prev_object->activity_in_progress != 0)) { vm_object_unlock(prev_object); return(FALSE); } @@ -4280,7 +5638,7 @@ vm_object_coalesce( * Extend the object if necessary. 
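The resident-count test added to vm_object_collapse above ("we have enough pages in the backing object to guarantee that at least 1 of them must be 'uncovered'") is a pigeonhole argument. The following is a minimal standalone sketch of that arithmetic, not kernel code: all names are invented, sizes are in pages, and the top object is assumed to shadow a window of object_size_pages pages of the backing object.

#include <stdbool.h>
#include <stdio.h>

static bool
backing_page_must_show_through(unsigned int object_size_pages,  /* size           */
                               unsigned int object_resident,    /* rcount         */
                               unsigned int backing_size_pages, /* atop(vo_size)  */
                               unsigned int backing_resident)   /* backing_rcount */
{
    /*
     * At most (backing_size_pages - object_size_pages) of the backing
     * object's resident pages can lie outside the window the top object
     * shadows, so at least this many must lie inside it.
     */
    int inside = (int)backing_resident -
        (int)(backing_size_pages - object_size_pages);

    /*
     * The top object can cover at most object_resident of those pages;
     * if more than that lie inside the window, at least one backing page
     * is uncovered and the backing object cannot be bypassed.
     */
    return (inside > (int)object_resident);
}

int
main(void)
{
    /* 64-page object with 8 resident pages over a 64-page backing
     * object with 20 resident pages: 20 > 8, so something shows through. */
    printf("must show through: %d\n",
        backing_page_must_show_through(64, 8, 64, 20));
    return 0;
}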
*/ newsize = prev_offset + prev_size + next_size; - if (newsize > prev_object->size) { + if (newsize > prev_object->vo_size) { #if MACH_PAGEMAP /* * We cannot extend an object that has existence info, @@ -4293,7 +5651,7 @@ vm_object_coalesce( */ assert(prev_object->existence_map == VM_EXTERNAL_NULL); #endif /* MACH_PAGEMAP */ - prev_object->size = newsize; + prev_object->vo_size = newsize; } vm_object_unlock(prev_object); @@ -4316,7 +5674,7 @@ vm_object_page_map( vm_object_offset_t offset), void *map_fn_data) /* private to map_fn */ { - int num_pages; + int64_t num_pages; int i; vm_page_t m; vm_page_t old_page; @@ -4335,14 +5693,15 @@ vm_object_page_map( if ((old_page = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { - vm_page_lock_queues(); - vm_page_free(old_page); - vm_page_unlock_queues(); + VM_PAGE_FREE(old_page); } - vm_page_init(m, addr); - /* private normally requires lock_queues but since we */ - /* are initializing the page, its not necessary here */ + assert((ppnum_t) addr == addr); + vm_page_init(m, (ppnum_t) addr, FALSE); + /* + * private normally requires lock_queues but since we + * are initializing the page, its not necessary here + */ m->private = TRUE; /* don`t free page */ m->wire_count = 1; vm_page_insert(m, object, offset); @@ -4352,329 +5711,6 @@ vm_object_page_map( } } -#include - -#if MACH_KDB -#include -#include - -#define printf kdbprintf - -extern boolean_t vm_object_cached( - vm_object_t object); - -extern void print_bitstring( - char byte); - -boolean_t vm_object_print_pages = FALSE; - -void -print_bitstring( - char byte) -{ - printf("%c%c%c%c%c%c%c%c", - ((byte & (1 << 0)) ? '1' : '0'), - ((byte & (1 << 1)) ? '1' : '0'), - ((byte & (1 << 2)) ? '1' : '0'), - ((byte & (1 << 3)) ? '1' : '0'), - ((byte & (1 << 4)) ? '1' : '0'), - ((byte & (1 << 5)) ? '1' : '0'), - ((byte & (1 << 6)) ? '1' : '0'), - ((byte & (1 << 7)) ? 
'1' : '0')); -} - -boolean_t -vm_object_cached( - register vm_object_t object) -{ - register vm_object_t o; - - queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) { - if (object == o) { - return TRUE; - } - } - return FALSE; -} - -#if MACH_PAGEMAP -/* - * vm_external_print: [ debug ] - */ -void -vm_external_print( - vm_external_map_t emap, - vm_size_t size) -{ - if (emap == VM_EXTERNAL_NULL) { - printf("0 "); - } else { - vm_size_t existence_size = stob(size); - printf("{ size=%d, map=[", existence_size); - if (existence_size > 0) { - print_bitstring(emap[0]); - } - if (existence_size > 1) { - print_bitstring(emap[1]); - } - if (existence_size > 2) { - printf("..."); - print_bitstring(emap[existence_size-1]); - } - printf("] }\n"); - } - return; -} -#endif /* MACH_PAGEMAP */ - -int -vm_follow_object( - vm_object_t object) -{ - int count = 0; - int orig_db_indent = db_indent; - - while (TRUE) { - if (object == VM_OBJECT_NULL) { - db_indent = orig_db_indent; - return count; - } - - count += 1; - - iprintf("object 0x%x", object); - printf(", shadow=0x%x", object->shadow); - printf(", copy=0x%x", object->copy); - printf(", pager=0x%x", object->pager); - printf(", ref=%d\n", object->ref_count); - - db_indent += 2; - object = object->shadow; - } - -} - -/* - * vm_object_print: [ debug ] - */ -void -vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr, - __unused db_expr_t arg_count, __unused char *modif) -{ - vm_object_t object; - register vm_page_t p; - const char *s; - - register int count; - - object = (vm_object_t) (long) db_addr; - if (object == VM_OBJECT_NULL) - return; - - iprintf("object 0x%x\n", object); - - db_indent += 2; - - iprintf("size=0x%x", object->size); - printf(", memq_hint=%p", object->memq_hint); - printf(", ref_count=%d\n", object->ref_count); - iprintf(""); -#if TASK_SWAPPER - printf("res_count=%d, ", object->res_count); -#endif /* TASK_SWAPPER */ - printf("resident_page_count=%d\n", object->resident_page_count); - - iprintf("shadow=0x%x", object->shadow); - if (object->shadow) { - register int i = 0; - vm_object_t shadow = object; - while((shadow = shadow->shadow)) - i++; - printf(" (depth %d)", i); - } - printf(", copy=0x%x", object->copy); - printf(", shadow_offset=0x%x", object->shadow_offset); - printf(", last_alloc=0x%x\n", object->last_alloc); - - iprintf("pager=0x%x", object->pager); - printf(", paging_offset=0x%x", object->paging_offset); - printf(", pager_control=0x%x\n", object->pager_control); - - iprintf("copy_strategy=%d[", object->copy_strategy); - switch (object->copy_strategy) { - case MEMORY_OBJECT_COPY_NONE: - printf("copy_none"); - break; - - case MEMORY_OBJECT_COPY_CALL: - printf("copy_call"); - break; - - case MEMORY_OBJECT_COPY_DELAY: - printf("copy_delay"); - break; - - case MEMORY_OBJECT_COPY_SYMMETRIC: - printf("copy_symmetric"); - break; - - case MEMORY_OBJECT_COPY_INVALID: - printf("copy_invalid"); - break; - - default: - printf("?"); - } - printf("]"); - - iprintf("all_wanted=0x%x<", object->all_wanted); - s = ""; - if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) { - printf("%sinit", s); - s = ","; - } - if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) { - printf("%sready", s); - s = ","; - } - if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) { - printf("%spaging", s); - s = ","; - } - if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) { - printf("%slock", s); - s = ","; - } - if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) { - printf("%suncaching", s); - s 
= ","; - } - if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) { - printf("%scopy_call", s); - s = ","; - } - if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) { - printf("%scaching", s); - s = ","; - } - printf(">"); - printf(", paging_in_progress=%d\n", object->paging_in_progress); - - iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n", - (object->pager_created ? "" : "!"), - (object->pager_initialized ? "" : "!"), - (object->pager_ready ? "" : "!"), - (object->can_persist ? "" : "!"), - (object->pager_trusted ? "" : "!"), - (object->pageout ? "" : "!"), - (object->internal ? "internal" : "external"), - (object->temporary ? "temporary" : "permanent")); - iprintf("%salive, %spurgeable, %spurgeable_volatile, %spurgeable_empty, %sshadowed, %scached, %sprivate\n", - (object->alive ? "" : "!"), - ((object->purgable != VM_PURGABLE_DENY) ? "" : "!"), - ((object->purgable == VM_PURGABLE_VOLATILE) ? "" : "!"), - ((object->purgable == VM_PURGABLE_EMPTY) ? "" : "!"), - (object->shadowed ? "" : "!"), - (vm_object_cached(object) ? "" : "!"), - (object->private ? "" : "!")); - iprintf("%sadvisory_pageout, %ssilent_overwrite\n", - (object->advisory_pageout ? "" : "!"), - (object->silent_overwrite ? "" : "!")); - -#if MACH_PAGEMAP - iprintf("existence_map="); - vm_external_print(object->existence_map, object->size); -#endif /* MACH_PAGEMAP */ -#if MACH_ASSERT - iprintf("paging_object=0x%x\n", object->paging_object); -#endif /* MACH_ASSERT */ - - if (vm_object_print_pages) { - count = 0; - p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { - if (count == 0) { - iprintf("memory:="); - } else if (count == 2) { - printf("\n"); - iprintf(" ..."); - count = 0; - } else { - printf(","); - } - count++; - - printf("(off=0x%llX,page=%p)", p->offset, p); - p = (vm_page_t) queue_next(&p->listq); - } - if (count != 0) { - printf("\n"); - } - } - db_indent -= 2; -} - - -/* - * vm_object_find [ debug ] - * - * Find all tasks which reference the given vm_object. - */ - -boolean_t vm_object_find(vm_object_t object); -boolean_t vm_object_print_verbose = FALSE; - -boolean_t -vm_object_find( - vm_object_t object) -{ - task_t task; - vm_map_t map; - vm_map_entry_t entry; - boolean_t found = FALSE; - - queue_iterate(&tasks, task, task_t, tasks) { - map = task->map; - for (entry = vm_map_first_entry(map); - entry && entry != vm_map_to_entry(map); - entry = entry->vme_next) { - - vm_object_t obj; - - /* - * For the time being skip submaps, - * only the kernel can have submaps, - * and unless we are interested in - * kernel objects, we can simply skip - * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm - * for a full solution. 
- */ - if (entry->is_sub_map) - continue; - if (entry) - obj = entry->object.vm_object; - else - continue; - - while (obj != VM_OBJECT_NULL) { - if (obj == object) { - if (!found) { - printf("TASK\t\tMAP\t\tENTRY\n"); - found = TRUE; - } - printf("0x%x\t0x%x\t0x%x\n", - task, map, entry); - } - obj = obj->shadow; - } - } - } - - return(found); -} - -#endif /* MACH_KDB */ - kern_return_t vm_object_populate_with_private( vm_object_t object, @@ -4686,40 +5722,45 @@ vm_object_populate_with_private( vm_object_offset_t base_offset; - if(!object->private) + if (!object->private) return KERN_FAILURE; base_page = phys_page; vm_object_lock(object); - if(!object->phys_contiguous) { + + if (!object->phys_contiguous) { vm_page_t m; - if((base_offset = trunc_page_64(offset)) != offset) { + + if ((base_offset = trunc_page_64(offset)) != offset) { vm_object_unlock(object); return KERN_FAILURE; } base_offset += object->paging_offset; - while(size) { + + while (size) { m = vm_page_lookup(object, base_offset); - if(m != VM_PAGE_NULL) { - if(m->fictitious) { - if (m->phys_page != - vm_page_guard_addr) { + + if (m != VM_PAGE_NULL) { + if (m->fictitious) { + if (m->phys_page != vm_page_guard_addr) { + vm_page_lockspin_queues(); - m->fictitious = FALSE; m->private = TRUE; - m->phys_page = base_page; - if(!m->busy) { - m->busy = TRUE; - } - if(!m->absent) { - m->absent = TRUE; - } - m->list_req_pending = TRUE; vm_page_unlock_queues(); + + m->fictitious = FALSE; + m->phys_page = base_page; } } else if (m->phys_page != base_page) { - if (m->pmapped) { + + if ( !m->private) { + /* + * we'd leak a real page... that can't be right + */ + panic("vm_object_populate_with_private - %p not private", m); + } + if (m->pmapped) { /* * pmap call to clear old mapping */ @@ -4727,30 +5768,27 @@ vm_object_populate_with_private( } m->phys_page = base_page; } - - /* - * ENCRYPTED SWAP: - * We're not pointing to the same - * physical page any longer and the - * contents of the new one are not - * supposed to be encrypted. - * XXX What happens to the original - * physical page. Is it lost ? 
- */ - m->encrypted = FALSE; + if (m->encrypted) { + /* + * we should never see this on a ficticious or private page + */ + panic("vm_object_populate_with_private - %p encrypted", m); + } } else { - while ((m = vm_page_grab_fictitious()) - == VM_PAGE_NULL) + while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) vm_page_more_fictitious(); - vm_page_lockspin_queues(); - m->fictitious = FALSE; + + /* + * private normally requires lock_queues but since we + * are initializing the page, its not necessary here + */ m->private = TRUE; + m->fictitious = FALSE; m->phys_page = base_page; - m->list_req_pending = TRUE; - m->absent = TRUE; m->unusual = TRUE; - vm_page_unlock_queues(); + m->busy = FALSE; + vm_page_insert(m, object, base_offset); } base_page++; /* Go to the next physical page */ @@ -4766,10 +5804,11 @@ vm_object_populate_with_private( /* shadows on contiguous memory are not allowed */ /* we therefore can use the offset field */ - object->shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT; - object->size = size; + object->vo_shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT; + object->vo_size = size; } vm_object_unlock(object); + return KERN_SUCCESS; } @@ -4788,10 +5827,10 @@ vm_object_populate_with_private( __private_extern__ kern_return_t memory_object_free_from_cache( __unused host_t host, - memory_object_pager_ops_t pager_ops, + __unused memory_object_pager_ops_t pager_ops, int *count) { - +#if VM_OBJECT_CACHE int object_released = 0; register vm_object_t object = VM_OBJECT_NULL; @@ -4814,6 +5853,7 @@ memory_object_free_from_cache( vm_object_t, cached_list); vm_object_cached_count--; + vm_object_cache_unlock(); /* * Since this object is in the cache, we know * that it is initialized and has only a pager's @@ -4836,6 +5876,7 @@ memory_object_free_from_cache( * (We are careful here to limit recursion.) 
*/ shadow = object->pageout?VM_OBJECT_NULL:object->shadow; + if ((vm_object_terminate(object) == KERN_SUCCESS) && (shadow != VM_OBJECT_NULL)) { vm_object_deallocate(shadow); @@ -4848,6 +5889,9 @@ memory_object_free_from_cache( } vm_object_cache_unlock(); *count = object_released; +#else + *count = 0; +#endif return KERN_SUCCESS; } @@ -4861,21 +5905,22 @@ memory_object_create_named( { vm_object_t object; vm_object_hash_entry_t entry; + lck_mtx_t *lck; *control = MEMORY_OBJECT_CONTROL_NULL; if (pager == MEMORY_OBJECT_NULL) return KERN_INVALID_ARGUMENT; - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); entry = vm_object_hash_lookup(pager, FALSE); + if ((entry != VM_OBJECT_HASH_ENTRY_NULL) && (entry->object != VM_OBJECT_NULL)) { if (entry->object->named == TRUE) panic("memory_object_create_named: caller already holds the right"); } + vm_object_hash_unlock(lck); - vm_object_cache_unlock(); - if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) - == VM_OBJECT_NULL) { + if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) == VM_OBJECT_NULL) { return(KERN_INVALID_OBJECT); } @@ -4914,50 +5959,47 @@ memory_object_recover_named( { vm_object_t object; - vm_object_cache_lock(); object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) { - vm_object_cache_unlock(); return (KERN_INVALID_ARGUMENT); } - restart: vm_object_lock(object); if (object->terminating && wait_on_terminating) { - vm_object_cache_unlock(); vm_object_wait(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS, THREAD_UNINT); - vm_object_cache_lock(); goto restart; } if (!object->alive) { - vm_object_cache_unlock(); vm_object_unlock(object); return KERN_FAILURE; } if (object->named == TRUE) { - vm_object_cache_unlock(); vm_object_unlock(object); return KERN_SUCCESS; } - - if((object->ref_count == 0) && (!object->terminating)){ +#if VM_OBJECT_CACHE + if ((object->ref_count == 0) && (!object->terminating)) { + if (!vm_object_cache_lock_try()) { + vm_object_unlock(object); + goto restart; + } queue_remove(&vm_object_cached_list, object, vm_object_t, cached_list); - vm_object_cached_count--; - XPR(XPR_VM_OBJECT_CACHE, - "memory_object_recover_named: removing %X, head (%X, %X)\n", - (integer_t)object, - (integer_t)vm_object_cached_list.next, - (integer_t)vm_object_cached_list.prev, 0,0); + vm_object_cached_count--; + XPR(XPR_VM_OBJECT_CACHE, + "memory_object_recover_named: removing %X, head (%X, %X)\n", + object, + vm_object_cached_list.next, + vm_object_cached_list.prev, 0,0); + + vm_object_cache_unlock(); } - - vm_object_cache_unlock(); - +#endif object->named = TRUE; vm_object_lock_assert_exclusive(object); object->ref_count++; @@ -4999,17 +6041,10 @@ vm_object_release_name( while (object != VM_OBJECT_NULL) { - /* - * The cache holds a reference (uncounted) to - * the object. We must locke it before removing - * the object. 
- * - */ - - vm_object_cache_lock(); vm_object_lock(object); + assert(object->alive); - if(original_object) + if (original_object) assert(object->named); assert(object->ref_count > 0); @@ -5024,7 +6059,6 @@ vm_object_release_name( VM_OBJECT_EVENT_INITIALIZED, THREAD_UNINT); vm_object_unlock(object); - vm_object_cache_unlock(); thread_block(THREAD_CONTINUE_NULL); continue; } @@ -5033,22 +6067,19 @@ vm_object_release_name( && (flags & MEMORY_OBJECT_TERMINATE_IDLE)) || (object->terminating)) { vm_object_unlock(object); - vm_object_cache_unlock(); return KERN_FAILURE; } else { if (flags & MEMORY_OBJECT_RELEASE_NO_OP) { vm_object_unlock(object); - vm_object_cache_unlock(); return KERN_SUCCESS; } } if ((flags & MEMORY_OBJECT_RESPECT_CACHE) && (object->ref_count == 1)) { - if(original_object) + if (original_object) object->named = FALSE; vm_object_unlock(object); - vm_object_cache_unlock(); /* let vm_object_deallocate push this thing into */ /* the cache, if that it is where it is bound */ vm_object_deallocate(object); @@ -5056,9 +6087,10 @@ vm_object_release_name( } VM_OBJ_RES_DECR(object); shadow = object->pageout?VM_OBJECT_NULL:object->shadow; - if(object->ref_count == 1) { - if(vm_object_terminate(object) != KERN_SUCCESS) { - if(original_object) { + + if (object->ref_count == 1) { + if (vm_object_terminate(object) != KERN_SUCCESS) { + if (original_object) { return KERN_FAILURE; } else { return KERN_SUCCESS; @@ -5077,7 +6109,6 @@ vm_object_release_name( if(original_object) object->named = FALSE; vm_object_unlock(object); - vm_object_cache_unlock(); return KERN_SUCCESS; } } @@ -5102,7 +6133,7 @@ vm_object_lock_request( XPR(XPR_MEMORY_OBJECT, "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n", - (integer_t)object, offset, size, + object, offset, size, (((should_return&1)<<1)|should_flush), prot); /* @@ -5141,125 +6172,44 @@ vm_object_lock_request( * than happy to grab these since this is a purgeable object. We mark the * object as "empty" after reaping its pages. * - * On entry the object and page queues are locked, the object must be a - * purgeable object with no delayed copies pending. + * On entry the object must be locked and it must be + * purgeable with no delayed copies pending. */ -unsigned int +void vm_object_purge(vm_object_t object) { - vm_page_t p, next; - unsigned int num_purged_pages; - vm_page_t local_freeq; - unsigned long local_freed; - int purge_loop_quota; -/* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */ -#define PURGE_BATCH_FREE_LIMIT 50 -/* release page queues lock every PURGE_LOOP_QUOTA iterations */ -#define PURGE_LOOP_QUOTA 100 - - num_purged_pages = 0; - if (object->purgable == VM_PURGABLE_DENY) - return num_purged_pages; + vm_object_lock_assert_exclusive(object); - assert(object->purgable != VM_PURGABLE_NONVOLATILE); - object->purgable = VM_PURGABLE_EMPTY; + if (object->purgable == VM_PURGABLE_DENY) + return; assert(object->copy == VM_OBJECT_NULL); assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); - purge_loop_quota = PURGE_LOOP_QUOTA; - - local_freeq = VM_PAGE_NULL; - local_freed = 0; - - /* - * Go through the object's resident pages and try and discard them. - */ - next = (vm_page_t)queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t)next)) { - p = next; - next = (vm_page_t)queue_next(&next->listq); - - if (purge_loop_quota-- == 0) { - /* - * Avoid holding the page queues lock for too long. - * Let someone else take it for a while if needed. 
- * Keep holding the object's lock to guarantee that - * the object's page list doesn't change under us - * while we yield. - */ - if (local_freeq != VM_PAGE_NULL) { - /* - * Flush our queue of pages to free. - */ - vm_page_free_list(local_freeq); - local_freeq = VM_PAGE_NULL; - local_freed = 0; - } - mutex_yield(&vm_page_queue_lock); - - /* resume with the current page and a new quota */ - purge_loop_quota = PURGE_LOOP_QUOTA; - } - - - if (p->busy || p->cleaning || p->laundry || - p->list_req_pending) { - /* page is being acted upon, so don't mess with it */ - continue; - } - if (p->wire_count) { - /* don't discard a wired page */ - continue; - } - - assert(!p->laundry); - assert(p->object != kernel_object); - - /* we can discard this page */ - - /* advertize that this page is in a transition state */ - p->busy = TRUE; - - if (p->pmapped == TRUE) { - /* unmap the page */ - int refmod_state; - - refmod_state = pmap_disconnect(p->phys_page); - if (refmod_state & VM_MEM_MODIFIED) { - p->dirty = TRUE; - } - } - if (p->dirty || p->precious) { - /* we saved the cost of cleaning this page ! */ - num_purged_pages++; - vm_page_purged_count++; + if(object->purgable == VM_PURGABLE_VOLATILE) { + unsigned int delta; + assert(object->resident_page_count >= + object->wired_page_count); + delta = (object->resident_page_count - + object->wired_page_count); + if (delta != 0) { + assert(vm_page_purgeable_count >= + delta); + OSAddAtomic(-delta, + (SInt32 *)&vm_page_purgeable_count); } - - vm_page_free_prepare(p); - - /* ... and put it on our queue of pages to free */ - assert(p->pageq.next == NULL && - p->pageq.prev == NULL); - p->pageq.next = (queue_entry_t) local_freeq; - local_freeq = p; - if (++local_freed >= PURGE_BATCH_FREE_LIMIT) { - /* flush our queue of pages to free */ - vm_page_free_list(local_freeq); - local_freeq = VM_PAGE_NULL; - local_freed = 0; + if (object->wired_page_count != 0) { + assert(vm_page_purgeable_wired_count >= + object->wired_page_count); + OSAddAtomic(-object->wired_page_count, + (SInt32 *)&vm_page_purgeable_wired_count); } } - - /* flush our local queue of pages to free one last time */ - if (local_freeq != VM_PAGE_NULL) { - vm_page_free_list(local_freeq); - local_freeq = VM_PAGE_NULL; - local_freed = 0; - } - - return num_purged_pages; + object->purgable = VM_PURGABLE_EMPTY; + + vm_object_reap_pages(object, REAP_PURGEABLE); } + /* * vm_object_purgeable_control() allows the caller to control and investigate the @@ -5371,39 +6321,88 @@ vm_object_purgable_control( return KERN_SUCCESS; } + if ((*state) & VM_PURGABLE_DEBUG_EMPTY) { + object->volatile_empty = TRUE; + } + if ((*state) & VM_PURGABLE_DEBUG_FAULT) { + object->volatile_fault = TRUE; + } + new_state = *state & VM_PURGABLE_STATE_MASK; + if (new_state == VM_PURGABLE_VOLATILE && + object->volatile_empty) { + new_state = VM_PURGABLE_EMPTY; + } + switch (new_state) { case VM_PURGABLE_DENY: case VM_PURGABLE_NONVOLATILE: object->purgable = new_state; - if (old_state != VM_PURGABLE_NONVOLATILE) { + if (old_state == VM_PURGABLE_VOLATILE) { + unsigned int delta; + + assert(object->resident_page_count >= + object->wired_page_count); + delta = (object->resident_page_count - + object->wired_page_count); + + assert(vm_page_purgeable_count >= delta); + + if (delta != 0) { + OSAddAtomic(-delta, + (SInt32 *)&vm_page_purgeable_count); + } + if (object->wired_page_count != 0) { + assert(vm_page_purgeable_wired_count >= + object->wired_page_count); + OSAddAtomic(-object->wired_page_count, + (SInt32 *)&vm_page_purgeable_wired_count); + } + 
vm_page_lock_queues(); - assert(vm_page_purgeable_count >= - object->resident_page_count); - vm_page_purgeable_count -= object->resident_page_count; - if (old_state==VM_PURGABLE_VOLATILE) { - assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ - purgeable_q_t queue = vm_purgeable_object_remove(object); - assert(queue); + assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ + purgeable_q_t queue = vm_purgeable_object_remove(object); + assert(queue); + + if (object->purgeable_when_ripe) { + vm_purgeable_token_delete_last(queue); + } + assert(queue->debug_count_objects>=0); - vm_purgeable_token_delete_first(queue); - assert(queue->debug_count_objects>=0); - }; vm_page_unlock_queues(); } break; case VM_PURGABLE_VOLATILE: - - if ((old_state != VM_PURGABLE_NONVOLATILE) && (old_state != VM_PURGABLE_VOLATILE)) + if (object->volatile_fault) { + vm_page_t p; + int refmod; + + queue_iterate(&object->memq, p, vm_page_t, listq) { + if (p->busy || + VM_PAGE_WIRED(p) || + p->fictitious) { + continue; + } + refmod = pmap_disconnect(p->phys_page); + if ((refmod & VM_MEM_MODIFIED) && + !p->dirty) { + SET_PAGE_DIRTY(p, FALSE); + } + } + } + + if (old_state == VM_PURGABLE_EMPTY && + object->resident_page_count == 0) break; + purgeable_q_t queue; /* find the correct queue */ if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE) - queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; + queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE]; else { if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO) queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; @@ -5411,18 +6410,44 @@ vm_object_purgable_control( queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO]; } - if (old_state == VM_PURGABLE_NONVOLATILE) { - /* try to add token... this can fail */ - vm_page_lock_queues(); - - kern_return_t result = vm_purgeable_token_add(queue); - if (result != KERN_SUCCESS) { - vm_page_unlock_queues(); - return result; + if (old_state == VM_PURGABLE_NONVOLATILE || + old_state == VM_PURGABLE_EMPTY) { + unsigned int delta; + + if ((*state & VM_PURGABLE_NO_AGING_MASK) == + VM_PURGABLE_NO_AGING) { + object->purgeable_when_ripe = FALSE; + } else { + object->purgeable_when_ripe = TRUE; } - vm_page_purgeable_count += object->resident_page_count; + + if (object->purgeable_when_ripe) { + kern_return_t result; - vm_page_unlock_queues(); + /* try to add token... this can fail */ + vm_page_lock_queues(); + + result = vm_purgeable_token_add(queue); + if (result != KERN_SUCCESS) { + vm_page_unlock_queues(); + return result; + } + vm_page_unlock_queues(); + } + + assert(object->resident_page_count >= + object->wired_page_count); + delta = (object->resident_page_count - + object->wired_page_count); + + if (delta != 0) { + OSAddAtomic(delta, + &vm_page_purgeable_count); + } + if (object->wired_page_count != 0) { + OSAddAtomic(object->wired_page_count, + &vm_page_purgeable_wired_count); + } object->purgable = new_state; @@ -5430,6 +6455,9 @@ vm_object_purgable_control( assert(object->objq.next == NULL && object->objq.prev == NULL); } else if (old_state == VM_PURGABLE_VOLATILE) { + purgeable_q_t old_queue; + boolean_t purgeable_when_ripe; + /* * if reassigning priorities / purgeable groups, we don't change the * token queue. So moving priorities will not make pages stay around longer. 
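The purgeable-state transitions in vm_object_purgable_control above move a per-object delta of (resident_page_count - wired_page_count) into or out of the global purgeable counters, with the wired portion tracked separately. Below is a minimal standalone model of that bookkeeping; the names are invented and C11 atomics stand in for OSAddAtomic.

#include <stdatomic.h>
#include <assert.h>
#include <stdio.h>

/* Stand-ins for vm_page_purgeable_count and vm_page_purgeable_wired_count. */
static atomic_int purgeable_pages;
static atomic_int purgeable_wired_pages;

struct obj_counts {
    unsigned int resident;
    unsigned int wired;
};

/* Object becomes volatile: its unwired resident pages become reclaimable,
 * its wired ones are tracked separately. */
static void
account_becomes_volatile(const struct obj_counts *o)
{
    assert(o->resident >= o->wired);
    atomic_fetch_add(&purgeable_pages, (int)(o->resident - o->wired));
    atomic_fetch_add(&purgeable_wired_pages, (int)o->wired);
}

/* Object leaves the volatile state (made nonvolatile, denied, or purged):
 * undo exactly the same deltas. */
static void
account_leaves_volatile(const struct obj_counts *o)
{
    assert(o->resident >= o->wired);
    atomic_fetch_sub(&purgeable_pages, (int)(o->resident - o->wired));
    atomic_fetch_sub(&purgeable_wired_pages, (int)o->wired);
}

int
main(void)
{
    struct obj_counts o = { .resident = 100, .wired = 3 };

    account_becomes_volatile(&o);
    printf("volatile: %d purgeable, %d wired\n",
        atomic_load(&purgeable_pages), atomic_load(&purgeable_wired_pages));

    account_leaves_volatile(&o);
    printf("after: %d purgeable, %d wired\n",
        atomic_load(&purgeable_pages), atomic_load(&purgeable_wired_pages));
    return 0;
}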
@@ -5440,19 +6468,33 @@ vm_object_purgable_control( */ assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ - purgeable_q_t old_queue=vm_purgeable_object_remove(object); + old_queue = vm_purgeable_object_remove(object); assert(old_queue); - if (old_queue != queue) { + if ((*state & VM_PURGABLE_NO_AGING_MASK) == + VM_PURGABLE_NO_AGING) { + purgeable_when_ripe = FALSE; + } else { + purgeable_when_ripe = TRUE; + } + + if (old_queue != queue || + (purgeable_when_ripe != + object->purgeable_when_ripe)) { kern_return_t result; /* Changing queue. Have to move token. */ vm_page_lock_queues(); - vm_purgeable_token_delete_first(old_queue); - result = vm_purgeable_token_add(queue); + if (object->purgeable_when_ripe) { + vm_purgeable_token_delete_last(old_queue); + } + object->purgeable_when_ripe = purgeable_when_ripe; + if (object->purgeable_when_ripe) { + result = vm_purgeable_token_add(queue); + assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */ + } vm_page_unlock_queues(); - assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */ } }; vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT ); @@ -5463,23 +6505,42 @@ vm_object_purgable_control( case VM_PURGABLE_EMPTY: - if (old_state != new_state) - { - assert(old_state==VM_PURGABLE_NONVOLATILE || old_state==VM_PURGABLE_VOLATILE); - if(old_state==VM_PURGABLE_VOLATILE) { - assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ - purgeable_q_t old_queue=vm_purgeable_object_remove(object); - assert(old_queue); - vm_page_lock_queues(); - vm_purgeable_token_delete_first(old_queue); + if (object->volatile_fault) { + vm_page_t p; + int refmod; + + queue_iterate(&object->memq, p, vm_page_t, listq) { + if (p->busy || + VM_PAGE_WIRED(p) || + p->fictitious) { + continue; + } + refmod = pmap_disconnect(p->phys_page); + if ((refmod & VM_MEM_MODIFIED) && + !p->dirty) { + SET_PAGE_DIRTY(p, FALSE); + } } + } - if (old_state==VM_PURGABLE_NONVOLATILE) { - vm_page_purgeable_count += object->resident_page_count; - vm_page_lock_queues(); + if (old_state != new_state) { + assert(old_state == VM_PURGABLE_NONVOLATILE || + old_state == VM_PURGABLE_VOLATILE); + if (old_state == VM_PURGABLE_VOLATILE) { + purgeable_q_t old_queue; + + /* object should be on a queue */ + assert(object->objq.next != NULL && + object->objq.prev != NULL); + old_queue = vm_purgeable_object_remove(object); + assert(old_queue); + if (object->purgeable_when_ripe) { + vm_page_lock_queues(); + vm_purgeable_token_delete_first(old_queue); + vm_page_unlock_queues(); + } } (void) vm_object_purge(object); - vm_page_unlock_queues(); } break; @@ -5489,6 +6550,103 @@ vm_object_purgable_control( return KERN_SUCCESS; } +kern_return_t +vm_object_get_page_counts( + vm_object_t object, + vm_object_offset_t offset, + vm_object_size_t size, + unsigned int *resident_page_count, + unsigned int *dirty_page_count) +{ + + kern_return_t kr = KERN_SUCCESS; + boolean_t count_dirty_pages = FALSE; + vm_page_t p = VM_PAGE_NULL; + unsigned int local_resident_count = 0; + unsigned int local_dirty_count = 0; + vm_object_offset_t cur_offset = 0; + vm_object_offset_t end_offset = 0; + + if (object == VM_OBJECT_NULL) + return KERN_INVALID_ARGUMENT; + + + cur_offset = offset; + + end_offset = offset + size; + + vm_object_lock_assert_exclusive(object); + + if (dirty_page_count != NULL) { + + count_dirty_pages = TRUE; + } + + if 
(resident_page_count != NULL && count_dirty_pages == FALSE) { + /* + * Fast path when: + * - we only want the resident page count, and, + * - the entire object is exactly covered by the request. + */ + if (offset == 0 && (object->vo_size == size)) { + + *resident_page_count = object->resident_page_count; + goto out; + } + } + + if (object->resident_page_count <= (size >> PAGE_SHIFT)) { + + queue_iterate(&object->memq, p, vm_page_t, listq) { + + if (p->offset >= cur_offset && p->offset < end_offset) { + + local_resident_count++; + + if (count_dirty_pages) { + + if (p->dirty || (p->wpmapped && pmap_is_modified(p->phys_page))) { + + local_dirty_count++; + } + } + } + } + } else { + + for (cur_offset = offset; cur_offset < end_offset; cur_offset += PAGE_SIZE_64) { + + p = vm_page_lookup(object, cur_offset); + + if (p != VM_PAGE_NULL) { + + local_resident_count++; + + if (count_dirty_pages) { + + if (p->dirty || (p->wpmapped && pmap_is_modified(p->phys_page))) { + + local_dirty_count++; + } + } + } + } + + } + + if (resident_page_count != NULL) { + *resident_page_count = local_resident_count; + } + + if (dirty_page_count != NULL) { + *dirty_page_count = local_dirty_count; + } + +out: + return kr; +} + + #if TASK_SWAPPER /* * vm_object_res_deallocate @@ -5606,10 +6764,12 @@ vm_object_reference( kern_return_t adjust_vm_object_cache( __unused vm_size_t oval, - vm_size_t nval) + __unused vm_size_t nval) { +#if VM_OBJECT_CACHE vm_object_cached_max = nval; vm_object_cache_trim(FALSE); +#endif return (KERN_SUCCESS); } #endif /* MACH_BSD */ @@ -5625,6 +6785,7 @@ adjust_vm_object_cache( * * The VM objects must not be locked by caller. */ +unsigned int vm_object_transpose_count = 0; kern_return_t vm_object_transpose( vm_object_t object1, @@ -5634,13 +6795,13 @@ vm_object_transpose( vm_object_t tmp_object; kern_return_t retval; boolean_t object1_locked, object2_locked; - boolean_t object1_paging, object2_paging; vm_page_t page; vm_object_offset_t page_offset; + lck_mtx_t *hash_lck; + vm_object_hash_entry_t hash_entry; tmp_object = VM_OBJECT_NULL; object1_locked = FALSE; object2_locked = FALSE; - object1_paging = FALSE; object2_paging = FALSE; if (object1 == object2 || object1 == VM_OBJECT_NULL || @@ -5653,6 +6814,29 @@ vm_object_transpose( goto done; } + /* + * Since we need to lock both objects at the same time, + * make sure we always lock them in the same order to + * avoid deadlocks. + */ + if (object1 > object2) { + tmp_object = object1; + object1 = object2; + object2 = tmp_object; + } + + /* + * Allocate a temporary VM object to hold object1's contents + * while we copy object2 to object1. + */ + tmp_object = vm_object_allocate(transpose_size); + vm_object_lock(tmp_object); + tmp_object->can_persist = FALSE; + + + /* + * Grab control of the 1st VM object. + */ vm_object_lock(object1); object1_locked = TRUE; if (!object1->alive || object1->terminating || @@ -5665,17 +6849,19 @@ vm_object_transpose( goto done; } /* - * Since we're about to mess with the object's backing store, - * mark it as "paging_in_progress". Note that this is not enough + * We're about to mess with the object's backing store and + * taking a "paging_in_progress" reference wouldn't be enough * to prevent any paging activity on this object, so the caller should * have "quiesced" the objects beforehand, via a UPL operation with * UPL_SET_IO_WIRE (to make sure all the pages are there and wired) * and UPL_BLOCK_ACCESS (to mark the pages "busy"). 
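vm_object_get_page_counts() above picks between two walks: a single pass over the object's resident page queue when the object holds no more pages than the requested range spans, or a per-offset page lookup otherwise. A standalone sketch of that choice follows, with a toy object type and invented helper names; the real kernel lookup goes through the object's page hash, which the linear search here does not model.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE_M 4096u   /* illustrative page size */

/* Toy "object": just a list of resident page offsets (byte offsets). */
struct toy_object {
    const unsigned long long *resident;   /* one entry per resident page */
    size_t resident_count;
};

static bool
toy_page_lookup(const struct toy_object *o, unsigned long long off)
{
    for (size_t i = 0; i < o->resident_count; i++)
        if (o->resident[i] == off)
            return true;
    return false;
}

/*
 * Count resident pages in [offset, offset + size), choosing the cheaper
 * walk: if the object has no more resident pages than the range has page
 * slots, scan the resident list once; otherwise probe each page offset.
 */
static unsigned int
toy_resident_in_range(const struct toy_object *o,
    unsigned long long offset, unsigned long long size)
{
    unsigned int count = 0;
    unsigned long long end = offset + size;

    if (o->resident_count <= size / PAGE_SIZE_M) {
        for (size_t i = 0; i < o->resident_count; i++)
            if (o->resident[i] >= offset && o->resident[i] < end)
                count++;
    } else {
        for (unsigned long long cur = offset; cur < end; cur += PAGE_SIZE_M)
            if (toy_page_lookup(o, cur))
                count++;
    }
    return count;
}

int
main(void)
{
    const unsigned long long pages[] = { 0, 4096, 12288, 65536 };
    struct toy_object o = { pages, 4 };

    printf("%u resident in first 16KB\n",
        toy_resident_in_range(&o, 0, 4 * PAGE_SIZE_M));
    return 0;
}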
+ * + * Wait for any paging operation to complete (but only paging, not + * other kind of activities not linked to the pager). After we're + * statisfied that there's no more paging in progress, we keep the + * object locked, to guarantee that no one tries to access its pager. */ - vm_object_paging_begin(object1); - object1_paging = TRUE; - vm_object_unlock(object1); - object1_locked = FALSE; + vm_object_paging_only_wait(object1, THREAD_UNINT); /* * Same as above for the 2nd object... @@ -5688,37 +6874,11 @@ vm_object_transpose( retval = KERN_INVALID_VALUE; goto done; } - vm_object_paging_begin(object2); - object2_paging = TRUE; - vm_object_unlock(object2); - object2_locked = FALSE; - - /* - * Allocate a temporary VM object to hold object1's contents - * while we copy object2 to object1. - */ - tmp_object = vm_object_allocate(transpose_size); - vm_object_lock(tmp_object); - vm_object_paging_begin(tmp_object); - tmp_object->can_persist = FALSE; + vm_object_paging_only_wait(object2, THREAD_UNINT); - /* - * Since we need to lock both objects at the same time, - * make sure we always lock them in the same order to - * avoid deadlocks. - */ - if (object1 < object2) { - vm_object_lock(object1); - vm_object_lock(object2); - } else { - vm_object_lock(object2); - vm_object_lock(object1); - } - object1_locked = TRUE; - object2_locked = TRUE; - if (object1->size != object2->size || - object1->size != transpose_size) { + if (object1->vo_size != object2->vo_size || + object1->vo_size != transpose_size) { /* * If the 2 objects don't have the same size, we can't * exchange their backing stores or one would overflow. @@ -5760,15 +6920,13 @@ vm_object_transpose( assert(queue_empty(&object1->memq)); } else { /* transfer object1's pages to tmp_object */ - vm_page_lock_queues(); while (!queue_empty(&object1->memq)) { page = (vm_page_t) queue_first(&object1->memq); page_offset = page->offset; - vm_page_remove(page); + vm_page_remove(page, TRUE); page->offset = page_offset; queue_enter(&tmp_object->memq, page, vm_page_t, listq); } - vm_page_unlock_queues(); assert(queue_empty(&object1->memq)); /* transfer object2's pages to object1 */ while (!queue_empty(&object2->memq)) { @@ -5793,21 +6951,24 @@ MACRO_BEGIN \ object2->field = tmp_object->field; \ MACRO_END - /* "size" should be identical */ - assert(object1->size == object2->size); /* "Lock" refers to the object not its contents */ + /* "size" should be identical */ + assert(object1->vo_size == object2->vo_size); + /* "memq_hint" was updated above when transposing pages */ /* "ref_count" refers to the object not its contents */ #if TASK_SWAPPER /* "res_count" refers to the object not its contents */ #endif /* "resident_page_count" was updated above when transposing pages */ + /* "wired_page_count" was updated above when transposing pages */ + /* "reusable_page_count" was updated above when transposing pages */ /* there should be no "copy" */ assert(!object1->copy); assert(!object2->copy); /* there should be no "shadow" */ assert(!object1->shadow); assert(!object2->shadow); - __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */ + __TRANSPOSE_FIELD(vo_shadow_offset); /* used by phys_contiguous objects */ __TRANSPOSE_FIELD(pager); __TRANSPOSE_FIELD(paging_offset); __TRANSPOSE_FIELD(pager_control); @@ -5822,8 +6983,10 @@ MACRO_END } __TRANSPOSE_FIELD(copy_strategy); /* "paging_in_progress" refers to the object not its contents */ - assert(object1->paging_in_progress); - assert(object2->paging_in_progress); + assert(!object1->paging_in_progress); 
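The field exchange in vm_object_transpose relies on the __TRANSPOSE_FIELD macro shown just above, which routes every swap through the temporary object allocated at the top of the function. A reduced, compilable sketch of that macro pattern with made-up fields:

#include <stdio.h>

struct toy {
    int wimg_bits;
    int sequential;
};

/* Same shape as __TRANSPOSE_FIELD above: route each field swap through a
 * scratch object so the two real objects end up exchanged. */
#define TOY_TRANSPOSE_FIELD(field)      \
    do {                                \
        tmp.field = a.field;            \
        a.field = b.field;              \
        b.field = tmp.field;            \
    } while (0)

int
main(void)
{
    struct toy a = { 1, 10 };
    struct toy b = { 2, 20 };
    struct toy tmp;

    TOY_TRANSPOSE_FIELD(wimg_bits);
    TOY_TRANSPOSE_FIELD(sequential);

    printf("a = {%d, %d}, b = {%d, %d}\n",
        a.wimg_bits, a.sequential, b.wimg_bits, b.sequential);
    return 0;
}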
+ assert(!object2->paging_in_progress); + assert(object1->activity_in_progress); + assert(object2->activity_in_progress); /* "all_wanted" refers to the object not its contents */ __TRANSPOSE_FIELD(pager_created); __TRANSPOSE_FIELD(pager_initialized); @@ -5841,7 +7004,7 @@ MACRO_END assert(object1->purgable == VM_PURGABLE_DENY); assert(object2->purgable == VM_PURGABLE_DENY); /* "shadowed" refers to the the object not its contents */ - __TRANSPOSE_FIELD(silent_overwrite); + __TRANSPOSE_FIELD(purgeable_when_ripe); __TRANSPOSE_FIELD(advisory_pageout); __TRANSPOSE_FIELD(true_share); /* "terminating" should not be set */ @@ -5851,11 +7014,12 @@ MACRO_END /* "shadow_severed" refers to the object not its contents */ __TRANSPOSE_FIELD(phys_contiguous); __TRANSPOSE_FIELD(nophyscache); - /* "cached_list" should be NULL */ + /* "cached_list.next" points to transposed object */ + object1->cached_list.next = (queue_entry_t) object2; + object2->cached_list.next = (queue_entry_t) object1; + /* "cached_list.prev" should be NULL */ assert(object1->cached_list.prev == NULL); - assert(object1->cached_list.next == NULL); assert(object2->cached_list.prev == NULL); - assert(object2->cached_list.next == NULL); /* "msr_q" is linked to the object not its contents */ assert(queue_empty(&object1->msr_q)); assert(queue_empty(&object2->msr_q)); @@ -5863,6 +7027,7 @@ MACRO_END __TRANSPOSE_FIELD(sequential); __TRANSPOSE_FIELD(pages_created); __TRANSPOSE_FIELD(pages_used); + __TRANSPOSE_FIELD(scan_collisions); #if MACH_PAGEMAP __TRANSPOSE_FIELD(existence_map); #endif @@ -5871,11 +7036,40 @@ MACRO_END __TRANSPOSE_FIELD(paging_object); #endif __TRANSPOSE_FIELD(wimg_bits); + __TRANSPOSE_FIELD(set_cache_attr); __TRANSPOSE_FIELD(code_signed); - __TRANSPOSE_FIELD(not_in_use); -#ifdef UPL_DEBUG + if (object1->hashed) { + hash_lck = vm_object_hash_lock_spin(object2->pager); + hash_entry = vm_object_hash_lookup(object2->pager, FALSE); + assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); + hash_entry->object = object2; + vm_object_hash_unlock(hash_lck); + } + if (object2->hashed) { + hash_lck = vm_object_hash_lock_spin(object1->pager); + hash_entry = vm_object_hash_lookup(object1->pager, FALSE); + assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); + hash_entry->object = object1; + vm_object_hash_unlock(hash_lck); + } + __TRANSPOSE_FIELD(hashed); + object1->transposed = TRUE; + object2->transposed = TRUE; + __TRANSPOSE_FIELD(mapping_in_progress); + __TRANSPOSE_FIELD(volatile_empty); + __TRANSPOSE_FIELD(volatile_fault); + __TRANSPOSE_FIELD(all_reusable); + assert(object1->blocked_access); + assert(object2->blocked_access); + assert(object1->__object2_unused_bits == 0); + assert(object2->__object2_unused_bits == 0); +#if UPL_DEBUG /* "uplq" refers to the object not its contents (see upl_transpose()) */ #endif + assert(object1->objq.next == NULL); + assert(object1->objq.prev == NULL); + assert(object2->objq.next == NULL); + assert(object2->objq.prev == NULL); #undef __TRANSPOSE_FIELD @@ -5886,7 +7080,6 @@ done: * Cleanup. 
*/ if (tmp_object != VM_OBJECT_NULL) { - vm_object_paging_end(tmp_object); vm_object_unlock(tmp_object); /* * Re-initialize the temporary object to avoid @@ -5905,25 +7098,15 @@ done: vm_object_unlock(object2); object2_locked = FALSE; } - if (object1_paging) { - vm_object_lock(object1); - vm_object_paging_end(object1); - vm_object_unlock(object1); - object1_paging = FALSE; - } - if (object2_paging) { - vm_object_lock(object2); - vm_object_paging_end(object2); - vm_object_unlock(object2); - object2_paging = FALSE; - } + + vm_object_transpose_count++; return retval; } /* - * vm_object_build_cluster + * vm_object_cluster_size * * Determine how big a cluster we should issue an I/O for... * @@ -5936,15 +7119,18 @@ done: * */ extern int speculative_reads_disabled; +extern int ignore_is_ssd; -uint32_t pre_heat_scaling[MAX_UPL_TRANSFER]; -uint32_t pre_heat_cluster[MAX_UPL_TRANSFER]; +unsigned int preheat_pages_max = MAX_UPL_TRANSFER; +unsigned int preheat_pages_min = 8; + +uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1]; +uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1]; -#define PRE_HEAT_MULTIPLIER 4 __private_extern__ void vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, - vm_size_t *length, vm_object_fault_info_t fault_info) + vm_size_t *length, vm_object_fault_info_t fault_info, uint32_t *io_streaming) { vm_size_t pre_heat_size; vm_size_t tail_size; @@ -5958,22 +7144,30 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, vm_behavior_t behavior; boolean_t look_behind = TRUE; boolean_t look_ahead = TRUE; + boolean_t isSSD = FALSE; + uint32_t throttle_limit; int sequential_run; int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; + unsigned int max_ph_size; + unsigned int min_ph_size; + unsigned int min_ph_size_in_bytes; assert( !(*length & PAGE_MASK)); assert( !(*start & PAGE_MASK_64)); - if ( (max_length = *length) > (MAX_UPL_TRANSFER * PAGE_SIZE) ) - max_length = (MAX_UPL_TRANSFER * PAGE_SIZE); + /* + * remember maxiumum length of run requested + */ + max_length = *length; /* * we'll always return a cluster size of at least * 1 page, since the original fault must always * be processed */ *length = PAGE_SIZE; + *io_streaming = 0; - if (speculative_reads_disabled || fault_info == NULL || max_length == 0) { + if (speculative_reads_disabled || fault_info == NULL) { /* * no cluster... 
just fault the page in */ @@ -5981,17 +7175,44 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, } orig_start = *start; target_start = orig_start; - cluster_size = round_page_32(fault_info->cluster_size); + cluster_size = round_page(fault_info->cluster_size); behavior = fault_info->behavior; vm_object_lock(object); + if (object->pager == MEMORY_OBJECT_NULL) + goto out; /* pager is gone for this object, nothing more to do */ + + if (!ignore_is_ssd) + vnode_pager_get_isSSD(object->pager, &isSSD); + + min_ph_size = preheat_pages_min; + max_ph_size = preheat_pages_max; + + if (isSSD) { + min_ph_size /= 2; + max_ph_size /= 8; + } + if (min_ph_size < 1) + min_ph_size = 1; + + if (max_ph_size < 1) + max_ph_size = 1; + else if (max_ph_size > MAX_UPL_TRANSFER) + max_ph_size = MAX_UPL_TRANSFER; + + if (max_length > (max_ph_size * PAGE_SIZE)) + max_length = max_ph_size * PAGE_SIZE; + + if (max_length <= PAGE_SIZE) + goto out; + + min_ph_size_in_bytes = min_ph_size * PAGE_SIZE; + if (object->internal) - object_size = object->size; - else if (object->pager != MEMORY_OBJECT_NULL) - vnode_pager_get_object_size(object->pager, &object_size); + object_size = object->vo_size; else - goto out; /* pager is gone for this object, nothing more to do */ + vnode_pager_get_object_size(object->pager, &object_size); object_size = round_page_64(object_size); @@ -6017,8 +7238,9 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, } else { sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; } + } - switch(behavior) { + switch (behavior) { default: behavior = VM_BEHAVIOR_DEFAULT; @@ -6027,33 +7249,35 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if (object->internal && fault_info->user_tag == VM_MEMORY_STACK) goto out; - if (sequential_run >= (3 * PAGE_SIZE)) { + if (sequential_run >= (3 * PAGE_SIZE)) { pre_heat_size = sequential_run + PAGE_SIZE; - if ((behavior = sequential_behavior) == VM_BEHAVIOR_SEQUENTIAL) + if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) look_behind = FALSE; else look_ahead = FALSE; + + *io_streaming = 1; } else { - uint32_t pages_unused; - if (object->pages_created < 32 * PRE_HEAT_MULTIPLIER) { + if (object->pages_created < (20 * min_ph_size)) { /* * prime the pump */ - pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER; - break; - } - pages_unused = object->pages_created - object->pages_used; - - if (pages_unused < (object->pages_created / 8)) { - pre_heat_size = PAGE_SIZE * 32 * PRE_HEAT_MULTIPLIER; - } else if (pages_unused < (object->pages_created / 4)) { - pre_heat_size = PAGE_SIZE * 16 * PRE_HEAT_MULTIPLIER; - } else if (pages_unused < (object->pages_created / 2)) { - pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER; + pre_heat_size = min_ph_size_in_bytes; } else { - pre_heat_size = PAGE_SIZE * 4 * PRE_HEAT_MULTIPLIER; + /* + * Linear growth in PH size: The maximum size is max_length... + * this cacluation will result in a size that is neither a + * power of 2 nor a multiple of PAGE_SIZE... 
so round + * it up to the nearest PAGE_SIZE boundary + */ + pre_heat_size = (max_length * object->pages_used) / object->pages_created; + + if (pre_heat_size < min_ph_size_in_bytes) + pre_heat_size = min_ph_size_in_bytes; + else + pre_heat_size = round_page(pre_heat_size); } } break; @@ -6067,6 +7291,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if ((pre_heat_size = cluster_size) == 0) pre_heat_size = sequential_run + PAGE_SIZE; look_behind = FALSE; + *io_streaming = 1; break; @@ -6074,29 +7299,82 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if ((pre_heat_size = cluster_size) == 0) pre_heat_size = sequential_run + PAGE_SIZE; look_ahead = FALSE; + *io_streaming = 1; break; } + throttle_limit = (uint32_t) max_length; + assert(throttle_limit == max_length); + + if (vnode_pager_get_throttle_io_limit(object->pager, &throttle_limit) == KERN_SUCCESS) { + if (max_length > throttle_limit) + max_length = throttle_limit; + } if (pre_heat_size > max_length) pre_heat_size = max_length; - if (behavior == VM_BEHAVIOR_DEFAULT && vm_page_free_count < vm_page_free_target) - pre_heat_size /= 2; + if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size_in_bytes)) { + unsigned int consider_free = vm_page_free_count + vm_page_cleaned_count; + + if (consider_free < vm_page_throttle_limit) { + pre_heat_size = trunc_page(pre_heat_size / 16); + } else if (consider_free < vm_page_free_target) { + pre_heat_size = trunc_page(pre_heat_size / 4); + } + + if (pre_heat_size < min_ph_size_in_bytes) + pre_heat_size = min_ph_size_in_bytes; + } if (look_ahead == TRUE) { - if (look_behind == TRUE) - target_start &= ~(pre_heat_size - 1); + if (look_behind == TRUE) { + /* + * if we get here its due to a random access... + * so we want to center the original fault address + * within the cluster we will issue... make sure + * to calculate 'head_size' as a multiple of PAGE_SIZE... + * 'pre_heat_size' is a multiple of PAGE_SIZE but not + * necessarily an even number of pages so we need to truncate + * the result to a PAGE_SIZE boundary + */ + head_size = trunc_page(pre_heat_size / 2); - if ((target_start + pre_heat_size) > object_size) - pre_heat_size = (vm_size_t)(trunc_page_64(object_size - target_start)); + if (target_start > head_size) + target_start -= head_size; + else + target_start = 0; - tail_size = pre_heat_size - (orig_start - target_start) - PAGE_SIZE; + /* + * 'target_start' at this point represents the beginning offset + * of the cluster we are considering... 'orig_start' will be in + * the center of this cluster if we didn't have to clip the start + * due to running into the start of the file + */ + } + if ((target_start + pre_heat_size) > object_size) + pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start)); + /* + * at this point caclulate the number of pages beyond the original fault + * address that we want to consider... this is guaranteed not to extend beyond + * the current EOF... 
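The head/tail arithmetic added to vm_object_cluster_size above centers the speculative cluster on the faulting page and clips it against both ends of the file. The following is a small standalone model of that math, assuming page-aligned inputs and a fault that lies within the object; the names and the 4KB page size are illustrative, not the kernel's.

#include <stdio.h>

#define PG 4096ull
#define TRUNC_PG(x) ((x) & ~(PG - 1))

/*
 * Center a cluster of pre_heat bytes on the faulting page, clipping at
 * offset 0 and at the end of the object.  head/tail are the bytes the
 * cluster covers before and after the faulting page itself.
 */
static void
center_cluster(unsigned long long fault_off, unsigned long long pre_heat,
    unsigned long long object_size,
    unsigned long long *head, unsigned long long *tail)
{
    unsigned long long start = fault_off;
    unsigned long long head_size = TRUNC_PG(pre_heat / 2);

    /* look behind the fault, but not past the start of the object */
    start = (start > head_size) ? start - head_size : 0;

    /* don't let the cluster run past the end of the object */
    if (start + pre_heat > object_size)
        pre_heat = object_size - start;

    *head = fault_off - start;        /* bytes before the faulting page */
    *tail = pre_heat - *head - PG;    /* bytes after the faulting page  */
}

int
main(void)
{
    unsigned long long head, tail;

    /* fault at 64KB into a 1MB object, 32KB of pre-heat */
    center_cluster(64 * 1024, 32 * 1024, 1024 * 1024, &head, &tail);
    printf("head %llu bytes, tail %llu bytes\n", head, tail);

    /* fault near the start: the head is clipped to what exists */
    center_cluster(4096, 32 * 1024, 1024 * 1024, &head, &tail);
    printf("head %llu bytes, tail %llu bytes\n", head, tail);
    return 0;
}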
+ */ + assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start)); + tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE; } else { - if (pre_heat_size > target_start) - pre_heat_size = target_start; + if (pre_heat_size > target_start) { + /* + * since pre_heat_size is always smaller then 2^32, + * if it is larger then target_start (a 64 bit value) + * it is safe to clip target_start to 32 bits + */ + pre_heat_size = (vm_size_t) target_start; + } tail_size = 0; } + assert( !(target_start & PAGE_MASK_64)); + assert( !(pre_heat_size & PAGE_MASK)); + pre_heat_scaling[pre_heat_size / PAGE_SIZE]++; if (pre_heat_size <= PAGE_SIZE) @@ -6105,7 +7383,9 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if (look_behind == TRUE) { /* * take a look at the pages before the original - * faulting offset + * faulting offset... recalculate this in case + * we had to clip 'pre_heat_size' above to keep + * from running past the EOF. */ head_size = pre_heat_size - tail_size - PAGE_SIZE; @@ -6127,7 +7407,11 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, */ break; } -#endif +#endif /* MACH_PAGEMAP */ + if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) + == VM_EXTERNAL_STATE_ABSENT) { + break; + } if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { /* * don't bridge resident pages @@ -6145,6 +7429,8 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, */ if (offset >= fault_info->hi_offset) break; + assert(offset < object_size); + /* * for external objects and internal objects w/o an existence map * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN @@ -6157,7 +7443,11 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, */ break; } -#endif +#endif /* MACH_PAGEMAP */ + if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) + == VM_EXTERNAL_STATE_ABSENT) { + break; + } if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { /* * don't bridge resident pages @@ -6168,9 +7458,14 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, } } out: + if (*length > max_length) + *length = max_length; + pre_heat_cluster[*length / PAGE_SIZE]++; vm_object_unlock(object); + + DTRACE_VM1(clustersize, vm_size_t, *length); } @@ -6195,7 +7490,7 @@ vm_object_page_op( if(object->phys_contiguous) { if (phys_entry) { *phys_entry = (ppnum_t) - (object->shadow_offset >> PAGE_SHIFT); + (object->vo_shadow_offset >> PAGE_SHIFT); } vm_object_unlock(object); return KERN_SUCCESS; @@ -6229,10 +7524,7 @@ vm_object_page_op( if (dst_page->pmapped == TRUE) pmap_disconnect(dst_page->phys_page); - vm_page_lock_queues(); - vm_page_free(dst_page); - vm_page_unlock_queues(); - + VM_PAGE_FREE(dst_page); break; } @@ -6260,7 +7552,9 @@ vm_object_page_op( /* if such violations occur we will assert sooner */ /* or later. 
*/ assert(dst_page->busy || (ops & UPL_POP_BUSY)); - if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE; + if (ops & UPL_POP_DIRTY) { + SET_PAGE_DIRTY(dst_page, FALSE); + } if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE; if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE; if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE; @@ -6346,17 +7640,23 @@ vm_object_range_op( vm_object_offset_t offset_beg, vm_object_offset_t offset_end, int ops, - int *range) + uint32_t *range) { vm_object_offset_t offset; vm_page_t dst_page; + if (offset_end - offset_beg > (uint32_t) -1) { + /* range is too big and would overflow "*range" */ + return KERN_INVALID_ARGUMENT; + } if (object->resident_page_count == 0) { if (range) { - if (ops & UPL_ROP_PRESENT) + if (ops & UPL_ROP_PRESENT) { *range = 0; - else - *range = offset_end - offset_beg; + } else { + *range = (uint32_t) (offset_end - offset_beg); + assert(*range == (offset_end - offset_beg)); + } } return KERN_SUCCESS; } @@ -6374,7 +7674,7 @@ vm_object_range_op( if (dst_page != VM_PAGE_NULL) { if (ops & UPL_ROP_DUMP) { if (dst_page->busy || dst_page->cleaning) { - /* + /* * someone else is playing with the * page, we will have to wait */ @@ -6387,14 +7687,17 @@ vm_object_range_op( */ continue; } + if (dst_page->laundry) { + dst_page->pageout = FALSE; + + vm_pageout_steal_laundry(dst_page, FALSE); + } if (dst_page->pmapped == TRUE) pmap_disconnect(dst_page->phys_page); - vm_page_lock_queues(); - vm_page_free(dst_page); - vm_page_unlock_queues(); + VM_PAGE_FREE(dst_page); - } else if (ops & UPL_ROP_ABSENT) + } else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent) break; } else if (ops & UPL_ROP_PRESENT) break; @@ -6406,13 +7709,56 @@ vm_object_range_op( if (range) { if (offset > offset_end) offset = offset_end; - if(offset > offset_beg) - *range = offset - offset_beg; - else *range=0; + if(offset > offset_beg) { + *range = (uint32_t) (offset - offset_beg); + assert(*range == (offset - offset_beg)); + } else { + *range = 0; + } } return KERN_SUCCESS; } +/* + * Used to point a pager directly to a range of memory (when the pager may be associated + * with a non-device vnode). Takes a virtual address, an offset, and a size. We currently + * expect that the virtual address will denote the start of a range that is physically contiguous. 
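The overflow guard added to vm_object_range_op above rejects ranges whose 64-bit length cannot be reported through the 32-bit *range out-parameter, and then asserts that each narrowing was value-preserving. A standalone sketch of that pattern, with invented names:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Narrow a 64-bit range length into a 32-bit out-parameter: reject ranges
 * that cannot fit up front, then assert the narrowing lost nothing.
 */
static bool
report_range(uint64_t beg, uint64_t end, uint32_t *out)
{
    if (end - beg > (uint32_t)-1)
        return false;               /* caller would get KERN_INVALID_ARGUMENT */

    *out = (uint32_t)(end - beg);
    assert(*out == end - beg);      /* narrowing was value-preserving */
    return true;
}

int
main(void)
{
    uint32_t range;

    if (report_range(0, 1ull << 20, &range))
        printf("range = %u bytes\n", range);

    if (!report_range(0, 1ull << 33, &range))
        printf("8GB range rejected\n");
    return 0;
}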
+ */
+kern_return_t pager_map_to_phys_contiguous(
+ memory_object_control_t object,
+ memory_object_offset_t offset,
+ addr64_t base_vaddr,
+ vm_size_t size)
+{
+ ppnum_t page_num;
+ boolean_t clobbered_private;
+ kern_return_t retval;
+ vm_object_t pager_object;
+
+ page_num = pmap_find_phys(kernel_pmap, base_vaddr);
+
+ if (!page_num) {
+ retval = KERN_FAILURE;
+ goto out;
+ }
+
+ pager_object = memory_object_control_to_vm_object(object);
+
+ if (!pager_object) {
+ retval = KERN_FAILURE;
+ goto out;
+ }
+
+ clobbered_private = pager_object->private;
+ pager_object->private = TRUE;
+ retval = vm_object_populate_with_private(pager_object, offset, page_num, size);
+
+ if (retval != KERN_SUCCESS)
+ pager_object->private = clobbered_private;
+
+out:
+ return retval;
+}

 uint32_t scan_object_collision = 0;

@@ -6427,20 +7773,37 @@ vm_object_lock(vm_object_t object)
 }

 boolean_t
-vm_object_lock_try(vm_object_t object)
+vm_object_lock_avoid(vm_object_t object)
 {
 if (object == vm_pageout_scan_wants_object) {
 scan_object_collision++;
- mutex_pause(2);
+ return TRUE;
 }
+ return FALSE;
+}
+
+boolean_t
+_vm_object_lock_try(vm_object_t object)
+{
 return (lck_rw_try_lock_exclusive(&object->Lock));
 }

+boolean_t
+vm_object_lock_try(vm_object_t object)
+{
+ /*
+ * Called from hibernate path so check before blocking.
+ */
+ if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled() && get_preemption_level()==0) {
+ mutex_pause(2);
+ }
+ return _vm_object_lock_try(object);
+}
+
 void
 vm_object_lock_shared(vm_object_t object)
 {
- if (object == vm_pageout_scan_wants_object) {
- scan_object_collision++;
+ if (vm_object_lock_avoid(object)) {
 mutex_pause(2);
 }
 lck_rw_lock_shared(&object->Lock);
@@ -6449,9 +7812,294 @@ vm_object_lock_shared(vm_object_t object)
 boolean_t
 vm_object_lock_try_shared(vm_object_t object)
 {
- if (object == vm_pageout_scan_wants_object) {
- scan_object_collision++;
+ if (vm_object_lock_avoid(object)) {
 mutex_pause(2);
 }
 return (lck_rw_try_lock_shared(&object->Lock));
 }
+
+
+unsigned int vm_object_change_wimg_mode_count = 0;
+
+/*
+ * The object must be locked
+ */
+void
+vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode)
+{
+ vm_page_t p;
+
+ vm_object_lock_assert_exclusive(object);
+
+ vm_object_paging_wait(object, THREAD_UNINT);
+
+ queue_iterate(&object->memq, p, vm_page_t, listq) {
+
+ if (!p->fictitious)
+ pmap_set_cache_attributes(p->phys_page, wimg_mode);
+ }
+ if (wimg_mode == VM_WIMG_USE_DEFAULT)
+ object->set_cache_attr = FALSE;
+ else
+ object->set_cache_attr = TRUE;
+
+ object->wimg_bits = wimg_mode;
+
+ vm_object_change_wimg_mode_count++;
+}
+
+#if CONFIG_FREEZE
+
+kern_return_t vm_object_pack(
+ unsigned int *purgeable_count,
+ unsigned int *wired_count,
+ unsigned int *clean_count,
+ unsigned int *dirty_count,
+ unsigned int dirty_budget,
+ boolean_t *shared,
+ vm_object_t src_object,
+ struct default_freezer_handle *df_handle)
+{
+ kern_return_t kr = KERN_SUCCESS;
+
+ vm_object_lock(src_object);
+
+ *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
+ *shared = FALSE;
+
+ if (!src_object->alive || src_object->terminating){
+ kr = KERN_FAILURE;
+ goto done;
+ }
+
+ if (src_object->purgable == VM_PURGABLE_VOLATILE) {
+ *purgeable_count = src_object->resident_page_count;
+
+ /* If the default freezer handle is null, we're just walking the pages to discover how many can be hibernated */
+ if (df_handle != NULL) {
+ purgeable_q_t queue;
+ /* object should be on a queue */
+ assert(src_object->objq.next != NULL &&
+ src_object->objq.prev != NULL);
+ queue = vm_purgeable_object_remove(src_object);
+ assert(queue);
+ if (src_object->purgeable_when_ripe) {
+ vm_page_lock_queues();
+ vm_purgeable_token_delete_first(queue);
+ vm_page_unlock_queues();
+ }
+ vm_object_purge(src_object);
+ }
+ goto done;
+ }
+
+ if (src_object->ref_count == 1) {
+ vm_object_pack_pages(wired_count, clean_count, dirty_count, dirty_budget, src_object, df_handle);
+ } else {
+ if (src_object->internal) {
+ *shared = TRUE;
+ }
+ }
+done:
+ vm_object_unlock(src_object);
+
+ return kr;
+}
+
+
+void
+vm_object_pack_pages(
+ unsigned int *wired_count,
+ unsigned int *clean_count,
+ unsigned int *dirty_count,
+ unsigned int dirty_budget,
+ vm_object_t src_object,
+ struct default_freezer_handle *df_handle)
+{
+ vm_page_t p, next;
+
+ next = (vm_page_t)queue_first(&src_object->memq);
+
+ while (!queue_end(&src_object->memq, (queue_entry_t)next)) {
+ p = next;
+ next = (vm_page_t)queue_next(&next->listq);
+
+ /* Finish up if we've hit our pageout limit */
+ if (dirty_budget && (dirty_budget == *dirty_count)) {
+ break;
+ }
+ assert(!p->laundry);
+
+ if (p->fictitious || p->busy )
+ continue;
+
+ if (p->absent || p->unusual || p->error)
+ continue;
+
+ if (VM_PAGE_WIRED(p)) {
+ (*wired_count)++;
+ continue;
+ }
+
+ if (df_handle == NULL) {
+ if (p->dirty || pmap_is_modified(p->phys_page)) {
+ (*dirty_count)++;
+ } else {
+ (*clean_count)++;
+ }
+ continue;
+ }
+
+ if (p->cleaning) {
+ p->pageout = TRUE;
+ continue;
+ }
+
+ if (p->pmapped == TRUE) {
+ int refmod_state;
+ refmod_state = pmap_disconnect(p->phys_page);
+ if (refmod_state & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(p, FALSE);
+ }
+ }
+
+ if (p->dirty) {
+ default_freezer_pack_page(p, df_handle);
+ (*dirty_count)++;
+ }
+ else {
+ VM_PAGE_FREE(p);
+ (*clean_count)++;
+ }
+ }
+}
+
+void
+vm_object_pageout(
+ vm_object_t object)
+{
+ vm_page_t p, next;
+ struct vm_pageout_queue *iq;
+
+ iq = &vm_pageout_queue_internal;
+
+ assert(object != VM_OBJECT_NULL );
+
+ vm_object_lock(object);
+
+ if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
+ if (!object->pager_initialized) {
+ /*
+ * If there is no memory object for the page, create
+ * one and hand it to the default pager.
+ */
+ vm_object_pager_create(object);
+ }
+ }
+
+ReScan:
+ next = (vm_page_t)queue_first(&object->memq);
+
+ while (!queue_end(&object->memq, (queue_entry_t)next)) {
+ p = next;
+ next = (vm_page_t)queue_next(&next->listq);
+
+ /* Throw to the pageout queue */
+ vm_page_lockspin_queues();
+
+ /*
+ * see if page is already in the process of
+ * being cleaned... if so, leave it alone
+ */
+ if (!p->laundry) {
+
+ if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
+
+ if (VM_PAGE_Q_THROTTLED(iq)) {
+
+ iq->pgo_draining = TRUE;
+
+ assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ vm_object_lock(object);
+ goto ReScan;
+ }
+
+ if (p->fictitious || p->busy ) {
+ vm_page_unlock_queues();
+ continue;
+ }
+
+ if (p->absent || p->unusual || p->error || VM_PAGE_WIRED(p)) {
+ vm_page_unlock_queues();
+ continue;
+ }
+
+ if (p->cleaning) {
+ p->pageout = TRUE;
+ vm_page_unlock_queues();
+ continue;
+ }
+
+ if (p->pmapped == TRUE) {
+ int refmod_state;
+ refmod_state = pmap_disconnect_options(p->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
+ if (refmod_state & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(p, FALSE);
+ }
+ }
+
+ if (p->dirty == FALSE) {
+ vm_page_unlock_queues();
+ VM_PAGE_FREE(p);
+ continue;
+ }
+ }
+
+ VM_PAGE_QUEUES_REMOVE(p);
+ vm_pageout_cluster(p, TRUE);
+ }
+ vm_page_unlock_queues();
+ }
+
+ vm_object_unlock(object);
+}
+
+kern_return_t
+vm_object_pagein(
+ vm_object_t object)
+{
+ memory_object_t pager;
+ kern_return_t kr;
+
+ vm_object_lock(object);
+
+ pager = object->pager;
+
+ if (!object->pager_ready || pager == MEMORY_OBJECT_NULL) {
+ vm_object_unlock(object);
+ return KERN_FAILURE;
+ }
+
+ vm_object_paging_wait(object, THREAD_UNINT);
+ vm_object_paging_begin(object);
+
+ object->blocked_access = TRUE;
+ vm_object_unlock(object);
+
+ kr = memory_object_data_reclaim(pager, TRUE);
+
+ vm_object_lock(object);
+
+ object->blocked_access = FALSE;
+ vm_object_paging_end(object);
+
+ vm_object_unlock(object);
+
+ return kr;
+}
+#endif /* CONFIG_FREEZE */
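
As a rough illustration of the CONFIG_FREEZE interface added in the hunk above, the sketch below shows how an in-kernel caller might drive vm_object_pack() in its counting mode (df_handle == NULL), based only on the signature and behavior visible in this diff. The wrapper name freeze_count_object_pages, its policy for combining the counts, and the assumption that the caller already holds a reference on the object are illustrative and not part of the change.

/*
 * Illustrative sketch only -- not part of the diff above.
 * Classifies the pages of 'object' without packing or purging anything:
 * passing df_handle == NULL asks vm_object_pack() to count pages rather
 * than hand them to the default freezer.
 */
static kern_return_t
freeze_count_object_pages(
	vm_object_t	object,		/* caller is assumed to hold a reference */
	unsigned int	*candidate_pages)
{
	unsigned int	purgeable, wired, clean, dirty;
	boolean_t	shared;
	kern_return_t	kr;

	/*
	 * vm_object_pack() takes and drops the object lock itself.
	 * A dirty_budget of 0 means "no limit" in vm_object_pack_pages().
	 */
	kr = vm_object_pack(&purgeable, &wired, &clean, &dirty,
			    0,		/* dirty_budget: unlimited */
			    &shared,
			    object,
			    NULL);	/* NULL handle: count only */
	if (kr != KERN_SUCCESS)
		return kr;

	if (shared) {
		/* internal object with more than one reference; not packed */
		*candidate_pages = 0;
		return KERN_SUCCESS;
	}

	/*
	 * How the counts are combined is caller policy; this particular
	 * sum (volatile purgeable + clean + dirty, ignoring wired pages)
	 * is made up for the example.
	 */
	*candidate_pages = purgeable + clean + dirty;
	return KERN_SUCCESS;
}

One apparent benefit of keeping the census path and the packing path in the same routine is that the dirty_budget throttle and the skip rules for fictitious, busy, absent, unusual, error, and wired pages behave identically whether the freezer is merely measuring an object or actually packing it.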