X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/89b3af67bb32e691275bf6fa803d1834b2284115..cf7d32b81c573a0536dc4da4157f9c26f8d0bed3:/osfmk/vm/vm_object.c

diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c
index 83b3a0eeb..a573d49ea 100644
--- a/osfmk/vm/vm_object.c
+++ b/osfmk/vm/vm_object.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -62,6 +62,7 @@
  *	Virtual memory object module.
  */
 
+#include 
 #include 
 #include 
@@ -92,6 +93,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  *	Virtual memory objects maintain the actual data
@@ -205,6 +207,7 @@ static zone_t		vm_object_zone;		/* vm backing store zone */
 static struct vm_object		kernel_object_store;
 vm_object_t			kernel_object;
 
+
 /*
  *	The submap object is used as a placeholder for vm_map_submap
  *	operations.  The object is declared in vm_map.c because it
@@ -403,6 +406,12 @@ vm_object_allocate(
 	return object;
 }
 
+
+lck_grp_t		vm_object_lck_grp;
+lck_grp_attr_t		vm_object_lck_grp_attr;
+lck_attr_t		vm_object_lck_attr;
+lck_attr_t		kernel_object_lck_attr;
+
 /*
  *	vm_object_bootstrap:
  *
@@ -418,6 +427,7 @@ vm_object_bootstrap(void)
 				round_page_32(12*1024),
 				"vm objects");
 
+	queue_init(&vm_object_reaper_queue);
 	queue_init(&vm_object_cached_list);
 	mutex_init(&vm_object_cached_lock_data, 0);
 
@@ -430,11 +440,25 @@ vm_object_bootstrap(void)
 	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
 		queue_init(&vm_object_hashtable[i]);
 
+	vm_object_init_lck_grp();
+
 	/*
 	 *	Fill in a template object, for quick initialization
	 */
 
	/* memq; Lock; init after allocation */
+	vm_object_template.memq.prev = NULL;
+	vm_object_template.memq.next = NULL;
+#if 0
+	/*
+	 * We can't call vm_object_lock_init() here because that will
+	 * allocate some memory and VM is not fully initialized yet.
+	 * The lock will be initialized for each allocate object in
+	 * _vm_object_allocate(), so we don't need to initialize it in
+	 * the vm_object_template.
+ */ + vm_object_lock_init(&vm_object_template); +#endif vm_object_template.size = 0; vm_object_template.memq_hint = VM_PAGE_NULL; vm_object_template.ref_count = 1; @@ -445,16 +469,10 @@ vm_object_bootstrap(void) vm_object_template.copy = VM_OBJECT_NULL; vm_object_template.shadow = VM_OBJECT_NULL; vm_object_template.shadow_offset = (vm_object_offset_t) 0; - vm_object_template.cow_hint = ~(vm_offset_t)0; - vm_object_template.true_share = FALSE; - vm_object_template.pager = MEMORY_OBJECT_NULL; vm_object_template.paging_offset = 0; vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL; - /* msr_q; init after allocation */ - vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC; - vm_object_template.absent_count = 0; vm_object_template.paging_in_progress = 0; /* Begin bitfields */ @@ -469,29 +487,53 @@ vm_object_bootstrap(void) vm_object_template.private = FALSE; vm_object_template.pageout = FALSE; vm_object_template.alive = TRUE; - vm_object_template.purgable = VM_OBJECT_NONPURGABLE; + vm_object_template.purgable = VM_PURGABLE_DENY; + vm_object_template.shadowed = FALSE; vm_object_template.silent_overwrite = FALSE; vm_object_template.advisory_pageout = FALSE; - vm_object_template.shadowed = FALSE; + vm_object_template.true_share = FALSE; vm_object_template.terminating = FALSE; + vm_object_template.named = FALSE; vm_object_template.shadow_severed = FALSE; vm_object_template.phys_contiguous = FALSE; vm_object_template.nophyscache = FALSE; /* End bitfields */ - /* cache bitfields */ - vm_object_template.wimg_bits = VM_WIMG_DEFAULT; - - /* cached_list; init after allocation */ + vm_object_template.cached_list.prev = NULL; + vm_object_template.cached_list.next = NULL; + vm_object_template.msr_q.prev = NULL; + vm_object_template.msr_q.next = NULL; + vm_object_template.last_alloc = (vm_object_offset_t) 0; - vm_object_template.cluster_size = 0; + vm_object_template.sequential = (vm_object_offset_t) 0; + vm_object_template.pages_created = 0; + vm_object_template.pages_used = 0; + #if MACH_PAGEMAP vm_object_template.existence_map = VM_EXTERNAL_NULL; #endif /* MACH_PAGEMAP */ + vm_object_template.cow_hint = ~(vm_offset_t)0; #if MACH_ASSERT vm_object_template.paging_object = VM_OBJECT_NULL; #endif /* MACH_ASSERT */ + /* cache bitfields */ + vm_object_template.wimg_bits = VM_WIMG_DEFAULT; + vm_object_template.code_signed = FALSE; + vm_object_template.not_in_use = 0; +#ifdef UPL_DEBUG + vm_object_template.uplq.prev = NULL; + vm_object_template.uplq.next = NULL; +#endif /* UPL_DEBUG */ +#ifdef VM_PIP_DEBUG + bzero(&vm_object_template.pip_holders, + sizeof (vm_object_template.pip_holders)); +#endif /* VM_PIP_DEBUG */ + + vm_object_template.objq.next=NULL; + vm_object_template.objq.prev=NULL; + + /* * Initialize the "kernel object" */ @@ -545,14 +587,13 @@ vm_object_reaper_init(void) kern_return_t kr; thread_t thread; - queue_init(&vm_object_reaper_queue); kr = kernel_thread_start_priority( (thread_continue_t) vm_object_reaper_thread, NULL, BASEPRI_PREEMPT - 1, &thread); if (kr != KERN_SUCCESS) { - panic("failed to launch vm_object_reaper_thread kr=0x%x\n", kr); + panic("failed to launch vm_object_reaper_thread kr=0x%x", kr); } thread_deallocate(thread); } @@ -565,13 +606,20 @@ vm_object_init(void) */ } -/* remove the typedef below when emergency work-around is taken out */ -typedef struct vnode_pager { - memory_object_t pager; - memory_object_t pager_handle; /* pager */ - memory_object_control_t control_handle; /* memory object's control handle */ - void *vnode_handle; /* vnode handle */ -} 
*vnode_pager_t; + +__private_extern__ void +vm_object_init_lck_grp(void) +{ + /* + * initialze the vm_object lock world + */ + lck_grp_attr_setdefault(&vm_object_lck_grp_attr); + lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr); + lck_attr_setdefault(&vm_object_lck_attr); + lck_attr_setdefault(&kernel_object_lck_attr); + lck_attr_cleardebug(&kernel_object_lck_attr); +} + #define MIGHT_NOT_CACHE_SHADOWS 1 #if MIGHT_NOT_CACHE_SHADOWS @@ -589,16 +637,83 @@ static int cache_shadows = TRUE; * * No object may be locked. */ +unsigned long vm_object_deallocate_shared_successes = 0; +unsigned long vm_object_deallocate_shared_failures = 0; +unsigned long vm_object_deallocate_shared_swap_failures = 0; __private_extern__ void vm_object_deallocate( register vm_object_t object) { - boolean_t retry_cache_trim = FALSE; - vm_object_t shadow = VM_OBJECT_NULL; + boolean_t retry_cache_trim = FALSE; + vm_object_t shadow = VM_OBJECT_NULL; + uint32_t try_failed_count = 0; // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */ // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */ + if (object == VM_OBJECT_NULL) + return; + + if (object == kernel_object) { + vm_object_lock(kernel_object); + kernel_object->ref_count--; + if (kernel_object->ref_count == 0) { + panic("vm_object_deallocate: losing kernel_object\n"); + } + vm_object_unlock(kernel_object); + return; + } + + if (object->ref_count > 2 || + (!object->named && object->ref_count > 1)) { + UInt32 original_ref_count; + volatile UInt32 *ref_count_p; + Boolean atomic_swap; + + /* + * The object currently looks like it is not being + * kept alive solely by the reference we're about to release. + * Let's try and release our reference without taking + * all the locks we would need if we had to terminate the + * object (cache lock + exclusive object lock). + * Lock the object "shared" to make sure we don't race with + * anyone holding it "exclusive". + */ + vm_object_lock_shared(object); + ref_count_p = (volatile UInt32 *) &object->ref_count; + original_ref_count = object->ref_count; + /* + * Test again as "ref_count" could have changed. + * "named" shouldn't change. + */ + if (original_ref_count > 2 || + (!object->named && original_ref_count > 1)) { + atomic_swap = OSCompareAndSwap( + original_ref_count, + original_ref_count - 1, + (UInt32 *) &object->ref_count); + if (atomic_swap == FALSE) { + vm_object_deallocate_shared_swap_failures++; + } + + } else { + atomic_swap = FALSE; + } + vm_object_unlock(object); + + if (atomic_swap) { + /* ref_count was updated atomically ! */ + vm_object_deallocate_shared_successes++; + return; + } + + /* + * Someone else updated the ref_count at the same + * time and we lost the race. Fall back to the usual + * slow but safe path... 
+ */ + vm_object_deallocate_shared_failures++; + } while (object != VM_OBJECT_NULL) { @@ -620,7 +735,9 @@ vm_object_deallocate( if (vm_object_lock_try(object)) break; vm_object_cache_unlock(); - mutex_pause(); /* wait a bit */ + try_failed_count++; + + mutex_pause(try_failed_count); /* wait a bit */ } assert(object->ref_count > 0); @@ -641,6 +758,7 @@ vm_object_deallocate( memory_object_unmap(pager); + try_failed_count = 0; for (;;) { vm_object_cache_lock(); @@ -654,7 +772,9 @@ vm_object_deallocate( if (vm_object_lock_try(object)) break; vm_object_cache_unlock(); - mutex_pause(); /* wait a bit */ + try_failed_count++; + + mutex_pause(try_failed_count); /* wait a bit */ } assert(object->ref_count > 0); } @@ -673,6 +793,7 @@ vm_object_deallocate( /* terminate again. */ if ((object->ref_count > 1) || object->terminating) { + vm_object_lock_assert_exclusive(object); object->ref_count--; vm_object_res_deallocate(object); vm_object_cache_unlock(); @@ -738,6 +859,7 @@ vm_object_deallocate( * Now it is safe to decrement reference count, * and to return if reference count is > 0. */ + vm_object_lock_assert_exclusive(object); if (--object->ref_count > 0) { vm_object_res_deallocate(object); vm_object_unlock(object); @@ -908,6 +1030,7 @@ vm_object_cache_trim( assert(object->pager_initialized); assert(object->ref_count == 0); + vm_object_lock_assert_exclusive(object); object->ref_count++; /* @@ -931,7 +1054,15 @@ vm_object_cache_trim( } } -boolean_t vm_object_terminate_remove_all = FALSE; +#define VM_OBJ_TERM_STATS DEBUG +#if VM_OBJ_TERM_STATS +uint32_t vm_object_terminate_pages_freed = 0; +uint32_t vm_object_terminate_pages_removed = 0; +uint32_t vm_object_terminate_batches = 0; +uint32_t vm_object_terminate_biggest_batch = 0; +#endif /* VM_OBJ_TERM_STATS */ + +#define V_O_T_MAX_BATCH 256 /* * Routine: vm_object_terminate @@ -956,14 +1087,72 @@ vm_object_terminate( { register vm_page_t p; vm_object_t shadow_object; + vm_page_t local_free_q; + int loop_count; +#if VM_OBJ_TERM_STATS + uint32_t local_free_count; + uint32_t pages_removed; +#endif /* VM_OBJ_TERM_STATS */ + +#if VM_OBJ_TERM_STATS +#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count) \ + MACRO_BEGIN \ + if (_pages_removed) { \ + hw_atomic_add(&vm_object_terminate_batches, 1); \ + hw_atomic_add(&vm_object_terminate_pages_removed, \ + _pages_removed); \ + hw_atomic_add(&vm_object_terminate_pages_freed, \ + _local_free_count); \ + if (_local_free_count > \ + vm_object_terminate_biggest_batch) { \ + vm_object_terminate_biggest_batch = \ + _local_free_count; \ + } \ + _local_free_count = 0; \ + } \ + MACRO_END +#else /* VM_OBJ_TERM_STATS */ +#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count) +#endif /* VM_OBJ_TERM_STATS */ + +#define VM_OBJ_TERM_FREELIST(_pages_removed, _local_free_count, _local_free_q) \ + MACRO_BEGIN \ + VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count); \ + if (_local_free_q) { \ + vm_page_free_list(_local_free_q); \ + _local_free_q = VM_PAGE_NULL; \ + } \ + MACRO_END + + XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n", (integer_t)object, object->ref_count, 0, 0, 0); + local_free_q = VM_PAGE_NULL; +#if VM_OBJ_TERM_STATS + local_free_count = 0; + pages_removed = 0; +#endif /* VM_OBJ_TERM_STATS */ + if (!object->pageout && (!object->temporary || object->can_persist) && (object->pager != NULL || object->shadow_severed)) { vm_object_cache_unlock(); + loop_count = V_O_T_MAX_BATCH; + vm_page_lock_queues(); while (!queue_empty(&object->memq)) { + if (--loop_count == 0) { 
+ /* + * Free the pages we've reclaimed so far and + * take a little break to avoid hogging + * the page queues lock too long. + */ + VM_OBJ_TERM_FREELIST(pages_removed, + local_free_count, + local_free_q); + mutex_yield(&vm_page_queue_lock); + loop_count = V_O_T_MAX_BATCH; + } /* * Clear pager_trusted bit so that the pages get yanked * out of the object instead of cleaned in place. This @@ -977,17 +1166,24 @@ vm_object_terminate( if (p->busy || p->cleaning) { if(p->cleaning || p->absent) { + /* free the pages reclaimed so far */ + VM_OBJ_TERM_FREELIST(pages_removed, + local_free_count, + local_free_q); + vm_page_unlock_queues(); vm_object_paging_wait(object, THREAD_UNINT); + vm_page_lock_queues(); continue; } else { - panic("vm_object_terminate.3 0x%x 0x%x", object, p); + panic("vm_object_terminate.3 %p %p", object, p); } } - vm_page_lock_queues(); p->busy = TRUE; VM_PAGE_QUEUES_REMOVE(p); - vm_page_unlock_queues(); +#if VM_OBJ_TERM_STATS + pages_removed++; +#endif /* VM_OBJ_TERM_STATS */ if (p->absent || p->private) { @@ -1002,23 +1198,50 @@ vm_object_terminate( goto free_page; } - if (p->fictitious) - panic("vm_object_terminate.4 0x%x 0x%x", object, p); + if (p->fictitious) { + if (p->phys_page == vm_page_guard_addr) { + goto free_page; + } + panic("vm_object_terminate.4 %p %p", object, p); + } - if (!p->dirty) + if (!p->dirty && p->wpmapped) p->dirty = pmap_is_modified(p->phys_page); if ((p->dirty || p->precious) && !p->error && object->alive) { + /* free the pages reclaimed so far */ + VM_OBJ_TERM_FREELIST(pages_removed, + local_free_count, + local_free_q); + vm_page_unlock_queues(); vm_pageout_cluster(p); /* flush page */ vm_object_paging_wait(object, THREAD_UNINT); XPR(XPR_VM_OBJECT, "vm_object_terminate restart, object 0x%X ref %d\n", (integer_t)object, object->ref_count, 0, 0, 0); + vm_page_lock_queues(); } else { free_page: - VM_PAGE_FREE(p); + /* + * Add this page to our list of reclaimed pages, + * to be freed later. + */ + vm_page_free_prepare(p); + p->pageq.next = (queue_entry_t) local_free_q; + local_free_q = p; +#if VM_OBJ_TERM_STATS + local_free_count++; +#endif /* VM_OBJ_TERM_STATS */ } } + + /* + * Free the remaining reclaimed pages. + */ + VM_OBJ_TERM_FREELIST(pages_removed, + local_free_count, + local_free_q); + vm_page_unlock_queues(); vm_object_unlock(object); vm_object_cache_lock(); vm_object_lock(object); @@ -1028,7 +1251,8 @@ vm_object_terminate( * Make sure the object isn't already being terminated */ if(object->terminating) { - object->ref_count -= 1; + vm_object_lock_assert_exclusive(object); + object->ref_count--; assert(object->ref_count > 0); vm_object_cache_unlock(); vm_object_unlock(object); @@ -1040,7 +1264,8 @@ vm_object_terminate( * cleaning it? */ if(object->ref_count != 1) { - object->ref_count -= 1; + vm_object_lock_assert_exclusive(object); + object->ref_count--; assert(object->ref_count > 0); vm_object_res_deallocate(object); vm_object_cache_unlock(); @@ -1069,7 +1294,7 @@ vm_object_terminate( vm_object_unlock(shadow_object); } - if (FALSE && object->paging_in_progress != 0) { + if (object->paging_in_progress != 0) { /* * There are still some paging_in_progress references * on this object, meaning that there are some paging @@ -1094,7 +1319,14 @@ vm_object_terminate( vm_object_reap_async(object); vm_object_cache_unlock(); vm_object_unlock(object); - return KERN_SUCCESS; + /* + * Return KERN_FAILURE to let the caller know that we + * haven't completed the termination and it can't drop this + * object's reference on its shadow object yet. 
+ * The reaper thread will take care of that once it has + * completed this object's termination. + */ + return KERN_FAILURE; } /* complete the VM object termination */ @@ -1102,6 +1334,14 @@ vm_object_terminate( object = VM_OBJECT_NULL; /* cache lock and object lock were released by vm_object_reap() */ + /* + * KERN_SUCCESS means that this object has been terminated + * and no longer needs its shadow object but still holds a + * reference on it. + * The caller is responsible for dropping that reference. + * We can't call vm_object_deallocate() here because that + * would create a recursion. + */ return KERN_SUCCESS; } @@ -1120,32 +1360,25 @@ vm_object_reap( { memory_object_t pager; vm_page_t p; + vm_page_t local_free_q; + int loop_count; +#if VM_OBJ_TERM_STATS + uint32_t local_free_count; +#endif /* VM_OBJ_TERM_STATS */ #if DEBUG mutex_assert(&vm_object_cached_lock_data, MA_OWNED); - mutex_assert(&object->Lock, MA_OWNED); #endif /* DEBUG */ + vm_object_lock_assert_exclusive(object); + assert(object->paging_in_progress == 0); vm_object_reap_count++; - /* - * The pageout daemon might be playing with our pages. - * Now that the object is dead, it won't touch any more - * pages, but some pages might already be on their way out. - * Hence, we wait until the active paging activities have - * ceased before we break the association with the pager - * itself. - */ - while (object->paging_in_progress != 0) { - vm_object_cache_unlock(); - vm_object_wait(object, - VM_OBJECT_EVENT_PAGING_IN_PROGRESS, - THREAD_UNINT); - vm_object_cache_lock(); - vm_object_lock(object); - } + local_free_q = VM_PAGE_NULL; +#if VM_OBJ_TERM_STATS + local_free_count = 0; +#endif /* VM_OBJ_TERM_STATS */ - assert(object->paging_in_progress == 0); pager = object->pager; object->pager = MEMORY_OBJECT_NULL; @@ -1153,6 +1386,7 @@ vm_object_reap( memory_object_control_disable(object->pager_control); vm_object_cache_unlock(); + vm_object_lock_assert_exclusive(object); object->ref_count--; #if TASK_SWAPPER assert(object->res_count == 0); @@ -1160,6 +1394,19 @@ vm_object_reap( assert (object->ref_count == 0); + /* remove from purgeable queue if it's on */ + if (object->objq.next || object->objq.prev) { + purgeable_q_t queue = vm_purgeable_object_remove(object); + assert(queue); + + /* Must take page lock for this - using it to protect token queue */ + vm_page_lock_queues(); + vm_purgeable_token_delete_first(queue); + + assert(queue->debug_count_objects>=0); + vm_page_unlock_queues(); + } + /* * Clean or free the pages, as appropriate. 
* It is possible for us to find busy/absent pages, @@ -1172,12 +1419,39 @@ vm_object_reap( } else if ((object->temporary && !object->can_persist) || (pager == MEMORY_OBJECT_NULL)) { + loop_count = V_O_T_MAX_BATCH; + vm_page_lock_queues(); while (!queue_empty(&object->memq)) { + if (--loop_count == 0) { + /* + * Free the pages we reclaimed so far + * and take a little break to avoid + * hogging the page queue lock too long + */ + VM_OBJ_TERM_FREELIST(local_free_count, + local_free_count, + local_free_q); + mutex_yield(&vm_page_queue_lock); + loop_count = V_O_T_MAX_BATCH; + } p = (vm_page_t) queue_first(&object->memq); - VM_PAGE_CHECK(p); - VM_PAGE_FREE(p); + vm_page_free_prepare(p); + + assert(p->pageq.next == NULL && p->pageq.prev == NULL); + p->pageq.next = (queue_entry_t) local_free_q; + local_free_q = p; +#if VM_OBJ_TERM_STATS + local_free_count++; +#endif /* VM_OBJ_TERM_STATS */ } + /* + * Free the remaining reclaimed pages + */ + VM_OBJ_TERM_FREELIST(local_free_count, + local_free_count, + local_free_q); + vm_page_unlock_queues(); } else if (!queue_empty(&object->memq)) { panic("vm_object_reap: queue just emptied isn't"); } @@ -1206,6 +1480,9 @@ vm_object_reap( vm_external_destroy(object->existence_map, object->size); #endif /* MACH_PAGEMAP */ + object->shadow = VM_OBJECT_NULL; + + vm_object_lock_destroy(object); /* * Free the space for the object. */ @@ -1219,8 +1496,8 @@ vm_object_reap_async( { #if DEBUG mutex_assert(&vm_object_cached_lock_data, MA_OWNED); - mutex_assert(&object->Lock, MA_OWNED); #endif /* DEBUG */ + vm_object_lock_assert_exclusive(object); vm_object_reap_count_async++; @@ -1234,7 +1511,7 @@ vm_object_reap_async( void vm_object_reaper_thread(void) { - vm_object_t object; + vm_object_t object, shadow_object; vm_object_cache_lock(); @@ -1247,10 +1524,39 @@ vm_object_reaper_thread(void) assert(object->terminating); assert(!object->alive); + /* + * The pageout daemon might be playing with our pages. + * Now that the object is dead, it won't touch any more + * pages, but some pages might already be on their way out. + * Hence, we wait until the active paging activities have + * ceased before we break the association with the pager + * itself. + */ + while (object->paging_in_progress != 0) { + vm_object_cache_unlock(); + vm_object_wait(object, + VM_OBJECT_EVENT_PAGING_IN_PROGRESS, + THREAD_UNINT); + vm_object_cache_lock(); + vm_object_lock(object); + } + + shadow_object = + object->pageout ? VM_OBJECT_NULL : object->shadow; + vm_object_reap(object); /* cache is unlocked and object is no longer valid */ object = VM_OBJECT_NULL; + if (shadow_object != VM_OBJECT_NULL) { + /* + * Drop the reference "object" was holding on + * its shadow object. 
+ */ + vm_object_deallocate(shadow_object); + shadow_object = VM_OBJECT_NULL; + } + vm_object_cache_lock(); } @@ -1390,6 +1696,11 @@ vm_object_destroy( return(KERN_SUCCESS); } +#define VM_OBJ_DEACT_ALL_STATS DEBUG +#if VM_OBJ_DEACT_ALL_STATS +uint32_t vm_object_deactivate_all_pages_batches = 0; +uint32_t vm_object_deactivate_all_pages_pages = 0; +#endif /* VM_OBJ_DEACT_ALL_STATS */ /* * vm_object_deactivate_pages * @@ -1403,13 +1714,45 @@ vm_object_deactivate_all_pages( register vm_object_t object) { register vm_page_t p; - + int loop_count; +#if VM_OBJ_DEACT_ALL_STATS + int pages_count; +#endif /* VM_OBJ_DEACT_ALL_STATS */ +#define V_O_D_A_P_MAX_BATCH 256 + + loop_count = V_O_D_A_P_MAX_BATCH; +#if VM_OBJ_DEACT_ALL_STATS + pages_count = 0; +#endif /* VM_OBJ_DEACT_ALL_STATS */ + vm_page_lock_queues(); queue_iterate(&object->memq, p, vm_page_t, listq) { - vm_page_lock_queues(); - if (!p->busy) + if (--loop_count == 0) { +#if VM_OBJ_DEACT_ALL_STATS + hw_atomic_add(&vm_object_deactivate_all_pages_batches, + 1); + hw_atomic_add(&vm_object_deactivate_all_pages_pages, + pages_count); + pages_count = 0; +#endif /* VM_OBJ_DEACT_ALL_STATS */ + mutex_yield(&vm_page_queue_lock); + loop_count = V_O_D_A_P_MAX_BATCH; + } + if (!p->busy && !p->throttled) { +#if VM_OBJ_DEACT_ALL_STATS + pages_count++; +#endif /* VM_OBJ_DEACT_ALL_STATS */ vm_page_deactivate(p); - vm_page_unlock_queues(); + } + } +#if VM_OBJ_DEACT_ALL_STATS + if (pages_count) { + hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1); + hw_atomic_add(&vm_object_deactivate_all_pages_pages, + pages_count); + pages_count = 0; } +#endif /* VM_OBJ_DEACT_ALL_STATS */ + vm_page_unlock_queues(); } __private_extern__ void @@ -1455,29 +1798,40 @@ vm_object_deactivate_pages( m->precious = FALSE; m->dirty = FALSE; pmap_clear_modify(m->phys_page); +#if MACH_PAGEMAP vm_external_state_clr(object->existence_map, offset); +#endif /* MACH_PAGEMAP */ } - VM_PAGE_QUEUES_REMOVE(m); - assert(!m->laundry); - assert(m->object != kernel_object); - assert(m->pageq.next == NULL && - m->pageq.prev == NULL); - if(m->zero_fill) { - queue_enter_first( + if (!m->throttled) { + VM_PAGE_QUEUES_REMOVE(m); + + assert(!m->laundry); + assert(m->object != kernel_object); + assert(m->pageq.next == NULL && + m->pageq.prev == NULL); + + if(m->zero_fill) { + queue_enter_first( &vm_page_queue_zf, m, vm_page_t, pageq); - } else { - queue_enter_first( - &vm_page_queue_inactive, - m, vm_page_t, pageq); - } + vm_zf_queue_count++; + } else { + queue_enter_first( + &vm_page_queue_inactive, + m, vm_page_t, pageq); + } - m->inactive = TRUE; - if (!m->fictitious) - vm_page_inactive_count++; + m->inactive = TRUE; + if (!m->fictitious) { + vm_page_inactive_count++; + token_new_pagecount++; + } else { + assert(m->phys_page == vm_page_fictitious_addr); + } - pages_moved++; + pages_moved++; + } } } } @@ -1544,6 +1898,28 @@ vm_object_pmap_protect( vm_object_lock(object); + if (object->phys_contiguous) { + if (pmap != NULL) { + vm_object_unlock(object); + pmap_protect(pmap, pmap_start, pmap_start + size, prot); + } else { + vm_object_offset_t phys_start, phys_end, phys_addr; + + phys_start = object->shadow_offset + offset; + phys_end = phys_start + size; + assert(phys_start <= phys_end); + assert(phys_end <= object->shadow_offset + object->size); + vm_object_unlock(object); + + for (phys_addr = phys_start; + phys_addr < phys_end; + phys_addr += PAGE_SIZE_64) { + pmap_page_protect(phys_addr >> 12, prot); + } + } + return; + } + assert(object->internal); while (TRUE) { @@ -1577,8 +1953,7 @@ 
vm_object_pmap_protect( if (!p->fictitious && (offset <= p->offset) && (p->offset < end)) { - pmap_page_protect(p->phys_page, - prot & ~p->page_lock); + pmap_page_protect(p->phys_page, prot); } } } @@ -1607,8 +1982,7 @@ vm_object_pmap_protect( target_off < end; target_off += PAGE_SIZE) { p = vm_page_lookup(object, target_off); if (p != VM_PAGE_NULL) { - pmap_page_protect(p->phys_page, - prot & ~p->page_lock); + pmap_page_protect(p->phys_page, prot); } } } @@ -1688,8 +2062,7 @@ vm_object_copy_slowly( vm_object_t new_object; vm_object_offset_t new_offset; - vm_object_offset_t src_lo_offset = src_offset; - vm_object_offset_t src_hi_offset = src_offset + size; + struct vm_object_fault_info fault_info; XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0); @@ -1704,9 +2077,7 @@ vm_object_copy_slowly( * Prevent destruction of the source object while we copy. */ - assert(src_object->ref_count > 0); - src_object->ref_count++; - VM_OBJ_RES_INCR(src_object); + vm_object_reference_locked(src_object); vm_object_unlock(src_object); /* @@ -1720,10 +2091,16 @@ vm_object_copy_slowly( new_object = vm_object_allocate(size); new_offset = 0; - vm_object_lock(new_object); assert(size == trunc_page_64(size)); /* Will the loop terminate? */ + fault_info.interruptible = interruptible; + fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; + fault_info.user_tag = 0; + fault_info.lo_offset = src_offset; + fault_info.hi_offset = src_offset + size; + fault_info.no_cache = FALSE; + for ( ; size != 0 ; src_offset += PAGE_SIZE_64, @@ -1732,16 +2109,22 @@ vm_object_copy_slowly( vm_page_t new_page; vm_fault_return_t result; + vm_object_lock(new_object); + while ((new_page = vm_page_alloc(new_object, new_offset)) == VM_PAGE_NULL) { + + vm_object_unlock(new_object); + if (!vm_page_wait(interruptible)) { - vm_object_unlock(new_object); vm_object_deallocate(new_object); vm_object_deallocate(src_object); *_result_object = VM_OBJECT_NULL; return(MACH_SEND_INTERRUPTED); } + vm_object_lock(new_object); } + vm_object_unlock(new_object); do { vm_prot_t prot = VM_PROT_READ; @@ -1754,14 +2137,14 @@ vm_object_copy_slowly( vm_object_lock(src_object); vm_object_paging_begin(src_object); + fault_info.cluster_size = size; + XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0); result = vm_fault_page(src_object, src_offset, - VM_PROT_READ, FALSE, interruptible, - src_lo_offset, src_hi_offset, - VM_BEHAVIOR_SEQUENTIAL, + VM_PROT_READ, FALSE, &prot, &_result_page, &top_page, (int *)0, - &error_code, FALSE, FALSE, NULL, 0); + &error_code, FALSE, FALSE, &fault_info); switch(result) { case VM_FAULT_SUCCESS: @@ -1788,15 +2171,18 @@ vm_object_copy_slowly( * Let go of both pages (make them * not busy, perform wakeup, activate). 
*/ - - new_page->busy = FALSE; + vm_object_lock(new_object); new_page->dirty = TRUE; + PAGE_WAKEUP_DONE(new_page); + vm_object_unlock(new_object); + vm_object_lock(result_page->object); PAGE_WAKEUP_DONE(result_page); - vm_page_lock_queues(); + vm_page_lockspin_queues(); if (!result_page->active && - !result_page->inactive) + !result_page->inactive && + !result_page->throttled) vm_page_activate(result_page); vm_page_activate(new_page); vm_page_unlock_queues(); @@ -1825,7 +2211,6 @@ vm_object_copy_slowly( case VM_FAULT_INTERRUPTED: vm_page_free(new_page); - vm_object_unlock(new_object); vm_object_deallocate(new_object); vm_object_deallocate(src_object); *_result_object = VM_OBJECT_NULL; @@ -1843,7 +2228,7 @@ vm_object_copy_slowly( vm_page_lock_queues(); vm_page_free(new_page); vm_page_unlock_queues(); - vm_object_unlock(new_object); + vm_object_deallocate(new_object); vm_object_deallocate(src_object); *_result_object = VM_OBJECT_NULL; @@ -1856,8 +2241,6 @@ vm_object_copy_slowly( /* * Lose the extra reference, and return our object. */ - - vm_object_unlock(new_object); vm_object_deallocate(src_object); *_result_object = new_object; return(KERN_SUCCESS); @@ -1913,9 +2296,7 @@ vm_object_copy_quickly( * Leave object/offset unchanged. */ - assert(object->ref_count > 0); - object->ref_count++; - vm_object_res_reference(object); + vm_object_reference_locked(object); object->shadowed = TRUE; vm_object_unlock(object); @@ -1973,6 +2354,7 @@ vm_object_copy_call( kern_return_t kr; vm_object_t copy; boolean_t check_ready = FALSE; + uint32_t try_failed_count = 0; /* * If a copy is already in progress, wait and retry. @@ -2032,7 +2414,10 @@ Retry: copy = src_object->copy; if (!vm_object_lock_try(copy)) { vm_object_unlock(src_object); - mutex_pause(); /* wait a bit */ + + try_failed_count++; + mutex_pause(try_failed_count); /* wait a bit */ + vm_object_lock(src_object); goto Retry; } @@ -2084,13 +2469,15 @@ __private_extern__ vm_object_t vm_object_copy_delayed( vm_object_t src_object, vm_object_offset_t src_offset, - vm_object_size_t size) + vm_object_size_t size, + boolean_t src_object_shared) { vm_object_t new_copy = VM_OBJECT_NULL; vm_object_t old_copy; vm_page_t p; vm_object_size_t copy_size = src_offset + size; + int collisions = 0; /* * The user-level memory manager wants to see all of the changes @@ -2136,9 +2523,15 @@ vm_object_copy_delayed( /* * Wait for paging in progress. */ - if (!src_object->true_share) + if (!src_object->true_share && src_object->paging_in_progress) { + if (src_object_shared == TRUE) { + vm_object_unlock(src_object); + + vm_object_lock(src_object); + src_object_shared = FALSE; + } vm_object_paging_wait(src_object, THREAD_UNINT); - + } /* * See whether we can reuse the result of a previous * copy operation. 
@@ -2146,22 +2539,34 @@ vm_object_copy_delayed( old_copy = src_object->copy; if (old_copy != VM_OBJECT_NULL) { + int lock_granted; + /* * Try to get the locks (out of order) */ - if (!vm_object_lock_try(old_copy)) { + if (src_object_shared == TRUE) + lock_granted = vm_object_lock_try_shared(old_copy); + else + lock_granted = vm_object_lock_try(old_copy); + + if (!lock_granted) { vm_object_unlock(src_object); - mutex_pause(); - /* Heisenberg Rules */ - copy_delayed_lock_collisions++; if (collisions++ == 0) copy_delayed_lock_contention++; + mutex_pause(collisions); + + /* Heisenberg Rules */ + copy_delayed_lock_collisions++; if (collisions > copy_delayed_max_collisions) copy_delayed_max_collisions = collisions; - vm_object_lock(src_object); + if (src_object_shared == TRUE) + vm_object_lock_shared(src_object); + else + vm_object_lock(src_object); + goto Retry; } @@ -2182,6 +2587,14 @@ vm_object_copy_delayed( */ if (old_copy->size < copy_size) { + if (src_object_shared == TRUE) { + vm_object_unlock(old_copy); + vm_object_unlock(src_object); + + vm_object_lock(src_object); + src_object_shared = FALSE; + goto Retry; + } /* * We can't perform a delayed copy if any of the * pages in the extended range are wired (because @@ -2190,6 +2603,7 @@ vm_object_copy_delayed( * go ahead and protect them. */ copy_delayed_protect_iterate++; + queue_iterate(&src_object->memq, p, vm_page_t, listq) { if (!p->fictitious && p->offset >= old_copy->size && @@ -2206,15 +2620,16 @@ vm_object_copy_delayed( return VM_OBJECT_NULL; } else { pmap_page_protect(p->phys_page, - (VM_PROT_ALL & ~VM_PROT_WRITE & - ~p->page_lock)); + (VM_PROT_ALL & ~VM_PROT_WRITE)); } } } old_copy->size = copy_size; } - - vm_object_reference_locked(old_copy); + if (src_object_shared == TRUE) + vm_object_reference_shared(old_copy); + else + vm_object_reference_locked(old_copy); vm_object_unlock(old_copy); vm_object_unlock(src_object); @@ -2222,9 +2637,10 @@ vm_object_copy_delayed( vm_object_unlock(new_copy); vm_object_deallocate(new_copy); } - return(old_copy); } + + /* * Adjust the size argument so that the newly-created @@ -2240,6 +2656,8 @@ vm_object_copy_delayed( new_copy = vm_object_allocate(copy_size); vm_object_lock(src_object); vm_object_lock(new_copy); + + src_object_shared = FALSE; goto Retry; } new_copy->size = copy_size; @@ -2259,6 +2677,8 @@ vm_object_copy_delayed( new_copy = vm_object_allocate(copy_size); vm_object_lock(src_object); vm_object_lock(new_copy); + + src_object_shared = FALSE; goto Retry; } @@ -2274,6 +2694,7 @@ vm_object_copy_delayed( * wired, then go ahead and protect them. */ copy_delayed_protect_iterate++; + queue_iterate(&src_object->memq, p, vm_page_t, listq) { if (!p->fictitious && p->offset < copy_size) { if (p->wire_count > 0) { @@ -2285,12 +2706,10 @@ vm_object_copy_delayed( return VM_OBJECT_NULL; } else { pmap_page_protect(p->phys_page, - (VM_PROT_ALL & ~VM_PROT_WRITE & - ~p->page_lock)); + (VM_PROT_ALL & ~VM_PROT_WRITE)); } } } - if (old_copy != VM_OBJECT_NULL) { /* * Make the old copy-object shadow the new one. @@ -2298,9 +2717,13 @@ vm_object_copy_delayed( * object. */ - src_object->ref_count--; /* remove ref. from old_copy */ + /* remove ref. from old_copy */ + vm_object_lock_assert_exclusive(src_object); + src_object->ref_count--; assert(src_object->ref_count > 0); + vm_object_lock_assert_exclusive(old_copy); old_copy->shadow = new_copy; + vm_object_lock_assert_exclusive(new_copy); assert(new_copy->ref_count > 0); new_copy->ref_count++; /* for old_copy->shadow ref. 
*/ @@ -2317,12 +2740,13 @@ vm_object_copy_delayed( /* * Point the new copy at the existing object. */ + vm_object_lock_assert_exclusive(new_copy); new_copy->shadow = src_object; new_copy->shadow_offset = 0; new_copy->shadowed = TRUE; /* caller must set needs_copy */ - assert(src_object->ref_count > 0); - src_object->ref_count++; - VM_OBJ_RES_INCR(src_object); + + vm_object_lock_assert_exclusive(src_object); + vm_object_reference_locked(src_object); src_object->copy = new_copy; vm_object_unlock(src_object); vm_object_unlock(new_copy); @@ -2331,7 +2755,7 @@ vm_object_copy_delayed( "vm_object_copy_delayed: used copy object %X for source %X\n", (integer_t)new_copy, (integer_t)src_object, 0, 0, 0); - return(new_copy); + return new_copy; } /* @@ -2353,11 +2777,18 @@ vm_object_copy_strategically( { boolean_t result; boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */ + boolean_t object_lock_shared = FALSE; memory_object_copy_strategy_t copy_strategy; assert(src_object != VM_OBJECT_NULL); - vm_object_lock(src_object); + copy_strategy = src_object->copy_strategy; + + if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) { + vm_object_lock_shared(src_object); + object_lock_shared = TRUE; + } else + vm_object_lock(src_object); /* * The copy strategy is only valid if the memory manager @@ -2367,6 +2798,12 @@ vm_object_copy_strategically( while (!src_object->internal && !src_object->pager_ready) { wait_result_t wait_result; + if (object_lock_shared == TRUE) { + vm_object_unlock(src_object); + vm_object_lock(src_object); + object_lock_shared = FALSE; + continue; + } wait_result = vm_object_sleep( src_object, VM_OBJECT_EVENT_PAGER_READY, interruptible); @@ -2379,8 +2816,6 @@ vm_object_copy_strategically( } } - copy_strategy = src_object->copy_strategy; - /* * Use the appropriate copy strategy. */ @@ -2388,7 +2823,7 @@ vm_object_copy_strategically( switch (copy_strategy) { case MEMORY_OBJECT_COPY_DELAY: *dst_object = vm_object_copy_delayed(src_object, - src_offset, size); + src_offset, size, object_lock_shared); if (*dst_object != VM_OBJECT_NULL) { *dst_offset = src_offset; *dst_needs_copy = TRUE; @@ -2451,7 +2886,22 @@ vm_object_shadow( register vm_object_t result; source = *object; +#if 0 + /* + * XXX FBDP + * This assertion is valid but it gets triggered by Rosetta for example + * due to a combination of vm_remap() that changes a VM object's + * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY) + * that then sets "needs_copy" on its map entry. This creates a + * mapping situation that VM should never see and doesn't know how to + * handle. + * It's not clear if this can create any real problem but we should + * look into fixing this, probably by having vm_protect(VM_PROT_COPY) + * do more than just set "needs_copy" to handle the copy-on-write... + * In the meantime, let's disable the assertion. + */ assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC); +#endif /* * Determine if we really need a shadow. @@ -2563,175 +3013,19 @@ vm_object_shadow( * Because the pager field may be cleared spontaneously, it * cannot be used to determine whether a memory object has * ever been associated with a particular vm_object. [This - * knowledge is important to the shadow object mechanism.] - * For this reason, an additional "created" attribute is - * provided. - * - * During various paging operations, the pager reference found in the - * vm_object must be valid. 
To prevent this from being released, - * (other than being removed, i.e., made null), routines may use - * the vm_object_paging_begin/end routines [actually, macros]. - * The implementation uses the "paging_in_progress" and "wanted" fields. - * [Operations that alter the validity of the pager values include the - * termination routines and vm_object_collapse.] - */ - -#if 0 -static void vm_object_abort_activity( - vm_object_t object); - -/* - * Routine: vm_object_abort_activity [internal use only] - * Purpose: - * Abort paging requests pending on this object. - * In/out conditions: - * The object is locked on entry and exit. - */ -static void -vm_object_abort_activity( - vm_object_t object) -{ - register - vm_page_t p; - vm_page_t next; - - XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n", - (integer_t)object, 0, 0, 0, 0); - - /* - * Abort all activity that would be waiting - * for a result on this memory object. - * - * We could also choose to destroy all pages - * that we have in memory for this object, but - * we don't. - */ - - p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { - next = (vm_page_t) queue_next(&p->listq); - - /* - * If it's being paged in, destroy it. - * If an unlock has been requested, start it again. - */ - - if (p->busy && p->absent) { - VM_PAGE_FREE(p); - } - else { - if (p->unlock_request != VM_PROT_NONE) - p->unlock_request = VM_PROT_NONE; - PAGE_WAKEUP(p); - } - - p = next; - } - - /* - * Wake up threads waiting for the memory object to - * become ready. - */ - - object->pager_ready = TRUE; - vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY); -} - -/* - * Routine: vm_object_pager_dead - * - * Purpose: - * A port is being destroy, and the IPC kobject code - * can't tell if it represents a pager port or not. - * So this function is called each time it sees a port - * die. - * THIS IS HORRIBLY INEFFICIENT. We should only call - * this routine if we had requested a notification on - * the port. - */ - -__private_extern__ void -vm_object_pager_dead( - ipc_port_t pager) -{ - vm_object_t object; - vm_object_hash_entry_t entry; - - /* - * Perform essentially the same operations as in vm_object_lookup, - * except that this time we look up based on the memory_object - * port, not the control port. - */ - vm_object_cache_lock(); - entry = vm_object_hash_lookup(pager, FALSE); - if (entry == VM_OBJECT_HASH_ENTRY_NULL || - entry->object == VM_OBJECT_NULL) { - vm_object_cache_unlock(); - return; - } - - object = entry->object; - entry->object = VM_OBJECT_NULL; - - vm_object_lock(object); - if (object->ref_count == 0) { - XPR(XPR_VM_OBJECT_CACHE, - "vm_object_destroy: removing %x from cache, head (%x, %x)\n", - (integer_t)object, - (integer_t)vm_object_cached_list.next, - (integer_t)vm_object_cached_list.prev, 0,0); - - queue_remove(&vm_object_cached_list, object, - vm_object_t, cached_list); - vm_object_cached_count--; - } - object->ref_count++; - vm_object_res_reference(object); - - object->can_persist = FALSE; - - assert(object->pager == pager); - - /* - * Remove the pager association. - * - * Note that the memory_object itself is dead, so - * we don't bother with it. - */ - - object->pager = MEMORY_OBJECT_NULL; - - vm_object_unlock(object); - vm_object_cache_unlock(); - - vm_object_pager_wakeup(pager); - - /* - * Release the pager reference. Note that there's no - * point in trying the memory_object_terminate call - * because the memory_object itself is dead. 
Also - * release the memory_object_control reference, since - * the pager didn't do that either. - */ - - memory_object_deallocate(pager); - memory_object_control_deallocate(object->pager_request); - - - /* - * Restart pending page requests - */ - vm_object_lock(object); - vm_object_abort_activity(object); - vm_object_unlock(object); - - /* - * Lose the object reference. - */ + * knowledge is important to the shadow object mechanism.] + * For this reason, an additional "created" attribute is + * provided. + * + * During various paging operations, the pager reference found in the + * vm_object must be valid. To prevent this from being released, + * (other than being removed, i.e., made null), routines may use + * the vm_object_paging_begin/end routines [actually, macros]. + * The implementation uses the "paging_in_progress" and "wanted" fields. + * [Operations that alter the validity of the pager values include the + * termination routines and vm_object_collapse.] + */ - vm_object_deallocate(object); -} -#endif /* * Routine: vm_object_enter @@ -2752,6 +3046,7 @@ vm_object_enter( vm_object_t new_object; boolean_t must_init; vm_object_hash_entry_t entry, new_entry; + uint32_t try_failed_count = 0; if (pager == MEMORY_OBJECT_NULL) return(vm_object_allocate(size)); @@ -2763,7 +3058,7 @@ vm_object_enter( /* * Look for an object associated with this port. */ - +Retry: vm_object_cache_lock(); do { entry = vm_object_hash_lookup(pager, FALSE); @@ -2810,7 +3105,15 @@ vm_object_enter( assert(object != VM_OBJECT_NULL); if (!must_init) { - vm_object_lock(object); + if (!vm_object_lock_try(object)) { + + vm_object_cache_unlock(); + + try_failed_count++; + mutex_pause(try_failed_count); /* wait a bit */ + + goto Retry; + } assert(!internal || object->internal); if (named) { assert(!object->named); @@ -2826,15 +3129,16 @@ vm_object_enter( vm_object_t, cached_list); vm_object_cached_count--; } + vm_object_lock_assert_exclusive(object); object->ref_count++; vm_object_res_reference(object); vm_object_unlock(object); - VM_STAT(hits++); + VM_STAT_INCR(hits); } assert(object->ref_count > 0); - VM_STAT(lookups++); + VM_STAT_INCR(lookups); vm_object_cache_unlock(); @@ -3010,13 +3314,10 @@ vm_object_pager_create( */ { memory_object_default_t dmm; - vm_size_t cluster_size; /* acquire a reference for the default memory manager */ - dmm = memory_manager_default_reference(&cluster_size); - assert(cluster_size >= PAGE_SIZE); + dmm = memory_manager_default_reference(); - object->cluster_size = cluster_size; /* XXX ??? */ assert(object->temporary); /* create our new memory object */ @@ -3091,8 +3392,10 @@ static long object_bypasses = 0; static boolean_t vm_object_collapse_allowed = TRUE; static boolean_t vm_object_bypass_allowed = TRUE; +#if MACH_PAGEMAP static int vm_external_discarded; static int vm_external_collapsed; +#endif unsigned long vm_object_collapse_encrypted = 0; @@ -3148,16 +3451,15 @@ vm_object_do_collapse( /* * ENCRYPTED SWAP: * The encryption key includes the "pager" and the - * "paging_offset". These might not be the same in - * the new object, so we can't just move an encrypted - * page from one object to the other. We can't just - * decrypt the page here either, because that would drop + * "paging_offset". These will not change during the + * object collapse, so we can just move an encrypted + * page from one object to the other in this case. + * We can't decrypt the page here, since we can't drop * the object lock. - * The caller should check for encrypted pages before - * attempting to collapse. 
*/ - ASSERT_PAGE_DECRYPTED(p); - + if (p->encrypted) { + vm_object_collapse_encrypted++; + } pp = vm_page_lookup(object, new_offset); if (pp == VM_PAGE_NULL) { @@ -3166,7 +3468,7 @@ vm_object_do_collapse( * Move the backing object's page up. */ - vm_page_rename(p, object, new_offset); + vm_page_rename(p, object, new_offset, TRUE); #if MACH_PAGEMAP } else if (pp->absent) { @@ -3183,7 +3485,7 @@ vm_object_do_collapse( */ VM_PAGE_FREE(pp); - vm_page_rename(p, object, new_offset); + vm_page_rename(p, object, new_offset, TRUE); #endif /* MACH_PAGEMAP */ } else { assert(! pp->absent); @@ -3199,9 +3501,9 @@ vm_object_do_collapse( } #if !MACH_PAGEMAP - assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL + assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL)) || (!backing_object->pager_created - && backing_object->pager == MEMORY_OBJECT_NULL)); + && (backing_object->pager == MEMORY_OBJECT_NULL))); #else assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL); #endif /* !MACH_PAGEMAP */ @@ -3226,7 +3528,6 @@ vm_object_do_collapse( object->pager_control = backing_object->pager_control; object->pager_ready = backing_object->pager_ready; object->pager_initialized = backing_object->pager_initialized; - object->cluster_size = backing_object->cluster_size; object->paging_offset = backing_object->paging_offset + backing_offset; if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) { @@ -3299,6 +3600,8 @@ vm_object_do_collapse( XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n", (integer_t)backing_object, 0,0,0,0); + vm_object_lock_destroy(backing_object); + zfree(vm_object_zone, backing_object); object_collapses++; @@ -3314,6 +3617,8 @@ vm_object_do_bypass( * in the chain. */ + vm_object_lock_assert_exclusive(backing_object); + #if TASK_SWAPPER /* * Do object reference in-line to @@ -3324,6 +3629,7 @@ vm_object_do_bypass( */ if (backing_object->shadow != VM_OBJECT_NULL) { vm_object_lock(backing_object->shadow); + vm_object_lock_assert_exclusive(backing_object->shadow); backing_object->shadow->ref_count++; if (object->res_count != 0) vm_object_res_reference(backing_object->shadow); @@ -3376,6 +3682,7 @@ vm_object_do_bypass( #endif */ if (backing_object->ref_count > 1) { + vm_object_lock_assert_exclusive(backing_object); backing_object->ref_count--; #if TASK_SWAPPER if (object->res_count != 0) @@ -3428,6 +3735,7 @@ static unsigned long vm_object_collapse_calls = 0; static unsigned long vm_object_collapse_objects = 0; static unsigned long vm_object_collapse_do_collapse = 0; static unsigned long vm_object_collapse_do_bypass = 0; +static unsigned long vm_object_collapse_delays = 0; __private_extern__ void vm_object_collapse( register vm_object_t object, @@ -3437,9 +3745,6 @@ vm_object_collapse( register vm_object_t backing_object; register unsigned int rcount; register unsigned int size; - vm_object_offset_t collapse_min_offset; - vm_object_offset_t collapse_max_offset; - vm_page_t page; vm_object_t original_object; vm_object_collapse_calls++; @@ -3480,8 +3785,7 @@ vm_object_collapse( * No pages in the object are currently * being paged out, and */ - if (object->paging_in_progress != 0 || - object->absent_count != 0) { + if (object->paging_in_progress != 0) { /* try and collapse the rest of the shadow chain */ vm_object_lock(backing_object); if (object != original_object) { @@ -3578,39 +3882,6 @@ vm_object_collapse( return; } - /* - * ENCRYPTED SWAP - * We can't collapse the object if it contains - * any encypted page, because the encryption key - * 
includes the info. We can't - * drop the object lock in vm_object_do_collapse() - * so we can't decrypt the page there either. - */ - if (vm_pages_encrypted) { - collapse_min_offset = object->shadow_offset; - collapse_max_offset = - object->shadow_offset + object->size; - queue_iterate(&backing_object->memq, - page, vm_page_t, listq) { - if (page->encrypted && - (page->offset >= - collapse_min_offset) && - (page->offset < - collapse_max_offset)) { - /* - * We found an encrypted page - * in the backing object, - * within the range covered - * by the parent object: we can - * not collapse them. - */ - vm_object_collapse_encrypted++; - vm_object_cache_unlock(); - goto try_bypass; - } - } - } - /* * Collapse the object with its backing * object, and try again with the object's @@ -3622,7 +3893,6 @@ vm_object_collapse( continue; } - try_bypass: /* * Collapsing the backing object was not possible * or permitted, so let's try bypassing it. @@ -3703,10 +3973,15 @@ vm_object_collapse( backing_offset = object->shadow_offset; backing_rcount = backing_object->resident_page_count; +#if MACH_PAGEMAP #define EXISTS_IN_OBJECT(obj, off, rc) \ (vm_external_state_get((obj)->existence_map, \ (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \ ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) +#else +#define EXISTS_IN_OBJECT(obj, off, rc) \ + (((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) +#endif /* MACH_PAGEMAP */ /* * Check the hint location first @@ -3745,9 +4020,14 @@ vm_object_collapse( * double-decrement the rcount. We also may or * may not have found the */ - if (backing_rcount && size > - ((backing_object->existence_map) ? - backing_rcount : (backing_rcount >> 1))) { + if (backing_rcount && +#if MACH_PAGEMAP + size > ((backing_object->existence_map) ? + backing_rcount : (backing_rcount >> 1)) +#else + size > (backing_rcount >> 1) +#endif /* MACH_PAGEMAP */ + ) { unsigned int rc = rcount; vm_page_t p; @@ -3756,8 +4036,9 @@ vm_object_collapse( do { /* Until we get more than one lookup lock */ if (lookups > 256) { + vm_object_collapse_delays++; lookups = 0; - delay(1); + mutex_pause(0); } offset = (p->offset - backing_offset); @@ -3785,7 +4066,11 @@ vm_object_collapse( * Walk through the offsets looking for pages in the * backing object that show through to the object. */ +#if MACH_PAGEMAP if (backing_rcount || backing_object->existence_map) { +#else + if (backing_rcount) { +#endif /* MACH_PAGEMAP */ offset = hint_offset; while((offset = @@ -3794,8 +4079,9 @@ vm_object_collapse( /* Until we get more than one lookup lock */ if (lookups > 256) { + vm_object_collapse_delays++; lookups = 0; - delay(1); + mutex_pause(0); } if (EXISTS_IN_OBJECT(backing_object, offset + @@ -3876,7 +4162,7 @@ vm_object_page_remove( p = vm_page_lookup(object, start); if (p != VM_PAGE_NULL) { assert(!p->cleaning && !p->pageout); - if (!p->fictitious) + if (!p->fictitious && p->pmapped) pmap_disconnect(p->phys_page); VM_PAGE_FREE(p); } @@ -3889,7 +4175,7 @@ vm_object_page_remove( next = (vm_page_t) queue_next(&p->listq); if ((start <= p->offset) && (p->offset < end)) { assert(!p->cleaning && !p->pageout); - if (!p->fictitious) + if (!p->fictitious && p->pmapped) pmap_disconnect(p->phys_page); VM_PAGE_FREE(p); } @@ -3965,7 +4251,7 @@ vm_object_coalesce( * . paged out * . shadows another object * . has a copy elsewhere - * . is purgable + * . is purgeable * . 
paging references (pages might be in page-list) */ @@ -3974,7 +4260,7 @@ vm_object_coalesce( (prev_object->shadow != VM_OBJECT_NULL) || (prev_object->copy != VM_OBJECT_NULL) || (prev_object->true_share != FALSE) || - (prev_object->purgable != VM_OBJECT_NONPURGABLE) || + (prev_object->purgable != VM_PURGABLE_DENY) || (prev_object->paging_in_progress != 0)) { vm_object_unlock(prev_object); return(FALSE); @@ -4172,11 +4458,8 @@ vm_follow_object( * vm_object_print: [ debug ] */ void -vm_object_print( - db_addr_t db_addr, - __unused boolean_t have_addr, - __unused int arg_count, - __unused char *modif) +vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr, + __unused db_expr_t arg_count, __unused char *modif) { vm_object_t object; register vm_page_t p; @@ -4193,7 +4476,6 @@ vm_object_print( db_indent += 2; iprintf("size=0x%x", object->size); - printf(", cluster=0x%x", object->cluster_size); printf(", memq_hint=%p", object->memq_hint); printf(", ref_count=%d\n", object->ref_count); iprintf(""); @@ -4244,7 +4526,6 @@ vm_object_print( printf("?"); } printf("]"); - printf(", absent_count=%d\n", object->absent_count); iprintf("all_wanted=0x%x<", object->all_wanted); s = ""; @@ -4260,10 +4541,6 @@ vm_object_print( printf("%spaging", s); s = ","; } - if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) { - printf("%sabsent", s); - s = ","; - } if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) { printf("%slock", s); s = ","; @@ -4292,11 +4569,11 @@ vm_object_print( (object->pageout ? "" : "!"), (object->internal ? "internal" : "external"), (object->temporary ? "temporary" : "permanent")); - iprintf("%salive, %spurgable, %spurgable_volatile, %spurgable_empty, %sshadowed, %scached, %sprivate\n", + iprintf("%salive, %spurgeable, %spurgeable_volatile, %spurgeable_empty, %sshadowed, %scached, %sprivate\n", (object->alive ? "" : "!"), - ((object->purgable != VM_OBJECT_NONPURGABLE) ? "" : "!"), - ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE) ? "" : "!"), - ((object->purgable == VM_OBJECT_PURGABLE_EMPTY) ? "" : "!"), + ((object->purgable != VM_PURGABLE_DENY) ? "" : "!"), + ((object->purgable == VM_PURGABLE_VOLATILE) ? "" : "!"), + ((object->purgable == VM_PURGABLE_EMPTY) ? "" : "!"), (object->shadowed ? "" : "!"), (vm_object_cached(object) ? "" : "!"), (object->private ? 
"" : "!")); @@ -4354,10 +4631,9 @@ vm_object_find( task_t task; vm_map_t map; vm_map_entry_t entry; - processor_set_t pset = &default_pset; boolean_t found = FALSE; - queue_iterate(&pset->tasks, task, task_t, pset_tasks) { + queue_iterate(&tasks, task, task_t, tasks) { map = task->map; for (entry = vm_map_first_entry(map); entry && entry != vm_map_to_entry(map); @@ -4427,22 +4703,28 @@ vm_object_populate_with_private( m = vm_page_lookup(object, base_offset); if(m != VM_PAGE_NULL) { if(m->fictitious) { - vm_page_lock_queues(); - m->fictitious = FALSE; - m->private = TRUE; - m->phys_page = base_page; - if(!m->busy) { - m->busy = TRUE; - } - if(!m->absent) { - m->absent = TRUE; - object->absent_count++; + if (m->phys_page != + vm_page_guard_addr) { + vm_page_lockspin_queues(); + m->fictitious = FALSE; + m->private = TRUE; + m->phys_page = base_page; + if(!m->busy) { + m->busy = TRUE; + } + if(!m->absent) { + m->absent = TRUE; + } + m->list_req_pending = TRUE; + vm_page_unlock_queues(); } - m->list_req_pending = TRUE; - vm_page_unlock_queues(); } else if (m->phys_page != base_page) { - /* pmap call to clear old mapping */ - pmap_disconnect(m->phys_page); + if (m->pmapped) { + /* + * pmap call to clear old mapping + */ + pmap_disconnect(m->phys_page); + } m->phys_page = base_page; } @@ -4461,14 +4743,13 @@ vm_object_populate_with_private( while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) vm_page_more_fictitious(); - vm_page_lock_queues(); + vm_page_lockspin_queues(); m->fictitious = FALSE; m->private = TRUE; m->phys_page = base_page; m->list_req_pending = TRUE; m->absent = TRUE; m->unusual = TRUE; - object->absent_count++; vm_page_unlock_queues(); vm_page_insert(m, object, base_offset); } @@ -4542,6 +4823,7 @@ memory_object_free_from_cache( assert(object->pager_initialized); assert(object->ref_count == 0); + vm_object_lock_assert_exclusive(object); object->ref_count++; /* @@ -4677,6 +4959,7 @@ restart: vm_object_cache_unlock(); object->named = TRUE; + vm_object_lock_assert_exclusive(object); object->ref_count++; vm_object_res_reference(object); while (!object->pager_ready) { @@ -4788,6 +5071,7 @@ vm_object_release_name( } return KERN_SUCCESS; } else { + vm_object_lock_assert_exclusive(object); object->ref_count--; assert(object->ref_count > 0); if(original_object) @@ -4849,16 +5133,16 @@ vm_object_lock_request( } /* - * Empty a purgable object by grabbing the physical pages assigned to it and + * Empty a purgeable object by grabbing the physical pages assigned to it and * putting them on the free queue without writing them to backing store, etc. * When the pages are next touched they will be demand zero-fill pages. We * skip pages which are busy, being paged in/out, wired, etc. We do _not_ * skip referenced/dirty pages, pages on the active queue, etc. We're more - * than happy to grab these since this is a purgable object. We mark the + * than happy to grab these since this is a purgeable object. We mark the * object as "empty" after reaping its pages. * * On entry the object and page queues are locked, the object must be a - * purgable object with no delayed copies pending. + * purgeable object with no delayed copies pending. 
*/ unsigned int vm_object_purge(vm_object_t object) @@ -4874,10 +5158,11 @@ vm_object_purge(vm_object_t object) #define PURGE_LOOP_QUOTA 100 num_purged_pages = 0; - if (object->purgable == VM_OBJECT_NONPURGABLE) + if (object->purgable == VM_PURGABLE_DENY) return num_purged_pages; - object->purgable = VM_OBJECT_PURGABLE_EMPTY; + assert(object->purgable != VM_PURGABLE_NONVOLATILE); + object->purgable = VM_PURGABLE_EMPTY; assert(object->copy == VM_OBJECT_NULL); assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); @@ -4910,9 +5195,7 @@ vm_object_purge(vm_object_t object) local_freeq = VM_PAGE_NULL; local_freed = 0; } - vm_page_unlock_queues(); - mutex_pause(); - vm_page_lock_queues(); + mutex_yield(&vm_page_queue_lock); /* resume with the current page and a new quota */ purge_loop_quota = PURGE_LOOP_QUOTA; @@ -4929,24 +5212,15 @@ vm_object_purge(vm_object_t object) continue; } - if (p->tabled) { - /* clean up the object/offset table */ - vm_page_remove(p); - } - if (p->absent) { - /* update the object's count of absent pages */ - vm_object_absent_release(object); - } + assert(!p->laundry); + assert(p->object != kernel_object); /* we can discard this page */ /* advertize that this page is in a transition state */ p->busy = TRUE; - if (p->no_isync == TRUE) { - /* the page hasn't been mapped yet */ - /* (optimization to delay the i-cache sync) */ - } else { + if (p->pmapped == TRUE) { /* unmap the page */ int refmod_state; @@ -4962,12 +5236,9 @@ vm_object_purge(vm_object_t object) vm_page_purged_count++; } - /* remove page from active or inactive queue... */ - VM_PAGE_QUEUES_REMOVE(p); + vm_page_free_prepare(p); /* ... and put it on our queue of pages to free */ - assert(!p->laundry); - assert(p->object != kernel_object); assert(p->pageq.next == NULL && p->pageq.prev == NULL); p->pageq.next = (queue_entry_t) local_freeq; @@ -4991,72 +5262,72 @@ vm_object_purge(vm_object_t object) } /* - * vm_object_purgable_control() allows the caller to control and investigate the - * state of a purgable object. A purgable object is created via a call to - * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgable object will - * never be coalesced with any other object -- even other purgable objects -- - * and will thus always remain a distinct object. A purgable object has + * vm_object_purgeable_control() allows the caller to control and investigate the + * state of a purgeable object. A purgeable object is created via a call to + * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgeable object will + * never be coalesced with any other object -- even other purgeable objects -- + * and will thus always remain a distinct object. A purgeable object has * special semantics when its reference count is exactly 1. If its reference - * count is greater than 1, then a purgable object will behave like a normal + * count is greater than 1, then a purgeable object will behave like a normal * object and attempts to use this interface will result in an error return * of KERN_INVALID_ARGUMENT. * - * A purgable object may be put into a "volatile" state which will make the + * A purgeable object may be put into a "volatile" state which will make the * object's pages elligable for being reclaimed without paging to backing * store if the system runs low on memory. If the pages in a volatile - * purgable object are reclaimed, the purgable object is said to have been - * "emptied." 
When a purgable object is emptied the system will reclaim as + * purgeable object are reclaimed, the purgeable object is said to have been + * "emptied." When a purgeable object is emptied the system will reclaim as * many pages from the object as it can in a convenient manner (pages already * en route to backing store or busy for other reasons are left as is). When - * a purgable object is made volatile, its pages will generally be reclaimed + * a purgeable object is made volatile, its pages will generally be reclaimed * before other pages in the application's working set. This semantic is * generally used by applications which can recreate the data in the object * faster than it can be paged in. One such example might be media assets * which can be reread from a much faster RAID volume. * - * A purgable object may be designated as "non-volatile" which means it will + * A purgeable object may be designated as "non-volatile" which means it will * behave like all other objects in the system with pages being written to and * read from backing store as needed to satisfy system memory needs. If the * object was emptied before the object was made non-volatile, that fact will - * be returned as the old state of the purgable object (see + * be returned as the old state of the purgeable object (see * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which * were reclaimed as part of emptying the object will be refaulted in as * zero-fill on demand. It is up to the application to note that an object * was emptied and recreate the objects contents if necessary. When a - * purgable object is made non-volatile, its pages will generally not be paged - * out to backing store in the immediate future. A purgable object may also + * purgeable object is made non-volatile, its pages will generally not be paged + * out to backing store in the immediate future. A purgeable object may also * be manually emptied. * * Finally, the current state (non-volatile, volatile, volatile & empty) of a - * volatile purgable object may be queried at any time. This information may + * volatile purgeable object may be queried at any time. This information may * be used as a control input to let the application know when the system is * experiencing memory pressure and is reclaiming memory. * - * The specified address may be any address within the purgable object. If + * The specified address may be any address within the purgeable object. If * the specified address does not represent any object in the target task's * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the - * object containing the specified address is not a purgable object, then + * object containing the specified address is not a purgeable object, then * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be * returned. * * The control parameter may be any one of VM_PURGABLE_SET_STATE or * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter - * state is used to set the new state of the purgable object and return its - * old state. For VM_PURGABLE_GET_STATE, the current state of the purgable + * state is used to set the new state of the purgeable object and return its + * old state. For VM_PURGABLE_GET_STATE, the current state of the purgeable * object is returned in the parameter state. * * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE, * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. 
These, respectively, represent * the non-volatile, volatile and volatile/empty states described above. - * Setting the state of a purgable object to VM_PURGABLE_EMPTY will + * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will * immediately reclaim as many pages in the object as can be conveniently * collected (some may have already been written to backing store or be * otherwise busy). * - * The process of making a purgable object non-volatile and determining its - * previous state is atomic. Thus, if a purgable object is made + * The process of making a purgeable object non-volatile and determining its + * previous state is atomic. Thus, if a purgeable object is made * VM_PURGABLE_NONVOLATILE and the old state is returned as - * VM_PURGABLE_VOLATILE, then the purgable object's previous contents are + * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are * completely intact and will remain so until the object is made volatile * again. If the old state is returned as VM_PURGABLE_EMPTY then the object * was reclaimed while it was in a volatile state and its previous contents @@ -5072,42 +5343,23 @@ vm_object_purgable_control( int *state) { int old_state; - vm_page_t p; + int new_state; if (object == VM_OBJECT_NULL) { /* - * Object must already be present or it can't be purgable. + * Object must already be present or it can't be purgeable. */ return KERN_INVALID_ARGUMENT; } /* - * Get current state of the purgable object. + * Get current state of the purgeable object. */ - switch (object->purgable) { - case VM_OBJECT_NONPURGABLE: + old_state = object->purgable; + if (old_state == VM_PURGABLE_DENY) return KERN_INVALID_ARGUMENT; - case VM_OBJECT_PURGABLE_NONVOLATILE: - old_state = VM_PURGABLE_NONVOLATILE; - break; - - case VM_OBJECT_PURGABLE_VOLATILE: - old_state = VM_PURGABLE_VOLATILE; - break; - - case VM_OBJECT_PURGABLE_EMPTY: - old_state = VM_PURGABLE_EMPTY; - break; - - default: - old_state = VM_PURGABLE_NONVOLATILE; - panic("Bad state (%d) for purgable object!\n", - object->purgable); - /*NOTREACHED*/ - } - - /* purgable cant have delayed copies - now or in the future */ + /* purgeable cant have delayed copies - now or in the future */ assert(object->copy == VM_OBJECT_NULL); assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); @@ -5119,86 +5371,116 @@ vm_object_purgable_control( return KERN_SUCCESS; } - switch (*state) { + new_state = *state & VM_PURGABLE_STATE_MASK; + switch (new_state) { + case VM_PURGABLE_DENY: case VM_PURGABLE_NONVOLATILE: - vm_page_lock_queues(); - if (object->purgable != VM_OBJECT_PURGABLE_NONVOLATILE) { + object->purgable = new_state; + + if (old_state != VM_PURGABLE_NONVOLATILE) { + vm_page_lock_queues(); assert(vm_page_purgeable_count >= object->resident_page_count); vm_page_purgeable_count -= object->resident_page_count; - } - object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE; + if (old_state==VM_PURGABLE_VOLATILE) { + assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ + purgeable_q_t queue = vm_purgeable_object_remove(object); + assert(queue); - /* - * If the object wasn't emptied, then mark all pages of the - * object as referenced in order to give them a complete turn - * of the virtual memory "clock" before becoming candidates - * for paging out (if the system is suffering from memory - * pressure). We don't really need to set the pmap reference - * bits (which would be expensive) since the software copies - * are believed if they're set to true ... 
- */ - if (old_state != VM_PURGABLE_EMPTY) { - for (p = (vm_page_t)queue_first(&object->memq); - !queue_end(&object->memq, (queue_entry_t)p); - p = (vm_page_t)queue_next(&p->listq)) - p->reference = TRUE; + vm_purgeable_token_delete_first(queue); + assert(queue->debug_count_objects>=0); + }; + vm_page_unlock_queues(); } - - vm_page_unlock_queues(); - break; case VM_PURGABLE_VOLATILE: - vm_page_lock_queues(); - if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE && - object->purgable != VM_OBJECT_PURGABLE_EMPTY) { - vm_page_purgeable_count += object->resident_page_count; + if ((old_state != VM_PURGABLE_NONVOLATILE) && (old_state != VM_PURGABLE_VOLATILE)) + break; + purgeable_q_t queue; + + /* find the correct queue */ + if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE) + queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; + else { + if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO) + queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; + else + queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO]; } + + if (old_state == VM_PURGABLE_NONVOLATILE) { + /* try to add token... this can fail */ + vm_page_lock_queues(); - object->purgable = VM_OBJECT_PURGABLE_VOLATILE; - - /* - * We want the newly volatile purgable object to be a - * candidate for the pageout scan before other pages in the - * application if the system is suffering from memory - * pressure. To do this, we move a page of the object from - * the active queue onto the inactive queue in order to - * promote the object for early reclaim. We only need to move - * a single page since the pageout scan will reap the entire - * purgable object if it finds a single page in a volatile - * state. Obviously we don't do this if there are no pages - * associated with the object or we find a page of the object - * already on the inactive queue. - */ - for (p = (vm_page_t)queue_first(&object->memq); - !queue_end(&object->memq, (queue_entry_t)p); - p = (vm_page_t)queue_next(&p->listq)) { - if (p->inactive) { - /* already a page on the inactive queue */ - break; - } - if (p->active && !p->busy) { - /* found one we can move */ - vm_page_deactivate(p); - break; + kern_return_t result = vm_purgeable_token_add(queue); + if (result != KERN_SUCCESS) { + vm_page_unlock_queues(); + return result; } + vm_page_purgeable_count += object->resident_page_count; + + vm_page_unlock_queues(); + + object->purgable = new_state; + + /* object should not be on a queue */ + assert(object->objq.next == NULL && object->objq.prev == NULL); } - vm_page_unlock_queues(); + else if (old_state == VM_PURGABLE_VOLATILE) { + /* + * if reassigning priorities / purgeable groups, we don't change the + * token queue. So moving priorities will not make pages stay around longer. + * Reasoning is that the algorithm gives most priority to the most important + * object. If a new token is added, the most important object' priority is boosted. + * This biases the system already for purgeable queues that move a lot. + * It doesn't seem more biasing is neccessary in this case, where no new object is added. + */ + assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ + + purgeable_q_t old_queue=vm_purgeable_object_remove(object); + assert(old_queue); + + if (old_queue != queue) { + kern_return_t result; + + /* Changing queue. Have to move token. 
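 *		   (For reference -- illustrative values only -- the target queue
 *		    and group come from bits the caller packs into *state, along
 *		    the lines of
 *
 *			state = VM_PURGABLE_VOLATILE |
 *				VM_PURGABLE_BEHAVIOR_FIFO |
 *				(group << VM_VOLATILE_GROUP_SHIFT);
 *
 *		    which the code above decodes with VM_PURGABLE_BEHAVIOR_MASK,
 *		    VM_PURGABLE_ORDERING_MASK and VM_VOLATILE_GROUP_MASK.)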
*/ + vm_page_lock_queues(); + vm_purgeable_token_delete_first(old_queue); + result = vm_purgeable_token_add(queue); + vm_page_unlock_queues(); + assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */ + } + }; + vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT ); + + assert(queue->debug_count_objects>=0); + break; case VM_PURGABLE_EMPTY: - vm_page_lock_queues(); - if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE && - object->purgable != VM_OBJECT_PURGABLE_EMPTY) { - vm_page_purgeable_count += object->resident_page_count; + if (old_state != new_state) + { + assert(old_state==VM_PURGABLE_NONVOLATILE || old_state==VM_PURGABLE_VOLATILE); + if(old_state==VM_PURGABLE_VOLATILE) { + assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ + purgeable_q_t old_queue=vm_purgeable_object_remove(object); + assert(old_queue); + vm_page_lock_queues(); + vm_purgeable_token_delete_first(old_queue); + } + + if (old_state==VM_PURGABLE_NONVOLATILE) { + vm_page_purgeable_count += object->resident_page_count; + vm_page_lock_queues(); + } + (void) vm_object_purge(object); + vm_page_unlock_queues(); } - (void) vm_object_purge(object); - vm_page_unlock_queues(); break; } @@ -5373,8 +5655,9 @@ vm_object_transpose( vm_object_lock(object1); object1_locked = TRUE; - if (object1->copy || object1->shadow || object1->shadowed || - object1->purgable != VM_OBJECT_NONPURGABLE) { + if (!object1->alive || object1->terminating || + object1->copy || object1->shadow || object1->shadowed || + object1->purgable != VM_PURGABLE_DENY) { /* * We don't deal with copy or shadow objects (yet). */ @@ -5399,8 +5682,9 @@ vm_object_transpose( */ vm_object_lock(object2); object2_locked = TRUE; - if (object2->copy || object2->shadow || object2->shadowed || - object2->purgable != VM_OBJECT_NONPURGABLE) { + if (! object2->alive || object2->terminating || + object2->copy || object2->shadow || object2->shadowed || + object2->purgable != VM_PURGABLE_DENY) { retval = KERN_INVALID_VALUE; goto done; } @@ -5450,6 +5734,7 @@ vm_object_transpose( /* * Transpose the lists of resident pages. + * This also updates the resident_page_count and the memq_hint. 
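 *
 * (Each move below goes through vm_page_rename(), which is roughly --
 *  sketch only --
 *
 *	vm_page_remove(page);				remove from the old object
 *	vm_page_insert(page, new_object, new_offset);	insert into the new one
 *
 *  performed with the appropriate page locking, which is what keeps the
 *  resident page counts and memq hints of both objects consistent.)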
*/ if (object1->phys_contiguous || queue_empty(&object1->memq)) { /* @@ -5459,7 +5744,7 @@ vm_object_transpose( */ while (!queue_empty(&object2->memq)) { page = (vm_page_t) queue_first(&object2->memq); - vm_page_rename(page, object1, page->offset); + vm_page_rename(page, object1, page->offset, FALSE); } assert(queue_empty(&object2->memq)); } else if (object2->phys_contiguous || queue_empty(&object2->memq)) { @@ -5470,7 +5755,7 @@ vm_object_transpose( */ while (!queue_empty(&object1->memq)) { page = (vm_page_t) queue_first(&object1->memq); - vm_page_rename(page, object2, page->offset); + vm_page_rename(page, object2, page->offset, FALSE); } assert(queue_empty(&object1->memq)); } else { @@ -5488,7 +5773,7 @@ vm_object_transpose( /* transfer object2's pages to object1 */ while (!queue_empty(&object2->memq)) { page = (vm_page_t) queue_first(&object2->memq); - vm_page_rename(page, object1, page->offset); + vm_page_rename(page, object1, page->offset, FALSE); } assert(queue_empty(&object2->memq)); /* transfer tmp_object's pages to object1 */ @@ -5501,9 +5786,6 @@ vm_object_transpose( assert(queue_empty(&tmp_object->memq)); } - /* no need to transpose the size: they should be identical */ - assert(object1->size == object2->size); - #define __TRANSPOSE_FIELD(field) \ MACRO_BEGIN \ tmp_object->field = object1->field; \ @@ -5511,16 +5793,23 @@ MACRO_BEGIN \ object2->field = tmp_object->field; \ MACRO_END + /* "size" should be identical */ + assert(object1->size == object2->size); + /* "Lock" refers to the object not its contents */ + /* "ref_count" refers to the object not its contents */ +#if TASK_SWAPPER + /* "res_count" refers to the object not its contents */ +#endif + /* "resident_page_count" was updated above when transposing pages */ + /* there should be no "copy" */ assert(!object1->copy); assert(!object2->copy); - + /* there should be no "shadow" */ assert(!object1->shadow); assert(!object2->shadow); - __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */ __TRANSPOSE_FIELD(pager); __TRANSPOSE_FIELD(paging_offset); - __TRANSPOSE_FIELD(pager_control); /* update the memory_objects' pointers back to the VM objects */ if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) { @@ -5531,29 +5820,62 @@ MACRO_END memory_object_control_collapse(object2->pager_control, object2); } - - __TRANSPOSE_FIELD(absent_count); - + __TRANSPOSE_FIELD(copy_strategy); + /* "paging_in_progress" refers to the object not its contents */ assert(object1->paging_in_progress); assert(object2->paging_in_progress); - + /* "all_wanted" refers to the object not its contents */ __TRANSPOSE_FIELD(pager_created); __TRANSPOSE_FIELD(pager_initialized); __TRANSPOSE_FIELD(pager_ready); __TRANSPOSE_FIELD(pager_trusted); + __TRANSPOSE_FIELD(can_persist); __TRANSPOSE_FIELD(internal); __TRANSPOSE_FIELD(temporary); __TRANSPOSE_FIELD(private); __TRANSPOSE_FIELD(pageout); + /* "alive" should be set */ + assert(object1->alive); + assert(object2->alive); + /* "purgeable" should be non-purgeable */ + assert(object1->purgable == VM_PURGABLE_DENY); + assert(object2->purgable == VM_PURGABLE_DENY); + /* "shadowed" refers to the the object not its contents */ + __TRANSPOSE_FIELD(silent_overwrite); + __TRANSPOSE_FIELD(advisory_pageout); __TRANSPOSE_FIELD(true_share); + /* "terminating" should not be set */ + assert(!object1->terminating); + assert(!object2->terminating); + __TRANSPOSE_FIELD(named); + /* "shadow_severed" refers to the object not its contents */ __TRANSPOSE_FIELD(phys_contiguous); __TRANSPOSE_FIELD(nophyscache); + 
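	/*
	 * (For reference: each __TRANSPOSE_FIELD(x) above and below expands,
	 *  per the macro defined earlier in this routine, to a swap through
	 *  tmp_object:
	 *
	 *	tmp_object->x = object1->x;
	 *	object1->x    = object2->x;
	 *	object2->x    = tmp_object->x;
	 *
	 *  The surrounding asserts and comments call out the fields that are
	 *  deliberately not swapped.)
	 */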
/* "cached_list" should be NULL */ + assert(object1->cached_list.prev == NULL); + assert(object1->cached_list.next == NULL); + assert(object2->cached_list.prev == NULL); + assert(object2->cached_list.next == NULL); + /* "msr_q" is linked to the object not its contents */ + assert(queue_empty(&object1->msr_q)); + assert(queue_empty(&object2->msr_q)); __TRANSPOSE_FIELD(last_alloc); __TRANSPOSE_FIELD(sequential); - __TRANSPOSE_FIELD(cluster_size); + __TRANSPOSE_FIELD(pages_created); + __TRANSPOSE_FIELD(pages_used); +#if MACH_PAGEMAP __TRANSPOSE_FIELD(existence_map); +#endif __TRANSPOSE_FIELD(cow_hint); +#if MACH_ASSERT + __TRANSPOSE_FIELD(paging_object); +#endif __TRANSPOSE_FIELD(wimg_bits); + __TRANSPOSE_FIELD(code_signed); + __TRANSPOSE_FIELD(not_in_use); +#ifdef UPL_DEBUG + /* "uplq" refers to the object not its contents (see upl_transpose()) */ +#endif #undef __TRANSPOSE_FIELD @@ -5600,8 +5922,262 @@ done: } -/* Allow manipulation of individual page state. This is actually part of */ -/* the UPL regimen but takes place on the VM object rather than on a UPL */ +/* + * vm_object_build_cluster + * + * Determine how big a cluster we should issue an I/O for... + * + * Inputs: *start == offset of page needed + * *length == maximum cluster pager can handle + * Outputs: *start == beginning offset of cluster + * *length == length of cluster to try + * + * The original *start will be encompassed by the cluster + * + */ +extern int speculative_reads_disabled; + +uint32_t pre_heat_scaling[MAX_UPL_TRANSFER]; +uint32_t pre_heat_cluster[MAX_UPL_TRANSFER]; + +#define PRE_HEAT_MULTIPLIER 4 + +__private_extern__ void +vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, + vm_size_t *length, vm_object_fault_info_t fault_info) +{ + vm_size_t pre_heat_size; + vm_size_t tail_size; + vm_size_t head_size; + vm_size_t max_length; + vm_size_t cluster_size; + vm_object_offset_t object_size; + vm_object_offset_t orig_start; + vm_object_offset_t target_start; + vm_object_offset_t offset; + vm_behavior_t behavior; + boolean_t look_behind = TRUE; + boolean_t look_ahead = TRUE; + int sequential_run; + int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; + + assert( !(*length & PAGE_MASK)); + assert( !(*start & PAGE_MASK_64)); + + if ( (max_length = *length) > (MAX_UPL_TRANSFER * PAGE_SIZE) ) + max_length = (MAX_UPL_TRANSFER * PAGE_SIZE); + /* + * we'll always return a cluster size of at least + * 1 page, since the original fault must always + * be processed + */ + *length = PAGE_SIZE; + + if (speculative_reads_disabled || fault_info == NULL || max_length == 0) { + /* + * no cluster... just fault the page in + */ + return; + } + orig_start = *start; + target_start = orig_start; + cluster_size = round_page_32(fault_info->cluster_size); + behavior = fault_info->behavior; + + vm_object_lock(object); + + if (object->internal) + object_size = object->size; + else if (object->pager != MEMORY_OBJECT_NULL) + vnode_pager_get_object_size(object->pager, &object_size); + else + goto out; /* pager is gone for this object, nothing more to do */ + + object_size = round_page_64(object_size); + + if (orig_start >= object_size) { + /* + * fault occurred beyond the EOF... 
+ * we need to punt w/o changing the + * starting offset + */ + goto out; + } + if (object->pages_used > object->pages_created) { + /* + * must have wrapped our 32 bit counters + * so reset + */ + object->pages_used = object->pages_created = 0; + } + if ((sequential_run = object->sequential)) { + if (sequential_run < 0) { + sequential_behavior = VM_BEHAVIOR_RSEQNTL; + sequential_run = 0 - sequential_run; + } else { + sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; + } + } + switch(behavior) { + + default: + behavior = VM_BEHAVIOR_DEFAULT; + + case VM_BEHAVIOR_DEFAULT: + if (object->internal && fault_info->user_tag == VM_MEMORY_STACK) + goto out; + + if (sequential_run >= (3 * PAGE_SIZE)) { + pre_heat_size = sequential_run + PAGE_SIZE; + + if ((behavior = sequential_behavior) == VM_BEHAVIOR_SEQUENTIAL) + look_behind = FALSE; + else + look_ahead = FALSE; + } else { + uint32_t pages_unused; + + if (object->pages_created < 32 * PRE_HEAT_MULTIPLIER) { + /* + * prime the pump + */ + pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER; + break; + } + pages_unused = object->pages_created - object->pages_used; + + if (pages_unused < (object->pages_created / 8)) { + pre_heat_size = PAGE_SIZE * 32 * PRE_HEAT_MULTIPLIER; + } else if (pages_unused < (object->pages_created / 4)) { + pre_heat_size = PAGE_SIZE * 16 * PRE_HEAT_MULTIPLIER; + } else if (pages_unused < (object->pages_created / 2)) { + pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER; + } else { + pre_heat_size = PAGE_SIZE * 4 * PRE_HEAT_MULTIPLIER; + } + } + break; + + case VM_BEHAVIOR_RANDOM: + if ((pre_heat_size = cluster_size) <= PAGE_SIZE) + goto out; + break; + + case VM_BEHAVIOR_SEQUENTIAL: + if ((pre_heat_size = cluster_size) == 0) + pre_heat_size = sequential_run + PAGE_SIZE; + look_behind = FALSE; + + break; + + case VM_BEHAVIOR_RSEQNTL: + if ((pre_heat_size = cluster_size) == 0) + pre_heat_size = sequential_run + PAGE_SIZE; + look_ahead = FALSE; + + break; + + } + if (pre_heat_size > max_length) + pre_heat_size = max_length; + + if (behavior == VM_BEHAVIOR_DEFAULT && vm_page_free_count < vm_page_free_target) + pre_heat_size /= 2; + + if (look_ahead == TRUE) { + if (look_behind == TRUE) + target_start &= ~(pre_heat_size - 1); + + if ((target_start + pre_heat_size) > object_size) + pre_heat_size = (vm_size_t)(trunc_page_64(object_size - target_start)); + + tail_size = pre_heat_size - (orig_start - target_start) - PAGE_SIZE; + } else { + if (pre_heat_size > target_start) + pre_heat_size = target_start; + tail_size = 0; + } + pre_heat_scaling[pre_heat_size / PAGE_SIZE]++; + + if (pre_heat_size <= PAGE_SIZE) + goto out; + + if (look_behind == TRUE) { + /* + * take a look at the pages before the original + * faulting offset + */ + head_size = pre_heat_size - tail_size - PAGE_SIZE; + + for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) { + /* + * don't poke below the lowest offset + */ + if (offset < fault_info->lo_offset) + break; + /* + * for external objects and internal objects w/o an existence map + * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN + */ +#if MACH_PAGEMAP + if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) { + /* + * we know for a fact that the pager can't provide the page + * so don't include it or any pages beyond it in this cluster + */ + break; + } +#endif + if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { + /* + * don't bridge resident pages + */ + break; + } + *start = offset; + *length += PAGE_SIZE; + } + } + if 
(look_ahead == TRUE) { + for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) { + /* + * don't poke above the highest offset + */ + if (offset >= fault_info->hi_offset) + break; + /* + * for external objects and internal objects w/o an existence map + * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN + */ +#if MACH_PAGEMAP + if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) { + /* + * we know for a fact that the pager can't provide the page + * so don't include it or any pages beyond it in this cluster + */ + break; + } +#endif + if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { + /* + * don't bridge resident pages + */ + break; + } + *length += PAGE_SIZE; + } + } +out: + pre_heat_cluster[*length / PAGE_SIZE]++; + + vm_object_unlock(object); +} + + +/* + * Allow manipulation of individual page state. This is actually part of + * the UPL regimen but takes place on the VM object rather than on a UPL + */ kern_return_t vm_object_page_op( @@ -5650,13 +6226,13 @@ vm_object_page_op( } if (ops & UPL_POP_DUMP) { - vm_page_lock_queues(); - - if (dst_page->no_isync == FALSE) + if (dst_page->pmapped == TRUE) pmap_disconnect(dst_page->phys_page); - vm_page_free(dst_page); + vm_page_lock_queues(); + vm_page_free(dst_page); vm_page_unlock_queues(); + break; } @@ -5791,7 +6367,7 @@ vm_object_range_op( return KERN_INVALID_OBJECT; } - offset = offset_beg; + offset = offset_beg & ~PAGE_MASK_64; while (offset < offset_end) { dst_page = vm_page_lookup(object, offset); @@ -5802,8 +6378,7 @@ vm_object_range_op( * someone else is playing with the * page, we will have to wait */ - PAGE_SLEEP(object, - dst_page, THREAD_UNINT); + PAGE_SLEEP(object, dst_page, THREAD_UNINT); /* * need to relook the page up since it's * state may have changed while we slept @@ -5812,13 +6387,13 @@ vm_object_range_op( */ continue; } - vm_page_lock_queues(); - - if (dst_page->no_isync == FALSE) + if (dst_page->pmapped == TRUE) pmap_disconnect(dst_page->phys_page); - vm_page_free(dst_page); + vm_page_lock_queues(); + vm_page_free(dst_page); vm_page_unlock_queues(); + } else if (ops & UPL_ROP_ABSENT) break; } else if (ops & UPL_ROP_PRESENT) @@ -5828,8 +6403,55 @@ vm_object_range_op( } vm_object_unlock(object); - if (range) - *range = offset - offset_beg; - + if (range) { + if (offset > offset_end) + offset = offset_end; + if(offset > offset_beg) + *range = offset - offset_beg; + else *range=0; + } return KERN_SUCCESS; } + + +uint32_t scan_object_collision = 0; + +void +vm_object_lock(vm_object_t object) +{ + if (object == vm_pageout_scan_wants_object) { + scan_object_collision++; + mutex_pause(2); + } + lck_rw_lock_exclusive(&object->Lock); +} + +boolean_t +vm_object_lock_try(vm_object_t object) +{ + if (object == vm_pageout_scan_wants_object) { + scan_object_collision++; + mutex_pause(2); + } + return (lck_rw_try_lock_exclusive(&object->Lock)); +} + +void +vm_object_lock_shared(vm_object_t object) +{ + if (object == vm_pageout_scan_wants_object) { + scan_object_collision++; + mutex_pause(2); + } + lck_rw_lock_shared(&object->Lock); +} + +boolean_t +vm_object_lock_try_shared(vm_object_t object) +{ + if (object == vm_pageout_scan_wants_object) { + scan_object_collision++; + mutex_pause(2); + } + return (lck_rw_try_lock_shared(&object->Lock)); +}
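
For context, these wrappers let read-mostly paths take the object lock shared
and pay for exclusivity only when they actually modify the object; the
vm_object_lock_assert_exclusive() calls added earlier in this change are the
other half of that discipline.  A minimal sketch of the intended pattern
(illustrative only -- "needs_update" stands in for whatever condition the
caller tests, and dropping and re-taking the lock is simply one way to go
exclusive, at the cost of re-validating the object afterwards):

	vm_object_lock_shared(object);
	/* ... read-only inspection of the object ... */
	if (needs_update) {
		vm_object_unlock(object);	/* release the shared hold */
		vm_object_lock(object);		/* re-take it exclusive */
		vm_object_lock_assert_exclusive(object);
		/* ... re-check state, then modify (e.g. ref_count++) ... */
	}
	vm_object_unlock(object);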