X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/43866e378188c25dd1e2208016ab3cbeb086ae6c..13f56ec4e58bf8687e2a68032c093c0213dd519b:/osfmk/vm/memory_object.c?ds=inline

diff --git a/osfmk/vm/memory_object.c b/osfmk/vm/memory_object.c
index e82bd42b6..de7baff29 100644
--- a/osfmk/vm/memory_object.c
+++ b/osfmk/vm/memory_object.c
@@ -1,16 +1,19 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -20,7 +23,7 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
@@ -97,22 +100,18 @@
 #include <vm/pmap.h>		/* For pmap_clear_modify */
 #include <vm/vm_kern.h>		/* For kernel_map, vm_move */
 #include <vm/vm_map.h>		/* For vm_map_pageable */
+#include <vm/vm_purgeable_internal.h>	/* Needed by some vm_page.h macros */
+#include <vm/vm_shared_region.h>
 
 #if	MACH_PAGEMAP
 #include <vm/vm_external.h>
 #endif	/* MACH_PAGEMAP */
 
+#include <vm/vm_protos.h>
 
-memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
-vm_size_t		memory_manager_default_cluster = 0;
-decl_mutex_data(,	memory_manager_default_lock)
 
-/*
- *	Forward ref to file-local function:
- */
-boolean_t
-vm_object_update(vm_object_t, vm_object_offset_t, 
-		vm_size_t, memory_object_return_t, int, vm_prot_t);
+memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
+decl_lck_mtx_data(,	memory_manager_default_lock)
 
 
 /*
@@ -135,16 +134,16 @@ vm_object_update(vm_object_t, vm_object_offset_t,
 
 #define	memory_object_should_return_page(m, should_return) \
     (should_return != MEMORY_OBJECT_RETURN_NONE && \
-     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_addr))) || \
+     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
       ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
       (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
 
 typedef	int	memory_object_lock_result_t;
 
-#define MEMORY_OBJECT_LOCK_RESULT_DONE          0
-#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    1
-#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN    2
-#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   3
+#define MEMORY_OBJECT_LOCK_RESULT_DONE          	0
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    	1
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   	2
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE		3
 
 memory_object_lock_result_t memory_object_lock_page(
 				vm_page_t		m,
@@ -174,187 +173,151 @@ memory_object_lock_page(
 {
         XPR(XPR_MEMORY_OBJECT,
             "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
-            (integer_t)m, should_return, should_flush, prot, 0);
+            m, should_return, should_flush, prot, 0);
 
-	/*
-	 *	If we cannot change access to the page,
-	 *	either because a mapping is in progress
-	 *	(busy page) or because a mapping has been
-	 *	wired, then give up.
-	 */
 
-	if (m->busy || m->cleaning)
-		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+	if (m->busy || m->cleaning) {
+		if (m->list_req_pending &&
+		    should_return == MEMORY_OBJECT_RETURN_NONE &&
+		    should_flush == TRUE) {
+
+			if (m->absent) {
+				/*
+				 * this is the list_req_pending | absent | busy case
+				 * which originates from vm_fault_page. 
+				 * Combine that with should_flush == TRUE and we
+				 * have a case where we need to toss the page from
+				 * the object.
+				 */
+				if (!VM_PAGE_WIRED(m)) {
+					return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
+				} else {
+					return (MEMORY_OBJECT_LOCK_RESULT_DONE);
+				}
+			}
+			if  (m->pageout || m->cleaning) {
+				/*
+				 * if pageout is set, page was earmarked by vm_pageout_scan
+				 * to be cleaned and stolen... if cleaning is set, we're
+				 * pre-cleaning pages for a hibernate...
+				 * in either case, we're going
+				 * to take it back since we are being asked to
+				 * flush the page w/o cleaning it (i.e. we don't
+				 * care that it's dirty, we want it gone from
+				 * the cache) and we don't want to stall
+				 * waiting for it to be cleaned for 2 reasons...
+				 * 1 - no use paging it out since we're probably
+				 *     shrinking the file at this point or we no
+				 *     longer care about the data in the page
+				 * 2 - if we stall, we may casue a deadlock in
+				 *     the FS trying to acquire its locks
+				 *     on the VNOP_PAGEOUT path presuming that
+				 *     those locks are already held on the truncate
+				 *     path before calling through to this function
+				 *
+				 * so undo all of the state that vm_pageout_scan
+				 * hung on this page
+				 */
 
+				vm_pageout_queue_steal(m, FALSE);
+				PAGE_WAKEUP_DONE(m);
+			} else {
+				panic("list_req_pending on page %p without absent/pageout/cleaning set\n", m);
+			}
+		} else
+			return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+	}
 	/*
 	 *	Don't worry about pages for which the kernel
 	 *	does not have any data.
 	 */
-
 	if (m->absent || m->error || m->restart) {
-		if(m->error && should_flush) {
-			/* dump the page, pager wants us to */
-			/* clean it up and there is no      */
-			/* relevant data to return */
-			if(m->wire_count == 0) {
-				VM_PAGE_FREE(m);
-				return(MEMORY_OBJECT_LOCK_RESULT_DONE);
-			}
-		} else {
-			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+		if (m->error && should_flush && !VM_PAGE_WIRED(m)) {
+			/*
+			 * dump the page, pager wants us to
+			 * clean it up and there is no
+			 * relevant data to return
+			 */
+			return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
 		}
+		return (MEMORY_OBJECT_LOCK_RESULT_DONE);
 	}
-
 	assert(!m->fictitious);
 
-	if (m->wire_count != 0) {
+	if (VM_PAGE_WIRED(m)) {
 		/*
-		 *	If no change would take place
-		 *	anyway, return successfully.
-		 *
-		 *	No change means:
-		 *		Not flushing AND
-		 *		No change to page lock [2 checks]  AND
-		 *		Should not return page
-		 *
-		 * XXX	This doesn't handle sending a copy of a wired
-		 * XXX	page to the pager, but that will require some
-		 * XXX	significant surgery.
+		 * The page is wired... just clean or return the page if needed.
+		 * Wired pages don't get flushed or disconnected from the pmap.
 		 */
-		if (!should_flush &&
-		    (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) &&
-		    ! memory_object_should_return_page(m, should_return)) {
-
-			/*
-			 *	Restart page unlock requests,
-			 *	even though no change took place.
-			 *	[Memory managers may be expecting
-			 *	to see new requests.]
-			 */
-			m->unlock_request = VM_PROT_NONE;
-			PAGE_WAKEUP(m);
-
-			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
-		}
-
-		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
-	}
+		if (memory_object_should_return_page(m, should_return))
+			return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
 
-	/*
-	 *	If the page is to be flushed, allow
-	 *	that to be done as part of the protection.
-	 */
+		return (MEMORY_OBJECT_LOCK_RESULT_DONE);
+	}		
 
-	if (should_flush)
-		prot = VM_PROT_ALL;
-
-	/*
-	 *	Set the page lock.
-	 *
-	 *	If we are decreasing permission, do it now;
-	 *	let the fault handler take care of increases
-	 *	(pmap_page_protect may not increase protection).
-	 */
-
-	if (prot != VM_PROT_NO_CHANGE) {
-		if ((m->page_lock ^ prot) & prot) {
-			pmap_page_protect(m->phys_addr, VM_PROT_ALL & ~prot);
-		}
-#if 0
-		/* code associated with the vestigial 
-		 * memory_object_data_unlock
+	if (should_flush) {
+		/*
+		 * must do the pmap_disconnect before determining the 
+		 * need to return the page... otherwise it's possible
+		 * for the page to go from the clean to the dirty state
+		 * after we've made our decision
 		 */
-		m->page_lock = prot;
-		m->lock_supplied = TRUE;
-		if (prot != VM_PROT_NONE)
-			m->unusual = TRUE;
-		else
-			m->unusual = FALSE;
-
+		if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
+			m->dirty = TRUE;
+	} else {
 		/*
-		 *	Restart any past unlock requests, even if no
-		 *	change resulted.  If the manager explicitly
-		 *	requested no protection change, then it is assumed
-		 *	to be remembering past requests.
+		 * If we are decreasing permission, do it now;
+		 * let the fault handler take care of increases
+		 * (pmap_page_protect may not increase protection).
 		 */
-
-		m->unlock_request = VM_PROT_NONE;
-#endif /* 0 */
-		PAGE_WAKEUP(m);
+		if (prot != VM_PROT_NO_CHANGE)
+			pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
 	}
-
 	/*
-	 *	Handle page returning.
+	 *	Handle returning dirty or precious pages
 	 */
-
 	if (memory_object_should_return_page(m, should_return)) {
-
 		/*
-		 *	If we weren't planning
-		 *	to flush the page anyway,
-		 *	we may need to remove the
-		 *	page from the pageout
-		 *	system and from physical
-		 *	maps now.
+		 * we use to do a pmap_disconnect here in support
+		 * of memory_object_lock_request, but that routine
+		 * no longer requires this...  in any event, in
+		 * our world, it would turn into a big noop since
+		 * we don't lock the page in any way and as soon
+		 * as we drop the object lock, the page can be
+		 * faulted back into an address space
+		 *
+		 *	if (!should_flush)
+		 *		pmap_disconnect(m->phys_page);
 		 */
-		
-		vm_page_lock_queues();
-		VM_PAGE_QUEUES_REMOVE(m);
-		vm_page_unlock_queues();
-
-		if (!should_flush)
-			pmap_page_protect(m->phys_addr, VM_PROT_NONE);
-
-		if (m->dirty)
-			return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
-		else
-			return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
+		return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
 	}
 
 	/*
-	 *	Handle flushing
+	 *	Handle flushing clean pages
 	 */
+	if (should_flush)
+		return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
 
-	if (should_flush) {
-		VM_PAGE_FREE(m);
-	} else {
-		extern boolean_t vm_page_deactivate_hint;
-
-		/*
-		 *	XXX Make clean but not flush a paging hint,
-		 *	and deactivate the pages.  This is a hack
-		 *	because it overloads flush/clean with
-		 *	implementation-dependent meaning.  This only
-		 *	happens to pages that are already clean.
-		 */
-
-		if (vm_page_deactivate_hint &&
-		    (should_return != MEMORY_OBJECT_RETURN_NONE)) {
-			vm_page_lock_queues();
-			vm_page_deactivate(m);
-			vm_page_unlock_queues();
-		}
-	}
+	/*
+	 * we use to deactivate clean pages at this point,
+	 * but we do not believe that an msync should change
+	 * the 'age' of a page in the cache... here is the
+	 * original comment and code concerning this...
+	 *
+	 *	XXX Make clean but not flush a paging hint,
+	 *	and deactivate the pages.  This is a hack
+	 *	because it overloads flush/clean with
+	 *	implementation-dependent meaning.  This only
+	 *	happens to pages that are already clean.
+	 *
+	 *   if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
+	 *	return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
+	 */
 
-	return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+	return (MEMORY_OBJECT_LOCK_RESULT_DONE);
 }
 
-#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po) \
-MACRO_BEGIN								\
-									\
-	register int		i;                                      \
-	register vm_page_t	hp;					\
-									\
-	vm_object_unlock(object);					\
-									\
-	   	(void) memory_object_data_return(object->pager,		\
-		po,							\
-		data_cnt,						\
-		(action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN),	\
-		!should_flush);                                 	\
-									\
-	vm_object_lock(object);						\
-MACRO_END
+
 
 /*
  *	Routine:	memory_object_lock_request [user interface]
@@ -386,20 +349,15 @@ memory_object_lock_request(
 	memory_object_control_t		control,
 	memory_object_offset_t		offset,
 	memory_object_size_t		size,
+	memory_object_offset_t	*	resid_offset,
+	int			*	io_errno,
 	memory_object_return_t		should_return,
 	int				flags,
 	vm_prot_t			prot)
 {
 	vm_object_t	object;
-	vm_object_offset_t	original_offset = offset;
-	boolean_t		should_flush=flags & MEMORY_OBJECT_DATA_FLUSH;
-
-        XPR(XPR_MEMORY_OBJECT,
-	    "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
-	    (integer_t)control, offset, size, 
- 	    (((should_return&1)<<1)|should_flush), prot);
 
-	/*
+        /*
 	 *	Check for bogus arguments.
 	 */
 	object = memory_object_control_to_vm_object(control);
@@ -409,7 +367,7 @@ memory_object_lock_request(
 	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
 		return (KERN_INVALID_ARGUMENT);
 
-	size = round_page(size);
+	size = round_page_64(size);
 
 	/*
 	 *	Lock the object, and acquire a paging reference to
@@ -417,10 +375,20 @@ memory_object_lock_request(
 	 */
 	vm_object_lock(object);
 	vm_object_paging_begin(object);
+
+	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
+		if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) {
+			flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
+			flags |= MEMORY_OBJECT_DATA_FLUSH;
+		}
+	}
 	offset -= object->paging_offset;
 
-	(void)vm_object_update(object,
-		offset, size, should_return, flags, prot);
+	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL)
+		vm_object_reap_pages(object, REAP_DATA_FLUSH);
+	else
+		(void)vm_object_update(object, offset, size, resid_offset,
+				       io_errno, should_return, flags, prot);
 
 	vm_object_paging_end(object);
 	vm_object_unlock(object);
@@ -510,15 +478,17 @@ boolean_t
 vm_object_sync(
 	vm_object_t		object,
 	vm_object_offset_t	offset,
-	vm_size_t		size,
+	vm_object_size_t	size,
 	boolean_t		should_flush,
-	boolean_t		should_return)
+	boolean_t		should_return,
+	boolean_t		should_iosync)
 {
 	boolean_t	rv;
+	int             flags;
 
         XPR(XPR_VM_OBJECT,
             "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
-            (integer_t)object, offset, size, should_flush, should_return);
+            object, offset, size, should_flush, should_return);
 
 	/*
 	 * Lock the object, and acquire a paging reference to
@@ -528,12 +498,19 @@ vm_object_sync(
 	vm_object_lock(object);
 	vm_object_paging_begin(object);
 
-	rv = vm_object_update(object, offset, size,
+	if (should_flush)
+	        flags = MEMORY_OBJECT_DATA_FLUSH;
+	else
+	        flags = 0;
+
+	if (should_iosync)
+	        flags |= MEMORY_OBJECT_IO_SYNC;
+
+	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
 		(should_return) ?
 			MEMORY_OBJECT_RETURN_ALL :
 			MEMORY_OBJECT_RETURN_NONE,
-		(should_flush) ?
-			MEMORY_OBJECT_DATA_FLUSH : 0,
+		flags,
 		VM_PROT_NO_CHANGE);
 
 
@@ -542,6 +519,199 @@ vm_object_sync(
 	return rv;
 }
 
+
+
+#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync)    \
+MACRO_BEGIN								\
+									\
+        int			upl_flags;                              \
+	memory_object_t		pager;					\
+									\
+	if (object == slide_info.slide_object) {					\
+		panic("Objects with slid pages not allowed\n");		\
+	}								\
+				                   			\
+	if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {		\
+		vm_object_paging_begin(object);				\
+		vm_object_unlock(object);				\
+									\
+                if (iosync)                                     	\
+                        upl_flags = UPL_MSYNC | UPL_IOSYNC;     	\
+                else                                            	\
+                        upl_flags = UPL_MSYNC;                  	\
+				                   			\
+	   	(void) memory_object_data_return(pager,			\
+			po,						\
+			(memory_object_cluster_size_t)data_cnt,		\
+	                ro,                                             \
+	                ioerr,                                          \
+			FALSE,						\
+			FALSE,		                                \
+			upl_flags);                                 	\
+									\
+		vm_object_lock(object);					\
+		vm_object_paging_end(object);				\
+	}								\
+MACRO_END
+
+
+
+static int
+vm_object_update_extent(
+        vm_object_t		object,
+        vm_object_offset_t	offset,
+	vm_object_offset_t	offset_end,
+	vm_object_offset_t	*offset_resid,
+	int			*io_errno,
+        boolean_t		should_flush,
+	memory_object_return_t	should_return,
+        boolean_t		should_iosync, 
+        vm_prot_t		prot)
+{
+        vm_page_t	m;
+        int		retval = 0;
+	vm_object_offset_t	paging_offset = 0;
+	vm_object_offset_t	next_offset = offset;
+        memory_object_lock_result_t	page_lock_result;
+	memory_object_cluster_size_t	data_cnt = 0;
+	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
+	struct vm_page_delayed_work	*dwp;
+	int		dw_count;
+	int		dw_limit;
+
+        dwp = &dw_array[0];
+        dw_count = 0;
+	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+
+	for (;
+	     offset < offset_end && object->resident_page_count;
+	     offset += PAGE_SIZE_64) {
+
+	        /*
+		 * Limit the number of pages to be cleaned at once to a contiguous
+		 * run, or at most MAX_UPL_TRANSFER size
+		 */
+		if (data_cnt) {
+			if ((data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) || (next_offset != offset)) {
+
+				if (dw_count) {
+					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+					dwp = &dw_array[0];
+					dw_count = 0;
+				}
+				LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
+						       paging_offset, offset_resid, io_errno, should_iosync);
+				data_cnt = 0;
+			}
+		}
+		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
+
+			dwp->dw_mask = 0;
+		        
+			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
+
+			if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) {
+				/*
+				 *	End of a run of dirty/precious pages.
+				 */
+				if (dw_count) {
+					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+					dwp = &dw_array[0];
+					dw_count = 0;
+				}
+				LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
+						       paging_offset, offset_resid, io_errno, should_iosync);
+				/*
+				 * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
+				 * allow the state of page 'm' to change... we need to re-lookup
+				 * the current offset
+				 */
+				data_cnt = 0;
+				continue;
+			}
+
+			switch (page_lock_result) {
+
+			case MEMORY_OBJECT_LOCK_RESULT_DONE:
+				break;
+
+			case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
+				dwp->dw_mask |= DW_vm_page_free;
+				break;
+
+			case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
+				PAGE_SLEEP(object, m, THREAD_UNINT);
+				continue;
+
+			case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
+				if (data_cnt == 0)
+					paging_offset = offset;
+
+				data_cnt += PAGE_SIZE;
+				next_offset = offset + PAGE_SIZE_64;
+
+				/*
+				 * Clean
+				 */
+				m->list_req_pending = TRUE;
+				m->cleaning = TRUE;
+
+				/*
+				 * wired pages shouldn't be flushed and
+				 * since they aren't on any queue,
+				 * no need to remove them
+				 */
+				if (!VM_PAGE_WIRED(m)) {
+
+					if (should_flush) {
+						/*
+						 * add additional state for the flush
+						 */
+						m->busy = TRUE;
+						m->pageout = TRUE;
+
+						dwp->dw_mask |= DW_vm_page_wire;
+					}
+					/*
+					 * we use to remove the page from the queues at this
+					 * point, but we do not believe that an msync
+					 * should cause the 'age' of a page to be changed
+					 *
+					 *    else
+					 *	dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
+					 */
+				}
+				retval = 1;
+				break;
+			}
+			if (dwp->dw_mask) {
+				VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
+
+				if (dw_count >= dw_limit) {
+					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+					dwp = &dw_array[0];
+					dw_count = 0;
+				}
+			}
+			break;
+		}
+	}
+	/*
+	 *	We have completed the scan for applicable pages.
+	 *	Clean any pages that have been saved.
+	 */
+	if (dw_count)
+		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+
+	if (data_cnt) {
+	        LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
+				       paging_offset, offset_resid, io_errno, should_iosync);
+	}
+	return (retval);
+}
+
+
+
 /*
  *	Routine:	vm_object_update
  *	Description:
@@ -551,26 +721,31 @@ vm_object_sync(
  */
 kern_return_t
 vm_object_update(
-	register vm_object_t		object,
-	register vm_object_offset_t	offset,
-	register vm_size_t		size,
-	memory_object_return_t		should_return,
-	int				flags,
-	vm_prot_t			prot)
+	vm_object_t		object,
+	vm_object_offset_t	offset,
+	vm_object_size_t	size,
+	vm_object_offset_t	*resid_offset,
+	int			*io_errno,
+	memory_object_return_t	should_return,
+	int			flags,
+	vm_prot_t		protection)
 {
-	register vm_page_t	m;
-	vm_page_t		holding_page;
-	vm_size_t		original_size = size;
-	vm_object_offset_t	paging_offset = 0;
-	vm_object_t		copy_object;
-	vm_size_t		data_cnt = 0;
-	vm_object_offset_t	last_offset = offset;
-	memory_object_lock_result_t	page_lock_result;
-	memory_object_lock_result_t	pageout_action;
+        vm_object_t		copy_object = VM_OBJECT_NULL;
 	boolean_t		data_returned = FALSE;
 	boolean_t		update_cow;
-	boolean_t		should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
-	boolean_t		pending_pageout = FALSE;
+	boolean_t		should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
+	boolean_t		should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
+	vm_fault_return_t	result;
+	int			num_of_extents;
+	int			n;
+#define MAX_EXTENTS	8
+#define EXTENT_SIZE	(1024 * 1024 * 256)
+#define RESIDENT_LIMIT	(1024 * 32)
+	struct extent {
+	        vm_object_offset_t e_base;
+	        vm_object_offset_t e_min;
+	        vm_object_offset_t e_max;
+	} extents[MAX_EXTENTS];
 
 	/*
 	 *	To avoid blocking while scanning for pages, save
@@ -594,89 +769,134 @@ vm_object_update(
 					!(flags & MEMORY_OBJECT_DATA_PURGE)))
 				|| (flags & MEMORY_OBJECT_COPY_SYNC);
 			
+	if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
+	        int collisions = 0;
+
+	        while ((copy_object = object->copy) != VM_OBJECT_NULL) {
+		        /*
+			 * need to do a try here since we're swimming upstream
+			 * against the normal lock ordering... however, we need
+			 * to hold the object stable until we gain control of the
+			 * copy object so we have to be careful how we approach this
+			 */
+		        if (vm_object_lock_try(copy_object)) {
+			       /*
+				* we 'won' the lock on the copy object...
+				* no need to hold the object lock any longer...
+				* take a real reference on the copy object because
+				* we're going to call vm_fault_page on it which may
+				* under certain conditions drop the lock and the paging
+				* reference we're about to take... the reference
+				* will keep the copy object from going away if that happens
+				*/
+			       vm_object_unlock(object);
+			       vm_object_reference_locked(copy_object);
+			       break;
+			}
+			vm_object_unlock(object);
+
+			collisions++;
+			mutex_pause(collisions);
 
-	if((((copy_object = object->copy) != NULL) && update_cow) ||
-					(flags & MEMORY_OBJECT_DATA_SYNC)) {
-		vm_size_t		i;
-		vm_size_t		copy_size;
-		vm_object_offset_t	copy_offset;
+			vm_object_lock(object);
+		}
+	}
+	if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
+		vm_map_size_t		i;
+		vm_map_size_t		copy_size;
+		vm_map_offset_t		copy_offset;
 		vm_prot_t		prot;
 		vm_page_t		page;
 		vm_page_t		top_page;
 		kern_return_t		error = 0;
+		struct vm_object_fault_info fault_info;
+
+		if (copy_object != VM_OBJECT_NULL) {
+		        /*
+			 * translate offset with respect to shadow's offset
+			 */
+		        copy_offset = (offset >= copy_object->vo_shadow_offset) ?
+			  (vm_map_offset_t)(offset - copy_object->vo_shadow_offset) :
+			  (vm_map_offset_t) 0;
+
+			if (copy_offset > copy_object->vo_size)
+			        copy_offset = copy_object->vo_size;
+
+			/*
+			 * clip size with respect to shadow offset
+			 */
+			if (offset >= copy_object->vo_shadow_offset) {
+			        copy_size = size;
+			} else if (size >= copy_object->vo_shadow_offset - offset) {
+			        copy_size = size - (copy_object->vo_shadow_offset - offset);
+			} else {
+			        copy_size = 0;
+			}
+			
+			if (copy_offset + copy_size > copy_object->vo_size) {
+			        if (copy_object->vo_size >= copy_offset) {
+				        copy_size = copy_object->vo_size - copy_offset;
+				} else {
+				        copy_size = 0;
+				}
+			}
+			copy_size+=copy_offset;
 
-		if(copy_object != NULL) {
-		   /* translate offset with respect to shadow's offset */
-		   copy_offset = (offset >= copy_object->shadow_offset)?
-			offset - copy_object->shadow_offset :
-			(vm_object_offset_t) 0;
-		   if(copy_offset > copy_object->size)
-			copy_offset = copy_object->size;
-
-		   /* clip size with respect to shadow offset */
-		   copy_size = (offset >= copy_object->shadow_offset) ?
-			size : size - (copy_object->shadow_offset - offset);
-
-		   if(copy_size <= 0) {
-			copy_size = 0;
-		   } else {
-			copy_size = ((copy_offset + copy_size) 
-				<= copy_object->size) ?
-				copy_size : copy_object->size - copy_offset;
-		   }
-		   /* check for a copy_offset which is beyond the end of */
-		   /* the copy_object */
-		   if(copy_size < 0)
-			copy_size = 0;
-
-		   copy_size+=offset;
-
-		   vm_object_unlock(object);
-		   vm_object_lock(copy_object);
 		} else {
 			copy_object = object;
 
 			copy_size   = offset + size;
 			copy_offset = offset;
 		}
+		fault_info.interruptible = THREAD_UNINT;
+		fault_info.behavior  = VM_BEHAVIOR_SEQUENTIAL;
+		fault_info.user_tag  = 0;
+		fault_info.lo_offset = copy_offset;
+		fault_info.hi_offset = copy_size;
+		fault_info.no_cache   = FALSE;
+		fault_info.stealth = TRUE;
+		fault_info.io_sync = FALSE;
+		fault_info.cs_bypass = FALSE;
+		fault_info.mark_zf_absent = FALSE;
 
 		vm_object_paging_begin(copy_object);
-		for (i=copy_offset; i<copy_size; i+=PAGE_SIZE) {
+
+		for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
 	RETRY_COW_OF_LOCK_REQUEST:
-			prot = 	VM_PROT_WRITE|VM_PROT_READ;
-			switch (vm_fault_page(copy_object, i, 
-				VM_PROT_WRITE|VM_PROT_READ,
-				FALSE,
-				THREAD_UNINT,
-				copy_offset,
-				copy_offset+copy_size,
-				VM_BEHAVIOR_SEQUENTIAL,
-				&prot,
-				&page,
-				&top_page,
-				(int *)0,
-				&error,
-			        FALSE,
-				FALSE, NULL, 0)) {
+			fault_info.cluster_size = (vm_size_t) (copy_size - i);
+			assert(fault_info.cluster_size == copy_size - i);
 
+			prot = 	VM_PROT_WRITE|VM_PROT_READ;
+			result = vm_fault_page(copy_object, i, 
+					       VM_PROT_WRITE|VM_PROT_READ,
+					       FALSE,
+					       &prot,
+					       &page,
+					       &top_page,
+					       (int *)0,
+					       &error,
+					       FALSE,
+					       FALSE, &fault_info);
+
+			switch (result) {
 			case VM_FAULT_SUCCESS:
-				if(top_page) {
+				if (top_page) {
 					vm_fault_cleanup(
 						page->object, top_page);
-					PAGE_WAKEUP_DONE(page);
-					vm_page_lock_queues();
-					if (!page->active && !page->inactive)
-						vm_page_activate(page);
-					vm_page_unlock_queues();
 					vm_object_lock(copy_object);
 					vm_object_paging_begin(copy_object);
-				} else {
-					PAGE_WAKEUP_DONE(page);
-					vm_page_lock_queues();
-					if (!page->active && !page->inactive)
-						vm_page_activate(page);
+				}
+				if (!page->active &&
+				    !page->inactive &&
+				    !page->throttled) {
+					vm_page_lockspin_queues();
+					if (!page->active &&
+					    !page->inactive &&
+					    !page->throttled)
+						vm_page_deactivate(page);
 					vm_page_unlock_queues();
 				}
+				PAGE_WAKEUP_DONE(page);
 				break;
 			case VM_FAULT_RETRY:
 				prot = 	VM_PROT_WRITE|VM_PROT_READ;
@@ -694,175 +914,175 @@ vm_object_update(
 				vm_object_lock(copy_object);
 				vm_object_paging_begin(copy_object);
 				goto RETRY_COW_OF_LOCK_REQUEST;
-			case VM_FAULT_FICTITIOUS_SHORTAGE:
-				vm_page_more_fictitious();
-				prot = 	VM_PROT_WRITE|VM_PROT_READ;
-				vm_object_lock(copy_object);
-				vm_object_paging_begin(copy_object);
-				goto RETRY_COW_OF_LOCK_REQUEST;
+			case VM_FAULT_SUCCESS_NO_VM_PAGE:
+				/* success but no VM page: fail */
+				vm_object_paging_end(copy_object);
+				vm_object_unlock(copy_object);
+				/*FALLTHROUGH*/
 			case VM_FAULT_MEMORY_ERROR:
+			        if (object != copy_object)
+				        vm_object_deallocate(copy_object);
 				vm_object_lock(object);
 				goto BYPASS_COW_COPYIN;
+			default:
+				panic("vm_object_update: unexpected error 0x%x"
+				      " from vm_fault_page()\n", result);
 			}
 
 		}
 		vm_object_paging_end(copy_object);
-		if(copy_object != object) {
+	}
+	if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
+	        if (copy_object != VM_OBJECT_NULL && copy_object != object) {
 			vm_object_unlock(copy_object);
+		        vm_object_deallocate(copy_object);
 			vm_object_lock(object);
 		}
+		return KERN_SUCCESS;
 	}
-	if((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
-			return KERN_SUCCESS;
-	}
-	if(((copy_object = object->copy) != NULL) && 
-					(flags & MEMORY_OBJECT_DATA_PURGE)) {
-		copy_object->shadow_severed = TRUE;
-		copy_object->shadowed = FALSE;
-		copy_object->shadow = NULL;
-		/* delete the ref the COW was holding on the target object */
-		vm_object_deallocate(object);
+	if (copy_object != VM_OBJECT_NULL && copy_object != object) {
+	        if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
+		        copy_object->shadow_severed = TRUE;
+			copy_object->shadowed = FALSE;
+			copy_object->shadow = NULL;
+			/*
+			 * delete the ref the COW was holding on the target object
+			 */
+			vm_object_deallocate(object);
+		}
+		vm_object_unlock(copy_object);
+	        vm_object_deallocate(copy_object);
+		vm_object_lock(object);
 	}
 BYPASS_COW_COPYIN:
 
-	for (;
-	     size != 0;
-	     size -= PAGE_SIZE, offset += PAGE_SIZE_64)
-	{
-	    /*
-	     *	Limit the number of pages to be cleaned at once.
-	     */
-	    if (pending_pageout &&
-		    data_cnt >= PAGE_SIZE * DATA_WRITE_MAX)
-	    {
- 		LIST_REQ_PAGEOUT_PAGES(object, data_cnt, 
-				pageout_action, paging_offset);
-		data_cnt = 0;
-		pending_pageout = FALSE;
-	    }
-
-	    while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
-		page_lock_result = memory_object_lock_page(m, should_return,
-					should_flush, prot);
-
-		XPR(XPR_MEMORY_OBJECT,
-                    "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
-                    (integer_t)object, offset, page_lock_result, 0, 0);
-
-		switch (page_lock_result)
-		{
-		    case MEMORY_OBJECT_LOCK_RESULT_DONE:
-			/*
-			 *	End of a cluster of dirty pages.
-			 */
-			if(pending_pageout) {
- 	    		    	LIST_REQ_PAGEOUT_PAGES(object, 
-					data_cnt, pageout_action, 
-					paging_offset);
-				data_cnt = 0;
-				pending_pageout = FALSE;
-				continue;
-			}
-			break;
+	/*
+	 * when we have a really large range to check relative
+	 * to the number of actual resident pages, we'd like
+	 * to use the resident page list to drive our checks
+	 * however, the object lock will get dropped while processing
+	 * the page which means the resident queue can change which
+	 * means we can't walk the queue as we process the pages
+	 * we also want to do the processing in offset order to allow
+	 * 'runs' of pages to be collected if we're being told to 
+	 * flush to disk... the resident page queue is NOT ordered.
+	 * 
+	 * a temporary solution (until we figure out how to deal with
+	 * large address spaces more generically) is to pre-flight
+	 * the resident page queue (if it's small enough) and develop
+	 * a collection of extents (that encompass actual resident pages)
+	 * to visit.  This will at least allow us to deal with some of the
+	 * more pathological cases in a more efficient manner.  The current
+	 * worst case (a single resident page at the end of an extremely large
+	 * range) can take minutes to complete for ranges in the terrabyte
+	 * category... since this routine is called when truncating a file,
+	 * and we currently support files up to 16 Tbytes in size, this
+	 * is not a theoretical problem
+	 */
 
-		    case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
-			/*
-			 *	Since it is necessary to block,
-			 *	clean any dirty pages now.
-			 */
-			if(pending_pageout) {
- 	    		    	LIST_REQ_PAGEOUT_PAGES(object,
-					data_cnt, pageout_action, 
-					paging_offset);
-				pending_pageout = FALSE;
-				data_cnt = 0;
-				continue;
-			}
+	if ((object->resident_page_count < RESIDENT_LIMIT) && 
+	    (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
+		vm_page_t		next;
+		vm_object_offset_t	start;
+		vm_object_offset_t	end;
+		vm_object_size_t	e_mask;
+		vm_page_t               m;
 
-			PAGE_SLEEP(object, m, THREAD_UNINT);
-			continue;
+		start = offset;
+		end   = offset + size;
+		num_of_extents = 0;
+		e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
 
-		    case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
-		    case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
-			/*
-			 * The clean and return cases are similar.
-			 *
-			 */
+		m = (vm_page_t) queue_first(&object->memq);
 
-			/*
-			 * if this would form a discontiguous block,
-			 * clean the old pages and start anew.
-			 *
-			 */
+		while (!queue_end(&object->memq, (queue_entry_t) m)) {
+			next = (vm_page_t) queue_next(&m->listq);
 
-			/*
-			 * Mark the page busy since we unlock the
-			 * object below.
-			 */
-			m->busy = TRUE;
-			if (pending_pageout &&
-			    (last_offset != offset ||
-			     pageout_action != page_lock_result)) {
- 	    			LIST_REQ_PAGEOUT_PAGES(object, 
-						data_cnt, pageout_action, 
-						paging_offset);
-				pending_pageout = FALSE;
-				data_cnt = 0;
-			}
-			m->busy = FALSE;
-			holding_page = VM_PAGE_NULL;
-			if(m->cleaning) {
-				PAGE_SLEEP(object, m, THREAD_UNINT);
-				continue;
-			}
-			if(!pending_pageout) {
-				pending_pageout = TRUE;
-				pageout_action = page_lock_result;
-				paging_offset = offset;
-			}
-			if (should_flush) {
-				vm_page_lock_queues();
-				m->list_req_pending = TRUE;
-				m->cleaning = TRUE;
-				m->busy = TRUE;
-				m->pageout = TRUE;
-				vm_page_wire(m);
-				vm_page_unlock_queues();
-			} else {
-				/*
-				 * Clean but do not flush
+			if ((m->offset >= start) && (m->offset < end)) {
+			        /*
+				 * this is a page we're interested in
+				 * try to fit it into a current extent
 				 */
-				vm_page_lock_queues();
-				m->list_req_pending = TRUE;
-				m->cleaning = TRUE;
-				vm_page_unlock_queues();
-
+			        for (n = 0; n < num_of_extents; n++) {
+				        if ((m->offset & e_mask) == extents[n].e_base) {
+					        /*
+						 * use (PAGE_SIZE - 1) to determine the
+						 * max offset so that we don't wrap if
+						 * we're at the last page of the space
+						 */
+					        if (m->offset < extents[n].e_min)
+						        extents[n].e_min = m->offset;
+						else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
+						        extents[n].e_max = m->offset + (PAGE_SIZE - 1);
+					        break;
+					}
+				}
+				if (n == num_of_extents) {
+				        /*
+					 * didn't find a current extent that can encompass
+					 * this page
+					 */
+				        if (n < MAX_EXTENTS) {
+					        /*
+						 * if we still have room, 
+						 * create a new extent
+						 */
+					        extents[n].e_base = m->offset & e_mask;
+						extents[n].e_min  = m->offset;
+						extents[n].e_max  = m->offset + (PAGE_SIZE - 1);
+
+						num_of_extents++;
+					} else {
+						/*
+						 * no room to create a new extent...
+						 * fall back to a single extent based
+						 * on the min and max page offsets 
+						 * we find in the range we're interested in...
+						 * first, look through the extent list and
+						 * develop the overall min and max for the
+						 * pages we've looked at up to this point
+						 */						
+					        for (n = 1; n < num_of_extents; n++) {
+						        if (extents[n].e_min < extents[0].e_min)
+						                extents[0].e_min = extents[n].e_min;
+							if (extents[n].e_max > extents[0].e_max)
+						                extents[0].e_max = extents[n].e_max;
+						}
+						/*
+						 * now setup to run through the remaining pages
+						 * to determine the overall min and max
+						 * offset for the specified range
+						 */
+						extents[0].e_base = 0;
+						e_mask = 0;
+						num_of_extents = 1;
+
+						/*
+						 * by continuing, we'll reprocess the
+						 * page that forced us to abandon trying
+						 * to develop multiple extents
+						 */
+						continue;
+					}
+				}
 			}
-			vm_object_unlock(object);
-
-
-			data_cnt += PAGE_SIZE;
-			last_offset = offset + PAGE_SIZE_64;
-			data_returned = TRUE;
-
-			vm_object_lock(object);
-			break;
+			m = next;
 		}
-		break;
-	    }
-	}
-
-	/*
-	 *	We have completed the scan for applicable pages.
-	 *	Clean any pages that have been saved.
-	 */
-	if (pending_pageout) {
- 	    LIST_REQ_PAGEOUT_PAGES(object,
-				data_cnt, pageout_action, paging_offset);
+	} else {
+	        extents[0].e_min = offset;
+		extents[0].e_max = offset + (size - 1);
+
+		num_of_extents = 1;
+	}
+	for (n = 0; n < num_of_extents; n++) {
+	        if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
+					    should_flush, should_return, should_iosync, protection))
+		        data_returned = TRUE;
 	}
 	return (data_returned);
 }
 
+
 /*
  *	Routine:	memory_object_synchronize_completed [user interface]
  *
@@ -878,20 +1098,21 @@ kern_return_t
 memory_object_synchronize_completed(
 	memory_object_control_t	control,
 	memory_object_offset_t	offset,
-	vm_offset_t			length)
+	memory_object_size_t    length)
 {
 	vm_object_t			object;
 	msync_req_t			msr;
 
+	object = memory_object_control_to_vm_object(control);
+
         XPR(XPR_MEMORY_OBJECT,
 	    "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
-	    (integer_t)object, offset, length, 0, 0);
+	    object, offset, length, 0, 0);
 
 	/*
 	 *      Look for bogus arguments
 	 */
 
-	object = memory_object_control_to_vm_object(control);
 	if (object == VM_OBJECT_NULL)
 		return (KERN_INVALID_ARGUMENT);
 
@@ -927,7 +1148,6 @@ vm_object_set_attributes_common(
 	boolean_t	may_cache,
 	memory_object_copy_strategy_t copy_strategy,
 	boolean_t	temporary,
-	vm_size_t	cluster_size,
         boolean_t	silent_overwrite,
 	boolean_t	advisory_pageout)
 {
@@ -935,7 +1155,7 @@ vm_object_set_attributes_common(
 
         XPR(XPR_MEMORY_OBJECT,
 	    "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
-	    (integer_t)object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0);
+	    object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0);
 
 	if (object == VM_OBJECT_NULL)
 		return(KERN_INVALID_ARGUMENT);
@@ -961,17 +1181,6 @@ vm_object_set_attributes_common(
 		may_cache = TRUE;
 	if (temporary)
 		temporary = TRUE;
-	if (cluster_size != 0) {
-		int	pages_per_cluster;
-		pages_per_cluster = atop(cluster_size);
-		/*
-		 * Cluster size must be integral multiple of page size,
-		 * and be a power of 2 number of pages.
-		 */
-		if ((cluster_size & (PAGE_SIZE-1)) ||
-		    ((pages_per_cluster-1) & pages_per_cluster))
-			return KERN_INVALID_ARGUMENT;
-	}
 
 	vm_object_lock(object);
 
@@ -985,12 +1194,6 @@ vm_object_set_attributes_common(
 	object->temporary = temporary;
 	object->silent_overwrite = silent_overwrite;
 	object->advisory_pageout = advisory_pageout;
-	if (cluster_size == 0)
-		cluster_size = PAGE_SIZE;
-	object->cluster_size = cluster_size;
-
-	assert(cluster_size >= PAGE_SIZE &&
-	       cluster_size % PAGE_SIZE == 0);
 
 	/*
 	 *	Wake up anyone waiting for the ready attribute
@@ -1027,7 +1230,6 @@ memory_object_change_attributes(
 	boolean_t       		temporary;
 	boolean_t       		may_cache;
 	boolean_t       		invalidate;
-	vm_size_t			cluster_size;
 	memory_object_copy_strategy_t	copy_strategy;
 	boolean_t       		silent_overwrite;
 	boolean_t			advisory_pageout;
@@ -1046,7 +1248,6 @@ memory_object_change_attributes(
 #if notyet
 	invalidate = object->invalidate;
 #endif
-	cluster_size = object->cluster_size;
 	vm_object_unlock(object);	
 
 	switch (flavor) {
@@ -1099,7 +1300,6 @@ memory_object_change_attributes(
                 perf = (memory_object_perf_info_t) attributes;
 
 		may_cache = perf->may_cache;
-		cluster_size = round_page(perf->cluster_size);
 
 		break;
 	    }
@@ -1117,7 +1317,6 @@ memory_object_change_attributes(
 
                 may_cache = attr->may_cache;
                 copy_strategy = attr->copy_strategy;
-		cluster_size = page_size;
 
 		break;
 	    }
@@ -1135,7 +1334,6 @@ memory_object_change_attributes(
 
 		copy_strategy = attr->copy_strategy;
                 may_cache = attr->may_cache_object;
-		cluster_size = attr->cluster_size;
 		temporary = attr->temporary;
 
 		break;
@@ -1164,7 +1362,6 @@ memory_object_change_attributes(
 						     may_cache,
 						     copy_strategy,
 						     temporary,
-						     cluster_size,
 						     silent_overwrite,
 						     advisory_pageout));
 }
@@ -1241,7 +1438,7 @@ memory_object_get_attributes(
 		}
 
 		perf = (memory_object_perf_info_t) attributes;
-		perf->cluster_size = object->cluster_size;
+		perf->cluster_size = PAGE_SIZE;
 		perf->may_cache = object->can_persist;
 
 		*count = MEMORY_OBJECT_PERF_INFO_COUNT;
@@ -1276,7 +1473,7 @@ memory_object_get_attributes(
 
                 attr = (memory_object_attr_info_t) attributes;
         	attr->copy_strategy = object->copy_strategy;
-		attr->cluster_size = object->cluster_size;
+		attr->cluster_size = PAGE_SIZE;
         	attr->may_cache_object = object->can_persist;
 		attr->temporary = object->temporary;
 
@@ -1295,6 +1492,143 @@ memory_object_get_attributes(
 }
 
 
+kern_return_t
+memory_object_iopl_request(
+	ipc_port_t		port,
+	memory_object_offset_t	offset,
+	upl_size_t		*upl_size,
+	upl_t			*upl_ptr,
+	upl_page_info_array_t	user_page_list,
+	unsigned int		*page_list_count,
+	int			*flags)
+{
+	vm_object_t		object;
+	kern_return_t		ret;
+	int			caller_flags;
+
+	caller_flags = *flags;
+
+	if (caller_flags & ~UPL_VALID_FLAGS) {
+		/*
+		 * For forward compatibility's sake,
+		 * reject any unknown flag.
+		 */
+		return KERN_INVALID_VALUE;
+	}
+
+	if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
+		vm_named_entry_t	named_entry;
+
+		named_entry = (vm_named_entry_t)port->ip_kobject;
+		/* a few checks to make sure user is obeying rules */
+		if(*upl_size == 0) {
+			if(offset >= named_entry->size)
+				return(KERN_INVALID_RIGHT);
+			*upl_size = (upl_size_t)(named_entry->size - offset);
+			if (*upl_size != named_entry->size - offset)
+				return KERN_INVALID_ARGUMENT;
+		}
+		if(caller_flags & UPL_COPYOUT_FROM) {
+			if((named_entry->protection & VM_PROT_READ) 
+						!= VM_PROT_READ) {
+				return(KERN_INVALID_RIGHT);
+			}
+		} else {
+			if((named_entry->protection & 
+				(VM_PROT_READ | VM_PROT_WRITE)) 
+				!= (VM_PROT_READ | VM_PROT_WRITE)) {
+				return(KERN_INVALID_RIGHT);
+			}
+		}
+		if(named_entry->size < (offset + *upl_size))
+			return(KERN_INVALID_ARGUMENT);
+
+		/* the callers parameter offset is defined to be the */
+		/* offset from beginning of named entry offset in object */
+		offset = offset + named_entry->offset;
+
+		if(named_entry->is_sub_map) 
+			return (KERN_INVALID_ARGUMENT);
+		
+		named_entry_lock(named_entry);
+
+		if (named_entry->is_pager) {
+			object = vm_object_enter(named_entry->backing.pager, 
+					named_entry->offset + named_entry->size, 
+					named_entry->internal, 
+					FALSE,
+					FALSE);
+			if (object == VM_OBJECT_NULL) {
+				named_entry_unlock(named_entry);
+				return(KERN_INVALID_OBJECT);
+			}
+
+			/* JMM - drop reference on pager here? */
+
+			/* create an extra reference for the named entry */
+			vm_object_lock(object);
+			vm_object_reference_locked(object);
+			named_entry->backing.object = object;
+			named_entry->is_pager = FALSE;
+			named_entry_unlock(named_entry);
+
+			/* wait for object to be ready */
+			while (!object->pager_ready) {
+				vm_object_wait(object,
+						VM_OBJECT_EVENT_PAGER_READY,
+						THREAD_UNINT);
+				vm_object_lock(object);
+			}
+			vm_object_unlock(object);
+		} else {
+			/* This is the case where we are going to map */
+			/* an already mapped object.  If the object is */
+			/* not ready it is internal.  An external     */
+			/* object cannot be mapped until it is ready  */
+			/* we can therefore avoid the ready check     */
+			/* in this case.  */
+			object = named_entry->backing.object;
+			vm_object_reference(object);
+			named_entry_unlock(named_entry);
+		}
+	} else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) {
+		memory_object_control_t	control;
+		control = (memory_object_control_t) port;
+		if (control == NULL)
+			return (KERN_INVALID_ARGUMENT);
+		object = memory_object_control_to_vm_object(control);
+		if (object == VM_OBJECT_NULL)
+			return (KERN_INVALID_ARGUMENT);
+		vm_object_reference(object);
+	} else {
+		return KERN_INVALID_ARGUMENT;
+	}
+	if (object == VM_OBJECT_NULL)
+		return (KERN_INVALID_ARGUMENT);
+
+	if (!object->private) {
+		if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
+			*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
+		if (object->phys_contiguous) {
+			*flags = UPL_PHYS_CONTIG;
+		} else {
+			*flags = 0;
+		}
+	} else {
+		*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
+	}
+
+	ret = vm_object_iopl_request(object,
+				     offset,
+				     *upl_size,
+				     upl_ptr,
+				     user_page_list,
+				     page_list_count,
+				     caller_flags);
+	vm_object_deallocate(object);
+	return ret;
+}
+
 /*  
  *	Routine:	memory_object_upl_request [interface]
  *	Purpose:
@@ -1308,7 +1642,7 @@ kern_return_t
 memory_object_upl_request(
 	memory_object_control_t	control,
 	memory_object_offset_t	offset,
-	vm_size_t		size,
+	upl_size_t		size,
 	upl_t			*upl_ptr,
 	upl_page_info_array_t	user_page_list,
 	unsigned int		*page_list_count,
@@ -1318,7 +1652,7 @@ memory_object_upl_request(
 
 	object = memory_object_control_to_vm_object(control);
 	if (object == VM_OBJECT_NULL)
-		return (KERN_INVALID_ARGUMENT);
+		return (KERN_TERMINATED);
 
 	return vm_object_upl_request(object,
 				     offset,
@@ -1344,8 +1678,8 @@ kern_return_t
 memory_object_super_upl_request(
 	memory_object_control_t control,
 	memory_object_offset_t	offset,
-	vm_size_t		size,
-	vm_size_t		super_cluster,
+	upl_size_t		size,
+	upl_size_t		super_cluster,
 	upl_t			*upl,
 	upl_page_info_t		*user_page_list,
 	unsigned int		*page_list_count,
@@ -1367,6 +1701,27 @@ memory_object_super_upl_request(
 					   cntrl_flags);
 }
 
+kern_return_t
+memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start,
+			   vm_size_t *length, uint32_t *io_streaming, memory_object_fault_info_t fault_info)
+{
+	vm_object_t		object;
+
+	object = memory_object_control_to_vm_object(control);
+
+	if (object == VM_OBJECT_NULL || object->paging_offset > *start)
+		return (KERN_INVALID_ARGUMENT);
+
+	*start -= object->paging_offset;
+
+	vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info, io_streaming);
+
+	*start += object->paging_offset;
+
+	return (KERN_SUCCESS);
+}
+
+
 int vm_stat_discard_cleared_reply = 0;
 int vm_stat_discard_cleared_unset = 0;
 int vm_stat_discard_cleared_too_late = 0;
@@ -1385,11 +1740,12 @@ kern_return_t
 host_default_memory_manager(
 	host_priv_t		host_priv,
 	memory_object_default_t	*default_manager,
-	vm_size_t		cluster_size)
+	__unused memory_object_cluster_size_t cluster_size)
 {
 	memory_object_default_t current_manager;
 	memory_object_default_t new_manager;
 	memory_object_default_t returned_manager;
+	kern_return_t result = KERN_SUCCESS;
 
 	if (host_priv == HOST_PRIV_NULL)
 		return(KERN_INVALID_HOST);
@@ -1397,16 +1753,28 @@ host_default_memory_manager(
 	assert(host_priv == &realhost);
 
 	new_manager = *default_manager;
-	mutex_lock(&memory_manager_default_lock);
+	lck_mtx_lock(&memory_manager_default_lock);
 	current_manager = memory_manager_default;
+	returned_manager = MEMORY_OBJECT_DEFAULT_NULL;
 
 	if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
 		/*
 		 *	Retrieve the current value.
 		 */
-		memory_object_default_reference(current_manager);
 		returned_manager = current_manager;
+		memory_object_default_reference(returned_manager);
 	} else {
+
+		/*
+		 *	If this is the first non-null manager, start
+		 *	up the internal pager support.
+		 */
+		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
+			result = vm_pageout_internal_start();
+			if (result != KERN_SUCCESS)
+				goto out;
+		}
+
 		/*
 		 *	Retrieve the current value,
 		 *	and replace it with the supplied value.
@@ -1414,33 +1782,34 @@ host_default_memory_manager(
 		 *	but we have to take a reference on the new
 		 *	one.
 		 */
-
 		returned_manager = current_manager;
 		memory_manager_default = new_manager;
 		memory_object_default_reference(new_manager);
 
-		if (cluster_size % PAGE_SIZE != 0) {
-#if 0
-			mutex_unlock(&memory_manager_default_lock);
-			return KERN_INVALID_ARGUMENT;
-#else
-			cluster_size = round_page(cluster_size);
-#endif
-		}
-		memory_manager_default_cluster = cluster_size;
-
 		/*
 		 *	In case anyone's been waiting for a memory
 		 *	manager to be established, wake them up.
 		 */
 
 		thread_wakeup((event_t) &memory_manager_default);
-	}
 
-	mutex_unlock(&memory_manager_default_lock);
+#ifndef CONFIG_FREEZE
+		/*
+		 * Now that we have a default pager for anonymous memory,
+		 * reactivate all the throttled pages (i.e. dirty pages with
+		 * no pager).
+		 */
+		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL)
+		{
+			vm_page_reactivate_all_throttled();
+		}
+#endif
+	}
+ out:
+	lck_mtx_unlock(&memory_manager_default_lock);
 
 	*default_manager = returned_manager;
-	return(KERN_SUCCESS);
+	return(result);
 }
 
 /*
@@ -1452,25 +1821,24 @@ host_default_memory_manager(
  */
 
 __private_extern__ memory_object_default_t
-memory_manager_default_reference(
-	vm_size_t	*cluster_size)
+memory_manager_default_reference(void)
 {
 	memory_object_default_t current_manager;
 
-	mutex_lock(&memory_manager_default_lock);
+	lck_mtx_lock(&memory_manager_default_lock);
 	current_manager = memory_manager_default;
 	while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
 		wait_result_t res;
 
-		res = thread_sleep_mutex((event_t) &memory_manager_default,
-					 &memory_manager_default_lock,
-					 THREAD_UNINT);
+		res = lck_mtx_sleep(&memory_manager_default_lock,
+					LCK_SLEEP_DEFAULT,
+					(event_t) &memory_manager_default,
+					THREAD_UNINT);
 		assert(res == THREAD_AWAKENED);
 		current_manager = memory_manager_default;
 	}
 	memory_object_default_reference(current_manager);
-	*cluster_size = memory_manager_default_cluster;
-	mutex_unlock(&memory_manager_default_lock);
+	lck_mtx_unlock(&memory_manager_default_lock);
 
 	return current_manager;
 }
@@ -1492,18 +1860,18 @@ memory_manager_default_check(void)
 {
 	memory_object_default_t current;
 
-	mutex_lock(&memory_manager_default_lock);
+	lck_mtx_lock(&memory_manager_default_lock);
 	current = memory_manager_default;
 	if (current == MEMORY_OBJECT_DEFAULT_NULL) {
 		static boolean_t logged;	/* initialized to 0 */
 		boolean_t	complain = !logged;
 		logged = TRUE;
-		mutex_unlock(&memory_manager_default_lock);
+		lck_mtx_unlock(&memory_manager_default_lock);
 		if (complain)
 			printf("Warning: No default memory manager\n");
 		return(KERN_FAILURE);
 	} else {
-		mutex_unlock(&memory_manager_default_lock);
+		lck_mtx_unlock(&memory_manager_default_lock);
 		return(KERN_SUCCESS);
 	}
 }
@@ -1512,209 +1880,145 @@ __private_extern__ void
 memory_manager_default_init(void)
 {
 	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
-	mutex_init(&memory_manager_default_lock, ETAP_VM_MEMMAN);
+	lck_mtx_init(&memory_manager_default_lock, &vm_object_lck_grp, &vm_object_lck_attr);
 }
 
 
-void
-memory_object_deactivate_pages(
-	vm_object_t		object,
-	vm_object_offset_t	offset,
-	vm_object_size_t	size,
-	boolean_t               kill_page)
-{
-	vm_object_t		orig_object;
-	int pages_moved = 0;
-	int pages_found = 0;
 
-	/*
-	 * entered with object lock held, acquire a paging reference to
-	 * prevent the memory_object and control ports from
-	 * being destroyed.
-	 */
-	orig_object = object;
+/* Allow manipulation of individual page state.  This is actually part of */
+/* the UPL regimen but takes place on the object rather than on a UPL */
 
-	for (;;) {
-	        register vm_page_t	m;
-	        vm_object_offset_t	toffset;
-		vm_object_size_t	tsize;
+kern_return_t
+memory_object_page_op(
+	memory_object_control_t	control,
+	memory_object_offset_t	offset,
+	int			ops,
+	ppnum_t			*phys_entry,
+	int			*flags)
+{
+	vm_object_t		object;
 
-	        vm_object_paging_begin(object);
-		vm_page_lock_queues();
+	object = memory_object_control_to_vm_object(control);
+	if (object == VM_OBJECT_NULL)
+		return (KERN_INVALID_ARGUMENT);
 
-		for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
+	return vm_object_page_op(object, offset, ops, phys_entry, flags);
+}
 
-		        if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
+/*
+ * memory_object_range_op offers performance enhancement over 
+ * memory_object_page_op for page_op functions which do not require page 
+ * level state to be returned from the call.  Page_op was created to provide 
+ * a low-cost alternative to page manipulation via UPLs when only a single 
+ * page was involved.  The range_op call establishes the ability in the _op 
+ * family of functions to work on multiple pages where the lack of page level
+ * state handling allows the caller to avoid the overhead of the upl structures.
+ */
 
-			        pages_found++;
+kern_return_t
+memory_object_range_op(
+	memory_object_control_t	control,
+	memory_object_offset_t	offset_beg,
+	memory_object_offset_t	offset_end,
+	int                     ops,
+	int                     *range)
+{
+	vm_object_t		object;
 
-				if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
+	object = memory_object_control_to_vm_object(control);
+	if (object == VM_OBJECT_NULL)
+		return (KERN_INVALID_ARGUMENT);
 
-					m->reference = FALSE;
-					pmap_clear_reference(m->phys_addr);
+	return vm_object_range_op(object,
+				  offset_beg,
+				  offset_end,
+				  ops,
+				  (uint32_t *) range);
+}
 
-					if ((kill_page) && (object->internal)) {
-				        	m->precious = FALSE;
-					        m->dirty = FALSE;
-						pmap_clear_modify(m->phys_addr);
-						vm_external_state_clr(object->existence_map, offset);
-					}
-					VM_PAGE_QUEUES_REMOVE(m);
 
-					if(m->zero_fill) {
-						queue_enter_first(
-							&vm_page_queue_zf, 
-							m, vm_page_t, pageq);
-					} else {
-						queue_enter_first(
-							&vm_page_queue_inactive, 
-							m, vm_page_t, pageq);
-					}
+void
+memory_object_mark_used(
+        memory_object_control_t	control)
+{
+	vm_object_t		object;
 
-					m->inactive = TRUE;
-					if (!m->fictitious)  
-					        vm_page_inactive_count++;
+	if (control == NULL)
+		return;
 
-					pages_moved++;
-				}
-			}
-		}
-		vm_page_unlock_queues();
-		vm_object_paging_end(object);
+	object = memory_object_control_to_vm_object(control);
 
-		if (object->shadow) {
-		        vm_object_t	tmp_object;
+	if (object != VM_OBJECT_NULL)
+		vm_object_cache_remove(object);
+}
 
-			kill_page = 0;
 
-		        offset += object->shadow_offset;
+void
+memory_object_mark_unused(
+	memory_object_control_t	control,
+	__unused boolean_t	rage)
+{
+	vm_object_t		object;
 
-		        tmp_object = object->shadow;
-		        vm_object_lock(tmp_object);
+	if (control == NULL)
+		return;
 
-			if (object != orig_object)
-			        vm_object_unlock(object);
-			object = tmp_object;
-		} else
-		        break;
-	}
-	if (object != orig_object)
-	        vm_object_unlock(object);
+	object = memory_object_control_to_vm_object(control);
+
+	if (object != VM_OBJECT_NULL)
+		vm_object_cache_add(object);
 }
 
-/* Allow manipulation of individual page state.  This is actually part of */
-/* the UPL regimen but takes place on the object rather than on a UPL */
 
 kern_return_t
-memory_object_page_op(
+memory_object_pages_resident(
 	memory_object_control_t	control,
-	memory_object_offset_t	offset,
-	int			ops,
-	vm_offset_t		*phys_entry,
-	int			*flags)
+	boolean_t			*	has_pages_resident)
 {
 	vm_object_t		object;
-	vm_page_t		dst_page;
 
+	*has_pages_resident = FALSE;
 
 	object = memory_object_control_to_vm_object(control);
 	if (object == VM_OBJECT_NULL)
 		return (KERN_INVALID_ARGUMENT);
 
-	vm_object_lock(object);
-
-	if(ops & UPL_POP_PHYSICAL) {
-		if(object->phys_contiguous) {
-			if (phys_entry) {
-				*phys_entry = (vm_offset_t)
-						object->shadow_offset;
-			}
-			vm_object_unlock(object);
-			return KERN_SUCCESS;
-		} else {
-			vm_object_unlock(object);
-			return KERN_INVALID_OBJECT;
-		}
-	}
-
-	while(TRUE) {
-		if(object->phys_contiguous) {
-			vm_object_unlock(object);
-			return KERN_INVALID_OBJECT;
-		}
-
-		if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
-			vm_object_unlock(object);
-			return KERN_FAILURE;
-		}
+	if (object->resident_page_count)
+		*has_pages_resident = TRUE;
+	
+	return (KERN_SUCCESS);
+}
 
-		/* Sync up on getting the busy bit */
-		if((dst_page->busy || dst_page->cleaning) && 
-			   (((ops & UPL_POP_SET) && 
-			   (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
-			/* someone else is playing with the page, we will */
-			/* have to wait */
-			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
-			continue;
-		}
+kern_return_t
+memory_object_signed(
+	memory_object_control_t	control,
+	boolean_t		is_signed)
+{
+	vm_object_t	object;
 
-		if (ops & UPL_POP_DUMP) {
-		  vm_page_lock_queues();
-                  vm_page_free(dst_page);
-		  vm_page_unlock_queues();
-		  break;
-		}
+	object = memory_object_control_to_vm_object(control);
+	if (object == VM_OBJECT_NULL)
+		return KERN_INVALID_ARGUMENT;
 
-		if (flags) {
-		        *flags = 0;
+	vm_object_lock(object);
+	object->code_signed = is_signed;
+	vm_object_unlock(object);
 
-			/* Get the condition of flags before requested ops */
-			/* are undertaken */
+	return KERN_SUCCESS;
+}
 
-			if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
-			if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
-			if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
-			if(dst_page->absent) *flags |= UPL_POP_ABSENT;
-			if(dst_page->busy) *flags |= UPL_POP_BUSY;
-		}
-		if (phys_entry)
-			*phys_entry = dst_page->phys_addr;
-	
-		/* The caller should have made a call either contingent with */
-		/* or prior to this call to set UPL_POP_BUSY */
-		if(ops & UPL_POP_SET) {
-			/* The protection granted with this assert will */
-			/* not be complete.  If the caller violates the */
-			/* convention and attempts to change page state */
-			/* without first setting busy we may not see it */
-			/* because the page may already be busy.  However */
-			/* if such violations occur we will assert sooner */
-			/* or later. */
-			assert(dst_page->busy || (ops & UPL_POP_BUSY));
-			if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
-			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
-			if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
-			if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
-			if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
-		}
+boolean_t
+memory_object_is_slid(
+	memory_object_control_t	control)
+{
+	vm_object_t	object = VM_OBJECT_NULL;
+	vm_object_t	slide_object = slide_info.slide_object;
 
-		if(ops & UPL_POP_CLR) {
-			assert(dst_page->busy);
-			if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
-			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
-			if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
-			if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
-			if (ops & UPL_POP_BUSY) {
-			        dst_page->busy = FALSE;
-				PAGE_WAKEUP(dst_page);
-			}
-		}
-		break;
-	}
+	object = memory_object_control_to_vm_object(control);
+	if (object == VM_OBJECT_NULL)
+		return FALSE;
 
-	vm_object_unlock(object);
-	return KERN_SUCCESS;
-				
+	return (object == slide_object);
 }
 
 static zone_t mem_obj_control_zone;
@@ -1726,6 +2030,8 @@ memory_object_control_bootstrap(void)
 
 	i = (vm_size_t) sizeof (struct memory_object_control);
 	mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
+	zone_change(mem_obj_control_zone, Z_CALLERACCT, FALSE);
+	zone_change(mem_obj_control_zone, Z_NOENCRYPT, TRUE);
 	return;
 }
 
@@ -1736,8 +2042,10 @@ memory_object_control_allocate(
 	memory_object_control_t control;
 
 	control = (memory_object_control_t)zalloc(mem_obj_control_zone);
-	if (control != MEMORY_OBJECT_CONTROL_NULL)
-		control->object = object;
+	if (control != MEMORY_OBJECT_CONTROL_NULL) {
+		control->moc_object = object;
+		control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
+	}
 	return (control);
 }
 
@@ -1746,24 +2054,25 @@ memory_object_control_collapse(
 	memory_object_control_t control,		       
 	vm_object_t		object)
 {		       
-	assert((control->object != VM_OBJECT_NULL) &&
-	       (control->object != object));
-	control->object = object;
+	assert((control->moc_object != VM_OBJECT_NULL) &&
+	       (control->moc_object != object));
+	control->moc_object = object;
 }
 
 __private_extern__ vm_object_t
 memory_object_control_to_vm_object(
 	memory_object_control_t	control)
 {
-	if (control == MEMORY_OBJECT_CONTROL_NULL)
+	if (control == MEMORY_OBJECT_CONTROL_NULL ||
+	    control->moc_ikot != IKOT_MEM_OBJ_CONTROL)
 		return VM_OBJECT_NULL;
 
-	return (control->object);
+	return (control->moc_object);
 }
 
 memory_object_control_t
 convert_port_to_mo_control(
-	mach_port_t	port)
+	__unused mach_port_t	port)
 {
 	return MEMORY_OBJECT_CONTROL_NULL;
 }
@@ -1771,14 +2080,14 @@ convert_port_to_mo_control(
 
 mach_port_t
 convert_mo_control_to_port(
-	memory_object_control_t	control)
+	__unused memory_object_control_t	control)
 {
 	return MACH_PORT_NULL;
 }
 
 void
 memory_object_control_reference(
-	memory_object_control_t	control)
+	__unused memory_object_control_t	control)
 {
 	return;
 }
@@ -1792,15 +2101,15 @@ void
 memory_object_control_deallocate(
 	memory_object_control_t	control)
 {
-	zfree(mem_obj_control_zone, (vm_offset_t)control);
+	zfree(mem_obj_control_zone, control);
 }
 
 void
 memory_object_control_disable(
 	memory_object_control_t	control)
 {
-	assert(control->object != VM_OBJECT_NULL);
-	control->object = VM_OBJECT_NULL;
+	assert(control->moc_object != VM_OBJECT_NULL);
+	control->moc_object = VM_OBJECT_NULL;
 }
 
 void
@@ -1819,7 +2128,7 @@ memory_object_default_deallocate(
 
 memory_object_t
 convert_port_to_memory_object(
-	mach_port_t	port)
+	__unused mach_port_t	port)
 {
 	return (MEMORY_OBJECT_NULL);
 }
@@ -1827,54 +2136,26 @@ convert_port_to_memory_object(
 
 mach_port_t
 convert_memory_object_to_port(
-	memory_object_t	object)
+	__unused memory_object_t	object)
 {
 	return (MACH_PORT_NULL);
 }
 
-#ifdef MACH_BSD
-/* remove after component interface available */
-extern int	vnode_pager_workaround;
-extern int	device_pager_workaround;
-#endif
-
 
 /* Routine memory_object_reference */
 void memory_object_reference(
 	memory_object_t memory_object)
 {
-extern void   dp_memory_object_reference(memory_object_t);
-
-#ifdef	MACH_BSD
- extern void   vnode_pager_reference(memory_object_t);
- extern void   device_pager_reference(memory_object_t);
-
-		if(memory_object->pager == &vnode_pager_workaround) {
-			vnode_pager_reference(memory_object);
-		} else if(memory_object->pager == &device_pager_workaround) {
-			device_pager_reference(memory_object);
-		} else
-#endif
-			dp_memory_object_reference(memory_object);
+	(memory_object->mo_pager_ops->memory_object_reference)(
+		memory_object);
 }
 
 /* Routine memory_object_deallocate */
 void memory_object_deallocate(
 	memory_object_t memory_object)
 {
-extern void   dp_memory_object_deallocate(memory_object_t);
-
-#ifdef	MACH_BSD
- extern void   vnode_pager_deallocate(memory_object_t);
- extern void   device_pager_deallocate(memory_object_t);
-
-		if(memory_object->pager == &vnode_pager_workaround) {
-			vnode_pager_deallocate(memory_object);
-		} else if(memory_object->pager == &device_pager_workaround) {
-			device_pager_deallocate(memory_object);
-		} else
-#endif
-			dp_memory_object_deallocate(memory_object);
+	(memory_object->mo_pager_ops->memory_object_deallocate)(
+		 memory_object);
 }
 
 
@@ -1883,33 +2164,13 @@ kern_return_t memory_object_init
 (
 	memory_object_t memory_object,
 	memory_object_control_t memory_control,
-	vm_size_t memory_object_page_size
+	memory_object_cluster_size_t memory_object_page_size
 )
 {
-extern kern_return_t   dp_memory_object_init(memory_object_t,
-					     memory_object_control_t,
-					     vm_size_t);
-#ifdef	MACH_BSD
-extern kern_return_t   vnode_pager_init(memory_object_t,
-					memory_object_control_t,
-					vm_size_t);
-extern kern_return_t   device_pager_init(memory_object_t,
-					memory_object_control_t,
-					vm_size_t);
-
-		if(memory_object->pager == &vnode_pager_workaround) {
-			return vnode_pager_init(memory_object,
-				memory_control,
-				memory_object_page_size);
-		} else if(memory_object->pager == &device_pager_workaround) {
-			return device_pager_init(memory_object,
-				memory_control,
-				memory_object_page_size);
-		} else
-#endif
-			return dp_memory_object_init(memory_object,
-				memory_control,
-				memory_object_page_size);
+	return (memory_object->mo_pager_ops->memory_object_init)(
+		memory_object,
+		memory_control,
+		memory_object_page_size);
 }
 
 /* Routine memory_object_terminate */
@@ -1918,19 +2179,8 @@ kern_return_t memory_object_terminate
 	memory_object_t memory_object
 )
 {
-extern	kern_return_t dp_memory_object_terminate(memory_object_t);
-
-#ifdef	MACH_BSD
-extern	kern_return_t vnode_pager_terminate(memory_object_t);
-extern	kern_return_t device_pager_terminate(memory_object_t);
-
-	if(memory_object->pager == &vnode_pager_workaround) {
-		return vnode_pager_terminate(memory_object);
-	} else if(memory_object->pager == &device_pager_workaround) {
-		return device_pager_terminate(memory_object);
-	} else
-#endif
-		return dp_memory_object_terminate(memory_object);
+	return (memory_object->mo_pager_ops->memory_object_terminate)(
+		memory_object);
 }
 
 /* Routine memory_object_data_request */
@@ -1938,35 +2188,17 @@ kern_return_t memory_object_data_request
 (
 	memory_object_t memory_object,
 	memory_object_offset_t offset,
-	vm_size_t length,
-	vm_prot_t desired_access
+	memory_object_cluster_size_t length,
+	vm_prot_t desired_access,
+	memory_object_fault_info_t fault_info
 )
 {
-extern	kern_return_t   dp_memory_object_data_request(memory_object_t, 
-			memory_object_offset_t, vm_size_t, vm_prot_t);
-
-#ifdef	MACH_BSD
-extern	kern_return_t   vnode_pager_data_request(memory_object_t, 
-			memory_object_offset_t, vm_size_t, vm_prot_t);
-extern	kern_return_t   device_pager_data_request(memory_object_t, 
-			memory_object_offset_t, vm_size_t, vm_prot_t);
-
-		if (memory_object->pager == &vnode_pager_workaround) {
-	        	return vnode_pager_data_request(memory_object, 
-				      offset, 
-				      length,
-				      desired_access);
-		} else if (memory_object->pager == &device_pager_workaround) {
-	        	return device_pager_data_request(memory_object, 
-				      offset, 
-				      length,
-				      desired_access);
-		} else
-#endif
-	       		return dp_memory_object_data_request(memory_object, 
-					offset, 
-					length,
-					desired_access);
+	return (memory_object->mo_pager_ops->memory_object_data_request)(
+		memory_object,
+		offset, 
+		length,
+		desired_access,
+		fault_info);
 }
 
 /* Routine memory_object_data_return */
@@ -1974,47 +2206,23 @@ kern_return_t memory_object_data_return
 (
 	memory_object_t memory_object,
 	memory_object_offset_t offset,
-	vm_size_t size,
+	memory_object_cluster_size_t size,
+	memory_object_offset_t *resid_offset,
+	int	*io_error,
 	boolean_t dirty,
-	boolean_t kernel_copy
+	boolean_t kernel_copy,
+	int	upl_flags
 )
 {
-  extern kern_return_t dp_memory_object_data_return(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t,
-						    boolean_t,
-						    boolean_t);
-#ifdef MACH_BSD
-  extern kern_return_t vnode_pager_data_return(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t,
-						    boolean_t,
-						    boolean_t);
-  extern kern_return_t device_pager_data_return(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t,
-						    boolean_t,
-						    boolean_t);
-
-	if (memory_object->pager == &vnode_pager_workaround) {
-		return vnode_pager_data_return(memory_object,
-				       offset,
-				       size,
-				       dirty,
-				       kernel_copy);
-	} else if (memory_object->pager == &device_pager_workaround) {
-		return device_pager_data_return(memory_object,
-				       offset,
-				       size,
-				       dirty,
-				       kernel_copy);
-	} else
-#endif
-		return dp_memory_object_data_return(memory_object,
-				       offset,
-				       size,
-				       dirty,
-				       kernel_copy);
+	return (memory_object->mo_pager_ops->memory_object_data_return)(
+		memory_object,
+		offset,
+		size,
+		resid_offset,
+		io_error,
+		dirty,
+		kernel_copy,
+		upl_flags);
 }
 
 /* Routine memory_object_data_initialize */
@@ -2022,34 +2230,13 @@ kern_return_t memory_object_data_initialize
 (
 	memory_object_t memory_object,
 	memory_object_offset_t offset,
-	vm_size_t size
+	memory_object_cluster_size_t size
 )
 {
-
-  extern kern_return_t dp_memory_object_data_initialize(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t);
-#ifdef MACH_BSD
-  extern kern_return_t vnode_pager_data_initialize(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t);
-  extern kern_return_t device_pager_data_initialize(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t);
-
-	if (memory_object->pager == &vnode_pager_workaround) {
-		return vnode_pager_data_initialize(memory_object,
-				       offset,
-				       size);
-	} else if (memory_object->pager == &device_pager_workaround) {
-		return device_pager_data_initialize(memory_object,
-				       offset,
-				       size);
-	} else
-#endif
-		return dp_memory_object_data_initialize(memory_object,
-				       offset,
-				       size);
+	return (memory_object->mo_pager_ops->memory_object_data_initialize)(
+		memory_object,
+		offset,
+		size);
 }
 
 /* Routine memory_object_data_unlock */
@@ -2057,41 +2244,15 @@ kern_return_t memory_object_data_unlock
 (
 	memory_object_t memory_object,
 	memory_object_offset_t offset,
-	vm_size_t size,
+	memory_object_size_t size,
 	vm_prot_t desired_access
 )
 {
-  extern kern_return_t dp_memory_object_data_unlock(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t,
-						    vm_prot_t);
-#ifdef MACH_BSD
-  extern kern_return_t vnode_pager_data_unlock(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t,
-						    vm_prot_t);
-  extern kern_return_t device_pager_data_unlock(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t,
-						    vm_prot_t);
-
-	if (memory_object->pager == &vnode_pager_workaround) {
-		return vnode_pager_data_unlock(memory_object,
-				       offset,
-				       size,
-				       desired_access);
-	} else if (memory_object->pager == &device_pager_workaround) {
-		return device_pager_data_unlock(memory_object,
-				       offset,
-				       size,
-				       desired_access);
-	} else
-#endif
-		return dp_memory_object_data_unlock(memory_object,
-				       offset,
-				       size,
-				       desired_access);
-
+	return (memory_object->mo_pager_ops->memory_object_data_unlock)(
+		memory_object,
+		offset,
+		size,
+		desired_access);
 }
 
 /* Routine memory_object_synchronize */
@@ -2099,63 +2260,70 @@ kern_return_t memory_object_synchronize
 (
 	memory_object_t memory_object,
 	memory_object_offset_t offset,
-	vm_size_t size,
+	memory_object_size_t size,
 	vm_sync_t sync_flags
 )
 {
-  extern kern_return_t dp_memory_object_data_synchronize(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t,
-						    vm_sync_t);
-#ifdef MACH_BSD
-  extern kern_return_t vnode_pager_data_synchronize(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t,
-						    vm_sync_t);
-  extern kern_return_t device_pager_data_synchronize(memory_object_t,
-						    memory_object_offset_t,
-						    vm_size_t,
-						    vm_sync_t);
-
-	if (memory_object->pager == &vnode_pager_workaround) {
-                	return vnode_pager_synchronize(
-				memory_object,
-				offset,
-				size,
-				sync_flags);
-	} else if (memory_object->pager == &device_pager_workaround) {
-                	return device_pager_synchronize(
-				memory_object,
-				offset,
-				size,
-				sync_flags);
-	} else
-#endif
-			return dp_memory_object_synchronize(
-				memory_object,
-				offset,
-				size,
-				sync_flags);
+	return (memory_object->mo_pager_ops->memory_object_synchronize)(
+		memory_object,
+		offset,
+		size,
+		sync_flags);
+}
+
+
+/*
+ * memory_object_map() is called by VM (in vm_map_enter() and its variants)
+ * each time a "named" VM object gets mapped directly or indirectly
+ * (copy-on-write mapping).  A "named" VM object has an extra reference held
+ * by the pager to keep it alive until the pager decides that the 
+ * memory object (and its VM object) can be reclaimed.
+ * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
+ * the mappings of that memory object have been removed.
+ *
+ * For a given VM object, calls to memory_object_map() and memory_object_unmap()
+ * are serialized (through object->mapping_in_progress), to ensure that the
+ * pager gets a consistent view of the mapping status of the memory object.
+ *
+ * This allows the pager to keep track of how many times a memory object
+ * has been mapped and with which protections, to decide when it can be
+ * reclaimed.
+ */
+
+/* Routine memory_object_map */
+kern_return_t memory_object_map
+(
+	memory_object_t memory_object,
+	vm_prot_t prot
+)
+{
+	return (memory_object->mo_pager_ops->memory_object_map)(
+		memory_object,
+		prot);
 }
 
-/* Routine memory_object_unmap */
-kern_return_t memory_object_unmap
+/* Routine memory_object_last_unmap */
+kern_return_t memory_object_last_unmap
 (
 	memory_object_t memory_object
 )
 {
-	extern kern_return_t dp_memory_object_unmap(memory_object_t);
-#ifdef MACH_BSD
-	extern kern_return_t vnode_pager_unmap(memory_object_t);
-	extern kern_return_t device_pager_unmap(memory_object_t);
-
-	if (memory_object->pager == &vnode_pager_workaround) {
-                	return vnode_pager_unmap(memory_object);
-	} else if (memory_object->pager == &device_pager_workaround) {
-                	return device_pager_unmap(memory_object);
-	} else
-#endif
-			return dp_memory_object_unmap(memory_object);
+	return (memory_object->mo_pager_ops->memory_object_last_unmap)(
+		memory_object);
+}
+
+/* Routine memory_object_data_reclaim */
+kern_return_t memory_object_data_reclaim
+(
+	memory_object_t memory_object,
+	boolean_t	reclaim_backing_store
+)
+{
+	if (memory_object->mo_pager_ops->memory_object_data_reclaim == NULL)
+		return KERN_NOT_SUPPORTED;
+	return (memory_object->mo_pager_ops->memory_object_data_reclaim)(
+		memory_object,
+		reclaim_backing_store);
 }
 
 /* Routine memory_object_create */
@@ -2166,12 +2334,41 @@ kern_return_t memory_object_create
 	memory_object_t *new_memory_object
 )
 {
-extern kern_return_t default_pager_memory_object_create(memory_object_default_t,
-							vm_size_t,
-							memory_object_t *);
-
 	return default_pager_memory_object_create(default_memory_manager,
 						  new_memory_object_size,
 						  new_memory_object);
 }
 
+upl_t
+convert_port_to_upl(
+	ipc_port_t	port)
+{
+	upl_t upl;
+
+	ip_lock(port);
+	if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
+			ip_unlock(port);
+			return (upl_t)NULL;
+	}
+	upl = (upl_t) port->ip_kobject;
+	ip_unlock(port);
+	upl_lock(upl);
+	upl->ref_count+=1;
+	upl_unlock(upl);
+	return upl;
+}
+
+mach_port_t
+convert_upl_to_port(
+	__unused upl_t		upl)
+{
+	return MACH_PORT_NULL;
+}
+
+__private_extern__ void
+upl_no_senders(
+	__unused ipc_port_t				port,
+	__unused mach_port_mscount_t	mscount)
+{
+	return;
+}