/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
* External memory management interface control functions.
*/
-#ifdef MACH_BSD
-/* THIS code should be removed when the component merge is completed */
-extern int vnode_pager_workaround;
-#endif
-
-#include <advisory_pageout.h>
-
/*
* Interface dependencies:
*/
#include <mach/std_types.h> /* For pointer_t */
#include <mach/mach_types.h>
+#include <mach/mig.h>
#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
-#include <mach/mach_host_server.h>
+#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>
-#include <vm/vm_object.h>
-#include <vm/vm_fault.h>
/*
* Implementation dependencies:
*/
#include <string.h> /* For memcpy() */
+#include <kern/xpr.h>
+#include <kern/host.h>
+#include <kern/thread.h> /* For current_thread() */
+#include <kern/ipc_mig.h>
+#include <kern/misc_protos.h>
+
+#include <vm/vm_object.h>
+#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h> /* For pmap_clear_modify */
-#include <kern/xpr.h>
-#include <kern/thread.h> /* For current_thread() */
-#include <kern/host.h>
#include <vm/vm_kern.h> /* For kernel_map, vm_move */
#include <vm/vm_map.h> /* For vm_map_pageable */
-#include <ipc/ipc_port.h>
-#include <ipc/ipc_space.h>
+#include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */
+#include <vm/vm_shared_region.h>
-#include <kern/misc_protos.h>
-
-#if MACH_PAGEMAP
#include <vm/vm_external.h>
-#endif /* MACH_PAGEMAP */
+#include <vm/vm_protos.h>
-ipc_port_t memory_manager_default = IP_NULL;
-vm_size_t memory_manager_default_cluster = 0;
-decl_mutex_data(,memory_manager_default_lock)
-
-/*
- * Forward ref to file-local function:
- */
-boolean_t
-memory_object_update(vm_object_t, vm_object_offset_t,
- vm_size_t, memory_object_return_t, int, vm_prot_t);
+memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
+decl_lck_mtx_data(, memory_manager_default_lock)
/*
#define memory_object_should_return_page(m, should_return) \
(should_return != MEMORY_OBJECT_RETURN_NONE && \
- (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_addr))) || \
+ (((m)->dirty || ((m)->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))) || \
((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
(should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
typedef int memory_object_lock_result_t;
-#define MEMORY_OBJECT_LOCK_RESULT_DONE 0
-#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1
-#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN 2
-#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 3
+#define MEMORY_OBJECT_LOCK_RESULT_DONE 0
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 2
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE 3
memory_object_lock_result_t memory_object_lock_page(
vm_page_t m,
{
XPR(XPR_MEMORY_OBJECT,
"m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
- (integer_t)m, should_return, should_flush, prot, 0);
+ m, should_return, should_flush, prot, 0);
- /*
- * If we cannot change access to the page,
- * either because a mapping is in progress
- * (busy page) or because a mapping has been
- * wired, then give up.
- */
if (m->busy || m->cleaning)
- return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+ return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+
+ if (m->laundry)
+ vm_pageout_steal_laundry(m, FALSE);
/*
* Don't worry about pages for which the kernel
* does not have any data.
*/
-
- if (m->absent || m->error || m->restart)
- return(MEMORY_OBJECT_LOCK_RESULT_DONE);
-
- assert(!m->fictitious);
-
- if (m->wire_count != 0) {
- /*
- * If no change would take place
- * anyway, return successfully.
- *
- * No change means:
- * Not flushing AND
- * No change to page lock [2 checks] AND
- * Should not return page
- *
- * XXX This doesn't handle sending a copy of a wired
- * XXX page to the pager, but that will require some
- * XXX significant surgery.
- */
- if (!should_flush &&
- (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) &&
- ! memory_object_should_return_page(m, should_return)) {
-
+ if (m->absent || m->error || m->restart) {
+ if (m->error && should_flush && !VM_PAGE_WIRED(m)) {
/*
- * Restart page unlock requests,
- * even though no change took place.
- * [Memory managers may be expecting
- * to see new requests.]
+ * dump the page, pager wants us to
+ * clean it up and there is no
+ * relevant data to return
*/
- m->unlock_request = VM_PROT_NONE;
- PAGE_WAKEUP(m);
-
- return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+ return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
}
-
- return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+ return (MEMORY_OBJECT_LOCK_RESULT_DONE);
}
+ assert(!m->fictitious);
- /*
- * If the page is to be flushed, allow
- * that to be done as part of the protection.
- */
-
- if (should_flush)
- prot = VM_PROT_ALL;
+ if (VM_PAGE_WIRED(m)) {
+ /*
+ * The page is wired... just clean or return the page if needed.
+ * Wired pages don't get flushed or disconnected from the pmap.
+ */
+ if (memory_object_should_return_page(m, should_return))
+ return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
- /*
- * Set the page lock.
- *
- * If we are decreasing permission, do it now;
- * let the fault handler take care of increases
- * (pmap_page_protect may not increase protection).
- */
+ return (MEMORY_OBJECT_LOCK_RESULT_DONE);
+ }
- if (prot != VM_PROT_NO_CHANGE) {
-#if 0
- /* code associated with the vestigial
- * memory_object_data_unlock
+ if (should_flush) {
+ /*
+ * must do the pmap_disconnect before determining the
+ * need to return the page... otherwise it's possible
+ * for the page to go from the clean to the dirty state
+ * after we've made our decision
*/
- if ((m->page_lock ^ prot) & prot) {
- pmap_page_protect(m->phys_addr, VM_PROT_ALL & ~prot);
+ if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(m, FALSE);
}
- m->page_lock = prot;
- m->lock_supplied = TRUE;
- if (prot != VM_PROT_NONE)
- m->unusual = TRUE;
- else
- m->unusual = FALSE;
-
+ } else {
/*
- * Restart any past unlock requests, even if no
- * change resulted. If the manager explicitly
- * requested no protection change, then it is assumed
- * to be remembering past requests.
+ * If we are decreasing permission, do it now;
+ * let the fault handler take care of increases
+ * (pmap_page_protect may not increase protection).
*/
-
- m->unlock_request = VM_PROT_NONE;
-#endif /* 0 */
- PAGE_WAKEUP(m);
+ if (prot != VM_PROT_NO_CHANGE)
+ pmap_page_protect(VM_PAGE_GET_PHYS_PAGE(m), VM_PROT_ALL & ~prot);
}
-
/*
- * Handle page returning.
+ * Handle returning dirty or precious pages
*/
-
if (memory_object_should_return_page(m, should_return)) {
-
/*
- * If we weren't planning
- * to flush the page anyway,
- * we may need to remove the
- * page from the pageout
- * system and from physical
- * maps now.
+ * we used to do a pmap_disconnect here in support
+ * of memory_object_lock_request, but that routine
+ * no longer requires this... in any event, in
+ * our world, it would turn into a big noop since
+ * we don't lock the page in any way and as soon
+ * as we drop the object lock, the page can be
+ * faulted back into an address space
+ *
+ * if (!should_flush)
+ * pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
*/
-
- vm_page_lock_queues();
- VM_PAGE_QUEUES_REMOVE(m);
- vm_page_unlock_queues();
-
- if (!should_flush)
- pmap_page_protect(m->phys_addr, VM_PROT_NONE);
-
- if (m->dirty)
- return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
- else
- return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
+ return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
}
/*
- * Handle flushing
+ * Handle flushing clean pages
*/
+ if (should_flush)
+ return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
- if (should_flush) {
- VM_PAGE_FREE(m);
- } else {
- extern boolean_t vm_page_deactivate_hint;
-
- /*
- * XXX Make clean but not flush a paging hint,
- * and deactivate the pages. This is a hack
- * because it overloads flush/clean with
- * implementation-dependent meaning. This only
- * happens to pages that are already clean.
- */
-
- if (vm_page_deactivate_hint &&
- (should_return != MEMORY_OBJECT_RETURN_NONE)) {
- vm_page_lock_queues();
- vm_page_deactivate(m);
- vm_page_unlock_queues();
- }
- }
+ /*
+ * we used to deactivate clean pages at this point,
+ * but we do not believe that an msync should change
+ * the 'age' of a page in the cache... here is the
+ * original comment and code concerning this...
+ *
+ * XXX Make clean but not flush a paging hint,
+ * and deactivate the pages. This is a hack
+ * because it overloads flush/clean with
+ * implementation-dependent meaning. This only
+ * happens to pages that are already clean.
+ *
+ * if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
+ * return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
+ */
- return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+ return (MEMORY_OBJECT_LOCK_RESULT_DONE);
}
-#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po) \
-MACRO_BEGIN \
- \
- register int i; \
- register vm_page_t hp; \
- \
- vm_object_unlock(object); \
- \
- if(((rpc_subsystem_t)pager_mux_hash_lookup(object->pager)) == \
- ((rpc_subsystem_t) &vnode_pager_workaround)) { \
- (void) vnode_pager_data_return(object->pager, \
- object->pager_request, \
- po, \
- POINTER_T(0), \
- data_cnt, \
- (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \
- !should_flush); \
- } else { \
- (void) memory_object_data_return(object->pager, \
- object->pager_request, \
- po, \
- POINTER_T(0), \
- data_cnt, \
- (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \
- !should_flush); \
- } \
- \
- vm_object_lock(object); \
- \
-MACRO_END
-#ifdef MACH_BSD
-#define PAGEOUT_PAGES(object, new_object, new_offset, action, po) \
-MACRO_BEGIN \
- \
- vm_map_copy_t copy; \
- register int i; \
- register vm_page_t hp; \
- \
- vm_object_unlock(object); \
- \
- (void) vm_map_copyin_object(new_object, 0, new_offset, ©); \
- \
- if(((rpc_subsystem_t)pager_mux_hash_lookup(object->pager)) == \
- ((rpc_subsystem_t) &vnode_pager_workaround)) { \
- (void) vnode_pager_data_return(object->pager, \
- object->pager_request, \
- po, \
- POINTER_T(copy), \
- new_offset, \
- (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \
- !should_flush); \
- } else { \
- (void) memory_object_data_return(object->pager, \
- object->pager_request, \
- po, \
- POINTER_T(copy), \
- new_offset, \
- (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \
- !should_flush); \
- } \
- \
- vm_object_lock(object); \
- \
- for (i = 0; i < atop(new_offset); i++) { \
- hp = holding_pages[i]; \
- if (hp != VM_PAGE_NULL) { \
- vm_object_paging_end(object); \
- VM_PAGE_FREE(hp); \
- } \
- } \
- \
- new_object = VM_OBJECT_NULL; \
-MACRO_END
-#else
-#define PAGEOUT_PAGES(object, new_object, new_offset, action, po) \
-MACRO_BEGIN \
- \
- vm_map_copy_t copy; \
- register int i; \
- register vm_page_t hp; \
- \
- vm_object_unlock(object); \
- \
- (void) vm_map_copyin_object(new_object, 0, new_offset, ©); \
- \
- (void) memory_object_data_return( \
- object->pager, \
- object->pager_request, \
- po, \
- POINTER_T(copy), \
- new_offset, \
- (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \
- !should_flush); \
- \
- vm_object_lock(object); \
- \
- for (i = 0; i < atop(new_offset); i++) { \
- hp = holding_pages[i]; \
- if (hp != VM_PAGE_NULL) { \
- vm_object_paging_end(object); \
- VM_PAGE_FREE(hp); \
- } \
- } \
- \
- new_object = VM_OBJECT_NULL; \
-MACRO_END
-#endif
+
/*
* Routine: memory_object_lock_request [user interface]
kern_return_t
memory_object_lock_request(
- register vm_object_t object,
- register vm_object_offset_t offset,
- register vm_object_size_t size,
+ memory_object_control_t control,
+ memory_object_offset_t offset,
+ memory_object_size_t size,
+ memory_object_offset_t * resid_offset,
+ int * io_errno,
memory_object_return_t should_return,
int flags,
- vm_prot_t prot,
- ipc_port_t reply_to,
- mach_msg_type_name_t reply_to_type)
+ vm_prot_t prot)
{
- vm_object_offset_t original_offset = offset;
- boolean_t should_flush=flags & MEMORY_OBJECT_DATA_FLUSH;
-
- XPR(XPR_MEMORY_OBJECT,
- "m_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
- (integer_t)object, offset, size,
- (((should_return&1)<<1)|should_flush), prot);
+ vm_object_t object;
- /*
+ /*
* Check for bogus arguments.
*/
+ object = memory_object_control_to_vm_object(control);
if (object == VM_OBJECT_NULL)
return (KERN_INVALID_ARGUMENT);
- if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE) {
- vm_object_deallocate(object);
+ if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
return (KERN_INVALID_ARGUMENT);
- }
- size = round_page(size);
+ size = round_page_64(size);
/*
* Lock the object, and acquire a paging reference to
- * prevent the memory_object and control ports from
- * being destroyed.
+ * prevent the memory_object reference from being released.
*/
-
vm_object_lock(object);
vm_object_paging_begin(object);
- offset -= object->paging_offset;
-
- (void)memory_object_update(object,
- offset, size, should_return, flags, prot);
- if (IP_VALID(reply_to)) {
- vm_object_unlock(object);
-
- /* consumes our naked send-once/send right for reply_to */
- (void) memory_object_lock_completed(reply_to, reply_to_type,
- object->pager_request, original_offset, size);
-
- vm_object_lock(object);
+ if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
+ if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) {
+ flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
+ flags |= MEMORY_OBJECT_DATA_FLUSH;
+ }
}
+ offset -= object->paging_offset;
+
+ if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL)
+ vm_object_reap_pages(object, REAP_DATA_FLUSH);
+ else
+ (void)vm_object_update(object, offset, size, resid_offset,
+ io_errno, should_return, flags, prot);
vm_object_paging_end(object);
vm_object_unlock(object);
- vm_object_deallocate(object);
return (KERN_SUCCESS);
}
/*
- * Routine: memory_object_sync
+ * memory_object_release_name: [interface]
+ *
+ * Enforces name semantic on memory_object reference count decrement
+ * This routine should not be called unless the caller holds a name
+ * reference gained through the memory_object_named_create or the
+ * memory_object_rename call.
+ * If the TERMINATE_IDLE flag is set, the call will return if the
+ * reference count is not 1. i.e. idle with the only remaining reference
+ * being the name.
+ * If the decision is made to proceed the name field flag is set to
+ * false and the reference count is decremented. If the RESPECT_CACHE
+ * flag is set and the reference count has gone to zero, the
+ * memory_object is checked to see if it is cacheable otherwise when
+ * the reference count is zero, it is simply terminated.
+ */
+
+kern_return_t
+memory_object_release_name( /* thin wrapper: resolve the control handle, then defer to vm_object_release_name() */
+ memory_object_control_t control, /* handle that is translated to the target vm_object */
+ int flags) /* forwarded unchanged to vm_object_release_name() */
+{
+ vm_object_t object;
+
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL) /* control did not translate to an object */
+ return (KERN_INVALID_ARGUMENT);
+
+ return vm_object_release_name(object, flags); /* result is returned to the caller as-is */
+}
+
+
+
+/*
+ * Routine: memory_object_destroy [user interface]
+ * Purpose:
+ * Shut down a memory object, despite the
+ * presence of address map (or other) references
+ * to the vm_object.
+ */
+kern_return_t
+memory_object_destroy( /* thin wrapper: resolve the control handle, then defer to vm_object_destroy() */
+ memory_object_control_t control, /* handle that is translated to the target vm_object */
+ kern_return_t reason) /* forwarded unchanged to vm_object_destroy() */
+{
+ vm_object_t object;
+
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL) /* control did not translate to an object */
+ return (KERN_INVALID_ARGUMENT);
+
+ return (vm_object_destroy(object, reason)); /* result is returned to the caller as-is */
+}
+
+/*
+ * Routine: vm_object_sync
*
* Kernel internal function to synch out pages in a given
* range within an object to its memory manager. Much the
*/
boolean_t
-memory_object_sync(
+vm_object_sync(
vm_object_t object,
vm_object_offset_t offset,
vm_object_size_t size,
boolean_t should_flush,
- boolean_t should_return)
+ boolean_t should_return,
+ boolean_t should_iosync)
{
boolean_t rv;
+ int flags;
- XPR(XPR_MEMORY_OBJECT,
- "m_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
- (integer_t)object, offset, size, should_flush, should_return);
+ XPR(XPR_VM_OBJECT,
+ "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
+ object, offset, size, should_flush, should_return);
/*
* Lock the object, and acquire a paging reference to
vm_object_lock(object);
vm_object_paging_begin(object);
- rv = memory_object_update(object, offset, size,
+ if (should_flush) {
+ flags = MEMORY_OBJECT_DATA_FLUSH;
+ /*
+ * This flush is from an msync(), not a truncate(), so the
+ * contents of the file are not affected.
+ * MEMORY_OBJECT_DATA_NO_CHANGE lets vm_object_update() know
+ * that the data is not changed and that there's no need to
+ * push the old contents to a copy object.
+ */
+ flags |= MEMORY_OBJECT_DATA_NO_CHANGE;
+ } else
+ flags = 0;
+
+ if (should_iosync)
+ flags |= MEMORY_OBJECT_IO_SYNC;
+
+ rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
(should_return) ?
MEMORY_OBJECT_RETURN_ALL :
MEMORY_OBJECT_RETURN_NONE,
- (should_flush) ?
- MEMORY_OBJECT_DATA_FLUSH : 0,
+ flags,
VM_PROT_NO_CHANGE);
return rv;
}
+
+
+#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync) \
+MACRO_BEGIN \
+ /* Hand data_cnt bytes starting at offset po to the object's pager via memory_object_data_return(). */ \
+ /* Expands to statements that unlock and re-lock 'object', so the caller must hold the object lock. */ \
+ int upl_flags; /* UPL_* bits passed as the last argument of memory_object_data_return() */ \
+ memory_object_t pager; /* snapshot of (object)->pager taken before the lock is dropped */ \
+ \
+ if (object->object_slid) { \
+ panic("Objects with slid pages not allowed\n"); \
+ } \
+ \
+ if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) { /* nothing is done when the object has no pager */ \
+ vm_object_paging_begin(object); /* paging reference is held across the unlocked window */ \
+ vm_object_unlock(object); \
+ \
+ if (iosync) /* UPL_IOSYNC is added only when the caller asked for iosync */ \
+ upl_flags = UPL_MSYNC | UPL_IOSYNC; \
+ else \
+ upl_flags = UPL_MSYNC; \
+ \
+ (void) memory_object_data_return(pager, /* return value is deliberately discarded */ \
+ po, \
+ (memory_object_cluster_size_t)data_cnt, \
+ ro, \
+ ioerr, \
+ FALSE, /* NOTE(review): meaning of the two FALSE arguments is not visible here; check the prototype */ \
+ FALSE, \
+ upl_flags); \
+ \
+ vm_object_lock(object); /* re-take the lock before dropping the paging reference */ \
+ vm_object_paging_end(object); \
+ } \
+MACRO_END
+
+extern struct vnode *
+vnode_pager_lookup_vnode(memory_object_t);
+
+static int /* returns 1 if at least one page was queued for return to the pager, else 0 */
+vm_object_update_extent( /* scan [offset, offset_end) of 'object', applying memory_object_lock_page() to each resident page */
+ vm_object_t object, /* object to scan; LIST_REQ_PAGEOUT_PAGES unlocks/re-locks it, so it is expected to be locked on entry */
+ vm_object_offset_t offset, /* first offset to examine */
+ vm_object_offset_t offset_end, /* scan stops when offset reaches this value (exclusive) */
+ vm_object_offset_t *offset_resid, /* passed through to memory_object_data_return() */
+ int *io_errno, /* passed through to memory_object_data_return() */
+ boolean_t should_flush, /* forwarded to memory_object_lock_page(); also marks returned pages free_when_done */
+ memory_object_return_t should_return, /* forwarded to memory_object_lock_page() */
+ boolean_t should_iosync, /* selects UPL_IOSYNC inside LIST_REQ_PAGEOUT_PAGES */
+ vm_prot_t prot) /* forwarded to memory_object_lock_page() */
+{
+ vm_page_t m;
+ int retval = 0; /* becomes 1 once any page yields MUST_RETURN */
+ vm_object_offset_t paging_offset = 0; /* offset of the first page in the run being accumulated */
+ vm_object_offset_t next_offset = offset; /* offset the next page must have to extend the current run */
+ memory_object_lock_result_t page_lock_result;
+ memory_object_cluster_size_t data_cnt = 0; /* bytes accumulated in the current run; 0 means no run is open */
+ struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; /* batch of per-page operations applied by vm_page_do_delayed_work() */
+ struct vm_page_delayed_work *dwp; /* entry in dw_array currently being filled */
+ int dw_count; /* number of entries pending in dw_array */
+ int dw_limit; /* batch is flushed once dw_count reaches this */
+ int dirty_count; /* pages that were dirty when queued to be freed */
+
+ dwp = &dw_array[0];
+ dw_count = 0;
+ dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+ dirty_count = 0;
+
+ for (;
+ offset < offset_end && object->resident_page_count; /* also stops early once the object has no resident pages */
+ offset += PAGE_SIZE_64) {
+
+ /*
+ * Limit the number of pages to be cleaned at once to a contiguous
+ * run, or at most MAX_UPL_TRANSFER_BYTES
+ */
+ if (data_cnt) {
+ if ((data_cnt >= MAX_UPL_TRANSFER_BYTES) || (next_offset != offset)) {
+
+ if (dw_count) { /* apply the pending batch before the run is handed to the pager */
+ vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
+ dwp = &dw_array[0];
+ dw_count = 0;
+ }
+ LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
+ paging_offset, offset_resid, io_errno, should_iosync);
+ data_cnt = 0;
+ }
+ }
+ while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { /* 'continue' below re-looks-up this same offset */
+
+ dwp->dw_mask = 0;
+
+ page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
+
+ if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) {
+ /*
+ * End of a run of dirty/precious pages.
+ */
+ if (dw_count) {
+ vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
+ dwp = &dw_array[0];
+ dw_count = 0;
+ }
+ LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
+ paging_offset, offset_resid, io_errno, should_iosync);
+ /*
+ * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
+ * allow the state of page 'm' to change... we need to re-lookup
+ * the current offset
+ */
+ data_cnt = 0;
+ continue;
+ }
+
+ switch (page_lock_result) {
+
+ case MEMORY_OBJECT_LOCK_RESULT_DONE: /* nothing further to do for this page */
+ break;
+
+ case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
+ if (m->dirty == TRUE)
+ dirty_count++;
+ dwp->dw_mask |= DW_vm_page_free; /* the free is deferred to the delayed-work batch */
+ break;
+
+ case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
+ PAGE_SLEEP(object, m, THREAD_UNINT);
+ continue; /* applies to the enclosing while: look the page up again after sleeping */
+
+ case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
+ if (data_cnt == 0) /* this page opens a new run */
+ paging_offset = offset;
+
+ data_cnt += PAGE_SIZE;
+ next_offset = offset + PAGE_SIZE_64;
+
+ /*
+ * wired pages shouldn't be flushed and
+ * since they aren't on any queue,
+ * no need to remove them
+ */
+ if (!VM_PAGE_WIRED(m)) {
+
+ if (should_flush) {
+ /*
+ * add additional state for the flush
+ */
+ m->free_when_done = TRUE;
+ }
+ /*
+ * we used to remove the page from the queues at this
+ * point, but we do not believe that an msync
+ * should cause the 'age' of a page to be changed
+ *
+ * else
+ * dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
+ */
+ }
+ retval = 1;
+ break;
+ }
+ if (dwp->dw_mask) { /* only pages that picked up a deferred operation are added to the batch */
+ VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
+
+ if (dw_count >= dw_limit) {
+ vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
+ dwp = &dw_array[0];
+ dw_count = 0;
+ }
+ }
+ break; /* page handled: leave the while and advance to the next offset */
+ }
+ }
+
+ if (object->pager) /* reports dirty_count * PAGE_SIZE bytes as TASK_WRITE_INVALIDATED for the current task */
+ task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_INVALIDATED, vnode_pager_lookup_vnode(object->pager));
+ /*
+ * We have completed the scan for applicable pages.
+ * Clean any pages that have been saved.
+ */
+ if (dw_count)
+ vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
+
+ if (data_cnt) { /* hand the final, still-open run to the pager */
+ LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
+ paging_offset, offset_resid, io_errno, should_iosync);
+ }
+ return (retval);
+}
+
+
+
/*
- * Routine: memory_object_update
+ * Routine: vm_object_update
* Description:
- * Work function for m_o_lock_request(), m_o_sync().
+ * Work function for m_o_lock_request(), vm_o_sync().
*
* Called with object locked and paging ref taken.
*/
kern_return_t
-memory_object_update(
- register vm_object_t object,
- register vm_object_offset_t offset,
- register vm_size_t size,
- memory_object_return_t should_return,
- int flags,
- vm_prot_t prot)
+vm_object_update(
+ vm_object_t object,
+ vm_object_offset_t offset,
+ vm_object_size_t size,
+ vm_object_offset_t *resid_offset,
+ int *io_errno,
+ memory_object_return_t should_return,
+ int flags,
+ vm_prot_t protection)
{
- register vm_page_t m;
- vm_page_t holding_page;
- vm_size_t original_size = size;
- vm_object_offset_t paging_offset = 0;
- vm_object_t copy_object;
- vm_size_t data_cnt = 0;
- vm_object_offset_t last_offset = offset;
- memory_object_lock_result_t page_lock_result;
- memory_object_lock_result_t pageout_action;
+ vm_object_t copy_object = VM_OBJECT_NULL;
boolean_t data_returned = FALSE;
boolean_t update_cow;
- boolean_t should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
-#ifndef NOT_LIST_REQ
- boolean_t pending_pageout = FALSE;
-#endif
+ boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
+ boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
+ vm_fault_return_t result;
+ int num_of_extents;
+ int n;
+#define MAX_EXTENTS 8
+#define EXTENT_SIZE (1024 * 1024 * 256)
+#define RESIDENT_LIMIT (1024 * 32)
+ struct extent {
+ vm_object_offset_t e_base;
+ vm_object_offset_t e_min;
+ vm_object_offset_t e_max;
+ } extents[MAX_EXTENTS];
/*
* To avoid blocking while scanning for pages, save
!(flags & MEMORY_OBJECT_DATA_PURGE)))
|| (flags & MEMORY_OBJECT_COPY_SYNC);
+ if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
+ int collisions = 0;
+
+ while ((copy_object = object->copy) != VM_OBJECT_NULL) {
+ /*
+ * need to do a try here since we're swimming upstream
+ * against the normal lock ordering... however, we need
+ * to hold the object stable until we gain control of the
+ * copy object so we have to be careful how we approach this
+ */
+ if (vm_object_lock_try(copy_object)) {
+ /*
+ * we 'won' the lock on the copy object...
+ * no need to hold the object lock any longer...
+ * take a real reference on the copy object because
+ * we're going to call vm_fault_page on it which may
+ * under certain conditions drop the lock and the paging
+ * reference we're about to take... the reference
+ * will keep the copy object from going away if that happens
+ */
+ vm_object_unlock(object);
+ vm_object_reference_locked(copy_object);
+ break;
+ }
+ vm_object_unlock(object);
- if((((copy_object = object->copy) != NULL) && update_cow) ||
- (flags & MEMORY_OBJECT_DATA_SYNC)) {
- vm_size_t i;
- vm_size_t copy_size;
- vm_object_offset_t copy_offset;
+ collisions++;
+ mutex_pause(collisions);
+
+ vm_object_lock(object);
+ }
+ }
+ if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
+ vm_map_size_t i;
+ vm_map_size_t copy_size;
+ vm_map_offset_t copy_offset;
vm_prot_t prot;
vm_page_t page;
vm_page_t top_page;
kern_return_t error = 0;
+ struct vm_object_fault_info fault_info;
+
+ if (copy_object != VM_OBJECT_NULL) {
+ /*
+ * translate offset with respect to shadow's offset
+ */
+ copy_offset = (offset >= copy_object->vo_shadow_offset) ?
+ (vm_map_offset_t)(offset - copy_object->vo_shadow_offset) :
+ (vm_map_offset_t) 0;
+
+ if (copy_offset > copy_object->vo_size)
+ copy_offset = copy_object->vo_size;
+
+ /*
+ * clip size with respect to shadow offset
+ */
+ if (offset >= copy_object->vo_shadow_offset) {
+ copy_size = size;
+ } else if (size >= copy_object->vo_shadow_offset - offset) {
+ copy_size = size - (copy_object->vo_shadow_offset - offset);
+ } else {
+ copy_size = 0;
+ }
+
+ if (copy_offset + copy_size > copy_object->vo_size) {
+ if (copy_object->vo_size >= copy_offset) {
+ copy_size = copy_object->vo_size - copy_offset;
+ } else {
+ copy_size = 0;
+ }
+ }
+ copy_size+=copy_offset;
- if(copy_object != NULL) {
- /* translate offset with respect to shadow's offset */
- copy_offset = (offset >= copy_object->shadow_offset)?
- offset - copy_object->shadow_offset :
- (vm_object_offset_t) 0;
- if(copy_offset > copy_object->size)
- copy_offset = copy_object->size;
-
- /* clip size with respect to shadow offset */
- copy_size = (offset >= copy_object->shadow_offset) ?
- size : size - (copy_object->shadow_offset - offset);
-
- if(copy_size <= 0) {
- copy_size = 0;
- } else {
- copy_size = ((copy_offset + copy_size)
- <= copy_object->size) ?
- copy_size : copy_object->size - copy_offset;
- }
- /* check for a copy_offset which is beyond the end of */
- /* the copy_object */
- if(copy_size < 0)
- copy_size = 0;
-
- copy_size+=offset;
-
- vm_object_unlock(object);
- vm_object_lock(copy_object);
} else {
copy_object = object;
copy_size = offset + size;
copy_offset = offset;
}
+ fault_info.interruptible = THREAD_UNINT;
+ fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
+ fault_info.user_tag = 0;
+ fault_info.pmap_options = 0;
+ fault_info.lo_offset = copy_offset;
+ fault_info.hi_offset = copy_size;
+ fault_info.no_cache = FALSE;
+ fault_info.stealth = TRUE;
+ fault_info.io_sync = FALSE;
+ fault_info.cs_bypass = FALSE;
+ fault_info.mark_zf_absent = FALSE;
+ fault_info.batch_pmap_op = FALSE;
vm_object_paging_begin(copy_object);
- for (i=copy_offset; i<copy_size; i+=PAGE_SIZE) {
+
+ for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
RETRY_COW_OF_LOCK_REQUEST:
- prot = VM_PROT_WRITE|VM_PROT_READ;
- switch (vm_fault_page(copy_object, i,
- VM_PROT_WRITE|VM_PROT_READ,
- FALSE,
- THREAD_UNINT,
- copy_offset,
- copy_offset+copy_size,
- VM_BEHAVIOR_SEQUENTIAL,
- &prot,
- &page,
- &top_page,
- (int *)0,
- &error,
- FALSE,
- FALSE)) {
+ fault_info.cluster_size = (vm_size_t) (copy_size - i);
+ assert(fault_info.cluster_size == copy_size - i);
+ prot = VM_PROT_WRITE|VM_PROT_READ;
+ page = VM_PAGE_NULL;
+ result = vm_fault_page(copy_object, i,
+ VM_PROT_WRITE|VM_PROT_READ,
+ FALSE,
+ FALSE, /* page not looked up */
+ &prot,
+ &page,
+ &top_page,
+ (int *)0,
+ &error,
+ FALSE,
+ FALSE, &fault_info);
+
+ switch (result) {
case VM_FAULT_SUCCESS:
- if(top_page) {
+ if (top_page) {
vm_fault_cleanup(
- page->object, top_page);
- PAGE_WAKEUP_DONE(page);
- vm_page_lock_queues();
- if (!page->active && !page->inactive)
- vm_page_activate(page);
- vm_page_unlock_queues();
+ VM_PAGE_OBJECT(page), top_page);
vm_object_lock(copy_object);
vm_object_paging_begin(copy_object);
- } else {
- PAGE_WAKEUP_DONE(page);
- vm_page_lock_queues();
- if (!page->active && !page->inactive)
- vm_page_activate(page);
+ }
+ if (( !VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {
+
+ vm_page_lockspin_queues();
+
+ if (( !VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {
+ vm_page_deactivate(page);
+ }
vm_page_unlock_queues();
}
+ PAGE_WAKEUP_DONE(page);
break;
case VM_FAULT_RETRY:
prot = VM_PROT_WRITE|VM_PROT_READ;
vm_object_lock(copy_object);
vm_object_paging_begin(copy_object);
goto RETRY_COW_OF_LOCK_REQUEST;
- case VM_FAULT_FICTITIOUS_SHORTAGE:
- vm_page_more_fictitious();
- prot = VM_PROT_WRITE|VM_PROT_READ;
- vm_object_lock(copy_object);
- vm_object_paging_begin(copy_object);
- goto RETRY_COW_OF_LOCK_REQUEST;
+ case VM_FAULT_SUCCESS_NO_VM_PAGE:
+ /* success but no VM page: fail */
+ vm_object_paging_end(copy_object);
+ vm_object_unlock(copy_object);
+ /*FALLTHROUGH*/
case VM_FAULT_MEMORY_ERROR:
+ if (object != copy_object)
+ vm_object_deallocate(copy_object);
vm_object_lock(object);
goto BYPASS_COW_COPYIN;
+ default:
+ panic("vm_object_update: unexpected error 0x%x"
+ " from vm_fault_page()\n", result);
}
}
vm_object_paging_end(copy_object);
- if(copy_object != object) {
+ }
+ if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
+ if (copy_object != VM_OBJECT_NULL && copy_object != object) {
vm_object_unlock(copy_object);
+ vm_object_deallocate(copy_object);
vm_object_lock(object);
}
+ return KERN_SUCCESS;
}
- if((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
- return KERN_SUCCESS;
- }
- if(((copy_object = object->copy) != NULL) &&
- (flags & MEMORY_OBJECT_DATA_PURGE)) {
- copy_object->shadow_severed = TRUE;
- copy_object->shadowed = FALSE;
- copy_object->shadow = NULL;
- /* delete the ref the COW was holding on the target object */
- vm_object_deallocate(object);
+ if (copy_object != VM_OBJECT_NULL && copy_object != object) {
+ if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
+ vm_object_lock_assert_exclusive(copy_object);
+ copy_object->shadow_severed = TRUE;
+ copy_object->shadowed = FALSE;
+ copy_object->shadow = NULL;
+ /*
+ * delete the ref the COW was holding on the target object
+ */
+ vm_object_deallocate(object);
+ }
+ vm_object_unlock(copy_object);
+ vm_object_deallocate(copy_object);
+ vm_object_lock(object);
}
BYPASS_COW_COPYIN:
- for (;
- size != 0;
- size -= PAGE_SIZE, offset += PAGE_SIZE_64)
- {
- /*
- * Limit the number of pages to be cleaned at once.
- */
- if (pending_pageout &&
- data_cnt >= PAGE_SIZE * DATA_WRITE_MAX)
- {
- LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
- pageout_action, paging_offset);
- data_cnt = 0;
- pending_pageout = FALSE;
- }
+ /*
+ * when we have a really large range to check relative
+ * to the number of actual resident pages, we'd like
+ * to use the resident page list to drive our checks
+ * however, the object lock will get dropped while processing
+ * the page which means the resident queue can change which
+ * means we can't walk the queue as we process the pages
+ * we also want to do the processing in offset order to allow
+ * 'runs' of pages to be collected if we're being told to
+ * flush to disk... the resident page queue is NOT ordered.
+ *
+ * a temporary solution (until we figure out how to deal with
+ * large address spaces more generically) is to pre-flight
+ * the resident page queue (if it's small enough) and develop
+ * a collection of extents (that encompass actual resident pages)
+ * to visit. This will at least allow us to deal with some of the
+ * more pathological cases in a more efficient manner. The current
+ * worst case (a single resident page at the end of an extremely large
+ * range) can take minutes to complete for ranges in the terabyte
+ * category... since this routine is called when truncating a file,
+ * and we currently support files up to 16 Tbytes in size, this
+ * is not a theoretical problem
+ */
- while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
- page_lock_result = memory_object_lock_page(m, should_return,
- should_flush, prot);
+ if ((object->resident_page_count < RESIDENT_LIMIT) &&
+ (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
+ vm_page_t next;
+ vm_object_offset_t start;
+ vm_object_offset_t end;
+ vm_object_size_t e_mask;
+ vm_page_t m;
- XPR(XPR_MEMORY_OBJECT,
- "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
- (integer_t)object, offset, page_lock_result, 0, 0);
+ start = offset;
+ end = offset + size;
+ num_of_extents = 0;
+ e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
- switch (page_lock_result)
- {
- case MEMORY_OBJECT_LOCK_RESULT_DONE:
- /*
- * End of a cluster of dirty pages.
- */
- if(pending_pageout) {
- LIST_REQ_PAGEOUT_PAGES(object,
- data_cnt, pageout_action,
- paging_offset);
- data_cnt = 0;
- pending_pageout = FALSE;
- continue;
- }
- break;
+ m = (vm_page_t) vm_page_queue_first(&object->memq);
- case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
- /*
- * Since it is necessary to block,
- * clean any dirty pages now.
- */
- if(pending_pageout) {
- LIST_REQ_PAGEOUT_PAGES(object,
- data_cnt, pageout_action,
- paging_offset);
- pending_pageout = FALSE;
- data_cnt = 0;
- continue;
- }
-
- PAGE_ASSERT_WAIT(m, THREAD_UNINT);
- vm_object_unlock(object);
- thread_block((void (*)(void))0);
- vm_object_lock(object);
- continue;
-
- case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
- case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
- /*
- * The clean and return cases are similar.
- *
- */
+ while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t) m)) {
+ next = (vm_page_t) vm_page_queue_next(&m->listq);
- /*
- * if this would form a discontiguous block,
- * clean the old pages and start anew.
- *
- */
-
- /*
- * Mark the page busy since we unlock the
- * object below.
- */
- m->busy = TRUE;
- if (pending_pageout &&
- (last_offset != offset ||
- pageout_action != page_lock_result)) {
- LIST_REQ_PAGEOUT_PAGES(object,
- data_cnt, pageout_action,
- paging_offset);
- pending_pageout = FALSE;
- data_cnt = 0;
- }
- m->busy = FALSE;
- holding_page = VM_PAGE_NULL;
- if(m->cleaning) {
- PAGE_ASSERT_WAIT(m, THREAD_UNINT);
- vm_object_unlock(object);
- thread_block((void (*)(void))0);
- continue;
- }
- if(!pending_pageout) {
- pending_pageout = TRUE;
- pageout_action = page_lock_result;
- paging_offset = offset;
- }
- if (should_flush) {
- vm_page_lock_queues();
- m->list_req_pending = TRUE;
- m->cleaning = TRUE;
- m->busy = TRUE;
- m->pageout = TRUE;
- vm_page_wire(m);
- vm_page_unlock_queues();
- } else {
- /*
- * Clean but do not flush
+ if ((m->offset >= start) && (m->offset < end)) {
+ /*
+ * this is a page we're interested in
+ * try to fit it into a current extent
*/
- vm_page_lock_queues();
- m->list_req_pending = TRUE;
- m->cleaning = TRUE;
- vm_page_unlock_queues();
-
+ for (n = 0; n < num_of_extents; n++) {
+ if ((m->offset & e_mask) == extents[n].e_base) {
+ /*
+ * use (PAGE_SIZE - 1) to determine the
+ * max offset so that we don't wrap if
+ * we're at the last page of the space
+ */
+ if (m->offset < extents[n].e_min)
+ extents[n].e_min = m->offset;
+ else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
+ extents[n].e_max = m->offset + (PAGE_SIZE - 1);
+ break;
+ }
+ }
+ if (n == num_of_extents) {
+ /*
+ * didn't find a current extent that can encompass
+ * this page
+ */
+ if (n < MAX_EXTENTS) {
+ /*
+ * if we still have room,
+ * create a new extent
+ */
+ extents[n].e_base = m->offset & e_mask;
+ extents[n].e_min = m->offset;
+ extents[n].e_max = m->offset + (PAGE_SIZE - 1);
+
+ num_of_extents++;
+ } else {
+ /*
+ * no room to create a new extent...
+ * fall back to a single extent based
+ * on the min and max page offsets
+ * we find in the range we're interested in...
+ * first, look through the extent list and
+ * develop the overall min and max for the
+ * pages we've looked at up to this point
+ */
+ for (n = 1; n < num_of_extents; n++) {
+ if (extents[n].e_min < extents[0].e_min)
+ extents[0].e_min = extents[n].e_min;
+ if (extents[n].e_max > extents[0].e_max)
+ extents[0].e_max = extents[n].e_max;
+ }
+ /*
+ * now setup to run through the remaining pages
+ * to determine the overall min and max
+ * offset for the specified range
+ */
+ extents[0].e_base = 0;
+ e_mask = 0;
+ num_of_extents = 1;
+
+ /*
+ * by continuing, we'll reprocess the
+ * page that forced us to abandon trying
+ * to develop multiple extents
+ */
+ continue;
+ }
+ }
}
- vm_object_unlock(object);
-
-
- data_cnt += PAGE_SIZE;
- last_offset = offset + PAGE_SIZE_64;
- data_returned = TRUE;
-
- vm_object_lock(object);
- break;
+ m = next;
}
- break;
- }
- }
+ } else {
+ extents[0].e_min = offset;
+ extents[0].e_max = offset + (size - 1);
- /*
- * We have completed the scan for applicable pages.
- * Clean any pages that have been saved.
- */
-#ifdef NOT_LIST_REQ
- if (new_object != VM_OBJECT_NULL) {
- PAGEOUT_PAGES(object, new_object, new_offset, pageout_action,
- paging_offset);
+ num_of_extents = 1;
}
-#else
- if (pending_pageout) {
- LIST_REQ_PAGEOUT_PAGES(object,
- data_cnt, pageout_action, paging_offset);
+ for (n = 0; n < num_of_extents; n++) {
+ if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
+ should_flush, should_return, should_iosync, protection))
+ data_returned = TRUE;
}
-#endif
return (data_returned);
}
+
/*
* Routine: memory_object_synchronize_completed [user interface]
*
kern_return_t
memory_object_synchronize_completed(
- vm_object_t object,
- vm_object_offset_t offset,
- vm_offset_t length)
+ memory_object_control_t control,
+ memory_object_offset_t offset,
+ memory_object_size_t length)
{
- msync_req_t msr;
+ vm_object_t object;
+ msync_req_t msr;
+
+ object = memory_object_control_to_vm_object(control);
XPR(XPR_MEMORY_OBJECT,
"m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
- (integer_t)object, offset, length, 0, 0);
+ object, offset, length, 0, 0);
/*
* Look for bogus arguments
*/
- if (object == VM_OBJECT_NULL) {
- return KERN_INVALID_ARGUMENT;
- }
+ if (object == VM_OBJECT_NULL)
+ return (KERN_INVALID_ARGUMENT);
vm_object_lock(object);
if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
vm_object_unlock(object);
- vm_object_deallocate(object);
return KERN_INVALID_ARGUMENT;
}
msr->flag = VM_MSYNC_DONE;
msr_unlock(msr);
thread_wakeup((event_t) msr);
- vm_object_deallocate(object);
return KERN_SUCCESS;
}/* memory_object_synchronize_completed */
-
-kern_return_t
-memory_object_set_attributes_common(
+
+static kern_return_t
+vm_object_set_attributes_common(
vm_object_t object,
boolean_t may_cache,
memory_object_copy_strategy_t copy_strategy,
boolean_t temporary,
- vm_size_t cluster_size,
- boolean_t silent_overwrite,
+ __unused boolean_t silent_overwrite,
boolean_t advisory_pageout)
{
boolean_t object_became_ready;
XPR(XPR_MEMORY_OBJECT,
"m_o_set_attr_com, object 0x%X flg %x strat %d\n",
- (integer_t)object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0);
+ object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0);
if (object == VM_OBJECT_NULL)
return(KERN_INVALID_ARGUMENT);
case MEMORY_OBJECT_COPY_DELAY:
break;
default:
- vm_object_deallocate(object);
return(KERN_INVALID_ARGUMENT);
}
-#if !ADVISORY_PAGEOUT
- if (silent_overwrite || advisory_pageout) {
- vm_object_deallocate(object);
- return(KERN_INVALID_ARGUMENT);
- }
-#endif /* !ADVISORY_PAGEOUT */
if (may_cache)
may_cache = TRUE;
if (temporary)
temporary = TRUE;
- if (cluster_size != 0) {
- int pages_per_cluster;
- pages_per_cluster = atop(cluster_size);
- /*
- * Cluster size must be integral multiple of page size,
- * and be a power of 2 number of pages.
- */
- if ((cluster_size & (PAGE_SIZE-1)) ||
- ((pages_per_cluster-1) & pages_per_cluster)) {
- vm_object_deallocate(object);
- return KERN_INVALID_ARGUMENT;
- }
- }
vm_object_lock(object);
object->copy_strategy = copy_strategy;
object->can_persist = may_cache;
object->temporary = temporary;
- object->silent_overwrite = silent_overwrite;
+// object->silent_overwrite = silent_overwrite;
object->advisory_pageout = advisory_pageout;
- if (cluster_size == 0)
- cluster_size = PAGE_SIZE;
- object->cluster_size = cluster_size;
-
- assert(cluster_size >= PAGE_SIZE &&
- cluster_size % PAGE_SIZE == 0);
/*
* Wake up anyone waiting for the ready attribute
vm_object_unlock(object);
- vm_object_deallocate(object);
-
return(KERN_SUCCESS);
}
*
* XXX This routine cannot be completed until the vm_msync, clean
* in place, and cluster work is completed. See ifdef notyet
- * below and note that memory_object_set_attributes_common()
+ * below and note that vm_object_set_attributes_common()
* may have to be expanded.
*/
kern_return_t
memory_object_change_attributes(
- vm_object_t object,
- memory_object_flavor_t flavor,
- memory_object_info_t attributes,
- mach_msg_type_number_t count,
- ipc_port_t reply_to,
- mach_msg_type_name_t reply_to_type)
+ memory_object_control_t control,
+ memory_object_flavor_t flavor,
+ memory_object_info_t attributes,
+ mach_msg_type_number_t count)
{
- kern_return_t result = KERN_SUCCESS;
- boolean_t temporary;
- boolean_t may_cache;
- boolean_t invalidate;
- vm_size_t cluster_size;
+ vm_object_t object;
+ kern_return_t result = KERN_SUCCESS;
+ boolean_t temporary;
+ boolean_t may_cache;
+ boolean_t invalidate;
memory_object_copy_strategy_t copy_strategy;
- boolean_t silent_overwrite;
+ boolean_t silent_overwrite;
boolean_t advisory_pageout;
+ object = memory_object_control_to_vm_object(control);
if (object == VM_OBJECT_NULL)
- return(KERN_INVALID_ARGUMENT);
+ return (KERN_INVALID_ARGUMENT);
vm_object_lock(object);
+
temporary = object->temporary;
may_cache = object->can_persist;
copy_strategy = object->copy_strategy;
- silent_overwrite = object->silent_overwrite;
+// silent_overwrite = object->silent_overwrite;
+ silent_overwrite = FALSE;
advisory_pageout = object->advisory_pageout;
#if notyet
invalidate = object->invalidate;
#endif
- cluster_size = object->cluster_size;
vm_object_unlock(object);
switch (flavor) {
perf = (memory_object_perf_info_t) attributes;
may_cache = perf->may_cache;
- cluster_size = round_page(perf->cluster_size);
break;
}
may_cache = attr->may_cache;
copy_strategy = attr->copy_strategy;
- cluster_size = page_size;
break;
}
copy_strategy = attr->copy_strategy;
may_cache = attr->may_cache_object;
- cluster_size = attr->cluster_size;
temporary = attr->temporary;
break;
break;
}
- if (result != KERN_SUCCESS) {
- vm_object_deallocate(object);
+ if (result != KERN_SUCCESS)
return(result);
- }
if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
copy_strategy = MEMORY_OBJECT_COPY_DELAY;
}
/*
- * Do the work and throw away our object reference. It
- * is important that the object reference be deallocated
- * BEFORE sending the reply. The whole point of the reply
- * is that it shows up after the terminate message that
- * may be generated by setting the object uncacheable.
- *
* XXX may_cache may become a tri-valued variable to handle
* XXX uncache if not in use.
*/
- result = memory_object_set_attributes_common(object,
+ return (vm_object_set_attributes_common(object,
may_cache,
copy_strategy,
temporary,
- cluster_size,
silent_overwrite,
- advisory_pageout);
-
- if (IP_VALID(reply_to)) {
- /* consumes our naked send-once/send right for reply_to */
- (void) memory_object_change_completed(reply_to, reply_to_type,
- object->alive ?
- object->pager_request : PAGER_REQUEST_NULL,
- flavor);
- }
-
- return(result);
+ advisory_pageout));
}
kern_return_t
memory_object_get_attributes(
- vm_object_t object,
+ memory_object_control_t control,
memory_object_flavor_t flavor,
memory_object_info_t attributes, /* pointer to OUT array */
mach_msg_type_number_t *count) /* IN/OUT */
{
- kern_return_t ret = KERN_SUCCESS;
+ kern_return_t ret = KERN_SUCCESS;
+ vm_object_t object;
- if (object == VM_OBJECT_NULL)
- return(KERN_INVALID_ARGUMENT);
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL)
+ return (KERN_INVALID_ARGUMENT);
vm_object_lock(object);
behave->invalidate = FALSE;
#endif
behave->advisory_pageout = object->advisory_pageout;
- behave->silent_overwrite = object->silent_overwrite;
+// behave->silent_overwrite = object->silent_overwrite;
+ behave->silent_overwrite = FALSE;
*count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
break;
}
}
perf = (memory_object_perf_info_t) attributes;
- perf->cluster_size = object->cluster_size;
+ perf->cluster_size = PAGE_SIZE;
perf->may_cache = object->can_persist;
*count = MEMORY_OBJECT_PERF_INFO_COUNT;
attr = (memory_object_attr_info_t) attributes;
attr->copy_strategy = object->copy_strategy;
- attr->cluster_size = object->cluster_size;
+ attr->cluster_size = PAGE_SIZE;
attr->may_cache_object = object->can_persist;
attr->temporary = object->temporary;
vm_object_unlock(object);
- vm_object_deallocate(object);
-
return(ret);
}
-int vm_stat_discard_cleared_reply = 0;
-int vm_stat_discard_cleared_unset = 0;
-int vm_stat_discard_cleared_too_late = 0;
+kern_return_t
+memory_object_iopl_request(
+ ipc_port_t port, /* named-entry port or memory-object-control port */
+ memory_object_offset_t offset, /* for a named entry: relative to the entry, rebased below */
+ upl_size_t *upl_size, /* IN/OUT: 0 on entry (named entry only) means "from offset to end of entry" */
+ upl_t *upl_ptr,
+ upl_page_info_array_t user_page_list,
+ unsigned int *page_list_count,
+ upl_control_flags_t *flags) /* IN: request flags; OUT: description of the object (see below) */
+{ /*
+ * Resolve `port` to a VM object, hold a reference on it, and pass the
+ * request to vm_object_iopl_request() using the flags the caller
+ * supplied. Once an object has been found, *flags is overwritten with
+ * UPL_PHYS_CONTIG and/or UPL_DEV_MEMORY (or 0) to describe that object.
+ * Returns the status of vm_object_iopl_request(), or one of the
+ * KERN_INVALID_* codes from the validation steps below.
+ */
+ vm_object_t object;
+ kern_return_t ret;
+ upl_control_flags_t caller_flags; /* copy of *flags taken before *flags is rewritten */
-/*
- * vm_set_default_memory_manager():
- * [Obsolete]
+ caller_flags = *flags;
+
+ if (caller_flags & ~UPL_VALID_FLAGS) {
+ /*
+ * For forward compatibility's sake,
+ * reject any unknown flag.
+ */
+ return KERN_INVALID_VALUE;
+ }
+
+ if (ip_kotype(port) == IKOT_NAMED_ENTRY) { /* port carries a vm_named_entry */
+ vm_named_entry_t named_entry;
+
+ named_entry = (vm_named_entry_t)port->ip_kobject;
+ /* a few checks to make sure user is obeying rules */
+ if(*upl_size == 0) { /* size 0: cover everything from offset to the end of the entry */
+ if(offset >= named_entry->size)
+ return(KERN_INVALID_RIGHT);
+ *upl_size = (upl_size_t)(named_entry->size - offset);
+ if (*upl_size != named_entry->size - offset) /* remainder was truncated by the upl_size_t cast */
+ return KERN_INVALID_ARGUMENT;
+ }
+ if(caller_flags & UPL_COPYOUT_FROM) { /* with this flag only read permission is required */
+ if((named_entry->protection & VM_PROT_READ)
+ != VM_PROT_READ) {
+ return(KERN_INVALID_RIGHT);
+ }
+ } else { /* otherwise both read and write permission are required */
+ if((named_entry->protection &
+ (VM_PROT_READ | VM_PROT_WRITE))
+ != (VM_PROT_READ | VM_PROT_WRITE)) {
+ return(KERN_INVALID_RIGHT);
+ }
+ }
+ if(named_entry->size < (offset + *upl_size)) /* NOTE(review): the sum is not guarded against wrap-around here - confirm the types rule it out */
+ return(KERN_INVALID_ARGUMENT);
+
+ /* the callers parameter offset is defined to be the */
+ /* offset from beginning of named entry offset in object */
+ offset = offset + named_entry->offset;
+
+ if (named_entry->is_sub_map ||
+ named_entry->is_copy) /* sub-map and copy entries are rejected */
+ return KERN_INVALID_ARGUMENT;
+
+ named_entry_lock(named_entry); /* NOTE(review): size/protection/offset/is_sub_map/is_copy above were read before this lock is taken */
+
+ if (named_entry->is_pager) { /* entry is backed by a pager: obtain its object via vm_object_enter() */
+ object = vm_object_enter(named_entry->backing.pager,
+ named_entry->offset + named_entry->size,
+ named_entry->internal,
+ FALSE,
+ FALSE);
+ if (object == VM_OBJECT_NULL) {
+ named_entry_unlock(named_entry);
+ return(KERN_INVALID_OBJECT);
+ }
+
+ /* JMM - drop reference on pager here? */
+
+ /* create an extra reference for the named entry; presumably vm_object_enter() already returned one, which is the one released at the end of this routine - TODO confirm */
+ vm_object_lock(object);
+ vm_object_reference_locked(object);
+ named_entry->backing.object = object; /* from here on the entry is object-backed */
+ named_entry->is_pager = FALSE;
+ named_entry_unlock(named_entry);
+
+ /* wait for object to be ready */
+ while (!object->pager_ready) {
+ vm_object_wait(object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ THREAD_UNINT);
+ vm_object_lock(object); /* lock is re-taken before re-testing; vm_object_wait() presumably returns with it dropped */
+ }
+ vm_object_unlock(object);
+ } else {
+ /* This is the case where we are going to map */
+ /* an already mapped object. If the object is */
+ /* not ready it is internal. An external */
+ /* object cannot be mapped until it is ready */
+ /* we can therefore avoid the ready check */
+ /* in this case. */
+ object = named_entry->backing.object;
+ vm_object_reference(object); /* held across the iopl request, released at the end */
+ named_entry_unlock(named_entry);
+ }
+ } else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) { /* port is a memory object control */
+ memory_object_control_t control;
+ control = (memory_object_control_t) port; /* the port pointer itself is used as the control handle */
+ if (control == NULL)
+ return (KERN_INVALID_ARGUMENT);
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL)
+ return (KERN_INVALID_ARGUMENT);
+ vm_object_reference(object); /* held across the iopl request, released at the end */
+ } else {
+ return KERN_INVALID_ARGUMENT; /* any other kind of port is not accepted */
+ }
+ if (object == VM_OBJECT_NULL)
+ return (KERN_INVALID_ARGUMENT);
+
+ if (!object->private) { /* report the object's layout back to the caller through *flags */
+ if (object->phys_contiguous) {
+ *flags = UPL_PHYS_CONTIG;
+ } else {
+ *flags = 0;
+ }
+ } else {
+ *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
+ }
+
+ ret = vm_object_iopl_request(object,
+ offset,
+ *upl_size,
+ upl_ptr,
+ user_page_list,
+ page_list_count,
+ caller_flags); /* the caller's original flags, not the rewritten *flags */
+ vm_object_deallocate(object); /* drop the reference held for the duration of the request */
+ return ret;
+}
+
+/*
+ * Routine: memory_object_upl_request [interface]
+ * Purpose:
+ * Cause the population of a portion of a vm_object.
+ * Depending on the nature of the request, the pages
+ * returned may contain valid data or be uninitialized.
+ *
+ * Resolves `control` to its VM object and forwards every
+ * other argument to vm_object_upl_request(); cntrl_flags
+ * is converted to upl_control_flags_t on the way.
+ * Returns:
+ * KERN_TERMINATED if `control` has no VM object,
+ * otherwise the status of vm_object_upl_request().
*/
+
kern_return_t
-vm_set_default_memory_manager(
- host_t host,
- ipc_port_t *default_manager)
+memory_object_upl_request(
+ memory_object_control_t control,
+ memory_object_offset_t offset,
+ upl_size_t size,
+ upl_t *upl_ptr,
+ upl_page_info_array_t user_page_list,
+ unsigned int *page_list_count,
+ int cntrl_flags)
{
- return(host_default_memory_manager(host_priv_self(), default_manager, 4*PAGE_SIZE));
+ vm_object_t object;
+
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL)
+ return (KERN_TERMINATED); /* no VM object behind this control */
+
+ return vm_object_upl_request(object,
+ offset,
+ size,
+ upl_ptr,
+ user_page_list,
+ page_list_count,
+ (upl_control_flags_t)(unsigned int) cntrl_flags); /* goes through unsigned int first, presumably to avoid sign extension if the flags type is wider - TODO confirm */
}
+/*
+ * Routine: memory_object_super_upl_request [interface]
+ * Purpose:
+ * Cause the population of a portion of a vm_object
+ * in much the same way as memory_object_upl_request.
+ * Depending on the nature of the request, the pages
+ * returned may contain valid data or be uninitialized.
+ * However, the region may be expanded up to the super
+ * cluster size provided.
+ *
+ * Resolves `control` to its VM object and forwards every
+ * other argument to vm_object_super_upl_request();
+ * cntrl_flags is converted to upl_control_flags_t.
+ * Returns:
+ * KERN_INVALID_ARGUMENT if `control` has no VM object,
+ * otherwise the status of vm_object_super_upl_request().
+ * NOTE(review): memory_object_upl_request() reports the
+ * same condition as KERN_TERMINATED - confirm the
+ * difference is intended.
+ */
+
+kern_return_t
+memory_object_super_upl_request(
+ memory_object_control_t control,
+ memory_object_offset_t offset,
+ upl_size_t size,
+ upl_size_t super_cluster, /* upper bound for expanding the region, per the header comment */
+ upl_t *upl,
+ upl_page_info_t *user_page_list,
+ unsigned int *page_list_count,
+ int cntrl_flags)
+{
+ vm_object_t object;
+
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL)
+ return (KERN_INVALID_ARGUMENT); /* no VM object behind this control */
+
+ return vm_object_super_upl_request(object,
+ offset,
+ size,
+ super_cluster,
+ upl,
+ user_page_list,
+ page_list_count,
+ (upl_control_flags_t)(unsigned int) cntrl_flags); /* goes through unsigned int first, presumably to avoid sign extension - TODO confirm */
+}
+
+kern_return_t
+memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start,
+ vm_size_t *length, uint32_t *io_streaming, memory_object_fault_info_t fault_info)
+{ /*
+ * Wrapper around vm_object_cluster_size() for callers that hold a
+ * memory object control. *start is translated by the object's
+ * paging_offset before the call and translated back afterwards
+ * (presumably pager-relative <-> object-relative - TODO confirm).
+ * Returns KERN_INVALID_ARGUMENT if `control` has no VM object or if
+ * *start lies below the object's paging_offset; KERN_SUCCESS
+ * otherwise. start, length and io_streaming are passed by pointer to
+ * vm_object_cluster_size(), which may update them.
+ */
+ vm_object_t object;
+
+ object = memory_object_control_to_vm_object(control);
+
+ if (object == VM_OBJECT_NULL || object->paging_offset > *start) /* second test keeps the subtraction below from going under zero */
+ return (KERN_INVALID_ARGUMENT);
+
+ *start -= object->paging_offset;
+
+ vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info, io_streaming); /* note: fault_info precedes io_streaming here, unlike this routine's own parameter order */
+
+ *start += object->paging_offset; /* undo the translation on whatever value the call left in *start */
+
+ return (KERN_SUCCESS);
+}
+
+
+int vm_stat_discard_cleared_reply = 0;
+int vm_stat_discard_cleared_unset = 0;
+int vm_stat_discard_cleared_too_late = 0;
+
+
+
/*
- * Routine: host_default_memory_manager
+ * Routine: host_default_memory_manager [interface]
* Purpose:
* set/get the default memory manager port and default cluster
* size.
*/
kern_return_t
host_default_memory_manager(
- host_priv_t host_priv,
- ipc_port_t *default_manager,
- vm_size_t cluster_size)
+ host_priv_t host_priv,
+ memory_object_default_t *default_manager,
+ __unused memory_object_cluster_size_t cluster_size)
{
- ipc_port_t current_manager;
- ipc_port_t new_manager;
- ipc_port_t returned_manager;
+ memory_object_default_t current_manager;
+ memory_object_default_t new_manager;
+ memory_object_default_t returned_manager;
+ kern_return_t result = KERN_SUCCESS;
if (host_priv == HOST_PRIV_NULL)
return(KERN_INVALID_HOST);
assert(host_priv == &realhost);
new_manager = *default_manager;
- mutex_lock(&memory_manager_default_lock);
+ lck_mtx_lock(&memory_manager_default_lock);
current_manager = memory_manager_default;
+ returned_manager = MEMORY_OBJECT_DEFAULT_NULL;
- if (new_manager == IP_NULL) {
+ if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
/*
* Retrieve the current value.
*/
-
- returned_manager = ipc_port_copy_send(current_manager);
+ returned_manager = current_manager;
+ memory_object_default_reference(returned_manager);
} else {
+ /*
+ * Only allow the kernel to change the value.
+ */
+ extern task_t kernel_task;
+ if (current_task() != kernel_task) {
+ result = KERN_NO_ACCESS;
+ goto out;
+ }
+
+ /*
+ * If this is the first non-null manager, start
+ * up the internal pager support.
+ */
+ if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
+ result = vm_pageout_internal_start();
+ if (result != KERN_SUCCESS)
+ goto out;
+ }
+
/*
* Retrieve the current value,
* and replace it with the supplied value.
- * We consume the supplied naked send right.
+ * We return the old reference to the caller
+ * but we have to take a reference on the new
+ * one.
*/
-
returned_manager = current_manager;
memory_manager_default = new_manager;
- if (cluster_size % PAGE_SIZE != 0) {
-#if 0
- mutex_unlock(&memory_manager_default_lock);
- return KERN_INVALID_ARGUMENT;
-#else
- cluster_size = round_page(cluster_size);
-#endif
- }
- memory_manager_default_cluster = cluster_size;
+ memory_object_default_reference(new_manager);
/*
* In case anyone's been waiting for a memory
*/
thread_wakeup((event_t) &memory_manager_default);
- }
- mutex_unlock(&memory_manager_default_lock);
+ /*
+ * Now that we have a default pager for anonymous memory,
+ * reactivate all the throttled pages (i.e. dirty pages with
+ * no pager).
+ */
+ if (current_manager == MEMORY_OBJECT_DEFAULT_NULL)
+ {
+ vm_page_reactivate_all_throttled();
+ }
+ }
+ out:
+ lck_mtx_unlock(&memory_manager_default_lock);
*default_manager = returned_manager;
- return(KERN_SUCCESS);
+ return(result);
}
/*
* valid (not IP_NULL or IP_DEAD).
*/
-ipc_port_t
-memory_manager_default_reference(
- vm_size_t *cluster_size)
+__private_extern__ memory_object_default_t /* waits until a default memory manager is set, then returns it with a reference taken */
+memory_manager_default_reference(void)
{
- ipc_port_t current_manager;
-
- mutex_lock(&memory_manager_default_lock);
+ memory_object_default_t current_manager; /* value of memory_manager_default read under the lock */
- while (current_manager = ipc_port_copy_send(memory_manager_default),
- !IP_VALID(current_manager)) {
- thread_sleep_mutex((event_t) &memory_manager_default,
- &memory_manager_default_lock, THREAD_UNINT);
- mutex_lock(&memory_manager_default_lock);
+ lck_mtx_lock(&memory_manager_default_lock); /* held while reading the global and taking the reference */
+ current_manager = memory_manager_default;
+ while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) { /* nothing registered yet: sleep and re-check */
+ wait_result_t res;
+
+ res = lck_mtx_sleep(&memory_manager_default_lock,
+ LCK_SLEEP_DEFAULT,
+ (event_t) &memory_manager_default, /* same event host_default_memory_manager() posts with thread_wakeup() */
+ THREAD_UNINT);
+ assert(res == THREAD_AWAKENED); /* presumably the only result expected from an uninterruptible sleep - TODO confirm */
+ current_manager = memory_manager_default; /* re-read after waking; lck_mtx_sleep() presumably returns with the mutex held again */
}
- *cluster_size = memory_manager_default_cluster;
-
- mutex_unlock(&memory_manager_default_lock);
+ memory_object_default_reference(current_manager); /* reference handed to the caller */
+ lck_mtx_unlock(&memory_manager_default_lock);
return current_manager;
}
-/*
- * Routine: memory_manager_default_port
- * Purpose:
- * Returns true if the receiver for the port
- * is the default memory manager.
- *
- * This is a hack to let ds_read_done
- * know when it should keep memory wired.
- */
-
-boolean_t
-memory_manager_default_port(
- ipc_port_t port)
-{
- ipc_port_t current;
- boolean_t result;
-
- mutex_lock(&memory_manager_default_lock);
- current = memory_manager_default;
- if (IP_VALID(current)) {
- /*
- * There is no point in bothering to lock
- * both ports, which would be painful to do.
- * If the receive rights are moving around,
- * we might be inaccurate.
- */
-
- result = port->ip_receiver == current->ip_receiver;
- } else
- result = FALSE;
- mutex_unlock(&memory_manager_default_lock);
-
- return result;
-}
-
/*
* Routine: memory_manager_default_check
*
* but only the first time.
*
*/
-kern_return_t
+__private_extern__ kern_return_t /* KERN_SUCCESS if a default memory manager is set, else KERN_FAILURE (warning printed on the first failure only) */
memory_manager_default_check(void)
{
- ipc_port_t current;
+ memory_object_default_t current; /* value of memory_manager_default read under the lock */
- mutex_lock(&memory_manager_default_lock);
+ lck_mtx_lock(&memory_manager_default_lock); /* covers the read of the global and the update of the static "logged" flag */
current = memory_manager_default;
- if (!IP_VALID(current)) {
+ if (current == MEMORY_OBJECT_DEFAULT_NULL) { /* no default manager registered */
static boolean_t logged; /* initialized to 0 */
boolean_t complain = !logged;
logged = TRUE;
- mutex_unlock(&memory_manager_default_lock);
+ lck_mtx_unlock(&memory_manager_default_lock); /* released before the printf() below */
if (complain)
printf("Warning: No default memory manager\n");
return(KERN_FAILURE);
} else {
- mutex_unlock(&memory_manager_default_lock);
+ lck_mtx_unlock(&memory_manager_default_lock); /* manager present: nothing else to do */
return(KERN_SUCCESS);
}
}
-void
+__private_extern__ void
memory_manager_default_init(void)
{
- memory_manager_default = IP_NULL;
- mutex_init(&memory_manager_default_lock, ETAP_VM_MEMMAN);
+ /*
+ * Start with no default memory manager registered and initialize
+ * the mutex that the other memory_manager_default_* routines take
+ * around accesses to memory_manager_default.
+ */
+ memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
+ lck_mtx_init(&memory_manager_default_lock, &vm_object_lck_grp, &vm_object_lck_attr);
}
-void
-memory_object_deactivate_pages(
- vm_object_t object,
- vm_object_offset_t offset,
- vm_object_size_t size,
- boolean_t kill_page)
+
+/* Allow manipulation of individual page state. This is actually part of */
+/* the UPL regimen but takes place on the object rather than on a UPL */
+/*
+ * Resolves "control" to its VM object and forwards offset, ops,
+ * phys_entry and flags unchanged to vm_object_page_op(), returning that
+ * routine's result. Returns KERN_INVALID_ARGUMENT when the control does
+ * not resolve to a VM object.
+ */
+
+kern_return_t
+memory_object_page_op(
+ memory_object_control_t control,
+ memory_object_offset_t offset,
+ int ops,
+ ppnum_t *phys_entry,
+ int *flags)
{
- vm_object_t orig_object;
- int pages_moved = 0;
- int pages_found = 0;
+ vm_object_t object;
- /*
- * entered with object lock held, acquire a paging reference to
- * prevent the memory_object and control ports from
- * being destroyed.
- */
- orig_object = object;
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL)
+ return (KERN_INVALID_ARGUMENT);
- for (;;) {
- register vm_page_t m;
- vm_object_offset_t toffset;
- vm_object_size_t tsize;
+ return vm_object_page_op(object, offset, ops, phys_entry, flags);
+}
- vm_object_paging_begin(object);
- vm_page_lock_queues();
+/*
+ * memory_object_range_op offers performance enhancement over
+ * memory_object_page_op for page_op functions which do not require page
+ * level state to be returned from the call. Page_op was created to provide
+ * a low-cost alternative to page manipulation via UPLs when only a single
+ * page was involved. The range_op call establishes the ability in the _op
+ * family of functions to work on multiple pages where the lack of page level
+ * state handling allows the caller to avoid the overhead of the upl structures.
+ *
+ * Returns KERN_INVALID_ARGUMENT when "control" does not resolve to a VM
+ * object; otherwise returns the result of vm_object_range_op(). The
+ * "range" pointer is handed to that routine reinterpreted as uint32_t *.
+ */
- for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
+kern_return_t
+memory_object_range_op(
+ memory_object_control_t control,
+ memory_object_offset_t offset_beg,
+ memory_object_offset_t offset_end,
+ int ops,
+ int *range)
+{
+ vm_object_t object;
+
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL)
+ return (KERN_INVALID_ARGUMENT);
+
+ return vm_object_range_op(object,
+ offset_beg,
+ offset_end,
+ ops,
+ (uint32_t *) range);
+}
- if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
- pages_found++;
+/*
+ * memory_object_mark_used:
+ * Calls vm_object_cache_remove() on the VM object behind "control".
+ * Silently does nothing when "control" is NULL or does not resolve to
+ * a VM object.
+ */
+void
+memory_object_mark_used(
+ memory_object_control_t control)
+{
+ vm_object_t object;
- if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
+ if (control == NULL)
+ return;
- m->reference = FALSE;
- pmap_clear_reference(m->phys_addr);
+ object = memory_object_control_to_vm_object(control);
- if ((kill_page) && (object->internal)) {
- m->precious = FALSE;
- m->dirty = FALSE;
- pmap_clear_modify(m->phys_addr);
- vm_external_state_clr(object->existence_map, offset);
- }
- VM_PAGE_QUEUES_REMOVE(m);
+ if (object != VM_OBJECT_NULL)
+ vm_object_cache_remove(object);
+}
- queue_enter_first(&vm_page_queue_inactive, m, vm_page_t, pageq);
- m->inactive = TRUE;
- if (!m->fictitious)
- vm_page_inactive_count++;
+/*
+ * memory_object_mark_unused:
+ * Calls vm_object_cache_add() on the VM object behind "control".
+ * Silently does nothing when "control" is NULL or does not resolve to
+ * a VM object. The "rage" argument is accepted but not used.
+ */
+void
+memory_object_mark_unused(
+ memory_object_control_t control,
+ __unused boolean_t rage)
+{
+ vm_object_t object;
- pages_moved++;
- }
- }
- }
- vm_page_unlock_queues();
- vm_object_paging_end(object);
+ if (control == NULL)
+ return;
- if (object->shadow) {
- vm_object_t tmp_object;
+ object = memory_object_control_to_vm_object(control);
- kill_page = 0;
+ if (object != VM_OBJECT_NULL)
+ vm_object_cache_add(object);
+}
- offset += object->shadow_offset;
+/*
+ * memory_object_mark_io_tracking:
+ * Sets the io_tracking flag on the VM object behind "control", with
+ * the object lock held around the store. Silently does nothing when
+ * "control" is NULL or does not resolve to a VM object. There is no
+ * counterpart here that clears the flag.
+ */
+void
+memory_object_mark_io_tracking(
+ memory_object_control_t control)
+{
+ vm_object_t object;
- tmp_object = object->shadow;
- vm_object_lock(tmp_object);
+ if (control == NULL)
+ return;
+ object = memory_object_control_to_vm_object(control);
- if (object != orig_object)
- vm_object_unlock(object);
- object = tmp_object;
- } else
- break;
+ if (object != VM_OBJECT_NULL) {
+ vm_object_lock(object);
+ object->io_tracking = TRUE;
+ vm_object_unlock(object);
}
- if (object != orig_object)
- vm_object_unlock(object);
}
-/* Allow manipulation of individual page state. This is actually part of */
-/* the UPL regimen but takes place on the object rather than on a UPL */
-
-kern_return_t
-memory_object_page_op(
- vm_object_t object,
- vm_object_offset_t offset,
- int ops,
- vm_offset_t *phys_entry,
- int *flags)
+#if CONFIG_SECLUDED_MEMORY
+/*
+ * memory_object_mark_eligible_for_secluded:
+ * Turn the eligible_for_secluded flag of the VM object behind
+ * "control" on or off, keeping vm_page_secluded.eligible_for_secluded
+ * in step by adding or subtracting the object's resident_page_count.
+ *
+ * Turning the flag on only takes effect when secluded_for_filecache is
+ * set. A request that matches the object's current state changes
+ * nothing. Silently returns when "control" is NULL or does not resolve
+ * to a VM object. The flag and the counter are updated with the object
+ * lock held.
+ */
+void
+memory_object_mark_eligible_for_secluded(
+ memory_object_control_t control,
+ boolean_t eligible_for_secluded)
{
- vm_page_t dst_page;
+ vm_object_t object;
- vm_object_lock(object);
+ if (control == NULL)
+ return;
+ object = memory_object_control_to_vm_object(control);
- while(TRUE) {
- if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
- vm_object_unlock(object);
- return KERN_FAILURE;
- }
+ if (object == VM_OBJECT_NULL) {
+ return;
+ }
- /* Sync up on getting the busy bit */
- if((dst_page->busy || dst_page->cleaning) &&
- (((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
- /* someone else is playing with the page, we will */
- /* have to wait */
- PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
- vm_object_unlock(object);
- thread_block((void(*)(void))0);
- vm_object_lock(object);
- continue;
+ vm_object_lock(object);
+ if (eligible_for_secluded &&
+ secluded_for_filecache && /* global boot-arg */
+ !object->eligible_for_secluded) {
+ object->eligible_for_secluded = TRUE;
+ vm_page_secluded.eligible_for_secluded += object->resident_page_count;
+ } else if (!eligible_for_secluded &&
+ object->eligible_for_secluded) {
+ object->eligible_for_secluded = FALSE;
+ vm_page_secluded.eligible_for_secluded -= object->resident_page_count;
+ /* Pages already resident are left where they are; see the TODO below. */
+ if (object->resident_page_count) {
+ /* XXX FBDP TODO: flush pages from secluded queue? */
+ // printf("FBDP TODO: flush %d pages from %p from secluded queue\n", object->resident_page_count, object);
}
+ }
+ vm_object_unlock(object);
+}
+#endif /* CONFIG_SECLUDED_MEMORY */
- if (ops & UPL_POP_DUMP) {
- vm_page_lock_queues();
- vm_page_free(dst_page);
- vm_page_unlock_queues();
- break;
- }
+/*
+ * memory_object_pages_resident:
+ * Report through *has_pages_resident whether the VM object behind
+ * "control" has a non-zero resident_page_count.
+ *
+ * *has_pages_resident is set to FALSE before "control" is validated,
+ * so it is FALSE on the KERN_INVALID_ARGUMENT return. The pointer is
+ * dereferenced unconditionally. resident_page_count is read here
+ * without taking the object lock.
+ */
+kern_return_t
+memory_object_pages_resident(
+ memory_object_control_t control,
+ boolean_t * has_pages_resident)
+{
+ vm_object_t object;
- if (flags) {
- *flags = 0;
+ *has_pages_resident = FALSE;
- /* Get the condition of flags before requested ops */
- /* are undertaken */
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL)
+ return (KERN_INVALID_ARGUMENT);
- if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
- if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
- if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
- if(dst_page->absent) *flags |= UPL_POP_ABSENT;
- if(dst_page->busy) *flags |= UPL_POP_BUSY;
- }
- if (phys_entry)
- *phys_entry = dst_page->phys_addr;
+ if (object->resident_page_count)
+ *has_pages_resident = TRUE;
- /* The caller should have made a call either contingent with */
- /* or prior to this call to set UPL_POP_BUSY */
- if(ops & UPL_POP_SET) {
- /* The protection granted with this assert will */
- /* not be complete. If the caller violates the */
- /* convention and attempts to change page state */
- /* without first setting busy we may not see it */
- /* because the page may already be busy. However */
- /* if such violations occur we will assert sooner */
- /* or later. */
- assert(dst_page->busy || (ops & UPL_POP_BUSY));
- if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
- if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
- if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
- if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
- if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
- }
+ return (KERN_SUCCESS);
+}
- if(ops & UPL_POP_CLR) {
- assert(dst_page->busy);
- if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
- if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
- if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
- if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
- if (ops & UPL_POP_BUSY) {
- dst_page->busy = FALSE;
- PAGE_WAKEUP(dst_page);
- }
- }
- break;
- }
+/*
+ * memory_object_signed:
+ * Store "is_signed" into the code_signed flag of the VM object behind
+ * "control", with the object lock held around the store.
+ * Returns KERN_INVALID_ARGUMENT when the control does not resolve to a
+ * VM object, KERN_SUCCESS otherwise.
+ */
+kern_return_t
+memory_object_signed(
+ memory_object_control_t control,
+ boolean_t is_signed)
+{
+ vm_object_t object;
+ object = memory_object_control_to_vm_object(control);
+ if (object == VM_OBJECT_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ vm_object_lock(object);
+ object->code_signed = is_signed;
vm_object_unlock(object);
+
return KERN_SUCCESS;
-
}
+/*
+ * memory_object_is_signed:
+ *	Report the code_signed flag of the VM object behind "control".
+ *	Returns FALSE when the control does not resolve to a VM object.
+ *	The flag is sampled with the object lock held shared.
+ */
+boolean_t
+memory_object_is_signed(
+	memory_object_control_t	control)
+{
+	vm_object_t	vmobj = memory_object_control_to_vm_object(control);
+	boolean_t	code_is_signed = FALSE;
+
+	if (vmobj != VM_OBJECT_NULL) {
+		vm_object_lock_shared(vmobj);
+		code_is_signed = vmobj->code_signed;
+		vm_object_unlock(vmobj);
+	}
+
+	return code_is_signed;
+}
+
+/*
+ * memory_object_is_slid:
+ *	Report the object_slid flag of the VM object behind "control".
+ *	Returns FALSE when the control does not resolve to a VM object.
+ *	The flag is read without taking the object lock.
+ */
+boolean_t
+memory_object_is_slid(
+	memory_object_control_t	control)
+{
+	vm_object_t	vmobj = memory_object_control_to_vm_object(control);
+
+	if (vmobj != VM_OBJECT_NULL) {
+		return vmobj->object_slid;
+	}
+	return FALSE;
+}
+/*
+ * Zone from which memory_object_control structures are allocated and to
+ * which they are freed; created by memory_object_control_bootstrap().
+ */
+static zone_t mem_obj_control_zone;
+
+/*
+ * memory_object_control_bootstrap:
+ *	Create the zone that backs memory_object_control structures.
+ *	The zone's element size is sizeof(struct memory_object_control),
+ *	its limit is 8192 elements' worth of bytes, and 4096 is passed as
+ *	the allocation-size argument. Z_CALLERACCT is turned off and
+ *	Z_NOENCRYPT is turned on for the zone.
+ *
+ *	The element size is kept in a vm_size_t (rather than a signed int)
+ *	so the limit computation is carried out in the unsigned size type.
+ */
+__private_extern__ void
+memory_object_control_bootstrap(void)
+{
+	vm_size_t	elem_size = (vm_size_t) sizeof (struct memory_object_control);
+
+	mem_obj_control_zone = zinit (elem_size, 8192*elem_size, 4096, "mem_obj_control");
+	zone_change(mem_obj_control_zone, Z_CALLERACCT, FALSE);
+	zone_change(mem_obj_control_zone, Z_NOENCRYPT, TRUE);
+}
+
+/*
+ * memory_object_control_allocate:
+ *	Allocate a control structure from mem_obj_control_zone and bind it
+ *	to "object". The moc_ikot field is stamped with IKOT_MEM_OBJ_CONTROL,
+ *	which memory_object_control_to_vm_object() later checks.
+ *	Returns MEMORY_OBJECT_CONTROL_NULL if the zone allocation fails.
+ */
+__private_extern__ memory_object_control_t
+memory_object_control_allocate(
+	vm_object_t		object)
+{
+	memory_object_control_t	new_control;
+
+	new_control = (memory_object_control_t)zalloc(mem_obj_control_zone);
+	if (new_control == MEMORY_OBJECT_CONTROL_NULL)
+		return (new_control);
+
+	new_control->moc_object = object;
+	new_control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
+	return (new_control);
+}
+
+/*
+ * memory_object_control_collapse:
+ * Re-point "control" at a different VM object. The assertion requires
+ * that the control currently refers to some object and that the new
+ * object differs from it. "control" itself is dereferenced without a
+ * NULL check.
+ */
+__private_extern__ void
+memory_object_control_collapse(
+ memory_object_control_t control,
+ vm_object_t object)
+{
+ assert((control->moc_object != VM_OBJECT_NULL) &&
+ (control->moc_object != object));
+ control->moc_object = object;
+}
+
+/*
+ * memory_object_control_to_vm_object:
+ *	Return the VM object recorded in "control", or VM_OBJECT_NULL when
+ *	the control is null or its moc_ikot tag is not IKOT_MEM_OBJ_CONTROL.
+ *	The recorded object may itself be VM_OBJECT_NULL.
+ */
+__private_extern__ vm_object_t
+memory_object_control_to_vm_object(
+	memory_object_control_t	control)
+{
+	if (control != MEMORY_OBJECT_CONTROL_NULL &&
+	    control->moc_ikot == IKOT_MEM_OBJ_CONTROL) {
+		return (control->moc_object);
+	}
+
+	return VM_OBJECT_NULL;
+}
+
+/*
+ * convert_port_to_mo_control:
+ * Stub: ignores "port" and always returns MEMORY_OBJECT_CONTROL_NULL.
+ */
+memory_object_control_t
+convert_port_to_mo_control(
+ __unused mach_port_t port)
+{
+ return MEMORY_OBJECT_CONTROL_NULL;
+}
+
+
+/*
+ * convert_mo_control_to_port:
+ * Stub: ignores "control" and always returns MACH_PORT_NULL.
+ */
+mach_port_t
+convert_mo_control_to_port(
+ __unused memory_object_control_t control)
+{
+ return MACH_PORT_NULL;
+}
+
+/*
+ * memory_object_control_reference:
+ * No-op: "control" is ignored and no reference count is touched.
+ */
+void
+memory_object_control_reference(
+ __unused memory_object_control_t control)
+{
+ return;
+}
+
+/*
+ * We only ever issue one of these references, so kill it
+ * when that gets released (should switch to real reference
+ * counting in true port-less EMMI).
+ *
+ * The control structure is handed back to mem_obj_control_zone
+ * unconditionally, so "control" must not be used after this call.
+ */
+void
+memory_object_control_deallocate(
+ memory_object_control_t control)
+{
+ zfree(mem_obj_control_zone, control);
+}
+
+/*
+ * memory_object_control_disable:
+ * Detach "control" from its VM object by setting moc_object to
+ * VM_OBJECT_NULL; memory_object_control_to_vm_object() then returns
+ * VM_OBJECT_NULL for this control. Asserts that the control was still
+ * attached to an object on entry.
+ */
+void
+memory_object_control_disable(
+ memory_object_control_t control)
+{
+ assert(control->moc_object != VM_OBJECT_NULL);
+ control->moc_object = VM_OBJECT_NULL;
+}
+
+/*
+ * memory_object_default_reference:
+ * Calls ipc_port_make_send() on "dmm"; any value that call returns is
+ * discarded. NOTE(review): presumably this accounts for one send right
+ * that memory_object_default_deallocate() later releases -- confirm
+ * against the ipc_port routines.
+ */
+void
+memory_object_default_reference(
+ memory_object_default_t dmm)
+{
+ ipc_port_make_send(dmm);
+}
+
+/*
+ * memory_object_default_deallocate:
+ * Calls ipc_port_release_send() on "dmm". No validity check is made
+ * on "dmm" here.
+ */
+void
+memory_object_default_deallocate(
+ memory_object_default_t dmm)
+{
+ ipc_port_release_send(dmm);
+}
+
+/*
+ * convert_port_to_memory_object:
+ * Stub: ignores "port" and always returns MEMORY_OBJECT_NULL.
+ */
+memory_object_t
+convert_port_to_memory_object(
+ __unused mach_port_t port)
+{
+ return (MEMORY_OBJECT_NULL);
+}
+
+
+/*
+ * convert_memory_object_to_port:
+ * Stub: ignores "object" and always returns MACH_PORT_NULL.
+ */
+mach_port_t
+convert_memory_object_to_port(
+ __unused memory_object_t object)
+{
+ return (MACH_PORT_NULL);
+}
+
+
+/*
+ * Routine memory_object_reference
+ *
+ * Dispatches to the memory_object_reference entry of the pager
+ * operations table attached to "memory_object" (mo_pager_ops).
+ * Neither the object nor the table entry is checked for NULL here.
+ */
+void memory_object_reference(
+ memory_object_t memory_object)
+{
+ (memory_object->mo_pager_ops->memory_object_reference)(
+ memory_object);
+}
+
+/*
+ * Routine memory_object_deallocate
+ *
+ * Dispatches to the memory_object_deallocate entry of the pager
+ * operations table attached to "memory_object" (mo_pager_ops).
+ * Neither the object nor the table entry is checked for NULL here.
+ */
+void memory_object_deallocate(
+ memory_object_t memory_object)
+{
+ (memory_object->mo_pager_ops->memory_object_deallocate)(
+ memory_object);
+}
+
+
+/*
+ * Routine memory_object_init
+ *
+ * Forwards memory_control and memory_object_page_size unchanged to the
+ * memory_object_init entry of the object's pager operations table and
+ * returns that entry's result. No NULL checks are made here.
+ */
+kern_return_t memory_object_init
+(
+ memory_object_t memory_object,
+ memory_object_control_t memory_control,
+ memory_object_cluster_size_t memory_object_page_size
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_init)(
+ memory_object,
+ memory_control,
+ memory_object_page_size);
+}
+
+/*
+ * Routine memory_object_terminate
+ *
+ * Forwards to the memory_object_terminate entry of the object's pager
+ * operations table and returns that entry's result. No NULL checks are
+ * made here.
+ */
+kern_return_t memory_object_terminate
+(
+ memory_object_t memory_object
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_terminate)(
+ memory_object);
+}
+
+/*
+ * Routine memory_object_data_request
+ *
+ * Forwards offset, length, desired_access and fault_info unchanged to
+ * the memory_object_data_request entry of the object's pager operations
+ * table and returns that entry's result. No NULL checks are made here.
+ */
+kern_return_t memory_object_data_request
+(
+ memory_object_t memory_object,
+ memory_object_offset_t offset,
+ memory_object_cluster_size_t length,
+ vm_prot_t desired_access,
+ memory_object_fault_info_t fault_info
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_data_request)(
+ memory_object,
+ offset,
+ length,
+ desired_access,
+ fault_info);
+}
+
+/*
+ * Routine memory_object_data_return
+ *
+ * Forwards every argument unchanged, including the resid_offset and
+ * io_error out-pointers, to the memory_object_data_return entry of the
+ * object's pager operations table and returns that entry's result.
+ * No NULL checks are made here.
+ */
+kern_return_t memory_object_data_return
+(
+ memory_object_t memory_object,
+ memory_object_offset_t offset,
+ memory_object_cluster_size_t size,
+ memory_object_offset_t *resid_offset,
+ int *io_error,
+ boolean_t dirty,
+ boolean_t kernel_copy,
+ int upl_flags
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_data_return)(
+ memory_object,
+ offset,
+ size,
+ resid_offset,
+ io_error,
+ dirty,
+ kernel_copy,
+ upl_flags);
+}
+
+/*
+ * Routine memory_object_data_initialize
+ *
+ * Forwards offset and size unchanged to the
+ * memory_object_data_initialize entry of the object's pager operations
+ * table and returns that entry's result. No NULL checks are made here.
+ */
+kern_return_t memory_object_data_initialize
+(
+ memory_object_t memory_object,
+ memory_object_offset_t offset,
+ memory_object_cluster_size_t size
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_data_initialize)(
+ memory_object,
+ offset,
+ size);
+}
+
+/*
+ * Routine memory_object_data_unlock
+ *
+ * Forwards offset, size and desired_access unchanged to the
+ * memory_object_data_unlock entry of the object's pager operations
+ * table and returns that entry's result. No NULL checks are made here.
+ */
+kern_return_t memory_object_data_unlock
+(
+ memory_object_t memory_object,
+ memory_object_offset_t offset,
+ memory_object_size_t size,
+ vm_prot_t desired_access
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_data_unlock)(
+ memory_object,
+ offset,
+ size,
+ desired_access);
+}
+
+/*
+ * Routine memory_object_synchronize
+ *
+ * Forwards offset, size and sync_flags unchanged to the
+ * memory_object_synchronize entry of the object's pager operations
+ * table and returns that entry's result. No NULL checks are made here.
+ */
+kern_return_t memory_object_synchronize
+(
+ memory_object_t memory_object,
+ memory_object_offset_t offset,
+ memory_object_size_t size,
+ vm_sync_t sync_flags
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_synchronize)(
+ memory_object,
+ offset,
+ size,
+ sync_flags);
+}
+
+
+/*
+ * memory_object_map() is called by VM (in vm_map_enter() and its variants)
+ * each time a "named" VM object gets mapped directly or indirectly
+ * (copy-on-write mapping). A "named" VM object has an extra reference held
+ * by the pager to keep it alive until the pager decides that the
+ * memory object (and its VM object) can be reclaimed.
+ * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
+ * the mappings of that memory object have been removed.
+ *
+ * For a given VM object, calls to memory_object_map() and
+ * memory_object_last_unmap() are serialized (through
+ * object->mapping_in_progress), to ensure that the pager gets a consistent
+ * view of the mapping status of the memory object.
+ *
+ * This allows the pager to keep track of how many times a memory object
+ * has been mapped and with which protections, to decide when it can be
+ * reclaimed.
+ */
+
+/*
+ * Routine memory_object_map
+ *
+ * Forwards "prot" unchanged to the memory_object_map entry of the
+ * object's pager operations table and returns that entry's result.
+ * No NULL checks are made here.
+ */
+kern_return_t memory_object_map
+(
+ memory_object_t memory_object,
+ vm_prot_t prot
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_map)(
+ memory_object,
+ prot);
+}
+
+/*
+ * Routine memory_object_last_unmap
+ *
+ * Forwards to the memory_object_last_unmap entry of the object's pager
+ * operations table and returns that entry's result. No NULL checks are
+ * made here.
+ */
+kern_return_t memory_object_last_unmap
+(
+ memory_object_t memory_object
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_last_unmap)(
+ memory_object);
+}
+
+/*
+ * Routine memory_object_data_reclaim
+ *
+ * Unlike the other pager-ops forwarders in this file, this entry is
+ * optional: when the pager operations table has no
+ * memory_object_data_reclaim entry, KERN_NOT_SUPPORTED is returned.
+ * Otherwise reclaim_backing_store is forwarded unchanged and the
+ * entry's result is returned.
+ */
+kern_return_t memory_object_data_reclaim
+(
+	memory_object_t		memory_object,
+	boolean_t		reclaim_backing_store
+)
+{
+	if (memory_object->mo_pager_ops->memory_object_data_reclaim != NULL) {
+		return (memory_object->mo_pager_ops->memory_object_data_reclaim)(
+			memory_object,
+			reclaim_backing_store);
+	}
+
+	return KERN_NOT_SUPPORTED;
+}
+
+/*
+ * convert_port_to_upl:
+ *	Translate a kernel port into the UPL stored as its kobject and
+ *	increment that UPL's ref_count on behalf of the caller.
+ *
+ *	Returns NULL when the port is null or dead, is no longer active,
+ *	or is not of kobject type IKOT_UPL.
+ */
+upl_t
+convert_port_to_upl(
+	ipc_port_t	port)
+{
+	upl_t upl;
+
+	/* A null or dead port name must be rejected before it is locked. */
+	if (!IP_VALID(port))
+		return (upl_t)NULL;
+
+	ip_lock(port);
+	if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
+		ip_unlock(port);
+		return (upl_t)NULL;
+	}
+	upl = (upl_t) port->ip_kobject;
+	ip_unlock(port);
+
+	/*
+	 * NOTE(review): the port lock is dropped before the UPL lock is
+	 * taken, so nothing visible here keeps the UPL alive across that
+	 * window -- confirm that callers guarantee its lifetime.
+	 */
+	upl_lock(upl);
+	upl->ref_count+=1;
+	upl_unlock(upl);
+	return upl;
+}
+
+/*
+ * convert_upl_to_port:
+ * Stub: ignores "upl" and always returns MACH_PORT_NULL.
+ */
+mach_port_t
+convert_upl_to_port(
+ __unused upl_t upl)
+{
+ return MACH_PORT_NULL;
+}
+
+/*
+ * upl_no_senders:
+ * No-op: both "port" and "mscount" are ignored.
+ */
+__private_extern__ void
+upl_no_senders(
+ __unused ipc_port_t port,
+ __unused mach_port_mscount_t mscount)
+{
+ return;
+}