X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/1c79356b52d46aa6b508fb032f5ae709b1f2897b..13f56ec4e58bf8687e2a68032c093c0213dd519b:/osfmk/vm/memory_object.c diff --git a/osfmk/vm/memory_object.c b/osfmk/vm/memory_object.c index 6c51bdcb9..de7baff29 100644 --- a/osfmk/vm/memory_object.c +++ b/osfmk/vm/memory_object.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -56,11 +62,6 @@ * External memory management interface control functions. */ -#ifdef MACH_BSD -/* THIS code should be removed when the component merge is completed */ -extern int vnode_pager_workaround; -#endif - #include /* @@ -70,51 +71,47 @@ extern int vnode_pager_workaround; #include /* For pointer_t */ #include +#include #include #include #include #include -#include +#include #include #include #include -#include -#include /* * Implementation dependencies: */ #include /* For memcpy() */ +#include +#include +#include /* For current_thread() */ +#include +#include + +#include +#include #include #include #include #include /* For pmap_clear_modify */ -#include -#include /* For current_thread() */ -#include #include /* For kernel_map, vm_move */ #include /* For vm_map_pageable */ -#include -#include - -#include +#include /* Needed by some vm_page.h macros */ +#include #if MACH_PAGEMAP #include #endif /* MACH_PAGEMAP */ +#include -ipc_port_t memory_manager_default = IP_NULL; -vm_size_t memory_manager_default_cluster = 0; -decl_mutex_data(,memory_manager_default_lock) -/* - * Forward ref to file-local function: - */ -boolean_t -memory_object_update(vm_object_t, vm_object_offset_t, - vm_size_t, memory_object_return_t, int, vm_prot_t); 
+memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL; +decl_lck_mtx_data(, memory_manager_default_lock) /* @@ -137,16 +134,16 @@ memory_object_update(vm_object_t, vm_object_offset_t, #define memory_object_should_return_page(m, should_return) \ (should_return != MEMORY_OBJECT_RETURN_NONE && \ - (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_addr))) || \ + (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \ ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \ (should_return) == MEMORY_OBJECT_RETURN_ANYTHING)) typedef int memory_object_lock_result_t; -#define MEMORY_OBJECT_LOCK_RESULT_DONE 0 -#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1 -#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN 2 -#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 3 +#define MEMORY_OBJECT_LOCK_RESULT_DONE 0 +#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1 +#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 2 +#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE 3 memory_object_lock_result_t memory_object_lock_page( vm_page_t m, @@ -176,267 +173,151 @@ memory_object_lock_page( { XPR(XPR_MEMORY_OBJECT, "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n", - (integer_t)m, should_return, should_flush, prot, 0); + m, should_return, should_flush, prot, 0); - /* - * If we cannot change access to the page, - * either because a mapping is in progress - * (busy page) or because a mapping has been - * wired, then give up. - */ - if (m->busy || m->cleaning) - return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK); + if (m->busy || m->cleaning) { + if (m->list_req_pending && + should_return == MEMORY_OBJECT_RETURN_NONE && + should_flush == TRUE) { + + if (m->absent) { + /* + * this is the list_req_pending | absent | busy case + * which originates from vm_fault_page. + * Combine that with should_flush == TRUE and we + * have a case where we need to toss the page from + * the object. 
+ */ + if (!VM_PAGE_WIRED(m)) { + return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE); + } else { + return (MEMORY_OBJECT_LOCK_RESULT_DONE); + } + } + if (m->pageout || m->cleaning) { + /* + * if pageout is set, page was earmarked by vm_pageout_scan + * to be cleaned and stolen... if cleaning is set, we're + * pre-cleaning pages for a hibernate... + * in either case, we're going + * to take it back since we are being asked to + * flush the page w/o cleaning it (i.e. we don't + * care that it's dirty, we want it gone from + * the cache) and we don't want to stall + * waiting for it to be cleaned for 2 reasons... + * 1 - no use paging it out since we're probably + * shrinking the file at this point or we no + * longer care about the data in the page + * 2 - if we stall, we may cause a deadlock in + * the FS trying to acquire its locks + * on the VNOP_PAGEOUT path presuming that + * those locks are already held on the truncate + * path before calling through to this function + * + * so undo all of the state that vm_pageout_scan + * hung on this page + */ + vm_pageout_queue_steal(m, FALSE); + PAGE_WAKEUP_DONE(m); + } else { + panic("list_req_pending on page %p without absent/pageout/cleaning set\n", m); + } + } else + return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK); + } /* * Don't worry about pages for which the kernel * does not have any data. */ - - if (m->absent || m->error || m->restart) - return(MEMORY_OBJECT_LOCK_RESULT_DONE); - - assert(!m->fictitious); - - if (m->wire_count != 0) { - /* - * If no change would take place - * anyway, return successfully. - * - * No change means: - * Not flushing AND - * No change to page lock [2 checks] AND - * Should not return page - * - * XXX This doesn't handle sending a copy of a wired - * XXX page to the pager, but that will require some - * XXX significant surgery. - */ - if (!should_flush && - (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) && - ! 
memory_object_should_return_page(m, should_return)) { - + if (m->absent || m->error || m->restart) { + if (m->error && should_flush && !VM_PAGE_WIRED(m)) { /* - * Restart page unlock requests, - * even though no change took place. - * [Memory managers may be expecting - * to see new requests.] + * dump the page, pager wants us to + * clean it up and there is no + * relevant data to return */ - m->unlock_request = VM_PROT_NONE; - PAGE_WAKEUP(m); - - return(MEMORY_OBJECT_LOCK_RESULT_DONE); + return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE); } - - return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK); + return (MEMORY_OBJECT_LOCK_RESULT_DONE); } + assert(!m->fictitious); - /* - * If the page is to be flushed, allow - * that to be done as part of the protection. - */ - - if (should_flush) - prot = VM_PROT_ALL; + if (VM_PAGE_WIRED(m)) { + /* + * The page is wired... just clean or return the page if needed. + * Wired pages don't get flushed or disconnected from the pmap. + */ + if (memory_object_should_return_page(m, should_return)) + return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN); - /* - * Set the page lock. - * - * If we are decreasing permission, do it now; - * let the fault handler take care of increases - * (pmap_page_protect may not increase protection). - */ + return (MEMORY_OBJECT_LOCK_RESULT_DONE); + } - if (prot != VM_PROT_NO_CHANGE) { -#if 0 - /* code associated with the vestigial - * memory_object_data_unlock + if (should_flush) { + /* + * must do the pmap_disconnect before determining the + * need to return the page... 
otherwise it's possible + * for the page to go from the clean to the dirty state + * after we've made our decision */ - if ((m->page_lock ^ prot) & prot) { - pmap_page_protect(m->phys_addr, VM_PROT_ALL & ~prot); - } - m->page_lock = prot; - m->lock_supplied = TRUE; - if (prot != VM_PROT_NONE) - m->unusual = TRUE; - else - m->unusual = FALSE; - + if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) + m->dirty = TRUE; + } else { /* - * Restart any past unlock requests, even if no - * change resulted. If the manager explicitly - * requested no protection change, then it is assumed - * to be remembering past requests. + * If we are decreasing permission, do it now; + * let the fault handler take care of increases + * (pmap_page_protect may not increase protection). */ - - m->unlock_request = VM_PROT_NONE; -#endif /* 0 */ - PAGE_WAKEUP(m); + if (prot != VM_PROT_NO_CHANGE) + pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot); } - /* - * Handle page returning. + * Handle returning dirty or precious pages */ - if (memory_object_should_return_page(m, should_return)) { - /* - * If we weren't planning - * to flush the page anyway, - * we may need to remove the - * page from the pageout - * system and from physical - * maps now. + * we used to do a pmap_disconnect here in support + * of memory_object_lock_request, but that routine + * no longer requires this... 
in any event, in + * our world, it would turn into a big noop since + * we don't lock the page in any way and as soon + * as we drop the object lock, the page can be + * faulted back into an address space + * + * if (!should_flush) + * pmap_disconnect(m->phys_page); */ - - vm_page_lock_queues(); - VM_PAGE_QUEUES_REMOVE(m); - vm_page_unlock_queues(); - - if (!should_flush) - pmap_page_protect(m->phys_addr, VM_PROT_NONE); - - if (m->dirty) - return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN); - else - return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN); + return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN); } /* - * Handle flushing + * Handle flushing clean pages */ + if (should_flush) + return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE); - if (should_flush) { - VM_PAGE_FREE(m); - } else { - extern boolean_t vm_page_deactivate_hint; - - /* - * XXX Make clean but not flush a paging hint, - * and deactivate the pages. This is a hack - * because it overloads flush/clean with - * implementation-dependent meaning. This only - * happens to pages that are already clean. - */ - - if (vm_page_deactivate_hint && - (should_return != MEMORY_OBJECT_RETURN_NONE)) { - vm_page_lock_queues(); - vm_page_deactivate(m); - vm_page_unlock_queues(); - } - } + /* + * we used to deactivate clean pages at this point, + * but we do not believe that an msync should change + * the 'age' of a page in the cache... here is the + * original comment and code concerning this... + * + * XXX Make clean but not flush a paging hint, + * and deactivate the pages. This is a hack + * because it overloads flush/clean with + * implementation-dependent meaning. This only + * happens to pages that are already clean. 
+ * + * if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE)) + * return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE); + */ - return(MEMORY_OBJECT_LOCK_RESULT_DONE); + return (MEMORY_OBJECT_LOCK_RESULT_DONE); } -#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po) \ -MACRO_BEGIN \ - \ - register int i; \ - register vm_page_t hp; \ - \ - vm_object_unlock(object); \ - \ - if(((rpc_subsystem_t)pager_mux_hash_lookup(object->pager)) == \ - ((rpc_subsystem_t) &vnode_pager_workaround)) { \ - (void) vnode_pager_data_return(object->pager, \ - object->pager_request, \ - po, \ - POINTER_T(0), \ - data_cnt, \ - (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \ - !should_flush); \ - } else { \ - (void) memory_object_data_return(object->pager, \ - object->pager_request, \ - po, \ - POINTER_T(0), \ - data_cnt, \ - (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \ - !should_flush); \ - } \ - \ - vm_object_lock(object); \ - \ -MACRO_END -#ifdef MACH_BSD -#define PAGEOUT_PAGES(object, new_object, new_offset, action, po) \ -MACRO_BEGIN \ - \ - vm_map_copy_t copy; \ - register int i; \ - register vm_page_t hp; \ - \ - vm_object_unlock(object); \ - \ - (void) vm_map_copyin_object(new_object, 0, new_offset, &copy); \ - \ - if(((rpc_subsystem_t)pager_mux_hash_lookup(object->pager)) == \ - ((rpc_subsystem_t) &vnode_pager_workaround)) { \ - (void) vnode_pager_data_return(object->pager, \ - object->pager_request, \ - po, \ - POINTER_T(copy), \ - new_offset, \ - (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \ - !should_flush); \ - } else { \ - (void) memory_object_data_return(object->pager, \ - object->pager_request, \ - po, \ - POINTER_T(copy), \ - new_offset, \ - (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \ - !should_flush); \ - } \ - \ - vm_object_lock(object); \ - \ - for (i = 0; i < atop(new_offset); i++) { \ - hp = holding_pages[i]; \ - if (hp != VM_PAGE_NULL) { \ - vm_object_paging_end(object); \ - VM_PAGE_FREE(hp); \ - } \ - } \ - \ - 
new_object = VM_OBJECT_NULL; \ -MACRO_END -#else -#define PAGEOUT_PAGES(object, new_object, new_offset, action, po) \ -MACRO_BEGIN \ - \ - vm_map_copy_t copy; \ - register int i; \ - register vm_page_t hp; \ - \ - vm_object_unlock(object); \ - \ - (void) vm_map_copyin_object(new_object, 0, new_offset, &copy); \ - \ - (void) memory_object_data_return( \ - object->pager, \ - object->pager_request, \ - po, \ - POINTER_T(copy), \ - new_offset, \ - (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \ - !should_flush); \ - \ - vm_object_lock(object); \ - \ - for (i = 0; i < atop(new_offset); i++) { \ - hp = holding_pages[i]; \ - if (hp != VM_PAGE_NULL) { \ - vm_object_paging_end(object); \ - VM_PAGE_FREE(hp); \ - } \ - } \ - \ - new_object = VM_OBJECT_NULL; \ -MACRO_END -#endif + /* * Routine: memory_object_lock_request [user interface] @@ -465,68 +346,112 @@ MACRO_END kern_return_t memory_object_lock_request( - register vm_object_t object, - register vm_object_offset_t offset, - register vm_object_size_t size, + memory_object_control_t control, + memory_object_offset_t offset, + memory_object_size_t size, + memory_object_offset_t * resid_offset, + int * io_errno, memory_object_return_t should_return, int flags, - vm_prot_t prot, - ipc_port_t reply_to, - mach_msg_type_name_t reply_to_type) + vm_prot_t prot) { - vm_object_offset_t original_offset = offset; - boolean_t should_flush=flags & MEMORY_OBJECT_DATA_FLUSH; - - XPR(XPR_MEMORY_OBJECT, - "m_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n", - (integer_t)object, offset, size, - (((should_return&1)<<1)|should_flush), prot); + vm_object_t object; - /* + /* * Check for bogus arguments. 
*/ + object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) return (KERN_INVALID_ARGUMENT); - if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE) { - vm_object_deallocate(object); + if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE) return (KERN_INVALID_ARGUMENT); - } - size = round_page(size); + size = round_page_64(size); /* * Lock the object, and acquire a paging reference to - * prevent the memory_object and control ports from - * being destroyed. + * prevent the memory_object reference from being released. */ - vm_object_lock(object); vm_object_paging_begin(object); - offset -= object->paging_offset; - - (void)memory_object_update(object, - offset, size, should_return, flags, prot); - - if (IP_VALID(reply_to)) { - vm_object_unlock(object); - /* consumes our naked send-once/send right for reply_to */ - (void) memory_object_lock_completed(reply_to, reply_to_type, - object->pager_request, original_offset, size); - - vm_object_lock(object); + if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) { + if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) { + flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL; + flags |= MEMORY_OBJECT_DATA_FLUSH; + } } + offset -= object->paging_offset; + + if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) + vm_object_reap_pages(object, REAP_DATA_FLUSH); + else + (void)vm_object_update(object, offset, size, resid_offset, + io_errno, should_return, flags, prot); vm_object_paging_end(object); vm_object_unlock(object); - vm_object_deallocate(object); return (KERN_SUCCESS); } /* - * Routine: memory_object_sync + * memory_object_release_name: [interface] + * + * Enforces name semantic on memory_object reference count decrement + * This routine should not be called unless the caller holds a name + * reference gained through the memory_object_named_create or the + * memory_object_rename call. + * If the TERMINATE_IDLE flag is set, the call will return if the + * reference count is not 1. i.e. 
idle with the only remaining reference + * being the name. + * If the decision is made to proceed the name field flag is set to + * false and the reference count is decremented. If the RESPECT_CACHE + * flag is set and the reference count has gone to zero, the + * memory_object is checked to see if it is cacheable otherwise when + * the reference count is zero, it is simply terminated. + */ + +kern_return_t +memory_object_release_name( + memory_object_control_t control, + int flags) +{ + vm_object_t object; + + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); + + return vm_object_release_name(object, flags); +} + + + +/* + * Routine: memory_object_destroy [user interface] + * Purpose: + * Shut down a memory object, despite the + * presence of address map (or other) references + * to the vm_object. + */ +kern_return_t +memory_object_destroy( + memory_object_control_t control, + kern_return_t reason) +{ + vm_object_t object; + + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); + + return (vm_object_destroy(object, reason)); +} + +/* + * Routine: vm_object_sync * * Kernel internal function to synch out pages in a given * range within an object to its memory manager. 
Much the @@ -550,18 +475,20 @@ memory_object_lock_request( */ boolean_t -memory_object_sync( +vm_object_sync( vm_object_t object, vm_object_offset_t offset, vm_object_size_t size, boolean_t should_flush, - boolean_t should_return) + boolean_t should_return, + boolean_t should_iosync) { boolean_t rv; + int flags; - XPR(XPR_MEMORY_OBJECT, - "m_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n", - (integer_t)object, offset, size, should_flush, should_return); + XPR(XPR_VM_OBJECT, + "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n", + object, offset, size, should_flush, should_return); /* * Lock the object, and acquire a paging reference to @@ -571,12 +498,19 @@ memory_object_sync( vm_object_lock(object); vm_object_paging_begin(object); - rv = memory_object_update(object, offset, size, + if (should_flush) + flags = MEMORY_OBJECT_DATA_FLUSH; + else + flags = 0; + + if (should_iosync) + flags |= MEMORY_OBJECT_IO_SYNC; + + rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL, (should_return) ? MEMORY_OBJECT_RETURN_ALL : MEMORY_OBJECT_RETURN_NONE, - (should_flush) ? 
- MEMORY_OBJECT_DATA_FLUSH : 0, + flags, VM_PROT_NO_CHANGE); @@ -585,37 +519,233 @@ memory_object_sync( return rv; } + + +#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync) \ +MACRO_BEGIN \ + \ + int upl_flags; \ + memory_object_t pager; \ + \ + if (object == slide_info.slide_object) { \ + panic("Objects with slid pages not allowed\n"); \ + } \ + \ + if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) { \ + vm_object_paging_begin(object); \ + vm_object_unlock(object); \ + \ + if (iosync) \ + upl_flags = UPL_MSYNC | UPL_IOSYNC; \ + else \ + upl_flags = UPL_MSYNC; \ + \ + (void) memory_object_data_return(pager, \ + po, \ + (memory_object_cluster_size_t)data_cnt, \ + ro, \ + ioerr, \ + FALSE, \ + FALSE, \ + upl_flags); \ + \ + vm_object_lock(object); \ + vm_object_paging_end(object); \ + } \ +MACRO_END + + + +static int +vm_object_update_extent( + vm_object_t object, + vm_object_offset_t offset, + vm_object_offset_t offset_end, + vm_object_offset_t *offset_resid, + int *io_errno, + boolean_t should_flush, + memory_object_return_t should_return, + boolean_t should_iosync, + vm_prot_t prot) +{ + vm_page_t m; + int retval = 0; + vm_object_offset_t paging_offset = 0; + vm_object_offset_t next_offset = offset; + memory_object_lock_result_t page_lock_result; + memory_object_cluster_size_t data_cnt = 0; + struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; + struct vm_page_delayed_work *dwp; + int dw_count; + int dw_limit; + + dwp = &dw_array[0]; + dw_count = 0; + dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); + + for (; + offset < offset_end && object->resident_page_count; + offset += PAGE_SIZE_64) { + + /* + * Limit the number of pages to be cleaned at once to a contiguous + * run, or at most MAX_UPL_TRANSFER size + */ + if (data_cnt) { + if ((data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) || (next_offset != offset)) { + + if (dw_count) { + vm_page_do_delayed_work(object, &dw_array[0], dw_count); + dwp = &dw_array[0]; + dw_count = 
0; + } + LIST_REQ_PAGEOUT_PAGES(object, data_cnt, + paging_offset, offset_resid, io_errno, should_iosync); + data_cnt = 0; + } + } + while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { + + dwp->dw_mask = 0; + + page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot); + + if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) { + /* + * End of a run of dirty/precious pages. + */ + if (dw_count) { + vm_page_do_delayed_work(object, &dw_array[0], dw_count); + dwp = &dw_array[0]; + dw_count = 0; + } + LIST_REQ_PAGEOUT_PAGES(object, data_cnt, + paging_offset, offset_resid, io_errno, should_iosync); + /* + * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will + * allow the state of page 'm' to change... we need to re-lookup + * the current offset + */ + data_cnt = 0; + continue; + } + + switch (page_lock_result) { + + case MEMORY_OBJECT_LOCK_RESULT_DONE: + break; + + case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE: + dwp->dw_mask |= DW_vm_page_free; + break; + + case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK: + PAGE_SLEEP(object, m, THREAD_UNINT); + continue; + + case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN: + if (data_cnt == 0) + paging_offset = offset; + + data_cnt += PAGE_SIZE; + next_offset = offset + PAGE_SIZE_64; + + /* + * Clean + */ + m->list_req_pending = TRUE; + m->cleaning = TRUE; + + /* + * wired pages shouldn't be flushed and + * since they aren't on any queue, + * no need to remove them + */ + if (!VM_PAGE_WIRED(m)) { + + if (should_flush) { + /* + * add additional state for the flush + */ + m->busy = TRUE; + m->pageout = TRUE; + + dwp->dw_mask |= DW_vm_page_wire; + } + /* + * we used to remove the page from the queues at this + * point, but we do not believe that an msync + * should cause the 'age' of a page to be changed + * + * else + * dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE; + */ + } + retval = 1; + break; + } + if (dwp->dw_mask) { + VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); + + if (dw_count >= 
dw_limit) { + vm_page_do_delayed_work(object, &dw_array[0], dw_count); + dwp = &dw_array[0]; + dw_count = 0; + } + } + break; + } + } + /* + * We have completed the scan for applicable pages. + * Clean any pages that have been saved. + */ + if (dw_count) + vm_page_do_delayed_work(object, &dw_array[0], dw_count); + + if (data_cnt) { + LIST_REQ_PAGEOUT_PAGES(object, data_cnt, + paging_offset, offset_resid, io_errno, should_iosync); + } + return (retval); +} + + + /* - * Routine: memory_object_update + * Routine: vm_object_update * Description: - * Work function for m_o_lock_request(), m_o_sync(). + * Work function for m_o_lock_request(), vm_o_sync(). * * Called with object locked and paging ref taken. */ kern_return_t -memory_object_update( - register vm_object_t object, - register vm_object_offset_t offset, - register vm_size_t size, - memory_object_return_t should_return, - int flags, - vm_prot_t prot) +vm_object_update( + vm_object_t object, + vm_object_offset_t offset, + vm_object_size_t size, + vm_object_offset_t *resid_offset, + int *io_errno, + memory_object_return_t should_return, + int flags, + vm_prot_t protection) { - register vm_page_t m; - vm_page_t holding_page; - vm_size_t original_size = size; - vm_object_offset_t paging_offset = 0; - vm_object_t copy_object; - vm_size_t data_cnt = 0; - vm_object_offset_t last_offset = offset; - memory_object_lock_result_t page_lock_result; - memory_object_lock_result_t pageout_action; + vm_object_t copy_object = VM_OBJECT_NULL; boolean_t data_returned = FALSE; boolean_t update_cow; - boolean_t should_flush = flags & MEMORY_OBJECT_DATA_FLUSH; -#ifndef NOT_LIST_REQ - boolean_t pending_pageout = FALSE; -#endif + boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE; + boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? 
TRUE : FALSE; + vm_fault_return_t result; + int num_of_extents; + int n; +#define MAX_EXTENTS 8 +#define EXTENT_SIZE (1024 * 1024 * 256) +#define RESIDENT_LIMIT (1024 * 32) + struct extent { + vm_object_offset_t e_base; + vm_object_offset_t e_min; + vm_object_offset_t e_max; + } extents[MAX_EXTENTS]; /* * To avoid blocking while scanning for pages, save @@ -639,89 +769,134 @@ memory_object_update( !(flags & MEMORY_OBJECT_DATA_PURGE))) || (flags & MEMORY_OBJECT_COPY_SYNC); + if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) { + int collisions = 0; + + while ((copy_object = object->copy) != VM_OBJECT_NULL) { + /* + * need to do a try here since we're swimming upstream + * against the normal lock ordering... however, we need + * to hold the object stable until we gain control of the + * copy object so we have to be careful how we approach this + */ + if (vm_object_lock_try(copy_object)) { + /* + * we 'won' the lock on the copy object... + * no need to hold the object lock any longer... + * take a real reference on the copy object because + * we're going to call vm_fault_page on it which may + * under certain conditions drop the lock and the paging + * reference we're about to take... 
the reference + * will keep the copy object from going away if that happens + */ + vm_object_unlock(object); + vm_object_reference_locked(copy_object); + break; + } + vm_object_unlock(object); - if((((copy_object = object->copy) != NULL) && update_cow) || - (flags & MEMORY_OBJECT_DATA_SYNC)) { - vm_size_t i; - vm_size_t copy_size; - vm_object_offset_t copy_offset; + collisions++; + mutex_pause(collisions); + + vm_object_lock(object); + } + } + if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) { + vm_map_size_t i; + vm_map_size_t copy_size; + vm_map_offset_t copy_offset; vm_prot_t prot; vm_page_t page; vm_page_t top_page; kern_return_t error = 0; + struct vm_object_fault_info fault_info; - if(copy_object != NULL) { - /* translate offset with respect to shadow's offset */ - copy_offset = (offset >= copy_object->shadow_offset)? - offset - copy_object->shadow_offset : - (vm_object_offset_t) 0; - if(copy_offset > copy_object->size) - copy_offset = copy_object->size; - - /* clip size with respect to shadow offset */ - copy_size = (offset >= copy_object->shadow_offset) ? - size : size - (copy_object->shadow_offset - offset); - - if(copy_size <= 0) { - copy_size = 0; - } else { - copy_size = ((copy_offset + copy_size) - <= copy_object->size) ? - copy_size : copy_object->size - copy_offset; - } - /* check for a copy_offset which is beyond the end of */ - /* the copy_object */ - if(copy_size < 0) - copy_size = 0; - - copy_size+=offset; - - vm_object_unlock(object); - vm_object_lock(copy_object); - } else { - copy_object = object; + if (copy_object != VM_OBJECT_NULL) { + /* + * translate offset with respect to shadow's offset + */ + copy_offset = (offset >= copy_object->vo_shadow_offset) ? 
+ (vm_map_offset_t)(offset - copy_object->vo_shadow_offset) : + (vm_map_offset_t) 0; - copy_size = offset + size; - copy_offset = offset; - } + if (copy_offset > copy_object->vo_size) + copy_offset = copy_object->vo_size; + + /* + * clip size with respect to shadow offset + */ + if (offset >= copy_object->vo_shadow_offset) { + copy_size = size; + } else if (size >= copy_object->vo_shadow_offset - offset) { + copy_size = size - (copy_object->vo_shadow_offset - offset); + } else { + copy_size = 0; + } + + if (copy_offset + copy_size > copy_object->vo_size) { + if (copy_object->vo_size >= copy_offset) { + copy_size = copy_object->vo_size - copy_offset; + } else { + copy_size = 0; + } + } + copy_size+=copy_offset; + + } else { + copy_object = object; + + copy_size = offset + size; + copy_offset = offset; + } + fault_info.interruptible = THREAD_UNINT; + fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; + fault_info.user_tag = 0; + fault_info.lo_offset = copy_offset; + fault_info.hi_offset = copy_size; + fault_info.no_cache = FALSE; + fault_info.stealth = TRUE; + fault_info.io_sync = FALSE; + fault_info.cs_bypass = FALSE; + fault_info.mark_zf_absent = FALSE; vm_object_paging_begin(copy_object); - for (i=copy_offset; iobject, top_page); - PAGE_WAKEUP_DONE(page); - vm_page_lock_queues(); - if (!page->active && !page->inactive) - vm_page_activate(page); - vm_page_unlock_queues(); vm_object_lock(copy_object); vm_object_paging_begin(copy_object); - } else { - PAGE_WAKEUP_DONE(page); - vm_page_lock_queues(); - if (!page->active && !page->inactive) - vm_page_activate(page); + } + if (!page->active && + !page->inactive && + !page->throttled) { + vm_page_lockspin_queues(); + if (!page->active && + !page->inactive && + !page->throttled) + vm_page_deactivate(page); vm_page_unlock_queues(); } + PAGE_WAKEUP_DONE(page); break; case VM_FAULT_RETRY: prot = VM_PROT_WRITE|VM_PROT_READ; @@ -739,187 +914,175 @@ memory_object_update( vm_object_lock(copy_object); 
vm_object_paging_begin(copy_object); goto RETRY_COW_OF_LOCK_REQUEST; - case VM_FAULT_FICTITIOUS_SHORTAGE: - vm_page_more_fictitious(); - prot = VM_PROT_WRITE|VM_PROT_READ; - vm_object_lock(copy_object); - vm_object_paging_begin(copy_object); - goto RETRY_COW_OF_LOCK_REQUEST; + case VM_FAULT_SUCCESS_NO_VM_PAGE: + /* success but no VM page: fail */ + vm_object_paging_end(copy_object); + vm_object_unlock(copy_object); + /*FALLTHROUGH*/ case VM_FAULT_MEMORY_ERROR: + if (object != copy_object) + vm_object_deallocate(copy_object); vm_object_lock(object); goto BYPASS_COW_COPYIN; + default: + panic("vm_object_update: unexpected error 0x%x" + " from vm_fault_page()\n", result); } } vm_object_paging_end(copy_object); - if(copy_object != object) { + } + if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) { + if (copy_object != VM_OBJECT_NULL && copy_object != object) { vm_object_unlock(copy_object); + vm_object_deallocate(copy_object); vm_object_lock(object); } + return KERN_SUCCESS; } - if((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) { - return KERN_SUCCESS; - } - if(((copy_object = object->copy) != NULL) && - (flags & MEMORY_OBJECT_DATA_PURGE)) { - copy_object->shadow_severed = TRUE; - copy_object->shadowed = FALSE; - copy_object->shadow = NULL; - /* delete the ref the COW was holding on the target object */ - vm_object_deallocate(object); + if (copy_object != VM_OBJECT_NULL && copy_object != object) { + if ((flags & MEMORY_OBJECT_DATA_PURGE)) { + copy_object->shadow_severed = TRUE; + copy_object->shadowed = FALSE; + copy_object->shadow = NULL; + /* + * delete the ref the COW was holding on the target object + */ + vm_object_deallocate(object); + } + vm_object_unlock(copy_object); + vm_object_deallocate(copy_object); + vm_object_lock(object); } BYPASS_COW_COPYIN: - for (; - size != 0; - size -= PAGE_SIZE, offset += PAGE_SIZE_64) - { - /* - * Limit the number of pages to be cleaned at once. 
- */ - if (pending_pageout && - data_cnt >= PAGE_SIZE * DATA_WRITE_MAX) - { - LIST_REQ_PAGEOUT_PAGES(object, data_cnt, - pageout_action, paging_offset); - data_cnt = 0; - pending_pageout = FALSE; - } - - while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { - page_lock_result = memory_object_lock_page(m, should_return, - should_flush, prot); + /* + * when we have a really large range to check relative + * to the number of actual resident pages, we'd like + * to use the resident page list to drive our checks + * however, the object lock will get dropped while processing + * the page which means the resident queue can change which + * means we can't walk the queue as we process the pages + * we also want to do the processing in offset order to allow + * 'runs' of pages to be collected if we're being told to + * flush to disk... the resident page queue is NOT ordered. + * + * a temporary solution (until we figure out how to deal with + * large address spaces more generically) is to pre-flight + * the resident page queue (if it's small enough) and develop + * a collection of extents (that encompass actual resident pages) + * to visit. This will at least allow us to deal with some of the + * more pathological cases in a more efficient manner. The current + * worst case (a single resident page at the end of an extremely large + * range) can take minutes to complete for ranges in the terrabyte + * category... 
since this routine is called when truncating a file, + * and we currently support files up to 16 Tbytes in size, this + * is not a theoretical problem + */ - XPR(XPR_MEMORY_OBJECT, - "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n", - (integer_t)object, offset, page_lock_result, 0, 0); + if ((object->resident_page_count < RESIDENT_LIMIT) && + (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) { + vm_page_t next; + vm_object_offset_t start; + vm_object_offset_t end; + vm_object_size_t e_mask; + vm_page_t m; - switch (page_lock_result) - { - case MEMORY_OBJECT_LOCK_RESULT_DONE: - /* - * End of a cluster of dirty pages. - */ - if(pending_pageout) { - LIST_REQ_PAGEOUT_PAGES(object, - data_cnt, pageout_action, - paging_offset); - data_cnt = 0; - pending_pageout = FALSE; - continue; - } - break; + start = offset; + end = offset + size; + num_of_extents = 0; + e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1)); - case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK: - /* - * Since it is necessary to block, - * clean any dirty pages now. - */ - if(pending_pageout) { - LIST_REQ_PAGEOUT_PAGES(object, - data_cnt, pageout_action, - paging_offset); - pending_pageout = FALSE; - data_cnt = 0; - continue; - } + m = (vm_page_t) queue_first(&object->memq); - PAGE_ASSERT_WAIT(m, THREAD_UNINT); - vm_object_unlock(object); - thread_block((void (*)(void))0); - vm_object_lock(object); - continue; + while (!queue_end(&object->memq, (queue_entry_t) m)) { + next = (vm_page_t) queue_next(&m->listq); - case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN: - case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN: - /* - * The clean and return cases are similar. - * - */ - - /* - * if this would form a discontiguous block, - * clean the old pages and start anew. - * - */ - - /* - * Mark the page busy since we unlock the - * object below. 
- */ - m->busy = TRUE; - if (pending_pageout && - (last_offset != offset || - pageout_action != page_lock_result)) { - LIST_REQ_PAGEOUT_PAGES(object, - data_cnt, pageout_action, - paging_offset); - pending_pageout = FALSE; - data_cnt = 0; - } - m->busy = FALSE; - holding_page = VM_PAGE_NULL; - if(m->cleaning) { - PAGE_ASSERT_WAIT(m, THREAD_UNINT); - vm_object_unlock(object); - thread_block((void (*)(void))0); - continue; - } - if(!pending_pageout) { - pending_pageout = TRUE; - pageout_action = page_lock_result; - paging_offset = offset; - } - if (should_flush) { - vm_page_lock_queues(); - m->list_req_pending = TRUE; - m->cleaning = TRUE; - m->busy = TRUE; - m->pageout = TRUE; - vm_page_wire(m); - vm_page_unlock_queues(); - } else { - /* - * Clean but do not flush + if ((m->offset >= start) && (m->offset < end)) { + /* + * this is a page we're interested in + * try to fit it into a current extent */ - vm_page_lock_queues(); - m->list_req_pending = TRUE; - m->cleaning = TRUE; - vm_page_unlock_queues(); - + for (n = 0; n < num_of_extents; n++) { + if ((m->offset & e_mask) == extents[n].e_base) { + /* + * use (PAGE_SIZE - 1) to determine the + * max offset so that we don't wrap if + * we're at the last page of the space + */ + if (m->offset < extents[n].e_min) + extents[n].e_min = m->offset; + else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max) + extents[n].e_max = m->offset + (PAGE_SIZE - 1); + break; + } + } + if (n == num_of_extents) { + /* + * didn't find a current extent that can encompass + * this page + */ + if (n < MAX_EXTENTS) { + /* + * if we still have room, + * create a new extent + */ + extents[n].e_base = m->offset & e_mask; + extents[n].e_min = m->offset; + extents[n].e_max = m->offset + (PAGE_SIZE - 1); + + num_of_extents++; + } else { + /* + * no room to create a new extent... + * fall back to a single extent based + * on the min and max page offsets + * we find in the range we're interested in... 
+ * first, look through the extent list and + * develop the overall min and max for the + * pages we've looked at up to this point + */ + for (n = 1; n < num_of_extents; n++) { + if (extents[n].e_min < extents[0].e_min) + extents[0].e_min = extents[n].e_min; + if (extents[n].e_max > extents[0].e_max) + extents[0].e_max = extents[n].e_max; + } + /* + * now setup to run through the remaining pages + * to determine the overall min and max + * offset for the specified range + */ + extents[0].e_base = 0; + e_mask = 0; + num_of_extents = 1; + + /* + * by continuing, we'll reprocess the + * page that forced us to abandon trying + * to develop multiple extents + */ + continue; + } + } } - vm_object_unlock(object); - - - data_cnt += PAGE_SIZE; - last_offset = offset + PAGE_SIZE_64; - data_returned = TRUE; - - vm_object_lock(object); - break; + m = next; } - break; - } - } + } else { + extents[0].e_min = offset; + extents[0].e_max = offset + (size - 1); - /* - * We have completed the scan for applicable pages. - * Clean any pages that have been saved. 
- */ -#ifdef NOT_LIST_REQ - if (new_object != VM_OBJECT_NULL) { - PAGEOUT_PAGES(object, new_object, new_offset, pageout_action, - paging_offset); + num_of_extents = 1; } -#else - if (pending_pageout) { - LIST_REQ_PAGEOUT_PAGES(object, - data_cnt, pageout_action, paging_offset); + for (n = 0; n < num_of_extents; n++) { + if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno, + should_flush, should_return, should_iosync, protection)) + data_returned = TRUE; } -#endif return (data_returned); } + /* * Routine: memory_object_synchronize_completed [user interface] * @@ -933,23 +1096,25 @@ BYPASS_COW_COPYIN: kern_return_t memory_object_synchronize_completed( - vm_object_t object, - vm_object_offset_t offset, - vm_offset_t length) + memory_object_control_t control, + memory_object_offset_t offset, + memory_object_size_t length) { - msync_req_t msr; + vm_object_t object; + msync_req_t msr; + + object = memory_object_control_to_vm_object(control); XPR(XPR_MEMORY_OBJECT, "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n", - (integer_t)object, offset, length, 0, 0); + object, offset, length, 0, 0); /* * Look for bogus arguments */ - if (object == VM_OBJECT_NULL) { - return KERN_INVALID_ARGUMENT; - } + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); vm_object_lock(object); @@ -965,7 +1130,6 @@ memory_object_synchronize_completed( if (queue_end(&object->msr_q, (queue_entry_t)msr)) { vm_object_unlock(object); - vm_object_deallocate(object); return KERN_INVALID_ARGUMENT; } @@ -974,18 +1138,16 @@ memory_object_synchronize_completed( msr->flag = VM_MSYNC_DONE; msr_unlock(msr); thread_wakeup((event_t) msr); - vm_object_deallocate(object); return KERN_SUCCESS; }/* memory_object_synchronize_completed */ - -kern_return_t -memory_object_set_attributes_common( + +static kern_return_t +vm_object_set_attributes_common( vm_object_t object, boolean_t may_cache, memory_object_copy_strategy_t copy_strategy, boolean_t temporary, 
- vm_size_t cluster_size, boolean_t silent_overwrite, boolean_t advisory_pageout) { @@ -993,7 +1155,7 @@ memory_object_set_attributes_common( XPR(XPR_MEMORY_OBJECT, "m_o_set_attr_com, object 0x%X flg %x strat %d\n", - (integer_t)object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0); + object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0); if (object == VM_OBJECT_NULL) return(KERN_INVALID_ARGUMENT); @@ -1007,33 +1169,18 @@ memory_object_set_attributes_common( case MEMORY_OBJECT_COPY_DELAY: break; default: - vm_object_deallocate(object); return(KERN_INVALID_ARGUMENT); } #if !ADVISORY_PAGEOUT - if (silent_overwrite || advisory_pageout) { - vm_object_deallocate(object); + if (silent_overwrite || advisory_pageout) return(KERN_INVALID_ARGUMENT); - } + #endif /* !ADVISORY_PAGEOUT */ if (may_cache) may_cache = TRUE; if (temporary) temporary = TRUE; - if (cluster_size != 0) { - int pages_per_cluster; - pages_per_cluster = atop(cluster_size); - /* - * Cluster size must be integral multiple of page size, - * and be a power of 2 number of pages. - */ - if ((cluster_size & (PAGE_SIZE-1)) || - ((pages_per_cluster-1) & pages_per_cluster)) { - vm_object_deallocate(object); - return KERN_INVALID_ARGUMENT; - } - } vm_object_lock(object); @@ -1047,12 +1194,6 @@ memory_object_set_attributes_common( object->temporary = temporary; object->silent_overwrite = silent_overwrite; object->advisory_pageout = advisory_pageout; - if (cluster_size == 0) - cluster_size = PAGE_SIZE; - object->cluster_size = cluster_size; - - assert(cluster_size >= PAGE_SIZE && - cluster_size % PAGE_SIZE == 0); /* * Wake up anyone waiting for the ready attribute @@ -1066,8 +1207,6 @@ memory_object_set_attributes_common( vm_object_unlock(object); - vm_object_deallocate(object); - return(KERN_SUCCESS); } @@ -1076,31 +1215,31 @@ memory_object_set_attributes_common( * * XXX This routine cannot be completed until the vm_msync, clean * in place, and cluster work is completed. 
See ifdef notyet - * below and note that memory_object_set_attributes_common() + * below and note that vm_object_set_attributes_common() * may have to be expanded. */ kern_return_t memory_object_change_attributes( - vm_object_t object, - memory_object_flavor_t flavor, - memory_object_info_t attributes, - mach_msg_type_number_t count, - ipc_port_t reply_to, - mach_msg_type_name_t reply_to_type) + memory_object_control_t control, + memory_object_flavor_t flavor, + memory_object_info_t attributes, + mach_msg_type_number_t count) { - kern_return_t result = KERN_SUCCESS; - boolean_t temporary; - boolean_t may_cache; - boolean_t invalidate; - vm_size_t cluster_size; + vm_object_t object; + kern_return_t result = KERN_SUCCESS; + boolean_t temporary; + boolean_t may_cache; + boolean_t invalidate; memory_object_copy_strategy_t copy_strategy; - boolean_t silent_overwrite; + boolean_t silent_overwrite; boolean_t advisory_pageout; + object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); vm_object_lock(object); + temporary = object->temporary; may_cache = object->can_persist; copy_strategy = object->copy_strategy; @@ -1109,7 +1248,6 @@ memory_object_change_attributes( #if notyet invalidate = object->invalidate; #endif - cluster_size = object->cluster_size; vm_object_unlock(object); switch (flavor) { @@ -1162,7 +1300,6 @@ memory_object_change_attributes( perf = (memory_object_perf_info_t) attributes; may_cache = perf->may_cache; - cluster_size = round_page(perf->cluster_size); break; } @@ -1180,7 +1317,6 @@ memory_object_change_attributes( may_cache = attr->may_cache; copy_strategy = attr->copy_strategy; - cluster_size = page_size; break; } @@ -1198,7 +1334,6 @@ memory_object_change_attributes( copy_strategy = attr->copy_strategy; may_cache = attr->may_cache_object; - cluster_size = attr->cluster_size; temporary = attr->temporary; break; @@ -1209,10 +1344,8 @@ 
memory_object_change_attributes( break; } - if (result != KERN_SUCCESS) { - vm_object_deallocate(object); + if (result != KERN_SUCCESS) return(result); - } if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) { copy_strategy = MEMORY_OBJECT_COPY_DELAY; @@ -1222,45 +1355,30 @@ memory_object_change_attributes( } /* - * Do the work and throw away our object reference. It - * is important that the object reference be deallocated - * BEFORE sending the reply. The whole point of the reply - * is that it shows up after the terminate message that - * may be generated by setting the object uncacheable. - * * XXX may_cache may become a tri-valued variable to handle * XXX uncache if not in use. */ - result = memory_object_set_attributes_common(object, + return (vm_object_set_attributes_common(object, may_cache, copy_strategy, temporary, - cluster_size, silent_overwrite, - advisory_pageout); - - if (IP_VALID(reply_to)) { - /* consumes our naked send-once/send right for reply_to */ - (void) memory_object_change_completed(reply_to, reply_to_type, - object->alive ? 
- object->pager_request : PAGER_REQUEST_NULL, - flavor); - } - - return(result); + advisory_pageout)); } kern_return_t memory_object_get_attributes( - vm_object_t object, + memory_object_control_t control, memory_object_flavor_t flavor, memory_object_info_t attributes, /* pointer to OUT array */ mach_msg_type_number_t *count) /* IN/OUT */ { - kern_return_t ret = KERN_SUCCESS; + kern_return_t ret = KERN_SUCCESS; + vm_object_t object; - if (object == VM_OBJECT_NULL) - return(KERN_INVALID_ARGUMENT); + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); vm_object_lock(object); @@ -1320,7 +1438,7 @@ memory_object_get_attributes( } perf = (memory_object_perf_info_t) attributes; - perf->cluster_size = object->cluster_size; + perf->cluster_size = PAGE_SIZE; perf->may_cache = object->can_persist; *count = MEMORY_OBJECT_PERF_INFO_COUNT; @@ -1355,7 +1473,7 @@ memory_object_get_attributes( attr = (memory_object_attr_info_t) attributes; attr->copy_strategy = object->copy_strategy; - attr->cluster_size = object->cluster_size; + attr->cluster_size = PAGE_SIZE; attr->may_cache_object = object->can_persist; attr->temporary = object->temporary; @@ -1370,30 +1488,248 @@ memory_object_get_attributes( vm_object_unlock(object); - vm_object_deallocate(object); - return(ret); } -int vm_stat_discard_cleared_reply = 0; -int vm_stat_discard_cleared_unset = 0; -int vm_stat_discard_cleared_too_late = 0; +kern_return_t +memory_object_iopl_request( + ipc_port_t port, + memory_object_offset_t offset, + upl_size_t *upl_size, + upl_t *upl_ptr, + upl_page_info_array_t user_page_list, + unsigned int *page_list_count, + int *flags) +{ + vm_object_t object; + kern_return_t ret; + int caller_flags; -/* - * vm_set_default_memory_manager(): - * [Obsolete] + caller_flags = *flags; + + if (caller_flags & ~UPL_VALID_FLAGS) { + /* + * For forward compatibility's sake, + * reject any unknown flag. 
+ */ + return KERN_INVALID_VALUE; + } + + if (ip_kotype(port) == IKOT_NAMED_ENTRY) { + vm_named_entry_t named_entry; + + named_entry = (vm_named_entry_t)port->ip_kobject; + /* a few checks to make sure user is obeying rules */ + if(*upl_size == 0) { + if(offset >= named_entry->size) + return(KERN_INVALID_RIGHT); + *upl_size = (upl_size_t)(named_entry->size - offset); + if (*upl_size != named_entry->size - offset) + return KERN_INVALID_ARGUMENT; + } + if(caller_flags & UPL_COPYOUT_FROM) { + if((named_entry->protection & VM_PROT_READ) + != VM_PROT_READ) { + return(KERN_INVALID_RIGHT); + } + } else { + if((named_entry->protection & + (VM_PROT_READ | VM_PROT_WRITE)) + != (VM_PROT_READ | VM_PROT_WRITE)) { + return(KERN_INVALID_RIGHT); + } + } + if(named_entry->size < (offset + *upl_size)) + return(KERN_INVALID_ARGUMENT); + + /* the callers parameter offset is defined to be the */ + /* offset from beginning of named entry offset in object */ + offset = offset + named_entry->offset; + + if(named_entry->is_sub_map) + return (KERN_INVALID_ARGUMENT); + + named_entry_lock(named_entry); + + if (named_entry->is_pager) { + object = vm_object_enter(named_entry->backing.pager, + named_entry->offset + named_entry->size, + named_entry->internal, + FALSE, + FALSE); + if (object == VM_OBJECT_NULL) { + named_entry_unlock(named_entry); + return(KERN_INVALID_OBJECT); + } + + /* JMM - drop reference on pager here? */ + + /* create an extra reference for the named entry */ + vm_object_lock(object); + vm_object_reference_locked(object); + named_entry->backing.object = object; + named_entry->is_pager = FALSE; + named_entry_unlock(named_entry); + + /* wait for object to be ready */ + while (!object->pager_ready) { + vm_object_wait(object, + VM_OBJECT_EVENT_PAGER_READY, + THREAD_UNINT); + vm_object_lock(object); + } + vm_object_unlock(object); + } else { + /* This is the case where we are going to map */ + /* an already mapped object. If the object is */ + /* not ready it is internal. 
An external */ + /* object cannot be mapped until it is ready */ + /* we can therefore avoid the ready check */ + /* in this case. */ + object = named_entry->backing.object; + vm_object_reference(object); + named_entry_unlock(named_entry); + } + } else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) { + memory_object_control_t control; + control = (memory_object_control_t) port; + if (control == NULL) + return (KERN_INVALID_ARGUMENT); + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); + vm_object_reference(object); + } else { + return KERN_INVALID_ARGUMENT; + } + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); + + if (!object->private) { + if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE)) + *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE); + if (object->phys_contiguous) { + *flags = UPL_PHYS_CONTIG; + } else { + *flags = 0; + } + } else { + *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG; + } + + ret = vm_object_iopl_request(object, + offset, + *upl_size, + upl_ptr, + user_page_list, + page_list_count, + caller_flags); + vm_object_deallocate(object); + return ret; +} + +/* + * Routine: memory_object_upl_request [interface] + * Purpose: + * Cause the population of a portion of a vm_object. + * Depending on the nature of the request, the pages + * returned may be contain valid data or be uninitialized. 
+ * */ + +kern_return_t +memory_object_upl_request( + memory_object_control_t control, + memory_object_offset_t offset, + upl_size_t size, + upl_t *upl_ptr, + upl_page_info_array_t user_page_list, + unsigned int *page_list_count, + int cntrl_flags) +{ + vm_object_t object; + + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_TERMINATED); + + return vm_object_upl_request(object, + offset, + size, + upl_ptr, + user_page_list, + page_list_count, + cntrl_flags); +} + +/* + * Routine: memory_object_super_upl_request [interface] + * Purpose: + * Cause the population of a portion of a vm_object + * in much the same way as memory_object_upl_request. + * Depending on the nature of the request, the pages + * returned may be contain valid data or be uninitialized. + * However, the region may be expanded up to the super + * cluster size provided. + */ + +kern_return_t +memory_object_super_upl_request( + memory_object_control_t control, + memory_object_offset_t offset, + upl_size_t size, + upl_size_t super_cluster, + upl_t *upl, + upl_page_info_t *user_page_list, + unsigned int *page_list_count, + int cntrl_flags) +{ + vm_object_t object; + + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); + + return vm_object_super_upl_request(object, + offset, + size, + super_cluster, + upl, + user_page_list, + page_list_count, + cntrl_flags); +} + kern_return_t -vm_set_default_memory_manager( - host_t host, - ipc_port_t *default_manager) +memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start, + vm_size_t *length, uint32_t *io_streaming, memory_object_fault_info_t fault_info) { - return(host_default_memory_manager(host_priv_self(), default_manager, 4*PAGE_SIZE)); + vm_object_t object; + + object = memory_object_control_to_vm_object(control); + + if (object == VM_OBJECT_NULL || object->paging_offset > *start) + return 
(KERN_INVALID_ARGUMENT); + + *start -= object->paging_offset; + + vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info, io_streaming); + + *start += object->paging_offset; + + return (KERN_SUCCESS); } + +int vm_stat_discard_cleared_reply = 0; +int vm_stat_discard_cleared_unset = 0; +int vm_stat_discard_cleared_too_late = 0; + + + /* - * Routine: host_default_memory_manager + * Routine: host_default_memory_manager [interface] * Purpose: * set/get the default memory manager port and default cluster * size. @@ -1402,13 +1738,14 @@ vm_set_default_memory_manager( */ kern_return_t host_default_memory_manager( - host_priv_t host_priv, - ipc_port_t *default_manager, - vm_size_t cluster_size) + host_priv_t host_priv, + memory_object_default_t *default_manager, + __unused memory_object_cluster_size_t cluster_size) { - ipc_port_t current_manager; - ipc_port_t new_manager; - ipc_port_t returned_manager; + memory_object_default_t current_manager; + memory_object_default_t new_manager; + memory_object_default_t returned_manager; + kern_return_t result = KERN_SUCCESS; if (host_priv == HOST_PRIV_NULL) return(KERN_INVALID_HOST); @@ -1416,33 +1753,38 @@ host_default_memory_manager( assert(host_priv == &realhost); new_manager = *default_manager; - mutex_lock(&memory_manager_default_lock); + lck_mtx_lock(&memory_manager_default_lock); current_manager = memory_manager_default; + returned_manager = MEMORY_OBJECT_DEFAULT_NULL; - if (new_manager == IP_NULL) { + if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) { /* * Retrieve the current value. */ - - returned_manager = ipc_port_copy_send(current_manager); + returned_manager = current_manager; + memory_object_default_reference(returned_manager); } else { + + /* + * If this is the first non-null manager, start + * up the internal pager support. 
+ */ + if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) { + result = vm_pageout_internal_start(); + if (result != KERN_SUCCESS) + goto out; + } + /* * Retrieve the current value, * and replace it with the supplied value. - * We consume the supplied naked send right. + * We return the old reference to the caller + * but we have to take a reference on the new + * one. */ - returned_manager = current_manager; memory_manager_default = new_manager; - if (cluster_size % PAGE_SIZE != 0) { -#if 0 - mutex_unlock(&memory_manager_default_lock); - return KERN_INVALID_ARGUMENT; -#else - cluster_size = round_page(cluster_size); -#endif - } - memory_manager_default_cluster = cluster_size; + memory_object_default_reference(new_manager); /* * In case anyone's been waiting for a memory @@ -1450,12 +1792,24 @@ host_default_memory_manager( */ thread_wakeup((event_t) &memory_manager_default); - } - mutex_unlock(&memory_manager_default_lock); +#ifndef CONFIG_FREEZE + /* + * Now that we have a default pager for anonymous memory, + * reactivate all the throttled pages (i.e. dirty pages with + * no pager). + */ + if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) + { + vm_page_reactivate_all_throttled(); + } +#endif + } + out: + lck_mtx_unlock(&memory_manager_default_lock); *default_manager = returned_manager; - return(KERN_SUCCESS); + return(result); } /* @@ -1466,62 +1820,29 @@ host_default_memory_manager( * valid (not IP_NULL or IP_DEAD). 
*/ -ipc_port_t -memory_manager_default_reference( - vm_size_t *cluster_size) +__private_extern__ memory_object_default_t +memory_manager_default_reference(void) { - ipc_port_t current_manager; + memory_object_default_t current_manager; - mutex_lock(&memory_manager_default_lock); - - while (current_manager = ipc_port_copy_send(memory_manager_default), - !IP_VALID(current_manager)) { - thread_sleep_mutex((event_t) &memory_manager_default, - &memory_manager_default_lock, THREAD_UNINT); - mutex_lock(&memory_manager_default_lock); + lck_mtx_lock(&memory_manager_default_lock); + current_manager = memory_manager_default; + while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) { + wait_result_t res; + + res = lck_mtx_sleep(&memory_manager_default_lock, + LCK_SLEEP_DEFAULT, + (event_t) &memory_manager_default, + THREAD_UNINT); + assert(res == THREAD_AWAKENED); + current_manager = memory_manager_default; } - *cluster_size = memory_manager_default_cluster; - - mutex_unlock(&memory_manager_default_lock); + memory_object_default_reference(current_manager); + lck_mtx_unlock(&memory_manager_default_lock); return current_manager; } -/* - * Routine: memory_manager_default_port - * Purpose: - * Returns true if the receiver for the port - * is the default memory manager. - * - * This is a hack to let ds_read_done - * know when it should keep memory wired. - */ - -boolean_t -memory_manager_default_port( - ipc_port_t port) -{ - ipc_port_t current; - boolean_t result; - - mutex_lock(&memory_manager_default_lock); - current = memory_manager_default; - if (IP_VALID(current)) { - /* - * There is no point in bothering to lock - * both ports, which would be painful to do. - * If the receive rights are moving around, - * we might be inaccurate. 
- */ - - result = port->ip_receiver == current->ip_receiver; - } else - result = FALSE; - mutex_unlock(&memory_manager_default_lock); - - return result; -} - /* * Routine: memory_manager_default_check * @@ -1534,203 +1855,520 @@ memory_manager_default_port( * but only the first time. * */ -kern_return_t +__private_extern__ kern_return_t memory_manager_default_check(void) { - ipc_port_t current; + memory_object_default_t current; - mutex_lock(&memory_manager_default_lock); + lck_mtx_lock(&memory_manager_default_lock); current = memory_manager_default; - if (!IP_VALID(current)) { + if (current == MEMORY_OBJECT_DEFAULT_NULL) { static boolean_t logged; /* initialized to 0 */ boolean_t complain = !logged; logged = TRUE; - mutex_unlock(&memory_manager_default_lock); + lck_mtx_unlock(&memory_manager_default_lock); if (complain) printf("Warning: No default memory manager\n"); return(KERN_FAILURE); } else { - mutex_unlock(&memory_manager_default_lock); + lck_mtx_unlock(&memory_manager_default_lock); return(KERN_SUCCESS); } } -void +__private_extern__ void memory_manager_default_init(void) { - memory_manager_default = IP_NULL; - mutex_init(&memory_manager_default_lock, ETAP_VM_MEMMAN); + memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL; + lck_mtx_init(&memory_manager_default_lock, &vm_object_lck_grp, &vm_object_lck_attr); } -void -memory_object_deactivate_pages( - vm_object_t object, - vm_object_offset_t offset, - vm_object_size_t size, - boolean_t kill_page) -{ - vm_object_t orig_object; - int pages_moved = 0; - int pages_found = 0; - /* - * entered with object lock held, acquire a paging reference to - * prevent the memory_object and control ports from - * being destroyed. - */ - orig_object = object; +/* Allow manipulation of individual page state. 
This is actually part of */ +/* the UPL regimen but takes place on the object rather than on a UPL */ - for (;;) { - register vm_page_t m; - vm_object_offset_t toffset; - vm_object_size_t tsize; +kern_return_t +memory_object_page_op( + memory_object_control_t control, + memory_object_offset_t offset, + int ops, + ppnum_t *phys_entry, + int *flags) +{ + vm_object_t object; - vm_object_paging_begin(object); - vm_page_lock_queues(); + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); - for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) { + return vm_object_page_op(object, offset, ops, phys_entry, flags); +} - if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) { +/* + * memory_object_range_op offers performance enhancement over + * memory_object_page_op for page_op functions which do not require page + * level state to be returned from the call. Page_op was created to provide + * a low-cost alternative to page manipulation via UPLs when only a single + * page was involved. The range_op call establishes the ability in the _op + * family of functions to work on multiple pages where the lack of page level + * state handling allows the caller to avoid the overhead of the upl structures. 
+ */ - pages_found++; +kern_return_t +memory_object_range_op( + memory_object_control_t control, + memory_object_offset_t offset_beg, + memory_object_offset_t offset_end, + int ops, + int *range) +{ + vm_object_t object; - if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) { + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); - m->reference = FALSE; - pmap_clear_reference(m->phys_addr); + return vm_object_range_op(object, + offset_beg, + offset_end, + ops, + (uint32_t *) range); +} - if ((kill_page) && (object->internal)) { - m->precious = FALSE; - m->dirty = FALSE; - pmap_clear_modify(m->phys_addr); - vm_external_state_clr(object->existence_map, offset); - } - VM_PAGE_QUEUES_REMOVE(m); - queue_enter_first(&vm_page_queue_inactive, m, vm_page_t, pageq); +void +memory_object_mark_used( + memory_object_control_t control) +{ + vm_object_t object; - m->inactive = TRUE; - if (!m->fictitious) - vm_page_inactive_count++; + if (control == NULL) + return; - pages_moved++; - } - } - } - vm_page_unlock_queues(); - vm_object_paging_end(object); + object = memory_object_control_to_vm_object(control); + + if (object != VM_OBJECT_NULL) + vm_object_cache_remove(object); +} - if (object->shadow) { - vm_object_t tmp_object; - kill_page = 0; +void +memory_object_mark_unused( + memory_object_control_t control, + __unused boolean_t rage) +{ + vm_object_t object; - offset += object->shadow_offset; + if (control == NULL) + return; - tmp_object = object->shadow; - vm_object_lock(tmp_object); + object = memory_object_control_to_vm_object(control); - if (object != orig_object) - vm_object_unlock(object); - object = tmp_object; - } else - break; - } - if (object != orig_object) - vm_object_unlock(object); + if (object != VM_OBJECT_NULL) + vm_object_cache_add(object); } -/* Allow manipulation of individual page state. 
This is actually part of */ -/* the UPL regimen but takes place on the object rather than on a UPL */ kern_return_t -memory_object_page_op( - vm_object_t object, - vm_object_offset_t offset, - int ops, - vm_offset_t *phys_entry, - int *flags) +memory_object_pages_resident( + memory_object_control_t control, + boolean_t * has_pages_resident) +{ + vm_object_t object; + + *has_pages_resident = FALSE; + + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); + + if (object->resident_page_count) + *has_pages_resident = TRUE; + + return (KERN_SUCCESS); +} + +kern_return_t +memory_object_signed( + memory_object_control_t control, + boolean_t is_signed) { - vm_page_t dst_page; + vm_object_t object; + + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return KERN_INVALID_ARGUMENT; vm_object_lock(object); + object->code_signed = is_signed; + vm_object_unlock(object); - while(TRUE) { - if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) { - vm_object_unlock(object); - return KERN_FAILURE; - } + return KERN_SUCCESS; +} - /* Sync up on getting the busy bit */ - if((dst_page->busy || dst_page->cleaning) && - (((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) { - /* someone else is playing with the page, we will */ - /* have to wait */ - PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT); - vm_object_unlock(object); - thread_block((void(*)(void))0); - vm_object_lock(object); - continue; - } +boolean_t +memory_object_is_slid( + memory_object_control_t control) +{ + vm_object_t object = VM_OBJECT_NULL; + vm_object_t slide_object = slide_info.slide_object; - if (ops & UPL_POP_DUMP) { - vm_page_lock_queues(); - vm_page_free(dst_page); - vm_page_unlock_queues(); - break; - } + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return FALSE; - if (flags) { - *flags = 0; + return (object == slide_object); +} - /* Get the 
condition of flags before requested ops */ - /* are undertaken */ +static zone_t mem_obj_control_zone; - if(dst_page->dirty) *flags |= UPL_POP_DIRTY; - if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT; - if(dst_page->precious) *flags |= UPL_POP_PRECIOUS; - if(dst_page->absent) *flags |= UPL_POP_ABSENT; - if(dst_page->busy) *flags |= UPL_POP_BUSY; - } - if (phys_entry) - *phys_entry = dst_page->phys_addr; - - /* The caller should have made a call either contingent with */ - /* or prior to this call to set UPL_POP_BUSY */ - if(ops & UPL_POP_SET) { - /* The protection granted with this assert will */ - /* not be complete. If the caller violates the */ - /* convention and attempts to change page state */ - /* without first setting busy we may not see it */ - /* because the page may already be busy. However */ - /* if such violations occur we will assert sooner */ - /* or later. */ - assert(dst_page->busy || (ops & UPL_POP_BUSY)); - if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE; - if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE; - if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE; - if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE; - if (ops & UPL_POP_BUSY) dst_page->busy = TRUE; - } +__private_extern__ void +memory_object_control_bootstrap(void) +{ + int i; - if(ops & UPL_POP_CLR) { - assert(dst_page->busy); - if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE; - if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE; - if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE; - if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE; - if (ops & UPL_POP_BUSY) { - dst_page->busy = FALSE; - PAGE_WAKEUP(dst_page); - } - } - break; + i = (vm_size_t) sizeof (struct memory_object_control); + mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control"); + zone_change(mem_obj_control_zone, Z_CALLERACCT, FALSE); + zone_change(mem_obj_control_zone, Z_NOENCRYPT, TRUE); + return; +} + +__private_extern__ memory_object_control_t +memory_object_control_allocate( + vm_object_t 
object) +{ + memory_object_control_t control; + + control = (memory_object_control_t)zalloc(mem_obj_control_zone); + if (control != MEMORY_OBJECT_CONTROL_NULL) { + control->moc_object = object; + control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */ } + return (control); +} - vm_object_unlock(object); - return KERN_SUCCESS; - +__private_extern__ void +memory_object_control_collapse( + memory_object_control_t control, + vm_object_t object) +{ + assert((control->moc_object != VM_OBJECT_NULL) && + (control->moc_object != object)); + control->moc_object = object; +} + +__private_extern__ vm_object_t +memory_object_control_to_vm_object( + memory_object_control_t control) +{ + if (control == MEMORY_OBJECT_CONTROL_NULL || + control->moc_ikot != IKOT_MEM_OBJ_CONTROL) + return VM_OBJECT_NULL; + + return (control->moc_object); +} + +memory_object_control_t +convert_port_to_mo_control( + __unused mach_port_t port) +{ + return MEMORY_OBJECT_CONTROL_NULL; +} + + +mach_port_t +convert_mo_control_to_port( + __unused memory_object_control_t control) +{ + return MACH_PORT_NULL; +} + +void +memory_object_control_reference( + __unused memory_object_control_t control) +{ + return; +} + +/* + * We only every issue one of these references, so kill it + * when that gets released (should switch the real reference + * counting in true port-less EMMI). 
+ */ +void +memory_object_control_deallocate( + memory_object_control_t control) +{ + zfree(mem_obj_control_zone, control); +} + +void +memory_object_control_disable( + memory_object_control_t control) +{ + assert(control->moc_object != VM_OBJECT_NULL); + control->moc_object = VM_OBJECT_NULL; +} + +void +memory_object_default_reference( + memory_object_default_t dmm) +{ + ipc_port_make_send(dmm); +} + +void +memory_object_default_deallocate( + memory_object_default_t dmm) +{ + ipc_port_release_send(dmm); +} + +memory_object_t +convert_port_to_memory_object( + __unused mach_port_t port) +{ + return (MEMORY_OBJECT_NULL); +} + + +mach_port_t +convert_memory_object_to_port( + __unused memory_object_t object) +{ + return (MACH_PORT_NULL); +} + + +/* Routine memory_object_reference */ +void memory_object_reference( + memory_object_t memory_object) +{ + (memory_object->mo_pager_ops->memory_object_reference)( + memory_object); } +/* Routine memory_object_deallocate */ +void memory_object_deallocate( + memory_object_t memory_object) +{ + (memory_object->mo_pager_ops->memory_object_deallocate)( + memory_object); +} + + +/* Routine memory_object_init */ +kern_return_t memory_object_init +( + memory_object_t memory_object, + memory_object_control_t memory_control, + memory_object_cluster_size_t memory_object_page_size +) +{ + return (memory_object->mo_pager_ops->memory_object_init)( + memory_object, + memory_control, + memory_object_page_size); +} + +/* Routine memory_object_terminate */ +kern_return_t memory_object_terminate +( + memory_object_t memory_object +) +{ + return (memory_object->mo_pager_ops->memory_object_terminate)( + memory_object); +} + +/* Routine memory_object_data_request */ +kern_return_t memory_object_data_request +( + memory_object_t memory_object, + memory_object_offset_t offset, + memory_object_cluster_size_t length, + vm_prot_t desired_access, + memory_object_fault_info_t fault_info +) +{ + return 
(memory_object->mo_pager_ops->memory_object_data_request)( + memory_object, + offset, + length, + desired_access, + fault_info); +} + +/* Routine memory_object_data_return */ +kern_return_t memory_object_data_return +( + memory_object_t memory_object, + memory_object_offset_t offset, + memory_object_cluster_size_t size, + memory_object_offset_t *resid_offset, + int *io_error, + boolean_t dirty, + boolean_t kernel_copy, + int upl_flags +) +{ + return (memory_object->mo_pager_ops->memory_object_data_return)( + memory_object, + offset, + size, + resid_offset, + io_error, + dirty, + kernel_copy, + upl_flags); +} + +/* Routine memory_object_data_initialize */ +kern_return_t memory_object_data_initialize +( + memory_object_t memory_object, + memory_object_offset_t offset, + memory_object_cluster_size_t size +) +{ + return (memory_object->mo_pager_ops->memory_object_data_initialize)( + memory_object, + offset, + size); +} + +/* Routine memory_object_data_unlock */ +kern_return_t memory_object_data_unlock +( + memory_object_t memory_object, + memory_object_offset_t offset, + memory_object_size_t size, + vm_prot_t desired_access +) +{ + return (memory_object->mo_pager_ops->memory_object_data_unlock)( + memory_object, + offset, + size, + desired_access); +} + +/* Routine memory_object_synchronize */ +kern_return_t memory_object_synchronize +( + memory_object_t memory_object, + memory_object_offset_t offset, + memory_object_size_t size, + vm_sync_t sync_flags +) +{ + return (memory_object->mo_pager_ops->memory_object_synchronize)( + memory_object, + offset, + size, + sync_flags); +} + + +/* + * memory_object_map() is called by VM (in vm_map_enter() and its variants) + * each time a "named" VM object gets mapped directly or indirectly + * (copy-on-write mapping). A "named" VM object has an extra reference held + * by the pager to keep it alive until the pager decides that the + * memory object (and its VM object) can be reclaimed. 
+ * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all + * the mappings of that memory object have been removed. + * + * For a given VM object, calls to memory_object_map() and memory_object_unmap() + * are serialized (through object->mapping_in_progress), to ensure that the + * pager gets a consistent view of the mapping status of the memory object. + * + * This allows the pager to keep track of how many times a memory object + * has been mapped and with which protections, to decide when it can be + * reclaimed. + */ + +/* Routine memory_object_map */ +kern_return_t memory_object_map +( + memory_object_t memory_object, + vm_prot_t prot +) +{ + return (memory_object->mo_pager_ops->memory_object_map)( + memory_object, + prot); +} + +/* Routine memory_object_last_unmap */ +kern_return_t memory_object_last_unmap +( + memory_object_t memory_object +) +{ + return (memory_object->mo_pager_ops->memory_object_last_unmap)( + memory_object); +} + +/* Routine memory_object_data_reclaim */ +kern_return_t memory_object_data_reclaim +( + memory_object_t memory_object, + boolean_t reclaim_backing_store +) +{ + if (memory_object->mo_pager_ops->memory_object_data_reclaim == NULL) + return KERN_NOT_SUPPORTED; + return (memory_object->mo_pager_ops->memory_object_data_reclaim)( + memory_object, + reclaim_backing_store); +} +/* Routine memory_object_create */ +kern_return_t memory_object_create +( + memory_object_default_t default_memory_manager, + vm_size_t new_memory_object_size, + memory_object_t *new_memory_object +) +{ + return default_pager_memory_object_create(default_memory_manager, + new_memory_object_size, + new_memory_object); +} + +upl_t +convert_port_to_upl( + ipc_port_t port) +{ + upl_t upl; + + ip_lock(port); + if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) { + ip_unlock(port); + return (upl_t)NULL; + } + upl = (upl_t) port->ip_kobject; + ip_unlock(port); + upl_lock(upl); + upl->ref_count+=1; + upl_unlock(upl); + return upl; +} + 
+mach_port_t +convert_upl_to_port( + __unused upl_t upl) +{ + return MACH_PORT_NULL; +} + +__private_extern__ void +upl_no_senders( + __unused ipc_port_t port, + __unused mach_port_mscount_t mscount) +{ + return; +}