X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/378393581903b274cb7a4d18e0d978071a6b592d..c18c124eaa464aaaa5549e99e5a70fc9cbb50944:/osfmk/vm/vm_user.c diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index a659f45a9..024140fb5 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -56,6 +62,29 @@ * User-exported virtual memory functions. */ +/* + * There are three implementations of the "XXX_allocate" functionality in + * the kernel: mach_vm_allocate (for any task on the platform), vm_allocate + * (for a task with the same address space size, especially the current task), + * and vm32_vm_allocate (for the specific case of a 32-bit task). vm_allocate + * in the kernel should only be used on the kernel_task. vm32_vm_allocate only + * makes sense on platforms where a user task can either be 32 or 64, or the kernel + * task can be 32 or 64. mach_vm_allocate makes sense everywhere, and is preferred + * for new code. + * + * The entrypoints into the kernel are more complex. All platforms support a + * mach_vm_allocate-style API (subsystem 4800) which operates with the largest + * size types for the platform. On platforms that only support U32/K32, + * subsystem 4800 is all you need. On platforms that support both U32 and U64, + * subsystem 3800 is used to disambiguate the size of parameters, and they will + * always be 32-bit and call into the vm32_vm_allocate APIs. 
On non-U32/K32 platforms, + * the MIG glue should never call into vm_allocate directly, because the calling + * task and kernel_task are unlikely to use the same size parameters + * + * New VM call implementations should be added here and to mach_vm.defs + * (subsystem 4800), and use mach_vm_* "wide" types. + */ + #include #include @@ -72,9 +101,7 @@ #include #include -#include #include -#include #include #include @@ -87,6 +114,7 @@ #include #include #include +#include vm_size_t upl_offset_to_pagelist = 0; @@ -110,7 +138,11 @@ mach_vm_allocate( vm_map_offset_t map_addr; vm_map_size_t map_size; kern_return_t result; - boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + boolean_t anywhere; + + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_ALLOCATE) + return KERN_INVALID_ARGUMENT; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); @@ -119,6 +151,7 @@ mach_vm_allocate( return(KERN_SUCCESS); } + anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); if (anywhere) { /* * No specific address requested, so start candidate address @@ -132,10 +165,12 @@ mach_vm_allocate( */ map_addr = vm_map_min(map); if (map_addr == 0) - map_addr += PAGE_SIZE; + map_addr += VM_MAP_PAGE_SIZE(map); } else - map_addr = vm_map_trunc_page(*addr); - map_size = vm_map_round_page(size); + map_addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); if (map_size == 0) { return(KERN_INVALID_ARGUMENT); } @@ -172,7 +207,11 @@ vm_allocate( vm_map_offset_t map_addr; vm_map_size_t map_size; kern_return_t result; - boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + boolean_t anywhere; + + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_ALLOCATE) + return KERN_INVALID_ARGUMENT; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); @@ -181,6 +220,7 @@ vm_allocate( return(KERN_SUCCESS); } + anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); if (anywhere) { /* * No specific address requested, so 
start candidate address @@ -194,10 +234,12 @@ vm_allocate( */ map_addr = vm_map_min(map); if (map_addr == 0) - map_addr += PAGE_SIZE; + map_addr += VM_MAP_PAGE_SIZE(map); } else - map_addr = vm_map_trunc_page(*addr); - map_size = vm_map_round_page(size); + map_addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); if (map_size == 0) { return(KERN_INVALID_ARGUMENT); } @@ -236,8 +278,12 @@ mach_vm_deallocate( if (size == (mach_vm_offset_t) 0) return(KERN_SUCCESS); - return(vm_map_remove(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), VM_MAP_NO_FLAGS)); + return(vm_map_remove(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS)); } /* @@ -258,8 +304,12 @@ vm_deallocate( if (size == (vm_offset_t) 0) return(KERN_SUCCESS); - return(vm_map_remove(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), VM_MAP_NO_FLAGS)); + return(vm_map_remove(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS)); } /* @@ -282,8 +332,10 @@ mach_vm_inherit( return KERN_SUCCESS; return(vm_map_inherit(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_inheritance)); } @@ -307,8 +359,10 @@ vm_inherit( return KERN_SUCCESS; return(vm_map_inherit(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_inheritance)); } @@ -334,8 +388,10 @@ mach_vm_protect( return KERN_SUCCESS; return(vm_map_protect(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_protection, 
set_maximum)); } @@ -363,8 +419,10 @@ vm_protect( return KERN_SUCCESS; return(vm_map_protect(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_protection, set_maximum)); } @@ -388,11 +446,14 @@ mach_vm_machine_attribute( if (size == 0) return KERN_SUCCESS; - return vm_map_machine_attribute(map, - vm_map_trunc_page(addr), - vm_map_round_page(addr+size), - attribute, - value); + return vm_map_machine_attribute( + map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr+size, + VM_MAP_PAGE_MASK(map)), + attribute, + value); } /* @@ -415,11 +476,14 @@ vm_machine_attribute( if (size == 0) return KERN_SUCCESS; - return vm_map_machine_attribute(map, - vm_map_trunc_page(addr), - vm_map_round_page(addr+size), - attribute, - value); + return vm_map_machine_attribute( + map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr+size, + VM_MAP_PAGE_MASK(map)), + attribute, + value); } /* @@ -449,6 +513,8 @@ mach_vm_read( if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); + if ((mach_msg_type_number_t) size != size) + return KERN_INVALID_ARGUMENT; error = vm_map_copyin(map, (vm_map_address_t)addr, @@ -458,7 +524,8 @@ mach_vm_read( if (KERN_SUCCESS == error) { *data = (pointer_t) ipc_address; - *data_size = size; + *data_size = (mach_msg_type_number_t) size; + assert(*data_size == size); } return(error); } @@ -487,6 +554,16 @@ vm_read( if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); + if (size > (unsigned)(mach_msg_type_number_t) -1) { + /* + * The kernel could handle a 64-bit "size" value, but + * it could not return the size of the data in "*data_size" + * without overflowing. + * Let's reject this "size" as invalid. 
+ */ + return KERN_INVALID_ARGUMENT; + } + error = vm_map_copyin(map, (vm_map_address_t)addr, (vm_map_size_t)size, @@ -495,7 +572,8 @@ vm_read( if (KERN_SUCCESS == error) { *data = (pointer_t) ipc_address; - *data_size = size; + *data_size = (mach_msg_type_number_t) size; + assert(*data_size == size); } return(error); } @@ -518,7 +596,8 @@ mach_vm_read_list( kern_return_t error; vm_map_copy_t copy; - if (map == VM_MAP_NULL) + if (map == VM_MAP_NULL || + count > VM_MAP_ENTRY_MAX) return(KERN_INVALID_ARGUMENT); error = KERN_SUCCESS; @@ -582,7 +661,8 @@ vm_read_list( kern_return_t error; vm_map_copy_t copy; - if (map == VM_MAP_NULL) + if (map == VM_MAP_NULL || + count > VM_MAP_ENTRY_MAX) return(KERN_INVALID_ARGUMENT); error = KERN_SUCCESS; @@ -828,276 +908,29 @@ mach_vm_map( vm_prot_t max_protection, vm_inherit_t inheritance) { - vm_map_address_t map_addr; - vm_map_size_t map_size; - vm_object_t object; - vm_object_size_t size; - kern_return_t result; - - /* - * Check arguments for validity - */ - if ((target_map == VM_MAP_NULL) || - (cur_protection & ~VM_PROT_ALL) || - (max_protection & ~VM_PROT_ALL) || - (inheritance > VM_INHERIT_LAST_VALID) || - initial_size == 0) - return(KERN_INVALID_ARGUMENT); - - map_addr = vm_map_trunc_page(*address); - map_size = vm_map_round_page(initial_size); - size = vm_object_round_page(initial_size); - - /* - * Find the vm object (if any) corresponding to this port. 
- */ - if (!IP_VALID(port)) { - object = VM_OBJECT_NULL; - offset = 0; - copy = FALSE; - } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) { - vm_named_entry_t named_entry; - - named_entry = (vm_named_entry_t)port->ip_kobject; - /* a few checks to make sure user is obeying rules */ - if(size == 0) { - if(offset >= named_entry->size) - return(KERN_INVALID_RIGHT); - size = named_entry->size - offset; - } - if((named_entry->protection & max_protection) != max_protection) - return(KERN_INVALID_RIGHT); - if((named_entry->protection & cur_protection) != cur_protection) - return(KERN_INVALID_RIGHT); - if(named_entry->size < (offset + size)) - return(KERN_INVALID_ARGUMENT); - - /* the callers parameter offset is defined to be the */ - /* offset from beginning of named entry offset in object */ - offset = offset + named_entry->offset; - - named_entry_lock(named_entry); - if(named_entry->is_sub_map) { - vm_map_entry_t map_entry; - - named_entry_unlock(named_entry); - vm_object_reference(vm_submap_object); - if ((result = vm_map_enter(target_map, - &map_addr, map_size, - (vm_map_offset_t)mask, flags, - vm_submap_object, 0, - FALSE, - cur_protection, max_protection, inheritance - )) != KERN_SUCCESS) { - vm_object_deallocate(vm_submap_object); - } else { - char alias; - - VM_GET_FLAGS_ALIAS(flags, alias); - if ((alias == VM_MEMORY_SHARED_PMAP) && - !copy) { - vm_map_submap(target_map, map_addr, - map_addr + map_size, - named_entry->backing.map, - (vm_map_offset_t)offset, TRUE); - } else { - vm_map_submap(target_map, map_addr, - map_addr + map_size, - named_entry->backing.map, - (vm_map_offset_t)offset, FALSE); - } - if(copy) { - if(vm_map_lookup_entry( - target_map, map_addr, &map_entry)) { - map_entry->needs_copy = TRUE; - } - } - *address = map_addr; - } - return(result); - - } else if (named_entry->is_pager) { - unsigned int access; - vm_prot_t protections; - unsigned int wimg_mode; - boolean_t cache_attr; - - protections = named_entry->protection - & VM_PROT_ALL; - access = 
GET_MAP_MEM(named_entry->protection); - - object = vm_object_enter( - named_entry->backing.pager, - named_entry->size, - named_entry->internal, - FALSE, - FALSE); - if (object == VM_OBJECT_NULL) { - named_entry_unlock(named_entry); - return(KERN_INVALID_OBJECT); - } - - /* JMM - drop reference on pager here */ - - /* create an extra ref for the named entry */ - vm_object_lock(object); - vm_object_reference_locked(object); - named_entry->backing.object = object; - named_entry->is_pager = FALSE; - named_entry_unlock(named_entry); - - wimg_mode = object->wimg_bits; - if(access == MAP_MEM_IO) { - wimg_mode = VM_WIMG_IO; - } else if (access == MAP_MEM_COPYBACK) { - wimg_mode = VM_WIMG_USE_DEFAULT; - } else if (access == MAP_MEM_WTHRU) { - wimg_mode = VM_WIMG_WTHRU; - } else if (access == MAP_MEM_WCOMB) { - wimg_mode = VM_WIMG_WCOMB; - } - if ((wimg_mode == VM_WIMG_IO) - || (wimg_mode == VM_WIMG_WCOMB)) - cache_attr = TRUE; - else - cache_attr = FALSE; - - /* wait for object (if any) to be ready */ - if (!named_entry->internal) { - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); - vm_object_lock(object); - } - } - - if(object->wimg_bits != wimg_mode) { - vm_page_t p; - - vm_object_paging_wait(object, THREAD_UNINT); - - object->wimg_bits = wimg_mode; - queue_iterate(&object->memq, p, vm_page_t, listq) { - if (!p->fictitious) { - pmap_disconnect(p->phys_page); - if (cache_attr) - pmap_sync_page_attributes_phys(p->phys_page); - } - } - } - object->true_share = TRUE; - if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) - object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; - vm_object_unlock(object); - } else { - /* This is the case where we are going to map */ - /* an already mapped object. If the object is */ - /* not ready it is internal. An external */ - /* object cannot be mapped until it is ready */ - /* we can therefore avoid the ready check */ - /* in this case. 
*/ - object = named_entry->backing.object; - assert(object != VM_OBJECT_NULL); - named_entry_unlock(named_entry); - vm_object_reference(object); - } - } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) { - /* - * JMM - This is temporary until we unify named entries - * and raw memory objects. - * - * Detected fake ip_kotype for a memory object. In - * this case, the port isn't really a port at all, but - * instead is just a raw memory object. - */ - - if ((object = vm_object_enter((memory_object_t)port, - size, FALSE, FALSE, FALSE)) - == VM_OBJECT_NULL) - return(KERN_INVALID_OBJECT); - - /* wait for object (if any) to be ready */ - if (object != VM_OBJECT_NULL) { - if(object == kernel_object) { - printf("Warning: Attempt to map kernel object" - " by a non-private kernel entity\n"); - return(KERN_INVALID_OBJECT); - } - vm_object_lock(object); - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); - vm_object_lock(object); - } - vm_object_unlock(object); - } - } else { - return (KERN_INVALID_OBJECT); - } - - /* - * Perform the copy if requested - */ - - if (copy) { - vm_object_t new_object; - vm_object_offset_t new_offset; - - result = vm_object_copy_strategically(object, offset, size, - &new_object, &new_offset, - ©); - - - if (result == KERN_MEMORY_RESTART_COPY) { - boolean_t success; - boolean_t src_needs_copy; - - /* - * XXX - * We currently ignore src_needs_copy. - * This really is the issue of how to make - * MEMORY_OBJECT_COPY_SYMMETRIC safe for - * non-kernel users to use. Solution forthcoming. - * In the meantime, since we don't allow non-kernel - * memory managers to specify symmetric copy, - * we won't run into problems here. - */ - new_object = object; - new_offset = offset; - success = vm_object_copy_quickly(&new_object, - new_offset, size, - &src_needs_copy, - ©); - assert(success); - result = KERN_SUCCESS; - } - /* - * Throw away the reference to the - * original object, as it won't be mapped. 
- */ - - vm_object_deallocate(object); + kern_return_t kr; + vm_map_offset_t vmmaddr; - if (result != KERN_SUCCESS) - return (result); + vmmaddr = (vm_map_offset_t) *address; - object = new_object; - offset = new_offset; - } + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_MAP) + return KERN_INVALID_ARGUMENT; - if ((result = vm_map_enter(target_map, - &map_addr, map_size, - (vm_map_offset_t)mask, - flags, - object, offset, - copy, - cur_protection, max_protection, inheritance - )) != KERN_SUCCESS) - vm_object_deallocate(object); - *address = map_addr; - return(result); + kr = vm_map_enter_mem_object(target_map, + &vmmaddr, + initial_size, + mask, + flags, + port, + offset, + copy, + cur_protection, + max_protection, + inheritance); + + *address = vmmaddr; + return kr; } @@ -1128,7 +961,7 @@ vm_map_64( kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, port, offset, copy, cur_protection, max_protection, inheritance); - *address = CAST_DOWN(vm_address_t, map_addr); + *address = CAST_DOWN(vm_offset_t, map_addr); return kr; } @@ -1161,7 +994,7 @@ vm_map( kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, port, obj_offset, copy, cur_protection, max_protection, inheritance); - *address = CAST_DOWN(vm_address_t, map_addr); + *address = CAST_DOWN(vm_offset_t, map_addr); return kr; } @@ -1179,7 +1012,7 @@ mach_vm_remap( mach_vm_offset_t *address, mach_vm_size_t size, mach_vm_offset_t mask, - boolean_t anywhere, + int flags, vm_map_t src_map, mach_vm_offset_t memory_address, boolean_t copy, @@ -1193,13 +1026,17 @@ mach_vm_remap( if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map) return KERN_INVALID_ARGUMENT; + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_REMAP) + return KERN_INVALID_ARGUMENT; + map_addr = (vm_map_offset_t)*address; kr = vm_map_remap(target_map, &map_addr, size, mask, - anywhere, + flags, src_map, memory_address, copy, @@ -1227,7 +1064,7 @@ vm_remap( vm_offset_t *address, 
vm_size_t size, vm_offset_t mask, - boolean_t anywhere, + int flags, vm_map_t src_map, vm_offset_t memory_address, boolean_t copy, @@ -1241,13 +1078,17 @@ vm_remap( if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map) return KERN_INVALID_ARGUMENT; + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_REMAP) + return KERN_INVALID_ARGUMENT; + map_addr = (vm_map_offset_t)*address; kr = vm_map_remap(target_map, &map_addr, size, mask, - anywhere, + flags, src_map, memory_address, copy, @@ -1289,15 +1130,24 @@ mach_vm_wire( if (map == VM_MAP_NULL) return KERN_INVALID_TASK; - if (access & ~VM_PROT_ALL) + if (access & ~VM_PROT_ALL || (start + size < start)) return KERN_INVALID_ARGUMENT; if (access != VM_PROT_NONE) { - rc = vm_map_wire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), access, TRUE); + rc = vm_map_wire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + access, + TRUE); } else { - rc = vm_map_unwire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), TRUE); + rc = vm_map_unwire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + TRUE); } return rc; } @@ -1334,11 +1184,20 @@ vm_wire( if (size == 0) { rc = KERN_SUCCESS; } else if (access != VM_PROT_NONE) { - rc = vm_map_wire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), access, TRUE); + rc = vm_map_wire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + access, + TRUE); } else { - rc = vm_map_unwire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), TRUE); + rc = vm_map_unwire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + TRUE); } return rc; } @@ -1439,6 +1298,32 @@ vm_msync( } +int +vm_toggle_entry_reuse(int toggle, int *old_value) +{ + vm_map_t map = current_map(); + 
+ if(toggle == VM_TOGGLE_GETVALUE && old_value != NULL){ + *old_value = map->disable_vmentry_reuse; + } else if(toggle == VM_TOGGLE_SET){ + vm_map_lock(map); + map->disable_vmentry_reuse = TRUE; + if (map->first_free == vm_map_to_entry(map)) { + map->highest_entry_end = vm_map_min(map); + } else { + map->highest_entry_end = map->first_free->vme_end; + } + vm_map_unlock(map); + } else if (toggle == VM_TOGGLE_CLEAR){ + vm_map_lock(map); + map->disable_vmentry_reuse = FALSE; + vm_map_unlock(map); + } else + return KERN_INVALID_ARGUMENT; + + return KERN_SUCCESS; +} + /* * mach_vm_behavior_set * @@ -1461,8 +1346,12 @@ mach_vm_behavior_set( if (size == 0) return KERN_SUCCESS; - return(vm_map_behavior_set(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), new_behavior)); + return(vm_map_behavior_set(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + new_behavior)); } /* @@ -1491,8 +1380,12 @@ vm_behavior_set( if (size == 0) return KERN_SUCCESS; - return(vm_map_behavior_set(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), new_behavior)); + return(vm_map_behavior_set(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + new_behavior)); } /* @@ -1758,6 +1651,22 @@ vm_region_recurse( return kr; } +kern_return_t +mach_vm_purgable_control( + vm_map_t map, + mach_vm_offset_t address, + vm_purgable_t control, + int *state) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + return vm_map_purgable_control(map, + vm_map_trunc_page(address, PAGE_MASK), + control, + state); +} + kern_return_t vm_purgable_control( vm_map_t map, @@ -1769,7 +1678,7 @@ vm_purgable_control( return KERN_INVALID_ARGUMENT; return vm_map_purgable_control(map, - vm_map_trunc_page(address), + vm_map_trunc_page(address, PAGE_MASK), control, state); } @@ -1834,9 +1743,10 @@ mach_vm_page_query( if (VM_MAP_NULL == map) return 
KERN_INVALID_ARGUMENT; - return vm_map_page_info(map, - vm_map_trunc_page(offset), - disposition, ref_count); + return vm_map_page_query_internal( + map, + vm_map_trunc_page(offset, PAGE_MASK), + disposition, ref_count); } kern_return_t @@ -1849,9 +1759,28 @@ vm_map_page_query( if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - return vm_map_page_info(map, - vm_map_trunc_page(offset), - disposition, ref_count); + return vm_map_page_query_internal( + map, + vm_map_trunc_page(offset, PAGE_MASK), + disposition, ref_count); +} + +kern_return_t +mach_vm_page_info( + vm_map_t map, + mach_vm_address_t address, + vm_page_info_flavor_t flavor, + vm_page_info_t info, + mach_msg_type_number_t *count) +{ + kern_return_t kr; + + if (map == VM_MAP_NULL) { + return KERN_INVALID_ARGUMENT; + } + + kr = vm_map_page_info(map, address, flavor, info, count); + return kr; } /* map a (whole) upl into an address space */ @@ -1859,7 +1788,7 @@ kern_return_t vm_upl_map( vm_map_t map, upl_t upl, - vm_offset_t *dst_addr) + vm_address_t *dst_addr) { vm_map_offset_t map_addr; kern_return_t kr; @@ -1868,7 +1797,7 @@ vm_upl_map( return KERN_INVALID_ARGUMENT; kr = vm_map_enter_upl(map, upl, &map_addr); - *dst_addr = CAST_DOWN(vm_offset_t, map_addr); + *dst_addr = CAST_DOWN(vm_address_t, map_addr); return kr; } @@ -1918,12 +1847,6 @@ vm_map_get_upl( return kr; } - -__private_extern__ kern_return_t -mach_memory_entry_allocate( - vm_named_entry_t *user_entry_p, - ipc_port_t *user_handle_p); /* forward */ - /* * mach_make_memory_entry_64 * @@ -1953,8 +1876,7 @@ mach_make_memory_entry_64( boolean_t wired; vm_object_offset_t obj_off; vm_prot_t prot; - vm_map_offset_t lo_offset, hi_offset; - vm_behavior_t behavior; + struct vm_object_fault_info fault_info; vm_object_t object; vm_object_t shadow_object; @@ -1969,16 +1891,28 @@ mach_make_memory_entry_64( vm_map_offset_t local_offset; vm_object_size_t mappable_size; + /* + * Stash the offset in the page for use by vm_map_enter_mem_object() + * in the 
VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case. + */ + vm_object_offset_t offset_in_page; + unsigned int access; vm_prot_t protections; + vm_prot_t original_protections, mask_protections; unsigned int wimg_mode; - boolean_t cache_attr = FALSE; + + boolean_t force_shadow = FALSE; + boolean_t use_data_addr; if (((permission & 0x00FF0000) & ~(MAP_MEM_ONLY | MAP_MEM_NAMED_CREATE | MAP_MEM_PURGABLE | - MAP_MEM_NAMED_REUSE))) { + MAP_MEM_NAMED_REUSE | + MAP_MEM_USE_DATA_ADDR | + MAP_MEM_VM_COPY | + MAP_MEM_VM_SHARE))) { /* * Unknown flag: reject for forward compatibility. */ @@ -1992,28 +1926,37 @@ mach_make_memory_entry_64( parent_entry = NULL; } - protections = permission & VM_PROT_ALL; + if (parent_entry && parent_entry->is_copy) { + return KERN_INVALID_ARGUMENT; + } + + original_protections = permission & VM_PROT_ALL; + protections = original_protections; + mask_protections = permission & VM_PROT_IS_MASK; access = GET_MAP_MEM(permission); + use_data_addr = ((permission & MAP_MEM_USE_DATA_ADDR) != 0); user_handle = IP_NULL; user_entry = NULL; - map_offset = vm_map_trunc_page(offset); - map_size = vm_map_round_page(*size); + map_offset = vm_map_trunc_page(offset, PAGE_MASK); if (permission & MAP_MEM_ONLY) { boolean_t parent_is_object; - if (parent_entry == NULL) { + map_size = vm_map_round_page(*size, PAGE_MASK); + + if (use_data_addr || parent_entry == NULL) { return KERN_INVALID_ARGUMENT; } - parent_is_object = !(parent_entry->is_sub_map || parent_entry->is_pager); + parent_is_object = !(parent_entry->is_sub_map || + parent_entry->is_pager); object = parent_entry->backing.object; if(parent_is_object && object != VM_OBJECT_NULL) wimg_mode = object->wimg_bits; else - wimg_mode = VM_WIMG_DEFAULT; + wimg_mode = VM_WIMG_USE_DEFAULT; if((access != GET_MAP_MEM(parent_entry->protection)) && !(parent_entry->protection & VM_PROT_WRITE)) { return KERN_INVALID_RIGHT; @@ -2023,7 +1966,10 @@ mach_make_memory_entry_64( wimg_mode = VM_WIMG_IO; } else if (access == 
MAP_MEM_COPYBACK) { SET_MAP_MEM(access, parent_entry->protection); - wimg_mode = VM_WIMG_DEFAULT; + wimg_mode = VM_WIMG_USE_DEFAULT; + } else if (access == MAP_MEM_INNERWBACK) { + SET_MAP_MEM(access, parent_entry->protection); + wimg_mode = VM_WIMG_INNERWBACK; } else if (access == MAP_MEM_WTHRU) { SET_MAP_MEM(access, parent_entry->protection); wimg_mode = VM_WIMG_WTHRU; @@ -2031,36 +1977,26 @@ mach_make_memory_entry_64( SET_MAP_MEM(access, parent_entry->protection); wimg_mode = VM_WIMG_WCOMB; } - if(parent_is_object && object && + if (parent_is_object && object && (access != MAP_MEM_NOOP) && (!(object->nophyscache))) { - if(object->wimg_bits != wimg_mode) { - vm_page_t p; - if ((wimg_mode == VM_WIMG_IO) - || (wimg_mode == VM_WIMG_WCOMB)) - cache_attr = TRUE; - else - cache_attr = FALSE; - vm_object_lock(object); - vm_object_paging_wait(object, THREAD_UNINT); - object->wimg_bits = wimg_mode; - queue_iterate(&object->memq, - p, vm_page_t, listq) { - if (!p->fictitious) { - pmap_disconnect(p->phys_page); - if (cache_attr) - pmap_sync_page_attributes_phys(p->phys_page); - } - } - vm_object_unlock(object); + + if (object->wimg_bits != wimg_mode) { + vm_object_lock(object); + vm_object_change_wimg_mode(object, wimg_mode); + vm_object_unlock(object); } } if (object_handle) *object_handle = IP_NULL; return KERN_SUCCESS; - } + } else if (permission & MAP_MEM_NAMED_CREATE) { + map_size = vm_map_round_page(*size, PAGE_MASK); + + if (use_data_addr) { + return KERN_INVALID_ARGUMENT; + } - if(permission & MAP_MEM_NAMED_CREATE) { kr = mach_memory_entry_allocate(&user_entry, &user_handle); if (kr != KERN_SUCCESS) { return KERN_FAILURE; @@ -2069,9 +2005,9 @@ mach_make_memory_entry_64( /* * Force the creation of the VM object now. 
*/ - if (map_size > (vm_map_size_t) VM_MAX_ADDRESS) { + if (map_size > (vm_map_size_t) ANON_MAX_SIZE) { /* - * LP64todo - for now, we can only allocate 4GB + * LP64todo - for now, we can only allocate 4GB-4096 * internal objects because the default pager can't * page bigger ones. Remove this when it can. */ @@ -2089,7 +2025,14 @@ mach_make_memory_entry_64( kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } - object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE; + object->purgable = VM_PURGABLE_NONVOLATILE; + assert(object->vo_purgeable_owner == NULL); + assert(object->resident_page_count == 0); + assert(object->wired_page_count == 0); + vm_object_lock(object); + vm_purgeable_nonvolatile_enqueue(object, + current_task()); + vm_object_unlock(object); } /* @@ -2101,7 +2044,9 @@ mach_make_memory_entry_64( if (access == MAP_MEM_IO) { wimg_mode = VM_WIMG_IO; } else if (access == MAP_MEM_COPYBACK) { - wimg_mode = VM_WIMG_DEFAULT; + wimg_mode = VM_WIMG_USE_DEFAULT; + } else if (access == MAP_MEM_INNERWBACK) { + wimg_mode = VM_WIMG_INNERWBACK; } else if (access == MAP_MEM_WTHRU) { wimg_mode = VM_WIMG_WTHRU; } else if (access == MAP_MEM_WCOMB) { @@ -2122,12 +2067,14 @@ mach_make_memory_entry_64( * shadow objects either... 
*/ object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + object->true_share = TRUE; user_entry->backing.object = object; user_entry->internal = TRUE; user_entry->is_sub_map = FALSE; user_entry->is_pager = FALSE; user_entry->offset = 0; + user_entry->data_offset = 0; user_entry->protection = protections; SET_MAP_MEM(access, user_entry->protection); user_entry->size = map_size; @@ -2140,13 +2087,144 @@ mach_make_memory_entry_64( return KERN_SUCCESS; } + if (permission & MAP_MEM_VM_COPY) { + vm_map_copy_t copy; + + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + + if (use_data_addr) { + map_size = (vm_map_round_page(offset + *size, + PAGE_MASK) - + map_offset); + offset_in_page = offset - map_offset; + } else { + map_size = vm_map_round_page(*size, PAGE_MASK); + offset_in_page = 0; + } + + kr = vm_map_copyin(target_map, + map_offset, + map_size, + FALSE, + ©); + if (kr != KERN_SUCCESS) { + return kr; + } + + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return KERN_FAILURE; + } + + user_entry->backing.copy = copy; + user_entry->internal = FALSE; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->is_copy = TRUE; + user_entry->offset = 0; + user_entry->protection = protections; + user_entry->size = map_size; + user_entry->data_offset = offset_in_page; + + *size = CAST_DOWN(vm_size_t, map_size); + *object_handle = user_handle; + return KERN_SUCCESS; + } + + if (permission & MAP_MEM_VM_SHARE) { + vm_map_copy_t copy; + vm_prot_t cur_prot, max_prot; + + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + + if (use_data_addr) { + map_size = (vm_map_round_page(offset + *size, + PAGE_MASK) - + map_offset); + offset_in_page = offset - map_offset; + } else { + map_size = vm_map_round_page(*size, PAGE_MASK); + offset_in_page = 0; + } + + kr = vm_map_copy_extract(target_map, + map_offset, + map_size, + ©, + &cur_prot, + &max_prot); + if (kr != 
KERN_SUCCESS) { + return kr; + } + + if (mask_protections) { + /* + * We just want as much of "original_protections" + * as we can get out of the actual "cur_prot". + */ + protections &= cur_prot; + if (protections == VM_PROT_NONE) { + /* no access at all: fail */ + vm_map_copy_discard(copy); + return KERN_PROTECTION_FAILURE; + } + } else { + /* + * We want exactly "original_protections" + * out of "cur_prot". + */ + if ((cur_prot & protections) != protections) { + vm_map_copy_discard(copy); + return KERN_PROTECTION_FAILURE; + } + } + + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return KERN_FAILURE; + } + + user_entry->backing.copy = copy; + user_entry->internal = FALSE; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->is_copy = TRUE; + user_entry->offset = 0; + user_entry->protection = protections; + user_entry->size = map_size; + user_entry->data_offset = offset_in_page; + + *size = CAST_DOWN(vm_size_t, map_size); + *object_handle = user_handle; + return KERN_SUCCESS; + } + if (parent_entry == NULL || (permission & MAP_MEM_NAMED_REUSE)) { + if (use_data_addr) { + map_size = vm_map_round_page(offset + *size, PAGE_MASK) - map_offset; + offset_in_page = offset - map_offset; + } else { + map_size = vm_map_round_page(*size, PAGE_MASK); + offset_in_page = 0; + } + /* Create a named object based on address range within the task map */ /* Go find the object at given address */ + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + redo_lookup: + protections = original_protections; vm_map_lock_read(target_map); /* get the object associated with the target address */ @@ -2154,13 +2232,23 @@ redo_lookup: /* that requested by the caller */ kr = vm_map_lookup_locked(&target_map, map_offset, - protections, &version, - &object, &obj_off, &prot, &wired, &behavior, - &lo_offset, &hi_offset, &real_map); + protections | mask_protections, + OBJECT_LOCK_EXCLUSIVE, 
&version, + &object, &obj_off, &prot, &wired, + &fault_info, + &real_map); if (kr != KERN_SUCCESS) { vm_map_unlock_read(target_map); goto make_mem_done; } + if (mask_protections) { + /* + * The caller asked us to use the "protections" as + * a mask, so restrict "protections" to what this + * mapping actually allows. + */ + protections &= prot; + } if (((prot & protections) != protections) || (object == kernel_object)) { kr = KERN_INVALID_RIGHT; @@ -2246,6 +2334,14 @@ redo_lookup: /* JMM - The check below should be reworked instead. */ object->true_share = TRUE; } + if (mask_protections) { + /* + * The caller asked us to use the "protections" as + * a mask, so restrict "protections" to what this + * mapping actually allows. + */ + protections &= map_entry->max_protection; + } if(((map_entry->max_protection) & protections) != protections) { kr = KERN_INVALID_RIGHT; vm_object_unlock(object); @@ -2257,7 +2353,7 @@ redo_lookup: goto make_mem_done; } - mappable_size = hi_offset - obj_off; + mappable_size = fault_info.hi_offset - obj_off; total_size = map_entry->vme_end - map_entry->vme_start; if(map_size > mappable_size) { /* try to extend mappable size if the entries */ @@ -2274,6 +2370,20 @@ redo_lookup: next_entry->vme_prev->offset + (next_entry->vme_prev->vme_end - next_entry->vme_prev->vme_start))) { + if (mask_protections) { + /* + * The caller asked us to use + * the "protections" as a mask, + * so restrict "protections" to + * what this mapping actually + * allows. 
+ */ + protections &= next_entry->max_protection; + } + if ((next_entry->wired_count) && + (map_entry->wired_count == 0)) { + break; + } if(((next_entry->max_protection) & protections) != protections) { break; @@ -2293,16 +2403,57 @@ redo_lookup: } } - if(object->internal) { + if (vm_map_entry_should_cow_for_true_share(map_entry) && + object->vo_size > map_size && + map_size != 0) { + /* + * Set up the targeted range for copy-on-write to + * limit the impact of "true_share"/"copy_delay" to + * that range instead of the entire VM object... + */ + + vm_object_unlock(object); + if (vm_map_lock_read_to_write(target_map)) { + vm_object_deallocate(object); + target_map = original_map; + goto redo_lookup; + } + + vm_map_clip_start(target_map, + map_entry, + vm_map_trunc_page(offset, + VM_MAP_PAGE_MASK(target_map))); + vm_map_clip_end(target_map, + map_entry, + (vm_map_round_page(offset + map_size, + VM_MAP_PAGE_MASK(target_map)))); + force_shadow = TRUE; + + if ((map_entry->vme_end - offset) < map_size) { + map_size = map_entry->vme_end - offset; + } + total_size = map_entry->vme_end - map_entry->vme_start; + + vm_map_lock_write_to_read(target_map); + vm_object_lock(object); + } + + if (object->internal) { /* vm_map_lookup_locked will create a shadow if */ /* needs_copy is set but does not check for the */ /* other two conditions shown. It is important to */ /* set up an object which will not be pulled from */ /* under us. 
*/ - if ((map_entry->needs_copy || object->shadowed || - (object->size > total_size)) - && !object->true_share) { + if (force_shadow || + ((map_entry->needs_copy || + object->shadowed || + (object->vo_size > total_size && + (map_entry->offset != 0 || + object->vo_size > + vm_map_round_page(total_size, + VM_MAP_PAGE_MASK(target_map))))) + && !object->true_share)) { /* * We have to unlock the VM object before * trying to upgrade the VM map lock, to @@ -2327,7 +2478,9 @@ redo_lookup: target_map = original_map; goto redo_lookup; } +#if 00 vm_object_lock(object); +#endif /* * JMM - We need to avoid coming here when the object @@ -2338,27 +2491,38 @@ redo_lookup: /* create a shadow object */ vm_object_shadow(&map_entry->object.vm_object, - &map_entry->offset, total_size); + &map_entry->offset, total_size); shadow_object = map_entry->object.vm_object; +#if 00 vm_object_unlock(object); +#endif + + prot = map_entry->protection & ~VM_PROT_WRITE; + + if (override_nx(target_map, map_entry->alias) && prot) + prot |= VM_PROT_EXECUTE; vm_object_pmap_protect( object, map_entry->offset, total_size, ((map_entry->is_shared - || target_map->mapped) + || target_map->mapped_in_other_pmaps) ? 
PMAP_NULL : target_map->pmap), map_entry->vme_start, - map_entry->protection & ~VM_PROT_WRITE); + prot); total_size -= (map_entry->vme_end - map_entry->vme_start); next_entry = map_entry->vme_next; map_entry->needs_copy = FALSE; + + vm_object_lock(shadow_object); while (total_size) { + assert((next_entry->wired_count == 0) || + (map_entry->wired_count)); + if(next_entry->object.vm_object == object) { - shadow_object->ref_count++; - vm_object_res_reference(shadow_object); + vm_object_reference_locked(shadow_object); next_entry->object.vm_object = shadow_object; vm_object_deallocate(object); @@ -2389,8 +2553,6 @@ redo_lookup: + map_entry->offset; vm_map_lock_write_to_read(target_map); - vm_object_lock(object); - } } @@ -2410,6 +2572,8 @@ redo_lookup: wimg_mode = VM_WIMG_IO; } else if (access == MAP_MEM_COPYBACK) { wimg_mode = VM_WIMG_USE_DEFAULT; + } else if (access == MAP_MEM_INNERWBACK) { + wimg_mode = VM_WIMG_INNERWBACK; } else if (access == MAP_MEM_WTHRU) { wimg_mode = VM_WIMG_WTHRU; } else if (access == MAP_MEM_WCOMB) { @@ -2417,6 +2581,22 @@ redo_lookup: } } +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); + } +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; @@ -2432,27 +2612,8 @@ redo_lookup: if(real_map != target_map) vm_map_unlock_read(real_map); - if(object->wimg_bits != wimg_mode) { - vm_page_t p; - - vm_object_paging_wait(object, THREAD_UNINT); - - if ((wimg_mode == VM_WIMG_IO) - || (wimg_mode == VM_WIMG_WCOMB)) - cache_attr = TRUE; - else - cache_attr = FALSE; - - queue_iterate(&object->memq, - p, vm_page_t, listq) { - if (!p->fictitious) { - 
pmap_disconnect(p->phys_page); - if (cache_attr) - pmap_sync_page_attributes_phys(p->phys_page); - } - } - object->wimg_bits = wimg_mode; - } + if (object->wimg_bits != wimg_mode) + vm_object_change_wimg_mode(object, wimg_mode); /* the size of mapped entry that overlaps with our region */ /* which is targeted for share. */ @@ -2476,7 +2637,9 @@ redo_lookup: parent_entry->is_pager == FALSE && parent_entry->offset == obj_off && parent_entry->protection == protections && - parent_entry->size == map_size) { + parent_entry->size == map_size && + ((!use_data_addr && (parent_entry->data_offset == 0)) || + (use_data_addr && (parent_entry->data_offset == offset_in_page)))) { /* * We have a match: re-use "parent_entry". */ @@ -2486,6 +2649,8 @@ redo_lookup: /* parent_entry->ref_count++; XXX ? */ /* Get an extra send-right on handle */ ipc_port_copy_send(parent_handle); + + *size = CAST_DOWN(vm_size_t, map_size); *object_handle = parent_handle; return KERN_SUCCESS; } else { @@ -2509,7 +2674,9 @@ redo_lookup: user_entry->is_sub_map = FALSE; user_entry->is_pager = FALSE; user_entry->offset = obj_off; - user_entry->protection = permission; + user_entry->data_offset = offset_in_page; + user_entry->protection = protections; + SET_MAP_MEM(GET_MAP_MEM(permission), user_entry->protection); user_entry->size = map_size; /* user_object pager and internal fields are not used */ @@ -2521,16 +2688,49 @@ redo_lookup: } else { /* The new object will be base on an existing named object */ - if (parent_entry == NULL) { kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } - if((offset + map_size) > parent_entry->size) { - kr = KERN_INVALID_ARGUMENT; - goto make_mem_done; + + if (use_data_addr) { + /* + * submaps and pagers should only be accessible from within + * the kernel, which shouldn't use the data address flag, so can fail here. 
+ */ + if (parent_entry->is_pager || parent_entry->is_sub_map) { + panic("Shouldn't be using data address with a parent entry that is a submap or pager."); + } + /* + * Account for offset to data in parent entry and + * compute our own offset to data. + */ + if((offset + *size + parent_entry->data_offset) > parent_entry->size) { + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } + + map_offset = vm_map_trunc_page(offset + parent_entry->data_offset, PAGE_MASK); + offset_in_page = (offset + parent_entry->data_offset) - map_offset; + map_size = vm_map_round_page(offset + parent_entry->data_offset + *size, PAGE_MASK) - map_offset; + } else { + map_size = vm_map_round_page(*size, PAGE_MASK); + offset_in_page = 0; + + if((offset + map_size) > parent_entry->size) { + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } } + if (mask_protections) { + /* + * The caller asked us to use the "protections" as + * a mask, so restrict "protections" to what this + * mapping actually allows. + */ + protections &= parent_entry->protection; + } if((protections & parent_entry->protection) != protections) { kr = KERN_PROTECTION_FAILURE; goto make_mem_done; @@ -2544,8 +2744,10 @@ redo_lookup: user_entry->size = map_size; user_entry->offset = parent_entry->offset + map_offset; + user_entry->data_offset = offset_in_page; user_entry->is_sub_map = parent_entry->is_sub_map; user_entry->is_pager = parent_entry->is_pager; + user_entry->is_copy = parent_entry->is_copy; user_entry->internal = parent_entry->internal; user_entry->protection = protections; @@ -2569,6 +2771,22 @@ redo_lookup: /* we now point to this object, hold on */ vm_object_reference(object); vm_object_lock(object); +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); + 
} +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; @@ -2581,10 +2799,12 @@ redo_lookup: make_mem_done: if (user_handle != IP_NULL) { - ipc_port_dealloc_kernel(user_handle); - } - if (user_entry != NULL) { - kfree(user_entry, sizeof *user_entry); + /* + * Releasing "user_handle" causes the kernel object + * associated with it ("user_entry" here) to also be + * released and freed. + */ + mach_memory_entry_port_release(user_handle); } return kr; } @@ -2598,10 +2818,10 @@ _mach_make_memory_entry( ipc_port_t *object_handle, ipc_port_t parent_entry) { - memory_object_offset_t mo_size; + memory_object_size_t mo_size; kern_return_t kr; - mo_size = (memory_object_offset_t)*size; + mo_size = (memory_object_size_t)*size; kr = mach_make_memory_entry_64(target_map, &mo_size, (memory_object_offset_t)offset, permission, object_handle, parent_entry); @@ -2618,10 +2838,10 @@ mach_make_memory_entry( ipc_port_t *object_handle, ipc_port_t parent_entry) { - memory_object_offset_t mo_size; + memory_object_size_t mo_size; kern_return_t kr; - mo_size = (memory_object_offset_t)*size; + mo_size = (memory_object_size_t)*size; kr = mach_make_memory_entry_64(target_map, &mo_size, (memory_object_offset_t)offset, permission, object_handle, parent_entry); @@ -2694,8 +2914,12 @@ mach_memory_entry_allocate( user_entry->backing.pager = NULL; user_entry->is_sub_map = FALSE; user_entry->is_pager = FALSE; - user_entry->size = 0; + user_entry->is_copy = FALSE; user_entry->internal = FALSE; + user_entry->size = 0; + user_entry->offset = 0; + user_entry->data_offset = 0; + user_entry->protection = VM_PROT_NONE; user_entry->ref_count = 1; ipc_kobject_set(user_handle, (ipc_kobject_t) user_entry, @@ -2779,12 +3003,22 @@ mach_memory_entry_purgable_control( ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { return KERN_INVALID_ARGUMENT; } + if (control != VM_PURGABLE_SET_STATE && + 
control != VM_PURGABLE_GET_STATE) + return(KERN_INVALID_ARGUMENT); + + if (control == VM_PURGABLE_SET_STATE && + (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) || + ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) + return(KERN_INVALID_ARGUMENT); mem_entry = (vm_named_entry_t) entry_port->ip_kobject; named_entry_lock(mem_entry); - if (mem_entry->is_sub_map || mem_entry->is_pager) { + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; } @@ -2798,7 +3032,7 @@ mach_memory_entry_purgable_control( vm_object_lock(object); /* check that named entry covers entire object ? */ - if (mem_entry->offset != 0 || object->size != mem_entry->size) { + if (mem_entry->offset != 0 || object->vo_size != mem_entry->size) { vm_object_unlock(object); named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; @@ -2813,6 +3047,54 @@ mach_memory_entry_purgable_control( return kr; } +kern_return_t +mach_memory_entry_get_page_counts( + ipc_port_t entry_port, + unsigned int *resident_page_count, + unsigned int *dirty_page_count) +{ + kern_return_t kr; + vm_named_entry_t mem_entry; + vm_object_t object; + vm_object_offset_t offset; + vm_object_size_t size; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_lock(object); + + offset = mem_entry->offset; + size = mem_entry->size; + + named_entry_unlock(mem_entry); + + kr = vm_object_get_page_counts(object, offset, size, resident_page_count, dirty_page_count); + + vm_object_unlock(object); + + 
return kr; +} + /* * mach_memory_entry_port_release: * @@ -2849,24 +3131,135 @@ mach_destroy_memory_entry( assert(ip_kotype(port) == IKOT_NAMED_ENTRY); #endif /* MACH_ASSERT */ named_entry = (vm_named_entry_t)port->ip_kobject; - mutex_lock(&(named_entry)->Lock); + + named_entry_lock(named_entry); named_entry->ref_count -= 1; + if(named_entry->ref_count == 0) { if (named_entry->is_sub_map) { vm_map_deallocate(named_entry->backing.map); - } else if (!named_entry->is_pager) { - /* release the memory object we've been pointing to */ + } else if (named_entry->is_pager) { + /* JMM - need to drop reference on pager in that case */ + } else if (named_entry->is_copy) { + vm_map_copy_discard(named_entry->backing.copy); + } else { + /* release the VM object we've been pointing to */ vm_object_deallocate(named_entry->backing.object); - } /* else JMM - need to drop reference on pager in that case */ + } - mutex_unlock(&(named_entry)->Lock); + named_entry_unlock(named_entry); + named_entry_lock_destroy(named_entry); kfree((void *) port->ip_kobject, sizeof (struct vm_named_entry)); } else - mutex_unlock(&(named_entry)->Lock); + named_entry_unlock(named_entry); +} + +/* Allow manipulation of individual page state. 
This is actually part of */ +/* the UPL regimen but takes place on the memory entry rather than on a UPL */ + +kern_return_t +mach_memory_entry_page_op( + ipc_port_t entry_port, + vm_object_offset_t offset, + int ops, + ppnum_t *phys_entry, + int *flags) +{ + vm_named_entry_t mem_entry; + vm_object_t object; + kern_return_t kr; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_reference(object); + named_entry_unlock(mem_entry); + + kr = vm_object_page_op(object, offset, ops, phys_entry, flags); + + vm_object_deallocate(object); + + return kr; } +/* + * mach_memory_entry_range_op offers performance enhancement over + * mach_memory_entry_page_op for page_op functions which do not require page + * level state to be returned from the call. Page_op was created to provide + * a low-cost alternative to page manipulation via UPLs when only a single + * page was involved. The range_op call establishes the ability in the _op + * family of functions to work on multiple pages where the lack of page level + * state handling allows the caller to avoid the overhead of the upl structures. 
+ */ + +kern_return_t +mach_memory_entry_range_op( + ipc_port_t entry_port, + vm_object_offset_t offset_beg, + vm_object_offset_t offset_end, + int ops, + int *range) +{ + vm_named_entry_t mem_entry; + vm_object_t object; + kern_return_t kr; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_reference(object); + named_entry_unlock(mem_entry); + + kr = vm_object_range_op(object, + offset_beg, + offset_end, + ops, + (uint32_t *) range); + + vm_object_deallocate(object); + + return kr; +} kern_return_t @@ -2979,6 +3372,10 @@ kernel_upl_commit_range( if (flags & UPL_COMMIT_FREE_ON_EMPTY) flags |= UPL_COMMIT_NOTIFY_EMPTY; + if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) { + return KERN_INVALID_ARGUMENT; + } + kr = upl_commit_range(upl, offset, size, flags, pl, count, &finished); if ((flags & UPL_COMMIT_NOTIFY_EMPTY) && finished) @@ -3045,7 +3442,10 @@ vm_region_object_create( /* Create a named object based on a submap of specified size */ new_map = vm_map_create(PMAP_NULL, VM_MAP_MIN_ADDRESS, - vm_map_round_page(size), TRUE); + vm_map_round_page(size, + VM_MAP_PAGE_MASK(target_map)), + TRUE); + vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(target_map)); user_entry->backing.map = new_map; user_entry->internal = TRUE; @@ -3075,7 +3475,7 @@ vm_map_get_phys_page( vm_map_entry_t entry; ppnum_t phys_page = 0; - map_offset = vm_map_trunc_page(addr); + map_offset = vm_map_trunc_page(addr, PAGE_MASK); vm_map_lock(map); while (vm_map_lookup_entry(map, map_offset, &entry)) { @@ -3098,7 +3498,7 @@ vm_map_get_phys_page( /* If 
they are not present in the object they will */ /* have to be picked up from the pager through the */ /* fault mechanism. */ - if(entry->object.vm_object->shadow_offset == 0) { + if(entry->object.vm_object->vo_shadow_offset == 0) { /* need to call vm_fault */ vm_map_unlock(map); vm_fault(map, map_offset, VM_PROT_NONE, @@ -3108,8 +3508,8 @@ vm_map_get_phys_page( } offset = entry->offset + (map_offset - entry->vme_start); phys_page = (ppnum_t) - ((entry->object.vm_object->shadow_offset - + offset) >> 12); + ((entry->object.vm_object->vo_shadow_offset + + offset) >> PAGE_SHIFT); break; } @@ -3123,7 +3523,7 @@ vm_map_get_phys_page( vm_object_t old_object; vm_object_lock(object->shadow); old_object = object; - offset = offset + object->shadow_offset; + offset = offset + object->vo_shadow_offset; object = object->shadow; vm_object_unlock(old_object); } else { @@ -3149,7 +3549,7 @@ vm_map_get_phys_page( kern_return_t kernel_object_iopl_request( /* forward */ vm_named_entry_t named_entry, memory_object_offset_t offset, - vm_size_t *upl_size, + upl_size_t *upl_size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, @@ -3159,7 +3559,7 @@ kern_return_t kernel_object_iopl_request( vm_named_entry_t named_entry, memory_object_offset_t offset, - vm_size_t *upl_size, + upl_size_t *upl_size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, @@ -3184,7 +3584,9 @@ kernel_object_iopl_request( if(*upl_size == 0) { if(offset >= named_entry->size) return(KERN_INVALID_RIGHT); - *upl_size = named_entry->size - offset; + *upl_size = (upl_size_t) (named_entry->size - offset); + if (*upl_size != named_entry->size - offset) + return KERN_INVALID_ARGUMENT; } if(caller_flags & UPL_COPYOUT_FROM) { if((named_entry->protection & VM_PROT_READ) @@ -3205,8 +3607,9 @@ kernel_object_iopl_request( /* offset from beginning of named entry offset in object */ offset = offset + named_entry->offset; - if(named_entry->is_sub_map) - return 
(KERN_INVALID_ARGUMENT); + if (named_entry->is_sub_map || + named_entry->is_copy) + return KERN_INVALID_ARGUMENT; named_entry_lock(named_entry); @@ -3254,8 +3657,8 @@ kernel_object_iopl_request( } if (!object->private) { - if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE)) - *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE); + if (*upl_size > MAX_UPL_TRANSFER_BYTES) + *upl_size = MAX_UPL_TRANSFER_BYTES; if (object->phys_contiguous) { *flags = UPL_PHYS_CONTIG; } else {