X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/cc9f6e38162d3c1bf6ca97536c2477f476c8e01b..d26ffc64f583ab2d29df48f13518685602bc8832:/osfmk/vm/vm_user.c diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index a659f45a9..a1e2c51c3 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -56,6 +62,29 @@ * User-exported virtual memory functions. */ +/* + * There are three implementations of the "XXX_allocate" functionality in + * the kernel: mach_vm_allocate (for any task on the platform), vm_allocate + * (for a task with the same address space size, especially the current task), + * and vm32_vm_allocate (for the specific case of a 32-bit task). vm_allocate + * in the kernel should only be used on the kernel_task. vm32_vm_allocate only + * makes sense on platforms where a user task can either be 32 or 64, or the kernel + * task can be 32 or 64. mach_vm_allocate makes sense everywhere, and is preferred + * for new code. + * + * The entrypoints into the kernel are more complex. All platforms support a + * mach_vm_allocate-style API (subsystem 4800) which operates with the largest + * size types for the platform. On platforms that only support U32/K32, + * subsystem 4800 is all you need. On platforms that support both U32 and U64, + * subsystem 3800 is used disambiguate the size of parameters, and they will + * always be 32-bit and call into the vm32_vm_allocate APIs. 
On non-U32/K32 platforms, + * the MIG glue should never call into vm_allocate directly, because the calling + * task and kernel_task are unlikely to use the same size parameters + * + * New VM call implementations should be added here and to mach_vm.defs + * (subsystem 4800), and use mach_vm_* "wide" types. + */ + #include #include @@ -69,12 +98,11 @@ #include #include #include +#include #include #include -#include #include -#include #include #include @@ -87,6 +115,10 @@ #include #include #include +#include +#include + +#include vm_size_t upl_offset_to_pagelist = 0; @@ -94,23 +126,39 @@ vm_size_t upl_offset_to_pagelist = 0; #include #endif /* VM_CPM */ -ipc_port_t dynamic_pager_control_port=NULL; - /* * mach_vm_allocate allocates "zero fill" memory in the specfied * map. */ kern_return_t -mach_vm_allocate( +mach_vm_allocate_external( vm_map_t map, mach_vm_offset_t *addr, mach_vm_size_t size, int flags) +{ + vm_tag_t tag; + + VM_GET_FLAGS_ALIAS(flags, tag); + return (mach_vm_allocate_kernel(map, addr, size, flags, tag)); +} + +kern_return_t +mach_vm_allocate_kernel( + vm_map_t map, + mach_vm_offset_t *addr, + mach_vm_size_t size, + int flags, + vm_tag_t tag) { vm_map_offset_t map_addr; vm_map_size_t map_size; kern_return_t result; - boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + boolean_t anywhere; + + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_ALLOCATE) + return KERN_INVALID_ARGUMENT; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); @@ -119,6 +167,7 @@ mach_vm_allocate( return(KERN_SUCCESS); } + anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); if (anywhere) { /* * No specific address requested, so start candidate address @@ -132,10 +181,12 @@ mach_vm_allocate( */ map_addr = vm_map_min(map); if (map_addr == 0) - map_addr += PAGE_SIZE; + map_addr += VM_MAP_PAGE_SIZE(map); } else - map_addr = vm_map_trunc_page(*addr); - map_size = vm_map_round_page(size); + map_addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + 
map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); if (map_size == 0) { return(KERN_INVALID_ARGUMENT); } @@ -146,6 +197,8 @@ mach_vm_allocate( map_size, (vm_map_offset_t)0, flags, + VM_MAP_KERNEL_FLAGS_NONE, + tag, VM_OBJECT_NULL, (vm_object_offset_t)0, FALSE, @@ -163,16 +216,34 @@ mach_vm_allocate( * map (which is limited to the same size as the kernel). */ kern_return_t -vm_allocate( +vm_allocate_external( vm_map_t map, vm_offset_t *addr, vm_size_t size, int flags) +{ + vm_tag_t tag; + + VM_GET_FLAGS_ALIAS(flags, tag); + return (vm_allocate_kernel(map, addr, size, flags, tag)); +} + +kern_return_t +vm_allocate_kernel( + vm_map_t map, + vm_offset_t *addr, + vm_size_t size, + int flags, + vm_tag_t tag) { vm_map_offset_t map_addr; vm_map_size_t map_size; kern_return_t result; - boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + boolean_t anywhere; + + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_ALLOCATE) + return KERN_INVALID_ARGUMENT; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); @@ -181,6 +252,7 @@ vm_allocate( return(KERN_SUCCESS); } + anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); if (anywhere) { /* * No specific address requested, so start candidate address @@ -194,10 +266,12 @@ vm_allocate( */ map_addr = vm_map_min(map); if (map_addr == 0) - map_addr += PAGE_SIZE; + map_addr += VM_MAP_PAGE_SIZE(map); } else - map_addr = vm_map_trunc_page(*addr); - map_size = vm_map_round_page(size); + map_addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); if (map_size == 0) { return(KERN_INVALID_ARGUMENT); } @@ -208,6 +282,8 @@ vm_allocate( map_size, (vm_map_offset_t)0, flags, + VM_MAP_KERNEL_FLAGS_NONE, + tag, VM_OBJECT_NULL, (vm_object_offset_t)0, FALSE, @@ -215,6 +291,12 @@ vm_allocate( VM_PROT_ALL, VM_INHERIT_DEFAULT); +#if KASAN + if (result == KERN_SUCCESS && map->pmap == kernel_pmap) { + kasan_notify_address(map_addr, map_size); + } +#endif + *addr = 
CAST_DOWN(vm_offset_t, map_addr); return(result); } @@ -236,8 +318,12 @@ mach_vm_deallocate( if (size == (mach_vm_offset_t) 0) return(KERN_SUCCESS); - return(vm_map_remove(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), VM_MAP_NO_FLAGS)); + return(vm_map_remove(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS)); } /* @@ -248,7 +334,7 @@ mach_vm_deallocate( */ kern_return_t vm_deallocate( - register vm_map_t map, + vm_map_t map, vm_offset_t start, vm_size_t size) { @@ -258,8 +344,12 @@ vm_deallocate( if (size == (vm_offset_t) 0) return(KERN_SUCCESS); - return(vm_map_remove(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), VM_MAP_NO_FLAGS)); + return(vm_map_remove(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS)); } /* @@ -282,8 +372,10 @@ mach_vm_inherit( return KERN_SUCCESS; return(vm_map_inherit(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_inheritance)); } @@ -294,7 +386,7 @@ mach_vm_inherit( */ kern_return_t vm_inherit( - register vm_map_t map, + vm_map_t map, vm_offset_t start, vm_size_t size, vm_inherit_t new_inheritance) @@ -307,8 +399,10 @@ vm_inherit( return KERN_SUCCESS; return(vm_map_inherit(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_inheritance)); } @@ -334,8 +428,10 @@ mach_vm_protect( return KERN_SUCCESS; return(vm_map_protect(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_protection, set_maximum)); } @@ -363,8 +459,10 @@ vm_protect( return KERN_SUCCESS; 
return(vm_map_protect(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_protection, set_maximum)); } @@ -388,11 +486,14 @@ mach_vm_machine_attribute( if (size == 0) return KERN_SUCCESS; - return vm_map_machine_attribute(map, - vm_map_trunc_page(addr), - vm_map_round_page(addr+size), - attribute, - value); + return vm_map_machine_attribute( + map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr+size, + VM_MAP_PAGE_MASK(map)), + attribute, + value); } /* @@ -415,11 +516,14 @@ vm_machine_attribute( if (size == 0) return KERN_SUCCESS; - return vm_map_machine_attribute(map, - vm_map_trunc_page(addr), - vm_map_round_page(addr+size), - attribute, - value); + return vm_map_machine_attribute( + map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr+size, + VM_MAP_PAGE_MASK(map)), + attribute, + value); } /* @@ -449,6 +553,8 @@ mach_vm_read( if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); + if ((mach_msg_type_number_t) size != size) + return KERN_INVALID_ARGUMENT; error = vm_map_copyin(map, (vm_map_address_t)addr, @@ -458,7 +564,8 @@ mach_vm_read( if (KERN_SUCCESS == error) { *data = (pointer_t) ipc_address; - *data_size = size; + *data_size = (mach_msg_type_number_t) size; + assert(*data_size == size); } return(error); } @@ -487,6 +594,16 @@ vm_read( if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); + if (size > (unsigned)(mach_msg_type_number_t) -1) { + /* + * The kernel could handle a 64-bit "size" value, but + * it could not return the size of the data in "*data_size" + * without overflowing. + * Let's reject this "size" as invalid. 
+ */ + return KERN_INVALID_ARGUMENT; + } + error = vm_map_copyin(map, (vm_map_address_t)addr, (vm_map_size_t)size, @@ -495,7 +612,8 @@ vm_read( if (KERN_SUCCESS == error) { *data = (pointer_t) ipc_address; - *data_size = size; + *data_size = (mach_msg_type_number_t) size; + assert(*data_size == size); } return(error); } @@ -518,7 +636,8 @@ mach_vm_read_list( kern_return_t error; vm_map_copy_t copy; - if (map == VM_MAP_NULL) + if (map == VM_MAP_NULL || + count > VM_MAP_ENTRY_MAX) return(KERN_INVALID_ARGUMENT); error = KERN_SUCCESS; @@ -582,7 +701,8 @@ vm_read_list( kern_return_t error; vm_map_copy_t copy; - if (map == VM_MAP_NULL) + if (map == VM_MAP_NULL || + count > VM_MAP_ENTRY_MAX) return(KERN_INVALID_ARGUMENT); error = KERN_SUCCESS; @@ -815,7 +935,7 @@ vm_copy( * */ kern_return_t -mach_vm_map( +mach_vm_map_external( vm_map_t target_map, mach_vm_offset_t *address, mach_vm_size_t initial_size, @@ -828,287 +948,92 @@ mach_vm_map( vm_prot_t max_protection, vm_inherit_t inheritance) { - vm_map_address_t map_addr; - vm_map_size_t map_size; - vm_object_t object; - vm_object_size_t size; - kern_return_t result; - - /* - * Check arguments for validity - */ - if ((target_map == VM_MAP_NULL) || - (cur_protection & ~VM_PROT_ALL) || - (max_protection & ~VM_PROT_ALL) || - (inheritance > VM_INHERIT_LAST_VALID) || - initial_size == 0) - return(KERN_INVALID_ARGUMENT); - - map_addr = vm_map_trunc_page(*address); - map_size = vm_map_round_page(initial_size); - size = vm_object_round_page(initial_size); - - /* - * Find the vm object (if any) corresponding to this port. 
- */ - if (!IP_VALID(port)) { - object = VM_OBJECT_NULL; - offset = 0; - copy = FALSE; - } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) { - vm_named_entry_t named_entry; - - named_entry = (vm_named_entry_t)port->ip_kobject; - /* a few checks to make sure user is obeying rules */ - if(size == 0) { - if(offset >= named_entry->size) - return(KERN_INVALID_RIGHT); - size = named_entry->size - offset; - } - if((named_entry->protection & max_protection) != max_protection) - return(KERN_INVALID_RIGHT); - if((named_entry->protection & cur_protection) != cur_protection) - return(KERN_INVALID_RIGHT); - if(named_entry->size < (offset + size)) - return(KERN_INVALID_ARGUMENT); - - /* the callers parameter offset is defined to be the */ - /* offset from beginning of named entry offset in object */ - offset = offset + named_entry->offset; - - named_entry_lock(named_entry); - if(named_entry->is_sub_map) { - vm_map_entry_t map_entry; - - named_entry_unlock(named_entry); - vm_object_reference(vm_submap_object); - if ((result = vm_map_enter(target_map, - &map_addr, map_size, - (vm_map_offset_t)mask, flags, - vm_submap_object, 0, - FALSE, - cur_protection, max_protection, inheritance - )) != KERN_SUCCESS) { - vm_object_deallocate(vm_submap_object); - } else { - char alias; - - VM_GET_FLAGS_ALIAS(flags, alias); - if ((alias == VM_MEMORY_SHARED_PMAP) && - !copy) { - vm_map_submap(target_map, map_addr, - map_addr + map_size, - named_entry->backing.map, - (vm_map_offset_t)offset, TRUE); - } else { - vm_map_submap(target_map, map_addr, - map_addr + map_size, - named_entry->backing.map, - (vm_map_offset_t)offset, FALSE); - } - if(copy) { - if(vm_map_lookup_entry( - target_map, map_addr, &map_entry)) { - map_entry->needs_copy = TRUE; - } - } - *address = map_addr; - } - return(result); - - } else if (named_entry->is_pager) { - unsigned int access; - vm_prot_t protections; - unsigned int wimg_mode; - boolean_t cache_attr; - - protections = named_entry->protection - & VM_PROT_ALL; - access = 
GET_MAP_MEM(named_entry->protection); - - object = vm_object_enter( - named_entry->backing.pager, - named_entry->size, - named_entry->internal, - FALSE, - FALSE); - if (object == VM_OBJECT_NULL) { - named_entry_unlock(named_entry); - return(KERN_INVALID_OBJECT); - } - - /* JMM - drop reference on pager here */ - - /* create an extra ref for the named entry */ - vm_object_lock(object); - vm_object_reference_locked(object); - named_entry->backing.object = object; - named_entry->is_pager = FALSE; - named_entry_unlock(named_entry); + vm_tag_t tag; - wimg_mode = object->wimg_bits; - if(access == MAP_MEM_IO) { - wimg_mode = VM_WIMG_IO; - } else if (access == MAP_MEM_COPYBACK) { - wimg_mode = VM_WIMG_USE_DEFAULT; - } else if (access == MAP_MEM_WTHRU) { - wimg_mode = VM_WIMG_WTHRU; - } else if (access == MAP_MEM_WCOMB) { - wimg_mode = VM_WIMG_WCOMB; - } - if ((wimg_mode == VM_WIMG_IO) - || (wimg_mode == VM_WIMG_WCOMB)) - cache_attr = TRUE; - else - cache_attr = FALSE; - - /* wait for object (if any) to be ready */ - if (!named_entry->internal) { - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); - vm_object_lock(object); - } - } + VM_GET_FLAGS_ALIAS(flags, tag); + return (mach_vm_map_kernel(target_map, address, initial_size, mask, flags, tag, port, + offset, copy, cur_protection, max_protection, inheritance)); +} - if(object->wimg_bits != wimg_mode) { - vm_page_t p; +kern_return_t +mach_vm_map_kernel( + vm_map_t target_map, + mach_vm_offset_t *address, + mach_vm_size_t initial_size, + mach_vm_offset_t mask, + int flags, + vm_tag_t tag, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + kern_return_t kr; + vm_map_offset_t vmmaddr; - vm_object_paging_wait(object, THREAD_UNINT); + vmmaddr = (vm_map_offset_t) *address; - object->wimg_bits = wimg_mode; - queue_iterate(&object->memq, p, vm_page_t, listq) { - if 
(!p->fictitious) { - pmap_disconnect(p->phys_page); - if (cache_attr) - pmap_sync_page_attributes_phys(p->phys_page); - } - } - } - object->true_share = TRUE; - if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) - object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; - vm_object_unlock(object); - } else { - /* This is the case where we are going to map */ - /* an already mapped object. If the object is */ - /* not ready it is internal. An external */ - /* object cannot be mapped until it is ready */ - /* we can therefore avoid the ready check */ - /* in this case. */ - object = named_entry->backing.object; - assert(object != VM_OBJECT_NULL); - named_entry_unlock(named_entry); - vm_object_reference(object); - } - } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) { - /* - * JMM - This is temporary until we unify named entries - * and raw memory objects. - * - * Detected fake ip_kotype for a memory object. In - * this case, the port isn't really a port at all, but - * instead is just a raw memory object. 
- */ - - if ((object = vm_object_enter((memory_object_t)port, - size, FALSE, FALSE, FALSE)) - == VM_OBJECT_NULL) - return(KERN_INVALID_OBJECT); + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_MAP) + return KERN_INVALID_ARGUMENT; - /* wait for object (if any) to be ready */ - if (object != VM_OBJECT_NULL) { - if(object == kernel_object) { - printf("Warning: Attempt to map kernel object" - " by a non-private kernel entity\n"); - return(KERN_INVALID_OBJECT); - } - vm_object_lock(object); - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); - vm_object_lock(object); - } - vm_object_unlock(object); - } - } else { - return (KERN_INVALID_OBJECT); + kr = vm_map_enter_mem_object(target_map, + &vmmaddr, + initial_size, + mask, + flags, + VM_MAP_KERNEL_FLAGS_NONE, + tag, + port, + offset, + copy, + cur_protection, + max_protection, + inheritance); + +#if KASAN + if (kr == KERN_SUCCESS && target_map->pmap == kernel_pmap) { + kasan_notify_address(vmmaddr, initial_size); } +#endif - /* - * Perform the copy if requested - */ - - if (copy) { - vm_object_t new_object; - vm_object_offset_t new_offset; - - result = vm_object_copy_strategically(object, offset, size, - &new_object, &new_offset, - ©); - - - if (result == KERN_MEMORY_RESTART_COPY) { - boolean_t success; - boolean_t src_needs_copy; - - /* - * XXX - * We currently ignore src_needs_copy. - * This really is the issue of how to make - * MEMORY_OBJECT_COPY_SYMMETRIC safe for - * non-kernel users to use. Solution forthcoming. - * In the meantime, since we don't allow non-kernel - * memory managers to specify symmetric copy, - * we won't run into problems here. - */ - new_object = object; - new_offset = offset; - success = vm_object_copy_quickly(&new_object, - new_offset, size, - &src_needs_copy, - ©); - assert(success); - result = KERN_SUCCESS; - } - /* - * Throw away the reference to the - * original object, as it won't be mapped. 
- */ - - vm_object_deallocate(object); + *address = vmmaddr; + return kr; +} - if (result != KERN_SUCCESS) - return (result); - object = new_object; - offset = new_offset; - } +/* legacy interface */ +kern_return_t +vm_map_64_external( + vm_map_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + vm_tag_t tag; - if ((result = vm_map_enter(target_map, - &map_addr, map_size, - (vm_map_offset_t)mask, - flags, - object, offset, - copy, - cur_protection, max_protection, inheritance - )) != KERN_SUCCESS) - vm_object_deallocate(object); - *address = map_addr; - return(result); + VM_GET_FLAGS_ALIAS(flags, tag); + return (vm_map_64_kernel(target_map, address, size, mask, flags, tag, port, offset, + copy, cur_protection, max_protection, inheritance)); } - -/* legacy interface */ kern_return_t -vm_map_64( +vm_map_64_kernel( vm_map_t target_map, vm_offset_t *address, vm_size_t size, vm_offset_t mask, int flags, + vm_tag_t tag, ipc_port_t port, vm_object_offset_t offset, boolean_t copy, @@ -1125,21 +1050,42 @@ vm_map_64( map_size = (mach_vm_size_t)size; map_mask = (mach_vm_offset_t)mask; - kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, + kr = mach_vm_map_kernel(target_map, &map_addr, map_size, map_mask, flags, tag, port, offset, copy, cur_protection, max_protection, inheritance); - *address = CAST_DOWN(vm_address_t, map_addr); + *address = CAST_DOWN(vm_offset_t, map_addr); return kr; } /* temporary, until world build */ kern_return_t -vm_map( +vm_map_external( + vm_map_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + ipc_port_t port, + vm_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + vm_tag_t tag; + + VM_GET_FLAGS_ALIAS(flags, tag); + return 
(vm_map_kernel(target_map, address, size, mask, flags, tag, port, offset, copy, cur_protection, max_protection, inheritance)); +} + +kern_return_t +vm_map_kernel( vm_map_t target_map, vm_offset_t *address, vm_size_t size, vm_offset_t mask, int flags, + vm_tag_t tag, ipc_port_t port, vm_offset_t offset, boolean_t copy, @@ -1158,10 +1104,10 @@ vm_map( map_mask = (mach_vm_offset_t)mask; obj_offset = (vm_object_offset_t)offset; - kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, + kr = mach_vm_map_kernel(target_map, &map_addr, map_size, map_mask, flags, tag, port, obj_offset, copy, cur_protection, max_protection, inheritance); - *address = CAST_DOWN(vm_address_t, map_addr); + *address = CAST_DOWN(vm_offset_t, map_addr); return kr; } @@ -1172,14 +1118,35 @@ vm_map( * over top of itself (with altered permissions and/or * as an in-place copy of itself). */ +kern_return_t +mach_vm_remap_external( + vm_map_t target_map, + mach_vm_offset_t *address, + mach_vm_size_t size, + mach_vm_offset_t mask, + int flags, + vm_map_t src_map, + mach_vm_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + vm_tag_t tag; + VM_GET_FLAGS_ALIAS(flags, tag); + + return (mach_vm_remap_kernel(target_map, address, size, mask, flags, tag, src_map, memory_address, + copy, cur_protection, max_protection, inheritance)); +} kern_return_t -mach_vm_remap( +mach_vm_remap_kernel( vm_map_t target_map, mach_vm_offset_t *address, mach_vm_size_t size, mach_vm_offset_t mask, - boolean_t anywhere, + int flags, + vm_tag_t tag, vm_map_t src_map, mach_vm_offset_t memory_address, boolean_t copy, @@ -1193,13 +1160,19 @@ mach_vm_remap( if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map) return KERN_INVALID_ARGUMENT; + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_REMAP) + return KERN_INVALID_ARGUMENT; + map_addr = (vm_map_offset_t)*address; kr = vm_map_remap(target_map, &map_addr, size, mask, - 
anywhere, + flags, + VM_MAP_KERNEL_FLAGS_NONE, + tag, src_map, memory_address, copy, @@ -1222,12 +1195,34 @@ mach_vm_remap( * kernel context). */ kern_return_t -vm_remap( +vm_remap_external( + vm_map_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + vm_map_t src_map, + vm_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + vm_tag_t tag; + VM_GET_FLAGS_ALIAS(flags, tag); + + return (vm_remap_kernel(target_map, address, size, mask, flags, tag, src_map, + memory_address, copy, cur_protection, max_protection, inheritance)); +} + +kern_return_t +vm_remap_kernel( vm_map_t target_map, vm_offset_t *address, vm_size_t size, vm_offset_t mask, - boolean_t anywhere, + int flags, + vm_tag_t tag, vm_map_t src_map, vm_offset_t memory_address, boolean_t copy, @@ -1241,13 +1236,19 @@ vm_remap( if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map) return KERN_INVALID_ARGUMENT; + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_REMAP) + return KERN_INVALID_ARGUMENT; + map_addr = (vm_map_offset_t)*address; kr = vm_map_remap(target_map, &map_addr, size, mask, - anywhere, + flags, + VM_MAP_KERNEL_FLAGS_NONE, + tag, src_map, memory_address, copy, @@ -1272,12 +1273,24 @@ vm_remap( * [ To unwire the pages, specify VM_PROT_NONE. 
] */ kern_return_t -mach_vm_wire( +mach_vm_wire_external( host_priv_t host_priv, vm_map_t map, mach_vm_offset_t start, mach_vm_size_t size, vm_prot_t access) +{ + return (mach_vm_wire_kernel(host_priv, map, start, size, access, VM_KERN_MEMORY_MLOCK)); +} + +kern_return_t +mach_vm_wire_kernel( + host_priv_t host_priv, + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size, + vm_prot_t access, + vm_tag_t tag) { kern_return_t rc; @@ -1289,15 +1302,24 @@ mach_vm_wire( if (map == VM_MAP_NULL) return KERN_INVALID_TASK; - if (access & ~VM_PROT_ALL) + if (access & ~VM_PROT_ALL || (start + size < start)) return KERN_INVALID_ARGUMENT; if (access != VM_PROT_NONE) { - rc = vm_map_wire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), access, TRUE); + rc = vm_map_wire_kernel(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + access, tag, + TRUE); } else { - rc = vm_map_unwire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), TRUE); + rc = vm_map_unwire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + TRUE); } return rc; } @@ -1313,7 +1335,7 @@ mach_vm_wire( kern_return_t vm_wire( host_priv_t host_priv, - register vm_map_t map, + vm_map_t map, vm_offset_t start, vm_size_t size, vm_prot_t access) @@ -1334,11 +1356,20 @@ vm_wire( if (size == 0) { rc = KERN_SUCCESS; } else if (access != VM_PROT_NONE) { - rc = vm_map_wire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), access, TRUE); + rc = vm_map_wire_kernel(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + access, VM_KERN_MEMORY_OSFMK, + TRUE); } else { - rc = vm_map_unwire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), TRUE); + rc = vm_map_unwire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), 
+ TRUE); } return rc; } @@ -1439,6 +1470,37 @@ vm_msync( } +int +vm_toggle_entry_reuse(int toggle, int *old_value) +{ + vm_map_t map = current_map(); + + assert(!map->is_nested_map); + if(toggle == VM_TOGGLE_GETVALUE && old_value != NULL){ + *old_value = map->disable_vmentry_reuse; + } else if(toggle == VM_TOGGLE_SET){ + vm_map_entry_t map_to_entry; + + vm_map_lock(map); + vm_map_disable_hole_optimization(map); + map->disable_vmentry_reuse = TRUE; + __IGNORE_WCASTALIGN(map_to_entry = vm_map_to_entry(map)); + if (map->first_free == map_to_entry) { + map->highest_entry_end = vm_map_min(map); + } else { + map->highest_entry_end = map->first_free->vme_end; + } + vm_map_unlock(map); + } else if (toggle == VM_TOGGLE_CLEAR){ + vm_map_lock(map); + map->disable_vmentry_reuse = FALSE; + vm_map_unlock(map); + } else + return KERN_INVALID_ARGUMENT; + + return KERN_SUCCESS; +} + /* * mach_vm_behavior_set * @@ -1452,17 +1514,37 @@ kern_return_t mach_vm_behavior_set( vm_map_t map, mach_vm_offset_t start, - mach_vm_size_t size, + mach_vm_size_t size, vm_behavior_t new_behavior) { + vm_map_offset_t align_mask; + if ((map == VM_MAP_NULL) || (start + size < start)) return(KERN_INVALID_ARGUMENT); if (size == 0) return KERN_SUCCESS; - return(vm_map_behavior_set(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), new_behavior)); + switch (new_behavior) { + case VM_BEHAVIOR_REUSABLE: + case VM_BEHAVIOR_REUSE: + case VM_BEHAVIOR_CAN_REUSE: + /* + * Align to the hardware page size, to allow + * malloc() to maximize the amount of re-usability, + * even on systems with larger software page size. 
+ */ + align_mask = PAGE_MASK; + break; + default: + align_mask = VM_MAP_PAGE_MASK(map); + break; + } + + return vm_map_behavior_set(map, + vm_map_trunc_page(start, align_mask), + vm_map_round_page(start+size, align_mask), + new_behavior); } /* @@ -1485,14 +1567,13 @@ vm_behavior_set( vm_size_t size, vm_behavior_t new_behavior) { - if ((map == VM_MAP_NULL) || (start + size < start)) - return(KERN_INVALID_ARGUMENT); - - if (size == 0) - return KERN_SUCCESS; + if (start + size < start) + return KERN_INVALID_ARGUMENT; - return(vm_map_behavior_set(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), new_behavior)); + return mach_vm_behavior_set(map, + (mach_vm_offset_t) start, + (mach_vm_size_t) size, + new_behavior); } /* @@ -1759,34 +1840,60 @@ vm_region_recurse( } kern_return_t -vm_purgable_control( +mach_vm_purgable_control( vm_map_t map, - vm_offset_t address, + mach_vm_offset_t address, vm_purgable_t control, int *state) { if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; + if (control == VM_PURGABLE_SET_STATE_FROM_KERNEL) { + /* not allowed from user-space */ + return KERN_INVALID_ARGUMENT; + } + return vm_map_purgable_control(map, - vm_map_trunc_page(address), + vm_map_trunc_page(address, PAGE_MASK), control, state); } - - -/* - * Ordinarily, the right to allocate CPM is restricted - * to privileged applications (those that can gain access - * to the host priv port). Set this variable to zero if - * you want to let any application allocate CPM. - */ -unsigned int vm_allocate_cpm_privileged = 0; -/* - * Allocate memory in the specified map, with the caveat that - * the memory is physically contiguous. This call may fail - * if the system can't find sufficient contiguous memory. 
+kern_return_t +vm_purgable_control( + vm_map_t map, + vm_offset_t address, + vm_purgable_t control, + int *state) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + if (control == VM_PURGABLE_SET_STATE_FROM_KERNEL) { + /* not allowed from user-space */ + return KERN_INVALID_ARGUMENT; + } + + return vm_map_purgable_control(map, + vm_map_trunc_page(address, PAGE_MASK), + control, + state); +} + + +/* + * Ordinarily, the right to allocate CPM is restricted + * to privileged applications (those that can gain access + * to the host priv port). Set this variable to zero if + * you want to let any application allocate CPM. + */ +unsigned int vm_allocate_cpm_privileged = 0; + +/* + * Allocate memory in the specified map, with the caveat that + * the memory is physically contiguous. This call may fail + * if the system can't find sufficient contiguous memory. * This call may cause or lead to heart-stopping amounts of * paging activity. * @@ -1834,9 +1941,10 @@ mach_vm_page_query( if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - return vm_map_page_info(map, - vm_map_trunc_page(offset), - disposition, ref_count); + return vm_map_page_query_internal( + map, + vm_map_trunc_page(offset, PAGE_MASK), + disposition, ref_count); } kern_return_t @@ -1849,9 +1957,150 @@ vm_map_page_query( if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - return vm_map_page_info(map, - vm_map_trunc_page(offset), - disposition, ref_count); + return vm_map_page_query_internal( + map, + vm_map_trunc_page(offset, PAGE_MASK), + disposition, ref_count); +} + +kern_return_t +mach_vm_page_range_query( + vm_map_t map, + mach_vm_offset_t address, + mach_vm_size_t size, + mach_vm_address_t dispositions_addr, + mach_vm_size_t *dispositions_count) +{ + kern_return_t kr = KERN_SUCCESS; + int num_pages = 0, i = 0; + mach_vm_size_t curr_sz = 0, copy_sz = 0; + mach_vm_size_t disp_buf_req_size = 0, disp_buf_total_size = 0; + mach_msg_type_number_t count = 0; + + void *info = NULL; + void 
*local_disp = NULL;; + vm_map_size_t info_size = 0, local_disp_size = 0; + mach_vm_offset_t start = 0, end = 0; + + if (map == VM_MAP_NULL || dispositions_count == NULL) { + return KERN_INVALID_ARGUMENT; + } + + disp_buf_req_size = ( *dispositions_count * sizeof(int)); + start = mach_vm_trunc_page(address); + end = mach_vm_round_page(address + size); + + if (end < start) { + return KERN_INVALID_ARGUMENT; + } + + if (disp_buf_req_size == 0 || (end == start)) { + return KERN_SUCCESS; + } + + /* + * For large requests, we will go through them + * MAX_PAGE_RANGE_QUERY chunk at a time. + */ + + curr_sz = MIN(end - start, MAX_PAGE_RANGE_QUERY); + num_pages = (int) (curr_sz >> PAGE_SHIFT); + + info_size = num_pages * sizeof(vm_page_info_basic_data_t); + info = kalloc(info_size); + + if (info == NULL) { + return KERN_RESOURCE_SHORTAGE; + } + + local_disp_size = num_pages * sizeof(int); + local_disp = kalloc(local_disp_size); + + if (local_disp == NULL) { + + kfree(info, info_size); + info = NULL; + return KERN_RESOURCE_SHORTAGE; + } + + while (size) { + + count = VM_PAGE_INFO_BASIC_COUNT; + kr = vm_map_page_range_info_internal( + map, + start, + mach_vm_round_page(start + curr_sz), + VM_PAGE_INFO_BASIC, + (vm_page_info_t) info, + &count); + + assert(kr == KERN_SUCCESS); + + for (i = 0; i < num_pages; i++) { + + ((int*)local_disp)[i] = ((vm_page_info_basic_t)info)[i].disposition; + } + + copy_sz = MIN(disp_buf_req_size, num_pages * sizeof(int)/* an int per page */); + kr = copyout(local_disp, (mach_vm_address_t)dispositions_addr, copy_sz); + + start += curr_sz; + disp_buf_req_size -= copy_sz; + disp_buf_total_size += copy_sz; + + if (kr != 0) { + break; + } + + if ((disp_buf_req_size == 0) || (curr_sz >= size)) { + + /* + * We might have inspected the full range OR + * more than it esp. if the user passed in + * non-page aligned start/size and/or if we + * descended into a submap. We are done here. 
+ */ + + size = 0; + + } else { + + dispositions_addr += copy_sz; + + size -= curr_sz; + + curr_sz = MIN(mach_vm_round_page(size), MAX_PAGE_RANGE_QUERY); + num_pages = (int)(curr_sz >> PAGE_SHIFT); + } + } + + *dispositions_count = disp_buf_total_size / sizeof(int); + + kfree(local_disp, local_disp_size); + local_disp = NULL; + + kfree(info, info_size); + info = NULL; + + return kr; +} + +kern_return_t +mach_vm_page_info( + vm_map_t map, + mach_vm_address_t address, + vm_page_info_flavor_t flavor, + vm_page_info_t info, + mach_msg_type_number_t *count) +{ + kern_return_t kr; + + if (map == VM_MAP_NULL) { + return KERN_INVALID_ARGUMENT; + } + + kr = vm_map_page_info(map, address, flavor, info, count); + return kr; } /* map a (whole) upl into an address space */ @@ -1859,7 +2108,7 @@ kern_return_t vm_upl_map( vm_map_t map, upl_t upl, - vm_offset_t *dst_addr) + vm_address_t *dst_addr) { vm_map_offset_t map_addr; kern_return_t kr; @@ -1868,7 +2117,7 @@ vm_upl_map( return KERN_INVALID_ARGUMENT; kr = vm_map_enter_upl(map, upl, &map_addr); - *dst_addr = CAST_DOWN(vm_offset_t, map_addr); + *dst_addr = CAST_DOWN(vm_address_t, map_addr); return kr; } @@ -1893,11 +2142,12 @@ vm_map_get_upl( upl_t *upl, upl_page_info_array_t page_list, unsigned int *count, - int *flags, + upl_control_flags_t *flags, + vm_tag_t tag, int force_data_sync) { - int map_flags; - kern_return_t kr; + upl_control_flags_t map_flags; + kern_return_t kr; if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; @@ -1912,17 +2162,19 @@ vm_map_get_upl( upl, page_list, count, - &map_flags); + &map_flags, + tag); *flags = (map_flags & ~UPL_FORCE_DATA_SYNC); return kr; } - -__private_extern__ kern_return_t -mach_memory_entry_allocate( - vm_named_entry_t *user_entry_p, - ipc_port_t *user_handle_p); /* forward */ +#if CONFIG_EMBEDDED +extern int proc_selfpid(void); +extern char *proc_name_address(void *p); +int cs_executable_mem_entry = 0; +int log_executable_mem_entry = 0; +#endif /* CONFIG_EMBEDDED */ /* * 
mach_make_memory_entry_64 @@ -1932,7 +2184,6 @@ mach_memory_entry_allocate( * somewhere else. Rather than doing it all at once (and * without needing access to the other whole map). */ - kern_return_t mach_make_memory_entry_64( vm_map_t target_map, @@ -1941,6 +2192,32 @@ mach_make_memory_entry_64( vm_prot_t permission, ipc_port_t *object_handle, ipc_port_t parent_handle) +{ + if ((permission & MAP_MEM_FLAGS_MASK) & ~MAP_MEM_FLAGS_USER) { + /* + * Unknown flag: reject for forward compatibility. + */ + return KERN_INVALID_VALUE; + } + + return mach_make_memory_entry_internal(target_map, + size, + offset, + permission, + object_handle, + parent_handle); +} + +extern int pacified_purgeable_iokit; + +kern_return_t +mach_make_memory_entry_internal( + vm_map_t target_map, + memory_object_size_t *size, + memory_object_offset_t offset, + vm_prot_t permission, + ipc_port_t *object_handle, + ipc_port_t parent_handle) { vm_map_version_t version; vm_named_entry_t parent_entry; @@ -1951,10 +2228,10 @@ mach_make_memory_entry_64( /* needed for call to vm_map_lookup_locked */ boolean_t wired; + boolean_t iskernel; vm_object_offset_t obj_off; vm_prot_t prot; - vm_map_offset_t lo_offset, hi_offset; - vm_behavior_t behavior; + struct vm_object_fault_info fault_info; vm_object_t object; vm_object_t shadow_object; @@ -1963,22 +2240,27 @@ mach_make_memory_entry_64( vm_map_entry_t next_entry; vm_map_t local_map; vm_map_t original_map = target_map; - vm_map_size_t total_size; - vm_map_size_t map_size; - vm_map_offset_t map_offset; + vm_map_size_t total_size, map_size; + vm_map_offset_t map_start, map_end; vm_map_offset_t local_offset; vm_object_size_t mappable_size; + /* + * Stash the offset in the page for use by vm_map_enter_mem_object() + * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case. 
+ */ + vm_object_offset_t offset_in_page; + unsigned int access; vm_prot_t protections; + vm_prot_t original_protections, mask_protections; unsigned int wimg_mode; - boolean_t cache_attr = FALSE; - if (((permission & 0x00FF0000) & - ~(MAP_MEM_ONLY | - MAP_MEM_NAMED_CREATE | - MAP_MEM_PURGABLE | - MAP_MEM_NAMED_REUSE))) { + boolean_t force_shadow = FALSE; + boolean_t use_data_addr; + boolean_t use_4K_compat; + + if ((permission & MAP_MEM_FLAGS_MASK) & ~MAP_MEM_FLAGS_ALL) { /* * Unknown flag: reject for forward compatibility. */ @@ -1992,75 +2274,66 @@ mach_make_memory_entry_64( parent_entry = NULL; } - protections = permission & VM_PROT_ALL; + if (parent_entry && parent_entry->is_copy) { + return KERN_INVALID_ARGUMENT; + } + + original_protections = permission & VM_PROT_ALL; + protections = original_protections; + mask_protections = permission & VM_PROT_IS_MASK; access = GET_MAP_MEM(permission); + use_data_addr = ((permission & MAP_MEM_USE_DATA_ADDR) != 0); + use_4K_compat = ((permission & MAP_MEM_4K_DATA_ADDR) != 0); user_handle = IP_NULL; user_entry = NULL; - map_offset = vm_map_trunc_page(offset); - map_size = vm_map_round_page(*size); + map_start = vm_map_trunc_page(offset, PAGE_MASK); if (permission & MAP_MEM_ONLY) { boolean_t parent_is_object; - if (parent_entry == NULL) { + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + + if (use_data_addr || use_4K_compat || parent_entry == NULL) { return KERN_INVALID_ARGUMENT; } - parent_is_object = !(parent_entry->is_sub_map || parent_entry->is_pager); + parent_is_object = !parent_entry->is_sub_map; object = parent_entry->backing.object; if(parent_is_object && object != VM_OBJECT_NULL) wimg_mode = object->wimg_bits; else - wimg_mode = VM_WIMG_DEFAULT; + wimg_mode = VM_WIMG_USE_DEFAULT; if((access != GET_MAP_MEM(parent_entry->protection)) && !(parent_entry->protection & VM_PROT_WRITE)) { return KERN_INVALID_RIGHT; } - if(access == MAP_MEM_IO) { - SET_MAP_MEM(access, 
parent_entry->protection); - wimg_mode = VM_WIMG_IO; - } else if (access == MAP_MEM_COPYBACK) { - SET_MAP_MEM(access, parent_entry->protection); - wimg_mode = VM_WIMG_DEFAULT; - } else if (access == MAP_MEM_WTHRU) { - SET_MAP_MEM(access, parent_entry->protection); - wimg_mode = VM_WIMG_WTHRU; - } else if (access == MAP_MEM_WCOMB) { - SET_MAP_MEM(access, parent_entry->protection); - wimg_mode = VM_WIMG_WCOMB; - } - if(parent_is_object && object && + vm_prot_to_wimg(access, &wimg_mode); + if (access != MAP_MEM_NOOP) + SET_MAP_MEM(access, parent_entry->protection); + if (parent_is_object && object && (access != MAP_MEM_NOOP) && (!(object->nophyscache))) { - if(object->wimg_bits != wimg_mode) { - vm_page_t p; - if ((wimg_mode == VM_WIMG_IO) - || (wimg_mode == VM_WIMG_WCOMB)) - cache_attr = TRUE; - else - cache_attr = FALSE; - vm_object_lock(object); - vm_object_paging_wait(object, THREAD_UNINT); - object->wimg_bits = wimg_mode; - queue_iterate(&object->memq, - p, vm_page_t, listq) { - if (!p->fictitious) { - pmap_disconnect(p->phys_page); - if (cache_attr) - pmap_sync_page_attributes_phys(p->phys_page); - } - } - vm_object_unlock(object); + + if (object->wimg_bits != wimg_mode) { + vm_object_lock(object); + vm_object_change_wimg_mode(object, wimg_mode); + vm_object_unlock(object); } } if (object_handle) *object_handle = IP_NULL; return KERN_SUCCESS; - } + } else if (permission & MAP_MEM_NAMED_CREATE) { + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + + if (use_data_addr || use_4K_compat) { + return KERN_INVALID_ARGUMENT; + } - if(permission & MAP_MEM_NAMED_CREATE) { kr = mach_memory_entry_allocate(&user_entry, &user_handle); if (kr != KERN_SUCCESS) { return KERN_FAILURE; @@ -2069,9 +2342,9 @@ mach_make_memory_entry_64( /* * Force the creation of the VM object now. 
*/ - if (map_size > (vm_map_size_t) VM_MAX_ADDRESS) { + if (map_size > (vm_map_size_t) ANON_MAX_SIZE) { /* - * LP64todo - for now, we can only allocate 4GB + * LP64todo - for now, we can only allocate 4GB-4096 * internal objects because the default pager can't * page bigger ones. Remove this when it can. */ @@ -2089,8 +2362,52 @@ mach_make_memory_entry_64( kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } - object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE; + object->purgable = VM_PURGABLE_NONVOLATILE; + if (permission & MAP_MEM_PURGABLE_KERNEL_ONLY) { + object->purgeable_only_by_kernel = TRUE; + } + assert(object->vo_purgeable_owner == NULL); + assert(object->resident_page_count == 0); + assert(object->wired_page_count == 0); + vm_object_lock(object); + if (pacified_purgeable_iokit) { + if (permission & MAP_MEM_LEDGER_TAG_NETWORK) { + vm_purgeable_nonvolatile_enqueue(object, + kernel_task); + } else { + vm_purgeable_nonvolatile_enqueue(object, + current_task()); + } + } else { + if (object->purgeable_only_by_kernel) { + vm_purgeable_nonvolatile_enqueue(object, + kernel_task); + } else { + vm_purgeable_nonvolatile_enqueue(object, + current_task()); + } + } + vm_object_unlock(object); + } + +#if CONFIG_SECLUDED_MEMORY + if (secluded_for_iokit && /* global boot-arg */ + ((permission & MAP_MEM_GRAB_SECLUDED) +#if 11 + /* XXX FBDP for my testing only */ + || (secluded_for_fbdp && map_size == 97550336) +#endif + )) { +#if 11 + if (!(permission & MAP_MEM_GRAB_SECLUDED) && + secluded_for_fbdp) { + printf("FBDP: object %p size %lld can grab secluded\n", object, (uint64_t) map_size); + } +#endif + object->can_grab_secluded = TRUE; + assert(!object->eligible_for_secluded); } +#endif /* CONFIG_SECLUDED_MEMORY */ /* * The VM object is brand new and nobody else knows about it, @@ -2098,18 +2415,11 @@ mach_make_memory_entry_64( */ wimg_mode = object->wimg_bits; - if (access == MAP_MEM_IO) { - wimg_mode = VM_WIMG_IO; - } else if (access == MAP_MEM_COPYBACK) { - wimg_mode = 
VM_WIMG_DEFAULT; - } else if (access == MAP_MEM_WTHRU) { - wimg_mode = VM_WIMG_WTHRU; - } else if (access == MAP_MEM_WCOMB) { - wimg_mode = VM_WIMG_WCOMB; - } - if (access != MAP_MEM_NOOP) { - object->wimg_bits = wimg_mode; - } + vm_prot_to_wimg(access, &wimg_mode); + if (access != MAP_MEM_NOOP) { + object->wimg_bits = wimg_mode; + } + /* the object has no pages, so no WIMG bits to update here */ /* @@ -2122,12 +2432,13 @@ mach_make_memory_entry_64( * shadow objects either... */ object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + object->true_share = TRUE; user_entry->backing.object = object; user_entry->internal = TRUE; user_entry->is_sub_map = FALSE; - user_entry->is_pager = FALSE; user_entry->offset = 0; + user_entry->data_offset = 0; user_entry->protection = protections; SET_MAP_MEM(access, user_entry->protection); user_entry->size = map_size; @@ -2135,7 +2446,127 @@ mach_make_memory_entry_64( /* user_object pager and internal fields are not used */ /* when the object field is filled in. 
+ */ - *size = CAST_DOWN(vm_size_t, map_size); + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); *object_handle = user_handle; return KERN_SUCCESS; } + + if (permission & MAP_MEM_VM_COPY) { + vm_map_copy_t copy; + + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + if (use_data_addr || use_4K_compat) { + offset_in_page = offset - map_start; + if (use_4K_compat) + offset_in_page &= ~((signed)(0xFFF)); + } else { + offset_in_page = 0; + } + + kr = vm_map_copyin_internal(target_map, + map_start, + map_size, + VM_MAP_COPYIN_ENTRY_LIST, + &copy); + if (kr != KERN_SUCCESS) { + return kr; + } + + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return KERN_FAILURE; + } + + user_entry->backing.copy = copy; + user_entry->internal = FALSE; + user_entry->is_sub_map = FALSE; + user_entry->is_copy = TRUE; + user_entry->offset = 0; + user_entry->protection = protections; + user_entry->size = map_size; + user_entry->data_offset = offset_in_page; + + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); + *object_handle = user_handle; + return KERN_SUCCESS; + } + + if (permission & MAP_MEM_VM_SHARE) { + vm_map_copy_t copy; + vm_prot_t cur_prot, max_prot; + + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + if (use_data_addr || use_4K_compat) { + offset_in_page = offset - map_start; + if (use_4K_compat) + offset_in_page &= ~((signed)(0xFFF)); + } else { + offset_in_page = 0; + } + + cur_prot = VM_PROT_ALL; + kr = vm_map_copy_extract(target_map, + map_start, + map_size, + &copy, + &cur_prot, + &max_prot); + if (kr != KERN_SUCCESS) { + return kr; + } + + if (mask_protections) { + /* + * We just want as much of "original_protections" + * as we can 
get out of the actual "cur_prot". + */ + protections &= cur_prot; + if (protections == VM_PROT_NONE) { + /* no access at all: fail */ + vm_map_copy_discard(copy); + return KERN_PROTECTION_FAILURE; + } + } else { + /* + * We want exactly "original_protections" + * out of "cur_prot". + */ + if ((cur_prot & protections) != protections) { + vm_map_copy_discard(copy); + return KERN_PROTECTION_FAILURE; + } + } + + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return KERN_FAILURE; + } + + user_entry->backing.copy = copy; + user_entry->internal = FALSE; + user_entry->is_sub_map = FALSE; + user_entry->is_copy = TRUE; + user_entry->offset = 0; + user_entry->protection = protections; + user_entry->size = map_size; + user_entry->data_offset = offset_in_page; + + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); *object_handle = user_handle; return KERN_SUCCESS; } @@ -2143,26 +2574,96 @@ mach_make_memory_entry_64( if (parent_entry == NULL || (permission & MAP_MEM_NAMED_REUSE)) { + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + if (use_data_addr || use_4K_compat) { + offset_in_page = offset - map_start; + if (use_4K_compat) + offset_in_page &= ~((signed)(0xFFF)); + } else { + offset_in_page = 0; + } + /* Create a named object based on address range within the task map */ /* Go find the object at given address */ + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + redo_lookup: + protections = original_protections; vm_map_lock_read(target_map); /* get the object associated with the target address */ /* note we check the permission of the range against */ /* that requested by the caller */ - kr = vm_map_lookup_locked(&target_map, map_offset, - protections, &version, - &object, &obj_off, &prot, &wired, &behavior, - &lo_offset, &hi_offset, &real_map); + kr = vm_map_lookup_locked(&target_map, map_start, + protections | 
mask_protections, + OBJECT_LOCK_EXCLUSIVE, &version, + &object, &obj_off, &prot, &wired, + &fault_info, + &real_map); if (kr != KERN_SUCCESS) { vm_map_unlock_read(target_map); goto make_mem_done; } + if (mask_protections) { + /* + * The caller asked us to use the "protections" as + * a mask, so restrict "protections" to what this + * mapping actually allows. + */ + protections &= prot; + } +#if CONFIG_EMBEDDED + /* + * Wiring would copy the pages to a shadow object. + * The shadow object would not be code-signed so + * attempting to execute code from these copied pages + * would trigger a code-signing violation. + */ + if (prot & VM_PROT_EXECUTE) { + if (log_executable_mem_entry) { + void *bsd_info; + bsd_info = current_task()->bsd_info; + printf("pid %d[%s] making memory entry out of " + "executable range from 0x%llx to 0x%llx:" + "might cause code-signing issues " + "later\n", + proc_selfpid(), + (bsd_info != NULL + ? proc_name_address(bsd_info) + : "?"), + (uint64_t) map_start, + (uint64_t) map_end); + } + DTRACE_VM2(cs_executable_mem_entry, + uint64_t, (uint64_t)map_start, + uint64_t, (uint64_t)map_end); + cs_executable_mem_entry++; + +#if 11 + /* + * We don't know how the memory entry will be used. + * It might never get wired and might not cause any + * trouble, so let's not reject this request... 
+ */ +#else /* 11 */ + kr = KERN_PROTECTION_FAILURE; + vm_object_unlock(object); + vm_map_unlock_read(target_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); + goto make_mem_done; +#endif /* 11 */ + + } +#endif /* CONFIG_EMBEDDED */ + if (((prot & protections) != protections) - || (object == kernel_object)) { + || (object == kernel_object)) { kr = KERN_INVALID_RIGHT; vm_object_unlock(object); vm_map_unlock_read(target_map); @@ -2193,7 +2694,7 @@ redo_lookup: vm_object_unlock(object); local_map = original_map; - local_offset = map_offset; + local_offset = map_start; if(target_map != local_map) { vm_map_unlock_read(target_map); if(real_map != target_map) @@ -2213,8 +2714,9 @@ redo_lookup: object = VM_OBJECT_NULL; goto make_mem_done; } + iskernel = (local_map->pmap == kernel_pmap); if(!(map_entry->is_sub_map)) { - if(map_entry->object.vm_object != object) { + if (VME_OBJECT(map_entry) != object) { kr = KERN_INVALID_ARGUMENT; vm_map_unlock_read(target_map); if(real_map != target_map) @@ -2227,14 +2729,14 @@ redo_lookup: } else { vm_map_t tmap; tmap = local_map; - local_map = map_entry->object.sub_map; + local_map = VME_SUBMAP(map_entry); vm_map_lock_read(local_map); vm_map_unlock_read(tmap); target_map = local_map; real_map = local_map; local_offset = local_offset - map_entry->vme_start; - local_offset += map_entry->offset; + local_offset += VME_OFFSET(map_entry); } } @@ -2246,6 +2748,14 @@ redo_lookup: /* JMM - The check below should be reworked instead. */ object->true_share = TRUE; } + if (mask_protections) { + /* + * The caller asked us to use the "protections" as + * a mask, so restrict "protections" to what this + * mapping actually allows. 
+ */ + protections &= map_entry->max_protection; + } if(((map_entry->max_protection) & protections) != protections) { kr = KERN_INVALID_RIGHT; vm_object_unlock(object); @@ -2257,7 +2767,7 @@ redo_lookup: goto make_mem_done; } - mappable_size = hi_offset - obj_off; + mappable_size = fault_info.hi_offset - obj_off; total_size = map_entry->vme_end - map_entry->vme_start; if(map_size > mappable_size) { /* try to extend mappable size if the entries */ @@ -2267,13 +2777,27 @@ redo_lookup: /* lets see if the next map entry is still */ /* pointing at this object and is contiguous */ while(map_size > mappable_size) { - if((next_entry->object.vm_object == object) && - (next_entry->vme_start == - next_entry->vme_prev->vme_end) && - (next_entry->offset == - next_entry->vme_prev->offset + - (next_entry->vme_prev->vme_end - - next_entry->vme_prev->vme_start))) { + if ((VME_OBJECT(next_entry) == object) && + (next_entry->vme_start == + next_entry->vme_prev->vme_end) && + (VME_OFFSET(next_entry) == + (VME_OFFSET(next_entry->vme_prev) + + (next_entry->vme_prev->vme_end - + next_entry->vme_prev->vme_start)))) { + if (mask_protections) { + /* + * The caller asked us to use + * the "protections" as a mask, + * so restrict "protections" to + * what this mapping actually + * allows. + */ + protections &= next_entry->max_protection; + } + if ((next_entry->wired_count) && + (map_entry->wired_count == 0)) { + break; + } if(((next_entry->max_protection) & protections) != protections) { break; @@ -2293,16 +2817,59 @@ redo_lookup: } } - if(object->internal) { + /* vm_map_entry_should_cow_for_true_share() checks for malloc tags, + * never true in kernel */ + if (!iskernel && vm_map_entry_should_cow_for_true_share(map_entry) && + object->vo_size > map_size && + map_size != 0) { + /* + * Set up the targeted range for copy-on-write to + * limit the impact of "true_share"/"copy_delay" to + * that range instead of the entire VM object... 
+ */ + + vm_object_unlock(object); + if (vm_map_lock_read_to_write(target_map)) { + vm_object_deallocate(object); + target_map = original_map; + goto redo_lookup; + } + + vm_map_clip_start(target_map, + map_entry, + vm_map_trunc_page(map_start, + VM_MAP_PAGE_MASK(target_map))); + vm_map_clip_end(target_map, + map_entry, + (vm_map_round_page(map_end, + VM_MAP_PAGE_MASK(target_map)))); + force_shadow = TRUE; + + if ((map_entry->vme_end - offset) < map_size) { + map_size = map_entry->vme_end - map_start; + } + total_size = map_entry->vme_end - map_entry->vme_start; + + vm_map_lock_write_to_read(target_map); + vm_object_lock(object); + } + + if (object->internal) { /* vm_map_lookup_locked will create a shadow if */ /* needs_copy is set but does not check for the */ /* other two conditions shown. It is important to */ /* set up an object which will not be pulled from */ /* under us. */ - if ((map_entry->needs_copy || object->shadowed || - (object->size > total_size)) - && !object->true_share) { + if (force_shadow || + ((map_entry->needs_copy || + object->shadowed || + (object->vo_size > total_size && + (VME_OFFSET(map_entry) != 0 || + object->vo_size > + vm_map_round_page(total_size, + VM_MAP_PAGE_MASK(target_map))))) + && !object->true_share)) { /* * We have to unlock the VM object before * trying to upgrade the VM map lock, to @@ -2327,7 +2894,9 @@ redo_lookup: target_map = original_map; goto redo_lookup; } +#if 00 vm_object_lock(object); +#endif /* * JMM - We need to avoid coming here when the object @@ -2337,35 +2906,49 @@ redo_lookup: */ /* create a shadow object */ - vm_object_shadow(&map_entry->object.vm_object, - &map_entry->offset, total_size); - shadow_object = map_entry->object.vm_object; + VME_OBJECT_SHADOW(map_entry, total_size); + shadow_object = VME_OBJECT(map_entry); +#if 00 vm_object_unlock(object); +#endif + + prot = map_entry->protection & ~VM_PROT_WRITE; + + if (override_nx(target_map, + VME_ALIAS(map_entry)) + && prot) + prot |= VM_PROT_EXECUTE; 
vm_object_pmap_protect( - object, map_entry->offset, + object, VME_OFFSET(map_entry), total_size, ((map_entry->is_shared - || target_map->mapped) + || target_map->mapped_in_other_pmaps) ? PMAP_NULL : target_map->pmap), map_entry->vme_start, - map_entry->protection & ~VM_PROT_WRITE); + prot); total_size -= (map_entry->vme_end - map_entry->vme_start); next_entry = map_entry->vme_next; map_entry->needs_copy = FALSE; + + vm_object_lock(shadow_object); while (total_size) { - if(next_entry->object.vm_object == object) { - shadow_object->ref_count++; - vm_object_res_reference(shadow_object); - next_entry->object.vm_object - = shadow_object; + assert((next_entry->wired_count == 0) || + (map_entry->wired_count)); + + if (VME_OBJECT(next_entry) == object) { + vm_object_reference_locked(shadow_object); + VME_OBJECT_SET(next_entry, + shadow_object); vm_object_deallocate(object); - next_entry->offset - = next_entry->vme_prev->offset + - (next_entry->vme_prev->vme_end - - next_entry->vme_prev->vme_start); + VME_OFFSET_SET( + next_entry, + (VME_OFFSET(next_entry->vme_prev) + + (next_entry->vme_prev->vme_end + - next_entry->vme_prev->vme_start))); + next_entry->use_pmap = TRUE; next_entry->needs_copy = FALSE; } else { panic("mach_make_memory_entry_64:" @@ -2385,12 +2968,10 @@ redo_lookup: vm_object_deallocate(object); /* extra ref */ object = shadow_object; - obj_off = (local_offset - map_entry->vme_start) - + map_entry->offset; + obj_off = ((local_offset - map_entry->vme_start) + + VME_OFFSET(map_entry)); vm_map_lock_write_to_read(target_map); - vm_object_lock(object); - } } @@ -2405,18 +2986,26 @@ redo_lookup: /* against delayed copy, etc. is mostly defensive. 
*/ wimg_mode = object->wimg_bits; - if(!(object->nophyscache)) { - if(access == MAP_MEM_IO) { - wimg_mode = VM_WIMG_IO; - } else if (access == MAP_MEM_COPYBACK) { - wimg_mode = VM_WIMG_USE_DEFAULT; - } else if (access == MAP_MEM_WTHRU) { - wimg_mode = VM_WIMG_WTHRU; - } else if (access == MAP_MEM_WCOMB) { - wimg_mode = VM_WIMG_WCOMB; - } + if(!(object->nophyscache)) + vm_prot_to_wimg(access, &wimg_mode); + +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); } +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + vm_object_lock_assert_exclusive(object); object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; @@ -2432,27 +3021,8 @@ redo_lookup: if(real_map != target_map) vm_map_unlock_read(real_map); - if(object->wimg_bits != wimg_mode) { - vm_page_t p; - - vm_object_paging_wait(object, THREAD_UNINT); - - if ((wimg_mode == VM_WIMG_IO) - || (wimg_mode == VM_WIMG_WCOMB)) - cache_attr = TRUE; - else - cache_attr = FALSE; - - queue_iterate(&object->memq, - p, vm_page_t, listq) { - if (!p->fictitious) { - pmap_disconnect(p->phys_page); - if (cache_attr) - pmap_sync_page_attributes_phys(p->phys_page); - } - } - object->wimg_bits = wimg_mode; - } + if (object->wimg_bits != wimg_mode) + vm_object_change_wimg_mode(object, wimg_mode); /* the size of mapped entry that overlaps with our region */ /* which is targeted for share. 
*/ @@ -2473,10 +3043,13 @@ redo_lookup: parent_entry->backing.object == object && parent_entry->internal == object->internal && parent_entry->is_sub_map == FALSE && - parent_entry->is_pager == FALSE && parent_entry->offset == obj_off && parent_entry->protection == protections && - parent_entry->size == map_size) { + parent_entry->size == map_size && + ((!(use_data_addr || use_4K_compat) && + (parent_entry->data_offset == 0)) || + ((use_data_addr || use_4K_compat) && + (parent_entry->data_offset == offset_in_page)))) { /* * We have a match: re-use "parent_entry". */ @@ -2486,6 +3059,10 @@ redo_lookup: /* parent_entry->ref_count++; XXX ? */ /* Get an extra send-right on handle */ ipc_port_copy_send(parent_handle); + + *size = CAST_DOWN(vm_size_t, + (parent_entry->size - + parent_entry->data_offset)); *object_handle = parent_handle; return KERN_SUCCESS; } else { @@ -2507,30 +3084,69 @@ redo_lookup: user_entry->backing.object = object; user_entry->internal = object->internal; user_entry->is_sub_map = FALSE; - user_entry->is_pager = FALSE; user_entry->offset = obj_off; - user_entry->protection = permission; + user_entry->data_offset = offset_in_page; + user_entry->protection = protections; + SET_MAP_MEM(GET_MAP_MEM(permission), user_entry->protection); user_entry->size = map_size; /* user_object pager and internal fields are not used */ /* when the object field is filled in. 
*/ - *size = CAST_DOWN(vm_size_t, map_size); + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); *object_handle = user_handle; return KERN_SUCCESS; } else { /* The new object will be base on an existing named object */ - if (parent_entry == NULL) { kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } - if((offset + map_size) > parent_entry->size) { - kr = KERN_INVALID_ARGUMENT; - goto make_mem_done; + + if (use_data_addr || use_4K_compat) { + /* + * submaps and pagers should only be accessible from within + * the kernel, which shouldn't use the data address flag, so can fail here. + */ + if (parent_entry->is_sub_map) { + panic("Shouldn't be using data address with a parent entry that is a submap."); + } + /* + * Account for offset to data in parent entry and + * compute our own offset to data. + */ + if((offset + *size + parent_entry->data_offset) > parent_entry->size) { + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } + + map_start = vm_map_trunc_page(offset + parent_entry->data_offset, PAGE_MASK); + offset_in_page = (offset + parent_entry->data_offset) - map_start; + if (use_4K_compat) + offset_in_page &= ~((signed)(0xFFF)); + map_end = vm_map_round_page(offset + parent_entry->data_offset + *size, PAGE_MASK); + map_size = map_end - map_start; + } else { + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + offset_in_page = 0; + + if((offset + map_size) > parent_entry->size) { + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } } + if (mask_protections) { + /* + * The caller asked us to use the "protections" as + * a mask, so restrict "protections" to what this + * mapping actually allows. 
+ */ + protections &= parent_entry->protection; + } if((protections & parent_entry->protection) != protections) { kr = KERN_PROTECTION_FAILURE; goto make_mem_done; @@ -2543,9 +3159,10 @@ redo_lookup: } user_entry->size = map_size; - user_entry->offset = parent_entry->offset + map_offset; + user_entry->offset = parent_entry->offset + map_start; + user_entry->data_offset = offset_in_page; user_entry->is_sub_map = parent_entry->is_sub_map; - user_entry->is_pager = parent_entry->is_pager; + user_entry->is_copy = parent_entry->is_copy; user_entry->internal = parent_entry->internal; user_entry->protection = protections; @@ -2558,33 +3175,48 @@ redo_lookup: vm_map_lock(user_entry->backing.map); user_entry->backing.map->ref_count++; vm_map_unlock(user_entry->backing.map); - } - else if (parent_entry->is_pager) { - user_entry->backing.pager = parent_entry->backing.pager; - /* JMM - don't we need a reference here? */ } else { object = parent_entry->backing.object; assert(object != VM_OBJECT_NULL); user_entry->backing.object = object; /* we now point to this object, hold on */ - vm_object_reference(object); vm_object_lock(object); + vm_object_reference_locked(object); +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); + } +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; vm_object_unlock(object); } - *size = CAST_DOWN(vm_size_t, map_size); + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); *object_handle = user_handle; return KERN_SUCCESS; } make_mem_done: if (user_handle != IP_NULL) { - ipc_port_dealloc_kernel(user_handle); - } - if (user_entry != NULL) { - 
kfree(user_entry, sizeof *user_entry); + /* + * Releasing "user_handle" causes the kernel object + * associated with it ("user_entry" here) to also be + * released and freed. + */ + mach_memory_entry_port_release(user_handle); } return kr; } @@ -2598,10 +3230,10 @@ _mach_make_memory_entry( ipc_port_t *object_handle, ipc_port_t parent_entry) { - memory_object_offset_t mo_size; + memory_object_size_t mo_size; kern_return_t kr; - mo_size = (memory_object_offset_t)*size; + mo_size = (memory_object_size_t)*size; kr = mach_make_memory_entry_64(target_map, &mo_size, (memory_object_offset_t)offset, permission, object_handle, parent_entry); @@ -2618,10 +3250,10 @@ mach_make_memory_entry( ipc_port_t *object_handle, ipc_port_t parent_entry) { - memory_object_offset_t mo_size; + memory_object_size_t mo_size; kern_return_t kr; - mo_size = (memory_object_offset_t)*size; + mo_size = (memory_object_size_t)*size; kr = mach_make_memory_entry_64(target_map, &mo_size, (memory_object_offset_t)offset, permission, object_handle, parent_entry); @@ -2653,6 +3285,20 @@ task_wire( return(KERN_SUCCESS); } +kern_return_t +vm_map_exec_lockdown( + vm_map_t map) +{ + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + vm_map_lock(map); + map->map_disallow_new_exec = TRUE; + vm_map_unlock(map); + + return(KERN_SUCCESS); +} + __private_extern__ kern_return_t mach_memory_entry_allocate( vm_named_entry_t *user_entry_p, @@ -2691,11 +3337,14 @@ mach_memory_entry_allocate( ipc_port_nsrequest(user_handle, 1, user_handle, &previous); /* nsrequest unlocks user_handle */ - user_entry->backing.pager = NULL; + user_entry->backing.object = NULL; user_entry->is_sub_map = FALSE; - user_entry->is_pager = FALSE; - user_entry->size = 0; + user_entry->is_copy = FALSE; user_entry->internal = FALSE; + user_entry->size = 0; + user_entry->offset = 0; + user_entry->data_offset = 0; + user_entry->protection = VM_PROT_NONE; user_entry->ref_count = 1; ipc_kobject_set(user_handle, (ipc_kobject_t) user_entry, @@ 
-2712,8 +3361,6 @@ mach_memory_entry_allocate( * * Create a named entry backed by the provided pager. * - * JMM - we need to hold a reference on the pager - - * and release it when the named entry is destroyed. */ kern_return_t mach_memory_object_memory_entry_64( @@ -2727,29 +3374,47 @@ mach_memory_object_memory_entry_64( unsigned int access; vm_named_entry_t user_entry; ipc_port_t user_handle; + vm_object_t object; if (host == HOST_NULL) return(KERN_INVALID_HOST); + if (pager == MEMORY_OBJECT_NULL && internal) { + object = vm_object_allocate(size); + if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) { + object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; + } + } else { + object = memory_object_to_vm_object(pager); + if (object != VM_OBJECT_NULL) { + vm_object_reference(object); + } + } + if (object == VM_OBJECT_NULL) { + return KERN_INVALID_ARGUMENT; + } + if (mach_memory_entry_allocate(&user_entry, &user_handle) != KERN_SUCCESS) { + vm_object_deallocate(object); return KERN_FAILURE; } - user_entry->backing.pager = pager; user_entry->size = size; user_entry->offset = 0; user_entry->protection = permission & VM_PROT_ALL; access = GET_MAP_MEM(permission); SET_MAP_MEM(access, user_entry->protection); - user_entry->internal = internal; user_entry->is_sub_map = FALSE; - user_entry->is_pager = TRUE; assert(user_entry->ref_count == 1); + user_entry->backing.object = object; + user_entry->internal = object->internal; + assert(object->internal == internal); + *entry_handle = user_handle; return KERN_SUCCESS; -} +} kern_return_t mach_memory_object_memory_entry( @@ -2770,6 +3435,20 @@ mach_memory_entry_purgable_control( ipc_port_t entry_port, vm_purgable_t control, int *state) +{ + if (control == VM_PURGABLE_SET_STATE_FROM_KERNEL) { + /* not allowed from user-space */ + return KERN_INVALID_ARGUMENT; + } + + return memory_entry_purgeable_control_internal(entry_port, control, state); +} + +kern_return_t +memory_entry_purgeable_control_internal( + ipc_port_t entry_port, 
+ vm_purgable_t control, + int *state) { kern_return_t kr; vm_named_entry_t mem_entry; @@ -2779,12 +3458,23 @@ mach_memory_entry_purgable_control( ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { return KERN_INVALID_ARGUMENT; } + if (control != VM_PURGABLE_SET_STATE && + control != VM_PURGABLE_GET_STATE && + control != VM_PURGABLE_SET_STATE_FROM_KERNEL) + return(KERN_INVALID_ARGUMENT); + + if ((control == VM_PURGABLE_SET_STATE || + control == VM_PURGABLE_SET_STATE_FROM_KERNEL) && + (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) || + ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) + return(KERN_INVALID_ARGUMENT); mem_entry = (vm_named_entry_t) entry_port->ip_kobject; named_entry_lock(mem_entry); - if (mem_entry->is_sub_map || mem_entry->is_pager) { + if (mem_entry->is_sub_map || + mem_entry->is_copy) { named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; } @@ -2798,7 +3488,7 @@ mach_memory_entry_purgable_control( vm_object_lock(object); /* check that named entry covers entire object ? 
*/ - if (mem_entry->offset != 0 || object->size != mem_entry->size) { + if (mem_entry->offset != 0 || object->vo_size != mem_entry->size) { vm_object_unlock(object); named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; @@ -2813,6 +3503,53 @@ mach_memory_entry_purgable_control( return kr; } +kern_return_t +mach_memory_entry_get_page_counts( + ipc_port_t entry_port, + unsigned int *resident_page_count, + unsigned int *dirty_page_count) +{ + kern_return_t kr; + vm_named_entry_t mem_entry; + vm_object_t object; + vm_object_offset_t offset; + vm_object_size_t size; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_lock(object); + + offset = mem_entry->offset; + size = mem_entry->size; + + named_entry_unlock(mem_entry); + + kr = vm_object_get_page_counts(object, offset, size, resident_page_count, dirty_page_count); + + vm_object_unlock(object); + + return kr; +} + /* * mach_memory_entry_port_release: * @@ -2849,52 +3586,130 @@ mach_destroy_memory_entry( assert(ip_kotype(port) == IKOT_NAMED_ENTRY); #endif /* MACH_ASSERT */ named_entry = (vm_named_entry_t)port->ip_kobject; - mutex_lock(&(named_entry)->Lock); + + named_entry_lock(named_entry); named_entry->ref_count -= 1; + if(named_entry->ref_count == 0) { if (named_entry->is_sub_map) { vm_map_deallocate(named_entry->backing.map); - } else if (!named_entry->is_pager) { - /* release the memory object we've been pointing to */ + } else if (named_entry->is_copy) { + vm_map_copy_discard(named_entry->backing.copy); + } else { + /* release the VM object we've been pointing to */ 
vm_object_deallocate(named_entry->backing.object); - } /* else JMM - need to drop reference on pager in that case */ + } - mutex_unlock(&(named_entry)->Lock); + named_entry_unlock(named_entry); + named_entry_lock_destroy(named_entry); kfree((void *) port->ip_kobject, sizeof (struct vm_named_entry)); } else - mutex_unlock(&(named_entry)->Lock); + named_entry_unlock(named_entry); } - +/* Allow manipulation of individual page state. This is actually part of */ +/* the UPL regimen but takes place on the memory entry rather than on a UPL */ kern_return_t -set_dp_control_port( - host_priv_t host_priv, - ipc_port_t control_port) +mach_memory_entry_page_op( + ipc_port_t entry_port, + vm_object_offset_t offset, + int ops, + ppnum_t *phys_entry, + int *flags) { - if (host_priv == HOST_PRIV_NULL) - return (KERN_INVALID_HOST); + vm_named_entry_t mem_entry; + vm_object_t object; + kern_return_t kr; - if (IP_VALID(dynamic_pager_control_port)) - ipc_port_release_send(dynamic_pager_control_port); + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } - dynamic_pager_control_port = control_port; - return KERN_SUCCESS; + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_reference(object); + named_entry_unlock(mem_entry); + + kr = vm_object_page_op(object, offset, ops, phys_entry, flags); + + vm_object_deallocate(object); + + return kr; } +/* + * mach_memory_entry_range_op offers performance enhancement over + * mach_memory_entry_page_op for page_op functions which do not require page + * level state to be returned from the call. 
Page_op was created to provide + * a low-cost alternative to page manipulation via UPLs when only a single + * page was involved. The range_op call establishes the ability in the _op + * family of functions to work on multiple pages where the lack of page level + * state handling allows the caller to avoid the overhead of the upl structures. + */ + kern_return_t -get_dp_control_port( - host_priv_t host_priv, - ipc_port_t *control_port) +mach_memory_entry_range_op( + ipc_port_t entry_port, + vm_object_offset_t offset_beg, + vm_object_offset_t offset_end, + int ops, + int *range) { - if (host_priv == HOST_PRIV_NULL) - return (KERN_INVALID_HOST); + vm_named_entry_t mem_entry; + vm_object_t object; + kern_return_t kr; - *control_port = ipc_port_copy_send(dynamic_pager_control_port); - return KERN_SUCCESS; - + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_reference(object); + named_entry_unlock(mem_entry); + + kr = vm_object_range_op(object, + offset_beg, + offset_end, + ops, + (uint32_t *) range); + + vm_object_deallocate(object); + + return kr; } /* ******* Temporary Internal calls to UPL for BSD ***** */ @@ -2979,6 +3794,10 @@ kernel_upl_commit_range( if (flags & UPL_COMMIT_FREE_ON_EMPTY) flags |= UPL_COMMIT_NOTIFY_EMPTY; + if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) { + return KERN_INVALID_ARGUMENT; + } + kr = upl_commit_range(upl, offset, size, flags, pl, count, &finished); if ((flags & UPL_COMMIT_NOTIFY_EMPTY) && finished) @@ -3045,7 +3864,10 @@ vm_region_object_create( /* Create a named object based on a submap of specified size */ 
new_map = vm_map_create(PMAP_NULL, VM_MAP_MIN_ADDRESS, - vm_map_round_page(size), TRUE); + vm_map_round_page(size, + VM_MAP_PAGE_MASK(target_map)), + TRUE); + vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(target_map)); user_entry->backing.map = new_map; user_entry->internal = TRUE; @@ -3075,46 +3897,49 @@ vm_map_get_phys_page( vm_map_entry_t entry; ppnum_t phys_page = 0; - map_offset = vm_map_trunc_page(addr); + map_offset = vm_map_trunc_page(addr, PAGE_MASK); vm_map_lock(map); while (vm_map_lookup_entry(map, map_offset, &entry)) { - if (entry->object.vm_object == VM_OBJECT_NULL) { + if (VME_OBJECT(entry) == VM_OBJECT_NULL) { vm_map_unlock(map); return (ppnum_t) 0; } if (entry->is_sub_map) { vm_map_t old_map; - vm_map_lock(entry->object.sub_map); + vm_map_lock(VME_SUBMAP(entry)); old_map = map; - map = entry->object.sub_map; - map_offset = entry->offset + (map_offset - entry->vme_start); + map = VME_SUBMAP(entry); + map_offset = (VME_OFFSET(entry) + + (map_offset - entry->vme_start)); vm_map_unlock(old_map); continue; } - if (entry->object.vm_object->phys_contiguous) { + if (VME_OBJECT(entry)->phys_contiguous) { /* These are not standard pageable memory mappings */ /* If they are not present in the object they will */ /* have to be picked up from the pager through the */ /* fault mechanism. 
*/ - if(entry->object.vm_object->shadow_offset == 0) { + if (VME_OBJECT(entry)->vo_shadow_offset == 0) { /* need to call vm_fault */ vm_map_unlock(map); vm_fault(map, map_offset, VM_PROT_NONE, - FALSE, THREAD_UNINT, NULL, 0); + FALSE /* change_wiring */, VM_KERN_MEMORY_NONE, + THREAD_UNINT, NULL, 0); vm_map_lock(map); continue; } - offset = entry->offset + (map_offset - entry->vme_start); + offset = (VME_OFFSET(entry) + + (map_offset - entry->vme_start)); phys_page = (ppnum_t) - ((entry->object.vm_object->shadow_offset - + offset) >> 12); + ((VME_OBJECT(entry)->vo_shadow_offset + + offset) >> PAGE_SHIFT); break; } - offset = entry->offset + (map_offset - entry->vme_start); - object = entry->object.vm_object; + offset = (VME_OFFSET(entry) + (map_offset - entry->vme_start)); + object = VME_OBJECT(entry); vm_object_lock(object); while (TRUE) { vm_page_t dst_page = vm_page_lookup(object,offset); @@ -3123,7 +3948,7 @@ vm_map_get_phys_page( vm_object_t old_object; vm_object_lock(object->shadow); old_object = object; - offset = offset + object->shadow_offset; + offset = offset + object->vo_shadow_offset; object = object->shadow; vm_object_unlock(old_object); } else { @@ -3131,7 +3956,7 @@ vm_map_get_phys_page( break; } } else { - phys_page = (ppnum_t)(dst_page->phys_page); + phys_page = (ppnum_t)(VM_PAGE_GET_PHYS_PAGE(dst_page)); vm_object_unlock(object); break; } @@ -3144,12 +3969,11 @@ vm_map_get_phys_page( return phys_page; } - - +#if 0 kern_return_t kernel_object_iopl_request( /* forward */ vm_named_entry_t named_entry, memory_object_offset_t offset, - vm_size_t *upl_size, + upl_size_t *upl_size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, @@ -3159,7 +3983,7 @@ kern_return_t kernel_object_iopl_request( vm_named_entry_t named_entry, memory_object_offset_t offset, - vm_size_t *upl_size, + upl_size_t *upl_size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, @@ -3184,7 +4008,9 @@ 
kernel_object_iopl_request( if(*upl_size == 0) { if(offset >= named_entry->size) return(KERN_INVALID_RIGHT); - *upl_size = named_entry->size - offset; + *upl_size = (upl_size_t) (named_entry->size - offset); + if (*upl_size != named_entry->size - offset) + return KERN_INVALID_ARGUMENT; } if(caller_flags & UPL_COPYOUT_FROM) { if((named_entry->protection & VM_PROT_READ) @@ -3205,57 +4031,25 @@ kernel_object_iopl_request( /* offset from beginning of named entry offset in object */ offset = offset + named_entry->offset; - if(named_entry->is_sub_map) - return (KERN_INVALID_ARGUMENT); + if (named_entry->is_sub_map || + named_entry->is_copy) + return KERN_INVALID_ARGUMENT; named_entry_lock(named_entry); - if (named_entry->is_pager) { - object = vm_object_enter(named_entry->backing.pager, - named_entry->offset + named_entry->size, - named_entry->internal, - FALSE, - FALSE); - if (object == VM_OBJECT_NULL) { - named_entry_unlock(named_entry); - return(KERN_INVALID_OBJECT); - } - - /* JMM - drop reference on the pager here? */ - - /* create an extra reference for the object */ - vm_object_lock(object); - vm_object_reference_locked(object); - named_entry->backing.object = object; - named_entry->is_pager = FALSE; - named_entry_unlock(named_entry); - - /* wait for object (if any) to be ready */ - if (!named_entry->internal) { - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); - vm_object_lock(object); - } - } - vm_object_unlock(object); - - } else { - /* This is the case where we are going to operate */ - /* an an already known object. If the object is */ - /* not ready it is internal. An external */ - /* object cannot be mapped until it is ready */ - /* we can therefore avoid the ready check */ - /* in this case. */ - object = named_entry->backing.object; - vm_object_reference(object); - named_entry_unlock(named_entry); - } + /* This is the case where we are going to operate */ + /* on an already known object. 
If the object is */ + /* not ready it is internal. An external */ + /* object cannot be mapped until it is ready */ + /* we can therefore avoid the ready check */ + /* in this case. */ + object = named_entry->backing.object; + vm_object_reference(object); + named_entry_unlock(named_entry); if (!object->private) { - if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE)) - *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE); + if (*upl_size > MAX_UPL_TRANSFER_BYTES) + *upl_size = MAX_UPL_TRANSFER_BYTES; if (object->phys_contiguous) { *flags = UPL_PHYS_CONTIG; } else { @@ -3271,7 +4065,115 @@ kernel_object_iopl_request( upl_ptr, user_page_list, page_list_count, - caller_flags); + (upl_control_flags_t)(unsigned int)caller_flags); vm_object_deallocate(object); return ret; } +#endif + +/* + * These symbols are looked up at runtime by vmware, VirtualBox, + * despite not being exported in the symbol sets. + */ + +#if defined(__x86_64__) + +kern_return_t +mach_vm_map( + vm_map_t target_map, + mach_vm_offset_t *address, + mach_vm_size_t initial_size, + mach_vm_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance); + +kern_return_t +mach_vm_remap( + vm_map_t target_map, + mach_vm_offset_t *address, + mach_vm_size_t size, + mach_vm_offset_t mask, + int flags, + vm_map_t src_map, + mach_vm_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance); + +kern_return_t +mach_vm_map( + vm_map_t target_map, + mach_vm_offset_t *address, + mach_vm_size_t initial_size, + mach_vm_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + return (mach_vm_map_external(target_map, address, initial_size, mask, flags, port, + offset, copy, cur_protection, max_protection, inheritance)); +} + 
+kern_return_t +mach_vm_remap( + vm_map_t target_map, + mach_vm_offset_t *address, + mach_vm_size_t size, + mach_vm_offset_t mask, + int flags, + vm_map_t src_map, + mach_vm_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + return (mach_vm_remap_external(target_map, address, size, mask, flags, src_map, memory_address, + copy, cur_protection, max_protection, inheritance)); +} + +kern_return_t +vm_map( + vm_map_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + ipc_port_t port, + vm_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance); + +kern_return_t +vm_map( + vm_map_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + ipc_port_t port, + vm_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + vm_tag_t tag; + + VM_GET_FLAGS_ALIAS(flags, tag); + return (vm_map_kernel(target_map, address, size, mask, flags, tag, port, offset, copy, cur_protection, max_protection, inheritance)); +} + +#endif /* __x86_64__ */