X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/9bccf70c0258c7cac2dcb80011b2a964d884c552..813fb2f63a553c957e917ede5f119b021d6ce391:/osfmk/vm/vm_user.c diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index ede0d2591..886dbb6ff 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -56,31 +62,61 @@ * User-exported virtual memory functions. */ +/* + * There are three implementations of the "XXX_allocate" functionality in + * the kernel: mach_vm_allocate (for any task on the platform), vm_allocate + * (for a task with the same address space size, especially the current task), + * and vm32_vm_allocate (for the specific case of a 32-bit task). vm_allocate + * in the kernel should only be used on the kernel_task. vm32_vm_allocate only + * makes sense on platforms where a user task can either be 32 or 64, or the kernel + * task can be 32 or 64. mach_vm_allocate makes sense everywhere, and is preferred + * for new code. + * + * The entrypoints into the kernel are more complex. All platforms support a + * mach_vm_allocate-style API (subsystem 4800) which operates with the largest + * size types for the platform. On platforms that only support U32/K32, + * subsystem 4800 is all you need. 
On platforms that support both U32 and U64, + * subsystem 3800 is used disambiguate the size of parameters, and they will + * always be 32-bit and call into the vm32_vm_allocate APIs. On non-U32/K32 platforms, + * the MIG glue should never call into vm_allocate directly, because the calling + * task and kernel_task are unlikely to use the same size parameters + * + * New VM call implementations should be added here and to mach_vm.defs + * (subsystem 4800), and use mach_vm_* "wide" types. + */ + +#include + #include #include #include #include /* to get vm_address_t */ #include #include /* to get pointer_t */ +#include #include #include #include -#include #include +#include -#include -#include +#include +#include +#include #include +#include #include #include +#include #include #include #include #include #include - - +#include +#include +#include vm_size_t upl_offset_to_pagelist = 0; @@ -88,21 +124,29 @@ vm_size_t upl_offset_to_pagelist = 0; #include #endif /* VM_CPM */ +lck_grp_t dynamic_pager_control_port_lock_group; +decl_lck_mtx_data(, dynamic_pager_control_port_lock); ipc_port_t dynamic_pager_control_port=NULL; /* - * vm_allocate allocates "zero fill" memory in the specfied + * mach_vm_allocate allocates "zero fill" memory in the specfied * map. */ kern_return_t -vm_allocate( - register vm_map_t map, - register vm_offset_t *addr, - register vm_size_t size, +mach_vm_allocate( + vm_map_t map, + mach_vm_offset_t *addr, + mach_vm_size_t size, int flags) { + vm_map_offset_t map_addr; + vm_map_size_t map_size; kern_return_t result; - boolean_t anywhere = VM_FLAGS_ANYWHERE & flags; + boolean_t anywhere; + + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_ALLOCATE) + return KERN_INVALID_ARGUMENT; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); @@ -111,20 +155,104 @@ vm_allocate( return(KERN_SUCCESS); } - if (anywhere) - *addr = vm_map_min(map); - else - *addr = trunc_page(*addr); - size = round_page(size); + anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + if (anywhere) { + /* + * No specific address requested, so start candidate address + * search at the minimum address in the map. However, if that + * minimum is 0, bump it up by PAGE_SIZE. We want to limit + * allocations of PAGEZERO to explicit requests since its + * normal use is to catch dereferences of NULL and many + * applications also treat pointers with a value of 0 as + * special and suddenly having address 0 contain useable + * memory would tend to confuse those applications. + */ + map_addr = vm_map_min(map); + if (map_addr == 0) + map_addr += VM_MAP_PAGE_SIZE(map); + } else + map_addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); + if (map_size == 0) { + return(KERN_INVALID_ARGUMENT); + } + + result = vm_map_enter( + map, + &map_addr, + map_size, + (vm_map_offset_t)0, + flags, + VM_OBJECT_NULL, + (vm_object_offset_t)0, + FALSE, + VM_PROT_DEFAULT, + VM_PROT_ALL, + VM_INHERIT_DEFAULT); + + *addr = map_addr; + return(result); +} + +/* + * vm_allocate + * Legacy routine that allocates "zero fill" memory in the specfied + * map (which is limited to the same size as the kernel). 
+ */ +kern_return_t +vm_allocate( + vm_map_t map, + vm_offset_t *addr, + vm_size_t size, + int flags) +{ + vm_map_offset_t map_addr; + vm_map_size_t map_size; + kern_return_t result; + boolean_t anywhere; + + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_ALLOCATE) + return KERN_INVALID_ARGUMENT; + + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); if (size == 0) { + *addr = 0; + return(KERN_SUCCESS); + } + + anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + if (anywhere) { + /* + * No specific address requested, so start candidate address + * search at the minimum address in the map. However, if that + * minimum is 0, bump it up by PAGE_SIZE. We want to limit + * allocations of PAGEZERO to explicit requests since its + * normal use is to catch dereferences of NULL and many + * applications also treat pointers with a value of 0 as + * special and suddenly having address 0 contain useable + * memory would tend to confuse those applications. + */ + map_addr = vm_map_min(map); + if (map_addr == 0) + map_addr += VM_MAP_PAGE_SIZE(map); + } else + map_addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); + if (map_size == 0) { return(KERN_INVALID_ARGUMENT); } result = vm_map_enter( map, - addr, - size, - (vm_offset_t)0, + &map_addr, + map_size, + (vm_map_offset_t)0, flags, VM_OBJECT_NULL, (vm_object_offset_t)0, @@ -133,98 +261,293 @@ vm_allocate( VM_PROT_ALL, VM_INHERIT_DEFAULT); + *addr = CAST_DOWN(vm_offset_t, map_addr); return(result); } /* - * vm_deallocate deallocates the specified range of addresses in the + * mach_vm_deallocate - + * deallocates the specified range of addresses in the * specified address map. */ kern_return_t +mach_vm_deallocate( + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size) +{ + if ((map == VM_MAP_NULL) || (start + size < start)) + return(KERN_INVALID_ARGUMENT); + + if (size == (mach_vm_offset_t) 0) + return(KERN_SUCCESS); + + return(vm_map_remove(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS)); +} + +/* + * vm_deallocate - + * deallocates the specified range of addresses in the + * specified address map (limited to addresses the same + * size as the kernel). + */ +kern_return_t vm_deallocate( - register vm_map_t map, + vm_map_t map, vm_offset_t start, vm_size_t size) { - if (map == VM_MAP_NULL) + if ((map == VM_MAP_NULL) || (start + size < start)) return(KERN_INVALID_ARGUMENT); if (size == (vm_offset_t) 0) return(KERN_SUCCESS); - return(vm_map_remove(map, trunc_page(start), - round_page(start+size), VM_MAP_NO_FLAGS)); + return(vm_map_remove(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS)); } /* - * vm_inherit sets the inheritance of the specified range in the + * mach_vm_inherit - + * Sets the inheritance of the specified range in the * specified map. 
*/ kern_return_t +mach_vm_inherit( + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size, + vm_inherit_t new_inheritance) +{ + if ((map == VM_MAP_NULL) || (start + size < start) || + (new_inheritance > VM_INHERIT_LAST_VALID)) + return(KERN_INVALID_ARGUMENT); + + if (size == 0) + return KERN_SUCCESS; + + return(vm_map_inherit(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + new_inheritance)); +} + +/* + * vm_inherit - + * Sets the inheritance of the specified range in the + * specified map (range limited to addresses + */ +kern_return_t vm_inherit( - register vm_map_t map, + vm_map_t map, vm_offset_t start, vm_size_t size, vm_inherit_t new_inheritance) { - if (map == VM_MAP_NULL) - return(KERN_INVALID_ARGUMENT); - - if (new_inheritance > VM_INHERIT_LAST_VALID) + if ((map == VM_MAP_NULL) || (start + size < start) || + (new_inheritance > VM_INHERIT_LAST_VALID)) return(KERN_INVALID_ARGUMENT); + if (size == 0) + return KERN_SUCCESS; + return(vm_map_inherit(map, - trunc_page(start), - round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_inheritance)); } /* - * vm_protect sets the protection of the specified range in the + * mach_vm_protect - + * Sets the protection of the specified range in the * specified map. */ +kern_return_t +mach_vm_protect( + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size, + boolean_t set_maximum, + vm_prot_t new_protection) +{ + if ((map == VM_MAP_NULL) || (start + size < start) || + (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY))) + return(KERN_INVALID_ARGUMENT); + + if (size == 0) + return KERN_SUCCESS; + + return(vm_map_protect(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + new_protection, + set_maximum)); +} + +/* + * vm_protect - + * Sets the protection of the specified range in the + * specified map. Addressability of the range limited + * to the same size as the kernel. + */ + kern_return_t vm_protect( - register vm_map_t map, + vm_map_t map, vm_offset_t start, vm_size_t size, boolean_t set_maximum, vm_prot_t new_protection) { - if ((map == VM_MAP_NULL) || - (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY))) + if ((map == VM_MAP_NULL) || (start + size < start) || + (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY))) return(KERN_INVALID_ARGUMENT); + if (size == 0) + return KERN_SUCCESS; + return(vm_map_protect(map, - trunc_page(start), - round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_protection, set_maximum)); } /* + * mach_vm_machine_attributes - * Handle machine-specific attributes for a mapping, such * as cachability, migrability, etc. */ kern_return_t +mach_vm_machine_attribute( + vm_map_t map, + mach_vm_address_t addr, + mach_vm_size_t size, + vm_machine_attribute_t attribute, + vm_machine_attribute_val_t* value) /* IN/OUT */ +{ + if ((map == VM_MAP_NULL) || (addr + size < addr)) + return(KERN_INVALID_ARGUMENT); + + if (size == 0) + return KERN_SUCCESS; + + return vm_map_machine_attribute( + map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr+size, + VM_MAP_PAGE_MASK(map)), + attribute, + value); +} + +/* + * vm_machine_attribute - + * Handle machine-specific attributes for a mapping, such + * as cachability, migrability, etc. 
Limited addressability + * (same range limits as for the native kernel map). + */ +kern_return_t vm_machine_attribute( vm_map_t map, - vm_address_t address, + vm_address_t addr, vm_size_t size, vm_machine_attribute_t attribute, vm_machine_attribute_val_t* value) /* IN/OUT */ { + if ((map == VM_MAP_NULL) || (addr + size < addr)) + return(KERN_INVALID_ARGUMENT); + + if (size == 0) + return KERN_SUCCESS; + + return vm_map_machine_attribute( + map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr+size, + VM_MAP_PAGE_MASK(map)), + attribute, + value); +} + +/* + * mach_vm_read - + * Read/copy a range from one address space and return it to the caller. + * + * It is assumed that the address for the returned memory is selected by + * the IPC implementation as part of receiving the reply to this call. + * If IPC isn't used, the caller must deal with the vm_map_copy_t object + * that gets returned. + * + * JMM - because of mach_msg_type_number_t, this call is limited to a + * single 4GB region at this time. + * + */ +kern_return_t +mach_vm_read( + vm_map_t map, + mach_vm_address_t addr, + mach_vm_size_t size, + pointer_t *data, + mach_msg_type_number_t *data_size) +{ + kern_return_t error; + vm_map_copy_t ipc_address; + if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); - return vm_map_machine_attribute(map, address, size, attribute, value); + if ((mach_msg_type_number_t) size != size) + return KERN_INVALID_ARGUMENT; + + error = vm_map_copyin(map, + (vm_map_address_t)addr, + (vm_map_size_t)size, + FALSE, /* src_destroy */ + &ipc_address); + + if (KERN_SUCCESS == error) { + *data = (pointer_t) ipc_address; + *data_size = (mach_msg_type_number_t) size; + assert(*data_size == size); + } + return(error); } +/* + * vm_read - + * Read/copy a range from one address space and return it to the caller. + * Limited addressability (same range limits as for the native kernel map). + * + * It is assumed that the address for the returned memory is selected by + * the IPC implementation as part of receiving the reply to this call. + * If IPC isn't used, the caller must deal with the vm_map_copy_t object + * that gets returned. + */ kern_return_t vm_read( vm_map_t map, - vm_address_t address, + vm_address_t addr, vm_size_t size, pointer_t *data, mach_msg_type_number_t *data_size) @@ -235,397 +558,419 @@ vm_read( if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); - if ((error = vm_map_copyin(map, - address, - size, - FALSE, /* src_destroy */ - &ipc_address)) == KERN_SUCCESS) { + if (size > (unsigned)(mach_msg_type_number_t) -1) { + /* + * The kernel could handle a 64-bit "size" value, but + * it could not return the size of the data in "*data_size" + * without overflowing. + * Let's reject this "size" as invalid. + */ + return KERN_INVALID_ARGUMENT; + } + + error = vm_map_copyin(map, + (vm_map_address_t)addr, + (vm_map_size_t)size, + FALSE, /* src_destroy */ + &ipc_address); + + if (KERN_SUCCESS == error) { *data = (pointer_t) ipc_address; - *data_size = size; + *data_size = (mach_msg_type_number_t) size; + assert(*data_size == size); } return(error); } +/* + * mach_vm_read_list - + * Read/copy a list of address ranges from specified map. + * + * MIG does not know how to deal with a returned array of + * vm_map_copy_t structures, so we have to do the copyout + * manually here. 
+ */ kern_return_t -vm_read_list( - vm_map_t map, - vm_read_entry_t data_list, - mach_msg_type_number_t count) +mach_vm_read_list( + vm_map_t map, + mach_vm_read_entry_t data_list, + natural_t count) { mach_msg_type_number_t i; kern_return_t error; - vm_map_copy_t ipc_address; + vm_map_copy_t copy; - if (map == VM_MAP_NULL) + if (map == VM_MAP_NULL || + count > VM_MAP_ENTRY_MAX) return(KERN_INVALID_ARGUMENT); + error = KERN_SUCCESS; for(i=0; imap, - &(data_list[i].address), - (vm_map_copy_t) ipc_address); - if(error != KERN_SUCCESS) { - data_list[i].address = (vm_address_t)0; - data_list[i].size = (vm_size_t)0; - break; + vm_map_address_t map_addr; + vm_map_size_t map_size; + + map_addr = (vm_map_address_t)(data_list[i].address); + map_size = (vm_map_size_t)(data_list[i].size); + + if(map_size != 0) { + error = vm_map_copyin(map, + map_addr, + map_size, + FALSE, /* src_destroy */ + ©); + if (KERN_SUCCESS == error) { + error = vm_map_copyout( + current_task()->map, + &map_addr, + copy); + if (KERN_SUCCESS == error) { + data_list[i].address = map_addr; + continue; + } + vm_map_copy_discard(copy); } } + data_list[i].address = (mach_vm_address_t)0; + data_list[i].size = (mach_vm_size_t)0; } return(error); } -/* - * This routine reads from the specified map and overwrites part of the current - * activation's map. In making an assumption that the current thread is local, - * it is no longer cluster-safe without a fully supportive local proxy thread/ - * task (but we don't support cluster's anymore so this is moot). +/* + * vm_read_list - + * Read/copy a list of address ranges from specified map. + * + * MIG does not know how to deal with a returned array of + * vm_map_copy_t structures, so we have to do the copyout + * manually here. + * + * The source and destination ranges are limited to those + * that can be described with a vm_address_t (i.e. same + * size map as the kernel). + * + * JMM - If the result of the copyout is an address range + * that cannot be described with a vm_address_t (i.e. the + * caller had a larger address space but used this call + * anyway), it will result in a truncated address being + * returned (and a likely confused caller). 
*/ -#define VM_OVERWRITE_SMALL 512 - kern_return_t -vm_read_overwrite( - vm_map_t map, - vm_address_t address, - vm_size_t size, - vm_address_t data, - vm_size_t *data_size) -{ - struct { - long align; - char buf[VM_OVERWRITE_SMALL]; - } inbuf; - vm_map_t oldmap; - kern_return_t error = KERN_SUCCESS; +vm_read_list( + vm_map_t map, + vm_read_entry_t data_list, + natural_t count) +{ + mach_msg_type_number_t i; + kern_return_t error; vm_map_copy_t copy; - if (map == VM_MAP_NULL) + if (map == VM_MAP_NULL || + count > VM_MAP_ENTRY_MAX) return(KERN_INVALID_ARGUMENT); - if (size <= VM_OVERWRITE_SMALL) { - if(vm_map_read_user(map, (vm_offset_t)address, - (vm_offset_t)&inbuf, size)) { - error = KERN_INVALID_ADDRESS; - } else { - if(vm_map_write_user(current_map(), - (vm_offset_t)&inbuf, (vm_offset_t)data, size)) - error = KERN_INVALID_ADDRESS; - } - } - else { - if ((error = vm_map_copyin(map, - address, - size, + error = KERN_SUCCESS; + for(i=0; imap, - data, - copy, - FALSE)) == KERN_SUCCESS) { - } - else { + ©); + if (KERN_SUCCESS == error) { + error = vm_map_copyout(current_task()->map, + &map_addr, + copy); + if (KERN_SUCCESS == error) { + data_list[i].address = + CAST_DOWN(vm_offset_t, map_addr); + continue; + } vm_map_copy_discard(copy); } } + data_list[i].address = (mach_vm_address_t)0; + data_list[i].size = (mach_vm_size_t)0; } - *data_size = size; return(error); } +/* + * mach_vm_read_overwrite - + * Overwrite a range of the current map with data from the specified + * map/address range. + * + * In making an assumption that the current thread is local, it is + * no longer cluster-safe without a fully supportive local proxy + * thread/task (but we don't support cluster's anymore so this is moot). + */ - - -/*ARGSUSED*/ kern_return_t -vm_write( +mach_vm_read_overwrite( vm_map_t map, - vm_address_t address, - vm_offset_t data, - mach_msg_type_number_t size) + mach_vm_address_t address, + mach_vm_size_t size, + mach_vm_address_t data, + mach_vm_size_t *data_size) { + kern_return_t error; + vm_map_copy_t copy; + if (map == VM_MAP_NULL) - return KERN_INVALID_ARGUMENT; + return(KERN_INVALID_ARGUMENT); - return vm_map_copy_overwrite(map, address, (vm_map_copy_t) data, - FALSE /* interruptible XXX */); + error = vm_map_copyin(map, (vm_map_address_t)address, + (vm_map_size_t)size, FALSE, ©); + + if (KERN_SUCCESS == error) { + error = vm_map_copy_overwrite(current_thread()->map, + (vm_map_address_t)data, + copy, FALSE); + if (KERN_SUCCESS == error) { + *data_size = size; + return error; + } + vm_map_copy_discard(copy); + } + return(error); } +/* + * vm_read_overwrite - + * Overwrite a range of the current map with data from the specified + * map/address range. + * + * This routine adds the additional limitation that the source and + * destination ranges must be describable with vm_address_t values + * (i.e. the same size address spaces as the kernel, or at least the + * the ranges are in that first portion of the respective address + * spaces). 
+ */ + kern_return_t -vm_copy( +vm_read_overwrite( vm_map_t map, - vm_address_t source_address, + vm_address_t address, vm_size_t size, - vm_address_t dest_address) + vm_address_t data, + vm_size_t *data_size) { - vm_map_copy_t copy; - kern_return_t kr; + kern_return_t error; + vm_map_copy_t copy; if (map == VM_MAP_NULL) - return KERN_INVALID_ARGUMENT; + return(KERN_INVALID_ARGUMENT); - kr = vm_map_copyin(map, source_address, size, - FALSE, ©); - if (kr != KERN_SUCCESS) - return kr; + error = vm_map_copyin(map, (vm_map_address_t)address, + (vm_map_size_t)size, FALSE, ©); - kr = vm_map_copy_overwrite(map, dest_address, copy, - FALSE /* interruptible XXX */); - if (kr != KERN_SUCCESS) { + if (KERN_SUCCESS == error) { + error = vm_map_copy_overwrite(current_thread()->map, + (vm_map_address_t)data, + copy, FALSE); + if (KERN_SUCCESS == error) { + *data_size = size; + return error; + } vm_map_copy_discard(copy); - return kr; } - - return KERN_SUCCESS; + return(error); } + /* - * Routine: vm_map + * mach_vm_write - + * Overwrite the specified address range with the data provided + * (from the current map). */ kern_return_t -vm_map_64( - vm_map_t target_map, - vm_offset_t *address, - vm_size_t initial_size, - vm_offset_t mask, - int flags, - ipc_port_t port, - vm_object_offset_t offset, - boolean_t copy, - vm_prot_t cur_protection, - vm_prot_t max_protection, - vm_inherit_t inheritance) +mach_vm_write( + vm_map_t map, + mach_vm_address_t address, + pointer_t data, + __unused mach_msg_type_number_t size) { - register - vm_object_t object; - vm_prot_t prot; - vm_object_size_t size = (vm_object_size_t)initial_size; - kern_return_t result; + if (map == VM_MAP_NULL) + return KERN_INVALID_ARGUMENT; - /* - * Check arguments for validity - */ - if ((target_map == VM_MAP_NULL) || - (cur_protection & ~VM_PROT_ALL) || - (max_protection & ~VM_PROT_ALL) || - (inheritance > VM_INHERIT_LAST_VALID) || - size == 0) - return(KERN_INVALID_ARGUMENT); + return vm_map_copy_overwrite(map, (vm_map_address_t)address, + (vm_map_copy_t) data, FALSE /* interruptible XXX */); +} - /* - * Find the vm object (if any) corresponding to this port. - */ - if (!IP_VALID(port)) { - object = VM_OBJECT_NULL; - offset = 0; - copy = FALSE; - } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) { - vm_named_entry_t named_entry; - - named_entry = (vm_named_entry_t)port->ip_kobject; - /* a few checks to make sure user is obeying rules */ - if(size == 0) { - if(offset >= named_entry->size) - return(KERN_INVALID_RIGHT); - size = named_entry->size - offset; - } - if((named_entry->protection & max_protection) != max_protection) - return(KERN_INVALID_RIGHT); - if((named_entry->protection & cur_protection) != cur_protection) - return(KERN_INVALID_RIGHT); - if(named_entry->size < (offset + size)) - return(KERN_INVALID_ARGUMENT); +/* + * vm_write - + * Overwrite the specified address range with the data provided + * (from the current map). + * + * The addressability of the range of addresses to overwrite is + * limited bu the use of a vm_address_t (same size as kernel map). + * Either the target map is also small, or the range is in the + * low addresses within it. 
+ */ +kern_return_t +vm_write( + vm_map_t map, + vm_address_t address, + pointer_t data, + __unused mach_msg_type_number_t size) +{ + if (map == VM_MAP_NULL) + return KERN_INVALID_ARGUMENT; - /* the callers parameter offset is defined to be the */ - /* offset from beginning of named entry offset in object */ - offset = offset + named_entry->offset; - - named_entry_lock(named_entry); - if(named_entry->is_sub_map) { - vm_map_entry_t map_entry; + return vm_map_copy_overwrite(map, (vm_map_address_t)address, + (vm_map_copy_t) data, FALSE /* interruptible XXX */); +} - named_entry_unlock(named_entry); - *address = trunc_page(*address); - size = round_page(size); - vm_object_reference(vm_submap_object); - if ((result = vm_map_enter(target_map, - address, size, mask, flags, - vm_submap_object, 0, - FALSE, - cur_protection, max_protection, inheritance - )) != KERN_SUCCESS) { - vm_object_deallocate(vm_submap_object); - } else { - char alias; - - VM_GET_FLAGS_ALIAS(flags, alias); - if ((alias == VM_MEMORY_SHARED_PMAP) && - !copy) { - vm_map_submap(target_map, *address, - (*address) + size, - named_entry->backing.map, - (vm_offset_t)offset, TRUE); - } else { - vm_map_submap(target_map, *address, - (*address) + size, - named_entry->backing.map, - (vm_offset_t)offset, FALSE); - } - if(copy) { - if(vm_map_lookup_entry( - target_map, *address, &map_entry)) { - map_entry->needs_copy = TRUE; - } - } - } - return(result); - - } else if(named_entry->object) { - /* This is the case where we are going to map */ - /* an already mapped object. If the object is */ - /* not ready it is internal. An external */ - /* object cannot be mapped until it is ready */ - /* we can therefore avoid the ready check */ - /* in this case. */ - named_entry_unlock(named_entry); - vm_object_reference(named_entry->object); - object = named_entry->object; - } else { - object = vm_object_enter(named_entry->backing.pager, - named_entry->size, - named_entry->internal, - FALSE, - FALSE); - if (object == VM_OBJECT_NULL) { - named_entry_unlock(named_entry); - return(KERN_INVALID_OBJECT); - } - object->true_share = TRUE; - named_entry->object = object; - named_entry_unlock(named_entry); - /* create an extra reference for the named entry */ - vm_object_reference(named_entry->object); - /* wait for object (if any) to be ready */ - if (object != VM_OBJECT_NULL) { - vm_object_lock(object); - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); - vm_object_lock(object); - } - vm_object_unlock(object); - } - } - } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) { - /* - * JMM - This is temporary until we unify named entries - * and raw memory objects. - * - * Detected fake ip_kotype for a memory object. In - * this case, the port isn't really a port at all, but - * instead is just a raw memory object. - */ - - if ((object = vm_object_enter((memory_object_t)port, - size, FALSE, FALSE, FALSE)) - == VM_OBJECT_NULL) - return(KERN_INVALID_OBJECT); +/* + * mach_vm_copy - + * Overwrite one range of the specified map with the contents of + * another range within that same map (i.e. both address ranges + * are "over there"). 
+ */ +kern_return_t +mach_vm_copy( + vm_map_t map, + mach_vm_address_t source_address, + mach_vm_size_t size, + mach_vm_address_t dest_address) +{ + vm_map_copy_t copy; + kern_return_t kr; - /* wait for object (if any) to be ready */ - if (object != VM_OBJECT_NULL) { - if(object == kernel_object) { - printf("Warning: Attempt to map kernel object" - " by a non-private kernel entity\n"); - return(KERN_INVALID_OBJECT); - } - vm_object_lock(object); - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); - vm_object_lock(object); - } - vm_object_unlock(object); - } - } else { - return (KERN_INVALID_OBJECT); + if (map == VM_MAP_NULL) + return KERN_INVALID_ARGUMENT; + + kr = vm_map_copyin(map, (vm_map_address_t)source_address, + (vm_map_size_t)size, FALSE, ©); + + if (KERN_SUCCESS == kr) { + kr = vm_map_copy_overwrite(map, + (vm_map_address_t)dest_address, + copy, FALSE /* interruptible XXX */); + + if (KERN_SUCCESS != kr) + vm_map_copy_discard(copy); } + return kr; +} + +kern_return_t +vm_copy( + vm_map_t map, + vm_address_t source_address, + vm_size_t size, + vm_address_t dest_address) +{ + vm_map_copy_t copy; + kern_return_t kr; + + if (map == VM_MAP_NULL) + return KERN_INVALID_ARGUMENT; - *address = trunc_page(*address); - size = round_page(size); + kr = vm_map_copyin(map, (vm_map_address_t)source_address, + (vm_map_size_t)size, FALSE, ©); - /* - * Perform the copy if requested - */ + if (KERN_SUCCESS == kr) { + kr = vm_map_copy_overwrite(map, + (vm_map_address_t)dest_address, + copy, FALSE /* interruptible XXX */); - if (copy) { - vm_object_t new_object; - vm_object_offset_t new_offset; + if (KERN_SUCCESS != kr) + vm_map_copy_discard(copy); + } + return kr; +} - result = vm_object_copy_strategically(object, offset, size, - &new_object, &new_offset, - ©); +/* + * mach_vm_map - + * Map some range of an object into an address space. + * + * The object can be one of several types of objects: + * NULL - anonymous memory + * a named entry - a range within another address space + * or a range within a memory object + * a whole memory object + * + */ +kern_return_t +mach_vm_map( + vm_map_t target_map, + mach_vm_offset_t *address, + mach_vm_size_t initial_size, + mach_vm_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + kern_return_t kr; + vm_map_offset_t vmmaddr; + vmmaddr = (vm_map_offset_t) *address; - if (result == KERN_MEMORY_RESTART_COPY) { - boolean_t success; - boolean_t src_needs_copy; + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_MAP) + return KERN_INVALID_ARGUMENT; - /* - * XXX - * We currently ignore src_needs_copy. - * This really is the issue of how to make - * MEMORY_OBJECT_COPY_SYMMETRIC safe for - * non-kernel users to use. Solution forthcoming. - * In the meantime, since we don't allow non-kernel - * memory managers to specify symmetric copy, - * we won't run into problems here. - */ - new_object = object; - new_offset = offset; - success = vm_object_copy_quickly(&new_object, - new_offset, size, - &src_needs_copy, - ©); - assert(success); - result = KERN_SUCCESS; - } - /* - * Throw away the reference to the - * original object, as it won't be mapped. 
- */ + kr = vm_map_enter_mem_object(target_map, + &vmmaddr, + initial_size, + mask, + flags, + port, + offset, + copy, + cur_protection, + max_protection, + inheritance); + + *address = vmmaddr; + return kr; +} - vm_object_deallocate(object); - if (result != KERN_SUCCESS) - return (result); +/* legacy interface */ +kern_return_t +vm_map_64( + vm_map_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + mach_vm_address_t map_addr; + mach_vm_size_t map_size; + mach_vm_offset_t map_mask; + kern_return_t kr; - object = new_object; - offset = new_offset; - } + map_addr = (mach_vm_address_t)*address; + map_size = (mach_vm_size_t)size; + map_mask = (mach_vm_offset_t)mask; - if ((result = vm_map_enter(target_map, - address, size, mask, flags, - object, offset, - copy, - cur_protection, max_protection, inheritance - )) != KERN_SUCCESS) - vm_object_deallocate(object); - return(result); + kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, + port, offset, copy, + cur_protection, max_protection, inheritance); + *address = CAST_DOWN(vm_offset_t, map_addr); + return kr; } /* temporary, until world build */ +kern_return_t vm_map( vm_map_t target_map, vm_offset_t *address, @@ -639,18 +984,180 @@ vm_map( vm_prot_t max_protection, vm_inherit_t inheritance) { - vm_map_64(target_map, address, size, mask, flags, - port, (vm_object_offset_t)offset, copy, - cur_protection, max_protection, inheritance); + mach_vm_address_t map_addr; + mach_vm_size_t map_size; + mach_vm_offset_t map_mask; + vm_object_offset_t obj_offset; + kern_return_t kr; + + map_addr = (mach_vm_address_t)*address; + map_size = (mach_vm_size_t)size; + map_mask = (mach_vm_offset_t)mask; + obj_offset = (vm_object_offset_t)offset; + + kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, + port, obj_offset, copy, + cur_protection, max_protection, inheritance); + *address = CAST_DOWN(vm_offset_t, map_addr); + return kr; +} + +/* + * mach_vm_remap - + * Remap a range of memory from one task into another, + * to another address range within the same task, or + * over top of itself (with altered permissions and/or + * as an in-place copy of itself). + */ + +kern_return_t +mach_vm_remap( + vm_map_t target_map, + mach_vm_offset_t *address, + mach_vm_size_t size, + mach_vm_offset_t mask, + int flags, + vm_map_t src_map, + mach_vm_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + vm_map_offset_t map_addr; + kern_return_t kr; + + if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map) + return KERN_INVALID_ARGUMENT; + + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_REMAP) + return KERN_INVALID_ARGUMENT; + + map_addr = (vm_map_offset_t)*address; + + kr = vm_map_remap(target_map, + &map_addr, + size, + mask, + flags, + src_map, + memory_address, + copy, + cur_protection, + max_protection, + inheritance); + *address = map_addr; + return kr; } +/* + * vm_remap - + * Remap a range of memory from one task into another, + * to another address range within the same task, or + * over top of itself (with altered permissions and/or + * as an in-place copy of itself). + * + * The addressability of the source and target address + * range is limited by the size of vm_address_t (in the + * kernel context). 
+ */ +kern_return_t +vm_remap( + vm_map_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + vm_map_t src_map, + vm_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + vm_map_offset_t map_addr; + kern_return_t kr; + + if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map) + return KERN_INVALID_ARGUMENT; + + /* filter out any kernel-only flags */ + if (flags & ~VM_FLAGS_USER_REMAP) + return KERN_INVALID_ARGUMENT; + + map_addr = (vm_map_offset_t)*address; + + kr = vm_map_remap(target_map, + &map_addr, + size, + mask, + flags, + src_map, + memory_address, + copy, + cur_protection, + max_protection, + inheritance); + *address = CAST_DOWN(vm_offset_t, map_addr); + return kr; +} /* - * NOTE: this routine (and this file) will no longer require mach_host_server.h - * when vm_wire is changed to use ledgers. + * NOTE: these routine (and this file) will no longer require mach_host_server.h + * when mach_vm_wire and vm_wire are changed to use ledgers. */ #include /* + * mach_vm_wire + * Specify that the range of the virtual address space + * of the target task must not cause page faults for + * the indicated accesses. + * + * [ To unwire the pages, specify VM_PROT_NONE. ] + */ +kern_return_t +mach_vm_wire( + host_priv_t host_priv, + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size, + vm_prot_t access) +{ + kern_return_t rc; + + if (host_priv == HOST_PRIV_NULL) + return KERN_INVALID_HOST; + + assert(host_priv == &realhost); + + if (map == VM_MAP_NULL) + return KERN_INVALID_TASK; + + if (access & ~VM_PROT_ALL || (start + size < start)) + return KERN_INVALID_ARGUMENT; + + if (access != VM_PROT_NONE) { + rc = vm_map_wire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + access | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK), + TRUE); + } else { + rc = vm_map_unwire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + TRUE); + } + return rc; +} + +/* + * vm_wire - * Specify that the range of the virtual address space * of the target task must not cause page faults for * the indicated accesses. @@ -660,7 +1167,7 @@ vm_map( kern_return_t vm_wire( host_priv_t host_priv, - register vm_map_t map, + vm_map_t map, vm_offset_t start, vm_size_t size, vm_prot_t access) @@ -675,15 +1182,26 @@ vm_wire( if (map == VM_MAP_NULL) return KERN_INVALID_TASK; - if (access & ~VM_PROT_ALL) + if ((access & ~VM_PROT_ALL) || (start + size < start)) return KERN_INVALID_ARGUMENT; - if (access != VM_PROT_NONE) { - rc = vm_map_wire(map, trunc_page(start), - round_page(start+size), access, TRUE); + if (size == 0) { + rc = KERN_SUCCESS; + } else if (access != VM_PROT_NONE) { + rc = vm_map_wire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + access | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK), + TRUE); } else { - rc = vm_map_unwire(map, trunc_page(start), - round_page(start+size), TRUE); + rc = vm_map_unwire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + TRUE); } return rc; } @@ -693,10 +1211,53 @@ vm_wire( * * Synchronises the memory range specified with its backing store * image by either flushing or cleaning the contents to the appropriate - * memory manager engaging in a memory object synchronize dialog with - * the manager. 
The client doesn't return until the manager issues - * m_o_s_completed message. MIG Magically converts user task parameter - * to the task's address map. + * memory manager. + * + * interpretation of sync_flags + * VM_SYNC_INVALIDATE - discard pages, only return precious + * pages to manager. + * + * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS) + * - discard pages, write dirty or precious + * pages back to memory manager. + * + * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS + * - write dirty or precious pages back to + * the memory manager. + * + * VM_SYNC_CONTIGUOUS - does everything normally, but if there + * is a hole in the region, and we would + * have returned KERN_SUCCESS, return + * KERN_INVALID_ADDRESS instead. + * + * RETURNS + * KERN_INVALID_TASK Bad task parameter + * KERN_INVALID_ARGUMENT both sync and async were specified. + * KERN_SUCCESS The usual. + * KERN_INVALID_ADDRESS There was a hole in the region. + */ + +kern_return_t +mach_vm_msync( + vm_map_t map, + mach_vm_address_t address, + mach_vm_size_t size, + vm_sync_t sync_flags) +{ + + if (map == VM_MAP_NULL) + return(KERN_INVALID_TASK); + + return vm_map_msync(map, (vm_map_address_t)address, + (vm_map_size_t)size, sync_flags); +} + +/* + * vm_msync + * + * Synchronises the memory range specified with its backing store + * image by either flushing or cleaning the contents to the appropriate + * memory manager. * * interpretation of sync_flags * VM_SYNC_INVALIDATE - discard pages, only return precious @@ -710,14 +1271,19 @@ vm_wire( * - write dirty or precious pages back to * the memory manager. * - * NOTE - * The memory object attributes have not yet been implemented, this - * function will have to deal with the invalidate attribute + * VM_SYNC_CONTIGUOUS - does everything normally, but if there + * is a hole in the region, and we would + * have returned KERN_SUCCESS, return + * KERN_INVALID_ADDRESS instead. + * + * The addressability of the range is limited to that which can + * be described by a vm_address_t. * * RETURNS * KERN_INVALID_TASK Bad task parameter * KERN_INVALID_ARGUMENT both sync and async were specified. * KERN_SUCCESS The usual. + * KERN_INVALID_ADDRESS There was a hole in the region. 
*/ kern_return_t @@ -727,298 +1293,422 @@ vm_msync( vm_size_t size, vm_sync_t sync_flags) { - msync_req_t msr; - msync_req_t new_msr; - queue_chain_t req_q; /* queue of requests for this msync */ - vm_map_entry_t entry; - vm_size_t amount_left; - vm_object_offset_t offset; - boolean_t do_sync_req; - boolean_t modifiable; - - - if ((sync_flags & VM_SYNC_ASYNCHRONOUS) && - (sync_flags & VM_SYNC_SYNCHRONOUS)) - return(KERN_INVALID_ARGUMENT); - /* - * align address and size on page boundaries - */ - size = round_page(address + size) - trunc_page(address); - address = trunc_page(address); - - if (map == VM_MAP_NULL) - return(KERN_INVALID_TASK); + if (map == VM_MAP_NULL) + return(KERN_INVALID_TASK); - if (size == 0) - return(KERN_SUCCESS); + return vm_map_msync(map, (vm_map_address_t)address, + (vm_map_size_t)size, sync_flags); +} - queue_init(&req_q); - amount_left = size; - while (amount_left > 0) { - vm_size_t flush_size; - vm_object_t object; +int +vm_toggle_entry_reuse(int toggle, int *old_value) +{ + vm_map_t map = current_map(); + + assert(!map->is_nested_map); + if(toggle == VM_TOGGLE_GETVALUE && old_value != NULL){ + *old_value = map->disable_vmentry_reuse; + } else if(toggle == VM_TOGGLE_SET){ + vm_map_entry_t map_to_entry; vm_map_lock(map); - if (!vm_map_lookup_entry(map, address, &entry)) { - vm_size_t skip; + vm_map_disable_hole_optimization(map); + map->disable_vmentry_reuse = TRUE; + __IGNORE_WCASTALIGN(map_to_entry = vm_map_to_entry(map)); + if (map->first_free == map_to_entry) { + map->highest_entry_end = vm_map_min(map); + } else { + map->highest_entry_end = map->first_free->vme_end; + } + vm_map_unlock(map); + } else if (toggle == VM_TOGGLE_CLEAR){ + vm_map_lock(map); + map->disable_vmentry_reuse = FALSE; + vm_map_unlock(map); + } else + return KERN_INVALID_ARGUMENT; - /* - * hole in the address map. - */ + return KERN_SUCCESS; +} - /* - * Check for empty map. - */ - if (entry == vm_map_to_entry(map) && - entry->vme_next == entry) { - vm_map_unlock(map); - break; - } - /* - * Check that we don't wrap and that - * we have at least one real map entry. - */ - if ((map->hdr.nentries == 0) || - (entry->vme_next->vme_start < address)) { - vm_map_unlock(map); - break; - } - /* - * Move up to the next entry if needed - */ - skip = (entry->vme_next->vme_start - address); - if (skip >= amount_left) - amount_left = 0; - else - amount_left -= skip; - address = entry->vme_next->vme_start; - vm_map_unlock(map); - continue; - } +/* + * mach_vm_behavior_set + * + * Sets the paging behavior attribute for the specified range + * in the specified map. + * + * This routine will fail with KERN_INVALID_ADDRESS if any address + * in [start,start+size) is not a valid allocated memory region. + */ +kern_return_t +mach_vm_behavior_set( + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size, + vm_behavior_t new_behavior) +{ + vm_map_offset_t align_mask; - offset = address - entry->vme_start; + if ((map == VM_MAP_NULL) || (start + size < start)) + return(KERN_INVALID_ARGUMENT); + if (size == 0) + return KERN_SUCCESS; + + switch (new_behavior) { + case VM_BEHAVIOR_REUSABLE: + case VM_BEHAVIOR_REUSE: + case VM_BEHAVIOR_CAN_REUSE: /* - * do we have more to flush than is contained in this - * entry ? + * Align to the hardware page size, to allow + * malloc() to maximize the amount of re-usability, + * even on systems with larger software page size. 
*/ - if (amount_left + entry->vme_start + offset > entry->vme_end) { - flush_size = entry->vme_end - - (entry->vme_start + offset); - } else { - flush_size = amount_left; - } - amount_left -= flush_size; - address += flush_size; + align_mask = PAGE_MASK; + break; + default: + align_mask = VM_MAP_PAGE_MASK(map); + break; + } - if (entry->is_sub_map == TRUE) { - vm_map_t local_map; - vm_offset_t local_offset; + return vm_map_behavior_set(map, + vm_map_trunc_page(start, align_mask), + vm_map_round_page(start+size, align_mask), + new_behavior); +} - local_map = entry->object.sub_map; - local_offset = entry->offset; - vm_map_unlock(map); - vm_msync( - local_map, - local_offset, - flush_size, - sync_flags); - continue; - } - object = entry->object.vm_object; +/* + * vm_behavior_set + * + * Sets the paging behavior attribute for the specified range + * in the specified map. + * + * This routine will fail with KERN_INVALID_ADDRESS if any address + * in [start,start+size) is not a valid allocated memory region. + * + * This routine is potentially limited in addressibility by the + * use of vm_offset_t (if the map provided is larger than the + * kernel's). + */ +kern_return_t +vm_behavior_set( + vm_map_t map, + vm_offset_t start, + vm_size_t size, + vm_behavior_t new_behavior) +{ + if (start + size < start) + return KERN_INVALID_ARGUMENT; - /* - * We can't sync this object if the object has not been - * created yet - */ - if (object == VM_OBJECT_NULL) { - vm_map_unlock(map); - continue; - } - offset += entry->offset; - modifiable = (entry->protection & VM_PROT_WRITE) - != VM_PROT_NONE; + return mach_vm_behavior_set(map, + (mach_vm_offset_t) start, + (mach_vm_size_t) size, + new_behavior); +} + +/* + * mach_vm_region: + * + * User call to obtain information about a region in + * a task's address map. Currently, only one flavor is + * supported. + * + * XXX The reserved and behavior fields cannot be filled + * in until the vm merge from the IK is completed, and + * vm_reserve is implemented. + * + * XXX Dependency: syscall_vm_region() also supports only one flavor. + */ - vm_object_lock(object); +kern_return_t +mach_vm_region( + vm_map_t map, + mach_vm_offset_t *address, /* IN/OUT */ + mach_vm_size_t *size, /* OUT */ + vm_region_flavor_t flavor, /* IN */ + vm_region_info_t info, /* OUT */ + mach_msg_type_number_t *count, /* IN/OUT */ + mach_port_t *object_name) /* OUT */ +{ + vm_map_offset_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; - if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { - boolean_t kill_pages = 0; + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - if (sync_flags & VM_SYNC_KILLPAGES) { - if (object->ref_count == 1 && !entry->needs_copy && !object->shadow) - kill_pages = 1; - else - kill_pages = -1; - } - if (kill_pages != -1) - vm_object_deactivate_pages(object, offset, - (vm_object_size_t)flush_size, kill_pages); - vm_object_unlock(object); - vm_map_unlock(map); - continue; - } - /* - * We can't sync this object if there isn't a pager. - * Don't bother to sync internal objects, since there can't - * be any "permanent" storage for these objects anyway. 
- */ - if ((object->pager == MEMORY_OBJECT_NULL) || - (object->internal) || (object->private)) { - vm_object_unlock(object); - vm_map_unlock(map); - continue; - } - /* - * keep reference on the object until syncing is done - */ - assert(object->ref_count > 0); - object->ref_count++; - vm_object_res_reference(object); - vm_object_unlock(object); + map_addr = (vm_map_offset_t)*address; + map_size = (vm_map_size_t)*size; - vm_map_unlock(map); + /* legacy conversion */ + if (VM_REGION_BASIC_INFO == flavor) + flavor = VM_REGION_BASIC_INFO_64; - do_sync_req = vm_object_sync(object, - offset, - flush_size, - sync_flags & VM_SYNC_INVALIDATE, - (modifiable && - (sync_flags & VM_SYNC_SYNCHRONOUS || - sync_flags & VM_SYNC_ASYNCHRONOUS))); + kr = vm_map_region(map, + &map_addr, &map_size, + flavor, info, count, + object_name); - /* - * only send a m_o_s if we returned pages or if the entry - * is writable (ie dirty pages may have already been sent back) - */ - if (!do_sync_req && !modifiable) { - vm_object_deallocate(object); - continue; - } - msync_req_alloc(new_msr); + *address = map_addr; + *size = map_size; + return kr; +} - vm_object_lock(object); - offset += object->paging_offset; +/* + * vm_region_64 and vm_region: + * + * User call to obtain information about a region in + * a task's address map. Currently, only one flavor is + * supported. + * + * XXX The reserved and behavior fields cannot be filled + * in until the vm merge from the IK is completed, and + * vm_reserve is implemented. + * + * XXX Dependency: syscall_vm_region() also supports only one flavor. + */ - new_msr->offset = offset; - new_msr->length = flush_size; - new_msr->object = object; - new_msr->flag = VM_MSYNC_SYNCHRONIZING; -re_iterate: - queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) { - /* - * need to check for overlapping entry, if found, wait - * on overlapping msr to be done, then reiterate - */ - msr_lock(msr); - if (msr->flag == VM_MSYNC_SYNCHRONIZING && - ((offset >= msr->offset && - offset < (msr->offset + msr->length)) || - (msr->offset >= offset && - msr->offset < (offset + flush_size)))) - { - assert_wait((event_t) msr,THREAD_INTERRUPTIBLE); - msr_unlock(msr); - vm_object_unlock(object); - thread_block((void (*)(void))0); - vm_object_lock(object); - goto re_iterate; - } - msr_unlock(msr); - }/* queue_iterate */ +kern_return_t +vm_region_64( + vm_map_t map, + vm_offset_t *address, /* IN/OUT */ + vm_size_t *size, /* OUT */ + vm_region_flavor_t flavor, /* IN */ + vm_region_info_t info, /* OUT */ + mach_msg_type_number_t *count, /* IN/OUT */ + mach_port_t *object_name) /* OUT */ +{ + vm_map_offset_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; - queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q); - vm_object_unlock(object); + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - queue_enter(&req_q, new_msr, msync_req_t, req_q); + map_addr = (vm_map_offset_t)*address; + map_size = (vm_map_size_t)*size; - (void) memory_object_synchronize( - object->pager, - offset, - flush_size, - sync_flags); - }/* while */ + /* legacy conversion */ + if (VM_REGION_BASIC_INFO == flavor) + flavor = VM_REGION_BASIC_INFO_64; - /* - * wait for memory_object_sychronize_completed messages from pager(s) - */ + kr = vm_map_region(map, + &map_addr, &map_size, + flavor, info, count, + object_name); - while (!queue_empty(&req_q)) { - msr = (msync_req_t)queue_first(&req_q); - msr_lock(msr); - while(msr->flag != VM_MSYNC_DONE) { - assert_wait((event_t) msr, THREAD_INTERRUPTIBLE); - msr_unlock(msr); - thread_block((void 
(*)(void))0); - msr_lock(msr); - }/* while */ - queue_remove(&req_q, msr, msync_req_t, req_q); - msr_unlock(msr); - vm_object_deallocate(msr->object); - msync_req_free(msr); - }/* queue_iterate */ + *address = CAST_DOWN(vm_offset_t, map_addr); + *size = CAST_DOWN(vm_size_t, map_size); - return(KERN_SUCCESS); -}/* vm_msync */ + if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; +} + +kern_return_t +vm_region( + vm_map_t map, + vm_address_t *address, /* IN/OUT */ + vm_size_t *size, /* OUT */ + vm_region_flavor_t flavor, /* IN */ + vm_region_info_t info, /* OUT */ + mach_msg_type_number_t *count, /* IN/OUT */ + mach_port_t *object_name) /* OUT */ +{ + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; + + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; + + kr = vm_map_region(map, + &map_addr, &map_size, + flavor, info, count, + object_name); + + *address = CAST_DOWN(vm_address_t, map_addr); + *size = CAST_DOWN(vm_size_t, map_size); + + if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; +} /* - * task_wire + * vm_region_recurse: A form of vm_region which follows the + * submaps in a target map * - * Set or clear the map's wiring_required flag. This flag, if set, - * will cause all future virtual memory allocation to allocate - * user wired memory. Unwiring pages wired down as a result of - * this routine is done with the vm_wire interface. */ kern_return_t -task_wire( - vm_map_t map, - boolean_t must_wire) +mach_vm_region_recurse( + vm_map_t map, + mach_vm_address_t *address, + mach_vm_size_t *size, + uint32_t *depth, + vm_region_recurse_info_t info, + mach_msg_type_number_t *infoCnt) { - if (map == VM_MAP_NULL) - return(KERN_INVALID_ARGUMENT); + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; - if (must_wire) - map->wiring_required = TRUE; - else - map->wiring_required = FALSE; + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - return(KERN_SUCCESS); + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; + + kr = vm_map_region_recurse_64( + map, + &map_addr, + &map_size, + depth, + (vm_region_submap_info_64_t)info, + infoCnt); + + *address = map_addr; + *size = map_size; + return kr; } /* - * vm_behavior_set sets the paging behavior attribute for the - * specified range in the specified map. This routine will fail - * with KERN_INVALID_ADDRESS if any address in [start,start+size) - * is not a valid allocated or reserved memory region. 
+ * vm_region_recurse: A form of vm_region which follows the + * submaps in a target map + * */ -kern_return_t -vm_behavior_set( +kern_return_t +vm_region_recurse_64( + vm_map_t map, + vm_address_t *address, + vm_size_t *size, + uint32_t *depth, + vm_region_recurse_info_64_t info, + mach_msg_type_number_t *infoCnt) +{ + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; + + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; + + kr = vm_map_region_recurse_64( + map, + &map_addr, + &map_size, + depth, + (vm_region_submap_info_64_t)info, + infoCnt); + + *address = CAST_DOWN(vm_address_t, map_addr); + *size = CAST_DOWN(vm_size_t, map_size); + + if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; +} + +kern_return_t +vm_region_recurse( + vm_map_t map, + vm_offset_t *address, /* IN/OUT */ + vm_size_t *size, /* OUT */ + natural_t *depth, /* IN/OUT */ + vm_region_recurse_info_t info32, /* IN/OUT */ + mach_msg_type_number_t *infoCnt) /* IN/OUT */ +{ + vm_region_submap_info_data_64_t info64; + vm_region_submap_info_t info; + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; + + if (VM_MAP_NULL == map || *infoCnt < VM_REGION_SUBMAP_INFO_COUNT) + return KERN_INVALID_ARGUMENT; + + + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; + info = (vm_region_submap_info_t)info32; + *infoCnt = VM_REGION_SUBMAP_INFO_COUNT_64; + + kr = vm_map_region_recurse_64(map, &map_addr,&map_size, + depth, &info64, infoCnt); + + info->protection = info64.protection; + info->max_protection = info64.max_protection; + info->inheritance = info64.inheritance; + info->offset = (uint32_t)info64.offset; /* trouble-maker */ + info->user_tag = info64.user_tag; + info->pages_resident = info64.pages_resident; + info->pages_shared_now_private = info64.pages_shared_now_private; + info->pages_swapped_out = info64.pages_swapped_out; + info->pages_dirtied = info64.pages_dirtied; + info->ref_count = info64.ref_count; + info->shadow_depth = info64.shadow_depth; + info->external_pager = info64.external_pager; + info->share_mode = info64.share_mode; + info->is_submap = info64.is_submap; + info->behavior = info64.behavior; + info->object_id = info64.object_id; + info->user_wired_count = info64.user_wired_count; + + *address = CAST_DOWN(vm_address_t, map_addr); + *size = CAST_DOWN(vm_size_t, map_size); + *infoCnt = VM_REGION_SUBMAP_INFO_COUNT; + + if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; +} + +kern_return_t +mach_vm_purgable_control( vm_map_t map, - vm_offset_t start, - vm_size_t size, - vm_behavior_t new_behavior) + mach_vm_offset_t address, + vm_purgable_t control, + int *state) { - if (map == VM_MAP_NULL) - return(KERN_INVALID_ARGUMENT); + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - return(vm_map_behavior_set(map, trunc_page(start), - round_page(start+size), new_behavior)); + return vm_map_purgable_control(map, + vm_map_trunc_page(address, PAGE_MASK), + control, + state); } -#if VM_CPM -/* - * Control whether the kernel will permit use of - * vm_allocate_cpm at all. 
- */ -unsigned int vm_allocate_cpm_enabled = 1; +kern_return_t +vm_purgable_control( + vm_map_t map, + vm_offset_t address, + vm_purgable_t control, + int *state) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + return vm_map_purgable_control(map, + vm_map_trunc_page(address, PAGE_MASK), + control, + state); +} + /* * Ordinarily, the right to allocate CPM is restricted * to privileged applications (those that can gain access - * to the host port). Set this variable to zero if you - * want to let any application allocate CPM. + * to the host priv port). Set this variable to zero if + * you want to let any application allocate CPM. */ unsigned int vm_allocate_cpm_privileged = 0; @@ -1035,376 +1725,590 @@ unsigned int vm_allocate_cpm_privileged = 0; kern_return_t vm_allocate_cpm( host_priv_t host_priv, - register vm_map_t map, - register vm_offset_t *addr, - register vm_size_t size, + vm_map_t map, + vm_address_t *addr, + vm_size_t size, int flags) { - vm_object_t cpm_obj; - pmap_t pmap; - vm_page_t m, pages; + vm_map_address_t map_addr; + vm_map_size_t map_size; kern_return_t kr; - vm_offset_t va, start, end, offset; -#if MACH_ASSERT - extern vm_offset_t avail_start, avail_end; - vm_offset_t prev_addr; -#endif /* MACH_ASSERT */ - boolean_t anywhere = VM_FLAGS_ANYWHERE & flags; - - if (!vm_allocate_cpm_enabled) - return KERN_FAILURE; - - if (vm_allocate_cpm_privileged && host_priv == HOST_PRIV_NULL) + if (vm_allocate_cpm_privileged && HOST_PRIV_NULL == host_priv) return KERN_INVALID_HOST; - if (map == VM_MAP_NULL) + if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - - assert(host_priv == &realhost); - if (size == 0) { - *addr = 0; - return KERN_SUCCESS; - } + map_addr = (vm_map_address_t)*addr; + map_size = (vm_map_size_t)size; - if (anywhere) - *addr = vm_map_min(map); - else - *addr = trunc_page(*addr); - size = round_page(size); - - if ((kr = cpm_allocate(size, &pages, TRUE)) != KERN_SUCCESS) - return kr; - - cpm_obj = vm_object_allocate(size); - assert(cpm_obj != VM_OBJECT_NULL); - assert(cpm_obj->internal); - assert(cpm_obj->size == size); - assert(cpm_obj->can_persist == FALSE); - assert(cpm_obj->pager_created == FALSE); - assert(cpm_obj->pageout == FALSE); - assert(cpm_obj->shadow == VM_OBJECT_NULL); - - /* - * Insert pages into object. - */ + kr = vm_map_enter_cpm(map, + &map_addr, + map_size, + flags); - vm_object_lock(cpm_obj); - for (offset = 0; offset < size; offset += PAGE_SIZE) { - m = pages; - pages = NEXT_PAGE(m); + *addr = CAST_DOWN(vm_address_t, map_addr); + return kr; +} - assert(!m->gobbled); - assert(!m->wanted); - assert(!m->pageout); - assert(!m->tabled); - assert(m->busy); - assert(m->phys_addr>=avail_start && m->phys_addr<=avail_end); - m->busy = FALSE; - vm_page_insert(m, cpm_obj, offset); - } - assert(cpm_obj->resident_page_count == size / PAGE_SIZE); - vm_object_unlock(cpm_obj); - - /* - * Hang onto a reference on the object in case a - * multi-threaded application for some reason decides - * to deallocate the portion of the address space into - * which we will insert this object. - * - * Unfortunately, we must insert the object now before - * we can talk to the pmap module about which addresses - * must be wired down. Hence, the race with a multi- - * threaded app. - */ - vm_object_reference(cpm_obj); +kern_return_t +mach_vm_page_query( + vm_map_t map, + mach_vm_offset_t offset, + int *disposition, + int *ref_count) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - /* - * Insert object into map. 
- */ + return vm_map_page_query_internal( + map, + vm_map_trunc_page(offset, PAGE_MASK), + disposition, ref_count); +} - kr = vm_map_enter( - map, - addr, - size, - (vm_offset_t)0, - flags, - cpm_obj, - (vm_object_offset_t)0, - FALSE, - VM_PROT_ALL, - VM_PROT_ALL, - VM_INHERIT_DEFAULT); - - if (kr != KERN_SUCCESS) { - /* - * A CPM object doesn't have can_persist set, - * so all we have to do is deallocate it to - * free up these pages. - */ - assert(cpm_obj->pager_created == FALSE); - assert(cpm_obj->can_persist == FALSE); - assert(cpm_obj->pageout == FALSE); - assert(cpm_obj->shadow == VM_OBJECT_NULL); - vm_object_deallocate(cpm_obj); /* kill acquired ref */ - vm_object_deallocate(cpm_obj); /* kill creation ref */ - } +kern_return_t +vm_map_page_query( + vm_map_t map, + vm_offset_t offset, + int *disposition, + int *ref_count) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - /* - * Inform the physical mapping system that the - * range of addresses may not fault, so that - * page tables and such can be locked down as well. - */ - start = *addr; - end = start + size; - pmap = vm_map_pmap(map); - pmap_pageable(pmap, start, end, FALSE); - - /* - * Enter each page into the pmap, to avoid faults. - * Note that this loop could be coded more efficiently, - * if the need arose, rather than looking up each page - * again. - */ - for (offset = 0, va = start; offset < size; - va += PAGE_SIZE, offset += PAGE_SIZE) { - vm_object_lock(cpm_obj); - m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); - vm_object_unlock(cpm_obj); - assert(m != VM_PAGE_NULL); - PMAP_ENTER(pmap, va, m, VM_PROT_ALL, - VM_WIMG_USE_DEFAULT, TRUE); - } + return vm_map_page_query_internal( + map, + vm_map_trunc_page(offset, PAGE_MASK), + disposition, ref_count); +} -#if MACH_ASSERT - /* - * Verify ordering in address space. - */ - for (offset = 0; offset < size; offset += PAGE_SIZE) { - vm_object_lock(cpm_obj); - m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); - vm_object_unlock(cpm_obj); - if (m == VM_PAGE_NULL) - panic("vm_allocate_cpm: obj 0x%x off 0x%x no page", - cpm_obj, offset); - assert(m->tabled); - assert(!m->busy); - assert(!m->wanted); - assert(!m->fictitious); - assert(!m->private); - assert(!m->absent); - assert(!m->error); - assert(!m->cleaning); - assert(!m->precious); - assert(!m->clustered); - if (offset != 0) { - if (m->phys_addr != prev_addr + PAGE_SIZE) { - printf("start 0x%x end 0x%x va 0x%x\n", - start, end, va); - printf("obj 0x%x off 0x%x\n", cpm_obj, offset); - printf("m 0x%x prev_address 0x%x\n", m, - prev_addr); - panic("vm_allocate_cpm: pages not contig!"); - } - } - prev_addr = m->phys_addr; - } -#endif /* MACH_ASSERT */ +kern_return_t +mach_vm_page_info( + vm_map_t map, + mach_vm_address_t address, + vm_page_info_flavor_t flavor, + vm_page_info_t info, + mach_msg_type_number_t *count) +{ + kern_return_t kr; - vm_object_deallocate(cpm_obj); /* kill extra ref */ + if (map == VM_MAP_NULL) { + return KERN_INVALID_ARGUMENT; + } + kr = vm_map_page_info(map, address, flavor, info, count); return kr; } - -#else /* VM_CPM */ - -/* - * Interface is defined in all cases, but unless the kernel - * is built explicitly for this option, the interface does - * nothing. 
- */ - +/* map a (whole) upl into an address space */ kern_return_t -vm_allocate_cpm( - host_priv_t host_priv, - register vm_map_t map, - register vm_offset_t *addr, - register vm_size_t size, - int flags) +vm_upl_map( + vm_map_t map, + upl_t upl, + vm_address_t *dst_addr) { - return KERN_FAILURE; + vm_map_offset_t map_addr; + kern_return_t kr; + + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + kr = vm_map_enter_upl(map, upl, &map_addr); + *dst_addr = CAST_DOWN(vm_address_t, map_addr); + return kr; } -/* - */ kern_return_t -mach_memory_object_memory_entry_64( - host_t host, - boolean_t internal, - vm_object_offset_t size, - vm_prot_t permission, - memory_object_t pager, - ipc_port_t *entry_handle) +vm_upl_unmap( + vm_map_t map, + upl_t upl) { - vm_named_entry_t user_object; - ipc_port_t user_handle; - ipc_port_t previous; - kern_return_t kr; - - if (host == HOST_NULL) - return(KERN_INVALID_HOST); + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - user_object = (vm_named_entry_t) - kalloc(sizeof (struct vm_named_entry)); - if(user_object == NULL) - return KERN_FAILURE; - named_entry_lock_init(user_object); - user_handle = ipc_port_alloc_kernel(); - ip_lock(user_handle); + return (vm_map_remove_upl(map, upl)); +} - /* make a sonce right */ - user_handle->ip_sorights++; - ip_reference(user_handle); +/* Retrieve a upl for an object underlying an address range in a map */ - user_handle->ip_destination = IP_NULL; - user_handle->ip_receiver_name = MACH_PORT_NULL; - user_handle->ip_receiver = ipc_space_kernel; +kern_return_t +vm_map_get_upl( + vm_map_t map, + vm_map_offset_t map_offset, + upl_size_t *upl_size, + upl_t *upl, + upl_page_info_array_t page_list, + unsigned int *count, + upl_control_flags_t *flags, + int force_data_sync) +{ + upl_control_flags_t map_flags; + kern_return_t kr; - /* make a send right */ - user_handle->ip_mscount++; - user_handle->ip_srights++; - ip_reference(user_handle); + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - ipc_port_nsrequest(user_handle, 1, user_handle, &previous); - /* nsrequest unlocks user_handle */ + map_flags = *flags & ~UPL_NOZEROFILL; + if (force_data_sync) + map_flags |= UPL_FORCE_DATA_SYNC; - user_object->object = NULL; - user_object->size = size; - user_object->offset = 0; - user_object->backing.pager = pager; - user_object->protection = permission; - user_object->internal = internal; - user_object->is_sub_map = FALSE; - user_object->ref_count = 1; - - ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, - IKOT_NAMED_ENTRY); - *entry_handle = user_handle; - return KERN_SUCCESS; -} + kr = vm_map_create_upl(map, + map_offset, + upl_size, + upl, + page_list, + count, + &map_flags); -kern_return_t -mach_memory_object_memory_entry( - host_t host, - boolean_t internal, - vm_size_t size, - vm_prot_t permission, - memory_object_t pager, - ipc_port_t *entry_handle) -{ - return mach_memory_object_memory_entry_64( host, internal, - (vm_object_offset_t)size, permission, pager, entry_handle); + *flags = (map_flags & ~UPL_FORCE_DATA_SYNC); + return kr; } - /* + * mach_make_memory_entry_64 + * + * Think of it as a two-stage vm_remap() operation. First + * you get a handle. Second, you get map that handle in + * somewhere else. Rather than doing it all at once (and + * without needing access to the other whole map). 
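+ *	An illustrative user-level sketch of the two stages; task_a,
+ *	task_b, addr_a, addr_b, len, entry and kr are assumptions for the
+ *	example, not values taken from this file:
+ *
+ *		-- stage 1: get a handle on a range of task_a's map
+ *		kr = mach_make_memory_entry_64(task_a, &len, addr_a,
+ *			VM_PROT_READ | VM_PROT_WRITE, &entry, MACH_PORT_NULL);
+ *
+ *		-- stage 2: map that handle into task_b's map
+ *		kr = mach_vm_map(task_b, &addr_b, len, 0, VM_FLAGS_ANYWHERE,
+ *			entry, 0, FALSE, VM_PROT_READ | VM_PROT_WRITE,
+ *			VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_NONE);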
*/ kern_return_t mach_make_memory_entry_64( vm_map_t target_map, - vm_object_size_t *size, - vm_object_offset_t offset, + memory_object_size_t *size, + memory_object_offset_t offset, vm_prot_t permission, ipc_port_t *object_handle, - ipc_port_t parent_entry) + ipc_port_t parent_handle) { vm_map_version_t version; - vm_named_entry_t user_object; + vm_named_entry_t parent_entry; + vm_named_entry_t user_entry; ipc_port_t user_handle; - ipc_port_t previous; kern_return_t kr; - vm_map_t pmap_map; + vm_map_t real_map; /* needed for call to vm_map_lookup_locked */ - boolean_t wired; + boolean_t wired; + boolean_t iskernel; vm_object_offset_t obj_off; - vm_prot_t prot; - vm_object_offset_t lo_offset, hi_offset; - vm_behavior_t behavior; - vm_object_t object; - vm_object_t shadow_object; + vm_prot_t prot; + struct vm_object_fault_info fault_info; + vm_object_t object; + vm_object_t shadow_object; /* needed for direct map entry manipulation */ vm_map_entry_t map_entry; vm_map_entry_t next_entry; - vm_map_t local_map; - vm_map_t original_map = target_map; - vm_offset_t local_offset; + vm_map_t local_map; + vm_map_t original_map = target_map; + vm_map_size_t total_size, map_size; + vm_map_offset_t map_start, map_end; + vm_map_offset_t local_offset; vm_object_size_t mappable_size; - vm_object_size_t total_size; + /* + * Stash the offset in the page for use by vm_map_enter_mem_object() + * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case. + */ + vm_object_offset_t offset_in_page; + + unsigned int access; + vm_prot_t protections; + vm_prot_t original_protections, mask_protections; + unsigned int wimg_mode; + + boolean_t force_shadow = FALSE; + boolean_t use_data_addr; + boolean_t use_4K_compat; + + if (((permission & 0x00FF0000) & + ~(MAP_MEM_ONLY | + MAP_MEM_NAMED_CREATE | + MAP_MEM_GRAB_SECLUDED | /* XXX FBDP TODO: restrict usage? */ + MAP_MEM_PURGABLE | + MAP_MEM_NAMED_REUSE | + MAP_MEM_USE_DATA_ADDR | + MAP_MEM_VM_COPY | + MAP_MEM_4K_DATA_ADDR | + MAP_MEM_VM_SHARE))) { + /* + * Unknown flag: reject for forward compatibility. 
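+	 * The single "permission" argument multiplexes three things: the
+	 * VM_PROT_ bits (plus VM_PROT_IS_MASK), the MAP_MEM_ option flags
+	 * tested against 0x00FF0000 here, and the GET_MAP_MEM() cache-mode
+	 * field used further down. Any option bit this kernel does not
+	 * recognize fails the call instead of being silently ignored.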
+ */ + return KERN_INVALID_VALUE; + } - offset = trunc_page_64(offset); - *size = round_page_64(*size); - - user_object = (vm_named_entry_t) - kalloc(sizeof (struct vm_named_entry)); - if(user_object == NULL) - return KERN_FAILURE; - named_entry_lock_init(user_object); - user_handle = ipc_port_alloc_kernel(); - ip_lock(user_handle); + if (parent_handle != IP_NULL && + ip_kotype(parent_handle) == IKOT_NAMED_ENTRY) { + parent_entry = (vm_named_entry_t) parent_handle->ip_kobject; + } else { + parent_entry = NULL; + } - /* make a sonce right */ - user_handle->ip_sorights++; - ip_reference(user_handle); + if (parent_entry && parent_entry->is_copy) { + return KERN_INVALID_ARGUMENT; + } - user_handle->ip_destination = IP_NULL; - user_handle->ip_receiver_name = MACH_PORT_NULL; - user_handle->ip_receiver = ipc_space_kernel; + original_protections = permission & VM_PROT_ALL; + protections = original_protections; + mask_protections = permission & VM_PROT_IS_MASK; + access = GET_MAP_MEM(permission); + use_data_addr = ((permission & MAP_MEM_USE_DATA_ADDR) != 0); + use_4K_compat = ((permission & MAP_MEM_4K_DATA_ADDR) != 0); - /* make a send right */ - user_handle->ip_mscount++; - user_handle->ip_srights++; - ip_reference(user_handle); + user_handle = IP_NULL; + user_entry = NULL; - ipc_port_nsrequest(user_handle, 1, user_handle, &previous); - /* nsrequest unlocks user_handle */ + map_start = vm_map_trunc_page(offset, PAGE_MASK); + + if (permission & MAP_MEM_ONLY) { + boolean_t parent_is_object; - user_object->backing.pager = NULL; - user_object->ref_count = 1; + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + + if (use_data_addr || use_4K_compat || parent_entry == NULL) { + return KERN_INVALID_ARGUMENT; + } + + parent_is_object = !(parent_entry->is_sub_map || + parent_entry->is_pager); + object = parent_entry->backing.object; + if(parent_is_object && object != VM_OBJECT_NULL) + wimg_mode = object->wimg_bits; + else + wimg_mode = VM_WIMG_USE_DEFAULT; + if((access != GET_MAP_MEM(parent_entry->protection)) && + !(parent_entry->protection & VM_PROT_WRITE)) { + return KERN_INVALID_RIGHT; + } + if(access == MAP_MEM_IO) { + SET_MAP_MEM(access, parent_entry->protection); + wimg_mode = VM_WIMG_IO; + } else if (access == MAP_MEM_COPYBACK) { + SET_MAP_MEM(access, parent_entry->protection); + wimg_mode = VM_WIMG_USE_DEFAULT; + } else if (access == MAP_MEM_INNERWBACK) { + SET_MAP_MEM(access, parent_entry->protection); + wimg_mode = VM_WIMG_INNERWBACK; + } else if (access == MAP_MEM_WTHRU) { + SET_MAP_MEM(access, parent_entry->protection); + wimg_mode = VM_WIMG_WTHRU; + } else if (access == MAP_MEM_WCOMB) { + SET_MAP_MEM(access, parent_entry->protection); + wimg_mode = VM_WIMG_WCOMB; + } + if (parent_is_object && object && + (access != MAP_MEM_NOOP) && + (!(object->nophyscache))) { + + if (object->wimg_bits != wimg_mode) { + vm_object_lock(object); + vm_object_change_wimg_mode(object, wimg_mode); + vm_object_unlock(object); + } + } + if (object_handle) + *object_handle = IP_NULL; + return KERN_SUCCESS; + } else if (permission & MAP_MEM_NAMED_CREATE) { + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + + if (use_data_addr || use_4K_compat) { + return KERN_INVALID_ARGUMENT; + } + + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + return KERN_FAILURE; + } + + /* + * Force the creation of the VM object now. 
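+		 * (This is the MAP_MEM_NAMED_CREATE path: no range of
+		 * "target_map" is consulted; the entry is simply backed by a
+		 * fresh anonymous object of "map_size" bytes. An illustrative
+		 * caller, with the task port and length assumed for the
+		 * example:
+		 *
+		 *	kr = mach_make_memory_entry_64(mach_task_self(), &len, 0,
+		 *		MAP_MEM_NAMED_CREATE | MAP_MEM_PURGABLE |
+		 *		VM_PROT_DEFAULT, &entry, MACH_PORT_NULL);
+		 *
+		 * where MAP_MEM_PURGABLE additionally requires VM_PROT_WRITE,
+		 * as checked below.)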
+ */ + if (map_size > (vm_map_size_t) ANON_MAX_SIZE) { + /* + * LP64todo - for now, we can only allocate 4GB-4096 + * internal objects because the default pager can't + * page bigger ones. Remove this when it can. + */ + kr = KERN_FAILURE; + goto make_mem_done; + } + + object = vm_object_allocate(map_size); + assert(object != VM_OBJECT_NULL); + + if (permission & MAP_MEM_PURGABLE) { + if (! (permission & VM_PROT_WRITE)) { + /* if we can't write, we can't purge */ + vm_object_deallocate(object); + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } + object->purgable = VM_PURGABLE_NONVOLATILE; + assert(object->vo_purgeable_owner == NULL); + assert(object->resident_page_count == 0); + assert(object->wired_page_count == 0); + vm_object_lock(object); + vm_purgeable_nonvolatile_enqueue(object, + current_task()); + vm_object_unlock(object); + } + +#if CONFIG_SECLUDED_MEMORY + if (secluded_for_iokit && /* global boot-arg */ + ((permission & MAP_MEM_GRAB_SECLUDED) +#if 11 + /* XXX FBDP for my testing only */ + || (secluded_for_fbdp && map_size == 97550336) +#endif + )) { +#if 11 + if (!(permission & MAP_MEM_GRAB_SECLUDED) && + secluded_for_fbdp) { + printf("FBDP: object %p size %lld can grab secluded\n", object, (uint64_t) map_size); + } +#endif + object->can_grab_secluded = TRUE; + assert(!object->eligible_for_secluded); + } +#endif /* CONFIG_SECLUDED_MEMORY */ + + /* + * The VM object is brand new and nobody else knows about it, + * so we don't need to lock it. + */ + + wimg_mode = object->wimg_bits; + if (access == MAP_MEM_IO) { + wimg_mode = VM_WIMG_IO; + } else if (access == MAP_MEM_COPYBACK) { + wimg_mode = VM_WIMG_USE_DEFAULT; + } else if (access == MAP_MEM_INNERWBACK) { + wimg_mode = VM_WIMG_INNERWBACK; + } else if (access == MAP_MEM_WTHRU) { + wimg_mode = VM_WIMG_WTHRU; + } else if (access == MAP_MEM_WCOMB) { + wimg_mode = VM_WIMG_WCOMB; + } + if (access != MAP_MEM_NOOP) { + object->wimg_bits = wimg_mode; + } + /* the object has no pages, so no WIMG bits to update here */ + + /* + * XXX + * We use this path when we want to make sure that + * nobody messes with the object (coalesce, for + * example) before we map it. + * We might want to use these objects for transposition via + * vm_object_transpose() too, so we don't want any copy or + * shadow objects either... + */ + object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + object->true_share = TRUE; + + user_entry->backing.object = object; + user_entry->internal = TRUE; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->offset = 0; + user_entry->data_offset = 0; + user_entry->protection = protections; + SET_MAP_MEM(access, user_entry->protection); + user_entry->size = map_size; + + /* user_object pager and internal fields are not used */ + /* when the object field is filled in. 
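+		 * Only one member of the "backing" union is valid for a given
+		 * named entry; the is_sub_map / is_pager / is_copy flags record
+		 * which one, and they are all FALSE for a plain VM-object-backed
+		 * entry like this one (is_copy is cleared in
+		 * mach_memory_entry_allocate()).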
*/ + + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); + *object_handle = user_handle; + return KERN_SUCCESS; + } + + if (permission & MAP_MEM_VM_COPY) { + vm_map_copy_t copy; + + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + if (use_data_addr || use_4K_compat) { + offset_in_page = offset - map_start; + if (use_4K_compat) + offset_in_page &= ~((signed)(0xFFF)); + } else { + offset_in_page = 0; + } + + kr = vm_map_copyin_internal(target_map, + map_start, + map_size, + VM_MAP_COPYIN_ENTRY_LIST, + ©); + if (kr != KERN_SUCCESS) { + return kr; + } + + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return KERN_FAILURE; + } + + user_entry->backing.copy = copy; + user_entry->internal = FALSE; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->is_copy = TRUE; + user_entry->offset = 0; + user_entry->protection = protections; + user_entry->size = map_size; + user_entry->data_offset = offset_in_page; + + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); + *object_handle = user_handle; + return KERN_SUCCESS; + } + + if (permission & MAP_MEM_VM_SHARE) { + vm_map_copy_t copy; + vm_prot_t cur_prot, max_prot; + + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + if (use_data_addr || use_4K_compat) { + offset_in_page = offset - map_start; + if (use_4K_compat) + offset_in_page &= ~((signed)(0xFFF)); + } else { + offset_in_page = 0; + } + + cur_prot = VM_PROT_ALL; + kr = vm_map_copy_extract(target_map, + map_start, + map_size, + ©, + &cur_prot, + &max_prot); + if (kr != KERN_SUCCESS) { + return kr; + } + + if (mask_protections) { + /* + * We just want as much of "original_protections" + * as we can get out of the actual "cur_prot". + */ + protections &= cur_prot; + if (protections == VM_PROT_NONE) { + /* no access at all: fail */ + vm_map_copy_discard(copy); + return KERN_PROTECTION_FAILURE; + } + } else { + /* + * We want exactly "original_protections" + * out of "cur_prot". 
+ */ + if ((cur_prot & protections) != protections) { + vm_map_copy_discard(copy); + return KERN_PROTECTION_FAILURE; + } + } + + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return KERN_FAILURE; + } + + user_entry->backing.copy = copy; + user_entry->internal = FALSE; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->is_copy = TRUE; + user_entry->offset = 0; + user_entry->protection = protections; + user_entry->size = map_size; + user_entry->data_offset = offset_in_page; + + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); + *object_handle = user_handle; + return KERN_SUCCESS; + } + + if (parent_entry == NULL || + (permission & MAP_MEM_NAMED_REUSE)) { + + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + if (use_data_addr || use_4K_compat) { + offset_in_page = offset - map_start; + if (use_4K_compat) + offset_in_page &= ~((signed)(0xFFF)); + } else { + offset_in_page = 0; + } + + /* Create a named object based on address range within the task map */ + /* Go find the object at given address */ - if(parent_entry == NULL) { - /* Create a named object based on address range within the task map */ - /* Go find the object at given address */ + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } - permission &= VM_PROT_ALL; +redo_lookup: + protections = original_protections; vm_map_lock_read(target_map); /* get the object associated with the target address */ /* note we check the permission of the range against */ /* that requested by the caller */ - kr = vm_map_lookup_locked(&target_map, offset, - permission, &version, - &object, &obj_off, &prot, &wired, &behavior, - &lo_offset, &hi_offset, &pmap_map); + kr = vm_map_lookup_locked(&target_map, map_start, + protections | mask_protections, + OBJECT_LOCK_EXCLUSIVE, &version, + &object, &obj_off, &prot, &wired, + &fault_info, + &real_map); if (kr != KERN_SUCCESS) { vm_map_unlock_read(target_map); goto make_mem_done; } - if (((prot & permission) != permission) - || (object == kernel_object)) { + if (mask_protections) { + /* + * The caller asked us to use the "protections" as + * a mask, so restrict "protections" to what this + * mapping actually allows. + */ + protections &= prot; + } + + if (((prot & protections) != protections) + || (object == kernel_object)) { kr = KERN_INVALID_RIGHT; vm_object_unlock(object); vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); if(object == kernel_object) { printf("Warning: Attempt to create a named" " entry from the kernel_object\n"); @@ -1412,86 +2316,135 @@ mach_make_memory_entry_64( goto make_mem_done; } - /* We have an object, now check to see if this object */ - /* is suitable. If not, create a shadow and share that */ - -redo_lookup: + /* We have an object, now check to see if this object */ + /* is suitable. If not, create a shadow and share that */ + + /* + * We have to unlock the VM object to avoid deadlocking with + * a VM map lock (the lock ordering is map, the object), if we + * need to modify the VM map to create a shadow object. Since + * we might release the VM map lock below anyway, we have + * to release the VM map lock now. + * XXX FBDP There must be a way to avoid this double lookup... + * + * Take an extra reference on the VM object to make sure it's + * not going to disappear. 
+ */ + vm_object_reference_locked(object); /* extra ref to hold obj */ + vm_object_unlock(object); + local_map = original_map; - local_offset = offset; + local_offset = map_start; if(target_map != local_map) { vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); vm_map_lock_read(local_map); target_map = local_map; - pmap_map = local_map; + real_map = local_map; } while(TRUE) { if(!vm_map_lookup_entry(local_map, local_offset, &map_entry)) { kr = KERN_INVALID_ARGUMENT; - vm_object_unlock(object); vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); + vm_object_deallocate(object); /* release extra ref */ + object = VM_OBJECT_NULL; goto make_mem_done; } + iskernel = (local_map->pmap == kernel_pmap); if(!(map_entry->is_sub_map)) { - if(map_entry->object.vm_object != object) { + if (VME_OBJECT(map_entry) != object) { kr = KERN_INVALID_ARGUMENT; - vm_object_unlock(object); vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); + vm_object_deallocate(object); /* release extra ref */ + object = VM_OBJECT_NULL; goto make_mem_done; } - if(map_entry->wired_count) { - object->true_share = TRUE; - } break; } else { vm_map_t tmap; tmap = local_map; - local_map = map_entry->object.sub_map; + local_map = VME_SUBMAP(map_entry); vm_map_lock_read(local_map); vm_map_unlock_read(tmap); target_map = local_map; - pmap_map = local_map; + real_map = local_map; local_offset = local_offset - map_entry->vme_start; - local_offset += map_entry->offset; + local_offset += VME_OFFSET(map_entry); } } - if(((map_entry->max_protection) & permission) != permission) { + + /* + * We found the VM map entry, lock the VM object again. + */ + vm_object_lock(object); + if(map_entry->wired_count) { + /* JMM - The check below should be reworked instead. */ + object->true_share = TRUE; + } + if (mask_protections) { + /* + * The caller asked us to use the "protections" as + * a mask, so restrict "protections" to what this + * mapping actually allows. 
+ */ + protections &= map_entry->max_protection; + } + if(((map_entry->max_protection) & protections) != protections) { kr = KERN_INVALID_RIGHT; vm_object_unlock(object); vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); + vm_object_deallocate(object); + object = VM_OBJECT_NULL; goto make_mem_done; } - mappable_size = hi_offset - obj_off; + mappable_size = fault_info.hi_offset - obj_off; total_size = map_entry->vme_end - map_entry->vme_start; - if(*size > mappable_size) { + if(map_size > mappable_size) { /* try to extend mappable size if the entries */ /* following are from the same object and are */ /* compatible */ next_entry = map_entry->vme_next; /* lets see if the next map entry is still */ /* pointing at this object and is contiguous */ - while(*size > mappable_size) { - if((next_entry->object.vm_object == object) && - (next_entry->vme_start == - next_entry->vme_prev->vme_end) && - (next_entry->offset == - next_entry->vme_prev->offset + - (next_entry->vme_prev->vme_end - - next_entry->vme_prev->vme_start))) { + while(map_size > mappable_size) { + if ((VME_OBJECT(next_entry) == object) && + (next_entry->vme_start == + next_entry->vme_prev->vme_end) && + (VME_OFFSET(next_entry) == + (VME_OFFSET(next_entry->vme_prev) + + (next_entry->vme_prev->vme_end - + next_entry->vme_prev->vme_start)))) { + if (mask_protections) { + /* + * The caller asked us to use + * the "protections" as a mask, + * so restrict "protections" to + * what this mapping actually + * allows. + */ + protections &= next_entry->max_protection; + } + if ((next_entry->wired_count) && + (map_entry->wired_count == 0)) { + break; + } if(((next_entry->max_protection) - & permission) != permission) { + & protections) != protections) { break; } + if (next_entry->needs_copy != + map_entry->needs_copy) + break; mappable_size += next_entry->vme_end - next_entry->vme_start; total_size += next_entry->vme_end @@ -1504,48 +2457,137 @@ redo_lookup: } } - if(object->internal) { + /* vm_map_entry_should_cow_for_true_share() checks for malloc tags, + * never true in kernel */ + if (!iskernel && vm_map_entry_should_cow_for_true_share(map_entry) && + object->vo_size > map_size && + map_size != 0) { + /* + * Set up the targeted range for copy-on-write to + * limit the impact of "true_share"/"copy_delay" to + * that range instead of the entire VM object... + */ + + vm_object_unlock(object); + if (vm_map_lock_read_to_write(target_map)) { + vm_object_deallocate(object); + target_map = original_map; + goto redo_lookup; + } + + vm_map_clip_start(target_map, + map_entry, + vm_map_trunc_page(map_start, + VM_MAP_PAGE_MASK(target_map))); + vm_map_clip_end(target_map, + map_entry, + (vm_map_round_page(map_end, + VM_MAP_PAGE_MASK(target_map)))); + force_shadow = TRUE; + + if ((map_entry->vme_end - offset) < map_size) { + map_size = map_entry->vme_end - map_start; + } + total_size = map_entry->vme_end - map_entry->vme_start; + + vm_map_lock_write_to_read(target_map); + vm_object_lock(object); + } + + if (object->internal) { /* vm_map_lookup_locked will create a shadow if */ /* needs_copy is set but does not check for the */ /* other two conditions shown. It is important to */ /* set up an object which will not be pulled from */ /* under us. 
*/ - if ((map_entry->needs_copy || object->shadowed || - (object->size > total_size)) - && !object->true_share) { + if (force_shadow || + ((map_entry->needs_copy || + object->shadowed || + (object->vo_size > total_size && + (VME_OFFSET(map_entry) != 0 || + object->vo_size > + vm_map_round_page(total_size, + VM_MAP_PAGE_MASK(target_map))))) + && !object->true_share)) { + /* + * We have to unlock the VM object before + * trying to upgrade the VM map lock, to + * honor lock ordering (map then object). + * Otherwise, we would deadlock if another + * thread holds a read lock on the VM map and + * is trying to acquire the VM object's lock. + * We still hold an extra reference on the + * VM object, guaranteeing that it won't + * disappear. + */ + vm_object_unlock(object); + if (vm_map_lock_read_to_write(target_map)) { - vm_map_lock_read(target_map); + /* + * We couldn't upgrade our VM map lock + * from "read" to "write" and we lost + * our "read" lock. + * Start all over again... + */ + vm_object_deallocate(object); /* extra ref */ + target_map = original_map; goto redo_lookup; } - - +#if 00 + vm_object_lock(object); +#endif + + /* + * JMM - We need to avoid coming here when the object + * is wired by anybody, not just the current map. Why + * couldn't we use the standard vm_object_copy_quickly() + * approach here? + */ + /* create a shadow object */ - vm_object_shadow(&map_entry->object.vm_object, - &map_entry->offset, total_size); - shadow_object = map_entry->object.vm_object; + VME_OBJECT_SHADOW(map_entry, total_size); + shadow_object = VME_OBJECT(map_entry); +#if 00 vm_object_unlock(object); +#endif + + prot = map_entry->protection & ~VM_PROT_WRITE; + + if (override_nx(target_map, + VME_ALIAS(map_entry)) + && prot) + prot |= VM_PROT_EXECUTE; + vm_object_pmap_protect( - object, map_entry->offset, + object, VME_OFFSET(map_entry), total_size, ((map_entry->is_shared - || target_map->mapped) + || target_map->mapped_in_other_pmaps) ? PMAP_NULL : target_map->pmap), map_entry->vme_start, - map_entry->protection & ~VM_PROT_WRITE); + prot); total_size -= (map_entry->vme_end - map_entry->vme_start); next_entry = map_entry->vme_next; map_entry->needs_copy = FALSE; + + vm_object_lock(shadow_object); while (total_size) { - if(next_entry->object.vm_object == object) { - next_entry->object.vm_object - = shadow_object; - next_entry->offset - = next_entry->vme_prev->offset + - (next_entry->vme_prev->vme_end - - next_entry->vme_prev->vme_start); + assert((next_entry->wired_count == 0) || + (map_entry->wired_count)); + + if (VME_OBJECT(next_entry) == object) { + vm_object_reference_locked(shadow_object); + VME_OBJECT_SET(next_entry, + shadow_object); + vm_object_deallocate(object); + VME_OFFSET_SET( + next_entry, + (VME_OFFSET(next_entry->vme_prev) + + (next_entry->vme_prev->vme_end + - next_entry->vme_prev->vme_start))); next_entry->needs_copy = FALSE; } else { panic("mach_make_memory_entry_64:" @@ -1557,13 +2599,18 @@ redo_lookup: next_entry = next_entry->vme_next; } + /* + * Transfer our extra reference to the + * shadow object. + */ + vm_object_reference_locked(shadow_object); + vm_object_deallocate(object); /* extra ref */ object = shadow_object; - vm_object_lock(object); - obj_off = (local_offset - map_entry->vme_start) - + map_entry->offset; - vm_map_lock_write_to_read(target_map); + obj_off = ((local_offset - map_entry->vme_start) + + VME_OFFSET(map_entry)); + vm_map_lock_write_to_read(target_map); } } @@ -1577,14 +2624,55 @@ redo_lookup: /* target of ipc's, etc. 
The code above, protecting */ /* against delayed copy, etc. is mostly defensive. */ + wimg_mode = object->wimg_bits; + if(!(object->nophyscache)) { + if(access == MAP_MEM_IO) { + wimg_mode = VM_WIMG_IO; + } else if (access == MAP_MEM_COPYBACK) { + wimg_mode = VM_WIMG_USE_DEFAULT; + } else if (access == MAP_MEM_INNERWBACK) { + wimg_mode = VM_WIMG_INNERWBACK; + } else if (access == MAP_MEM_WTHRU) { + wimg_mode = VM_WIMG_WTHRU; + } else if (access == MAP_MEM_WCOMB) { + wimg_mode = VM_WIMG_WCOMB; + } + } +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); + } +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + vm_object_lock_assert_exclusive(object); object->true_share = TRUE; - user_object->object = object; - user_object->internal = object->internal; - user_object->is_sub_map = FALSE; - user_object->offset = obj_off; - user_object->protection = permission; + if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) + object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; + + /* + * The memory entry now points to this VM object and we + * need to hold a reference on the VM object. Use the extra + * reference we took earlier to keep the object alive when we + * had to unlock it. + */ + + vm_map_unlock_read(target_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); + + if (object->wimg_bits != wimg_mode) + vm_object_change_wimg_mode(object, wimg_mode); /* the size of mapped entry that overlaps with our region */ /* which is targeted for share. */ @@ -1592,77 +2680,221 @@ redo_lookup: /* offset of our beg addr within entry */ /* it corresponds to this: */ - if(*size > mappable_size) - *size = mappable_size; + if(map_size > mappable_size) + map_size = mappable_size; + + if (permission & MAP_MEM_NAMED_REUSE) { + /* + * Compare what we got with the "parent_entry". + * If they match, re-use the "parent_entry" instead + * of creating a new one. + */ + if (parent_entry != NULL && + parent_entry->backing.object == object && + parent_entry->internal == object->internal && + parent_entry->is_sub_map == FALSE && + parent_entry->is_pager == FALSE && + parent_entry->offset == obj_off && + parent_entry->protection == protections && + parent_entry->size == map_size && + ((!(use_data_addr || use_4K_compat) && + (parent_entry->data_offset == 0)) || + ((use_data_addr || use_4K_compat) && + (parent_entry->data_offset == offset_in_page)))) { + /* + * We have a match: re-use "parent_entry". + */ + /* release our extra reference on object */ + vm_object_unlock(object); + vm_object_deallocate(object); + /* parent_entry->ref_count++; XXX ? */ + /* Get an extra send-right on handle */ + ipc_port_copy_send(parent_handle); + + *size = CAST_DOWN(vm_size_t, + (parent_entry->size - + parent_entry->data_offset)); + *object_handle = parent_handle; + return KERN_SUCCESS; + } else { + /* + * No match: we need to create a new entry. + * fall through... 
+ */ + } + } + + vm_object_unlock(object); + if (mach_memory_entry_allocate(&user_entry, &user_handle) + != KERN_SUCCESS) { + /* release our unused reference on the object */ + vm_object_deallocate(object); + return KERN_FAILURE; + } - user_object->size = *size; + user_entry->backing.object = object; + user_entry->internal = object->internal; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->offset = obj_off; + user_entry->data_offset = offset_in_page; + user_entry->protection = protections; + SET_MAP_MEM(GET_MAP_MEM(permission), user_entry->protection); + user_entry->size = map_size; /* user_object pager and internal fields are not used */ /* when the object field is filled in. */ - object->ref_count++; /* we now point to this object, hold on */ - vm_object_res_reference(object); - vm_object_unlock(object); - ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, - IKOT_NAMED_ENTRY); + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); *object_handle = user_handle; - vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); return KERN_SUCCESS; - } else { - - vm_named_entry_t parent_object; + } else { /* The new object will be base on an existing named object */ - if(ip_kotype(parent_entry) != IKOT_NAMED_ENTRY) { + if (parent_entry == NULL) { kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } - parent_object = (vm_named_entry_t)parent_entry->ip_kobject; - if(permission & parent_object->protection != permission) { - kr = KERN_INVALID_ARGUMENT; + + if (use_data_addr || use_4K_compat) { + /* + * submaps and pagers should only be accessible from within + * the kernel, which shouldn't use the data address flag, so can fail here. + */ + if (parent_entry->is_pager || parent_entry->is_sub_map) { + panic("Shouldn't be using data address with a parent entry that is a submap or pager."); + } + /* + * Account for offset to data in parent entry and + * compute our own offset to data. + */ + if((offset + *size + parent_entry->data_offset) > parent_entry->size) { + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } + + map_start = vm_map_trunc_page(offset + parent_entry->data_offset, PAGE_MASK); + offset_in_page = (offset + parent_entry->data_offset) - map_start; + if (use_4K_compat) + offset_in_page &= ~((signed)(0xFFF)); + map_end = vm_map_round_page(offset + parent_entry->data_offset + *size, PAGE_MASK); + map_size = map_end - map_start; + } else { + map_end = vm_map_round_page(offset + *size, PAGE_MASK); + map_size = map_end - map_start; + offset_in_page = 0; + + if((offset + map_size) > parent_entry->size) { + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } + } + + if (mask_protections) { + /* + * The caller asked us to use the "protections" as + * a mask, so restrict "protections" to what this + * mapping actually allows. 
+ */ + protections &= parent_entry->protection; + } + if((protections & parent_entry->protection) != protections) { + kr = KERN_PROTECTION_FAILURE; goto make_mem_done; } - if((offset + *size) > parent_object->size) { - kr = KERN_INVALID_ARGUMENT; + + if (mach_memory_entry_allocate(&user_entry, &user_handle) + != KERN_SUCCESS) { + kr = KERN_FAILURE; goto make_mem_done; } - user_object->object = parent_object->object; - user_object->size = *size; - user_object->offset = parent_object->offset + offset; - user_object->protection = permission; - if(parent_object->is_sub_map) { - user_object->backing.map = parent_object->backing.map; - vm_map_lock(user_object->backing.map); - user_object->backing.map->ref_count++; - vm_map_unlock(user_object->backing.map); + user_entry->size = map_size; + user_entry->offset = parent_entry->offset + map_start; + user_entry->data_offset = offset_in_page; + user_entry->is_sub_map = parent_entry->is_sub_map; + user_entry->is_pager = parent_entry->is_pager; + user_entry->is_copy = parent_entry->is_copy; + user_entry->internal = parent_entry->internal; + user_entry->protection = protections; + + if(access != MAP_MEM_NOOP) { + SET_MAP_MEM(access, user_entry->protection); } - else { - user_object->backing.pager = parent_object->backing.pager; + + if(parent_entry->is_sub_map) { + user_entry->backing.map = parent_entry->backing.map; + vm_map_lock(user_entry->backing.map); + user_entry->backing.map->ref_count++; + vm_map_unlock(user_entry->backing.map); + } + else if (parent_entry->is_pager) { + user_entry->backing.pager = parent_entry->backing.pager; + /* JMM - don't we need a reference here? */ + } else { + object = parent_entry->backing.object; + assert(object != VM_OBJECT_NULL); + user_entry->backing.object = object; + /* we now point to this object, hold on */ + vm_object_lock(object); + vm_object_reference_locked(object); +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; + + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); } - user_object->internal = parent_object->internal; - user_object->is_sub_map = parent_object->is_sub_map; - - if(parent_object->object != NULL) { - /* we now point to this object, hold on */ - vm_object_reference(parent_object->object); - vm_object_lock(parent_object->object); - parent_object->object->true_share = TRUE; - vm_object_unlock(parent_object->object); +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + + object->true_share = TRUE; + if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) + object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; + vm_object_unlock(object); } - ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, - IKOT_NAMED_ENTRY); + *size = CAST_DOWN(vm_size_t, (user_entry->size - + user_entry->data_offset)); *object_handle = user_handle; return KERN_SUCCESS; } - - make_mem_done: - ipc_port_dealloc_kernel(user_handle); - kfree((vm_offset_t)user_object, sizeof (struct vm_named_entry)); + if (user_handle != IP_NULL) { + /* + * Releasing "user_handle" causes the kernel object + * associated with it ("user_entry" here) to also be + * released and freed. 
+ */ + mach_memory_entry_port_release(user_handle); + } + return kr; +} + +kern_return_t +_mach_make_memory_entry( + vm_map_t target_map, + memory_object_size_t *size, + memory_object_offset_t offset, + vm_prot_t permission, + ipc_port_t *object_handle, + ipc_port_t parent_entry) +{ + memory_object_size_t mo_size; + kern_return_t kr; + + mo_size = (memory_object_size_t)*size; + kr = mach_make_memory_entry_64(target_map, &mo_size, + (memory_object_offset_t)offset, permission, object_handle, + parent_entry); + *size = mo_size; return kr; } @@ -1674,47 +2906,62 @@ mach_make_memory_entry( vm_prot_t permission, ipc_port_t *object_handle, ipc_port_t parent_entry) -{ - vm_object_offset_t size_64; +{ + memory_object_size_t mo_size; kern_return_t kr; - size_64 = (vm_object_offset_t)*size; - kr = mach_make_memory_entry_64(target_map, &size_64, - (vm_object_offset_t)offset, permission, object_handle, + mo_size = (memory_object_size_t)*size; + kr = mach_make_memory_entry_64(target_map, &mo_size, + (memory_object_offset_t)offset, permission, object_handle, parent_entry); - *size = (vm_size_t)size_64; + *size = CAST_DOWN(vm_size_t, mo_size); return kr; } /* + * task_wire + * + * Set or clear the map's wiring_required flag. This flag, if set, + * will cause all future virtual memory allocation to allocate + * user wired memory. Unwiring pages wired down as a result of + * this routine is done with the vm_wire interface. */ - kern_return_t -vm_region_object_create( - vm_map_t target_map, - vm_size_t size, - ipc_port_t *object_handle) +task_wire( + vm_map_t map, + boolean_t must_wire) +{ + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + if (must_wire) + map->wiring_required = TRUE; + else + map->wiring_required = FALSE; + + return(KERN_SUCCESS); +} + +__private_extern__ kern_return_t +mach_memory_entry_allocate( + vm_named_entry_t *user_entry_p, + ipc_port_t *user_handle_p) { - vm_named_entry_t user_object; + vm_named_entry_t user_entry; ipc_port_t user_handle; - kern_return_t kr; + ipc_port_t previous; - pmap_t new_pmap = pmap_create((vm_size_t) 0); - ipc_port_t previous; - vm_map_t new_map; - - if(new_pmap == PMAP_NULL) + user_entry = (vm_named_entry_t) kalloc(sizeof *user_entry); + if (user_entry == NULL) return KERN_FAILURE; - user_object = (vm_named_entry_t) - kalloc(sizeof (struct vm_named_entry)); - if(user_object == NULL) { - pmap_destroy(new_pmap); - return KERN_FAILURE; - } - named_entry_lock_init(user_object); - user_handle = ipc_port_alloc_kernel(); + named_entry_lock_init(user_entry); + user_handle = ipc_port_alloc_kernel(); + if (user_handle == IP_NULL) { + kfree(user_entry, sizeof *user_entry); + return KERN_FAILURE; + } ip_lock(user_handle); /* make a sonce right */ @@ -1733,117 +2980,217 @@ vm_region_object_create( ipc_port_nsrequest(user_handle, 1, user_handle, &previous); /* nsrequest unlocks user_handle */ - /* Create a named object based on a submap of specified size */ + user_entry->backing.pager = NULL; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->is_copy = FALSE; + user_entry->internal = FALSE; + user_entry->size = 0; + user_entry->offset = 0; + user_entry->data_offset = 0; + user_entry->protection = VM_PROT_NONE; + user_entry->ref_count = 1; - new_map = vm_map_create(new_pmap, 0, size, TRUE); - user_object->backing.map = new_map; + ipc_kobject_set(user_handle, (ipc_kobject_t) user_entry, + IKOT_NAMED_ENTRY); + *user_entry_p = user_entry; + *user_handle_p = user_handle; - user_object->object = VM_OBJECT_NULL; - user_object->internal = 
TRUE; - user_object->is_sub_map = TRUE; - user_object->offset = 0; - user_object->protection = VM_PROT_ALL; - user_object->size = size; - user_object->ref_count = 1; + return KERN_SUCCESS; +} - ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, - IKOT_NAMED_ENTRY); - *object_handle = user_handle; +/* + * mach_memory_object_memory_entry_64 + * + * Create a named entry backed by the provided pager. + * + * JMM - we need to hold a reference on the pager - + * and release it when the named entry is destroyed. + */ +kern_return_t +mach_memory_object_memory_entry_64( + host_t host, + boolean_t internal, + vm_object_offset_t size, + vm_prot_t permission, + memory_object_t pager, + ipc_port_t *entry_handle) +{ + unsigned int access; + vm_named_entry_t user_entry; + ipc_port_t user_handle; + + if (host == HOST_NULL) + return(KERN_INVALID_HOST); + + if (mach_memory_entry_allocate(&user_entry, &user_handle) + != KERN_SUCCESS) { + return KERN_FAILURE; + } + + user_entry->backing.pager = pager; + user_entry->size = size; + user_entry->offset = 0; + user_entry->protection = permission & VM_PROT_ALL; + access = GET_MAP_MEM(permission); + SET_MAP_MEM(access, user_entry->protection); + user_entry->internal = internal; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = TRUE; + assert(user_entry->ref_count == 1); + + *entry_handle = user_handle; return KERN_SUCCESS; +} +kern_return_t +mach_memory_object_memory_entry( + host_t host, + boolean_t internal, + vm_size_t size, + vm_prot_t permission, + memory_object_t pager, + ipc_port_t *entry_handle) +{ + return mach_memory_object_memory_entry_64( host, internal, + (vm_object_offset_t)size, permission, pager, entry_handle); } -/* For a given range, check all map entries. If the entry coresponds to */ -/* the old vm_region/map provided on the call, replace it with the */ -/* corresponding range in the new vm_region/map */ -kern_return_t vm_map_region_replace( - vm_map_t target_map, - ipc_port_t old_region, - ipc_port_t new_region, - vm_offset_t start, - vm_offset_t end) + +kern_return_t +mach_memory_entry_purgable_control( + ipc_port_t entry_port, + vm_purgable_t control, + int *state) { - vm_named_entry_t old_object; - vm_named_entry_t new_object; - vm_map_t old_submap; - vm_map_t new_submap; - vm_offset_t addr; - vm_map_entry_t entry; - int nested_pmap = 0; + kern_return_t kr; + vm_named_entry_t mem_entry; + vm_object_t object; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } + if (control != VM_PURGABLE_SET_STATE && + control != VM_PURGABLE_GET_STATE) + return(KERN_INVALID_ARGUMENT); + + if (control == VM_PURGABLE_SET_STATE && + (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) || + ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) + return(KERN_INVALID_ARGUMENT); + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; - vm_map_lock(target_map); - old_object = (vm_named_entry_t)old_region->ip_kobject; - new_object = (vm_named_entry_t)new_region->ip_kobject; - if((!old_object->is_sub_map) || (!new_object->is_sub_map)) { - vm_map_unlock(target_map); + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; } - old_submap = (vm_map_t)old_object->backing.map; - new_submap = (vm_map_t)new_object->backing.map; - vm_map_lock(old_submap); - if((old_submap->min_offset != new_submap->min_offset) || - (old_submap->max_offset != new_submap->max_offset)) { - 
vm_map_unlock(old_submap); - vm_map_unlock(target_map); + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; } - if(!vm_map_lookup_entry(target_map, start, &entry)) { - /* if the src is not contained, the entry preceeds */ - /* our range */ - addr = entry->vme_start; - if(entry == vm_map_to_entry(target_map)) { - vm_map_unlock(old_submap); - vm_map_unlock(target_map); - return KERN_SUCCESS; - } - vm_map_lookup_entry(target_map, addr, &entry); + + vm_object_lock(object); + + /* check that named entry covers entire object ? */ + if (mem_entry->offset != 0 || object->vo_size != mem_entry->size) { + vm_object_unlock(object); + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; } - addr = entry->vme_start; - vm_map_reference(old_submap); - while((entry != vm_map_to_entry(target_map)) && - (entry->vme_start < end)) { - if((entry->is_sub_map) && - (entry->object.sub_map == old_submap)) { - if(entry->use_pmap) { - if((start & 0xfffffff) || - ((end - start) != 0x10000000)) { - vm_map_unlock(old_submap); - vm_map_deallocate(old_submap); - vm_map_unlock(target_map); - return KERN_INVALID_ARGUMENT; - } - nested_pmap = 1; - } - entry->object.sub_map = new_submap; - vm_map_reference(new_submap); - vm_map_deallocate(old_submap); - } - entry = entry->vme_next; - addr = entry->vme_start; + + named_entry_unlock(mem_entry); + + kr = vm_object_purgable_control(object, control, state); + + vm_object_unlock(object); + + return kr; +} + +kern_return_t +mach_memory_entry_get_page_counts( + ipc_port_t entry_port, + unsigned int *resident_page_count, + unsigned int *dirty_page_count) +{ + kern_return_t kr; + vm_named_entry_t mem_entry; + vm_object_t object; + vm_object_offset_t offset; + vm_object_size_t size; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; } - if(nested_pmap) { -#ifndef i386 - pmap_unnest(target_map->pmap, start, end - start); - if(target_map->mapped) { - vm_map_submap_pmap_clean(target_map, - start, end, old_submap, 0); - } - pmap_nest(target_map->pmap, new_submap->pmap, - start, end - start); -#endif i386 - } else { - vm_map_submap_pmap_clean(target_map, - start, end, old_submap, 0); + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; } - vm_map_unlock(old_submap); - vm_map_deallocate(old_submap); - vm_map_unlock(target_map); - return KERN_SUCCESS; + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_lock(object); + + offset = mem_entry->offset; + size = mem_entry->size; + + named_entry_unlock(mem_entry); + + kr = vm_object_get_page_counts(object, offset, size, resident_page_count, dirty_page_count); + + vm_object_unlock(object); + + return kr; } +/* + * mach_memory_entry_port_release: + * + * Release a send right on a named entry port. This is the correct + * way to destroy a named entry. When the last right on the port is + * released, ipc_kobject_destroy() will call mach_destroy_memory_entry(). 
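+ * Illustrative in-kernel pairing (the entry port and how it was obtained
+ * are assumptions for the example): a caller that created a handle via
+ * mach_make_memory_entry_64() or mach_memory_entry_allocate() drops its
+ * send right with
+ *
+ *	mach_memory_entry_port_release(entry_port);
+ *
+ * and the backing object, map or copy is then released in
+ * mach_destroy_memory_entry() once the last right on the port is gone.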
+ */ +void +mach_memory_entry_port_release( + ipc_port_t port) +{ + assert(ip_kotype(port) == IKOT_NAMED_ENTRY); + ipc_port_release_send(port); +} +/* + * mach_destroy_memory_entry: + * + * Drops a reference on a memory entry and destroys the memory entry if + * there are no more references on it. + * NOTE: This routine should not be called to destroy a memory entry from the + * kernel, as it will not release the Mach port associated with the memory + * entry. The proper way to destroy a memory entry in the kernel is to + * call mach_memort_entry_port_release() to release the kernel's send-right on + * the memory entry's port. When the last send right is released, the memory + * entry will be destroyed via ipc_kobject_destroy(). + */ void mach_destroy_memory_entry( ipc_port_t port) @@ -1853,107 +3200,140 @@ mach_destroy_memory_entry( assert(ip_kotype(port) == IKOT_NAMED_ENTRY); #endif /* MACH_ASSERT */ named_entry = (vm_named_entry_t)port->ip_kobject; - mutex_lock(&(named_entry)->Lock); - named_entry->ref_count-=1; + + named_entry_lock(named_entry); + named_entry->ref_count -= 1; + if(named_entry->ref_count == 0) { - if(named_entry->object) { - /* release the memory object we've been pointing to */ - vm_object_deallocate(named_entry->object); - } - if(named_entry->is_sub_map) { + if (named_entry->is_sub_map) { vm_map_deallocate(named_entry->backing.map); + } else if (named_entry->is_pager) { + /* JMM - need to drop reference on pager in that case */ + } else if (named_entry->is_copy) { + vm_map_copy_discard(named_entry->backing.copy); + } else { + /* release the VM object we've been pointing to */ + vm_object_deallocate(named_entry->backing.object); } - kfree((vm_offset_t)port->ip_kobject, - sizeof (struct vm_named_entry)); + + named_entry_unlock(named_entry); + named_entry_lock_destroy(named_entry); + + kfree((void *) port->ip_kobject, + sizeof (struct vm_named_entry)); } else - mutex_unlock(&(named_entry)->Lock); + named_entry_unlock(named_entry); } +/* Allow manipulation of individual page state. 
This is actually part of */ +/* the UPL regimen but takes place on the memory entry rather than on a UPL */ kern_return_t -vm_map_page_query( - vm_map_t target_map, - vm_offset_t offset, - int *disposition, - int *ref_count) +mach_memory_entry_page_op( + ipc_port_t entry_port, + vm_object_offset_t offset, + int ops, + ppnum_t *phys_entry, + int *flags) { - vm_map_entry_t map_entry; - vm_object_t object; - vm_page_t m; + vm_named_entry_t mem_entry; + vm_object_t object; + kern_return_t kr; -restart_page_query: - *disposition = 0; - *ref_count = 0; - vm_map_lock(target_map); - if(!vm_map_lookup_entry(target_map, offset, &map_entry)) { - vm_map_unlock(target_map); - return KERN_FAILURE; + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; } - offset -= map_entry->vme_start; /* adjust to offset within entry */ - offset += map_entry->offset; /* adjust to target object offset */ - if(map_entry->object.vm_object != VM_OBJECT_NULL) { - if(!map_entry->is_sub_map) { - object = map_entry->object.vm_object; - } else { - vm_map_unlock(target_map); - target_map = map_entry->object.sub_map; - goto restart_page_query; - } - } else { - vm_map_unlock(target_map); - return KERN_FAILURE; + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; } - vm_object_lock(object); - vm_map_unlock(target_map); - while(TRUE) { - m = vm_page_lookup(object, offset); - if (m != VM_PAGE_NULL) { - *disposition |= VM_PAGE_QUERY_PAGE_PRESENT; - break; - } else { - if(object->shadow) { - offset += object->shadow_offset; - vm_object_unlock(object); - object = object->shadow; - vm_object_lock(object); - continue; - } - vm_object_unlock(object); - return KERN_FAILURE; - } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; } - /* The ref_count is not strictly accurate, it measures the number */ - /* of entities holding a ref on the object, they may not be mapping */ - /* the object or may not be mapping the section holding the */ - /* target page but its still a ball park number and though an over- */ - /* count, it picks up the copy-on-write cases */ + vm_object_reference(object); + named_entry_unlock(mem_entry); - /* We could also get a picture of page sharing from pmap_attributes */ - /* but this would under count as only faulted-in mappings would */ - /* show up. */ + kr = vm_object_page_op(object, offset, ops, phys_entry, flags); - *ref_count = object->ref_count; + vm_object_deallocate(object); - if (m->fictitious) { - *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; - vm_object_unlock(object); - return KERN_SUCCESS; + return kr; +} + +/* + * mach_memory_entry_range_op offers performance enhancement over + * mach_memory_entry_page_op for page_op functions which do not require page + * level state to be returned from the call. Page_op was created to provide + * a low-cost alternative to page manipulation via UPLs when only a single + * page was involved. The range_op call establishes the ability in the _op + * family of functions to work on multiple pages where the lack of page level + * state handling allows the caller to avoid the overhead of the upl structures. 
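+ * Illustrative call shapes; the entry port, offsets, and the UPL_POP_DIRTY,
+ * UPL_POP_SET and UPL_ROP_DUMP constants are assumed from the UPL page-op
+ * interface, not defined in this file. A single page is manipulated with
+ *
+ *	kr = mach_memory_entry_page_op(entry_port, offset,
+ *		UPL_POP_DIRTY | UPL_POP_SET, &phys_page, &flags);
+ *
+ * while an operation over a whole range, with no per-page state returned,
+ * goes through
+ *
+ *	kr = mach_memory_entry_range_op(entry_port, offset_beg, offset_end,
+ *		UPL_ROP_DUMP, &range);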
+ */ + +kern_return_t +mach_memory_entry_range_op( + ipc_port_t entry_port, + vm_object_offset_t offset_beg, + vm_object_offset_t offset_end, + int ops, + int *range) +{ + vm_named_entry_t mem_entry; + vm_object_t object; + kern_return_t kr; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; } - if (m->dirty) - *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; - else if(pmap_is_modified(m->phys_addr)) - *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; - if (m->reference) - *disposition |= VM_PAGE_QUERY_PAGE_REF; - else if(pmap_is_referenced(m->phys_addr)) - *disposition |= VM_PAGE_QUERY_PAGE_REF; + named_entry_lock(mem_entry); - vm_object_unlock(object); - return KERN_SUCCESS; - + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_reference(object); + named_entry_unlock(mem_entry); + + kr = vm_object_range_op(object, + offset_beg, + offset_end, + ops, + (uint32_t *) range); + + vm_object_deallocate(object); + + return kr; +} + +static void dp_control_port_init(void) +{ + lck_grp_init(&dynamic_pager_control_port_lock_group,"dp_control_port", LCK_GRP_ATTR_NULL); + lck_mtx_init(&dynamic_pager_control_port_lock, &dynamic_pager_control_port_lock_group, LCK_ATTR_NULL); } kern_return_t @@ -1961,13 +3341,19 @@ set_dp_control_port( host_priv_t host_priv, ipc_port_t control_port) { - if (host_priv == HOST_PRIV_NULL) - return (KERN_INVALID_HOST); + ipc_port_t old_port; - if (IP_VALID(dynamic_pager_control_port)) - ipc_port_release_send(dynamic_pager_control_port); + if (host_priv == HOST_PRIV_NULL) + return (KERN_INVALID_HOST); + lck_mtx_lock(&dynamic_pager_control_port_lock); + old_port = dynamic_pager_control_port; dynamic_pager_control_port = control_port; + lck_mtx_unlock(&dynamic_pager_control_port_lock); + + if (IP_VALID(old_port)) + ipc_port_release_send(old_port); + return KERN_SUCCESS; } @@ -1976,263 +3362,59 @@ get_dp_control_port( host_priv_t host_priv, ipc_port_t *control_port) { - if (host_priv == HOST_PRIV_NULL) + if (host_priv == HOST_PRIV_NULL) return (KERN_INVALID_HOST); + lck_mtx_lock(&dynamic_pager_control_port_lock); *control_port = ipc_port_copy_send(dynamic_pager_control_port); + lck_mtx_unlock(&dynamic_pager_control_port_lock); + return KERN_SUCCESS; } +/* ******* Temporary Internal calls to UPL for BSD ***** */ -/* Retrieve a upl for an object underlying an address range in a map */ - -kern_return_t -vm_map_get_upl( - vm_map_t map, - vm_address_t offset, - vm_size_t *upl_size, - upl_t *upl, - upl_page_info_array_t page_list, - unsigned int *count, - int *flags, - int force_data_sync) -{ - vm_map_entry_t entry; - int caller_flags; - int sync_cow_data = FALSE; - vm_object_t local_object; - vm_offset_t local_offset; - vm_offset_t local_start; - kern_return_t ret; - - caller_flags = *flags; - if (!(caller_flags & UPL_COPYOUT_FROM)) { - sync_cow_data = TRUE; - } - if(upl == NULL) - return KERN_INVALID_ARGUMENT; - - -REDISCOVER_ENTRY: - vm_map_lock(map); - if (vm_map_lookup_entry(map, offset, &entry)) { - if (entry->object.vm_object == VM_OBJECT_NULL || - !entry->object.vm_object->phys_contiguous) { - if((*upl_size/page_size) > MAX_UPL_TRANSFER) { - *upl_size = MAX_UPL_TRANSFER * page_size; - } - } - if((entry->vme_end - offset) < 
*upl_size) { - *upl_size = entry->vme_end - offset; - } - if (caller_flags & UPL_QUERY_OBJECT_TYPE) { - if (entry->object.vm_object == VM_OBJECT_NULL) { - *flags = 0; - } else if (entry->object.vm_object->private) { - *flags = UPL_DEV_MEMORY; - if (entry->object.vm_object->phys_contiguous) { - *flags |= UPL_PHYS_CONTIG; - } - } else { - *flags = 0; - } - vm_map_unlock(map); - return KERN_SUCCESS; - } - /* - * Create an object if necessary. - */ - if (entry->object.vm_object == VM_OBJECT_NULL) { - entry->object.vm_object = vm_object_allocate( - (vm_size_t)(entry->vme_end - entry->vme_start)); - entry->offset = 0; - } - if (!(caller_flags & UPL_COPYOUT_FROM)) { - if (entry->needs_copy) { - vm_map_t local_map; - vm_object_t object; - vm_object_offset_t offset_hi; - vm_object_offset_t offset_lo; - vm_object_offset_t new_offset; - vm_prot_t prot; - boolean_t wired; - vm_behavior_t behavior; - vm_map_version_t version; - vm_map_t pmap_map; - - local_map = map; - vm_map_lock_write_to_read(map); - if(vm_map_lookup_locked(&local_map, - offset, VM_PROT_WRITE, - &version, &object, - &new_offset, &prot, &wired, - &behavior, &offset_lo, - &offset_hi, &pmap_map)) { - vm_map_unlock(local_map); - return KERN_FAILURE; - } - if (pmap_map != map) { - vm_map_unlock(pmap_map); - } - vm_object_unlock(object); - vm_map_unlock(local_map); - - goto REDISCOVER_ENTRY; - } - } - if (entry->is_sub_map) { - vm_map_t submap; - - submap = entry->object.sub_map; - local_start = entry->vme_start; - local_offset = entry->offset; - vm_map_reference(submap); - vm_map_unlock(map); - - ret = (vm_map_get_upl(submap, - local_offset + (offset - local_start), - upl_size, upl, page_list, count, - flags, force_data_sync)); - - vm_map_deallocate(submap); - return ret; - } - - if (sync_cow_data) { - if (entry->object.vm_object->shadow - || entry->object.vm_object->copy) { - int flags; - - local_object = entry->object.vm_object; - local_start = entry->vme_start; - local_offset = entry->offset; - vm_object_reference(local_object); - vm_map_unlock(map); +extern int kernel_upl_map( + vm_map_t map, + upl_t upl, + vm_offset_t *dst_addr); - if(local_object->copy == NULL) { - flags = MEMORY_OBJECT_DATA_SYNC; - } else { - flags = MEMORY_OBJECT_COPY_SYNC; - } +extern int kernel_upl_unmap( + vm_map_t map, + upl_t upl); - if((local_object->paging_offset) && - (local_object->pager == 0)) { - /* - * do a little clean-up for our unorthodox - * entry into a pager call from a non-pager - * context. 
Normally the pager code - * assumes that an object it has been called - * with has a backing pager and so does - * not bother to check the pager field - * before relying on the paging_offset - */ - vm_object_lock(local_object); - if (local_object->pager == 0) { - local_object->paging_offset = 0; - } - vm_object_unlock(local_object); - } - - if (entry->object.vm_object->shadow && - entry->object.vm_object->copy) { - vm_object_lock_request( - local_object->shadow, - (vm_object_offset_t) - ((offset - local_start) + - local_offset) + - local_object->shadow_offset + - local_object->paging_offset, - *upl_size, FALSE, - MEMORY_OBJECT_DATA_SYNC, - VM_PROT_NO_CHANGE); - } - sync_cow_data = FALSE; - vm_object_deallocate(local_object); - goto REDISCOVER_ENTRY; - } - } +extern int kernel_upl_commit( + upl_t upl, + upl_page_info_t *pl, + mach_msg_type_number_t count); - if (force_data_sync) { - - local_object = entry->object.vm_object; - local_start = entry->vme_start; - local_offset = entry->offset; - vm_object_reference(local_object); - vm_map_unlock(map); - - if((local_object->paging_offset) && - (local_object->pager == 0)) { - /* - * do a little clean-up for our unorthodox - * entry into a pager call from a non-pager - * context. Normally the pager code - * assumes that an object it has been called - * with has a backing pager and so does - * not bother to check the pager field - * before relying on the paging_offset - */ - vm_object_lock(local_object); - if (local_object->pager == 0) { - local_object->paging_offset = 0; - } - vm_object_unlock(local_object); - } - - vm_object_lock_request( - local_object, - (vm_object_offset_t) - ((offset - local_start) + local_offset) + - local_object->paging_offset, - (vm_object_size_t)*upl_size, FALSE, - MEMORY_OBJECT_DATA_SYNC, - VM_PROT_NO_CHANGE); - force_data_sync = FALSE; - vm_object_deallocate(local_object); - goto REDISCOVER_ENTRY; - } +extern int kernel_upl_commit_range( + upl_t upl, + upl_offset_t offset, + upl_size_t size, + int flags, + upl_page_info_array_t pl, + mach_msg_type_number_t count); - if(!(entry->object.vm_object->private)) { - if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE)) - *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE); - if(entry->object.vm_object->phys_contiguous) { - *flags = UPL_PHYS_CONTIG; - } else { - *flags = 0; - } - } else { - *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG; - } - local_object = entry->object.vm_object; - local_offset = entry->offset; - local_start = entry->vme_start; - vm_object_reference(local_object); - vm_map_unlock(map); - ret = (vm_object_upl_request(local_object, - (vm_object_offset_t) - ((offset - local_start) + local_offset), - *upl_size, - upl, - page_list, - count, - caller_flags)); - vm_object_deallocate(local_object); - return(ret); - } +extern int kernel_upl_abort( + upl_t upl, + int abort_type); - vm_map_unlock(map); - return(KERN_FAILURE); +extern int kernel_upl_abort_range( + upl_t upl, + upl_offset_t offset, + upl_size_t size, + int abort_flags); -} -/* ******* Temporary Internal calls to UPL for BSD ***** */ kern_return_t kernel_upl_map( vm_map_t map, upl_t upl, vm_offset_t *dst_addr) { - return (vm_upl_map(map, upl, dst_addr)); + return vm_upl_map(map, upl, dst_addr); } @@ -2241,13 +3423,13 @@ kernel_upl_unmap( vm_map_t map, upl_t upl) { - return(vm_upl_unmap(map, upl)); + return vm_upl_unmap(map, upl); } kern_return_t kernel_upl_commit( - upl_t upl, - upl_page_info_t *pl, + upl_t upl, + upl_page_info_t *pl, mach_msg_type_number_t count) { kern_return_t kr; @@ -2261,8 +3443,8 @@ kernel_upl_commit( 
kern_return_t kernel_upl_commit_range( upl_t upl, - vm_offset_t offset, - vm_size_t size, + upl_offset_t offset, + upl_size_t size, int flags, upl_page_info_array_t pl, mach_msg_type_number_t count) @@ -2273,6 +3455,10 @@ kernel_upl_commit_range( if (flags & UPL_COMMIT_FREE_ON_EMPTY) flags |= UPL_COMMIT_NOTIFY_EMPTY; + if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) { + return KERN_INVALID_ARGUMENT; + } + kr = upl_commit_range(upl, offset, size, flags, pl, count, &finished); if ((flags & UPL_COMMIT_NOTIFY_EMPTY) && finished) @@ -2284,8 +3470,8 @@ kernel_upl_commit_range( kern_return_t kernel_upl_abort_range( upl_t upl, - vm_offset_t offset, - vm_size_t size, + upl_offset_t offset, + upl_size_t size, int abort_flags) { kern_return_t kr; @@ -2314,225 +3500,106 @@ kernel_upl_abort( return kr; } +/* + * Now a kernel-private interface (for BootCache + * use only). Need a cleaner way to create an + * empty vm_map() and return a handle to it. + */ kern_return_t -vm_get_shared_region( - task_t task, - shared_region_mapping_t *shared_region) -{ - *shared_region = (shared_region_mapping_t) task->system_shared_region; - return KERN_SUCCESS; -} - -kern_return_t -vm_set_shared_region( - task_t task, - shared_region_mapping_t shared_region) -{ - task->system_shared_region = (vm_offset_t) shared_region; - return KERN_SUCCESS; -} - -kern_return_t -shared_region_mapping_info( - shared_region_mapping_t shared_region, - ipc_port_t *text_region, - vm_size_t *text_size, - ipc_port_t *data_region, - vm_size_t *data_size, - vm_offset_t *region_mappings, - vm_offset_t *client_base, - vm_offset_t *alt_base, - vm_offset_t *alt_next, - int *flags, - shared_region_mapping_t *next) +vm_region_object_create( + __unused vm_map_t target_map, + vm_size_t size, + ipc_port_t *object_handle) { - shared_region_mapping_lock(shared_region); - - *text_region = shared_region->text_region; - *text_size = shared_region->text_size; - *data_region = shared_region->data_region; - *data_size = shared_region->data_size; - *region_mappings = shared_region->region_mappings; - *client_base = shared_region->client_base; - *alt_base = shared_region->alternate_base; - *alt_next = shared_region->alternate_next; - *flags = shared_region->flags; - *next = shared_region->next; - - shared_region_mapping_unlock(shared_region); -} + vm_named_entry_t user_entry; + ipc_port_t user_handle; -kern_return_t -shared_region_object_chain_attach( - shared_region_mapping_t target_region, - shared_region_mapping_t object_chain_region) -{ - shared_region_object_chain_t object_ele; + vm_map_t new_map; - if(target_region->object_chain) + if (mach_memory_entry_allocate(&user_entry, &user_handle) + != KERN_SUCCESS) { return KERN_FAILURE; - object_ele = (shared_region_object_chain_t) - kalloc(sizeof (struct shared_region_object_chain)); - shared_region_mapping_lock(object_chain_region); - target_region->object_chain = object_ele; - object_ele->object_chain_region = object_chain_region; - object_ele->next = object_chain_region->object_chain; - object_ele->depth = object_chain_region->depth; - object_chain_region->depth++; - target_region->alternate_next = object_chain_region->alternate_next; - shared_region_mapping_unlock(object_chain_region); - return KERN_SUCCESS; -} + } -kern_return_t -shared_region_mapping_create( - ipc_port_t text_region, - vm_size_t text_size, - ipc_port_t data_region, - vm_size_t data_size, - vm_offset_t region_mappings, - vm_offset_t client_base, - shared_region_mapping_t *shared_region, - vm_offset_t alt_base, - vm_offset_t alt_next) -{ - 
*shared_region = (shared_region_mapping_t) - kalloc(sizeof (struct shared_region_mapping)); - if(*shared_region == NULL) - return KERN_FAILURE; - shared_region_mapping_lock_init((*shared_region)); - (*shared_region)->text_region = text_region; - (*shared_region)->text_size = text_size; - (*shared_region)->data_region = data_region; - (*shared_region)->data_size = data_size; - (*shared_region)->region_mappings = region_mappings; - (*shared_region)->client_base = client_base; - (*shared_region)->ref_count = 1; - (*shared_region)->next = NULL; - (*shared_region)->object_chain = NULL; - (*shared_region)->self = *shared_region; - (*shared_region)->flags = 0; - (*shared_region)->depth = 0; - (*shared_region)->alternate_base = alt_base; - (*shared_region)->alternate_next = alt_next; - return KERN_SUCCESS; -} + /* Create a named object based on a submap of specified size */ -kern_return_t -shared_region_mapping_set_alt_next( - shared_region_mapping_t shared_region, - vm_offset_t alt_next) -{ - shared_region->alternate_next = alt_next; - return KERN_SUCCESS; -} + new_map = vm_map_create(PMAP_NULL, VM_MAP_MIN_ADDRESS, + vm_map_round_page(size, + VM_MAP_PAGE_MASK(target_map)), + TRUE); + vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(target_map)); -kern_return_t -shared_region_mapping_ref( - shared_region_mapping_t shared_region) -{ - if(shared_region == NULL) - return KERN_SUCCESS; - hw_atomic_add(&shared_region->ref_count, 1); - return KERN_SUCCESS; -} + user_entry->backing.map = new_map; + user_entry->internal = TRUE; + user_entry->is_sub_map = TRUE; + user_entry->offset = 0; + user_entry->protection = VM_PROT_ALL; + user_entry->size = size; + assert(user_entry->ref_count == 1); -kern_return_t -shared_region_mapping_dealloc( - shared_region_mapping_t shared_region) -{ - struct shared_region_task_mappings sm_info; - shared_region_mapping_t next = NULL; - - while (shared_region) { - if (hw_atomic_sub(&shared_region->ref_count, 1) == 0) { - shared_region_mapping_lock(shared_region); - - sm_info.text_region = shared_region->text_region; - sm_info.text_size = shared_region->text_size; - sm_info.data_region = shared_region->data_region; - sm_info.data_size = shared_region->data_size; - sm_info.region_mappings = shared_region->region_mappings; - sm_info.client_base = shared_region->client_base; - sm_info.alternate_base = shared_region->alternate_base; - sm_info.alternate_next = shared_region->alternate_next; - sm_info.flags = shared_region->flags; - sm_info.self = (vm_offset_t)shared_region; - - lsf_remove_regions_mappings(shared_region, &sm_info); - pmap_remove(((vm_named_entry_t) - (shared_region->text_region->ip_kobject)) - ->backing.map->pmap, - sm_info.client_base, - sm_info.client_base + sm_info.text_size); - ipc_port_release_send(shared_region->text_region); - ipc_port_release_send(shared_region->data_region); - if (shared_region->object_chain) { - next = shared_region->object_chain->object_chain_region; - kfree((vm_offset_t)shared_region->object_chain, - sizeof (struct shared_region_object_chain)); - } else { - next = NULL; - } - shared_region_mapping_unlock(shared_region); - kfree((vm_offset_t)shared_region, - sizeof (struct shared_region_mapping)); - shared_region = next; - } else { - break; - } - } + *object_handle = user_handle; return KERN_SUCCESS; + } -vm_offset_t -vm_map_get_phys_page( +ppnum_t vm_map_get_phys_page( /* forward */ vm_map_t map, - vm_offset_t offset) + vm_offset_t offset); + +ppnum_t +vm_map_get_phys_page( + vm_map_t map, + vm_offset_t addr) { - vm_map_entry_t entry; - int 
ops; - int flags; - vm_offset_t phys_addr = 0; - vm_object_t object; + vm_object_offset_t offset; + vm_object_t object; + vm_map_offset_t map_offset; + vm_map_entry_t entry; + ppnum_t phys_page = 0; + + map_offset = vm_map_trunc_page(addr, PAGE_MASK); vm_map_lock(map); - while (vm_map_lookup_entry(map, offset, &entry)) { + while (vm_map_lookup_entry(map, map_offset, &entry)) { - if (entry->object.vm_object == VM_OBJECT_NULL) { + if (VME_OBJECT(entry) == VM_OBJECT_NULL) { vm_map_unlock(map); - return (vm_offset_t) 0; + return (ppnum_t) 0; } if (entry->is_sub_map) { vm_map_t old_map; - vm_map_lock(entry->object.sub_map); + vm_map_lock(VME_SUBMAP(entry)); old_map = map; - map = entry->object.sub_map; - offset = entry->offset + (offset - entry->vme_start); + map = VME_SUBMAP(entry); + map_offset = (VME_OFFSET(entry) + + (map_offset - entry->vme_start)); vm_map_unlock(old_map); continue; } - if (entry->object.vm_object->phys_contiguous) { + if (VME_OBJECT(entry)->phys_contiguous) { /* These are not standard pageable memory mappings */ /* If they are not present in the object they will */ /* have to be picked up from the pager through the */ /* fault mechanism. */ - if(entry->object.vm_object->shadow_offset == 0) { + if (VME_OBJECT(entry)->vo_shadow_offset == 0) { /* need to call vm_fault */ vm_map_unlock(map); - vm_fault(map, offset, VM_PROT_NONE, + vm_fault(map, map_offset, VM_PROT_NONE, FALSE, THREAD_UNINT, NULL, 0); vm_map_lock(map); continue; } - offset = entry->offset + (offset - entry->vme_start); - phys_addr = entry->object.vm_object->shadow_offset + offset; + offset = (VME_OFFSET(entry) + + (map_offset - entry->vme_start)); + phys_page = (ppnum_t) + ((VME_OBJECT(entry)->vo_shadow_offset + + offset) >> PAGE_SHIFT); break; } - offset = entry->offset + (offset - entry->vme_start); - object = entry->object.vm_object; + offset = (VME_OFFSET(entry) + (map_offset - entry->vme_start)); + object = VME_OBJECT(entry); vm_object_lock(object); while (TRUE) { vm_page_t dst_page = vm_page_lookup(object,offset); @@ -2541,7 +3608,7 @@ vm_map_get_phys_page( vm_object_t old_object; vm_object_lock(object->shadow); old_object = object; - offset = offset + object->shadow_offset; + offset = offset + object->vo_shadow_offset; object = object->shadow; vm_object_unlock(old_object); } else { @@ -2549,7 +3616,7 @@ vm_map_get_phys_page( break; } } else { - phys_addr = dst_page->phys_addr; + phys_page = (ppnum_t)(VM_PAGE_GET_PHYS_PAGE(dst_page)); vm_object_unlock(object); break; } @@ -2559,6 +3626,146 @@ vm_map_get_phys_page( } vm_map_unlock(map); - return phys_addr; + return phys_page; } -#endif /* VM_CPM */ + +void +vm_user_init(void) +{ + dp_control_port_init(); +} + +#if 0 +kern_return_t kernel_object_iopl_request( /* forward */ + vm_named_entry_t named_entry, + memory_object_offset_t offset, + upl_size_t *upl_size, + upl_t *upl_ptr, + upl_page_info_array_t user_page_list, + unsigned int *page_list_count, + int *flags); + +kern_return_t +kernel_object_iopl_request( + vm_named_entry_t named_entry, + memory_object_offset_t offset, + upl_size_t *upl_size, + upl_t *upl_ptr, + upl_page_info_array_t user_page_list, + unsigned int *page_list_count, + int *flags) +{ + vm_object_t object; + kern_return_t ret; + + int caller_flags; + + caller_flags = *flags; + + if (caller_flags & ~UPL_VALID_FLAGS) { + /* + * For forward compatibility's sake, + * reject any unknown flag. 
+ */ + return KERN_INVALID_VALUE; + } + + /* a few checks to make sure user is obeying rules */ + if(*upl_size == 0) { + if(offset >= named_entry->size) + return(KERN_INVALID_RIGHT); + *upl_size = (upl_size_t) (named_entry->size - offset); + if (*upl_size != named_entry->size - offset) + return KERN_INVALID_ARGUMENT; + } + if(caller_flags & UPL_COPYOUT_FROM) { + if((named_entry->protection & VM_PROT_READ) + != VM_PROT_READ) { + return(KERN_INVALID_RIGHT); + } + } else { + if((named_entry->protection & + (VM_PROT_READ | VM_PROT_WRITE)) + != (VM_PROT_READ | VM_PROT_WRITE)) { + return(KERN_INVALID_RIGHT); + } + } + if(named_entry->size < (offset + *upl_size)) + return(KERN_INVALID_ARGUMENT); + + /* the callers parameter offset is defined to be the */ + /* offset from beginning of named entry offset in object */ + offset = offset + named_entry->offset; + + if (named_entry->is_sub_map || + named_entry->is_copy) + return KERN_INVALID_ARGUMENT; + + named_entry_lock(named_entry); + + if (named_entry->is_pager) { + object = vm_object_enter(named_entry->backing.pager, + named_entry->offset + named_entry->size, + named_entry->internal, + FALSE, + FALSE); + if (object == VM_OBJECT_NULL) { + named_entry_unlock(named_entry); + return(KERN_INVALID_OBJECT); + } + + /* JMM - drop reference on the pager here? */ + + /* create an extra reference for the object */ + vm_object_lock(object); + vm_object_reference_locked(object); + named_entry->backing.object = object; + named_entry->is_pager = FALSE; + named_entry_unlock(named_entry); + + /* wait for object (if any) to be ready */ + if (!named_entry->internal) { + while (!object->pager_ready) { + vm_object_wait(object, + VM_OBJECT_EVENT_PAGER_READY, + THREAD_UNINT); + vm_object_lock(object); + } + } + vm_object_unlock(object); + + } else { + /* This is the case where we are going to operate */ + /* an an already known object. If the object is */ + /* not ready it is internal. An external */ + /* object cannot be mapped until it is ready */ + /* we can therefore avoid the ready check */ + /* in this case. */ + object = named_entry->backing.object; + vm_object_reference(object); + named_entry_unlock(named_entry); + } + + if (!object->private) { + if (*upl_size > MAX_UPL_TRANSFER_BYTES) + *upl_size = MAX_UPL_TRANSFER_BYTES; + if (object->phys_contiguous) { + *flags = UPL_PHYS_CONTIG; + } else { + *flags = 0; + } + } else { + *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG; + } + + ret = vm_object_iopl_request(object, + offset, + *upl_size, + upl_ptr, + user_page_list, + page_list_count, + (upl_control_flags_t)(unsigned int)caller_flags); + vm_object_deallocate(object); + return ret; +} +#endif
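Both mach_memory_entry_page_op and mach_memory_entry_range_op above share the same prologue: validate that the port really is a named entry, reject sub-map, pager and copy entries, take a reference on the backing VM object, and drop the named-entry lock before doing any work on the object. A minimal sketch of that shared pattern, using a hypothetical helper name (the real functions open-code it):

static kern_return_t
memory_entry_to_object(
	ipc_port_t	entry_port,
	vm_object_t	*objectp)	/* out: referenced VM object */
{
	vm_named_entry_t	mem_entry;
	vm_object_t		object;

	if (entry_port == IP_NULL ||
	    ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
		return KERN_INVALID_ARGUMENT;
	}

	mem_entry = (vm_named_entry_t) entry_port->ip_kobject;

	named_entry_lock(mem_entry);

	/* only entries backed directly by a VM object are acceptable */
	if (mem_entry->is_sub_map ||
	    mem_entry->is_pager ||
	    mem_entry->is_copy) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	object = mem_entry->backing.object;
	if (object == VM_OBJECT_NULL) {
		named_entry_unlock(mem_entry);
		return KERN_INVALID_ARGUMENT;
	}

	/* keep the object alive once the named-entry lock is dropped */
	vm_object_reference(object);
	named_entry_unlock(mem_entry);

	*objectp = object;
	return KERN_SUCCESS;
}

The caller then invokes vm_object_page_op() or vm_object_range_op() on the returned object and balances the reference with vm_object_deallocate(), exactly as the two entry points above do.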
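set_dp_control_port and get_dp_control_port now serialize access to dynamic_pager_control_port with dynamic_pager_control_port_lock, initialized by dp_control_port_init(). Condensed into a sketch, the discipline is: the setter swaps the global pointer while holding the mutex and releases the old send right only after unlocking, so ipc_port_release_send() never runs with the mutex held; the getter takes its extra send right while the mutex is held:

	ipc_port_t	old_port;

	/* setter: swap under the lock ... */
	lck_mtx_lock(&dynamic_pager_control_port_lock);
	old_port = dynamic_pager_control_port;
	dynamic_pager_control_port = control_port;
	lck_mtx_unlock(&dynamic_pager_control_port_lock);

	/* ... and release the stale right outside it */
	if (IP_VALID(old_port))
		ipc_port_release_send(old_port);

	/* getter: copy the send right while the pointer is stable */
	lck_mtx_lock(&dynamic_pager_control_port_lock);
	*control_port = ipc_port_copy_send(dynamic_pager_control_port);
	lck_mtx_unlock(&dynamic_pager_control_port_lock);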
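vm_map_get_phys_page resolves an address to a physical page number by converting the map offset into an object offset and then walking the object's shadow chain until a resident page turns up. The chain walk in isolation, as a sketch with a hypothetical helper name (the real routine inlines this inside its map-entry lookup loop):

static ppnum_t
object_lookup_phys_page(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_object_t	old_object;
	vm_page_t	dst_page;
	ppnum_t		phys_page = 0;

	vm_object_lock(object);
	for (;;) {
		dst_page = vm_page_lookup(object, offset);
		if (dst_page != VM_PAGE_NULL) {
			/* resident page found: report its physical page number */
			phys_page = (ppnum_t) VM_PAGE_GET_PHYS_PAGE(dst_page);
			break;
		}
		if (object->shadow == VM_OBJECT_NULL) {
			/* end of the chain and still not resident */
			break;
		}
		/* descend one level, adjusting the offset into the shadow */
		vm_object_lock(object->shadow);
		old_object = object;
		offset += object->vo_shadow_offset;
		object = object->shadow;
		vm_object_unlock(old_object);
	}
	vm_object_unlock(object);
	return phys_page;
}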
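The #if 0'd kernel_object_iopl_request front-loads its argument checking: any flag bit the kernel does not know about is rejected for forward compatibility, a zero size is expanded to the remainder of the named entry (with a check that the value survives truncation to upl_size_t), the entry's protection must cover the direction of the transfer, and the request must fit inside the entry. Distilled into a sketch with a hypothetical helper name:

static kern_return_t
iopl_request_checks(
	vm_named_entry_t	named_entry,
	memory_object_offset_t	offset,
	upl_size_t		*upl_size,
	int			caller_flags)
{
	/* reject unknown flag bits for forward compatibility */
	if (caller_flags & ~UPL_VALID_FLAGS)
		return KERN_INVALID_VALUE;

	/* a zero size means "to the end of the entry"; watch for truncation */
	if (*upl_size == 0) {
		if (offset >= named_entry->size)
			return KERN_INVALID_RIGHT;
		*upl_size = (upl_size_t)(named_entry->size - offset);
		if (*upl_size != named_entry->size - offset)
			return KERN_INVALID_ARGUMENT;
	}

	/* copy-out only needs read access; anything else needs read/write */
	if (caller_flags & UPL_COPYOUT_FROM) {
		if ((named_entry->protection & VM_PROT_READ) != VM_PROT_READ)
			return KERN_INVALID_RIGHT;
	} else {
		if ((named_entry->protection & (VM_PROT_READ | VM_PROT_WRITE)) !=
		    (VM_PROT_READ | VM_PROT_WRITE))
			return KERN_INVALID_RIGHT;
	}

	/* the request must lie entirely within the named entry */
	if (named_entry->size < (offset + *upl_size))
		return KERN_INVALID_ARGUMENT;

	return KERN_SUCCESS;
}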