X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/0b4e3aa066abc0728aacb4bbeb86f53f9737156e..2dced7af2b695f87fe26496a3e73c219b7880cbc:/osfmk/vm/vm_kern.c diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c index 56143f42c..d015ebd2c 100644 --- a/osfmk/vm/vm_kern.c +++ b/osfmk/vm/vm_kern.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -57,11 +63,9 @@ * Kernel memory management. */ -#include #include #include #include -#include #include #include #include @@ -72,6 +76,10 @@ #include #include + +#include +#include + /* * Variables exported by this module. */ @@ -79,108 +87,134 @@ vm_map_t kernel_map; vm_map_t kernel_pageable_map; +extern boolean_t vm_kernel_ready; + /* * Forward declarations for internal functions. 
*/ extern kern_return_t kmem_alloc_pages( register vm_object_t object, register vm_object_offset_t offset, - register vm_offset_t start, - register vm_offset_t end, - vm_prot_t protection); - -extern void kmem_remap_pages( - register vm_object_t object, - register vm_object_offset_t offset, - register vm_offset_t start, - register vm_offset_t end, - vm_prot_t protection); + register vm_object_size_t size); kern_return_t kmem_alloc_contig( - vm_map_t map, - vm_offset_t *addrp, - vm_size_t size, - vm_offset_t mask, - int flags) + vm_map_t map, + vm_offset_t *addrp, + vm_size_t size, + vm_offset_t mask, + ppnum_t max_pnum, + ppnum_t pnum_mask, + int flags, + vm_tag_t tag) { vm_object_t object; - vm_page_t m, pages; - kern_return_t kr; - vm_offset_t addr, i; vm_object_offset_t offset; + vm_map_offset_t map_addr; + vm_map_offset_t map_mask; + vm_map_size_t map_size, i; vm_map_entry_t entry; + vm_page_t m, pages; + kern_return_t kr; - if (map == VM_MAP_NULL || (flags && (flags ^ KMA_KOBJECT))) + if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) return KERN_INVALID_ARGUMENT; + + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); + map_mask = (vm_map_offset_t)mask; - if (size == 0) { + /* Check for zero allocation size (either directly or via overflow) */ + if (map_size == 0) { *addrp = 0; return KERN_INVALID_ARGUMENT; } - size = round_page(size); - if ((flags & KMA_KOBJECT) == 0) { - object = vm_object_allocate(size); - kr = vm_map_find_space(map, &addr, size, mask, &entry); - } - else { + /* + * Allocate a new object (if necessary) and the reference we + * will be donating to the map entry. We must do this before + * locking the map, or risk deadlock with the default pager. + */ + if ((flags & KMA_KOBJECT) != 0) { object = kernel_object; - kr = vm_map_find_space(map, &addr, size, mask, &entry); - } - - if ((flags & KMA_KOBJECT) == 0) { - entry->object.vm_object = object; - entry->offset = offset = 0; + vm_object_reference(object); } else { - offset = addr - VM_MIN_KERNEL_ADDRESS; - - if (entry->object.vm_object == VM_OBJECT_NULL) { - vm_object_reference(object); - entry->object.vm_object = object; - entry->offset = offset; - } + object = vm_object_allocate(map_size); } - if (kr != KERN_SUCCESS) { - if ((flags & KMA_KOBJECT) == 0) - vm_object_deallocate(object); + kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry); + if (KERN_SUCCESS != kr) { + vm_object_deallocate(object); return kr; } + if (object == kernel_object) { + offset = map_addr; + } else { + offset = 0; + } + VME_OBJECT_SET(entry, object); + VME_OFFSET_SET(entry, offset); + VME_ALIAS_SET(entry, tag); + + /* Take an extra object ref in case the map entry gets deleted */ + vm_object_reference(object); vm_map_unlock(map); - kr = cpm_allocate(size, &pages, FALSE); + kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags); if (kr != KERN_SUCCESS) { - vm_map_remove(map, addr, addr + size, 0); + vm_map_remove(map, + vm_map_trunc_page(map_addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(map_addr + map_size, + VM_MAP_PAGE_MASK(map)), + 0); + vm_object_deallocate(object); *addrp = 0; return kr; } vm_object_lock(object); - for (i = 0; i < size; i += PAGE_SIZE) { + for (i = 0; i < map_size; i += PAGE_SIZE) { m = pages; pages = NEXT_PAGE(m); + *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; m->busy = FALSE; vm_page_insert(m, object, offset + i); } vm_object_unlock(object); - if ((kr = vm_map_wire(map, addr, addr + size, VM_PROT_DEFAULT, FALSE)) - != 
KERN_SUCCESS) { + kr = vm_map_wire(map, + vm_map_trunc_page(map_addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(map_addr + map_size, + VM_MAP_PAGE_MASK(map)), + VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(tag), + FALSE); + + if (kr != KERN_SUCCESS) { if (object == kernel_object) { vm_object_lock(object); - vm_object_page_remove(object, offset, offset + size); + vm_object_page_remove(object, offset, offset + map_size); vm_object_unlock(object); } - vm_map_remove(map, addr, addr + size, 0); + vm_map_remove(map, + vm_map_trunc_page(map_addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(map_addr + map_size, + VM_MAP_PAGE_MASK(map)), + 0); + vm_object_deallocate(object); return kr; } + vm_object_deallocate(object); + if (object == kernel_object) - vm_map_simplify(map, addr); + vm_map_simplify(map, map_addr); - *addrp = addr; + *addrp = (vm_offset_t) map_addr; + assert((vm_map_offset_t) *addrp == map_addr); return KERN_SUCCESS; } @@ -195,6 +229,10 @@ kmem_alloc_contig( * KMA_HERE *addrp is base address, else "anywhere" * KMA_NOPAGEWAIT don't wait for pages if unavailable * KMA_KOBJECT use kernel_object + * KMA_LOMEM support for 32 bit devices in a 64 bit world + * if set and a lomemory pool is available + * grab pages from it... this also implies + * KMA_NOPAGEWAIT */ kern_return_t @@ -203,105 +241,523 @@ kernel_memory_allocate( register vm_offset_t *addrp, register vm_size_t size, register vm_offset_t mask, - int flags) + int flags, + vm_tag_t tag) { - vm_object_t object = VM_OBJECT_NULL; - vm_map_entry_t entry; - vm_object_offset_t offset; - vm_offset_t addr; - vm_offset_t i; - kern_return_t kr; - - size = round_page(size); - if ((flags & KMA_KOBJECT) == 0) { - /* - * Allocate a new object. We must do this before locking - * the map, or risk deadlock with the default pager: - * device_read_alloc uses kmem_alloc, - * which tries to allocate an object, - * which uses kmem_alloc_wired to get memory, - * which blocks for pages. - * then the default pager needs to read a block - * to process a memory_object_data_write, - * and device_read_alloc calls kmem_alloc - * and deadlocks on the map lock. - */ - object = vm_object_allocate(size); - kr = vm_map_find_space(map, &addr, size, mask, &entry); + vm_object_t object; + vm_object_offset_t offset; + vm_object_offset_t pg_offset; + vm_map_entry_t entry = NULL; + vm_map_offset_t map_addr, fill_start; + vm_map_offset_t map_mask; + vm_map_size_t map_size, fill_size; + kern_return_t kr, pe_result; + vm_page_t mem; + vm_page_t guard_page_list = NULL; + vm_page_t wired_page_list = NULL; + int guard_page_count = 0; + int wired_page_count = 0; + int i; + int vm_alloc_flags; + vm_prot_t kma_prot; + + if (! 
vm_kernel_ready) { + panic("kernel_memory_allocate: VM is not ready"); } - else { - object = kernel_object; - kr = vm_map_find_space(map, &addr, size, mask, &entry); + + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); + map_mask = (vm_map_offset_t) mask; + + vm_alloc_flags = VM_MAKE_TAG(tag); + + /* Check for zero allocation size (either directly or via overflow) */ + if (map_size == 0) { + *addrp = 0; + return KERN_INVALID_ARGUMENT; } - if (kr != KERN_SUCCESS) { - if ((flags & KMA_KOBJECT) == 0) - vm_object_deallocate(object); - return kr; + + /* + * limit the size of a single extent of wired memory + * to try and limit the damage to the system if + * too many pages get wired down + * limit raised to 2GB with 128GB max physical limit + */ + if ( !(flags & KMA_VAONLY) && map_size > (1ULL << 31)) { + return KERN_RESOURCE_SHORTAGE; + } + + /* + * Guard pages: + * + * Guard pages are implemented as ficticious pages. By placing guard pages + * on either end of a stack, they can help detect cases where a thread walks + * off either end of its stack. They are allocated and set up here and attempts + * to access those pages are trapped in vm_fault_page(). + * + * The map_size we were passed may include extra space for + * guard pages. If those were requested, then back it out of fill_size + * since vm_map_find_space() takes just the actual size not including + * guard pages. Similarly, fill_start indicates where the actual pages + * will begin in the range. + */ + + fill_start = 0; + fill_size = map_size; + + if (flags & KMA_GUARD_FIRST) { + vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE; + fill_start += PAGE_SIZE_64; + fill_size -= PAGE_SIZE_64; + if (map_size < fill_start + fill_size) { + /* no space for a guard page */ + *addrp = 0; + return KERN_INVALID_ARGUMENT; + } + guard_page_count++; } + if (flags & KMA_GUARD_LAST) { + vm_alloc_flags |= VM_FLAGS_GUARD_AFTER; + fill_size -= PAGE_SIZE_64; + if (map_size <= fill_start + fill_size) { + /* no space for a guard page */ + *addrp = 0; + return KERN_INVALID_ARGUMENT; + } + guard_page_count++; + } + wired_page_count = (int) (fill_size / PAGE_SIZE_64); + assert(wired_page_count * PAGE_SIZE_64 == fill_size); - if ((flags & KMA_KOBJECT) == 0) { - entry->object.vm_object = object; - entry->offset = offset = 0; - } else { - offset = addr - VM_MIN_KERNEL_ADDRESS; + for (i = 0; i < guard_page_count; i++) { + for (;;) { + mem = vm_page_grab_guard(); - if (entry->object.vm_object == VM_OBJECT_NULL) { - vm_object_reference(object); - entry->object.vm_object = object; - entry->offset = offset; + if (mem != VM_PAGE_NULL) + break; + if (flags & KMA_NOPAGEWAIT) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + vm_page_more_fictitious(); } + mem->pageq.next = (queue_entry_t)guard_page_list; + guard_page_list = mem; } - /* - * Since we have not given out this address yet, - * it is safe to unlock the map. - */ - vm_map_unlock(map); + if (! 
(flags & KMA_VAONLY)) { + for (i = 0; i < wired_page_count; i++) { + uint64_t unavailable; + + for (;;) { + if (flags & KMA_LOMEM) + mem = vm_page_grablo(); + else + mem = vm_page_grab(); - vm_object_lock(object); - for (i = 0; i < size; i += PAGE_SIZE) { - vm_page_t mem; + if (mem != VM_PAGE_NULL) + break; - while ((mem = vm_page_alloc(object, - offset + (vm_object_offset_t)i)) - == VM_PAGE_NULL) { if (flags & KMA_NOPAGEWAIT) { - if (object == kernel_object) - vm_object_page_remove(object, offset, - offset + (vm_object_offset_t)i); - vm_object_unlock(object); - vm_map_remove(map, addr, addr + size, 0); - return KERN_RESOURCE_SHORTAGE; + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE; + + if (unavailable > max_mem || map_size > (max_mem - unavailable)) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; } - vm_object_unlock(object); VM_PAGE_WAIT(); - vm_object_lock(object); } + mem->pageq.next = (queue_entry_t)wired_page_list; + wired_page_list = mem; + } + } + + /* + * Allocate a new object (if necessary). We must do this before + * locking the map, or risk deadlock with the default pager. + */ + if ((flags & KMA_KOBJECT) != 0) { + object = kernel_object; + vm_object_reference(object); + } else if ((flags & KMA_COMPRESSOR) != 0) { + object = compressor_object; + vm_object_reference(object); + } else { + object = vm_object_allocate(map_size); + } + + kr = vm_map_find_space(map, &map_addr, + fill_size, map_mask, + vm_alloc_flags, &entry); + if (KERN_SUCCESS != kr) { + vm_object_deallocate(object); + goto out; + } + + if (object == kernel_object || object == compressor_object) { + offset = map_addr; + } else { + offset = 0; + } + VME_OBJECT_SET(entry, object); + VME_OFFSET_SET(entry, offset); + + if (object != compressor_object) + entry->wired_count++; + + if (flags & KMA_PERMANENT) + entry->permanent = TRUE; + + if (object != kernel_object && object != compressor_object) + vm_object_reference(object); + + vm_object_lock(object); + vm_map_unlock(map); + + pg_offset = 0; + + if (fill_start) { + if (guard_page_list == NULL) + panic("kernel_memory_allocate: guard_page_list == NULL"); + + mem = guard_page_list; + guard_page_list = (vm_page_t)mem->pageq.next; + mem->pageq.next = NULL; + + vm_page_insert(mem, object, offset + pg_offset); + mem->busy = FALSE; + pg_offset += PAGE_SIZE_64; } - vm_object_unlock(object); - if ((kr = vm_map_wire(map, addr, addr + size, VM_PROT_DEFAULT, FALSE)) - != KERN_SUCCESS) { - if (object == kernel_object) { - vm_object_lock(object); - vm_object_page_remove(object, offset, offset + size); + kma_prot = VM_PROT_READ | VM_PROT_WRITE; + + if (flags & KMA_VAONLY) { + pg_offset = fill_start + fill_size; + } else { + for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) { + if (wired_page_list == NULL) + panic("kernel_memory_allocate: wired_page_list == NULL"); + + mem = wired_page_list; + wired_page_list = (vm_page_t)mem->pageq.next; + mem->pageq.next = NULL; + mem->wire_count++; + + vm_page_insert_wired(mem, object, offset + pg_offset, tag); + + mem->busy = FALSE; + mem->pmapped = TRUE; + mem->wpmapped = TRUE; + + PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem, + kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? 
VM_MEM_STACK : 0), TRUE, + PMAP_OPTIONS_NOWAIT, pe_result); + + if (pe_result == KERN_RESOURCE_SHORTAGE) { vm_object_unlock(object); + + PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, + kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE); + + vm_object_lock(object); + } + if (flags & KMA_NOENCRYPT) { + bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE); + + pmap_set_noencrypt(mem->phys_page); } - vm_map_remove(map, addr, addr + size, 0); - return (kr); } - if (object == kernel_object) - vm_map_simplify(map, addr); + } + if ((fill_start + fill_size) < map_size) { + if (guard_page_list == NULL) + panic("kernel_memory_allocate: guard_page_list == NULL"); + + mem = guard_page_list; + guard_page_list = (vm_page_t)mem->pageq.next; + mem->pageq.next = NULL; + + vm_page_insert(mem, object, offset + pg_offset); + + mem->busy = FALSE; + } + if (guard_page_list || wired_page_list) + panic("kernel_memory_allocate: non empty list\n"); + + if (! (flags & KMA_VAONLY)) { + vm_page_lockspin_queues(); + vm_page_wire_count += wired_page_count; + vm_page_unlock_queues(); + } + + vm_object_unlock(object); + + /* + * now that the pages are wired, we no longer have to fear coalesce + */ + if (object == kernel_object || object == compressor_object) + vm_map_simplify(map, map_addr); + else + vm_object_deallocate(object); /* * Return the memory, not zeroed. */ -#if (NCPUS > 1) && i860 - bzero( addr, size ); -#endif /* #if (NCPUS > 1) && i860 */ - *addrp = addr; + *addrp = CAST_DOWN(vm_offset_t, map_addr); return KERN_SUCCESS; + +out: + if (guard_page_list) + vm_page_free_list(guard_page_list, FALSE); + + if (wired_page_list) + vm_page_free_list(wired_page_list, FALSE); + + return kr; +} + +kern_return_t +kernel_memory_populate( + vm_map_t map, + vm_offset_t addr, + vm_size_t size, + int flags, + vm_tag_t tag) +{ + vm_object_t object; + vm_object_offset_t offset, pg_offset; + kern_return_t kr, pe_result; + vm_page_t mem; + vm_page_t page_list = NULL; + int page_count = 0; + int i; + + page_count = (int) (size / PAGE_SIZE_64); + + assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT)); + + if (flags & KMA_COMPRESSOR) { + + pg_offset = page_count * PAGE_SIZE_64; + + do { + for (;;) { + mem = vm_page_grab(); + + if (mem != VM_PAGE_NULL) + break; + + VM_PAGE_WAIT(); + } + mem->pageq.next = (queue_entry_t) page_list; + page_list = mem; + + pg_offset -= PAGE_SIZE_64; + + kr = pmap_enter_options(kernel_pmap, + addr + pg_offset, mem->phys_page, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE, + PMAP_OPTIONS_INTERNAL, NULL); + assert(kr == KERN_SUCCESS); + + } while (pg_offset); + + offset = addr; + object = compressor_object; + + vm_object_lock(object); + + for (pg_offset = 0; + pg_offset < size; + pg_offset += PAGE_SIZE_64) { + + mem = page_list; + page_list = (vm_page_t) mem->pageq.next; + mem->pageq.next = NULL; + + vm_page_insert(mem, object, offset + pg_offset); + assert(mem->busy); + + mem->busy = FALSE; + mem->pmapped = TRUE; + mem->wpmapped = TRUE; + mem->compressor = TRUE; + } + vm_object_unlock(object); + + return KERN_SUCCESS; + } + + for (i = 0; i < page_count; i++) { + for (;;) { + if (flags & KMA_LOMEM) + mem = vm_page_grablo(); + else + mem = vm_page_grab(); + + if (mem != VM_PAGE_NULL) + break; + + if (flags & KMA_NOPAGEWAIT) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + if ((flags & KMA_LOMEM) && + (vm_lopage_needed == TRUE)) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + VM_PAGE_WAIT(); + } + mem->pageq.next = (queue_entry_t) 
page_list; + page_list = mem; + } + if (flags & KMA_KOBJECT) { + offset = addr; + object = kernel_object; + + vm_object_lock(object); + } else { + /* + * If it's not the kernel object, we need to: + * lock map; + * lookup entry; + * lock object; + * take reference on object; + * unlock map; + */ + panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): " + "!KMA_KOBJECT", + map, (uint64_t) addr, (uint64_t) size, flags); + } + + for (pg_offset = 0; + pg_offset < size; + pg_offset += PAGE_SIZE_64) { + + if (page_list == NULL) + panic("kernel_memory_populate: page_list == NULL"); + + mem = page_list; + page_list = (vm_page_t) mem->pageq.next; + mem->pageq.next = NULL; + + mem->wire_count++; + + vm_page_insert_wired(mem, object, offset + pg_offset, tag); + + mem->busy = FALSE; + mem->pmapped = TRUE; + mem->wpmapped = TRUE; + + PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, + ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE, + PMAP_OPTIONS_NOWAIT, pe_result); + + if (pe_result == KERN_RESOURCE_SHORTAGE) { + + vm_object_unlock(object); + + PMAP_ENTER(kernel_pmap, addr + pg_offset, mem, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, + ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE); + + vm_object_lock(object); + } + if (flags & KMA_NOENCRYPT) { + bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE); + pmap_set_noencrypt(mem->phys_page); + } + } + vm_page_lock_queues(); + vm_page_wire_count += page_count; + vm_page_unlock_queues(); + + vm_object_unlock(object); + + return KERN_SUCCESS; + +out: + if (page_list) + vm_page_free_list(page_list, FALSE); + + return kr; +} + + +void +kernel_memory_depopulate( + vm_map_t map, + vm_offset_t addr, + vm_size_t size, + int flags) +{ + vm_object_t object; + vm_object_offset_t offset, pg_offset; + vm_page_t mem; + vm_page_t local_freeq = NULL; + + assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT)); + + if (flags & KMA_COMPRESSOR) { + offset = addr; + object = compressor_object; + + vm_object_lock(object); + } else if (flags & KMA_KOBJECT) { + offset = addr; + object = kernel_object; + + vm_object_lock(object); + } else { + offset = 0; + object = NULL; + /* + * If it's not the kernel object, we need to: + * lock map; + * lookup entry; + * lock object; + * unlock map; + */ + panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): " + "!KMA_KOBJECT", + map, (uint64_t) addr, (uint64_t) size, flags); + } + pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE); + + for (pg_offset = 0; + pg_offset < size; + pg_offset += PAGE_SIZE_64) { + + mem = vm_page_lookup(object, offset + pg_offset); + + assert(mem); + + pmap_disconnect(mem->phys_page); + + mem->busy = TRUE; + + assert(mem->tabled); + vm_page_remove(mem, TRUE); + assert(mem->busy); + + assert(mem->pageq.next == NULL && + mem->pageq.prev == NULL); + mem->pageq.next = (queue_entry_t)local_freeq; + local_freeq = mem; + } + vm_object_unlock(object); + + if (local_freeq) + vm_page_free_list(local_freeq, TRUE); } /* @@ -312,12 +768,24 @@ kernel_memory_allocate( */ kern_return_t -kmem_alloc( +kmem_alloc_external( vm_map_t map, vm_offset_t *addrp, vm_size_t size) { - return kernel_memory_allocate(map, addrp, size, 0, 0); + return (kmem_alloc(map, addrp, size, vm_tag_bt())); +} + +kern_return_t +kmem_alloc( + vm_map_t map, + vm_offset_t *addrp, + vm_size_t size, + vm_tag_t tag) +{ + kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0, tag); + TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp); + 
return kr; } /* @@ -332,40 +800,43 @@ kmem_alloc( */ kern_return_t kmem_realloc( - vm_map_t map, - vm_offset_t oldaddr, - vm_size_t oldsize, - vm_offset_t *newaddrp, - vm_size_t newsize) + vm_map_t map, + vm_offset_t oldaddr, + vm_size_t oldsize, + vm_offset_t *newaddrp, + vm_size_t newsize, + vm_tag_t tag) { - vm_offset_t oldmin, oldmax; - vm_offset_t newaddr; - vm_object_t object; - vm_map_entry_t oldentry, newentry; - kern_return_t kr; - - oldmin = trunc_page(oldaddr); - oldmax = round_page(oldaddr + oldsize); - oldsize = oldmax - oldmin; - newsize = round_page(newsize); + vm_object_t object; + vm_object_offset_t offset; + vm_map_offset_t oldmapmin; + vm_map_offset_t oldmapmax; + vm_map_offset_t newmapaddr; + vm_map_size_t oldmapsize; + vm_map_size_t newmapsize; + vm_map_entry_t oldentry; + vm_map_entry_t newentry; + vm_page_t mem; + kern_return_t kr; - /* - * Find space for the new region. - */ + oldmapmin = vm_map_trunc_page(oldaddr, + VM_MAP_PAGE_MASK(map)); + oldmapmax = vm_map_round_page(oldaddr + oldsize, + VM_MAP_PAGE_MASK(map)); + oldmapsize = oldmapmax - oldmapmin; + newmapsize = vm_map_round_page(newsize, + VM_MAP_PAGE_MASK(map)); - kr = vm_map_find_space(map, &newaddr, newsize, (vm_offset_t) 0, - &newentry); - if (kr != KERN_SUCCESS) { - return kr; - } /* * Find the VM object backing the old region. */ - if (!vm_map_lookup_entry(map, oldmin, &oldentry)) + vm_map_lock(map); + + if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) panic("kmem_realloc"); - object = oldentry->object.vm_object; + object = VME_OBJECT(oldentry); /* * Increase the size of the object and @@ -373,43 +844,74 @@ kmem_realloc( */ vm_object_reference(object); + /* by grabbing the object lock before unlocking the map */ + /* we guarantee that we will panic if more than one */ + /* attempt is made to realloc a kmem_alloc'd area */ vm_object_lock(object); - if (object->size != oldsize) + vm_map_unlock(map); + if (object->vo_size != oldmapsize) panic("kmem_realloc"); - object->size = newsize; + object->vo_size = newmapsize; vm_object_unlock(object); - newentry->object.vm_object = object; - newentry->offset = 0; - assert (newentry->wired_count == 0); - newentry->wired_count = 1; + /* allocate the new pages while expanded portion of the */ + /* object is still not mapped */ + kmem_alloc_pages(object, vm_object_round_page(oldmapsize), + vm_object_round_page(newmapsize-oldmapsize)); /* - * Since we have not given out this address yet, - * it is safe to unlock the map. We are trusting - * that nobody will play with either region. + * Find space for the new region. */ - vm_map_unlock(map); + kr = vm_map_find_space(map, &newmapaddr, newmapsize, + (vm_map_offset_t) 0, 0, &newentry); + if (kr != KERN_SUCCESS) { + vm_object_lock(object); + for(offset = oldmapsize; + offset < newmapsize; offset += PAGE_SIZE) { + if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { + VM_PAGE_FREE(mem); + } + } + object->vo_size = oldmapsize; + vm_object_unlock(object); + vm_object_deallocate(object); + return kr; + } + VME_OBJECT_SET(newentry, object); + VME_OFFSET_SET(newentry, 0); + VME_ALIAS_SET(newentry, tag); + assert(newentry->wired_count == 0); - /* - * Remap the pages in the old region and - * allocate more pages for the new region. 
- */ + + /* add an extra reference in case we have someone doing an */ + /* unexpected deallocate */ + vm_object_reference(object); + vm_map_unlock(map); - kmem_remap_pages(object, 0, - newaddr, newaddr + oldsize, - VM_PROT_DEFAULT); - kmem_alloc_pages(object, oldsize, - newaddr + oldsize, newaddr + newsize, - VM_PROT_DEFAULT); + kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize, + VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(tag), FALSE); + if (KERN_SUCCESS != kr) { + vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0); + vm_object_lock(object); + for(offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) { + if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { + VM_PAGE_FREE(mem); + } + } + object->vo_size = oldmapsize; + vm_object_unlock(object); + vm_object_deallocate(object); + return (kr); + } + vm_object_deallocate(object); - *newaddrp = newaddr; + *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr); return KERN_SUCCESS; } /* - * kmem_alloc_wired: + * kmem_alloc_kobject: * * Allocate wired-down memory in the kernel's address map * or a submap. The memory is not zero-filled. @@ -420,18 +922,28 @@ kmem_realloc( */ kern_return_t -kmem_alloc_wired( +kmem_alloc_kobject_external( vm_map_t map, vm_offset_t *addrp, vm_size_t size) { - return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT); + return (kmem_alloc_kobject(map, addrp, size, vm_tag_bt())); +} + +kern_return_t +kmem_alloc_kobject( + vm_map_t map, + vm_offset_t *addrp, + vm_size_t size, + vm_tag_t tag) +{ + return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag); } /* * kmem_alloc_aligned: * - * Like kmem_alloc_wired, except that the memory is aligned. + * Like kmem_alloc_kobject, except that the memory is aligned. * The size should be a power-of-2. */ @@ -439,11 +951,12 @@ kern_return_t kmem_alloc_aligned( vm_map_t map, vm_offset_t *addrp, - vm_size_t size) + vm_size_t size, + vm_tag_t tag) { if ((size & (size - 1)) != 0) panic("kmem_alloc_aligned: size not aligned"); - return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT); + return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag); } /* @@ -453,27 +966,43 @@ kmem_alloc_aligned( */ kern_return_t -kmem_alloc_pageable( +kmem_alloc_pageable_external( vm_map_t map, vm_offset_t *addrp, vm_size_t size) { - vm_offset_t addr; + return (kmem_alloc_pageable(map, addrp, size, vm_tag_bt())); +} + +kern_return_t +kmem_alloc_pageable( + vm_map_t map, + vm_offset_t *addrp, + vm_size_t size, + vm_tag_t tag) +{ + vm_map_offset_t map_addr; + vm_map_size_t map_size; kern_return_t kr; #ifndef normal - addr = (vm_map_min(map)) + 0x1000; + map_addr = (vm_map_min(map)) + PAGE_SIZE; #else - addr = vm_map_min(map); + map_addr = vm_map_min(map); #endif - kr = vm_map_enter(map, &addr, round_page(size), - (vm_offset_t) 0, TRUE, + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); + + kr = vm_map_enter(map, &map_addr, map_size, + (vm_map_offset_t) 0, + VM_FLAGS_ANYWHERE | VM_MAKE_TAG(tag), VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); + if (kr != KERN_SUCCESS) return kr; - *addrp = addr; + *addrp = CAST_DOWN(vm_offset_t, map_addr); return KERN_SUCCESS; } @@ -481,7 +1010,7 @@ kmem_alloc_pageable( * kmem_free: * * Release a region of kernel virtual memory allocated - * with kmem_alloc, kmem_alloc_wired, or kmem_alloc_pageable, + * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable, * and return the physical pages associated with that region. 
*/ @@ -493,117 +1022,61 @@ kmem_free( { kern_return_t kr; - kr = vm_map_remove(map, trunc_page(addr), - round_page(addr + size), VM_MAP_REMOVE_KUNWIRE); + assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS); + + TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr); + + if(size == 0) { +#if MACH_ASSERT + printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n",map,(uint64_t)addr); +#endif + return; + } + + kr = vm_map_remove(map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr + size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_REMOVE_KUNWIRE); if (kr != KERN_SUCCESS) panic("kmem_free"); } /* - * Allocate new wired pages in an object. - * The object is assumed to be mapped into the kernel map or - * a submap. + * Allocate new pages in an object. */ kern_return_t kmem_alloc_pages( register vm_object_t object, register vm_object_offset_t offset, - register vm_offset_t start, - register vm_offset_t end, - vm_prot_t protection) + register vm_object_size_t size) { - /* - * Mark the pmap region as not pageable. - */ - pmap_pageable(kernel_pmap, start, end, FALSE); + vm_object_size_t alloc_size; - while (start < end) { + alloc_size = vm_object_round_page(size); + vm_object_lock(object); + while (alloc_size) { register vm_page_t mem; - vm_object_lock(object); /* * Allocate a page */ - while ((mem = vm_page_alloc(object, offset)) - == VM_PAGE_NULL) { + while (VM_PAGE_NULL == + (mem = vm_page_alloc(object, offset))) { vm_object_unlock(object); VM_PAGE_WAIT(); vm_object_lock(object); } + mem->busy = FALSE; - /* - * Wire it down - */ - vm_page_lock_queues(); - vm_page_wire(mem); - vm_page_unlock_queues(); - vm_object_unlock(object); - - /* - * Enter it in the kernel pmap - */ - PMAP_ENTER(kernel_pmap, start, mem, - protection, TRUE); - - vm_object_lock(object); - PAGE_WAKEUP_DONE(mem); - vm_object_unlock(object); - - start += PAGE_SIZE; - offset += PAGE_SIZE_64; - } - return KERN_SUCCESS; -} - -/* - * Remap wired pages in an object into a new region. - * The object is assumed to be mapped into the kernel map or - * a submap. - */ -void -kmem_remap_pages( - register vm_object_t object, - register vm_object_offset_t offset, - register vm_offset_t start, - register vm_offset_t end, - vm_prot_t protection) -{ - /* - * Mark the pmap region as not pageable. - */ - pmap_pageable(kernel_pmap, start, end, FALSE); - - while (start < end) { - register vm_page_t mem; - - vm_object_lock(object); - - /* - * Find a page - */ - if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL) - panic("kmem_remap_pages"); - - /* - * Wire it down (again) - */ - vm_page_lock_queues(); - vm_page_wire(mem); - vm_page_unlock_queues(); - vm_object_unlock(object); - - /* - * Enter it in the kernel pmap. The page isn't busy, - * but this shouldn't be a problem because it is wired. 
- */ - PMAP_ENTER(kernel_pmap, start, mem, - protection, TRUE); - - start += PAGE_SIZE; + alloc_size -= PAGE_SIZE; offset += PAGE_SIZE; } + vm_object_unlock(object); + return KERN_SUCCESS; } /* @@ -627,13 +1100,16 @@ kmem_suballoc( vm_offset_t *addr, vm_size_t size, boolean_t pageable, - boolean_t anywhere, + int flags, vm_map_t *new_map) { - vm_map_t map; - kern_return_t kr; + vm_map_t map; + vm_map_offset_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; - size = round_page(size); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(parent)); /* * Need reference on submap object because it is internal @@ -642,10 +1118,13 @@ kmem_suballoc( */ vm_object_reference(vm_submap_object); - if (anywhere == TRUE) - *addr = (vm_offset_t)vm_map_min(parent); - kr = vm_map_enter(parent, addr, size, - (vm_offset_t) 0, anywhere, + map_addr = ((flags & VM_FLAGS_ANYWHERE) + ? vm_map_min(parent) + : vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(parent))); + + kr = vm_map_enter(parent, &map_addr, map_size, + (vm_map_offset_t) 0, flags, vm_submap_object, (vm_object_offset_t) 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) { @@ -654,20 +1133,23 @@ kmem_suballoc( } pmap_reference(vm_map_pmap(parent)); - map = vm_map_create(vm_map_pmap(parent), *addr, *addr + size, pageable); + map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable); if (map == VM_MAP_NULL) panic("kmem_suballoc: vm_map_create failed"); /* "can't happen" */ + /* inherit the parent map's page size */ + vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent)); - kr = vm_map_submap(parent, *addr, *addr + size, map, *addr, FALSE); + kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE); if (kr != KERN_SUCCESS) { /* * See comment preceding vm_map_submap(). */ - vm_map_remove(parent, *addr, *addr + size, VM_MAP_NO_FLAGS); + vm_map_remove(parent, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS); vm_map_deallocate(map); /* also removes ref to pmap */ vm_object_deallocate(vm_submap_object); return (kr); } + *addr = CAST_DOWN(vm_offset_t, map_addr); *new_map = map; return (KERN_SUCCESS); } @@ -683,105 +1165,58 @@ kmem_init( vm_offset_t start, vm_offset_t end) { - kernel_map = vm_map_create(pmap_kernel(), - VM_MIN_KERNEL_ADDRESS, end, - FALSE); + vm_map_offset_t map_start; + vm_map_offset_t map_end; + + map_start = vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(kernel_map)); + map_end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(kernel_map)); + kernel_map = vm_map_create(pmap_kernel(),VM_MIN_KERNEL_AND_KEXT_ADDRESS, + map_end, FALSE); /* * Reserve virtual memory allocated up to this time. */ - - if (start != VM_MIN_KERNEL_ADDRESS) { - vm_offset_t addr = VM_MIN_KERNEL_ADDRESS; - (void) vm_map_enter(kernel_map, - &addr, start - VM_MIN_KERNEL_ADDRESS, - (vm_offset_t) 0, TRUE, - VM_OBJECT_NULL, - (vm_object_offset_t) 0, FALSE, - VM_PROT_DEFAULT, VM_PROT_ALL, - VM_INHERIT_DEFAULT); - } - - /* - * Account for kernel memory (text, data, bss, vm shenanigans). - * This may include inaccessible "holes" as determined by what - * the machine-dependent init code includes in mem_size. - */ - vm_page_wire_count = (atop(mem_size) - (vm_page_free_count - + vm_page_active_count - + vm_page_inactive_count)); -} - - -/* - * kmem_io_object_trunc: - * - * Truncate an object vm_map_copy_t. - * Called by the scatter/gather list network code to remove pages from - * the tail end of a packet. Also unwires the objects pages. 
- */ - -kern_return_t -kmem_io_object_trunc(copy, new_size) - vm_map_copy_t copy; /* IN/OUT copy object */ - register vm_size_t new_size; /* IN new object size */ -{ - register vm_size_t offset, old_size; - - assert(copy->type == VM_MAP_COPY_OBJECT); - - old_size = (vm_size_t)round_page_64(copy->size); - copy->size = new_size; - new_size = round_page(new_size); - - vm_object_lock(copy->cpy_object); - vm_object_page_remove(copy->cpy_object, - (vm_object_offset_t)new_size, (vm_object_offset_t)old_size); - for (offset = 0; offset < new_size; offset += PAGE_SIZE) { - register vm_page_t mem; - - if ((mem = vm_page_lookup(copy->cpy_object, - (vm_object_offset_t)offset)) == VM_PAGE_NULL) - panic("kmem_io_object_trunc: unable to find object page"); - - /* - * Make sure these pages are marked dirty - */ - mem->dirty = TRUE; - vm_page_lock_queues(); - vm_page_unwire(mem); - vm_page_unlock_queues(); - } - copy->cpy_object->size = new_size; /* adjust size of object */ - vm_object_unlock(copy->cpy_object); - return(KERN_SUCCESS); -} - -/* - * kmem_io_object_deallocate: - * - * Free an vm_map_copy_t. - * Called by the scatter/gather list network code to free a packet. - */ - -void -kmem_io_object_deallocate( - vm_map_copy_t copy) /* IN/OUT copy object */ -{ - kern_return_t ret; + if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) { + vm_map_offset_t map_addr; + kern_return_t kr; + + map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS; + kr = vm_map_enter(kernel_map, + &map_addr, + (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS), + (vm_map_offset_t) 0, + VM_FLAGS_FIXED | VM_FLAGS_NO_PMAP_CHECK, + VM_OBJECT_NULL, + (vm_object_offset_t) 0, FALSE, + VM_PROT_NONE, VM_PROT_NONE, + VM_INHERIT_DEFAULT); + + if (kr != KERN_SUCCESS) { + panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n", + (uint64_t) start, (uint64_t) end, + (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS, + (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS), + kr); + } + } /* - * Clear out all the object pages (this will leave an empty object). + * Set the default global user wire limit which limits the amount of + * memory that can be locked via mlock(). We set this to the total + * amount of memory that are potentially usable by a user app (max_mem) + * minus a certain amount. This can be overridden via a sysctl. */ - ret = kmem_io_object_trunc(copy, 0); - if (ret != KERN_SUCCESS) - panic("kmem_io_object_deallocate: unable to truncate object"); - /* - * ...and discard the copy object. - */ - vm_map_copy_discard(copy); + vm_global_no_user_wire_amount = MIN(max_mem*20/100, + VM_NOT_USER_WIREABLE); + vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount; + + /* the default per user limit is the same as the global limit */ + vm_user_wire_limit = vm_global_user_wire_limit; } + /* * Routine: copyinmap * Purpose: @@ -790,23 +1225,36 @@ kmem_io_object_deallocate( * is incomplete; it handles the current user map * and the kernel map/submaps. 
*/ -boolean_t +kern_return_t copyinmap( - vm_map_t map, - vm_offset_t fromaddr, - vm_offset_t toaddr, - vm_size_t length) + vm_map_t map, + vm_map_offset_t fromaddr, + void *todata, + vm_size_t length) { - if (vm_map_pmap(map) == pmap_kernel()) { + kern_return_t kr = KERN_SUCCESS; + vm_map_t oldmap; + + if (vm_map_pmap(map) == pmap_kernel()) + { /* assume a correct copy */ - memcpy((void *)toaddr, (void *)fromaddr, length); - return FALSE; + memcpy(todata, CAST_DOWN(void *, fromaddr), length); + } + else if (current_map() == map) + { + if (copyin(fromaddr, todata, length) != 0) + kr = KERN_INVALID_ADDRESS; } - - if (current_map() == map) - return copyin((char *)fromaddr, (char *)toaddr, length); - - return TRUE; + else + { + vm_map_reference(map); + oldmap = vm_map_switch(map); + if (copyin(fromaddr, todata, length) != 0) + kr = KERN_INVALID_ADDRESS; + vm_map_switch(oldmap); + vm_map_deallocate(map); + } + return kr; } /* @@ -817,21 +1265,192 @@ copyinmap( * is incomplete; it handles the current user map * and the kernel map/submaps. */ -boolean_t +kern_return_t copyoutmap( - vm_map_t map, - vm_offset_t fromaddr, - vm_offset_t toaddr, - vm_size_t length) + vm_map_t map, + void *fromdata, + vm_map_address_t toaddr, + vm_size_t length) { if (vm_map_pmap(map) == pmap_kernel()) { /* assume a correct copy */ - memcpy((void *)toaddr, (void *)fromaddr, length); - return FALSE; + memcpy(CAST_DOWN(void *, toaddr), fromdata, length); + return KERN_SUCCESS; + } + + if (current_map() != map) + return KERN_NOT_SUPPORTED; + + if (copyout(fromdata, toaddr, length) != 0) + return KERN_INVALID_ADDRESS; + + return KERN_SUCCESS; +} + + +kern_return_t +vm_conflict_check( + vm_map_t map, + vm_map_offset_t off, + vm_map_size_t len, + memory_object_t pager, + vm_object_offset_t file_off) +{ + vm_map_entry_t entry; + vm_object_t obj; + vm_object_offset_t obj_off; + vm_map_t base_map; + vm_map_offset_t base_offset; + vm_map_offset_t original_offset; + kern_return_t kr; + vm_map_size_t local_len; + + base_map = map; + base_offset = off; + original_offset = off; + kr = KERN_SUCCESS; + vm_map_lock(map); + while(vm_map_lookup_entry(map, off, &entry)) { + local_len = len; + + if (VME_OBJECT(entry) == VM_OBJECT_NULL) { + vm_map_unlock(map); + return KERN_SUCCESS; + } + if (entry->is_sub_map) { + vm_map_t old_map; + + old_map = map; + vm_map_lock(VME_SUBMAP(entry)); + map = VME_SUBMAP(entry); + off = VME_OFFSET(entry) + (off - entry->vme_start); + vm_map_unlock(old_map); + continue; + } + obj = VME_OBJECT(entry); + obj_off = (off - entry->vme_start) + VME_OFFSET(entry); + while(obj->shadow) { + obj_off += obj->vo_shadow_offset; + obj = obj->shadow; + } + if((obj->pager_created) && (obj->pager == pager)) { + if(((obj->paging_offset) + obj_off) == file_off) { + if(off != base_offset) { + vm_map_unlock(map); + return KERN_FAILURE; + } + kr = KERN_ALREADY_WAITING; + } else { + vm_object_offset_t obj_off_aligned; + vm_object_offset_t file_off_aligned; + + obj_off_aligned = obj_off & ~PAGE_MASK; + file_off_aligned = file_off & ~PAGE_MASK; + + if (file_off_aligned == (obj->paging_offset + obj_off_aligned)) { + /* + * the target map and the file offset start in the same page + * but are not identical... 
+ */ + vm_map_unlock(map); + return KERN_FAILURE; + } + if ((file_off < (obj->paging_offset + obj_off_aligned)) && + ((file_off + len) > (obj->paging_offset + obj_off_aligned))) { + /* + * some portion of the tail of the I/O will fall + * within the encompass of the target map + */ + vm_map_unlock(map); + return KERN_FAILURE; + } + if ((file_off_aligned > (obj->paging_offset + obj_off)) && + (file_off_aligned < (obj->paging_offset + obj_off) + len)) { + /* + * the beginning page of the file offset falls within + * the target map's encompass + */ + vm_map_unlock(map); + return KERN_FAILURE; + } + } + } else if(kr != KERN_SUCCESS) { + vm_map_unlock(map); + return KERN_FAILURE; + } + + if(len <= ((entry->vme_end - entry->vme_start) - + (off - entry->vme_start))) { + vm_map_unlock(map); + return kr; + } else { + len -= (entry->vme_end - entry->vme_start) - + (off - entry->vme_start); + } + base_offset = base_offset + (local_len - len); + file_off = file_off + (local_len - len); + off = base_offset; + if(map != base_map) { + vm_map_unlock(map); + vm_map_lock(base_map); + map = base_map; + } } - if (current_map() == map) - return copyout((char *)fromaddr, (char *)toaddr, length); + vm_map_unlock(map); + return kr; +} + +/* + * + * The following two functions are to be used when exposing kernel + * addresses to userspace via any of the various debug or info + * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM() + * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and + * are exported to KEXTs. + * + * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL + */ + +/* + * vm_kernel_addrperm_external: + * + * Used when exposing an address to userspace which is in the kernel's + * "heap". These addresses are not loaded from anywhere and are resultingly + * unslid. We apply a permutation value to obscure the address. + */ +void +vm_kernel_addrperm_external( + vm_offset_t addr, + vm_offset_t *perm_addr) +{ + if (addr == 0) { + *perm_addr = 0; + return; + } + + *perm_addr = (addr + vm_kernel_addrperm_ext); + return; +} + +/* + * vm_kernel_unslide_or_perm_external: + * + * Use this macro when exposing an address to userspace that could come from + * either kernel text/data *or* the heap. + */ +void +vm_kernel_unslide_or_perm_external( + vm_offset_t addr, + vm_offset_t *up_addr) +{ + if (VM_KERNEL_IS_SLID(addr) || VM_KERNEL_IS_KEXT(addr) || + VM_KERNEL_IS_PRELINKTEXT(addr) || VM_KERNEL_IS_PRELINKINFO(addr) || + VM_KERNEL_IS_KEXT_LINKEDIT(addr)) { + *up_addr = addr - vm_kernel_slide; + return; + } - return TRUE; + vm_kernel_addrperm_external(addr, up_addr); + return; }
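
A minimal usage sketch of the reworked interface (not part of the diff above): kernel_memory_allocate() now takes a vm_tag_t and honors the KMA_GUARD_FIRST/KMA_GUARD_LAST flags documented in the guard-page comment. The fragment below shows how a caller might request a wired, tagged buffer bracketed by guard pages, mirroring the way kernel stacks are allocated; the helper name and the VM_KERN_MEMORY_NONE tag value are illustrative assumptions, not something taken from this diff.

#include <mach/vm_param.h>
#include <vm/vm_kern.h>

static kern_return_t
example_alloc_guarded_buffer(vm_offset_t *out_addr, vm_size_t payload_size)
{
	kern_return_t	kr;
	vm_offset_t	addr;
	/* the size handed to kernel_memory_allocate() includes both guard pages */
	vm_size_t	total = round_page(payload_size) + 2 * PAGE_SIZE;

	kr = kernel_memory_allocate(kernel_map, &addr, total,
				    0,		/* no extra alignment mask */
				    KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST,
				    VM_KERN_MEMORY_NONE);	/* tag value is an assumption */
	if (kr != KERN_SUCCESS)
		return kr;

	/*
	 * Per the fill_start logic above, the address that comes back is the
	 * address of the leading (fictitious) guard page; the usable, wired
	 * payload begins one page past it.
	 */
	*out_addr = addr + PAGE_SIZE;
	return KERN_SUCCESS;
}

To release the region, the caller would pass the original base address and the full size (guard pages included) back to kmem_free(), which in this version asserts that the address lies in the kernel/kext range and returns early on a zero size (printing a diagnostic under MACH_ASSERT).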