X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/4452a7af2eac33dbad800bcc91f2399d62c18f53..060df5ea7c632b1ac8cc8aac1fb59758165c2084:/osfmk/vm/vm_kern.c

diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c
index b81d47b22..aa0dbafe2 100644
--- a/osfmk/vm/vm_kern.c
+++ b/osfmk/vm/vm_kern.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -77,6 +77,10 @@
 #include 
 
 #include 
+
+#include 
+#include 
+
 /*
  *	Variables exported by this module.
  */
@@ -84,6 +88,8 @@
 vm_map_t	kernel_map;
 vm_map_t	kernel_pageable_map;
 
+extern boolean_t vm_kernel_ready;
+
 /*
  *	Forward declarations for internal functions.
  */
@@ -105,6 +111,8 @@ kmem_alloc_contig(
 	vm_offset_t		*addrp,
 	vm_size_t		size,
 	vm_offset_t		mask,
+	ppnum_t			max_pnum,
+	ppnum_t			pnum_mask,
 	int			flags)
 {
 	vm_object_t		object;
@@ -116,7 +124,7 @@ kmem_alloc_contig(
 	vm_page_t		m, pages;
 	kern_return_t		kr;
 
-	if (map == VM_MAP_NULL || (flags && (flags ^ KMA_KOBJECT)))
+	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT)))
 		return KERN_INVALID_ARGUMENT;
 
 	if (size == 0) {
@@ -147,13 +155,13 @@ kmem_alloc_contig(
 
 	entry->object.vm_object = object;
 	entry->offset = offset = (object == kernel_object) ?
-		map_addr - VM_MIN_KERNEL_ADDRESS : 0;
+		map_addr : 0;
 
 	/* Take an extra object ref in case the map entry gets deleted */
 	vm_object_reference(object);
 	vm_map_unlock(map);
 
-	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, FALSE);
+	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
 
 	if (kr != KERN_SUCCESS) {
 		vm_map_remove(map, vm_map_trunc_page(map_addr),
@@ -191,7 +199,8 @@ kmem_alloc_contig(
 	if (object == kernel_object)
 		vm_map_simplify(map, map_addr);
 
-	*addrp = map_addr;
+	*addrp = (vm_offset_t) map_addr;
+	assert((vm_map_offset_t) *addrp == map_addr);
 	return KERN_SUCCESS;
 }
 
@@ -222,26 +231,131 @@ kernel_memory_allocate(
 {
 	vm_object_t 		object;
 	vm_object_offset_t 	offset;
+	vm_object_offset_t 	pg_offset;
 	vm_map_entry_t 		entry;
-	vm_map_offset_t 	map_addr;
+	vm_map_offset_t 	map_addr, fill_start;
 	vm_map_offset_t		map_mask;
-	vm_map_size_t		map_size;
-	vm_map_size_t		i;
+	vm_map_size_t		map_size, fill_size;
 	kern_return_t 		kr;
+	vm_page_t		mem;
+	vm_page_t		guard_page_list = NULL;
+	vm_page_t		wired_page_list = NULL;
+	int			guard_page_count = 0;
+	int			wired_page_count = 0;
+	int			i;
+	int			vm_alloc_flags;
+
+	if (! vm_kernel_ready) {
+		panic("kernel_memory_allocate: VM is not ready");
+	}
 
 	if (size == 0) {
 		*addrp = 0;
 		return KERN_INVALID_ARGUMENT;
 	}
-	if (flags & KMA_LOMEM) {
-		if ( !(flags & KMA_NOPAGEWAIT) ) {
-			*addrp = 0;
-			return KERN_INVALID_ARGUMENT;
+	map_size = vm_map_round_page(size);
+	map_mask = (vm_map_offset_t) mask;
+	vm_alloc_flags = 0;
+
+
+	/*
+	 * limit the size of a single extent of wired memory
+	 * to try and limit the damage to the system if
+	 * too many pages get wired down
+	 */
+	if (map_size > (1 << 30)) {
+		return KERN_RESOURCE_SHORTAGE;
+	}
+
+	/*
+	 * Guard pages:
+	 *
+	 * Guard pages are implemented as ficticious pages.  By placing guard pages
+	 * on either end of a stack, they can help detect cases where a thread walks
+	 * off either end of its stack.  They are allocated and set up here and attempts
+	 * to access those pages are trapped in vm_fault_page().
+	 *
+	 * The map_size we were passed may include extra space for
+	 * guard pages.  If those were requested, then back it out of fill_size
+	 * since vm_map_find_space() takes just the actual size not including
+	 * guard pages.  Similarly, fill_start indicates where the actual pages
+	 * will begin in the range.
+	 */
+
+	fill_start = 0;
+	fill_size = map_size;
+
+	if (flags & KMA_GUARD_FIRST) {
+		vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
+		fill_start += PAGE_SIZE_64;
+		fill_size -= PAGE_SIZE_64;
+		if (map_size < fill_start + fill_size) {
+			/* no space for a guard page */
+			*addrp = 0;
+			return KERN_INVALID_ARGUMENT;
+		}
+		guard_page_count++;
+	}
+	if (flags & KMA_GUARD_LAST) {
+		vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
+		fill_size -= PAGE_SIZE_64;
+		if (map_size <= fill_start + fill_size) {
+			/* no space for a guard page */
+			*addrp = 0;
+			return KERN_INVALID_ARGUMENT;
 		}
+		guard_page_count++;
 	}
+	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
+	assert(wired_page_count * PAGE_SIZE_64 == fill_size);
 
-	map_size = vm_map_round_page(size);
-	map_mask = (vm_map_offset_t) mask;
+	for (i = 0; i < guard_page_count; i++) {
+		for (;;) {
+			mem = vm_page_grab_guard();
+
+			if (mem != VM_PAGE_NULL)
+				break;
+			if (flags & KMA_NOPAGEWAIT) {
+				kr = KERN_RESOURCE_SHORTAGE;
+				goto out;
+			}
+			vm_page_more_fictitious();
+		}
+		mem->pageq.next = (queue_entry_t)guard_page_list;
+		guard_page_list = mem;
+	}
+
+	for (i = 0; i < wired_page_count; i++) {
+		uint64_t	unavailable;
+
+		for (;;) {
+			if (flags & KMA_LOMEM)
+				mem = vm_page_grablo();
+			else
+				mem = vm_page_grab();
+
+			if (mem != VM_PAGE_NULL)
+				break;
+
+			if (flags & KMA_NOPAGEWAIT) {
+				kr = KERN_RESOURCE_SHORTAGE;
+				goto out;
+			}
+			if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
+				kr = KERN_RESOURCE_SHORTAGE;
+				goto out;
+			}
+			unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;
+
+			if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
+				kr = KERN_RESOURCE_SHORTAGE;
+				goto out;
+			}
+			VM_PAGE_WAIT();
+		}
+		mem->pageq.next = (queue_entry_t)wired_page_list;
+		wired_page_list = mem;
+	}
 
 	/*
 	 *	Allocate a new object (if necessary).  We must do this before
@@ -254,68 +368,111 @@ kernel_memory_allocate(
 		object = vm_object_allocate(map_size);
 	}
 
-	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
+	kr = vm_map_find_space(map, &map_addr,
+			       fill_size, map_mask,
+			       vm_alloc_flags, &entry);
 	if (KERN_SUCCESS != kr) {
 		vm_object_deallocate(object);
-		return kr;
+		goto out;
 	}
+
 	entry->object.vm_object = object;
 	entry->offset = offset = (object == kernel_object) ?
-		map_addr - VM_MIN_KERNEL_ADDRESS : 0;
+		map_addr : 0;
 
-	vm_object_reference(object);
-	vm_map_unlock(map);
+	entry->wired_count++;
+
+	if (flags & KMA_PERMANENT)
+		entry->permanent = TRUE;
+
+	if (object != kernel_object)
+		vm_object_reference(object);
 
 	vm_object_lock(object);
-	for (i = 0; i < map_size; i += PAGE_SIZE) {
-		vm_page_t	mem;
+	vm_map_unlock(map);
 
-		for (;;) {
-			if (flags & KMA_LOMEM)
-				mem = vm_page_alloclo(object, offset + i);
-			else
-				mem = vm_page_alloc(object, offset + i);
+	pg_offset = 0;
 
-			if (mem != VM_PAGE_NULL)
-				break;
+	if (fill_start) {
+		if (guard_page_list == NULL)
+			panic("kernel_memory_allocate: guard_page_list == NULL");
+
+		mem = guard_page_list;
+		guard_page_list = (vm_page_t)mem->pageq.next;
+		mem->pageq.next = NULL;
+
+		vm_page_insert(mem, object, offset + pg_offset);
 
-			if (flags & KMA_NOPAGEWAIT) {
-				if (object == kernel_object)
-					vm_object_page_remove(object, offset, offset + i);
-				vm_object_unlock(object);
-				vm_map_remove(map, map_addr, map_addr + map_size, 0);
-				vm_object_deallocate(object);
-				return KERN_RESOURCE_SHORTAGE;
-			}
-			vm_object_unlock(object);
-			VM_PAGE_WAIT();
-			vm_object_lock(object);
-		}
 		mem->busy = FALSE;
+		pg_offset += PAGE_SIZE_64;
 	}
-	vm_object_unlock(object);
+	for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
+		if (wired_page_list == NULL)
+			panic("kernel_memory_allocate: wired_page_list == NULL");
 
-	if ((kr = vm_map_wire(map, map_addr, map_addr + map_size, VM_PROT_DEFAULT, FALSE))
-		!= KERN_SUCCESS) {
-		if (object == kernel_object) {
-			vm_object_lock(object);
-			vm_object_page_remove(object, offset, offset + map_size);
-			vm_object_unlock(object);
+		mem = wired_page_list;
+		wired_page_list = (vm_page_t)mem->pageq.next;
+		mem->pageq.next = NULL;
+		mem->wire_count++;
+
+		vm_page_insert(mem, object, offset + pg_offset);
+
+		mem->busy = FALSE;
+		mem->pmapped = TRUE;
+		mem->wpmapped = TRUE;
+
+		PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
+			   VM_PROT_READ | VM_PROT_WRITE, object->wimg_bits & VM_WIMG_MASK, TRUE);
+
+		if (flags & KMA_NOENCRYPT) {
+			bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
+
+			pmap_set_noencrypt(mem->phys_page);
 		}
-		vm_map_remove(map, map_addr, map_addr + map_size, 0);
-		vm_object_deallocate(object);
-		return (kr);
 	}
-	/* now that the page is wired, we no longer have to fear coalesce */
-	vm_object_deallocate(object);
+	if ((fill_start + fill_size) < map_size) {
+		if (guard_page_list == NULL)
+			panic("kernel_memory_allocate: guard_page_list == NULL");
+
+		mem = guard_page_list;
+		guard_page_list = (vm_page_t)mem->pageq.next;
+		mem->pageq.next = NULL;
+
+		vm_page_insert(mem, object, offset + pg_offset);
+
+		mem->busy = FALSE;
+	}
+	if (guard_page_list || wired_page_list)
+		panic("kernel_memory_allocate: non empty list\n");
+
+	vm_page_lockspin_queues();
+	vm_page_wire_count += wired_page_count;
+	vm_page_unlock_queues();
+
+	vm_object_unlock(object);
+
+	/*
+	 * now that the pages are wired, we no longer have to fear coalesce
+	 */
 	if (object == kernel_object)
 		vm_map_simplify(map, map_addr);
+	else
+		vm_object_deallocate(object);
 
 	/*
 	 *	Return the memory, not zeroed.
 	 */
 	*addrp = CAST_DOWN(vm_offset_t, map_addr);
 	return KERN_SUCCESS;
+
+out:
+	if (guard_page_list)
+		vm_page_free_list(guard_page_list, FALSE);
+
+	if (wired_page_list)
+		vm_page_free_list(wired_page_list, FALSE);
+
+	return kr;
 }
 
 /*
@@ -331,7 +488,9 @@ kmem_alloc(
 	vm_offset_t	*addrp,
 	vm_size_t	size)
 {
-	return kernel_memory_allocate(map, addrp, size, 0, 0);
+	kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0);
+	TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
+	return kr;
 }
 
 /*
@@ -412,9 +571,7 @@ kmem_realloc(
 
 		for(offset = oldmapsize; offset < newmapsize; offset += PAGE_SIZE) {
 			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
-				vm_page_lock_queues();
-				vm_page_free(mem);
-				vm_page_unlock_queues();
+				VM_PAGE_FREE(mem);
 			}
 		}
 		object->size = oldmapsize;
@@ -438,9 +595,7 @@ kmem_realloc(
 		vm_object_lock(object);
 		for(offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
 			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
-				vm_page_lock_queues();
-				vm_page_free(mem);
-				vm_page_unlock_queues();
+				VM_PAGE_FREE(mem);
 			}
 		}
 		object->size = oldmapsize;
@@ -455,7 +610,7 @@ kmem_realloc(
 }
 
 /*
- *	kmem_alloc_wired:
+ *	kmem_alloc_kobject:
  *
  *	Allocate wired-down memory in the kernel's address map
  *	or a submap.  The memory is not zero-filled.
@@ -466,7 +621,7 @@ kmem_realloc(
  */
 
 kern_return_t
-kmem_alloc_wired(
+kmem_alloc_kobject(
 	vm_map_t	map,
 	vm_offset_t	*addrp,
 	vm_size_t	size)
@@ -477,7 +632,7 @@ kmem_alloc_wired(
 
 /*
  *	kmem_alloc_aligned:
  *
- *	Like kmem_alloc_wired, except that the memory is aligned.
+ *	Like kmem_alloc_kobject, except that the memory is aligned.
  *	The size should be a power-of-2.
  */
 
@@ -531,7 +686,7 @@ kmem_alloc_pageable(
  *	kmem_free:
  *
  *	Release a region of kernel virtual memory allocated
- *	with kmem_alloc, kmem_alloc_wired, or kmem_alloc_pageable,
+ *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
  *	and return the physical pages associated with that region.
  */
 
@@ -543,6 +698,17 @@ kmem_free(
 {
 	kern_return_t kr;
 
+	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
+
+	TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);
+
+	if(size == 0) {
+#if MACH_ASSERT
+		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n",map,(uint64_t)addr);
+#endif
+		return;
+	}
+
 	kr = vm_map_remove(map, vm_map_trunc_page(addr),
 				vm_map_round_page(addr + size),
 				VM_MAP_REMOVE_KUNWIRE);
@@ -625,7 +791,7 @@ kmem_remap_pages(
 		/*
 		 *	Wire it down (again)
 		 */
-		vm_page_lock_queues();
+		vm_page_lockspin_queues();
 		vm_page_wire(mem);
 		vm_page_unlock_queues();
 		vm_object_unlock(object);
@@ -642,6 +808,10 @@ kmem_remap_pages(
 		 *	Enter it in the kernel pmap.  The page isn't busy,
 		 *	but this shouldn't be a problem because it is wired.
 		 */
+
+		mem->pmapped = TRUE;
+		mem->wpmapped = TRUE;
+
 		PMAP_ENTER(kernel_pmap, map_start, mem, protection,
 				((unsigned int)(mem->object->wimg_bits))
 					& VM_WIMG_MASK,
@@ -722,6 +892,7 @@ kmem_suballoc(
 	return (KERN_SUCCESS);
 }
 
+
 /*
 *	kmem_init:
 *
@@ -744,7 +915,6 @@ kmem_init(
 	/*
	 * Reserve virtual memory allocated up to this time.
	 */
-
 	if (start != VM_MIN_KERNEL_ADDRESS) {
 		vm_map_offset_t map_addr;
 
@@ -759,15 +929,18 @@ kmem_init(
 			VM_PROT_NONE, VM_PROT_NONE,
 			VM_INHERIT_DEFAULT);
 	}
-
-	/*
-	 * Account for kernel memory (text, data, bss, vm shenanigans).
-	 * This may include inaccessible "holes" as determined by what
-	 * the machine-dependent init code includes in max_mem.
-	 */
-	vm_page_wire_count = (atop_64(max_mem) - (vm_page_free_count
-					+ vm_page_active_count
-					+ vm_page_inactive_count));
+	/*
+	 * Set the default global user wire limit which limits the amount of
+	 * memory that can be locked via mlock().  We set this to the total
+	 * amount of memory that are potentially usable by a user app (max_mem)
+	 * minus a certain amount.  This can be overridden via a sysctl.
+	 */
+	vm_global_no_user_wire_amount = MIN(max_mem*20/100,
+					    VM_NOT_USER_WIREABLE);
+	vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;
+
+	/* the default per user limit is the same as the global limit */
+	vm_user_wire_limit = vm_global_user_wire_limit;
 }
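
A note on the guard-page bookkeeping introduced in kernel_memory_allocate() above: the following is a minimal, self-contained sketch (plain user-space C, not kernel code) that only restates the fill_start/fill_size/wired_page_count arithmetic from the -222,26 +231,131 hunk. PAGE_SZ and the GUARD_FIRST/GUARD_LAST flags are hypothetical stand-ins for PAGE_SIZE_64 and KMA_GUARD_FIRST/KMA_GUARD_LAST; no kernel interfaces are used.

/*
 * Standalone illustration of the guard-page arithmetic in the
 * kernel_memory_allocate() hunk above.  PAGE_SZ, GUARD_FIRST and
 * GUARD_LAST are hypothetical stand-ins for PAGE_SIZE_64,
 * KMA_GUARD_FIRST and KMA_GUARD_LAST.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ      4096ULL
#define GUARD_FIRST  0x1
#define GUARD_LAST   0x2

int
main(void)
{
	uint64_t map_size = 6 * PAGE_SZ;    /* caller passed room for both guards  */
	int      flags    = GUARD_FIRST | GUARD_LAST;

	uint64_t fill_start = 0;            /* offset of the first wired page      */
	uint64_t fill_size  = map_size;     /* bytes actually backed by real pages */
	int      guard_page_count = 0;

	if (flags & GUARD_FIRST) {          /* leading guard page */
		fill_start += PAGE_SZ;
		fill_size  -= PAGE_SZ;
		guard_page_count++;
	}
	if (flags & GUARD_LAST) {           /* trailing guard page */
		fill_size  -= PAGE_SZ;
		guard_page_count++;
	}
	int wired_page_count = (int)(fill_size / PAGE_SZ);
	assert((uint64_t)wired_page_count * PAGE_SZ == fill_size);

	printf("guards=%d wired=%d fill_start=%llu fill_size=%llu\n",
	    guard_page_count, wired_page_count,
	    (unsigned long long)fill_start, (unsigned long long)fill_size);
	return 0;
}

For a map_size of six pages with both guards requested this prints guards=2 wired=4 fill_start=4096 fill_size=16384: per the comment in the diff, only fill_size worth of real pages is grabbed and wired, while the guard pages at either end are fictitious, so accesses to them are trapped in vm_fault_page() rather than backed by physical memory.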