apple/xnu.git blobdiff: osfmk/vm/vm_kern.c (xnu-1504.15.3)

diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c
index b81d47b2264130293debcb5d333fadb45dc7c61f..aa0dbafe2f0ebb36e70eb7bb0576902cf37fa15a 100644
--- a/osfmk/vm/vm_kern.c
+++ b/osfmk/vm/vm_kern.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <vm/cpm.h>
 
 #include <string.h>
+
+#include <libkern/OSDebug.h>
+#include <sys/kdebug.h>
+
 /*
  *     Variables exported by this module.
  */
@@ -84,6 +88,8 @@
 vm_map_t       kernel_map;
 vm_map_t       kernel_pageable_map;
 
+extern boolean_t vm_kernel_ready;
+
 /*
  * Forward declarations for internal functions.
  */
@@ -105,6 +111,8 @@ kmem_alloc_contig(
        vm_offset_t             *addrp,
        vm_size_t               size,
        vm_offset_t             mask,
+       ppnum_t                 max_pnum,
+       ppnum_t                 pnum_mask,
        int                     flags)
 {
        vm_object_t             object;
@@ -116,7 +124,7 @@ kmem_alloc_contig(
        vm_page_t               m, pages;
        kern_return_t           kr;
 
-       if (map == VM_MAP_NULL || (flags && (flags ^ KMA_KOBJECT))) 
+       if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) 
                return KERN_INVALID_ARGUMENT;
        
        if (size == 0) {
@@ -147,13 +155,13 @@ kmem_alloc_contig(
 
        entry->object.vm_object = object;
        entry->offset = offset = (object == kernel_object) ? 
-                       map_addr - VM_MIN_KERNEL_ADDRESS : 0;
+                       map_addr : 0;
 
        /* Take an extra object ref in case the map entry gets deleted */
        vm_object_reference(object);
        vm_map_unlock(map);
 
-       kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, FALSE);
+       kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
 
        if (kr != KERN_SUCCESS) {
                vm_map_remove(map, vm_map_trunc_page(map_addr),
@@ -191,7 +199,8 @@ kmem_alloc_contig(
        if (object == kernel_object)
                vm_map_simplify(map, map_addr);
 
-       *addrp = map_addr;
+       *addrp = (vm_offset_t) map_addr;
+       assert((vm_map_offset_t) *addrp == map_addr);
        return KERN_SUCCESS;
 }
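
The hunks above give kmem_alloc_contig() two new parameters, max_pnum and pnum_mask, and widen the flag check so KMA_LOMEM and KMA_NOPAGEWAIT are accepted alongside KMA_KOBJECT. A minimal caller sketch of the new signature, assuming a hypothetical helper that wants physically contiguous memory kept below 4 GB; the name example_grab_dma_buffer and the 64 KB size are illustrative, not part of this diff:

static kern_return_t
example_grab_dma_buffer(vm_offset_t *bufp)
{
        kern_return_t kr;

        kr = kmem_alloc_contig(kernel_map, bufp,
                               64 * 1024,            /* size: 64 KB, contiguous       */
                               0,                    /* mask: no extra VA alignment   */
                               atop(0xFFFFFFFFULL),  /* max_pnum: below 4 GB physical */
                               0,                    /* pnum_mask: no phys alignment  */
                               0);                   /* flags (KMA_LOMEM etc. OK too) */
        return kr;                                   /* pair with kmem_free() later   */
}
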
 
@@ -222,26 +231,131 @@ kernel_memory_allocate(
 {
        vm_object_t             object;
        vm_object_offset_t      offset;
+       vm_object_offset_t      pg_offset;
        vm_map_entry_t          entry;
-       vm_map_offset_t         map_addr;
+       vm_map_offset_t         map_addr, fill_start;
        vm_map_offset_t         map_mask;
-       vm_map_size_t           map_size;
-       vm_map_size_t           i;
+       vm_map_size_t           map_size, fill_size;
        kern_return_t           kr;
+       vm_page_t               mem;
+       vm_page_t               guard_page_list = NULL;
+       vm_page_t               wired_page_list = NULL;
+       int                     guard_page_count = 0;
+       int                     wired_page_count = 0;
+       int                     i;
+       int                     vm_alloc_flags;
+
+       if (! vm_kernel_ready) {
+               panic("kernel_memory_allocate: VM is not ready");
+       }
 
        if (size == 0) {
                *addrp = 0;
                return KERN_INVALID_ARGUMENT;
        }
-       if (flags & KMA_LOMEM) {
-               if ( !(flags & KMA_NOPAGEWAIT) ) {
-                       *addrp = 0;
-                       return KERN_INVALID_ARGUMENT;
+       map_size = vm_map_round_page(size);
+       map_mask = (vm_map_offset_t) mask;
+       vm_alloc_flags = 0;
+
+
+       /*
+        * limit the size of a single extent of wired memory
+        * to try and limit the damage to the system if
+        * too many pages get wired down
+        */
+        if (map_size > (1 << 30)) {
+                return KERN_RESOURCE_SHORTAGE;
+        }
+
+       /*
+        * Guard pages:
+        *
+        * Guard pages are implemented as fictitious pages.  By placing guard pages
+        * on either end of a stack, they can help detect cases where a thread walks
+        * off either end of its stack.  They are allocated and set up here and attempts
+        * to access those pages are trapped in vm_fault_page().
+        *
+        * The map_size we were passed may include extra space for
+        * guard pages.  If those were requested, then back it out of fill_size
+        * since vm_map_find_space() takes just the actual size not including
+        * guard pages.  Similarly, fill_start indicates where the actual pages
+        * will begin in the range.
+        */
+
+       fill_start = 0;
+       fill_size = map_size;
+
+       if (flags & KMA_GUARD_FIRST) {
+               vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
+               fill_start += PAGE_SIZE_64;
+               fill_size -= PAGE_SIZE_64;
+               if (map_size < fill_start + fill_size) {
+                       /* no space for a guard page */
+                       *addrp = 0;
+                       return KERN_INVALID_ARGUMENT;
+               }
+               guard_page_count++;
+       }
+       if (flags & KMA_GUARD_LAST) {
+               vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
+               fill_size -= PAGE_SIZE_64;
+               if (map_size <= fill_start + fill_size) {
+                       /* no space for a guard page */
+                       *addrp = 0;
+                       return KERN_INVALID_ARGUMENT;
                }
+               guard_page_count++;
        }
+       wired_page_count = (int) (fill_size / PAGE_SIZE_64);
+       assert(wired_page_count * PAGE_SIZE_64 == fill_size);
 
-       map_size = vm_map_round_page(size);
-       map_mask = (vm_map_offset_t) mask;
+       for (i = 0; i < guard_page_count; i++) {
+               for (;;) {
+                       mem = vm_page_grab_guard();
+
+                       if (mem != VM_PAGE_NULL)
+                               break;
+                       if (flags & KMA_NOPAGEWAIT) {
+                               kr = KERN_RESOURCE_SHORTAGE;
+                               goto out;
+                       }
+                       vm_page_more_fictitious();
+               }
+               mem->pageq.next = (queue_entry_t)guard_page_list;
+               guard_page_list = mem;
+       }
+
+       for (i = 0; i < wired_page_count; i++) {
+               uint64_t        unavailable;
+               
+               for (;;) {
+                       if (flags & KMA_LOMEM)
+                               mem = vm_page_grablo();
+                       else
+                               mem = vm_page_grab();
+
+                       if (mem != VM_PAGE_NULL)
+                               break;
+
+                       if (flags & KMA_NOPAGEWAIT) {
+                               kr = KERN_RESOURCE_SHORTAGE;
+                               goto out;
+                       }
+                       if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
+                               kr = KERN_RESOURCE_SHORTAGE;
+                               goto out;
+                       }
+                       unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;
+
+                       if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
+                               kr = KERN_RESOURCE_SHORTAGE;
+                               goto out;
+                       }
+                       VM_PAGE_WAIT();
+               }
+               mem->pageq.next = (queue_entry_t)wired_page_list;
+               wired_page_list = mem;
+       }
 
        /*
         *      Allocate a new object (if necessary).  We must do this before
@@ -254,68 +368,111 @@ kernel_memory_allocate(
                object = vm_object_allocate(map_size);
        }
 
-       kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
+       kr = vm_map_find_space(map, &map_addr,
+                              fill_size, map_mask,
+                              vm_alloc_flags, &entry);
        if (KERN_SUCCESS != kr) {
                vm_object_deallocate(object);
-               return kr;
+               goto out;
        }
+
        entry->object.vm_object = object;
        entry->offset = offset = (object == kernel_object) ? 
-                       map_addr - VM_MIN_KERNEL_ADDRESS : 0;
+                       map_addr : 0;
 
-       vm_object_reference(object);
-       vm_map_unlock(map);
+       entry->wired_count++;
+
+       if (flags & KMA_PERMANENT)
+               entry->permanent = TRUE;
+
+       if (object != kernel_object)
+               vm_object_reference(object);
 
        vm_object_lock(object);
-       for (i = 0; i < map_size; i += PAGE_SIZE) {
-               vm_page_t       mem;
+       vm_map_unlock(map);
 
-               for (;;) {
-                       if (flags & KMA_LOMEM)
-                               mem = vm_page_alloclo(object, offset + i);
-                       else
-                               mem = vm_page_alloc(object, offset + i);
+       pg_offset = 0;
 
-                       if (mem != VM_PAGE_NULL)
-                               break;
+       if (fill_start) {
+               if (guard_page_list == NULL)
+                       panic("kernel_memory_allocate: guard_page_list == NULL");
+
+               mem = guard_page_list;
+               guard_page_list = (vm_page_t)mem->pageq.next;
+               mem->pageq.next = NULL;
+
+               vm_page_insert(mem, object, offset + pg_offset);
 
-                       if (flags & KMA_NOPAGEWAIT) {
-                               if (object == kernel_object)
-                                       vm_object_page_remove(object, offset, offset + i);
-                               vm_object_unlock(object);
-                               vm_map_remove(map, map_addr, map_addr + map_size, 0);
-                               vm_object_deallocate(object);
-                               return KERN_RESOURCE_SHORTAGE;
-                       }
-                       vm_object_unlock(object);
-                       VM_PAGE_WAIT();
-                       vm_object_lock(object);
-               }
                mem->busy = FALSE;
+               pg_offset += PAGE_SIZE_64;
        }
-       vm_object_unlock(object);
+       for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
+               if (wired_page_list == NULL)
+                       panic("kernel_memory_allocate: wired_page_list == NULL");
 
-       if ((kr = vm_map_wire(map, map_addr, map_addr + map_size, VM_PROT_DEFAULT, FALSE)) 
-               != KERN_SUCCESS) {
-               if (object == kernel_object) {
-                       vm_object_lock(object);
-                       vm_object_page_remove(object, offset, offset + map_size);
-                       vm_object_unlock(object);
+               mem = wired_page_list;
+               wired_page_list = (vm_page_t)mem->pageq.next;
+               mem->pageq.next = NULL;
+               mem->wire_count++;
+
+               vm_page_insert(mem, object, offset + pg_offset);
+
+               mem->busy = FALSE;
+               mem->pmapped = TRUE;
+               mem->wpmapped = TRUE;
+
+               PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, 
+                          VM_PROT_READ | VM_PROT_WRITE, object->wimg_bits & VM_WIMG_MASK, TRUE);
+
+               if (flags & KMA_NOENCRYPT) {
+                       bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
+
+                       pmap_set_noencrypt(mem->phys_page);
                }
-               vm_map_remove(map, map_addr, map_addr + map_size, 0);
-               vm_object_deallocate(object);
-               return (kr);
        }
-       /* now that the page is wired, we no longer have to fear coalesce */
-       vm_object_deallocate(object);
+       if ((fill_start + fill_size) < map_size) {
+               if (guard_page_list == NULL)
+                       panic("kernel_memory_allocate: guard_page_list == NULL");
+
+               mem = guard_page_list;
+               guard_page_list = (vm_page_t)mem->pageq.next;
+               mem->pageq.next = NULL;
+
+               vm_page_insert(mem, object, offset + pg_offset);
+
+               mem->busy = FALSE;
+       }
+       if (guard_page_list || wired_page_list)
+               panic("kernel_memory_allocate: non empty list\n");
+
+       vm_page_lockspin_queues();
+       vm_page_wire_count += wired_page_count;
+       vm_page_unlock_queues();
+
+       vm_object_unlock(object);
+
+       /*
+        * now that the pages are wired, we no longer have to fear coalesce
+        */
        if (object == kernel_object)
                vm_map_simplify(map, map_addr);
+       else
+               vm_object_deallocate(object);
 
        /*
         *      Return the memory, not zeroed.
         */
        *addrp = CAST_DOWN(vm_offset_t, map_addr);
        return KERN_SUCCESS;
+
+out:
+       if (guard_page_list)
+               vm_page_free_list(guard_page_list, FALSE);
+
+       if (wired_page_list)
+               vm_page_free_list(wired_page_list, FALSE);
+
+       return kr;
 }
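
The rewritten kernel_memory_allocate() above pre-grabs its guard and wired pages, caps a single wired extent at 1 GB, and carves any requested guard pages out of the caller-supplied size via fill_start/fill_size. A sketch of a guarded allocation under those rules; example_alloc_guarded and the four-page payload are assumptions for illustration, not taken from this diff:

static kern_return_t
example_alloc_guarded(vm_offset_t *addrp)
{
        /* 4 usable pages plus one guard page at each end */
        vm_size_t       size = 4 * PAGE_SIZE + 2 * PAGE_SIZE;
        kern_return_t   kr;

        kr = kernel_memory_allocate(kernel_map, addrp, size, 0,
                                    KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST);
        /*
         * Internally: fill_start = PAGE_SIZE and fill_size = size - 2*PAGE_SIZE,
         * so only the 4 interior pages are wired and entered in the pmap; the
         * two guards are fictitious pages that fault on any access.
         * Usable range on success: [*addrp + PAGE_SIZE, *addrp + size - PAGE_SIZE).
         */
        return kr;
}
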
 
 /*
@@ -331,7 +488,9 @@ kmem_alloc(
        vm_offset_t     *addrp,
        vm_size_t       size)
 {
-       return kernel_memory_allocate(map, addrp, size, 0, 0);
+       kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0);
+       TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
+       return kr;
 }
 
 /*
@@ -412,9 +571,7 @@ kmem_realloc(
                for(offset = oldmapsize; 
                    offset < newmapsize; offset += PAGE_SIZE) {
                        if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
-                               vm_page_lock_queues();
-                               vm_page_free(mem);
-                               vm_page_unlock_queues();
+                               VM_PAGE_FREE(mem);
                        }
                }
                object->size = oldmapsize;
@@ -438,9 +595,7 @@ kmem_realloc(
                vm_object_lock(object);
                for(offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
                        if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
-                               vm_page_lock_queues();
-                               vm_page_free(mem);
-                               vm_page_unlock_queues();
+                               VM_PAGE_FREE(mem);
                        }
                }
                object->size = oldmapsize;
@@ -455,7 +610,7 @@ kmem_realloc(
 }
 
 /*
- *     kmem_alloc_wired:
+ *     kmem_alloc_kobject:
  *
  *     Allocate wired-down memory in the kernel's address map
  *     or a submap.  The memory is not zero-filled.
@@ -466,7 +621,7 @@ kmem_realloc(
  */
 
 kern_return_t
-kmem_alloc_wired(
+kmem_alloc_kobject(
        vm_map_t        map,
        vm_offset_t     *addrp,
        vm_size_t       size)
@@ -477,7 +632,7 @@ kmem_alloc_wired(
 /*
  *     kmem_alloc_aligned:
  *
- *     Like kmem_alloc_wired, except that the memory is aligned.
+ *     Like kmem_alloc_kobject, except that the memory is aligned.
  *     The size should be a power-of-2.
  */
 
@@ -531,7 +686,7 @@ kmem_alloc_pageable(
  *     kmem_free:
  *
  *     Release a region of kernel virtual memory allocated
- *     with kmem_alloc, kmem_alloc_wired, or kmem_alloc_pageable,
+ *     with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
  *     and return the physical pages associated with that region.
  */
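
The surrounding hunks rename kmem_alloc_wired() to kmem_alloc_kobject() and update the comments that mention it; the behaviour (wired kernel memory, not zero-filled) is unchanged. A migration sketch, with an illustrative buffer only:

        vm_offset_t     buf;
        kern_return_t   kr;

        /* was: kr = kmem_alloc_wired(kernel_map, &buf, PAGE_SIZE); */
        kr = kmem_alloc_kobject(kernel_map, &buf, PAGE_SIZE);
        if (kr == KERN_SUCCESS)
                kmem_free(kernel_map, buf, PAGE_SIZE);
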
 
@@ -543,6 +698,17 @@ kmem_free(
 {
        kern_return_t kr;
 
+       assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
+
+       TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);
+
+       if(size == 0) {
+#if MACH_ASSERT
+               printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n",map,(uint64_t)addr);
+#endif
+               return;
+       }
+
        kr = vm_map_remove(map, vm_map_trunc_page(addr),
                                vm_map_round_page(addr + size), 
                                VM_MAP_REMOVE_KUNWIRE);
@@ -625,7 +791,7 @@ kmem_remap_pages(
            /*
             *  Wire it down (again)
             */
-           vm_page_lock_queues();
+           vm_page_lockspin_queues();
            vm_page_wire(mem);
            vm_page_unlock_queues();
            vm_object_unlock(object);
@@ -642,6 +808,10 @@ kmem_remap_pages(
             *  Enter it in the kernel pmap.  The page isn't busy,
             *  but this shouldn't be a problem because it is wired.
             */
+
+           mem->pmapped = TRUE;
+           mem->wpmapped = TRUE;
+
            PMAP_ENTER(kernel_pmap, map_start, mem, protection, 
                        ((unsigned int)(mem->object->wimg_bits))
                                        & VM_WIMG_MASK,
@@ -722,6 +892,7 @@ kmem_suballoc(
        return (KERN_SUCCESS);
 }
 
+
 /*
  *     kmem_init:
  *
@@ -744,7 +915,6 @@ kmem_init(
        /*
         *      Reserve virtual memory allocated up to this time.
         */
-
        if (start != VM_MIN_KERNEL_ADDRESS) {
                vm_map_offset_t map_addr;
  
@@ -759,15 +929,18 @@ kmem_init(
                            VM_PROT_NONE, VM_PROT_NONE,
                            VM_INHERIT_DEFAULT);
        }
-
-        /*
-         * Account for kernel memory (text, data, bss, vm shenanigans).
-         * This may include inaccessible "holes" as determined by what
-         * the machine-dependent init code includes in max_mem.
-         */
-        vm_page_wire_count = (atop_64(max_mem) - (vm_page_free_count
-                                                + vm_page_active_count
-                                                + vm_page_inactive_count));
+       /*
+        * Set the default global user wire limit which limits the amount of
+        * memory that can be locked via mlock().  We set this to the total
+        * amount of memory that is potentially usable by a user app (max_mem)
+        * minus a certain amount.  This can be overridden via a sysctl.
+        */
+       vm_global_no_user_wire_amount = MIN(max_mem*20/100,
+                                           VM_NOT_USER_WIREABLE);
+       vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;
+       
+       /* the default per user limit is the same as the global limit */
+       vm_user_wire_limit = vm_global_user_wire_limit;
 }
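
To make the new wire limits in kmem_init() concrete, a worked example; the 8 GB machine and the 1 GB value assumed for VM_NOT_USER_WIREABLE are illustrative only (the real constant is architecture-dependent):

/*
 * Illustrative arithmetic.  Assume max_mem = 8 GB and
 * VM_NOT_USER_WIREABLE = 1 GB:
 *
 *   vm_global_no_user_wire_amount = MIN(8 GB * 20/100, 1 GB) = 1 GB
 *   vm_global_user_wire_limit     = 8 GB - 1 GB              = 7 GB
 *   vm_user_wire_limit            = vm_global_user_wire_limit = 7 GB
 *
 * so mlock()'d memory system-wide (and, by default, per user) is capped
 * at 7 GB, leaving at least 1 GB that user wiring can never consume.
 */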