2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
63 * Kernel memory management.
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_compressor.h>
75 #include <vm/vm_pageout.h>
76 #include <kern/misc_protos.h>
78 #include <kern/ledger.h>
82 #include <libkern/OSDebug.h>
83 #include <libkern/crypto/sha2.h>
84 #include <libkern/section_keywords.h>
85 #include <sys/kdebug.h>
87 #include <san/kasan.h>
90 * Variables exported by this module.
/*
 * Kernel VM map globals defined by this file.
 * kernel_map is declared through SECURITY_READ_ONLY_LATE();
 * NOTE(review): presumably that places it in a late-read-only section --
 * confirm against libkern/section_keywords.h.
 */
93 SECURITY_READ_ONLY_LATE(vm_map_t
) kernel_map
;
94 vm_map_t kernel_pageable_map
;
/*
 * Defined elsewhere.  kernel_memory_allocate() in this file panics when
 * this flag is still false.
 */
96 extern boolean_t vm_kernel_ready
;
99 * Forward declarations for internal functions.
/*
 * Forward declaration of kmem_alloc_pages(), used by the realloc path
 * further down to populate the grown tail of an object.
 * NOTE(review): the first parameter line is not visible in this listing;
 * call sites pass a VM object first.
 */
101 extern kern_return_t
kmem_alloc_pages(
103 vm_object_offset_t offset
,
104 vm_object_size_t size
);
/*
 * Body of the physically-contiguous allocation routine.
 * NOTE(review): the function name, its parameter list and several
 * statements/braces are not visible in this listing; presumably this is
 * kmem_alloc_contig() -- confirm against the full file.
 *
 * Visible flow:
 *   - reject a null map or any flag outside
 *     KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT with KERN_INVALID_ARGUMENT;
 *   - round the size up to the map's page size;
 *   - pick kernel_object (KMA_KOBJECT) or a freshly allocated object;
 *   - reserve VA with vm_map_find_space(), get pages from cpm_allocate(),
 *     insert them into the object and wire the range;
 *   - on failure, remove the mapping and drop the object reference(s).
 */
118 vm_object_offset_t offset
;
119 vm_map_offset_t map_addr
;
120 vm_map_offset_t map_mask
;
121 vm_map_size_t map_size
, i
;
122 vm_map_entry_t entry
;
126 assert(VM_KERN_MEMORY_NONE
!= tag
);
/* Only these three KMA_* flags are accepted by this routine. */
128 if (map
== VM_MAP_NULL
|| (flags
& ~(KMA_KOBJECT
| KMA_LOMEM
| KMA_NOPAGEWAIT
))) {
129 return KERN_INVALID_ARGUMENT
;
132 map_size
= vm_map_round_page(size
,
133 VM_MAP_PAGE_MASK(map
));
134 map_mask
= (vm_map_offset_t
)mask
;
136 /* Check for zero allocation size (either directly or via overflow) */
139 return KERN_INVALID_ARGUMENT
;
143 * Allocate a new object (if necessary) and the reference we
144 * will be donating to the map entry. We must do this before
145 * locking the map, or risk deadlock with the default pager.
147 if ((flags
& KMA_KOBJECT
) != 0) {
148 object
= kernel_object
;
149 vm_object_reference(object
);
151 object
= vm_object_allocate(map_size
);
154 kr
= vm_map_find_space(map
, &map_addr
, map_size
, map_mask
, 0,
155 VM_MAP_KERNEL_FLAGS_NONE
, tag
, &entry
);
/* No VA available: give back the object reference taken above. */
156 if (KERN_SUCCESS
!= kr
) {
157 vm_object_deallocate(object
);
161 if (object
== kernel_object
) {
166 VME_OBJECT_SET(entry
, object
);
167 VME_OFFSET_SET(entry
, offset
);
169 /* Take an extra object ref in case the map entry gets deleted */
170 vm_object_reference(object
);
173 kr
= cpm_allocate(CAST_DOWN(vm_size_t
, map_size
), &pages
, max_pnum
, pnum_mask
, FALSE
, flags
);
/* Page allocation failed: tear down the reserved range and the extra ref. */
175 if (kr
!= KERN_SUCCESS
) {
177 vm_map_trunc_page(map_addr
,
178 VM_MAP_PAGE_MASK(map
)),
179 vm_map_round_page(map_addr
+ map_size
,
180 VM_MAP_PAGE_MASK(map
)),
181 VM_MAP_REMOVE_NO_FLAGS
);
182 vm_object_deallocate(object
);
/* Move each page off the returned list and into the object at offset + i. */
187 vm_object_lock(object
);
188 for (i
= 0; i
< map_size
; i
+= PAGE_SIZE
) {
190 pages
= NEXT_PAGE(m
);
191 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
193 vm_page_insert(m
, object
, offset
+ i
);
195 vm_object_unlock(object
);
197 kr
= vm_map_wire_kernel(map
,
198 vm_map_trunc_page(map_addr
,
199 VM_MAP_PAGE_MASK(map
)),
200 vm_map_round_page(map_addr
+ map_size
,
201 VM_MAP_PAGE_MASK(map
)),
202 VM_PROT_DEFAULT
, tag
,
/*
 * Wiring failed: for kernel_object the pages are removed explicitly from
 * the object range before the mapping is removed.
 */
205 if (kr
!= KERN_SUCCESS
) {
206 if (object
== kernel_object
) {
207 vm_object_lock(object
);
208 vm_object_page_remove(object
, offset
, offset
+ map_size
);
209 vm_object_unlock(object
);
212 vm_map_trunc_page(map_addr
,
213 VM_MAP_PAGE_MASK(map
)),
214 vm_map_round_page(map_addr
+ map_size
,
215 VM_MAP_PAGE_MASK(map
)),
216 VM_MAP_REMOVE_NO_FLAGS
);
217 vm_object_deallocate(object
);
/* Success: drop the extra reference taken before cpm_allocate(). */
220 vm_object_deallocate(object
);
222 if (object
== kernel_object
) {
223 vm_map_simplify(map
, map_addr
);
224 vm_tag_update_size(tag
, map_size
);
226 *addrp
= (vm_offset_t
) map_addr
;
227 assert((vm_map_offset_t
) *addrp
== map_addr
);
233 * Master entry point for allocating kernel memory.
234 * NOTE: this routine is _never_ interrupt safe.
236 * map : map to allocate into
237 * addrp : pointer to start address of new memory
238 * size : size of memory requested
240 * KMA_HERE *addrp is base address, else "anywhere"
241 * KMA_NOPAGEWAIT don't wait for pages if unavailable
242 * KMA_KOBJECT use kernel_object
243 * KMA_LOMEM support for 32 bit devices in a 64 bit world
244 * if set and a lomemory pool is available
245 * grab pages from it... this also implies
/*
 * kernel_memory_allocate
 *
 * NOTE(review): the parameter list, several locals, braces and some
 * statements are not visible in this listing; the comments below describe
 * only what the visible lines show.
 *
 * Visible flow:
 *   1. panic if the VM is not ready; round the size to the map page size;
 *   2. refuse over-large wired requests;
 *   3. carve optional leading/trailing guard pages out of the fill range;
 *   4. pre-grab guard pages and (unless KMA_VAONLY/KMA_PAGEABLE) the wired
 *      pages onto private lists;
 *   5. choose the backing object, reserve VA with vm_map_find_space();
 *   6. insert guard pages and wired pages into the object and enter the
 *      wired pages in kernel_pmap;
 *   7. on the failure path, free whatever is still on the private lists.
 */
250 kernel_memory_allocate(
259 vm_object_offset_t offset
;
260 vm_object_offset_t pg_offset
;
261 vm_map_entry_t entry
= NULL
;
262 vm_map_offset_t map_addr
, fill_start
;
263 vm_map_offset_t map_mask
;
264 vm_map_size_t map_size
, fill_size
;
265 kern_return_t kr
, pe_result
;
267 vm_page_t guard_page_list
= NULL
;
268 vm_page_t wired_page_list
= NULL
;
269 int guard_page_count
= 0;
270 int wired_page_count
= 0;
271 int page_grab_count
= 0;
274 vm_map_kernel_flags_t vmk_flags
;
276 #if DEVELOPMENT || DEBUG
277 task_t task
= current_task();
278 #endif /* DEVELOPMENT || DEBUG */
/* This allocator must not be used before VM bootstrap has completed. */
280 if (!vm_kernel_ready
) {
281 panic("kernel_memory_allocate: VM is not ready");
284 map_size
= vm_map_round_page(size
,
285 VM_MAP_PAGE_MASK(map
));
286 map_mask
= (vm_map_offset_t
) mask
;
288 vm_alloc_flags
= 0; //VM_MAKE_TAG(tag);
289 vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
291 /* Check for zero allocation size (either directly or via overflow) */
294 return KERN_INVALID_ARGUMENT
;
298 * limit the size of a single extent of wired memory
299 * to try and limit the damage to the system if
300 * too many pages get wired down
301 * limit raised to 2GB with 128GB max physical limit,
302 * but scaled by installed memory above this
/*
 * The cap applies only to requests that will be wired here (neither
 * KMA_VAONLY nor KMA_PAGEABLE): the larger of 1ULL << 31 bytes and
 * sane_size / 64.
 */
304 if (!(flags
& (KMA_VAONLY
| KMA_PAGEABLE
)) &&
305 map_size
> MAX(1ULL << 31, sane_size
/ 64)) {
306 return KERN_RESOURCE_SHORTAGE
;
312 * Guard pages are implemented as ficticious pages. By placing guard pages
313 * on either end of a stack, they can help detect cases where a thread walks
314 * off either end of its stack. They are allocated and set up here and attempts
315 * to access those pages are trapped in vm_fault_page().
317 * The map_size we were passed may include extra space for
318 * guard pages. If those were requested, then back it out of fill_size
319 * since vm_map_find_space() takes just the actual size not including
320 * guard pages. Similarly, fill_start indicates where the actual pages
321 * will begin in the range.
325 fill_size
= map_size
;
/* Leading guard page: shift the fill window forward by one page. */
327 if (flags
& KMA_GUARD_FIRST
) {
328 vmk_flags
.vmkf_guard_before
= TRUE
;
329 fill_start
+= PAGE_SIZE_64
;
330 fill_size
-= PAGE_SIZE_64
;
331 if (map_size
< fill_start
+ fill_size
) {
332 /* no space for a guard page */
334 return KERN_INVALID_ARGUMENT
;
/* Trailing guard page: shrink the fill window by one page at the end. */
338 if (flags
& KMA_GUARD_LAST
) {
339 vmk_flags
.vmkf_guard_after
= TRUE
;
340 fill_size
-= PAGE_SIZE_64
;
341 if (map_size
<= fill_start
+ fill_size
) {
342 /* no space for a guard page */
344 return KERN_INVALID_ARGUMENT
;
348 wired_page_count
= (int) (fill_size
/ PAGE_SIZE_64
);
349 assert(wired_page_count
* PAGE_SIZE_64
== fill_size
);
351 #if DEBUG || DEVELOPMENT
352 VM_DEBUG_CONSTANT_EVENT(vm_kern_request
, VM_KERN_REQUEST
, DBG_FUNC_START
, size
, 0, 0, 0);
/*
 * Pre-grab the guard pages onto guard_page_list (linked via vmp_snext).
 * With KMA_NOPAGEWAIT a failed grab yields KERN_RESOURCE_SHORTAGE instead
 * of calling vm_page_more_fictitious().
 */
355 for (i
= 0; i
< guard_page_count
; i
++) {
357 mem
= vm_page_grab_guard();
359 if (mem
!= VM_PAGE_NULL
) {
362 if (flags
& KMA_NOPAGEWAIT
) {
363 kr
= KERN_RESOURCE_SHORTAGE
;
366 vm_page_more_fictitious();
368 mem
->vmp_snext
= guard_page_list
;
369 guard_page_list
= mem
;
/*
 * Pre-grab the pages that will be wired, onto wired_page_list.
 * KMA_LOMEM uses vm_page_grablo(); other requests use vm_page_grab().
 */
372 if (!(flags
& (KMA_VAONLY
| KMA_PAGEABLE
))) {
373 for (i
= 0; i
< wired_page_count
; i
++) {
375 if (flags
& KMA_LOMEM
) {
376 mem
= vm_page_grablo();
378 mem
= vm_page_grab();
381 if (mem
!= VM_PAGE_NULL
) {
385 if (flags
& KMA_NOPAGEWAIT
) {
386 kr
= KERN_RESOURCE_SHORTAGE
;
389 if ((flags
& KMA_LOMEM
) && (vm_lopage_needed
== TRUE
)) {
390 kr
= KERN_RESOURCE_SHORTAGE
;
394 /* VM privileged threads should have waited in vm_page_grab() and not get here. */
395 assert(!(current_thread()->options
& TH_OPT_VMPRIV
));
/*
 * Give up when the request cannot fit in what is left of max_mem after
 * the wired pages and the free-page target are accounted for.
 */
397 uint64_t unavailable
= (vm_page_wire_count
+ vm_page_free_target
) * PAGE_SIZE
;
398 if (unavailable
> max_mem
|| map_size
> (max_mem
- unavailable
)) {
399 kr
= KERN_RESOURCE_SHORTAGE
;
405 if (KMA_ZERO
& flags
) {
406 vm_page_zero_fill(mem
);
408 mem
->vmp_snext
= wired_page_list
;
409 wired_page_list
= mem
;
414 * Allocate a new object (if necessary). We must do this before
415 * locking the map, or risk deadlock with the default pager.
/* Backing object: kernel_object, compressor_object, or a new object. */
417 if ((flags
& KMA_KOBJECT
) != 0) {
418 object
= kernel_object
;
419 vm_object_reference(object
);
420 } else if ((flags
& KMA_COMPRESSOR
) != 0) {
421 object
= compressor_object
;
422 vm_object_reference(object
);
424 object
= vm_object_allocate(map_size
);
427 if (flags
& KMA_ATOMIC
) {
428 vmk_flags
.vmkf_atomic_entry
= TRUE
;
431 kr
= vm_map_find_space(map
, &map_addr
,
433 vm_alloc_flags
, vmk_flags
, tag
, &entry
);
434 if (KERN_SUCCESS
!= kr
) {
435 vm_object_deallocate(object
);
439 if (object
== kernel_object
|| object
== compressor_object
) {
444 VME_OBJECT_SET(entry
, object
);
445 VME_OFFSET_SET(entry
, offset
);
/* Entries that are neither compressor-backed nor pageable start out wired. */
447 if (!(flags
& (KMA_COMPRESSOR
| KMA_PAGEABLE
))) {
448 entry
->wired_count
++;
451 if (flags
& KMA_PERMANENT
) {
452 entry
->permanent
= TRUE
;
/* Private objects get an extra reference that is dropped again near the end. */
455 if (object
!= kernel_object
&& object
!= compressor_object
) {
456 vm_object_reference(object
);
459 vm_object_lock(object
);
/* Leading guard page: pop one page from guard_page_list into the object. */
465 if (guard_page_list
== NULL
) {
466 panic("kernel_memory_allocate: guard_page_list == NULL");
469 mem
= guard_page_list
;
470 guard_page_list
= mem
->vmp_snext
;
471 mem
->vmp_snext
= NULL
;
473 vm_page_insert(mem
, object
, offset
+ pg_offset
);
475 mem
->vmp_busy
= FALSE
;
476 pg_offset
+= PAGE_SIZE_64
;
479 kma_prot
= VM_PROT_READ
| VM_PROT_WRITE
;
482 if (!(flags
& KMA_VAONLY
)) {
483 /* for VAONLY mappings we notify in populate only */
484 kasan_notify_address(map_addr
, size
);
/* VA-only and pageable requests skip the fill loop entirely. */
488 if (flags
& (KMA_VAONLY
| KMA_PAGEABLE
)) {
489 pg_offset
= fill_start
+ fill_size
;
/* Wire each pre-grabbed page, insert it in the object and map it. */
491 for (pg_offset
= fill_start
; pg_offset
< fill_start
+ fill_size
; pg_offset
+= PAGE_SIZE_64
) {
492 if (wired_page_list
== NULL
) {
493 panic("kernel_memory_allocate: wired_page_list == NULL");
496 mem
= wired_page_list
;
497 wired_page_list
= mem
->vmp_snext
;
498 mem
->vmp_snext
= NULL
;
500 assert(mem
->vmp_wire_count
== 0);
501 assert(mem
->vmp_q_state
== VM_PAGE_NOT_ON_Q
);
503 mem
->vmp_q_state
= VM_PAGE_IS_WIRED
;
504 mem
->vmp_wire_count
++;
505 if (__improbable(mem
->vmp_wire_count
== 0)) {
506 panic("kernel_memory_allocate(%p): wire_count overflow",
510 vm_page_insert_wired(mem
, object
, offset
+ pg_offset
, tag
);
512 mem
->vmp_busy
= FALSE
;
513 mem
->vmp_pmapped
= TRUE
;
514 mem
->vmp_wpmapped
= TRUE
;
/* First attempt is non-blocking (PMAP_OPTIONS_NOWAIT) while the object lock is held. */
516 PMAP_ENTER_OPTIONS(kernel_pmap
, map_addr
+ pg_offset
, mem
,
517 kma_prot
, VM_PROT_NONE
, ((flags
& KMA_KSTACK
) ? VM_MEM_STACK
: 0), TRUE
,
518 PMAP_OPTIONS_NOWAIT
, pe_result
);
/* On shortage, drop the object lock, retry with PMAP_ENTER, then relock. */
520 if (pe_result
== KERN_RESOURCE_SHORTAGE
) {
521 vm_object_unlock(object
);
523 PMAP_ENTER(kernel_pmap
, map_addr
+ pg_offset
, mem
,
524 kma_prot
, VM_PROT_NONE
, ((flags
& KMA_KSTACK
) ? VM_MEM_STACK
: 0), TRUE
,
527 vm_object_lock(object
);
530 assert(pe_result
== KERN_SUCCESS
);
532 if (flags
& KMA_NOENCRYPT
) {
533 bzero(CAST_DOWN(void *, (map_addr
+ pg_offset
)), PAGE_SIZE
);
535 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem
));
538 if (kernel_object
== object
) {
539 vm_tag_update_size(tag
, fill_size
);
/* Trailing guard page, present when the fill window ends before map_size. */
542 if ((fill_start
+ fill_size
) < map_size
) {
543 if (guard_page_list
== NULL
) {
544 panic("kernel_memory_allocate: guard_page_list == NULL");
547 mem
= guard_page_list
;
548 guard_page_list
= mem
->vmp_snext
;
549 mem
->vmp_snext
= NULL
;
551 vm_page_insert(mem
, object
, offset
+ pg_offset
);
553 mem
->vmp_busy
= FALSE
;
/* Every pre-grabbed page must have been consumed by now. */
555 if (guard_page_list
|| wired_page_list
) {
556 panic("kernel_memory_allocate: non empty list\n");
/* Account for the newly wired pages under the page-queues lock. */
559 if (!(flags
& (KMA_VAONLY
| KMA_PAGEABLE
))) {
560 vm_page_lockspin_queues();
561 vm_page_wire_count
+= wired_page_count
;
562 vm_page_unlock_queues();
565 vm_object_unlock(object
);
568 * now that the pages are wired, we no longer have to fear coalesce
570 if (object
== kernel_object
|| object
== compressor_object
) {
571 vm_map_simplify(map
, map_addr
);
573 vm_object_deallocate(object
);
576 #if DEBUG || DEVELOPMENT
577 VM_DEBUG_CONSTANT_EVENT(vm_kern_request
, VM_KERN_REQUEST
, DBG_FUNC_END
, page_grab_count
, 0, 0, 0);
579 ledger_credit(task
->ledger
, task_ledgers
.pages_grabbed_kern
, page_grab_count
);
584 * Return the memory, not zeroed.
586 *addrp
= CAST_DOWN(vm_offset_t
, map_addr
);
/* Failure path: release any pages still sitting on the private lists. */
590 if (guard_page_list
) {
591 vm_page_free_list(guard_page_list
, FALSE
);
594 if (wired_page_list
) {
595 vm_page_free_list(wired_page_list
, FALSE
);
598 #if DEBUG || DEVELOPMENT
599 VM_DEBUG_CONSTANT_EVENT(vm_kern_request
, VM_KERN_REQUEST
, DBG_FUNC_END
, page_grab_count
, 0, 0, 0);
600 if (task
!= NULL
&& kr
== KERN_SUCCESS
) {
601 ledger_credit(task
->ledger
, task_ledgers
.pages_grabbed_kern
, page_grab_count
);
/*
 * kernel_memory_populate
 *
 * NOTE(review): the parameter list, braces and several statements are not
 * visible in this listing; the comments describe only the visible lines.
 *
 * Backs an already-reserved kernel VA range [addr, addr + size) with pages.
 * Two visible paths:
 *   - KMA_COMPRESSOR: pages come from vm_page_grab(), are entered into
 *     kernel_pmap with pmap_enter_options(PMAP_OPTIONS_INTERNAL), then
 *     inserted into compressor_object and marked
 *     VM_PAGE_USED_BY_COMPRESSOR;
 *   - otherwise: pages are grabbed (vm_page_grablo() for KMA_LOMEM),
 *     inserted wired into kernel_object (KMA_KOBJECT) and mapped with
 *     PMAP_ENTER_OPTIONS(PMAP_OPTIONS_NOWAIT), retrying with PMAP_ENTER
 *     after dropping the object lock when that reports
 *     KERN_RESOURCE_SHORTAGE.  Any other object choice panics.
 * Leftover pages on page_list are freed on the failure path.
 */
609 kernel_memory_populate(
617 vm_object_offset_t offset
, pg_offset
;
618 kern_return_t kr
, pe_result
;
620 vm_page_t page_list
= NULL
;
622 int page_grab_count
= 0;
625 #if DEBUG || DEVELOPMENT
626 task_t task
= current_task();
627 VM_DEBUG_CONSTANT_EVENT(vm_kern_request
, VM_KERN_REQUEST
, DBG_FUNC_START
, size
, 0, 0, 0);
630 page_count
= (int) (size
/ PAGE_SIZE_64
);
/* KMA_COMPRESSOR and KMA_KOBJECT are mutually exclusive. */
632 assert((flags
& (KMA_COMPRESSOR
| KMA_KOBJECT
)) != (KMA_COMPRESSOR
| KMA_KOBJECT
));
/* Compressor path: pg_offset starts at the end and walks downward. */
634 if (flags
& KMA_COMPRESSOR
) {
635 pg_offset
= page_count
* PAGE_SIZE_64
;
639 mem
= vm_page_grab();
641 if (mem
!= VM_PAGE_NULL
) {
648 if (KMA_ZERO
& flags
) {
649 vm_page_zero_fill(mem
);
651 mem
->vmp_snext
= page_list
;
654 pg_offset
-= PAGE_SIZE_64
;
656 kr
= pmap_enter_options(kernel_pmap
,
657 addr
+ pg_offset
, VM_PAGE_GET_PHYS_PAGE(mem
),
658 VM_PROT_READ
| VM_PROT_WRITE
, VM_PROT_NONE
, 0, TRUE
,
659 PMAP_OPTIONS_INTERNAL
, NULL
);
660 assert(kr
== KERN_SUCCESS
);
664 object
= compressor_object
;
666 vm_object_lock(object
);
/* Hand each grabbed page to the compressor object. */
670 pg_offset
+= PAGE_SIZE_64
) {
672 page_list
= mem
->vmp_snext
;
673 mem
->vmp_snext
= NULL
;
675 vm_page_insert(mem
, object
, offset
+ pg_offset
);
676 assert(mem
->vmp_busy
);
678 mem
->vmp_busy
= FALSE
;
679 mem
->vmp_pmapped
= TRUE
;
680 mem
->vmp_wpmapped
= TRUE
;
681 mem
->vmp_q_state
= VM_PAGE_USED_BY_COMPRESSOR
;
683 vm_object_unlock(object
);
/* The compressor map gets the no-poison variant of the KASAN notification. */
686 if (map
== compressor_map
) {
687 kasan_notify_address_nopoison(addr
, size
);
689 kasan_notify_address(addr
, size
);
693 #if DEBUG || DEVELOPMENT
694 VM_DEBUG_CONSTANT_EVENT(vm_kern_request
, VM_KERN_REQUEST
, DBG_FUNC_END
, page_grab_count
, 0, 0, 0);
696 ledger_credit(task
->ledger
, task_ledgers
.pages_grabbed_kern
, page_grab_count
);
/* Non-compressor path: grab all pages up front onto page_list. */
702 for (i
= 0; i
< page_count
; i
++) {
704 if (flags
& KMA_LOMEM
) {
705 mem
= vm_page_grablo();
707 mem
= vm_page_grab();
710 if (mem
!= VM_PAGE_NULL
) {
714 if (flags
& KMA_NOPAGEWAIT
) {
715 kr
= KERN_RESOURCE_SHORTAGE
;
718 if ((flags
& KMA_LOMEM
) &&
719 (vm_lopage_needed
== TRUE
)) {
720 kr
= KERN_RESOURCE_SHORTAGE
;
726 if (KMA_ZERO
& flags
) {
727 vm_page_zero_fill(mem
);
729 mem
->vmp_snext
= page_list
;
/* Only kernel_object is supported here; the other branch panics. */
732 if (flags
& KMA_KOBJECT
) {
734 object
= kernel_object
;
736 vm_object_lock(object
);
739 * If it's not the kernel object, we need to:
743 * take reference on object;
746 panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
748 map
, (uint64_t) addr
, (uint64_t) size
, flags
);
753 pg_offset
+= PAGE_SIZE_64
) {
754 if (page_list
== NULL
) {
755 panic("kernel_memory_populate: page_list == NULL");
759 page_list
= mem
->vmp_snext
;
760 mem
->vmp_snext
= NULL
;
762 assert(mem
->vmp_q_state
== VM_PAGE_NOT_ON_Q
);
763 mem
->vmp_q_state
= VM_PAGE_IS_WIRED
;
764 mem
->vmp_wire_count
++;
765 if (__improbable(mem
->vmp_wire_count
== 0)) {
766 panic("kernel_memory_populate(%p): wire_count overflow", mem
);
769 vm_page_insert_wired(mem
, object
, offset
+ pg_offset
, tag
);
771 mem
->vmp_busy
= FALSE
;
772 mem
->vmp_pmapped
= TRUE
;
773 mem
->vmp_wpmapped
= TRUE
;
775 PMAP_ENTER_OPTIONS(kernel_pmap
, addr
+ pg_offset
, mem
,
776 VM_PROT_READ
| VM_PROT_WRITE
, VM_PROT_NONE
,
777 ((flags
& KMA_KSTACK
) ? VM_MEM_STACK
: 0), TRUE
,
778 PMAP_OPTIONS_NOWAIT
, pe_result
);
780 if (pe_result
== KERN_RESOURCE_SHORTAGE
) {
781 vm_object_unlock(object
);
783 PMAP_ENTER(kernel_pmap
, addr
+ pg_offset
, mem
,
784 VM_PROT_READ
| VM_PROT_WRITE
, VM_PROT_NONE
,
785 ((flags
& KMA_KSTACK
) ? VM_MEM_STACK
: 0), TRUE
,
788 vm_object_lock(object
);
791 assert(pe_result
== KERN_SUCCESS
);
793 if (flags
& KMA_NOENCRYPT
) {
794 bzero(CAST_DOWN(void *, (addr
+ pg_offset
)), PAGE_SIZE
);
795 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem
));
798 vm_page_lockspin_queues();
799 vm_page_wire_count
+= page_count
;
800 vm_page_unlock_queues();
802 #if DEBUG || DEVELOPMENT
803 VM_DEBUG_CONSTANT_EVENT(vm_kern_request
, VM_KERN_REQUEST
, DBG_FUNC_END
, page_grab_count
, 0, 0, 0);
805 ledger_credit(task
->ledger
, task_ledgers
.pages_grabbed_kern
, page_grab_count
);
809 if (kernel_object
== object
) {
810 vm_tag_update_size(tag
, size
);
813 vm_object_unlock(object
);
816 if (map
== compressor_map
) {
817 kasan_notify_address_nopoison(addr
, size
);
819 kasan_notify_address(addr
, size
);
826 vm_page_free_list(page_list
, FALSE
);
829 #if DEBUG || DEVELOPMENT
830 VM_DEBUG_CONSTANT_EVENT(vm_kern_request
, VM_KERN_REQUEST
, DBG_FUNC_END
, page_grab_count
, 0, 0, 0);
831 if (task
!= NULL
&& kr
== KERN_SUCCESS
) {
832 ledger_credit(task
->ledger
, task_ledgers
.pages_grabbed_kern
, page_grab_count
);
/*
 * kernel_memory_depopulate
 *
 * NOTE(review): the parameter list, braces and several statements are not
 * visible in this listing; the comments describe only the visible lines.
 *
 * Removes the pages backing a kernel VA range from compressor_object
 * (KMA_COMPRESSOR) or kernel_object (KMA_KOBJECT); any other flag
 * combination reaches the panic.  The range is first made inaccessible
 * with pmap_protect(VM_PROT_NONE).  Each page found by vm_page_lookup() is
 * disconnected from all pmaps unless it is in the
 * VM_PAGE_USED_BY_COMPRESSOR state, marked busy, removed from the object
 * and chained on local_freeq.  The whole chain is freed with
 * vm_page_free_list() after the object lock is dropped.
 */
841 kernel_memory_depopulate(
848 vm_object_offset_t offset
, pg_offset
;
850 vm_page_t local_freeq
= NULL
;
/* KMA_COMPRESSOR and KMA_KOBJECT are mutually exclusive. */
852 assert((flags
& (KMA_COMPRESSOR
| KMA_KOBJECT
)) != (KMA_COMPRESSOR
| KMA_KOBJECT
));
854 if (flags
& KMA_COMPRESSOR
) {
856 object
= compressor_object
;
858 vm_object_lock(object
);
859 } else if (flags
& KMA_KOBJECT
) {
861 object
= kernel_object
;
862 vm_object_lock(object
);
867 * If it's not the kernel object, we need to:
873 panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
875 map
, (uint64_t) addr
, (uint64_t) size
, flags
);
877 pmap_protect(kernel_map
->pmap
, offset
, offset
+ size
, VM_PROT_NONE
);
881 pg_offset
+= PAGE_SIZE_64
) {
882 mem
= vm_page_lookup(object
, offset
+ pg_offset
);
886 if (mem
->vmp_q_state
!= VM_PAGE_USED_BY_COMPRESSOR
) {
887 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem
));
890 mem
->vmp_busy
= TRUE
;
892 assert(mem
->vmp_tabled
);
893 vm_page_remove(mem
, TRUE
);
894 assert(mem
->vmp_busy
);
896 assert(mem
->vmp_pageq
.next
== 0 && mem
->vmp_pageq
.prev
== 0);
897 assert((mem
->vmp_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) ||
898 (mem
->vmp_q_state
== VM_PAGE_NOT_ON_Q
));
900 mem
->vmp_q_state
= VM_PAGE_NOT_ON_Q
;
901 mem
->vmp_snext
= local_freeq
;
904 vm_object_unlock(object
);
907 vm_page_free_list(local_freeq
, TRUE
);
914 * Allocate wired-down memory in the kernel's address map
915 * or a submap. The memory is not zero-filled.
/*
 * Forwards to kmem_alloc() using the tag returned by vm_tag_bt().
 * NOTE(review): the enclosing function's name and signature are not
 * visible in this listing; presumably kmem_alloc_external() -- confirm.
 */
924 return kmem_alloc(map
, addrp
, size
, vm_tag_bt());
/*
 * Forwards to kmem_alloc_flags() with the caller's tag and no flags (0).
 * NOTE(review): the enclosing function's name and signature are not
 * visible in this listing; presumably kmem_alloc() -- confirm.
 */
935 return kmem_alloc_flags(map
, addrp
, size
, tag
, 0);
/*
 * Calls kernel_memory_allocate() with a zero alignment mask and the
 * caller's flags and tag, then emits a TRACE_MACHLEAKS record with the
 * size and the resulting address.
 * NOTE(review): the enclosing function's name, signature and return
 * statement are not visible in this listing; presumably
 * kmem_alloc_flags() -- confirm.
 */
946 kern_return_t kr
= kernel_memory_allocate(map
, addrp
, size
, 0, flags
, tag
);
947 TRACE_MACHLEAKS(KMEM_ALLOC_CODE
, KMEM_ALLOC_CODE_2
, size
, *addrp
);
954 * Reallocate wired-down memory in the kernel's address map
955 * or a submap. Newly allocated pages are not zeroed.
956 * This can only be used on regions allocated with kmem_alloc.
958 * If successful, the pages in the old region are mapped twice.
959 * The old region is unchanged. Use kmem_free to get rid of it.
/*
 * kmem_realloc (name taken from the panic strings below).
 *
 * NOTE(review): part of the parameter list, some locals, braces and a few
 * statements are not visible in this listing; the comments describe only
 * the visible lines.
 *
 * Visible flow:
 *   - compute the page-rounded old range (oldmapmin/oldmapmax/oldmapsize)
 *     and the page-rounded new size; a rounded new size smaller than
 *     newsize means overflow and yields KERN_INVALID_ARGUMENT;
 *   - look up the object backing the old region, grow its vo_size to
 *     newmapsize and populate [oldmapsize, newmapsize) with
 *     kmem_alloc_pages();
 *   - reserve a new VA range, point its entry at the same object at
 *     offset 0, and wire it;
 *   - on either failure, free the pages that were added past oldmapsize,
 *     restore vo_size and drop the object reference.
 *
 * Fix: the cleanup loop after a failed vm_map_wire_kernel() used to start
 * at `oldsize` (the caller's byte count) instead of `oldmapsize`.  The
 * added pages live at page-aligned object offsets starting at oldmapsize,
 * so starting from oldsize could miss them or free a page that still
 * belongs to the old region.  It now matches the first cleanup loop.
 */
966 vm_offset_t
*newaddrp
,
971 vm_object_offset_t offset
;
972 vm_map_offset_t oldmapmin
;
973 vm_map_offset_t oldmapmax
;
974 vm_map_offset_t newmapaddr
;
975 vm_map_size_t oldmapsize
;
976 vm_map_size_t newmapsize
;
977 vm_map_entry_t oldentry
;
978 vm_map_entry_t newentry
;
982 oldmapmin
= vm_map_trunc_page(oldaddr
,
983 VM_MAP_PAGE_MASK(map
));
984 oldmapmax
= vm_map_round_page(oldaddr
+ oldsize
,
985 VM_MAP_PAGE_MASK(map
));
986 oldmapsize
= oldmapmax
- oldmapmin
;
987 newmapsize
= vm_map_round_page(newsize
,
988 VM_MAP_PAGE_MASK(map
));
/* Rounding wrapped around: the requested size is not representable. */
989 if (newmapsize
< newsize
) {
992 return KERN_INVALID_ARGUMENT
;
996 * Find the VM object backing the old region.
1001 if (!vm_map_lookup_entry(map
, oldmapmin
, &oldentry
)) {
1002 panic("kmem_realloc");
1004 object
= VME_OBJECT(oldentry
);
1007 * Increase the size of the object and
1008 * fill in the new region.
1011 vm_object_reference(object
);
1012 /* by grabbing the object lock before unlocking the map */
1013 /* we guarantee that we will panic if more than one */
1014 /* attempt is made to realloc a kmem_alloc'd area */
1015 vm_object_lock(object
);
1017 if (object
->vo_size
!= oldmapsize
) {
1018 panic("kmem_realloc");
1020 object
->vo_size
= newmapsize
;
1021 vm_object_unlock(object
);
1023 /* allocate the new pages while expanded portion of the */
1024 /* object is still not mapped */
1025 kmem_alloc_pages(object
, vm_object_round_page(oldmapsize
),
1026 vm_object_round_page(newmapsize
- oldmapsize
));
1029 * Find space for the new region.
1032 kr
= vm_map_find_space(map
, &newmapaddr
, newmapsize
,
1033 (vm_map_offset_t
) 0, 0,
1034 VM_MAP_KERNEL_FLAGS_NONE
,
/* No VA: undo the growth by freeing every page added past oldmapsize. */
1037 if (kr
!= KERN_SUCCESS
) {
1038 vm_object_lock(object
);
1039 for (offset
= oldmapsize
;
1040 offset
< newmapsize
; offset
+= PAGE_SIZE
) {
1041 if ((mem
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
1045 object
->vo_size
= oldmapsize
;
1046 vm_object_unlock(object
);
1047 vm_object_deallocate(object
);
1050 VME_OBJECT_SET(newentry
, object
);
1051 VME_OFFSET_SET(newentry
, 0);
1052 assert(newentry
->wired_count
== 0);
1055 /* add an extra reference in case we have someone doing an */
1056 /* unexpected deallocate */
1057 vm_object_reference(object
);
1060 kr
= vm_map_wire_kernel(map
, newmapaddr
, newmapaddr
+ newmapsize
,
1061 VM_PROT_DEFAULT
, tag
, FALSE
);
/*
 * Wiring failed: remove the new mapping and free the added pages.
 * The walk starts at oldmapsize (page-aligned), the same offset at which
 * kmem_alloc_pages() began adding pages above.
 */
1062 if (KERN_SUCCESS
!= kr
) {
1063 vm_map_remove(map
, newmapaddr
, newmapaddr
+ newmapsize
, VM_MAP_REMOVE_NO_FLAGS
);
1064 vm_object_lock(object
);
1065 for (offset
= oldmapsize
; offset
< newmapsize
; offset
+= PAGE_SIZE
) {
1066 if ((mem
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
1070 object
->vo_size
= oldmapsize
;
1071 vm_object_unlock(object
);
1072 vm_object_deallocate(object
);
/* Success: drop the extra reference taken before wiring. */
1075 vm_object_deallocate(object
);
1077 if (kernel_object
== object
) {
1078 vm_tag_update_size(tag
, newmapsize
);
1081 *newaddrp
= CAST_DOWN(vm_offset_t
, newmapaddr
);
1082 return KERN_SUCCESS
;
1086 * kmem_alloc_kobject:
1088 * Allocate wired-down memory in the kernel's address map
1089 * or a submap. The memory is not zero-filled.
1091 * The memory is allocated in the kernel_object.
1092 * It may not be copied with vm_map_copy, and
1093 * it may not be reallocated with kmem_realloc.
/*
 * kmem_alloc_kobject_external: forwards to kmem_alloc_kobject() using the
 * tag returned by vm_tag_bt().
 * NOTE(review): the parameter list is not visible in this listing.
 */
1097 kmem_alloc_kobject_external(
1102 return kmem_alloc_kobject(map
, addrp
, size
, vm_tag_bt());
/*
 * Allocates through kernel_memory_allocate() with a zero alignment mask
 * and the KMA_KOBJECT flag, so the memory is backed by kernel_object.
 * NOTE(review): the enclosing function's name and signature are not
 * visible in this listing; presumably kmem_alloc_kobject() -- confirm.
 */
1112 return kernel_memory_allocate(map
, addrp
, size
, 0, KMA_KOBJECT
, tag
);
1116 * kmem_alloc_aligned:
1118 * Like kmem_alloc_kobject, except that the memory is aligned.
1119 * The size should be a power-of-2.
/*
 * Body of kmem_alloc_aligned (name taken from the panic string).
 * NOTE(review): the signature is not visible in this listing.
 *
 * Panics when size has more than one bit set (the test also lets 0
 * through), then allocates from kernel_object with an alignment mask of
 * size - 1.
 */
1129 if ((size
& (size
- 1)) != 0) {
1130 panic("kmem_alloc_aligned: size not aligned");
1132 return kernel_memory_allocate(map
, addrp
, size
, size
- 1, KMA_KOBJECT
, tag
);
1136 * kmem_alloc_pageable:
1138 * Allocate pageable memory in the kernel's address map.
/*
 * kmem_alloc_pageable_external: forwards to kmem_alloc_pageable() using
 * the tag returned by vm_tag_bt().
 * NOTE(review): the parameter list is not visible in this listing.
 */
1142 kmem_alloc_pageable_external(
1147 return kmem_alloc_pageable(map
, addrp
, size
, vm_tag_bt());
/*
 * kmem_alloc_pageable
 *
 * NOTE(review): the parameter list, braces and some statements are not
 * visible in this listing.
 *
 * Enters a mapping with no backing object (VM_OBJECT_NULL) into the map
 * via vm_map_enter(), using VM_PROT_DEFAULT / VM_PROT_ALL.  The size is
 * rounded up to the map's page size; a rounded size smaller than the
 * request means overflow and yields KERN_INVALID_ARGUMENT.  On success the
 * range is reported to KASAN and the address is stored through addrp.
 */
1151 kmem_alloc_pageable(
1157 vm_map_offset_t map_addr
;
1158 vm_map_size_t map_size
;
/*
 * Two alternative starting hints follow.
 * NOTE(review): the condition selecting between them is not visible --
 * presumably a preprocessor conditional; confirm against the full file.
 */
1162 map_addr
= (vm_map_min(map
)) + PAGE_SIZE
;
1164 map_addr
= vm_map_min(map
);
1166 map_size
= vm_map_round_page(size
,
1167 VM_MAP_PAGE_MASK(map
));
1168 if (map_size
< size
) {
1171 return KERN_INVALID_ARGUMENT
;
1174 kr
= vm_map_enter(map
, &map_addr
, map_size
,
1175 (vm_map_offset_t
) 0,
1177 VM_MAP_KERNEL_FLAGS_NONE
,
1179 VM_OBJECT_NULL
, (vm_object_offset_t
) 0, FALSE
,
1180 VM_PROT_DEFAULT
, VM_PROT_ALL
, VM_INHERIT_DEFAULT
);
1182 if (kr
!= KERN_SUCCESS
) {
1187 kasan_notify_address(map_addr
, map_size
);
1189 *addrp
= CAST_DOWN(vm_offset_t
, map_addr
);
1190 return KERN_SUCCESS
;
1196 * Release a region of kernel virtual memory allocated
1197 * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
1198 * and return the physical pages associated with that region.
/*
 * Body of kmem_free (name taken from the printf string).
 * NOTE(review): the signature, braces and some statements are not visible
 * in this listing.
 *
 * Asserts that addr is at or above VM_MIN_KERNEL_AND_KEXT_ADDRESS, traces
 * the call, logs a message on the size == 0 path, and otherwise removes
 * the page-rounded range from the map with VM_MAP_REMOVE_KUNWIRE.
 * NOTE(review): what happens when vm_map_remove() fails is not visible.
 */
1209 assert(addr
>= VM_MIN_KERNEL_AND_KEXT_ADDRESS
);
1211 TRACE_MACHLEAKS(KMEM_FREE_CODE
, KMEM_FREE_CODE_2
, size
, addr
);
1215 printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map
, (uint64_t)addr
);
1220 kr
= vm_map_remove(map
,
1221 vm_map_trunc_page(addr
,
1222 VM_MAP_PAGE_MASK(map
)),
1223 vm_map_round_page(addr
+ size
,
1224 VM_MAP_PAGE_MASK(map
)),
1225 VM_MAP_REMOVE_KUNWIRE
);
1226 if (kr
!= KERN_SUCCESS
) {
1232 * Allocate new pages in an object.
/*
 * Definition of kmem_alloc_pages().
 * NOTE(review): the function name line, first parameter, braces and a few
 * statements are not visible in this listing.
 *
 * With the object locked, allocates one page per PAGE_SIZE step with
 * vm_page_alloc(object, offset) until the page-rounded size is consumed.
 * While vm_page_alloc() returns VM_PAGE_NULL the object lock is dropped
 * and retaken before retrying (NOTE(review): the statement between the
 * unlock and the lock is not visible -- presumably a wait for free pages).
 * Each new page has vmp_busy cleared.  The only visible return value is
 * KERN_SUCCESS.
 */
1238 vm_object_offset_t offset
,
1239 vm_object_size_t size
)
1241 vm_object_size_t alloc_size
;
1243 alloc_size
= vm_object_round_page(size
);
1244 vm_object_lock(object
);
1245 while (alloc_size
) {
1252 while (VM_PAGE_NULL
==
1253 (mem
= vm_page_alloc(object
, offset
))) {
1254 vm_object_unlock(object
);
1256 vm_object_lock(object
);
1258 mem
->vmp_busy
= FALSE
;
1260 alloc_size
-= PAGE_SIZE
;
1261 offset
+= PAGE_SIZE
;
1263 vm_object_unlock(object
);
1264 return KERN_SUCCESS
;
1270 * Allocates a map to manage a subrange
1271 * of the kernel virtual address space.
1273 * Arguments are as follows:
1275 * parent Map to take range from
1276 * addr Address of start of range (IN/OUT)
1277 * size Size of range to find
1278 * pageable Can region be paged
1279 * anywhere Can region be located anywhere in map
1280 * new_map Pointer to new submap
/*
 * Body of kmem_suballoc (name taken from the panic string).
 * NOTE(review): most of the parameter list, braces and some statements are
 * not visible in this listing.
 *
 * Visible flow:
 *   - round size to the parent's page size (overflow ->
 *     KERN_INVALID_ARGUMENT);
 *   - take a reference on vm_submap_object and enter it into the parent at
 *     either vm_map_min(parent) (VM_FLAGS_ANYWHERE) or the truncated *addr;
 *   - create a new map over the same pmap covering exactly that range,
 *     inheriting the parent's page shift;
 *   - install it with vm_map_submap(); on failure remove the range and
 *     release the new map and the submap-object reference;
 *   - return the chosen address through addr.
 */
1289 vm_map_kernel_flags_t vmk_flags
,
1294 vm_map_offset_t map_addr
;
1295 vm_map_size_t map_size
;
1298 map_size
= vm_map_round_page(size
,
1299 VM_MAP_PAGE_MASK(parent
));
1300 if (map_size
< size
) {
1303 return KERN_INVALID_ARGUMENT
;
1307 * Need reference on submap object because it is internal
1308 * to the vm_system. vm_object_enter will never be called
1309 * on it (usual source of reference for vm_map_enter).
1311 vm_object_reference(vm_submap_object
);
1313 map_addr
= ((flags
& VM_FLAGS_ANYWHERE
)
1314 ? vm_map_min(parent
)
1315 : vm_map_trunc_page(*addr
,
1316 VM_MAP_PAGE_MASK(parent
)));
1318 kr
= vm_map_enter(parent
, &map_addr
, map_size
,
1319 (vm_map_offset_t
) 0, flags
, vmk_flags
, tag
,
1320 vm_submap_object
, (vm_object_offset_t
) 0, FALSE
,
1321 VM_PROT_DEFAULT
, VM_PROT_ALL
, VM_INHERIT_DEFAULT
);
/* Could not reserve the range: give back the reference taken above. */
1322 if (kr
!= KERN_SUCCESS
) {
1323 vm_object_deallocate(vm_submap_object
);
/* The new map shares the parent's pmap, so take a pmap reference for it. */
1327 pmap_reference(vm_map_pmap(parent
));
1328 map
= vm_map_create(vm_map_pmap(parent
), map_addr
, map_addr
+ map_size
, pageable
);
1329 if (map
== VM_MAP_NULL
) {
1330 panic("kmem_suballoc: vm_map_create failed"); /* "can't happen" */
1332 /* inherit the parent map's page size */
1333 vm_map_set_page_shift(map
, VM_MAP_PAGE_SHIFT(parent
));
1335 kr
= vm_map_submap(parent
, map_addr
, map_addr
+ map_size
, map
, map_addr
, FALSE
);
1336 if (kr
!= KERN_SUCCESS
) {
1338 * See comment preceding vm_map_submap().
1340 vm_map_remove(parent
, map_addr
, map_addr
+ map_size
,
1341 VM_MAP_REMOVE_NO_FLAGS
);
1342 vm_map_deallocate(map
); /* also removes ref to pmap */
1343 vm_object_deallocate(vm_submap_object
);
1346 *addr
= CAST_DOWN(vm_offset_t
, map_addr
);
1348 return KERN_SUCCESS
;
1354 * Initialize the kernel's virtual memory map, taking
1355 * into account all memory allocated up to this time.
/*
 * Body of kmem_init (name taken from the panic strings).
 * NOTE(review): the signature, braces, preprocessor #else/#endif lines and
 * several argument lines are not visible in this listing; the comments
 * describe only the visible lines.
 *
 * Visible flow:
 *   - page-align [start, end) into map_start/map_end;
 *   - create kernel_map over the kernel pmap;
 *   - on ARM, reserve every region reported by pmap_virtual_region();
 *     in the other variant, reserve
 *     [VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_start) when start is above the
 *     minimum;  any vm_map_enter() failure panics;
 *   - derive the global and per-user wire limits from max_mem.
 *
 * Fix: the two out-parameters of pmap_virtual_region() had been corrupted
 * into "®ion_start" / "®ion_size" ("&reg" decoded as an HTML entity).
 * They are restored to address-of expressions on the locals declared just
 * above, which is the only reading under which region_start is assigned
 * before it is copied into map_addr.
 */
1362 vm_map_offset_t map_start
;
1363 vm_map_offset_t map_end
;
1364 vm_map_kernel_flags_t vmk_flags
;
1366 vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
1367 vmk_flags
.vmkf_permanent
= TRUE
;
1368 vmk_flags
.vmkf_no_pmap_check
= TRUE
;
1370 map_start
= vm_map_trunc_page(start
,
1371 VM_MAP_PAGE_MASK(kernel_map
));
1372 map_end
= vm_map_round_page(end
,
1373 VM_MAP_PAGE_MASK(kernel_map
));
1375 #if defined(__arm__) || defined(__arm64__)
1376 kernel_map
= vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS
,
1377 VM_MAX_KERNEL_ADDRESS
, FALSE
);
1379 * Reserve virtual memory allocated up to this time.
1382 unsigned int region_select
= 0;
1383 vm_map_offset_t region_start
;
1384 vm_map_size_t region_size
;
1385 vm_map_offset_t map_addr
;
/* Each iteration reserves one pmap-reported region with no access rights. */
1388 while (pmap_virtual_region(region_select
, &region_start
, &region_size
)) {
1389 map_addr
= region_start
;
1390 kr
= vm_map_enter(kernel_map
, &map_addr
,
1391 vm_map_round_page(region_size
,
1392 VM_MAP_PAGE_MASK(kernel_map
)),
1393 (vm_map_offset_t
) 0,
1396 VM_KERN_MEMORY_NONE
,
1398 (vm_object_offset_t
) 0, FALSE
, VM_PROT_NONE
, VM_PROT_NONE
,
1399 VM_INHERIT_DEFAULT
);
1401 if (kr
!= KERN_SUCCESS
) {
1402 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
1403 (uint64_t) start
, (uint64_t) end
, (uint64_t) region_start
,
1404 (uint64_t) region_size
, kr
);
1411 kernel_map
= vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS
,
1414 * Reserve virtual memory allocated up to this time.
1416 if (start
!= VM_MIN_KERNEL_AND_KEXT_ADDRESS
) {
1417 vm_map_offset_t map_addr
;
1420 vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
1421 vmk_flags
.vmkf_no_pmap_check
= TRUE
;
1423 map_addr
= VM_MIN_KERNEL_AND_KEXT_ADDRESS
;
1424 kr
= vm_map_enter(kernel_map
,
1426 (vm_map_size_t
)(map_start
- VM_MIN_KERNEL_AND_KEXT_ADDRESS
),
1427 (vm_map_offset_t
) 0,
1430 VM_KERN_MEMORY_NONE
,
1432 (vm_object_offset_t
) 0, FALSE
,
1433 VM_PROT_NONE
, VM_PROT_NONE
,
1434 VM_INHERIT_DEFAULT
);
1436 if (kr
!= KERN_SUCCESS
) {
1437 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
1438 (uint64_t) start
, (uint64_t) end
,
1439 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS
,
1440 (uint64_t) (map_start
- VM_MIN_KERNEL_AND_KEXT_ADDRESS
),
1447 * Set the default global user wire limit which limits the amount of
1448 * memory that can be locked via mlock(). We set this to the total
1449 * amount of memory that are potentially usable by a user app (max_mem)
1450 * minus a certain amount. This can be overridden via a sysctl.
/* Non-wireable reserve: the smaller of 20% of max_mem and VM_NOT_USER_WIREABLE. */
1452 vm_global_no_user_wire_amount
= MIN(max_mem
* 20 / 100,
1453 VM_NOT_USER_WIREABLE
);
1454 vm_global_user_wire_limit
= max_mem
- vm_global_no_user_wire_amount
;
1456 /* the default per user limit is the same as the global limit */
1457 vm_user_wire_limit
= vm_global_user_wire_limit
;
1462 * Routine: copyinmap
1464 * Like copyin, except that fromaddr is an address
1465 * in the specified VM map. This implementation
1466 * is incomplete; it handles the current user map
1467 * and the kernel map/submaps.
/*
 * Body of the copy-in-from-map routine.
 * NOTE(review): the function name, most of the signature, braces and the
 * return statement are not visible in this listing; the preceding header
 * comment names it copyinmap.
 *
 * Three cases, chosen by the map:
 *   - the map uses the kernel pmap: plain memcpy() from fromaddr;
 *   - the map is the current map: copyin();
 *   - any other map: take a map reference, switch to it with
 *     vm_map_switch(), copyin(), switch back and drop the reference.
 * A non-zero copyin() result is reported as KERN_INVALID_ADDRESS.
 */
1472 vm_map_offset_t fromaddr
,
1476 kern_return_t kr
= KERN_SUCCESS
;
1479 if (vm_map_pmap(map
) == pmap_kernel()) {
1480 /* assume a correct copy */
1481 memcpy(todata
, CAST_DOWN(void *, fromaddr
), length
);
1482 } else if (current_map() == map
) {
1483 if (copyin(fromaddr
, todata
, length
) != 0) {
1484 kr
= KERN_INVALID_ADDRESS
;
/* Foreign map: hold a reference for the duration of the temporary switch. */
1487 vm_map_reference(map
);
1488 oldmap
= vm_map_switch(map
);
1489 if (copyin(fromaddr
, todata
, length
) != 0) {
1490 kr
= KERN_INVALID_ADDRESS
;
1492 vm_map_switch(oldmap
);
1493 vm_map_deallocate(map
);
1499 * Routine: copyoutmap
1501 * Like copyout, except that toaddr is an address
1502 * in the specified VM map. This implementation
1503 * is incomplete; it handles the current user map
1504 * and the kernel map/submaps.
/*
 * Body of the copy-out-to-map routine.
 * NOTE(review): the function name, most of the signature and braces are
 * not visible in this listing; the preceding header comment names it
 * copyoutmap.
 *
 *   - the map uses the kernel pmap: plain memcpy() to toaddr, success;
 *   - the map is not the current map: KERN_NOT_SUPPORTED (unlike the
 *     copy-in routine above, no map switch is attempted);
 *   - otherwise copyout(); a non-zero result is KERN_INVALID_ADDRESS.
 */
1510 vm_map_address_t toaddr
,
1513 if (vm_map_pmap(map
) == pmap_kernel()) {
1514 /* assume a correct copy */
1515 memcpy(CAST_DOWN(void *, toaddr
), fromdata
, length
);
1516 return KERN_SUCCESS
;
1519 if (current_map() != map
) {
1520 return KERN_NOT_SUPPORTED
;
1523 if (copyout(fromdata
, toaddr
, length
) != 0) {
1524 return KERN_INVALID_ADDRESS
;
1527 return KERN_SUCCESS
;
1532 * The following two functions are to be used when exposing kernel
1533 * addresses to userspace via any of the various debug or info
1534 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
1535 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
1536 * are exported to KEXTs.
1538 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
/*
 * vm_kernel_addrhash_internal
 *
 * NOTE(review): part of the parameter list, braces and some statements
 * (including any early returns) are not visible in this listing.
 *
 * Writes an obfuscated form of addr through hash_addr:
 *   - a slid kernel address is simply unslid with VM_KERNEL_UNSLIDE();
 *   - otherwise SHA-256 is computed over the salt followed by the address
 *     and the first vm_offset_t-sized word of the digest is used.
 */
1542 vm_kernel_addrhash_internal(
1544 vm_offset_t
*hash_addr
,
1554 if (VM_KERNEL_IS_SLID(addr
)) {
1555 *hash_addr
= VM_KERNEL_UNSLIDE(addr
);
/* Digest buffer viewed as an array of vm_offset_t words. */
1559 vm_offset_t sha_digest
[SHA256_DIGEST_LENGTH
/ sizeof(vm_offset_t
)];
1562 SHA256_Init(&sha_ctx
);
1563 SHA256_Update(&sha_ctx
, &salt
, sizeof(salt
));
1564 SHA256_Update(&sha_ctx
, &addr
, sizeof(addr
));
1565 SHA256_Final(sha_digest
, &sha_ctx
);
1567 *hash_addr
= sha_digest
[0];
/*
 * vm_kernel_addrhash_external: hashes addr into *hash_addr using the
 * external salt vm_kernel_addrhash_salt_ext.
 * NOTE(review): the first parameter line and braces are not visible in
 * this listing.
 */
1571 vm_kernel_addrhash_external(
1573 vm_offset_t
*hash_addr
)
1575 return vm_kernel_addrhash_internal(addr
, hash_addr
, vm_kernel_addrhash_salt_ext
);
/*
 * vm_kernel_addrhash: hashes addr with the in-kernel salt
 * vm_kernel_addrhash_salt into a local.
 * NOTE(review): the return type, braces and return statement are not
 * visible in this listing; presumably hash_addr is returned -- confirm.
 */
1579 vm_kernel_addrhash(vm_offset_t addr
)
1581 vm_offset_t hash_addr
;
1582 vm_kernel_addrhash_internal(addr
, &hash_addr
, vm_kernel_addrhash_salt
);
/*
 * Stores VM_KERNEL_ADDRHIDE(addr) through hide_addr.
 * NOTE(review): the function name and first parameter are not visible in
 * this listing; presumably vm_kernel_addrhide() -- confirm.
 */
1589 vm_offset_t
*hide_addr
)
1591 *hide_addr
= VM_KERNEL_ADDRHIDE(addr
);
1595 * vm_kernel_addrperm_external:
1596 * vm_kernel_unslide_or_perm_external:
1598 * Use these macros when exposing an address to userspace that could come from
1599 * either kernel text/data *or* the heap.
/*
 * vm_kernel_addrperm_external
 *
 * NOTE(review): the first parameter line, braces and any trailing else
 * branch are not visible in this listing.
 *
 * Writes a userspace-safe form of addr through perm_addr:
 *   - a slid kernel address is unslid with VM_KERNEL_UNSLIDE();
 *   - any other kernel address has vm_kernel_addrperm_ext added to it.
 */
1602 vm_kernel_addrperm_external(
1604 vm_offset_t
*perm_addr
)
1606 if (VM_KERNEL_IS_SLID(addr
)) {
1607 *perm_addr
= VM_KERNEL_UNSLIDE(addr
);
1608 } else if (VM_KERNEL_ADDRESS(addr
)) {
1609 *perm_addr
= addr
+ vm_kernel_addrperm_ext
;
/*
 * vm_kernel_unslide_or_perm_external: delegates directly to
 * vm_kernel_addrperm_external() with the same arguments.
 * NOTE(review): the first parameter line and braces are not visible in
 * this listing.
 */
1616 vm_kernel_unslide_or_perm_external(
1618 vm_offset_t
*up_addr
)
1620 vm_kernel_addrperm_external(addr
, up_addr
);