/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Kernel memory management.
 */
#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>

#include <libkern/OSDebug.h>
#include <sys/kdebug.h>
/*
 *	Variables exported by this module.
 */

vm_map_t	kernel_pageable_map;

extern boolean_t vm_kernel_ready;

/*
 * Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size);
kern_return_t
kmem_alloc_contig(
	vm_map_t		map,
	vm_offset_t		*addrp,
	vm_size_t		size,
	vm_offset_t		mask,
	ppnum_t			max_pnum,
	ppnum_t			pnum_mask,
	int			flags,
	vm_tag_t		tag)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		map_addr;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, i;
	vm_map_entry_t		entry;
	vm_page_t		m, pages;
	kern_return_t		kr;

	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT)))
		return KERN_INVALID_ARGUMENT;

	map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t)mask;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Allocate a new object (if necessary) and the reference we
	 *	will be donating to the map entry.  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		return kr;
	}

	if (object == kernel_object) {
		offset = map_addr;
	} else {
		offset = 0;
	}
	VME_OBJECT_SET(entry, object);
	VME_OFFSET_SET(entry, offset);
	VME_ALIAS_SET(entry, tag);

	/* Take an extra object ref in case the map entry gets deleted */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

	if (kr != KERN_SUCCESS) {
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr, VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size, VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		*addrp = 0;
		return kr;
	}

	vm_object_lock(object);
	for (i = 0; i < map_size; i += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
		m->busy = FALSE;
		vm_page_insert(m, object, offset + i);
	}
	vm_object_unlock(object);

	kr = vm_map_wire(map,
			 vm_map_trunc_page(map_addr, VM_MAP_PAGE_MASK(map)),
			 vm_map_round_page(map_addr + map_size, VM_MAP_PAGE_MASK(map)),
			 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(tag),
			 FALSE);

	if (kr != KERN_SUCCESS) {
		if (object == kernel_object) {
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + map_size);
			vm_object_unlock(object);
		}
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr, VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size, VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		return kr;
	}
	vm_object_deallocate(object);

	if (object == kernel_object)
		vm_map_simplify(map, map_addr);

	*addrp = (vm_offset_t) map_addr;
	assert((vm_map_offset_t) *addrp == map_addr);
	return KERN_SUCCESS;
}
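/*
 * Illustrative usage sketch, not part of the original source: a driver-style
 * caller asking for physically contiguous, wired memory.  The physical-page
 * bound, flags, and tag below are assumptions chosen for the example only.
 */
#if 0	/* example only */
static kern_return_t
example_grab_contig_buffer(vm_offset_t *out_addr, vm_size_t len)
{
	return kmem_alloc_contig(kernel_map, out_addr, len,
				 (vm_offset_t)0,	/* no extra VA alignment */
				 (ppnum_t)0xFFFFF,	/* hypothetical max physical page */
				 (ppnum_t)0,		/* no physical alignment constraint */
				 KMA_LOMEM, VM_KERN_MEMORY_NONE);
}
#endif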
/*
 *	kernel_memory_allocate:
 *
 *	Master entry point for allocating kernel memory.
 *	NOTE: this routine is _never_ interrupt safe.
 *
 *	map	: map to allocate into
 *	addrp	: pointer to start address of new memory
 *	size	: size of memory requested
 *	flags	: options
 *		  KMA_HERE		*addrp is base address, else "anywhere"
 *		  KMA_NOPAGEWAIT	don't wait for pages if unavailable
 *		  KMA_KOBJECT		use kernel_object
 *		  KMA_LOMEM		support for 32 bit devices in a 64 bit world
 *					if set and a lomemory pool is available
 *					grab pages from it... this also implies
 *					4K alignment of the returned memory
 */

kern_return_t
kernel_memory_allocate(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size,
	vm_offset_t	mask,
	int		flags,
	vm_tag_t	tag)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_object_offset_t	pg_offset;
	vm_map_entry_t		entry = NULL;
	vm_map_offset_t		map_addr, fill_start;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, fill_size;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		guard_page_list = NULL;
	vm_page_t		wired_page_list = NULL;
	int			guard_page_count = 0;
	int			wired_page_count = 0;
	int			i;
	int			vm_alloc_flags;
	vm_prot_t		kma_prot;

	if (! vm_kernel_ready) {
		panic("kernel_memory_allocate: VM is not ready");
	}

	map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t) mask;

	vm_alloc_flags = VM_MAKE_TAG(tag);

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down.
	 * Limit raised to 2GB with 128GB max physical limit,
	 * but scaled by installed memory above this.
	 */
	if (!(flags & KMA_VAONLY) && map_size > MAX(1ULL << 31, sane_size / 64)) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/*
	 * Guard pages are implemented as fictitious pages.  By placing guard pages
	 * on either end of a stack, they can help detect cases where a thread walks
	 * off either end of its stack.  They are allocated and set up here and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages.  If those were requested, then back it out of fill_size
	 * since vm_map_find_space() takes just the actual size not including
	 * guard pages.  Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */
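	/*
	 * Illustrative layout (an assumption for exposition, not taken from the
	 * original source): a kernel-stack style request of N pages plus both
	 * guard flags would arrive here with map_size = (N + 2) * PAGE_SIZE and
	 * leave the block below with fill_start = PAGE_SIZE and fill_size = N pages:
	 *
	 *	| guard | wired page 0 ... wired page N-1 | guard |
	 *	^ map_addr                     map_addr + map_size ^
	 */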
	fill_start = 0;
	fill_size = map_size;

	if (flags & KMA_GUARD_FIRST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
		fill_start += PAGE_SIZE_64;
		fill_size -= PAGE_SIZE_64;
		if (map_size < fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	if (flags & KMA_GUARD_LAST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
		fill_size -= PAGE_SIZE_64;
		if (map_size <= fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
	assert(wired_page_count * PAGE_SIZE_64 == fill_size);

	for (i = 0; i < guard_page_count; i++) {
		for (;;) {
			mem = vm_page_grab_guard();

			if (mem != VM_PAGE_NULL)
				break;
			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			vm_page_more_fictitious();
		}
		mem->snext = guard_page_list;
		guard_page_list = mem;
	}

	if (! (flags & KMA_VAONLY)) {
		for (i = 0; i < wired_page_count; i++) {
			uint64_t	unavailable;

			for (;;) {
				if (flags & KMA_LOMEM)
					mem = vm_page_grablo();
				else
					mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				if (flags & KMA_NOPAGEWAIT) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;

				if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				VM_PAGE_WAIT();
			}
			mem->snext = wired_page_list;
			wired_page_list = mem;
		}
	}

	/*
	 *	Allocate a new object (if necessary).  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else if ((flags & KMA_COMPRESSOR) != 0) {
		object = compressor_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	if (flags & KMA_ATOMIC)
		vm_alloc_flags |= VM_FLAGS_ATOMIC_ENTRY;

	kr = vm_map_find_space(map, &map_addr,
			       fill_size, map_mask,
			       vm_alloc_flags, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		goto out;
	}

	if (object == kernel_object || object == compressor_object) {
		offset = map_addr;
	} else {
		offset = 0;
	}
	VME_OBJECT_SET(entry, object);
	VME_OFFSET_SET(entry, offset);

	if (object != compressor_object)
		entry->wired_count++;

	if (flags & KMA_PERMANENT)
		entry->permanent = TRUE;

	if (object != kernel_object && object != compressor_object)
		vm_object_reference(object);

	vm_object_lock(object);
	vm_map_unlock(map);

	pg_offset = 0;

	if (fill_start) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = mem->snext;
		mem->snext = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		pg_offset += PAGE_SIZE_64;
	}

	kma_prot = VM_PROT_READ | VM_PROT_WRITE;

	if (flags & KMA_VAONLY) {
		pg_offset = fill_start + fill_size;
	} else {
		for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
			if (wired_page_list == NULL)
				panic("kernel_memory_allocate: wired_page_list == NULL");

			mem = wired_page_list;
			wired_page_list = mem->snext;
			mem->snext = NULL;

			assert(mem->wire_count == 0);
			assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);

			mem->vm_page_q_state = VM_PAGE_IS_WIRED;
			mem->wire_count++;
			if (__improbable(mem->wire_count == 0)) {
				panic("kernel_memory_allocate(%p): wire_count overflow", mem);
			}

			vm_page_insert_wired(mem, object, offset + pg_offset, tag);

			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;

			PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
					   PMAP_OPTIONS_NOWAIT, pe_result);

			if (pe_result == KERN_RESOURCE_SHORTAGE) {
				vm_object_unlock(object);

				PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

				vm_object_lock(object);
			}
			if (flags & KMA_NOENCRYPT) {
				bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);

				pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
			}
		}
	}
	if ((fill_start + fill_size) < map_size) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = mem->snext;
		mem->snext = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
	}
	if (guard_page_list || wired_page_list)
		panic("kernel_memory_allocate: non empty list\n");

	if (! (flags & KMA_VAONLY)) {
		vm_page_lockspin_queues();
		vm_page_wire_count += wired_page_count;
		vm_page_unlock_queues();
	}

	vm_object_unlock(object);

	/*
	 * now that the pages are wired, we no longer have to fear coalesce
	 */
	if (object == kernel_object || object == compressor_object)
		vm_map_simplify(map, map_addr);
	else
		vm_object_deallocate(object);

	/*
	 *	Return the memory, not zeroed.
	 */
	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;

out:
	if (guard_page_list)
		vm_page_free_list(guard_page_list, FALSE);

	if (wired_page_list)
		vm_page_free_list(wired_page_list, FALSE);

	return kr;
}
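/*
 * Illustrative sketch, not part of the original file: how a kernel-stack
 * style caller might use kernel_memory_allocate() with guard pages on both
 * ends.  The extra 2*PAGE_SIZE and the flag/tag combination are assumptions
 * for the example, not values taken from this file.
 */
#if 0	/* example only */
static kern_return_t
example_alloc_guarded_stack(vm_offset_t *stackp, vm_size_t stack_size)
{
	/* request the stack plus one guard page at each end */
	return kernel_memory_allocate(kernel_map, stackp,
				      stack_size + 2 * PAGE_SIZE, 0,
				      KMA_KSTACK | KMA_KOBJECT |
				      KMA_GUARD_FIRST | KMA_GUARD_LAST,
				      VM_KERN_MEMORY_STACK);
}
#endif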
kern_return_t
kernel_memory_populate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags,
	vm_tag_t	tag)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		page_list = NULL;
	int			page_count = 0;
	int			i;

	page_count = (int) (size / PAGE_SIZE_64);

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {

		pg_offset = page_count * PAGE_SIZE_64;

		do {
			for (;;) {
				mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				VM_PAGE_WAIT();
			}
			mem->snext = page_list;
			page_list = mem;

			pg_offset -= PAGE_SIZE_64;

			kr = pmap_enter_options(kernel_pmap,
						addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
						VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
						PMAP_OPTIONS_INTERNAL, NULL);
			assert(kr == KERN_SUCCESS);

		} while (pg_offset);

		offset = addr;
		object = compressor_object;

		vm_object_lock(object);

		for (pg_offset = 0;
		     pg_offset < size;
		     pg_offset += PAGE_SIZE_64) {

			mem = page_list;
			page_list = mem->snext;
			mem->snext = NULL;

			vm_page_insert(mem, object, offset + pg_offset);

			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;
			mem->vm_page_q_state = VM_PAGE_USED_BY_COMPRESSOR;
		}
		vm_object_unlock(object);

		return KERN_SUCCESS;
	}

	for (i = 0; i < page_count; i++) {
		for (;;) {
			if (flags & KMA_LOMEM)
				mem = vm_page_grablo();
			else
				mem = vm_page_grab();

			if (mem != VM_PAGE_NULL)
				break;

			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			if ((flags & KMA_LOMEM) &&
			    (vm_lopage_needed == TRUE)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			VM_PAGE_WAIT();
		}
		mem->snext = page_list;
		page_list = mem;
	}
	if (flags & KMA_KOBJECT) {
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		/*
		 * If it's not the kernel object, we need to:
		 *	lock map;
		 *	lookup entry;
		 *	lock object;
		 *	take reference on object;
		 *	unlock map;
		 */
		panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		if (page_list == NULL)
			panic("kernel_memory_populate: page_list == NULL");

		mem = page_list;
		page_list = mem->snext;
		mem->snext = NULL;

		assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
		mem->vm_page_q_state = VM_PAGE_IS_WIRED;
		mem->wire_count++;
		if (__improbable(mem->wire_count == 0)) {
			panic("kernel_memory_populate(%p): wire_count overflow", mem);
		}

		vm_page_insert_wired(mem, object, offset + pg_offset, tag);

		mem->busy = FALSE;
		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
				   PMAP_OPTIONS_NOWAIT, pe_result);

		if (pe_result == KERN_RESOURCE_SHORTAGE) {

			vm_object_unlock(object);

			PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

			vm_object_lock(object);
		}
		if (flags & KMA_NOENCRYPT) {
			bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
		}
	}
	vm_page_lock_queues();
	vm_page_wire_count += page_count;
	vm_page_unlock_queues();

	vm_object_unlock(object);

	return KERN_SUCCESS;

out:
	if (page_list)
		vm_page_free_list(page_list, FALSE);

	return kr;
}
void
kernel_memory_depopulate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	vm_page_t		mem;
	vm_page_t		local_freeq = NULL;

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {
		offset = addr;
		object = compressor_object;

		vm_object_lock(object);
	} else if (flags & KMA_KOBJECT) {
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		/*
		 * If it's not the kernel object, we need to:
		 *	lock map;
		 *	lookup entry;
		 *	lock object;
		 *	unlock map;
		 */
		panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}
	pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		mem = vm_page_lookup(object, offset + pg_offset);

		assert(mem);

		if (mem->vm_page_q_state != VM_PAGE_USED_BY_COMPRESSOR)
			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));

		mem->busy = TRUE;

		vm_page_remove(mem, TRUE);

		assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
		assert((mem->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
		       (mem->vm_page_q_state == VM_PAGE_NOT_ON_Q));

		mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
		mem->snext = local_freeq;
		local_freeq = mem;
	}
	vm_object_unlock(object);

	if (local_freeq)
		vm_page_free_list(local_freeq, TRUE);
}
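/*
 * Illustrative pairing (an assumption, not taken from this file): a caller
 * such as the compressor can reserve virtual space once and then use
 * kernel_memory_populate()/kernel_memory_depopulate() with KMA_COMPRESSOR to
 * attach and detach physical pages on demand.
 */
#if 0	/* example only */
static void
example_compressor_segment_cycle(vm_offset_t seg_addr, vm_size_t seg_size)
{
	/* back the reserved VA range with real pages ... */
	(void) kernel_memory_populate(kernel_map, seg_addr, seg_size,
				      KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);

	/* ... use the segment, then give the pages back */
	kernel_memory_depopulate(kernel_map, seg_addr, seg_size, KMA_COMPRESSOR);
}
#endif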
/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

kern_return_t
kmem_alloc_external(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return (kmem_alloc(map, addrp, size, vm_tag_bt()));
}

kern_return_t
kmem_alloc(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size,
	vm_tag_t	tag)
{
	return kmem_alloc_flags(map, addrp, size, tag, 0);
}

kern_return_t
kmem_alloc_flags(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size,
	vm_tag_t	tag,
	int		flags)
{
	kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, flags, tag);
	TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
	return kr;
}
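/*
 * Minimal usage sketch (illustrative only): allocate a wired buffer and
 * release it with kmem_free().  The buffer size and tag are hypothetical.
 */
#if 0	/* example only */
static void
example_wired_buffer(void)
{
	vm_offset_t	buf;
	vm_size_t	buf_size = 4 * PAGE_SIZE;

	if (kmem_alloc(kernel_map, &buf, buf_size, VM_KERN_MEMORY_KALLOC) == KERN_SUCCESS) {
		/* ... memory is wired but not zero-filled ... */
		kmem_free(kernel_map, buf, buf_size);
	}
}
#endif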
/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t
kmem_realloc(
	vm_map_t		map,
	vm_offset_t		oldaddr,
	vm_size_t		oldsize,
	vm_offset_t		*newaddrp,
	vm_size_t		newsize,
	vm_tag_t		tag)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		oldmapmin;
	vm_map_offset_t		oldmapmax;
	vm_map_offset_t		newmapaddr;
	vm_map_size_t		oldmapsize;
	vm_map_size_t		newmapsize;
	vm_map_entry_t		oldentry;
	vm_map_entry_t		newentry;
	vm_page_t		mem;
	kern_return_t		kr;

	oldmapmin = vm_map_trunc_page(oldaddr, VM_MAP_PAGE_MASK(map));
	oldmapmax = vm_map_round_page(oldaddr + oldsize, VM_MAP_PAGE_MASK(map));
	oldmapsize = oldmapmax - oldmapmin;
	newmapsize = vm_map_round_page(newsize, VM_MAP_PAGE_MASK(map));

	/*
	 *	Find the VM object backing the old region.
	 */

	vm_map_lock(map);

	if (!vm_map_lookup_entry(map, oldmapmin, &oldentry))
		panic("kmem_realloc");
	object = VME_OBJECT(oldentry);

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */

	vm_object_reference(object);
	/* by grabbing the object lock before unlocking the map */
	/* we guarantee that we will panic if more than one     */
	/* attempt is made to realloc a kmem_alloc'd area       */
	vm_object_lock(object);
	vm_map_unlock(map);
	if (object->vo_size != oldmapsize)
		panic("kmem_realloc");
	object->vo_size = newmapsize;
	vm_object_unlock(object);

	/* allocate the new pages while expanded portion of the */
	/* object is still not mapped */
	kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
			 vm_object_round_page(newmapsize - oldmapsize));

	/*
	 *	Find space for the new region.
	 */

	kr = vm_map_find_space(map, &newmapaddr, newmapsize,
			       (vm_map_offset_t) 0, 0, &newentry);
	if (kr != KERN_SUCCESS) {
		vm_object_lock(object);
		for (offset = oldmapsize;
		     offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return kr;
	}
	VME_OBJECT_SET(newentry, object);
	VME_OFFSET_SET(newentry, 0);
	VME_ALIAS_SET(newentry, tag);
	assert(newentry->wired_count == 0);


	/* add an extra reference in case we have someone doing an */
	/* unexpected deallocate */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize,
			 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(tag), FALSE);
	if (KERN_SUCCESS != kr) {
		vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0);
		vm_object_lock(object);
		for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return kr;
	}
	vm_object_deallocate(object);

	*newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
	return KERN_SUCCESS;
}
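/*
 * Usage sketch (illustrative only; old_region, old_size, and new_size are
 * hypothetical): grow a kmem_alloc'd region.  Note the doubled mapping
 * described above -- on success the old range is still mapped and must be
 * released separately with kmem_free().
 */
#if 0	/* example only */
	vm_offset_t	new_region;

	if (kmem_realloc(kernel_map, old_region, old_size,
			 &new_region, new_size, VM_KERN_MEMORY_KALLOC) == KERN_SUCCESS) {
		kmem_free(kernel_map, old_region, old_size);
		old_region = new_region;
	}
#endif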
/*
 *	kmem_alloc_kobject:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_kobject_external(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return (kmem_alloc_kobject(map, addrp, size, vm_tag_bt()));
}

kern_return_t
kmem_alloc_kobject(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size,
	vm_tag_t	tag)
{
	return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag);
}
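/*
 * Illustrative call (not from this file): kmem_alloc_kobject() is the usual
 * choice for long-lived kernel-private buffers that will never be copied or
 * reallocated.  The size and tag are hypothetical.
 */
#if 0	/* example only */
	vm_offset_t	addr;
	kern_return_t	kr;

	kr = kmem_alloc_kobject(kernel_map, &addr, 3 * PAGE_SIZE, VM_KERN_MEMORY_OSFMK);
#endif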
/*
 *	kmem_alloc_aligned:
 *
 *	Like kmem_alloc_kobject, except that the memory is aligned.
 *	The size should be a power-of-2.
 */

kern_return_t
kmem_alloc_aligned(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size,
	vm_tag_t	tag)
{
	if ((size & (size - 1)) != 0)
		panic("kmem_alloc_aligned: size not aligned");
	return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag);
}
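/*
 * Illustrative call (not from this file): the size doubles as the alignment
 * mask, so it must be a power of two -- e.g. a 64KB request comes back
 * 64KB-aligned.
 */
#if 0	/* example only */
	vm_offset_t	addr;
	kern_return_t	kr;

	kr = kmem_alloc_aligned(kernel_map, &addr, 64 * 1024, VM_KERN_MEMORY_OSFMK);
#endif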
/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable_external(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return (kmem_alloc_pageable(map, addrp, size, vm_tag_bt()));
}

kern_return_t
kmem_alloc_pageable(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size,
	vm_tag_t	tag)
{
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

#ifndef normal
	map_addr = (vm_map_min(map)) + PAGE_SIZE;
#else
	map_addr = vm_map_min(map);
#endif
	map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(map));

	kr = vm_map_enter(map, &map_addr, map_size,
			  (vm_map_offset_t) 0,
			  VM_FLAGS_ANYWHERE | VM_MAKE_TAG(tag),
			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS)
		return kr;

	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;
}
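/*
 * Illustrative call (not from this file): pageable allocations only reserve
 * and map virtual space; pages are faulted in on first touch and may be
 * paged out again.  The map, size, and tag shown are example choices.
 */
#if 0	/* example only */
	vm_offset_t	addr;
	kern_return_t	kr;

	kr = kmem_alloc_pageable(ipc_kernel_map, &addr, 16 * PAGE_SIZE, VM_KERN_MEMORY_IPC);
#endif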
/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
 *	and return the physical pages associated with that region.
 */

void
kmem_free(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size)
{
	kern_return_t kr;

	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);

	TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);

	if (size == 0) {
		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
		return;
	}

	kr = vm_map_remove(map,
			   vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
			   vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
			   VM_MAP_REMOVE_KUNWIRE);
	if (kr != KERN_SUCCESS)
		panic("kmem_free");
}
/*
 *	Allocate new pages in an object.
 */
kern_return_t
kmem_alloc_pages(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size)
{
	vm_object_size_t	alloc_size;

	alloc_size = vm_object_round_page(size);
	vm_object_lock(object);
	while (alloc_size) {
		vm_page_t	mem;

		/*
		 *	Allocate a page
		 */
		while (VM_PAGE_NULL ==
		       (mem = vm_page_alloc(object, offset))) {
			vm_object_unlock(object);
			VM_PAGE_WAIT();
			vm_object_lock(object);
		}
		mem->busy = FALSE;

		alloc_size -= PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);
	return KERN_SUCCESS;
}
/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	addr		Address of start of range (IN/OUT)
 *	size		Size of range to find
 *	pageable	Can region be paged
 *	anywhere	Can region be located anywhere in map
 *	new_map		Pointer to new submap
 */
kern_return_t
kmem_suballoc(
	vm_map_t	parent,
	vm_offset_t	*addr,
	vm_size_t	size,
	boolean_t	pageable,
	int		flags,
	vm_map_t	*new_map)
{
	vm_map_t	map;
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

	map_size = vm_map_round_page(size, VM_MAP_PAGE_MASK(parent));

	/*
	 *	Need reference on submap object because it is internal
	 *	to the vm_system.  vm_object_enter will never be called
	 *	on it (usual source of reference for vm_map_enter).
	 */
	vm_object_reference(vm_submap_object);

	map_addr = ((flags & VM_FLAGS_ANYWHERE)
		    ? vm_map_min(parent)
		    : vm_map_trunc_page(*addr, VM_MAP_PAGE_MASK(parent)));

	kr = vm_map_enter(parent, &map_addr, map_size,
			  (vm_map_offset_t) 0, flags,
			  vm_submap_object, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(vm_submap_object);
		return kr;
	}

	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
	if (map == VM_MAP_NULL)
		panic("kmem_suballoc: vm_map_create failed");	/* "can't happen" */
	/* inherit the parent map's page size */
	vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));

	kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * See comment preceding vm_map_submap().
		 */
		vm_map_remove(parent, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS);
		vm_map_deallocate(map);	/* also removes ref to pmap */
		vm_object_deallocate(vm_submap_object);
		return kr;
	}
	*addr = CAST_DOWN(vm_offset_t, map_addr);
	*new_map = map;
	return (KERN_SUCCESS);
}
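/*
 * Illustrative sketch (not from this file): carving a wired submap out of
 * kernel_map, in the style of the zone or IPC submaps.  The size, tag, and
 * flag values are assumptions for the example.
 */
#if 0	/* example only */
static vm_map_t
example_create_submap(vm_size_t submap_size)
{
	vm_map_t	submap = VM_MAP_NULL;
	vm_offset_t	base = 0;

	if (kmem_suballoc(kernel_map, &base, submap_size,
			  FALSE,	/* not pageable */
			  VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_OSFMK),
			  &submap) != KERN_SUCCESS)
		return VM_MAP_NULL;
	return submap;
}
#endif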
/*
 *	kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
void
kmem_init(
	vm_offset_t	start,
	vm_offset_t	end)
{
	vm_map_offset_t map_start;
	vm_map_offset_t map_end;

	map_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(kernel_map));

	kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
				   map_end, FALSE);
	/*
	 *	Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		vm_map_offset_t map_addr;
		kern_return_t kr;

		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
		kr = vm_map_enter(kernel_map,
				  &map_addr,
				  (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
				  (vm_map_offset_t) 0,
				  VM_FLAGS_FIXED | VM_FLAGS_NO_PMAP_CHECK,
				  VM_OBJECT_NULL,
				  (vm_object_offset_t) 0, FALSE,
				  VM_PROT_NONE, VM_PROT_NONE,
				  VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
			      (uint64_t) start, (uint64_t) end,
			      (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			      (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			      kr);
		}
	}

	/*
	 * Set the default global user wire limit which limits the amount of
	 * memory that can be locked via mlock().  We set this to the total
	 * amount of memory that is potentially usable by a user app (max_mem)
	 * minus a certain amount.  This can be overridden via a sysctl.
	 */
	vm_global_no_user_wire_amount = MIN(max_mem * 20 / 100,
					    VM_NOT_USER_WIREABLE);
	vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;

	/* the default per user limit is the same as the global limit */
	vm_user_wire_limit = vm_global_user_wire_limit;
}
/*
 *	Routine:	copyinmap
 *	Purpose:
 *		Like copyin, except that fromaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyinmap(
	vm_map_t		map,
	vm_map_offset_t		fromaddr,
	void			*todata,
	vm_size_t		length)
{
	kern_return_t	kr = KERN_SUCCESS;
	vm_map_t	oldmap;

	if (vm_map_pmap(map) == pmap_kernel())
	{
		/* assume a correct copy */
		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
	}
	else if (current_map() == map)
	{
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
	}
	else
	{
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
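/*
 * Illustrative call (not from this file): copy a user structure out of a
 * task's map that may not be the current map.  The struct, map, and address
 * names are hypothetical.
 */
#if 0	/* example only */
	struct example_args	args;

	if (copyinmap(task_map, user_args_addr, &args, sizeof(args)) != KERN_SUCCESS)
		return KERN_INVALID_ADDRESS;
#endif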
/*
 *	Routine:	copyoutmap
 *	Purpose:
 *		Like copyout, except that toaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyoutmap(
	vm_map_t		map,
	void			*fromdata,
	vm_map_address_t	toaddr,
	vm_size_t		length)
{
	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
		return KERN_SUCCESS;
	}

	if (current_map() != map)
		return KERN_NOT_SUPPORTED;

	if (copyout(fromdata, toaddr, length) != 0)
		return KERN_INVALID_ADDRESS;

	return KERN_SUCCESS;
}
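/*
 * Illustrative call (not from this file): unlike copyinmap(), copyoutmap()
 * only supports the kernel map and the current task's map, so callers must
 * be prepared for KERN_NOT_SUPPORTED.  The names are hypothetical.
 */
#if 0	/* example only */
	kern_return_t	kr;

	kr = copyoutmap(current_map(), &result, user_result_addr, sizeof(result));
#endif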
kern_return_t
vm_conflict_check(
	vm_map_t		map,
	vm_map_offset_t		off,
	vm_map_size_t		len,
	memory_object_t		pager,
	vm_object_offset_t	file_off)
{
	vm_map_entry_t		entry;
	vm_object_t		obj;
	vm_object_offset_t	obj_off;
	vm_map_t		base_map;
	vm_map_offset_t		base_offset;
	vm_map_offset_t		original_offset;
	kern_return_t		kr;
	vm_map_size_t		local_len;

	base_map = map;
	base_offset = off;
	original_offset = off;
	kr = KERN_SUCCESS;

	vm_map_lock(map);
	while (vm_map_lookup_entry(map, off, &entry)) {
		local_len = len;

		if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
			vm_map_unlock(map);
			return KERN_SUCCESS;
		}
		if (entry->is_sub_map) {
			vm_map_t	old_map;

			old_map = map;
			vm_map_lock(VME_SUBMAP(entry));
			map = VME_SUBMAP(entry);
			off = VME_OFFSET(entry) + (off - entry->vme_start);
			vm_map_unlock(old_map);
			continue;
		}
		obj = VME_OBJECT(entry);
		obj_off = (off - entry->vme_start) + VME_OFFSET(entry);
		while (obj->shadow) {
			obj_off += obj->vo_shadow_offset;
			obj = obj->shadow;
		}
		if ((obj->pager_created) && (obj->pager == pager)) {
			if (((obj->paging_offset) + obj_off) == file_off) {
				if (off != base_offset) {
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				kr = KERN_ALREADY_WAITING;
			} else {
				vm_object_offset_t	obj_off_aligned;
				vm_object_offset_t	file_off_aligned;

				obj_off_aligned = obj_off & ~PAGE_MASK;
				file_off_aligned = file_off & ~PAGE_MASK;

				if (file_off_aligned == (obj->paging_offset + obj_off_aligned)) {
					/*
					 * the target map and the file offset start in the same page
					 * but are not identical...
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off < (obj->paging_offset + obj_off_aligned)) &&
				    ((file_off + len) > (obj->paging_offset + obj_off_aligned))) {
					/*
					 * some portion of the tail of the I/O will fall
					 * within the encompass of the target map
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off_aligned > (obj->paging_offset + obj_off)) &&
				    (file_off_aligned < (obj->paging_offset + obj_off) + len)) {
					/*
					 * the beginning page of the file offset falls within
					 * the target map's encompass
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
			}
		} else if (kr != KERN_SUCCESS) {
			vm_map_unlock(map);
			return KERN_FAILURE;
		}

		if (len <= ((entry->vme_end - entry->vme_start) -
			    (off - entry->vme_start))) {
			vm_map_unlock(map);
			return kr;
		} else {
			len -= (entry->vme_end - entry->vme_start) -
			       (off - entry->vme_start);
		}
		base_offset = base_offset + (local_len - len);
		file_off = file_off + (local_len - len);
		off = base_offset;
		if (map != base_map) {
			vm_map_unlock(map);
			vm_map_lock(base_map);
			map = base_map;
		}
	}

	vm_map_unlock(map);
	return kr;
}
/*
 * The following two functions are to be used when exposing kernel
 * addresses to userspace via any of the various debug or info
 * facilities that exist.  These are basically the same as VM_KERNEL_ADDRPERM()
 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
 * are exported to KEXTs.
 *
 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
 */

/*
 *	vm_kernel_addrperm_external:
 *
 *	Used when exposing an address to userspace which is in the kernel's
 *	"heap".  These addresses are not loaded from anywhere and are therefore
 *	not slid.  We apply a permutation value to obscure the address.
 */
void
vm_kernel_addrperm_external(
	vm_offset_t addr,
	vm_offset_t *perm_addr)
{
	if (addr == 0) {
		*perm_addr = 0;
		return;
	}

	*perm_addr = (addr + vm_kernel_addrperm_ext);
	return;
}
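/*
 * Illustrative use (not from this file): obscure a heap pointer before
 * reporting it to userspace, e.g. from a sysctl or debug interface.  The
 * object pointer is hypothetical.
 */
#if 0	/* example only */
	vm_offset_t	obscured;

	vm_kernel_addrperm_external((vm_offset_t)some_kernel_object, &obscured);
	/* hand 'obscured' to userspace; the real address is not recoverable */
#endif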
/*
 *	vm_kernel_unslide_or_perm_external:
 *
 *	Use this function when exposing an address to userspace that could come
 *	from either kernel text/data *or* the heap.
 */
void
vm_kernel_unslide_or_perm_external(
	vm_offset_t addr,
	vm_offset_t *up_addr)
{
	if (VM_KERNEL_IS_SLID(addr)) {
		*up_addr = addr - vm_kernel_slide;
		return;
	}

	vm_kernel_addrperm_external(addr, up_addr);
	return;
}
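/*
 * Illustrative use (not from this file): for an address that might be a slid
 * text/data pointer or a heap pointer, let the routine pick between
 * unsliding and permutation.  The input variable is hypothetical.
 */
#if 0	/* example only */
	vm_offset_t	exposed;

	vm_kernel_unslide_or_perm_external((vm_offset_t)return_address, &exposed);
#endif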