2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
40 * Carnegie Mellon requests users of this software to return to
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
57 * Kernel memory management.
60 #include <mach/kern_return.h>
61 #include <mach/vm_param.h>
62 #include <kern/assert.h>
63 #include <kern/lock.h>
64 #include <kern/thread.h>
65 #include <vm/vm_kern.h>
66 #include <vm/vm_map.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_pageout.h>
70 #include <kern/misc_protos.h>
75 * Variables exported by this module.
79 vm_map_t kernel_pageable_map
;
82 * Forward declarations for internal functions.
/*
 * Prototype for kmem_alloc_pages: takes a VM object, an offset into it and
 * a size, and reports a kern_return_t status.
 */
84 extern kern_return_t
kmem_alloc_pages(
85 register vm_object_t object
,
86 register vm_object_offset_t offset
,
87 register vm_object_size_t size
);
/*
 * Prototype for kmem_remap_pages: takes a VM object, an offset into it, a
 * start/end address pair and a protection value; it returns nothing.
 */
89 extern void kmem_remap_pages(
90 register vm_object_t object
,
91 register vm_object_offset_t offset
,
92 register vm_offset_t start
,
93 register vm_offset_t end
,
94 vm_prot_t protection
);
105 vm_object_offset_t offset
;
106 vm_map_offset_t map_addr
;
107 vm_map_offset_t map_mask
;
108 vm_map_size_t map_size
, i
;
109 vm_map_entry_t entry
;
113 if (map
== VM_MAP_NULL
|| (flags
&& (flags
^ KMA_KOBJECT
)))
114 return KERN_INVALID_ARGUMENT
;
118 return KERN_INVALID_ARGUMENT
;
121 map_size
= vm_map_round_page(size
);
122 map_mask
= (vm_map_offset_t
)mask
;
125 * Allocate a new object (if necessary) and the reference we
126 * will be donating to the map entry. We must do this before
127 * locking the map, or risk deadlock with the default pager.
129 if ((flags
& KMA_KOBJECT
) != 0) {
130 object
= kernel_object
;
131 vm_object_reference(object
);
133 object
= vm_object_allocate(map_size
);
136 kr
= vm_map_find_space(map
, &map_addr
, map_size
, map_mask
, &entry
);
137 if (KERN_SUCCESS
!= kr
) {
138 vm_object_deallocate(object
);
142 entry
->object
.vm_object
= object
;
143 entry
->offset
= offset
= (object
== kernel_object
) ?
144 map_addr
- VM_MIN_KERNEL_ADDRESS
: 0;
146 /* Take an extra object ref in case the map entry gets deleted */
147 vm_object_reference(object
);
150 kr
= cpm_allocate(CAST_DOWN(vm_size_t
, map_size
), &pages
, FALSE
);
152 if (kr
!= KERN_SUCCESS
) {
153 vm_map_remove(map
, vm_map_trunc_page(map_addr
),
154 vm_map_round_page(map_addr
+ map_size
), 0);
155 vm_object_deallocate(object
);
160 vm_object_lock(object
);
161 for (i
= 0; i
< map_size
; i
+= PAGE_SIZE
) {
163 pages
= NEXT_PAGE(m
);
165 vm_page_insert(m
, object
, offset
+ i
);
167 vm_object_unlock(object
);
169 if ((kr
= vm_map_wire(map
, vm_map_trunc_page(map_addr
),
170 vm_map_round_page(map_addr
+ map_size
), VM_PROT_DEFAULT
, FALSE
))
172 if (object
== kernel_object
) {
173 vm_object_lock(object
);
174 vm_object_page_remove(object
, offset
, offset
+ map_size
);
175 vm_object_unlock(object
);
177 vm_map_remove(map
, vm_map_trunc_page(map_addr
),
178 vm_map_round_page(map_addr
+ map_size
), 0);
179 vm_object_deallocate(object
);
182 vm_object_deallocate(object
);
184 if (object
== kernel_object
)
185 vm_map_simplify(map
, map_addr
);
192 * Master entry point for allocating kernel memory.
193 * NOTE: this routine is _never_ interrupt safe.
195 * map : map to allocate into
196 * addrp : pointer to start address of new memory
197 * size : size of memory requested
199 * KMA_HERE *addrp is base address, else "anywhere"
200 * KMA_NOPAGEWAIT don't wait for pages if unavailable
201 * KMA_KOBJECT use kernel_object
/*
 * kernel_memory_allocate: reserve a page-rounded range in map, back it with
 * pages, wire it, and hand the address back through *addrp.
 * NOTE(review): this listing skips several source lines (the return type,
 * the flags parameter, some locals, braces and return statements), so the
 * comments below describe only the statements that are visible.
 */
205 kernel_memory_allocate(
206 register vm_map_t map
,
207 register vm_offset_t
*addrp
,
208 register vm_size_t size
,
209 register vm_offset_t mask
,
213 vm_object_offset_t offset
;
214 vm_map_entry_t entry
;
215 vm_map_offset_t map_addr
;
216 vm_map_offset_t map_mask
;
217 vm_map_size_t map_size
;
/* Argument rejection path; the condition guarding it is not part of this listing. */
223 return KERN_INVALID_ARGUMENT
;
/* Round the request up to whole pages; mask is forwarded unchanged to vm_map_find_space. */
226 map_size
= vm_map_round_page(size
);
227 map_mask
= (vm_map_offset_t
) mask
;
230 * Allocate a new object (if necessary). We must do this before
231 * locking the map, or risk deadlock with the default pager.
/*
 * KMA_KOBJECT: share kernel_object and take a reference on it. The second
 * assignment allocates a private object of map_size (the keyword joining
 * the two branches is not shown).
 */
233 if ((flags
& KMA_KOBJECT
) != 0) {
234 object
= kernel_object
;
235 vm_object_reference(object
);
237 object
= vm_object_allocate(map_size
);
/* Ask the map for map_size bytes using map_mask; on failure drop the object reference. */
240 kr
= vm_map_find_space(map
, &map_addr
, map_size
, map_mask
, &entry
);
241 if (KERN_SUCCESS
!= kr
) {
242 vm_object_deallocate(object
);
/*
 * Attach the object to the new entry. For kernel_object the offset is the
 * distance of map_addr from VM_MIN_KERNEL_ADDRESS, otherwise it is 0.
 */
246 entry
->object
.vm_object
= object
;
247 entry
->offset
= offset
= (object
== kernel_object
) ?
248 map_addr
- VM_MIN_KERNEL_ADDRESS
: 0;
/* Second reference; it is released again further down once wiring has succeeded. */
250 vm_object_reference(object
);
/* Populate the object one PAGE_SIZE step at a time while holding the object lock. */
253 vm_object_lock(object
);
254 for (i
= 0; i
< map_size
; i
+= PAGE_SIZE
) {
257 while (VM_PAGE_NULL
==
258 (mem
= vm_page_alloc(object
, offset
+ i
))) {
/*
 * KMA_NOPAGEWAIT: do not retry. Pages already placed in kernel_object are
 * removed, the mapping is torn down, one reference is dropped and
 * KERN_RESOURCE_SHORTAGE is returned.
 */
259 if (flags
& KMA_NOPAGEWAIT
) {
260 if (object
== kernel_object
)
261 vm_object_page_remove(object
, offset
, offset
+ i
);
262 vm_object_unlock(object
);
263 vm_map_remove(map
, map_addr
, map_addr
+ map_size
, 0);
264 vm_object_deallocate(object
);
265 return KERN_RESOURCE_SHORTAGE
;
/*
 * Otherwise the lock is dropped and retaken before vm_page_alloc is tried
 * again. The statement in between is not shown; presumably it waits for
 * free pages - TODO confirm.
 */
267 vm_object_unlock(object
);
269 vm_object_lock(object
);
273 vm_object_unlock(object
);
/*
 * Wire the new range. The test completing this condition is not shown; the
 * statements that follow undo the allocation: kernel_object pages are
 * removed explicitly, then the mapping and one reference are dropped.
 */
275 if ((kr
= vm_map_wire(map
, map_addr
, map_addr
+ map_size
, VM_PROT_DEFAULT
, FALSE
))
277 if (object
== kernel_object
) {
278 vm_object_lock(object
);
279 vm_object_page_remove(object
, offset
, offset
+ map_size
);
280 vm_object_unlock(object
);
282 vm_map_remove(map
, map_addr
, map_addr
+ map_size
, 0);
283 vm_object_deallocate(object
);
286 /* now that the page is wired, we no longer have to fear coalesce */
287 vm_object_deallocate(object
);
/* Only entries backed by kernel_object are handed to vm_map_simplify. */
288 if (object
== kernel_object
)
289 vm_map_simplify(map
, map_addr
);
292 * Return the memory, not zeroed.
294 *addrp
= CAST_DOWN(vm_offset_t
, map_addr
);
301 * Allocate wired-down memory in the kernel's address map
302 * or a submap. The memory is not zero-filled.
311 return kernel_memory_allocate(map
, addrp
, size
, 0, 0);
317 * Reallocate wired-down memory in the kernel's address map
318 * or a submap. Newly allocated pages are not zeroed.
319 * This can only be used on regions allocated with kmem_alloc.
321 * If successful, the pages in the old region are mapped twice.
322 * The old region is unchanged. Use kmem_free to get rid of it.
/*
 * Body of the reallocation routine (its panic strings name it kmem_realloc).
 * It grows the VM object behind an existing region, allocates the extra
 * pages, maps the whole object at a new address and wires it; the new
 * address is stored through *newaddrp.
 * NOTE(review): the function header, some locals, braces and return
 * statements are not part of this listing.
 */
329 vm_offset_t
*newaddrp
,
333 vm_object_offset_t offset
;
334 vm_map_offset_t oldmapmin
;
335 vm_map_offset_t oldmapmax
;
336 vm_map_offset_t newmapaddr
;
337 vm_map_size_t oldmapsize
;
338 vm_map_size_t newmapsize
;
339 vm_map_entry_t oldentry
;
340 vm_map_entry_t newentry
;
/* Page-align the old range and round the new size up to whole pages. */
344 oldmapmin
= vm_map_trunc_page(oldaddr
);
345 oldmapmax
= vm_map_round_page(oldaddr
+ oldsize
);
346 oldmapsize
= oldmapmax
- oldmapmin
;
347 newmapsize
= vm_map_round_page(newsize
);
351 * Find the VM object backing the old region.
/* A failed lookup of the old region is treated as fatal. */
356 if (!vm_map_lookup_entry(map
, oldmapmin
, &oldentry
))
357 panic("kmem_realloc");
358 object
= oldentry
->object
.vm_object
;
361 * Increase the size of the object and
362 * fill in the new region.
365 vm_object_reference(object
);
366 /* by grabbing the object lock before unlocking the map */
367 /* we guarantee that we will panic if more than one */
368 /* attempt is made to realloc a kmem_alloc'd area */
369 vm_object_lock(object
);
/* The object must be exactly as large as the old mapping, otherwise panic. */
371 if (object
->size
!= oldmapsize
)
372 panic("kmem_realloc");
373 object
->size
= newmapsize
;
374 vm_object_unlock(object
);
376 /* allocate the new pages while expanded portion of the */
377 /* object is still not mapped */
/*
 * NOTE(review): kmem_alloc_pages is declared to return kern_return_t, but
 * the result is discarded here - confirm whether failure is possible.
 */
378 kmem_alloc_pages(object
, vm_object_round_page(oldmapsize
),
379 vm_object_round_page(newmapsize
-oldmapsize
));
382 * Find space for the new region.
385 kr
= vm_map_find_space(map
, &newmapaddr
, newmapsize
,
386 (vm_map_offset_t
) 0, &newentry
);
/*
 * No space: visit every page offset from oldmapsize up to newmapsize, look
 * the page up (the statement between the queue lock and unlock is not
 * shown), restore the old object size and drop the reference taken above.
 */
387 if (kr
!= KERN_SUCCESS
) {
388 vm_object_lock(object
);
389 for(offset
= oldmapsize
;
390 offset
< newmapsize
; offset
+= PAGE_SIZE
) {
391 if ((mem
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
392 vm_page_lock_queues();
394 vm_page_unlock_queues();
397 object
->size
= oldmapsize
;
398 vm_object_unlock(object
);
399 vm_object_deallocate(object
);
/* Map the whole object, from offset 0, at the newly found address. */
402 newentry
->object
.vm_object
= object
;
403 newentry
->offset
= 0;
404 assert (newentry
->wired_count
== 0);
407 /* add an extra reference in case we have someone doing an */
408 /* unexpected deallocate */
409 vm_object_reference(object
);
412 kr
= vm_map_wire(map
, newmapaddr
, newmapaddr
+ newmapsize
, VM_PROT_DEFAULT
, FALSE
);
/* Wiring failed: remove the new mapping and undo the growth of the object. */
413 if (KERN_SUCCESS
!= kr
) {
414 vm_map_remove(map
, newmapaddr
, newmapaddr
+ newmapsize
, 0);
415 vm_object_lock(object
);
/*
 * NOTE(review): this loop starts at oldsize, while the equivalent cleanup
 * loop after vm_map_find_space starts at oldmapsize. oldsize is the
 * unrounded caller value, so when it is not a multiple of PAGE_SIZE the
 * offsets visited here are not page aligned. Looks like oldmapsize was
 * intended - confirm.
 */
416 for(offset
= oldsize
; offset
< newmapsize
; offset
+= PAGE_SIZE
) {
417 if ((mem
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
418 vm_page_lock_queues();
420 vm_page_unlock_queues();
423 object
->size
= oldmapsize
;
424 vm_object_unlock(object
);
425 vm_object_deallocate(object
);
/* Success path: release the extra reference and publish the new address. */
428 vm_object_deallocate(object
);
430 *newaddrp
= CAST_DOWN(vm_offset_t
, newmapaddr
);
437 * Allocate wired-down memory in the kernel's address map
438 * or a submap. The memory is not zero-filled.
440 * The memory is allocated in the kernel_object.
441 * It may not be copied with vm_map_copy, and
442 * it may not be reallocated with kmem_realloc.
451 return kernel_memory_allocate(map
, addrp
, size
, 0, KMA_KOBJECT
);
455 * kmem_alloc_aligned:
457 * Like kmem_alloc_wired, except that the memory is aligned.
458 * The size should be a power-of-2.
/*
 * Body of kmem_alloc_aligned (function header not part of this listing).
 * size & (size - 1) is zero only when at most one bit of size is set, so
 * any other value panics. NOTE(review): size == 0 also passes this test.
 */
467 if ((size
& (size
- 1)) != 0)
468 panic("kmem_alloc_aligned: size not aligned");
/* Delegate to kernel_memory_allocate with mask size - 1 and KMA_KOBJECT. */
469 return kernel_memory_allocate(map
, addrp
, size
, size
- 1, KMA_KOBJECT
);
473 * kmem_alloc_pageable:
475 * Allocate pageable memory in the kernel's address map.
/*
 * Body of kmem_alloc_pageable (function header not part of this listing).
 * Enters a page-rounded range into map with VM_FLAGS_ANYWHERE and no backing
 * object (VM_OBJECT_NULL), then stores the chosen address through *addrp.
 */
484 vm_map_offset_t map_addr
;
485 vm_map_size_t map_size
;
/*
 * Two alternative starting hints for map_addr follow; the lines selecting
 * between them are not shown (presumably a preprocessor conditional - TODO
 * confirm).
 */
489 map_addr
= (vm_map_min(map
)) + 0x1000;
491 map_addr
= vm_map_min(map
);
493 map_size
= vm_map_round_page(size
);
495 kr
= vm_map_enter(map
, &map_addr
, map_size
,
496 (vm_map_offset_t
) 0, VM_FLAGS_ANYWHERE
,
497 VM_OBJECT_NULL
, (vm_object_offset_t
) 0, FALSE
,
498 VM_PROT_DEFAULT
, VM_PROT_ALL
, VM_INHERIT_DEFAULT
);
/* The statement executed on failure is not part of this listing. */
500 if (kr
!= KERN_SUCCESS
)
503 *addrp
= CAST_DOWN(vm_offset_t
, map_addr
);
510 * Release a region of kernel virtual memory allocated
511 * with kmem_alloc, kmem_alloc_wired, or kmem_alloc_pageable,
512 * and return the physical pages associated with that region.
523 kr
= vm_map_remove(map
, vm_map_trunc_page(addr
),
524 vm_map_round_page(addr
+ size
),
525 VM_MAP_REMOVE_KUNWIRE
);
526 if (kr
!= KERN_SUCCESS
)
531 * Allocate new pages in an object.
/*
 * Parameter list and body matching the kmem_alloc_pages prototype (the line
 * carrying the function name is not part of this listing).
 * Works on a page-rounded copy of size, under the object lock.
 */
536 register vm_object_t object
,
537 register vm_object_offset_t offset
,
538 register vm_object_size_t size
)
540 vm_object_size_t alloc_size
;
542 alloc_size
= vm_object_round_page(size
);
543 vm_object_lock(object
);
545 register vm_page_t mem
;
/*
 * Retry vm_page_alloc until it yields a page. Between attempts the object
 * lock is dropped and retaken; the statement in between is not shown
 * (presumably a wait for free pages - TODO confirm).
 */
551 while (VM_PAGE_NULL
==
552 (mem
= vm_page_alloc(object
, offset
))) {
553 vm_object_unlock(object
);
555 vm_object_lock(object
);
/* One page of the request has been handled. */
559 alloc_size
-= PAGE_SIZE
;
562 vm_object_unlock(object
);
567 * Remap wired pages in an object into a new region.
568 * The object is assumed to be mapped into the kernel map or a submap.
/*
 * Parameter list and body matching the kmem_remap_pages prototype (the line
 * carrying the function name is not part of this listing).
 * Walks the page-aligned range built from start and end, and enters the
 * object page found for each step into kernel_pmap with the given protection.
 */
573 register vm_object_t object
,
574 register vm_object_offset_t offset
,
575 register vm_offset_t start
,
576 register vm_offset_t end
,
577 vm_prot_t protection
)
580 vm_map_offset_t map_start
;
581 vm_map_offset_t map_end
;
584 * Mark the pmap region as not pageable.
586 map_start
= vm_map_trunc_page(start
);
587 map_end
= vm_map_round_page(end
);
589 pmap_pageable(kernel_pmap
, map_start
, map_end
, FALSE
);
/* One iteration per PAGE_SIZE step of the address range. */
591 while (map_start
< map_end
) {
592 register vm_page_t mem
;
594 vm_object_lock(object
);
/* The page must already exist in the object; a missing page is fatal. */
599 if ((mem
= vm_page_lookup(object
, offset
)) == VM_PAGE_NULL
)
600 panic("kmem_remap_pages");
603 * Wire it down (again)
/* The statement between the queue lock and unlock is not part of this listing. */
605 vm_page_lock_queues();
607 vm_page_unlock_queues();
608 vm_object_unlock(object
);
612 * The page is supposed to be wired now, so it
613 * shouldn't be encrypted at this point. It can
614 * safely be entered in the page table.
616 ASSERT_PAGE_DECRYPTED(mem
);
619 * Enter it in the kernel pmap. The page isn't busy,
620 * but this shouldn't be a problem because it is wired.
/* The wimg_bits of the page's object are passed along; the rest of the argument list is not shown. */
622 PMAP_ENTER(kernel_pmap
, map_start
, mem
, protection
,
623 ((unsigned int)(mem
->object
->wimg_bits
))
627 map_start
+= PAGE_SIZE
;
635 * Allocates a map to manage a subrange
636 * of the kernel virtual address space.
638 * Arguments are as follows:
640 * parent Map to take range from
641 * addr Address of start of range (IN/OUT)
642 * size Size of range to find
643 * pageable Can region be paged
644 * flags VM_FLAGS_* options; VM_FLAGS_ANYWHERE lets the region be located anywhere in map
645 * new_map Pointer to new submap
/*
 * Body of kmem_suballoc (function header not part of this listing).
 * Reserves a page-rounded range in parent backed by vm_submap_object,
 * creates a new map over that range on the parent pmap, installs it with
 * vm_map_submap, and stores the range address through *addr.
 */
657 vm_map_offset_t map_addr
;
658 vm_map_size_t map_size
;
661 map_size
= vm_map_round_page(size
);
664 * Need reference on submap object because it is internal
665 * to the vm_system. vm_object_enter will never be called
666 * on it (usual source of reference for vm_map_enter).
668 vm_object_reference(vm_submap_object
);
/* With VM_FLAGS_ANYWHERE start from the bottom of parent, else from the truncated *addr. */
670 map_addr
= (flags
& VM_FLAGS_ANYWHERE
) ?
671 vm_map_min(parent
) : vm_map_trunc_page(*addr
);
673 kr
= vm_map_enter(parent
, &map_addr
, map_size
,
674 (vm_map_offset_t
) 0, flags
,
675 vm_submap_object
, (vm_object_offset_t
) 0, FALSE
,
676 VM_PROT_DEFAULT
, VM_PROT_ALL
, VM_INHERIT_DEFAULT
);
/* Reservation failed: give back the reference taken on vm_submap_object. */
677 if (kr
!= KERN_SUCCESS
) {
678 vm_object_deallocate(vm_submap_object
);
/* The new map uses the parent pmap, so a pmap reference is taken before creating it. */
682 pmap_reference(vm_map_pmap(parent
));
683 map
= vm_map_create(vm_map_pmap(parent
), map_addr
, map_addr
+ map_size
, pageable
);
684 if (map
== VM_MAP_NULL
)
685 panic("kmem_suballoc: vm_map_create failed"); /* "can't happen" */
687 kr
= vm_map_submap(parent
, map_addr
, map_addr
+ map_size
, map
, map_addr
, FALSE
);
/* Installing the submap failed: undo the reservation, the new map and the object reference. */
688 if (kr
!= KERN_SUCCESS
) {
690 * See comment preceding vm_map_submap().
692 vm_map_remove(parent
, map_addr
, map_addr
+ map_size
, VM_MAP_NO_FLAGS
);
693 vm_map_deallocate(map
); /* also removes ref to pmap */
694 vm_object_deallocate(vm_submap_object
);
697 *addr
= CAST_DOWN(vm_offset_t
, map_addr
);
699 return (KERN_SUCCESS
);
705 * Initialize the kernel's virtual memory map, taking
706 * into account all memory allocated up to this time.
713 vm_map_offset_t map_start
;
714 vm_map_offset_t map_end
;
716 map_start
= vm_map_trunc_page(start
);
717 map_end
= vm_map_round_page(end
);
719 kernel_map
= vm_map_create(pmap_kernel(),VM_MIN_KERNEL_ADDRESS
,
723 * Reserve virtual memory allocated up to this time.
726 if (start
!= VM_MIN_KERNEL_ADDRESS
) {
727 vm_map_offset_t map_addr
;
729 map_addr
= VM_MIN_KERNEL_ADDRESS
;
730 (void) vm_map_enter(kernel_map
,
732 (vm_map_size_t
)(map_start
- VM_MIN_KERNEL_ADDRESS
),
734 VM_FLAGS_ANYWHERE
| VM_FLAGS_NO_PMAP_CHECK
,
736 (vm_object_offset_t
) 0, FALSE
,
737 VM_PROT_DEFAULT
, VM_PROT_ALL
,
742 * Account for kernel memory (text, data, bss, vm shenanigans).
743 * This may include inaccessible "holes" as determined by what
744 * the machine-dependent init code includes in max_mem.
746 vm_page_wire_count
= (atop_64(max_mem
) - (vm_page_free_count
747 + vm_page_active_count
748 + vm_page_inactive_count
));
755 * Like copyin, except that fromaddr is an address
756 * in the specified VM map. This implementation
757 * is incomplete; it handles the current user map
758 * and the kernel map/submaps.
763 vm_map_offset_t fromaddr
,
767 kern_return_t kr
= KERN_SUCCESS
;
770 if (vm_map_pmap(map
) == pmap_kernel())
772 /* assume a correct copy */
773 memcpy(todata
, CAST_DOWN(void *, fromaddr
), length
);
775 else if (current_map() == map
)
777 if (copyin(fromaddr
, todata
, length
) != 0)
778 kr
= KERN_INVALID_ADDRESS
;
782 vm_map_reference(map
);
783 oldmap
= vm_map_switch(map
);
784 if (copyin(fromaddr
, todata
, length
) != 0)
785 kr
= KERN_INVALID_ADDRESS
;
786 vm_map_switch(oldmap
);
787 vm_map_deallocate(map
);
793 * Routine: copyoutmap
795 * Like copyout, except that toaddr is an address
796 * in the specified VM map. This implementation
797 * is incomplete; it handles the current user map
798 * and the kernel map/submaps.
/*
 * Tail of the copyoutmap parameter list and its body (the function header
 * is not part of this listing).
 */
804 vm_map_address_t toaddr
,
/* Maps that use the kernel pmap are written directly with memcpy. */
807 if (vm_map_pmap(map
) == pmap_kernel()) {
808 /* assume a correct copy */
809 memcpy(CAST_DOWN(void *, toaddr
), fromdata
, length
);
/* Any other map must be the current map; no map switch is attempted in the visible code. */
813 if (current_map() != map
)
814 return KERN_NOT_SUPPORTED
;
/* A nonzero copyout result is reported as KERN_INVALID_ADDRESS. */
816 if (copyout(fromdata
, toaddr
, length
) != 0)
817 return KERN_INVALID_ADDRESS
;
828 memory_object_t pager
,
829 vm_object_offset_t file_off
)
831 vm_map_entry_t entry
;
833 vm_object_offset_t obj_off
;
835 vm_map_offset_t base_offset
;
836 vm_map_offset_t original_offset
;
838 vm_map_size_t local_len
;
842 original_offset
= off
;
845 while(vm_map_lookup_entry(map
, off
, &entry
)) {
848 if (entry
->object
.vm_object
== VM_OBJECT_NULL
) {
852 if (entry
->is_sub_map
) {
856 vm_map_lock(entry
->object
.sub_map
);
857 map
= entry
->object
.sub_map
;
858 off
= entry
->offset
+ (off
- entry
->vme_start
);
859 vm_map_unlock(old_map
);
862 obj
= entry
->object
.vm_object
;
863 obj_off
= (off
- entry
->vme_start
) + entry
->offset
;
865 obj_off
+= obj
->shadow_offset
;
868 if((obj
->pager_created
) && (obj
->pager
== pager
)) {
869 if(((obj
->paging_offset
) + obj_off
) == file_off
) {
870 if(off
!= base_offset
) {
874 kr
= KERN_ALREADY_WAITING
;
876 vm_object_offset_t obj_off_aligned
;
877 vm_object_offset_t file_off_aligned
;
879 obj_off_aligned
= obj_off
& ~PAGE_MASK
;
880 file_off_aligned
= file_off
& ~PAGE_MASK
;
882 if (file_off_aligned
== (obj
->paging_offset
+ obj_off_aligned
)) {
884 * the target map and the file offset start in the same page
885 * but are not identical...
890 if ((file_off
< (obj
->paging_offset
+ obj_off_aligned
)) &&
891 ((file_off
+ len
) > (obj
->paging_offset
+ obj_off_aligned
))) {
893 * some portion of the tail of the I/O will fall
894 * within the encompass of the target map
899 if ((file_off_aligned
> (obj
->paging_offset
+ obj_off
)) &&
900 (file_off_aligned
< (obj
->paging_offset
+ obj_off
) + len
)) {
902 * the beginning page of the file offset falls within
903 * the target map's encompass
909 } else if(kr
!= KERN_SUCCESS
) {
914 if(len
<= ((entry
->vme_end
- entry
->vme_start
) -
915 (off
- entry
->vme_start
))) {
919 len
-= (entry
->vme_end
- entry
->vme_start
) -
920 (off
- entry
->vme_start
);
922 base_offset
= base_offset
+ (local_len
- len
);
923 file_off
= file_off
+ (local_len
- len
);
925 if(map
!= base_map
) {
927 vm_map_lock(base_map
);