/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Kernel memory management.
 */
#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>
#include <vm/cpm.h>
/*
 *	Variables exported by this module.
 */

vm_map_t	kernel_map;
vm_map_t	kernel_pageable_map;
/*
 * Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection);

extern void kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection);
kern_return_t
kmem_alloc_contig(
	vm_map_t		map,
	vm_offset_t		*addrp,
	vm_size_t		size,
	vm_offset_t		mask,
	int			flags)
{
	vm_object_t		object;
	vm_page_t		m, pages;
	kern_return_t		kr;
	vm_offset_t		addr, i;
	vm_object_offset_t	offset;
	vm_map_entry_t		entry;

	if (map == VM_MAP_NULL || (flags && (flags ^ KMA_KOBJECT)))
		return KERN_INVALID_ARGUMENT;

	if (size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	size = round_page(size);
	if ((flags & KMA_KOBJECT) == 0) {
		object = vm_object_allocate(size);
		kr = vm_map_find_space(map, &addr, size, mask, &entry);
	} else {
		object = kernel_object;
		kr = vm_map_find_space(map, &addr, size, mask, &entry);
	}

	if ((flags & KMA_KOBJECT) == 0) {
		entry->object.vm_object = object;
		entry->offset = offset = 0;
	} else {
		offset = addr - VM_MIN_KERNEL_ADDRESS;

		if (entry->object.vm_object == VM_OBJECT_NULL) {
			vm_object_reference(object);
			entry->object.vm_object = object;
			entry->offset = offset;
		}
	}

	if (kr != KERN_SUCCESS) {
		if ((flags & KMA_KOBJECT) == 0)
			vm_object_deallocate(object);
		return kr;
	}

	vm_map_unlock(map);

	kr = cpm_allocate(size, &pages, FALSE);

	if (kr != KERN_SUCCESS) {
		vm_map_remove(map, addr, addr + size, 0);
		*addrp = 0;
		return kr;
	}

	vm_object_lock(object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		vm_page_insert(m, object, offset + i);
	}
	vm_object_unlock(object);

	if ((kr = vm_map_wire(map, addr, addr + size, VM_PROT_DEFAULT, FALSE))
		!= KERN_SUCCESS) {
		if (object == kernel_object) {
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + size);
			vm_object_unlock(object);
		}
		vm_map_remove(map, addr, addr + size, 0);
		return kr;
	}
	if (object == kernel_object)
		vm_map_simplify(map, addr);

	*addrp = addr;
	return KERN_SUCCESS;
}
/*
 * Master entry point for allocating kernel memory.
 * NOTE: this routine is _never_ interrupt safe.
 *
 * map		: map to allocate into
 * addrp	: pointer to start address of new memory
 * size		: size of memory requested
 * flags	: options
 *		  KMA_HERE		*addrp is base address, else "anywhere"
 *		  KMA_NOPAGEWAIT	don't wait for pages if unavailable
 *		  KMA_KOBJECT		use kernel_object
 */

kern_return_t
kernel_memory_allocate(
	register vm_map_t	map,
	register vm_offset_t	*addrp,
	register vm_size_t	size,
	register vm_offset_t	mask,
	int			flags)
{
	vm_object_t		object = VM_OBJECT_NULL;
	vm_map_entry_t		entry;
	vm_object_offset_t	offset;
	vm_offset_t		addr;
	vm_offset_t		i;
	kern_return_t		kr;

	size = round_page(size);
	if ((flags & KMA_KOBJECT) == 0) {
		/*
		 *	Allocate a new object.  We must do this before locking
		 *	the map, or risk deadlock with the default pager:
		 *		device_read_alloc uses kmem_alloc,
		 *		which tries to allocate an object,
		 *		which uses kmem_alloc_wired to get memory,
		 *		which blocks for pages.
		 *		then the default pager needs to read a block
		 *		to process a memory_object_data_write,
		 *		and device_read_alloc calls kmem_alloc
		 *		and deadlocks on the map lock.
		 */
		object = vm_object_allocate(size);
		kr = vm_map_find_space(map, &addr, size, mask, &entry);
	} else {
		object = kernel_object;
		kr = vm_map_find_space(map, &addr, size, mask, &entry);
	}
	if (kr != KERN_SUCCESS) {
		if ((flags & KMA_KOBJECT) == 0)
			vm_object_deallocate(object);
		return kr;
	}

	if ((flags & KMA_KOBJECT) == 0) {
		entry->object.vm_object = object;
		entry->offset = offset = 0;
	} else {
		offset = addr - VM_MIN_KERNEL_ADDRESS;

		if (entry->object.vm_object == VM_OBJECT_NULL) {
			vm_object_reference(object);
			entry->object.vm_object = object;
			entry->offset = offset;
		}
	}

	/*
	 *	Since we have not given out this address yet,
	 *	it is safe to unlock the map.
	 */
	vm_map_unlock(map);

	vm_object_lock(object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		register vm_page_t	mem;

		while ((mem = vm_page_alloc(object,
				offset + (vm_object_offset_t)i))
			    == VM_PAGE_NULL) {
			if (flags & KMA_NOPAGEWAIT) {
				if (object == kernel_object)
					vm_object_page_remove(object, offset,
						offset + (vm_object_offset_t)i);
				vm_object_unlock(object);
				vm_map_remove(map, addr, addr + size, 0);
				return KERN_RESOURCE_SHORTAGE;
			}
			vm_object_unlock(object);
			VM_PAGE_WAIT();
			vm_object_lock(object);
		}
		mem->busy = FALSE;
	}
	vm_object_unlock(object);

	if ((kr = vm_map_wire(map, addr, addr + size, VM_PROT_DEFAULT, FALSE))
		!= KERN_SUCCESS) {
		if (object == kernel_object) {
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + size);
			vm_object_unlock(object);
		}
		vm_map_remove(map, addr, addr + size, 0);
		return kr;
	}
	if (object == kernel_object)
		vm_map_simplify(map, addr);

	/*
	 *	Return the memory, not zeroed.
	 */
#if	(NCPUS > 1) && i860
	bzero( addr, size );
#endif	/* #if (NCPUS > 1) && i860 */
	*addrp = addr;
	return KERN_SUCCESS;
}
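/*
 *	Illustrative sketch (not part of the original source): a caller
 *	wanting wired memory backed by the kernel_object, and preferring
 *	to fail rather than block for pages, might invoke this routine as
 *	follows.  The buffer name and size are hypothetical.
 *
 *		vm_offset_t	buf;
 *		kern_return_t	kr;
 *
 *		kr = kernel_memory_allocate(kernel_map, &buf, 4 * PAGE_SIZE,
 *					    0, KMA_KOBJECT | KMA_NOPAGEWAIT);
 *		if (kr != KERN_SUCCESS)
 *			return kr;	// no address space or no pages
 *
 *	On success *addrp (here &buf) holds the base of the new wired,
 *	non-zero-filled region.
 */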
/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

kern_return_t
kmem_alloc(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return kernel_memory_allocate(map, addrp, size, 0, 0);
}
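/*
 *	Illustrative sketch (not part of the original source): kmem_alloc
 *	and kmem_free are used in pairs, against the same map.  kmem_free
 *	also accepts regions obtained from kmem_alloc_wired and
 *	kmem_alloc_pageable.  The identifiers below are hypothetical.
 *
 *		vm_offset_t	table;
 *		vm_size_t	table_size = 16 * PAGE_SIZE;
 *
 *		if (kmem_alloc(kernel_map, &table, table_size) != KERN_SUCCESS)
 *			panic("table allocation failed");
 *		...
 *		kmem_free(kernel_map, table, table_size);
 */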
/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t
kmem_realloc(
	vm_map_t	map,
	vm_offset_t	oldaddr,
	vm_size_t	oldsize,
	vm_offset_t	*newaddrp,
	vm_size_t	newsize)
{
	vm_offset_t	oldmin, oldmax;
	vm_offset_t	newaddr;
	vm_object_t	object;
	vm_map_entry_t	oldentry, newentry;
	kern_return_t	kr;

	oldmin = trunc_page(oldaddr);
	oldmax = round_page(oldaddr + oldsize);
	oldsize = oldmax - oldmin;
	newsize = round_page(newsize);

	/*
	 *	Find space for the new region.
	 */

	kr = vm_map_find_space(map, &newaddr, newsize, (vm_offset_t) 0,
			       &newentry);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 *	Find the VM object backing the old region.
	 */

	if (!vm_map_lookup_entry(map, oldmin, &oldentry))
		panic("kmem_realloc");
	object = oldentry->object.vm_object;

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */

	vm_object_reference(object);
	vm_object_lock(object);
	if (object->size != oldsize)
		panic("kmem_realloc");
	object->size = newsize;
	vm_object_unlock(object);

	newentry->object.vm_object = object;
	newentry->offset = 0;
	assert (newentry->wired_count == 0);
	newentry->wired_count = 1;

	/*
	 *	Since we have not given out this address yet,
	 *	it is safe to unlock the map.  We are trusting
	 *	that nobody will play with either region.
	 */

	vm_map_unlock(map);

	/*
	 *	Remap the pages in the old region and
	 *	allocate more pages for the new region.
	 */

	kmem_remap_pages(object, 0,
			 newaddr, newaddr + oldsize,
			 VM_PROT_DEFAULT);
	kmem_alloc_pages(object, oldsize,
			 newaddr + oldsize, newaddr + newsize,
			 VM_PROT_DEFAULT);

	*newaddrp = newaddr;
	return KERN_SUCCESS;
}
/*
 *	kmem_alloc_wired:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_wired(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT);
}
/*
 *	kmem_alloc_aligned:
 *
 *	Like kmem_alloc_wired, except that the memory is aligned.
 *	The size should be a power-of-2.
 */

kern_return_t
kmem_alloc_aligned(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	if ((size & (size - 1)) != 0)
		panic("kmem_alloc_aligned: size not aligned");
	return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT);
}
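/*
 *	Illustrative note (not part of the original source): passing
 *	size - 1 as the mask to kernel_memory_allocate forces the low
 *	bits of the chosen address to zero, which for a power-of-2 size
 *	is exactly natural alignment.  For example, a hypothetical 64KB
 *	request uses mask 0xFFFF, so the start address is a multiple of
 *	64KB (error handling omitted):
 *
 *		vm_offset_t	ring;
 *
 *		kmem_alloc_aligned(kernel_map, &ring, 64 * 1024);
 *		assert((ring & (64 * 1024 - 1)) == 0);
 */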
/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	vm_offset_t	addr;
	kern_return_t	kr;

#ifndef normal
	addr = (vm_map_min(map)) + 0x1000;
#else
	addr = vm_map_min(map);
#endif

	kr = vm_map_enter(map, &addr, round_page(size),
			  (vm_offset_t) 0, TRUE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS)
		return kr;

	*addrp = addr;
	return KERN_SUCCESS;
}
/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, kmem_alloc_wired, or kmem_alloc_pageable,
 *	and return the physical pages associated with that region.
 */

void
kmem_free(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size)
{
	kern_return_t	kr;

	kr = vm_map_remove(map, trunc_page(addr),
			   round_page(addr + size), VM_MAP_REMOVE_KUNWIRE);
	if (kr != KERN_SUCCESS)
		panic("kmem_free");
}
/*
 *	Allocate new wired pages in an object.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */

kern_return_t
kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection)
{
	/*
	 *	Mark the pmap region as not pageable.
	 */
	pmap_pageable(kernel_pmap, start, end, FALSE);

	while (start < end) {
	    register vm_page_t	mem;

	    vm_object_lock(object);

	    /*
	     *	Allocate a page, waiting if necessary.
	     */
	    while ((mem = vm_page_alloc(object, offset))
			 == VM_PAGE_NULL) {
		vm_object_unlock(object);
		VM_PAGE_WAIT();
		vm_object_lock(object);
	    }

	    /*
	     *	Wire it down.
	     */
	    vm_page_lock_queues();
	    vm_page_wire(mem);
	    vm_page_unlock_queues();
	    vm_object_unlock(object);

	    /*
	     *	Enter it in the kernel pmap
	     */
	    PMAP_ENTER(kernel_pmap, start, mem,
		       protection, TRUE);

	    vm_object_lock(object);
	    PAGE_WAKEUP_DONE(mem);
	    vm_object_unlock(object);

	    start += PAGE_SIZE;
	    offset += PAGE_SIZE_64;
	}
	return KERN_SUCCESS;
}
/*
 *	Remap wired pages in an object into a new region.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection)
{
	/*
	 *	Mark the pmap region as not pageable.
	 */
	pmap_pageable(kernel_pmap, start, end, FALSE);

	while (start < end) {
	    register vm_page_t	mem;

	    vm_object_lock(object);

	    /*
	     *	Find the page to be remapped.
	     */
	    if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
		panic("kmem_remap_pages");

	    /*
	     *	Wire it down (again)
	     */
	    vm_page_lock_queues();
	    vm_page_wire(mem);
	    vm_page_unlock_queues();
	    vm_object_unlock(object);

	    /*
	     *	Enter it in the kernel pmap.  The page isn't busy,
	     *	but this shouldn't be a problem because it is wired.
	     */
	    PMAP_ENTER(kernel_pmap, start, mem,
		       protection, TRUE);

	    start += PAGE_SIZE;
	    offset += PAGE_SIZE_64;
	}
}
/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	addr		Address of start of range (IN/OUT)
 *	size		Size of range to find
 *	pageable	Can region be paged
 *	anywhere	Can region be located anywhere in map
 *	new_map		Pointer to new submap
 */
kern_return_t
kmem_suballoc(
	vm_map_t	parent,
	vm_offset_t	*addr,
	vm_size_t	size,
	boolean_t	pageable,
	boolean_t	anywhere,
	vm_map_t	*new_map)
{
	vm_map_t	map;
	kern_return_t	kr;

	size = round_page(size);

	/*
	 *	Need reference on submap object because it is internal
	 *	to the vm_system.  vm_object_enter will never be called
	 *	on it (usual source of reference for vm_map_enter).
	 */
	vm_object_reference(vm_submap_object);

	if (anywhere == TRUE)
		*addr = (vm_offset_t)vm_map_min(parent);
	kr = vm_map_enter(parent, addr, size,
			  (vm_offset_t) 0, anywhere,
			  vm_submap_object, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(vm_submap_object);
		return kr;
	}

	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create(vm_map_pmap(parent), *addr, *addr + size, pageable);
	if (map == VM_MAP_NULL)
		panic("kmem_suballoc: vm_map_create failed");	/* "can't happen" */

	kr = vm_map_submap(parent, *addr, *addr + size, map, *addr, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 *	See comment preceding vm_map_submap().
		 */
		vm_map_remove(parent, *addr, *addr + size, VM_MAP_NO_FLAGS);
		vm_map_deallocate(map);	/* also removes ref to pmap */
		vm_object_deallocate(vm_submap_object);
		return kr;
	}

	*new_map = map;
	return (KERN_SUCCESS);
}
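/*
 *	Illustrative sketch (not part of the original source): a subsystem
 *	carving a private, pageable submap out of the kernel map might do
 *	the following; my_submap and MY_SUBMAP_SIZE are hypothetical.
 *
 *		vm_map_t	my_submap;
 *		vm_offset_t	base;
 *
 *		if (kmem_suballoc(kernel_map, &base, MY_SUBMAP_SIZE,
 *				  TRUE, TRUE, &my_submap) != KERN_SUCCESS)
 *			panic("submap creation failed");
 *
 *	Subsequent kmem_alloc_pageable calls can then be made against
 *	my_submap instead of kernel_map.
 */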
/*
 *	kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
void
kmem_init(
	vm_offset_t	start,
	vm_offset_t	end)
{
	kernel_map = vm_map_create(pmap_kernel(),
				   VM_MIN_KERNEL_ADDRESS, end,
				   FALSE);

	/*
	 *	Reserve virtual memory allocated up to this time.
	 */

	if (start != VM_MIN_KERNEL_ADDRESS) {
		vm_offset_t addr = VM_MIN_KERNEL_ADDRESS;
		(void) vm_map_enter(kernel_map,
				    &addr, start - VM_MIN_KERNEL_ADDRESS,
				    (vm_offset_t) 0, TRUE,
				    VM_OBJECT_NULL,
				    (vm_object_offset_t) 0, FALSE,
				    VM_PROT_DEFAULT, VM_PROT_ALL,
				    VM_INHERIT_DEFAULT);
	}

	/*
	 *	Account for kernel memory (text, data, bss, vm shenanigans).
	 *	This may include inaccessible "holes" as determined by what
	 *	the machine-dependent init code includes in mem_size.
	 */
	vm_page_wire_count = (atop(mem_size) - (vm_page_free_count
						+ vm_page_active_count
						+ vm_page_inactive_count));
}
/*
 *	kmem_io_map_copyout:
 *
 *	Establish temporary mapping in designated map for the memory
 *	passed in.  Memory format must be a page_list vm_map_copy.
 */

kern_return_t
kmem_io_map_copyout(
	vm_map_t	map,
	vm_offset_t	*addr,		/* actual addr of data */
	vm_size_t	*alloc_size,	/* size allocated */
	vm_map_copy_t	copy,
	vm_size_t	min_size,	/* Do at least this much */
	vm_prot_t	prot)		/* Protection of mapping */
{
	vm_offset_t	myaddr, offset;
	vm_size_t	mysize, copy_size;
	kern_return_t	ret;
	vm_page_t	*page_list;
	vm_map_copy_t	new_copy;
	int		i;

	assert(copy->type == VM_MAP_COPY_PAGE_LIST);
	assert(min_size != 0);

	/*
	 *	Figure out the size in vm pages.
	 */
	min_size += (vm_size_t)(copy->offset - trunc_page_64(copy->offset));
	min_size = round_page(min_size);
	mysize = (vm_size_t)(round_page_64(
			copy->offset + (vm_object_offset_t)copy->size) -
		 trunc_page_64(copy->offset));

	/*
	 *	If total size is larger than one page list and
	 *	we don't have to do more than one page list, then
	 *	only do one page list.
	 *
	 * XXX	Could be much smarter about this ... like trimming length
	 * XXX	if we need more than one page list but not all of them.
	 */

	copy_size = ptoa(copy->cpy_npages);
	if (mysize > copy_size && copy_size > min_size)
		mysize = copy_size;

	/*
	 *	Allocate some address space in the map (must be kernel
	 *	space).
	 */
	myaddr = vm_map_min(map);
	ret = vm_map_enter(map, &myaddr, mysize,
			  (vm_offset_t) 0, TRUE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			  prot, prot, VM_INHERIT_DEFAULT);

	if (ret != KERN_SUCCESS)
		return ret;

	/*
	 *	Tell the pmap module that this will be wired, and
	 *	enter the mappings.
	 */
	pmap_pageable(vm_map_pmap(map), myaddr, myaddr + mysize, TRUE);

	*addr = myaddr + (vm_offset_t)
		(copy->offset - trunc_page_64(copy->offset));
	*alloc_size = mysize;

	offset = myaddr;
	page_list = &copy->cpy_page_list[0];

	while (TRUE) {
		for ( i = 0; i < copy->cpy_npages; i++, offset += PAGE_SIZE_64) {
			PMAP_ENTER(vm_map_pmap(map),
				   (vm_offset_t)offset, *page_list,
				   prot, TRUE);
			page_list++;
		}

		if (offset == (myaddr + mysize))
			break;

		/*
		 *	Onward to the next page_list.  The extend_cont
		 *	leaves the current page list's pages alone;
		 *	they'll be cleaned up at discard.  Reset this
		 *	copy's continuation to discard the next one.
		 */
		vm_map_copy_invoke_extend_cont(copy, &new_copy, &ret);

		if (ret != KERN_SUCCESS) {
			kmem_io_map_deallocate(map, myaddr, mysize);
			return ret;
		}
		copy->cpy_cont = vm_map_copy_discard_cont;
		copy->cpy_cont_args = (vm_map_copyin_args_t) new_copy;
		assert(new_copy != VM_MAP_COPY_NULL);
		assert(new_copy->type == VM_MAP_COPY_PAGE_LIST);
		copy = new_copy;
		page_list = &copy->cpy_page_list[0];
	}

	return ret;
}
/*
 *	kmem_io_map_deallocate:
 *
 *	Get rid of the mapping established by kmem_io_map_copyout.
 *	Assumes that addr and size have been rounded to page boundaries.
 */

void
kmem_io_map_deallocate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size)
{
	register vm_offset_t	va, end;

	end = round_page(addr + size);
	for (va = trunc_page(addr); va < end; va += PAGE_SIZE)
		pmap_change_wiring(vm_map_pmap(map), va, FALSE);

	/*
	 *	Remove the mappings.  The pmap_remove is needed.
	 */

	pmap_remove(vm_map_pmap(map), addr, addr + size);
	vm_map_remove(map, addr, addr + size, VM_MAP_REMOVE_KUNWIRE);
}
/*
 *	kmem_io_object_trunc:
 *
 *	Truncate an object vm_map_copy_t.
 *	Called by the scatter/gather list network code to remove pages from
 *	the tail end of a packet.  Also unwires the object's pages.
 */

kern_return_t
kmem_io_object_trunc(copy, new_size)
     vm_map_copy_t	copy;		/* IN/OUT copy object */
     register vm_size_t new_size;	/* IN new object size */
{
	register vm_size_t	offset, old_size;

	assert(copy->type == VM_MAP_COPY_OBJECT);

	old_size = (vm_size_t)round_page_64(copy->size);
	copy->size = new_size;
	new_size = round_page(new_size);

	vm_object_lock(copy->cpy_object);
	vm_object_page_remove(copy->cpy_object,
		(vm_object_offset_t)new_size, (vm_object_offset_t)old_size);
	for (offset = 0; offset < new_size; offset += PAGE_SIZE) {
		register vm_page_t	mem;

		if ((mem = vm_page_lookup(copy->cpy_object,
				(vm_object_offset_t)offset)) == VM_PAGE_NULL)
			panic("kmem_io_object_trunc: unable to find object page");

		/*
		 *	Make sure these pages are marked dirty
		 */
		mem->dirty = TRUE;
		vm_page_lock_queues();
		vm_page_unwire(mem);
		vm_page_unlock_queues();
	}
	copy->cpy_object->size = new_size;	/* adjust size of object */
	vm_object_unlock(copy->cpy_object);
	return(KERN_SUCCESS);
}
/*
 *	kmem_io_object_deallocate:
 *
 *	Free a vm_map_copy_t.
 *	Called by the scatter/gather list network code to free a packet.
 */

void
kmem_io_object_deallocate(
	vm_map_copy_t	copy)		/* IN/OUT copy object */
{
	kern_return_t	ret;

	/*
	 *	Clear out all the object pages (this will leave an empty object).
	 */
	ret = kmem_io_object_trunc(copy, 0);
	if (ret != KERN_SUCCESS)
		panic("kmem_io_object_deallocate: unable to truncate object");

	/*
	 *	...and discard the copy object.
	 */
	vm_map_copy_discard(copy);
}
/*
 *	Routine:	copyinmap
 *	Purpose:
 *		Like copyin, except that fromaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */

boolean_t
copyinmap(
	vm_map_t	map,
	vm_offset_t	fromaddr,
	vm_offset_t	toaddr,
	vm_size_t	length)
{
	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy((void *)toaddr, (void *)fromaddr, length);
		return FALSE;
	}

	if (current_map() == map)
		return copyin((char *)fromaddr, (char *)toaddr, length);

	return TRUE;
}
/*
 *	Routine:	copyoutmap
 *	Purpose:
 *		Like copyout, except that toaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */

boolean_t
copyoutmap(
	vm_map_t	map,
	vm_offset_t	fromaddr,
	vm_offset_t	toaddr,
	vm_size_t	length)
{
	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy((void *)toaddr, (void *)fromaddr, length);
		return FALSE;
	}

	if (current_map() == map)
		return copyout((char *)fromaddr, (char *)toaddr, length);

	return TRUE;
}