/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Kernel memory management.
 */
#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>
/*
 *	Variables exported by this module.
 */

vm_map_t    kernel_map;
vm_map_t    kernel_pageable_map;
/*
 *	Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
    register vm_object_t            object,
    register vm_object_offset_t     offset,
    register vm_size_t              size);

extern void kmem_remap_pages(
    register vm_object_t            object,
    register vm_object_offset_t     offset,
    register vm_offset_t            start,
    register vm_offset_t            end,
    vm_prot_t                       protection);
/*
 *	kmem_alloc_contig:
 *
 *	Allocate physically contiguous, wired-down kernel memory.
 */
kern_return_t
kmem_alloc_contig(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size,
    vm_offset_t     mask,
    int             flags)
{
    vm_object_t             object;
    vm_offset_t             addr, i;
    vm_page_t               m, pages;
    kern_return_t           kr;
    vm_object_offset_t      offset;
    vm_map_entry_t          entry;

    if (map == VM_MAP_NULL || (flags && (flags ^ KMA_KOBJECT)))
        return KERN_INVALID_ARGUMENT;

    if (size == 0) {
        *addrp = 0;
        return KERN_INVALID_ARGUMENT;
    }

    size = round_page_32(size);
    if ((flags & KMA_KOBJECT) == 0) {
        object = vm_object_allocate(size);
        kr = vm_map_find_space(map, &addr, size, mask, &entry);
    } else {
        object = kernel_object;
        kr = vm_map_find_space(map, &addr, size, mask, &entry);
    }

    if ((flags & KMA_KOBJECT) == 0) {
        entry->object.vm_object = object;
        entry->offset = offset = 0;
    } else {
        offset = addr - VM_MIN_KERNEL_ADDRESS;

        if (entry->object.vm_object == VM_OBJECT_NULL) {
            vm_object_reference(object);
            entry->object.vm_object = object;
            entry->offset = offset;
        }
    }

    if (kr != KERN_SUCCESS) {
        if ((flags & KMA_KOBJECT) == 0)
            vm_object_deallocate(object);
        return kr;
    }

    vm_map_unlock(map);

    kr = cpm_allocate(size, &pages, FALSE);

    if (kr != KERN_SUCCESS) {
        vm_map_remove(map, addr, addr + size, 0);
        *addrp = 0;
        return kr;
    }

    vm_object_lock(object);
    for (i = 0; i < size; i += PAGE_SIZE) {
        m = pages;
        pages = NEXT_PAGE(m);
        m->busy = FALSE;
        vm_page_insert(m, object, offset + i);
    }
    vm_object_unlock(object);

    if ((kr = vm_map_wire(map, addr, addr + size, VM_PROT_DEFAULT, FALSE))
            != KERN_SUCCESS) {
        if (object == kernel_object) {
            vm_object_lock(object);
            vm_object_page_remove(object, offset, offset + size);
            vm_object_unlock(object);
        }
        vm_map_remove(map, addr, addr + size, 0);
        return kr;
    }
    if (object == kernel_object)
        vm_map_simplify(map, addr);

    *addrp = addr;
    return KERN_SUCCESS;
}
/*
 * Master entry point for allocating kernel memory.
 * NOTE: this routine is _never_ interrupt safe.
 *
 * map		: map to allocate into
 * addrp	: pointer to start address of new memory
 * size		: size of memory requested
 * flags	: options
 *		  KMA_HERE		*addrp is base address, else "anywhere"
 *		  KMA_NOPAGEWAIT	don't wait for pages if unavailable
 *		  KMA_KOBJECT		use kernel_object
 *
 * (A usage sketch follows the function body below.)
 */
kern_return_t
kernel_memory_allocate(
    register vm_map_t       map,
    register vm_offset_t    *addrp,
    register vm_size_t      size,
    register vm_offset_t    mask,
    int                     flags)
{
    vm_object_t             object = VM_OBJECT_NULL;
    vm_map_entry_t          entry;
    vm_object_offset_t      offset;
    vm_offset_t             addr;
    vm_offset_t             i;
    kern_return_t           kr;

    size = round_page_32(size);
    if ((flags & KMA_KOBJECT) == 0) {
        /*
         *	Allocate a new object.  We must do this before locking
         *	the map, or risk deadlock with the default pager:
         *		device_read_alloc uses kmem_alloc,
         *		which tries to allocate an object,
         *		which uses kmem_alloc_wired to get memory,
         *		which blocks for pages.
         *		then the default pager needs to read a block
         *		to process a memory_object_data_write,
         *		and device_read_alloc calls kmem_alloc
         *		and deadlocks on the map lock.
         */
        object = vm_object_allocate(size);
        kr = vm_map_find_space(map, &addr, size, mask, &entry);
    } else {
        object = kernel_object;
        kr = vm_map_find_space(map, &addr, size, mask, &entry);
    }
    if (kr != KERN_SUCCESS) {
        if ((flags & KMA_KOBJECT) == 0)
            vm_object_deallocate(object);
        return kr;
    }

    if ((flags & KMA_KOBJECT) == 0) {
        entry->object.vm_object = object;
        entry->offset = offset = 0;
    } else {
        offset = addr - VM_MIN_KERNEL_ADDRESS;

        if (entry->object.vm_object == VM_OBJECT_NULL) {
            vm_object_reference(object);
            entry->object.vm_object = object;
            entry->offset = offset;
        }
    }

    /*
     *	Since we have not given out this address yet,
     *	it is safe to unlock the map.  Except of course
     *	we must make certain no one coalesces our address
     *	or does a blind vm_deallocate and removes the object;
     *	an extra object reference will suffice to protect
     *	against both contingencies.
     */
    vm_object_reference(object);
    vm_map_unlock(map);

    vm_object_lock(object);
    for (i = 0; i < size; i += PAGE_SIZE) {
        vm_page_t   mem;

        while ((mem = vm_page_alloc(object,
                        offset + (vm_object_offset_t)i))
                    == VM_PAGE_NULL) {
            if (flags & KMA_NOPAGEWAIT) {
                if (object == kernel_object)
                    vm_object_page_remove(object, offset,
                            offset + (vm_object_offset_t)i);
                vm_object_unlock(object);
                vm_map_remove(map, addr, addr + size, 0);
                vm_object_deallocate(object);
                return KERN_RESOURCE_SHORTAGE;
            }
            vm_object_unlock(object);
            VM_PAGE_WAIT();
            vm_object_lock(object);
        }
        mem->busy = FALSE;
    }
    vm_object_unlock(object);

    if ((kr = vm_map_wire(map, addr, addr + size, VM_PROT_DEFAULT, FALSE))
            != KERN_SUCCESS) {
        if (object == kernel_object) {
            vm_object_lock(object);
            vm_object_page_remove(object, offset, offset + size);
            vm_object_unlock(object);
        }
        vm_map_remove(map, addr, addr + size, 0);
        vm_object_deallocate(object);
        return kr;
    }
    /* now that the pages are wired, we no longer have to fear coalesce */
    vm_object_deallocate(object);
    if (object == kernel_object)
        vm_map_simplify(map, addr);

    /*
     *	Return the memory, not zeroed.
     */
#if	(NCPUS > 1) && i860
    bzero(addr, size);
#endif	/* #if (NCPUS > 1) && i860 */
    *addrp = addr;
    return KERN_SUCCESS;
}
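
/*
 * Usage sketch (illustrative, not part of this module): a caller asking
 * kernel_memory_allocate() for wired memory backed by kernel_object that
 * would rather fail than block for pages.  The helper name and error
 * handling are assumptions; in-tree callers normally use the kmem_alloc*
 * wrappers defined below.
 */
#if 0
static kern_return_t
example_grab_wired_buffer(vm_offset_t *bufp, vm_size_t bytes)
{
    kern_return_t kr;

    kr = kernel_memory_allocate(kernel_map, bufp, bytes,
                                (vm_offset_t) 0,    /* no alignment mask */
                                KMA_KOBJECT | KMA_NOPAGEWAIT);
    if (kr != KERN_SUCCESS)
        return kr;              /* e.g. KERN_RESOURCE_SHORTAGE */

    /* ... use the wired memory at *bufp ... */

    kmem_free(kernel_map, *bufp, bytes);
    return KERN_SUCCESS;
}
#endif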
/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

kern_return_t
kmem_alloc(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size)
{
    return kernel_memory_allocate(map, addrp, size, 0, 0);
}
/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t
kmem_realloc(
    vm_map_t        map,
    vm_offset_t     oldaddr,
    vm_size_t       oldsize,
    vm_offset_t     *newaddrp,
    vm_size_t       newsize)
{
    vm_offset_t     oldmin, oldmax;
    vm_offset_t     newaddr;
    vm_offset_t     offset;
    vm_object_t     object;
    vm_map_entry_t  oldentry, newentry;
    vm_page_t       mem;
    kern_return_t   kr;

    oldmin = trunc_page_32(oldaddr);
    oldmax = round_page_32(oldaddr + oldsize);
    oldsize = oldmax - oldmin;
    newsize = round_page_32(newsize);

    /*
     *	Find the VM object backing the old region.
     */

    vm_map_lock(map);

    if (!vm_map_lookup_entry(map, oldmin, &oldentry))
        panic("kmem_realloc");
    object = oldentry->object.vm_object;

    /*
     *	Increase the size of the object and
     *	fill in the new region.
     */

    vm_object_reference(object);
    /* by grabbing the object lock before unlocking the map */
    /* we guarantee that we will panic if more than one     */
    /* attempt is made to realloc a kmem_alloc'd area       */
    vm_object_lock(object);
    vm_map_unlock(map);
    if (object->size != oldsize)
        panic("kmem_realloc");
    object->size = newsize;
    vm_object_unlock(object);

    /* allocate the new pages while expanded portion of the */
    /* object is still not mapped */
    kmem_alloc_pages(object, oldsize, newsize - oldsize);

    /*
     *	Find space for the new region.
     */

    kr = vm_map_find_space(map, &newaddr, newsize, (vm_offset_t) 0,
                           &newentry);
    if (kr != KERN_SUCCESS) {
        vm_object_lock(object);
        for (offset = oldsize;
             offset < newsize; offset += PAGE_SIZE) {
            if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
                vm_page_lock_queues();
                vm_page_free(mem);
                vm_page_unlock_queues();
            }
        }
        object->size = oldsize;
        vm_object_unlock(object);
        vm_object_deallocate(object);
        return kr;
    }
    newentry->object.vm_object = object;
    newentry->offset = 0;
    assert(newentry->wired_count == 0);

    /* add an extra reference in case we have someone doing an */
    /* unexpected deallocate */
    vm_object_reference(object);
    vm_map_unlock(map);

    if ((kr = vm_map_wire(map, newaddr, newaddr + newsize,
                          VM_PROT_DEFAULT, FALSE)) != KERN_SUCCESS) {
        vm_map_remove(map, newaddr, newaddr + newsize, 0);
        vm_object_lock(object);
        for (offset = oldsize;
             offset < newsize; offset += PAGE_SIZE) {
            if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
                vm_page_lock_queues();
                vm_page_free(mem);
                vm_page_unlock_queues();
            }
        }
        object->size = oldsize;
        vm_object_unlock(object);
        vm_object_deallocate(object);
        return kr;
    }
    vm_object_deallocate(object);

    *newaddrp = newaddr;
    return KERN_SUCCESS;
}
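
/*
 * Sketch of kmem_realloc() semantics (illustrative; the helper and sizes are
 * assumptions).  On success the same pages end up mapped at both the old and
 * the new address; the old region is unchanged and still has to be released
 * with kmem_free().
 */
#if 0
static kern_return_t
example_grow_region(vm_offset_t *addrp)
{
    vm_offset_t     oldaddr = *addrp;   /* from a prior kmem_alloc() */
    vm_offset_t     newaddr;
    kern_return_t   kr;

    kr = kmem_realloc(kernel_map, oldaddr, PAGE_SIZE,
                      &newaddr, 2 * PAGE_SIZE);
    if (kr != KERN_SUCCESS)
        return kr;

    /* the old mapping still exists; drop it once it is no longer needed */
    kmem_free(kernel_map, oldaddr, PAGE_SIZE);
    *addrp = newaddr;
    return KERN_SUCCESS;
}
#endif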
/*
 *	kmem_alloc_wired:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_wired(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size)
{
    return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT);
}
/*
 *	kmem_alloc_aligned:
 *
 *	Like kmem_alloc_wired, except that the memory is aligned.
 *	The size should be a power-of-2.
 */

kern_return_t
kmem_alloc_aligned(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size)
{
    if ((size & (size - 1)) != 0)
        panic("kmem_alloc_aligned: size not aligned");
    return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT);
}
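
/*
 * Sketch of kmem_alloc_aligned() (illustrative; the helper name is an
 * assumption).  The size doubles as the alignment mask, so it must be a
 * power of 2; a 64KB request yields a 64KB-aligned, wired region backed by
 * kernel_object.
 */
#if 0
static kern_return_t
example_aligned_buffer(vm_offset_t *addrp)
{
    vm_size_t       size = 64 * 1024;   /* power of 2 */
    kern_return_t   kr;

    kr = kmem_alloc_aligned(kernel_map, addrp, size);
    if (kr == KERN_SUCCESS)
        assert((*addrp & (size - 1)) == 0);   /* size-aligned */
    return kr;
}
#endif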
/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size)
{
    vm_offset_t     addr;
    kern_return_t   kr;

#ifndef normal
    addr = (vm_map_min(map)) + 0x1000;
#else
    addr = vm_map_min(map);
#endif
    kr = vm_map_enter(map, &addr, round_page_32(size),
                      (vm_offset_t) 0, TRUE,
                      VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
                      VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
    if (kr != KERN_SUCCESS)
        return kr;

    *addrp = addr;
    return KERN_SUCCESS;
}
/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, kmem_alloc_wired, or kmem_alloc_pageable,
 *	and return the physical pages associated with that region.
 */

void
kmem_free(
    vm_map_t        map,
    vm_offset_t     addr,
    vm_size_t       size)
{
    kern_return_t   kr;

    kr = vm_map_remove(map, trunc_page_32(addr),
                       round_page_32(addr + size),
                       VM_MAP_REMOVE_KUNWIRE);
    if (kr != KERN_SUCCESS)
        panic("kmem_free");
}
/*
 *	Allocate new pages in an object.
 */

kern_return_t
kmem_alloc_pages(
    register vm_object_t            object,
    register vm_object_offset_t     offset,
    register vm_size_t              size)
{
    size = round_page_32(size);
    vm_object_lock(object);
    while (size) {
        register vm_page_t  mem;

        /*
         *	Allocate a page, waiting for one if necessary.
         */
        while ((mem = vm_page_alloc(object, offset))
                    == VM_PAGE_NULL) {
            vm_object_unlock(object);
            VM_PAGE_WAIT();
            vm_object_lock(object);
        }
        mem->busy = FALSE;

        offset += PAGE_SIZE;
        size -= PAGE_SIZE;
    }
    vm_object_unlock(object);
    return KERN_SUCCESS;
}
/*
 *	Remap wired pages in an object into a new region.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_remap_pages(
    register vm_object_t            object,
    register vm_object_offset_t     offset,
    register vm_offset_t            start,
    register vm_offset_t            end,
    vm_prot_t                       protection)
{
    /*
     *	Mark the pmap region as not pageable.
     */
    pmap_pageable(kernel_pmap, start, end, FALSE);

    while (start < end) {
        register vm_page_t  mem;

        vm_object_lock(object);

        /*
         *	Find a page
         */
        if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
            panic("kmem_remap_pages");

        /*
         *	Wire it down (again)
         */
        vm_page_lock_queues();
        vm_page_wire(mem);
        vm_page_unlock_queues();
        vm_object_unlock(object);

        /*
         *	Enter it in the kernel pmap.  The page isn't busy,
         *	but this shouldn't be a problem because it is wired.
         */
        PMAP_ENTER(kernel_pmap, start, mem, protection,
                   ((unsigned int)(mem->object->wimg_bits))
                        & VM_WIMG_MASK,
                   TRUE);

        start += PAGE_SIZE;
        offset += PAGE_SIZE;
    }
}
/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	addr		Address of start of range (IN/OUT)
 *	size		Size of range to find
 *	pageable	Can region be paged
 *	anywhere	Can region be located anywhere in map
 *	new_map		Pointer to new submap
 *
 *	(A usage sketch follows the function body below.)
 */
kern_return_t
kmem_suballoc(
    vm_map_t        parent,
    vm_offset_t     *addr,
    vm_size_t       size,
    boolean_t       pageable,
    boolean_t       anywhere,
    vm_map_t        *new_map)
{
    vm_map_t        map;
    kern_return_t   kr;

    size = round_page_32(size);

    /*
     *	Need reference on submap object because it is internal
     *	to the vm_system.  vm_object_enter will never be called
     *	on it (usual source of reference for vm_map_enter).
     */
    vm_object_reference(vm_submap_object);

    if (anywhere == TRUE)
        *addr = (vm_offset_t)vm_map_min(parent);
    kr = vm_map_enter(parent, addr, size,
                      (vm_offset_t) 0, anywhere,
                      vm_submap_object, (vm_object_offset_t) 0, FALSE,
                      VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
    if (kr != KERN_SUCCESS) {
        vm_object_deallocate(vm_submap_object);
        return kr;
    }

    pmap_reference(vm_map_pmap(parent));
    map = vm_map_create(vm_map_pmap(parent), *addr, *addr + size, pageable);
    if (map == VM_MAP_NULL)
        panic("kmem_suballoc: vm_map_create failed");	/* "can't happen" */

    kr = vm_map_submap(parent, *addr, *addr + size, map, *addr, FALSE);
    if (kr != KERN_SUCCESS) {
        /*
         *	See comment preceding vm_map_submap().
         */
        vm_map_remove(parent, *addr, *addr + size, VM_MAP_NO_FLAGS);
        vm_map_deallocate(map);	/* also removes ref to pmap */
        vm_object_deallocate(vm_submap_object);
        return kr;
    }

    *new_map = map;
    return (KERN_SUCCESS);
}
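
/*
 * Sketch of carving a private submap out of kernel_map with kmem_suballoc()
 * (illustrative; the map variable and the 1MB size are assumptions).  This
 * mirrors how subsystems create their own kernel submaps, here a pageable
 * one placed anywhere in the parent.
 */
#if 0
static vm_map_t example_submap;

static kern_return_t
example_create_submap(void)
{
    vm_offset_t     base = 0;
    vm_size_t       size = 1024 * 1024;

    return kmem_suballoc(kernel_map, &base, size,
                         TRUE /* pageable */, TRUE /* anywhere */,
                         &example_submap);
}
#endif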
/*
 *	kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
void
kmem_init(
    vm_offset_t     start,
    vm_offset_t     end)
{
    kernel_map = vm_map_create(pmap_kernel(),
                               VM_MIN_KERNEL_ADDRESS, end,
                               FALSE);

    /*
     *	Reserve virtual memory allocated up to this time.
     */

    if (start != VM_MIN_KERNEL_ADDRESS) {
        vm_offset_t addr = VM_MIN_KERNEL_ADDRESS;
        (void) vm_map_enter(kernel_map,
                            &addr, start - VM_MIN_KERNEL_ADDRESS,
                            (vm_offset_t) 0, TRUE,
                            VM_OBJECT_NULL,
                            (vm_object_offset_t) 0, FALSE,
                            VM_PROT_DEFAULT, VM_PROT_ALL,
                            VM_INHERIT_DEFAULT);
    }

    /*
     *	Account for kernel memory (text, data, bss, vm shenanigans).
     *	This may include inaccessible "holes" as determined by what
     *	the machine-dependent init code includes in max_mem.
     */
    vm_page_wire_count = (atop_64(max_mem) - (vm_page_free_count
                                              + vm_page_active_count
                                              + vm_page_inactive_count));
}
/*
 *	kmem_io_object_trunc:
 *
 *	Truncate an object vm_map_copy_t.
 *	Called by the scatter/gather list network code to remove pages from
 *	the tail end of a packet.  Also unwires the object's pages.
 */

kern_return_t
kmem_io_object_trunc(copy, new_size)
    vm_map_copy_t       copy;       /* IN/OUT copy object */
    register vm_size_t  new_size;   /* IN new object size */
{
    register vm_size_t  offset, old_size;

    assert(copy->type == VM_MAP_COPY_OBJECT);

    old_size = (vm_size_t)round_page_64(copy->size);
    copy->size = new_size;
    new_size = round_page_32(new_size);

    vm_object_lock(copy->cpy_object);
    vm_object_page_remove(copy->cpy_object,
            (vm_object_offset_t)new_size, (vm_object_offset_t)old_size);
    for (offset = 0; offset < new_size; offset += PAGE_SIZE) {
        register vm_page_t  mem;

        if ((mem = vm_page_lookup(copy->cpy_object,
                        (vm_object_offset_t)offset)) == VM_PAGE_NULL)
            panic("kmem_io_object_trunc: unable to find object page");

        /*
         *	Make sure these pages are marked dirty.
         */
        mem->dirty = TRUE;
        vm_page_lock_queues();
        vm_page_unwire(mem);
        vm_page_unlock_queues();
    }
    copy->cpy_object->size = new_size;	/* adjust size of object */
    vm_object_unlock(copy->cpy_object);
    return (KERN_SUCCESS);
}
/*
 *	kmem_io_object_deallocate:
 *
 *	Free a vm_map_copy_t.
 *	Called by the scatter/gather list network code to free a packet.
 */

void
kmem_io_object_deallocate(
    vm_map_copy_t   copy)       /* IN/OUT copy object */
{
    kern_return_t   ret;

    /*
     *	Clear out all the object pages (this will leave an empty object).
     */
    ret = kmem_io_object_trunc(copy, 0);
    if (ret != KERN_SUCCESS)
        panic("kmem_io_object_deallocate: unable to truncate object");
    /*
     *	...and discard the copy object.
     */
    vm_map_copy_discard(copy);
}
/*
 *	Routine:	copyinmap
 *	Purpose:
 *		Like copyin, except that fromaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
boolean_t
copyinmap(
    vm_map_t        map,
    vm_offset_t     fromaddr,
    vm_offset_t     toaddr,
    vm_size_t       length)
{
    if (vm_map_pmap(map) == pmap_kernel()) {
        /* assume a correct copy */
        memcpy((void *)toaddr, (void *)fromaddr, length);
        return FALSE;
    }

    if (current_map() == map)
        return copyin((char *)fromaddr, (char *)toaddr, length);

    return TRUE;
}
/*
 *	Routine:	copyoutmap
 *	Purpose:
 *		Like copyout, except that toaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
boolean_t
copyoutmap(
    vm_map_t        map,
    vm_offset_t     fromaddr,
    vm_offset_t     toaddr,
    vm_size_t       length)
{
    if (vm_map_pmap(map) == pmap_kernel()) {
        /* assume a correct copy */
        memcpy((void *)toaddr, (void *)fromaddr, length);
        return FALSE;
    }

    if (current_map() == map)
        return copyout((char *)fromaddr, (char *)toaddr, length);

    return TRUE;
}
kern_return_t
vm_conflict_check(
    vm_map_t            map,
    vm_offset_t         off,
    vm_size_t           len,
    memory_object_t     pager,
    vm_object_offset_t  file_off)
{
    vm_map_entry_t      entry;
    vm_object_t         obj;
    vm_object_offset_t  obj_off;
    vm_map_t            base_map;
    vm_offset_t         base_offset;
    vm_offset_t         original_offset;
    kern_return_t       kr;
    vm_size_t           local_len;

    base_map = map;
    base_offset = off;
    original_offset = off;
    kr = KERN_SUCCESS;

    vm_map_lock(map);
    while (vm_map_lookup_entry(map, off, &entry)) {
        local_len = len;

        if (entry->object.vm_object == VM_OBJECT_NULL) {
            vm_map_unlock(map);
            return KERN_SUCCESS;
        }
        if (entry->is_sub_map) {
            vm_map_t    old_map;

            old_map = map;
            vm_map_lock(entry->object.sub_map);
            map = entry->object.sub_map;
            off = entry->offset + (off - entry->vme_start);
            vm_map_unlock(old_map);
            continue;
        }
        obj = entry->object.vm_object;
        obj_off = (off - entry->vme_start) + entry->offset;
        while (obj->shadow) {
            obj_off += obj->shadow_offset;
            obj = obj->shadow;
        }
        if ((obj->pager_created) && (obj->pager == pager)) {
            if (((obj->paging_offset) + obj_off) == file_off) {
                if (off != base_offset) {
                    vm_map_unlock(map);
                    return KERN_FAILURE;
                }
                kr = KERN_ALREADY_WAITING;
            } else {
                vm_object_offset_t  obj_off_aligned;
                vm_object_offset_t  file_off_aligned;

                obj_off_aligned = obj_off & ~PAGE_MASK;
                file_off_aligned = file_off & ~PAGE_MASK;

                if (file_off_aligned == (obj->paging_offset + obj_off_aligned)) {
                    /*
                     * the target map and the file offset start in the same page
                     * but are not identical...
                     */
                    vm_map_unlock(map);
                    return KERN_FAILURE;
                }
                if ((file_off < (obj->paging_offset + obj_off_aligned)) &&
                    ((file_off + len) > (obj->paging_offset + obj_off_aligned))) {
                    /*
                     * some portion of the tail of the I/O will fall
                     * within the encompass of the target map
                     */
                    vm_map_unlock(map);
                    return KERN_FAILURE;
                }
                if ((file_off_aligned > (obj->paging_offset + obj_off)) &&
                    (file_off_aligned < (obj->paging_offset + obj_off) + len)) {
                    /*
                     * the beginning page of the file offset falls within
                     * the target map's encompass
                     */
                    vm_map_unlock(map);
                    return KERN_FAILURE;
                }
            }
        } else if (kr != KERN_SUCCESS) {
            vm_map_unlock(map);
            return KERN_FAILURE;
        }

        if (len <= ((entry->vme_end - entry->vme_start) -
                    (off - entry->vme_start))) {
            vm_map_unlock(map);
            return kr;
        } else {
            len -= (entry->vme_end - entry->vme_start) -
                   (off - entry->vme_start);
        }
        base_offset = base_offset + (local_len - len);
        file_off = file_off + (local_len - len);
        off = base_offset;
        if (map != base_map) {
            vm_map_unlock(map);
            vm_map_lock(base_map);