/* osfmk/vm/vm_kern.c -- from the xnu-2422.100.13 source drop (apple/xnu.git) */
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_kern.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Kernel memory management.
64 */
65
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/lock.h>
70 #include <kern/thread.h>
71 #include <vm/vm_kern.h>
72 #include <vm/vm_map.h>
73 #include <vm/vm_object.h>
74 #include <vm/vm_page.h>
75 #include <vm/vm_pageout.h>
76 #include <kern/misc_protos.h>
77 #include <vm/cpm.h>
78
79 #include <string.h>
80
81 #include <libkern/OSDebug.h>
82 #include <sys/kdebug.h>
83
84 /*
85 * Variables exported by this module.
86 */
87
88 vm_map_t kernel_map;
89 vm_map_t kernel_pageable_map;
90
91 extern boolean_t vm_kernel_ready;
92
93 /*
94 * Forward declarations for internal functions.
95 */
96 extern kern_return_t kmem_alloc_pages(
97 register vm_object_t object,
98 register vm_object_offset_t offset,
99 register vm_object_size_t size);
100
101 extern void kmem_remap_pages(
102 register vm_object_t object,
103 register vm_object_offset_t offset,
104 register vm_offset_t start,
105 register vm_offset_t end,
106 vm_prot_t protection);
107
/*
 *	kmem_alloc_contig:
 *
 *	Allocate "size" bytes of wired, physically contiguous memory in
 *	"map", subject to the virtual alignment "mask" and the physical
 *	page constraints "max_pnum"/"pnum_mask".  The backing pages come
 *	from cpm_allocate().  On success, *addrp holds the virtual base
 *	of the allocation and KERN_SUCCESS is returned.
 *
 *	flags may include KMA_KOBJECT (back with the shared kernel_object),
 *	KMA_LOMEM and KMA_NOPAGEWAIT; any other flag is rejected.
 */
kern_return_t
kmem_alloc_contig(
	vm_map_t		map,
	vm_offset_t		*addrp,
	vm_size_t		size,
	vm_offset_t		mask,
	ppnum_t			max_pnum,
	ppnum_t			pnum_mask,
	int			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		map_addr;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, i;
	vm_map_entry_t		entry;
	vm_page_t		m, pages;
	kern_return_t		kr;

	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT)))
		return KERN_INVALID_ARGUMENT;

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t)mask;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Allocate a new object (if necessary) and the reference we
	 *	will be donating to the map entry.  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	/* vm_map_find_space() returns with the map locked on success */
	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		return kr;
	}

	/* kernel_object is indexed by VA; a private object starts at 0 */
	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object) ?
		map_addr : 0;

	/* Take an extra object ref in case the map entry gets deleted */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

	if (kr != KERN_SUCCESS) {
		/* back out the VA reservation and drop both object refs */
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr,
						VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size,
						VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		*addrp = 0;
		return kr;
	}

	/* thread the contiguous page chain into the backing object */
	vm_object_lock(object);
	for (i = 0; i < map_size; i += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
		m->busy = FALSE;
		vm_page_insert(m, object, offset + i);
	}
	vm_object_unlock(object);

	kr = vm_map_wire(map,
			 vm_map_trunc_page(map_addr,
					   VM_MAP_PAGE_MASK(map)),
			 vm_map_round_page(map_addr + map_size,
					   VM_MAP_PAGE_MASK(map)),
			 VM_PROT_DEFAULT,
			 FALSE);
	if (kr != KERN_SUCCESS) {
		if (object == kernel_object) {
			/* kernel_object is shared: strip out just our pages */
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + map_size);
			vm_object_unlock(object);
		}
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr,
						VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size,
						VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		return kr;
	}
	/* drop the extra reference taken before unlocking the map */
	vm_object_deallocate(object);

	if (object == kernel_object)
		vm_map_simplify(map, map_addr);

	*addrp = (vm_offset_t) map_addr;
	assert((vm_map_offset_t) *addrp == map_addr);
	return KERN_SUCCESS;
}
221
/*
 *	Master entry point for allocating kernel memory.
 *	NOTE: this routine is _never_ interrupt safe.
 *
 *	map	: map to allocate into
 *	addrp	: pointer to start address of new memory
 *	size	: size of memory requested
 *	flags	: options
 *		  KMA_HERE		*addrp is base address, else "anywhere"
 *		  KMA_NOPAGEWAIT	don't wait for pages if unavailable
 *		  KMA_KOBJECT		use kernel_object
 *		  KMA_LOMEM		support for 32 bit devices in a 64 bit world
 *					if set and a lomemory pool is available
 *					grab pages from it... this also implies
 *					KMA_NOPAGEWAIT
 */

kern_return_t
kernel_memory_allocate(
	register vm_map_t	map,
	register vm_offset_t	*addrp,
	register vm_size_t	size,
	register vm_offset_t	mask,
	int			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_object_offset_t	pg_offset;
	vm_map_entry_t		entry = NULL;
	vm_map_offset_t		map_addr, fill_start;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, fill_size;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		guard_page_list = NULL;
	vm_page_t		wired_page_list = NULL;
	int			guard_page_count = 0;
	int			wired_page_count = 0;
	int			i;
	int			vm_alloc_flags;
	vm_prot_t		kma_prot;

	if (! vm_kernel_ready) {
		panic("kernel_memory_allocate: VM is not ready");
	}

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t) mask;
	vm_alloc_flags = 0;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down
	 * limit raised to 2GB with 128GB max physical limit
	 */
	if (map_size > (1ULL << 31)) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/*
	 * Guard pages:
	 *
	 * Guard pages are implemented as fictitious pages.  By placing guard pages
	 * on either end of a stack, they can help detect cases where a thread walks
	 * off either end of its stack.  They are allocated and set up here and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages.  If those were requested, then back it out of fill_size
	 * since vm_map_find_space() takes just the actual size not including
	 * guard pages.  Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */

	fill_start = 0;
	fill_size = map_size;

	if (flags & KMA_GUARD_FIRST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
		fill_start += PAGE_SIZE_64;
		fill_size -= PAGE_SIZE_64;
		if (map_size < fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	if (flags & KMA_GUARD_LAST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
		fill_size -= PAGE_SIZE_64;
		if (map_size <= fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
	assert(wired_page_count * PAGE_SIZE_64 == fill_size);

	/*
	 * Grab all fictitious guard pages up front so we never have to
	 * back out a partially populated range later.
	 */
	for (i = 0; i < guard_page_count; i++) {
		for (;;) {
			mem = vm_page_grab_guard();

			if (mem != VM_PAGE_NULL)
				break;
			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			vm_page_more_fictitious();
		}
		mem->pageq.next = (queue_entry_t)guard_page_list;
		guard_page_list = mem;
	}

	if (! (flags & KMA_VAONLY)) {
		/* likewise grab all the real pages before touching the map */
		for (i = 0; i < wired_page_count; i++) {
			uint64_t	unavailable;

			for (;;) {
				if (flags & KMA_LOMEM)
					mem = vm_page_grablo();
				else
					mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				if (flags & KMA_NOPAGEWAIT) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				/* refuse to wire more than physical memory can back */
				unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;

				if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				VM_PAGE_WAIT();
			}
			mem->pageq.next = (queue_entry_t)wired_page_list;
			wired_page_list = mem;
		}
	}

	/*
	 *	Allocate a new object (if necessary).  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else if ((flags & KMA_COMPRESSOR) != 0) {
		object = compressor_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	/* returns with the map locked on success */
	kr = vm_map_find_space(map, &map_addr,
			       fill_size, map_mask,
			       vm_alloc_flags, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		goto out;
	}

	/* shared objects (kernel/compressor) are indexed by VA; private ones by 0 */
	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object || object == compressor_object) ?
		map_addr : 0;

	if (object != compressor_object)
		entry->wired_count++;

	if (flags & KMA_PERMANENT)
		entry->permanent = TRUE;

	if (object != kernel_object && object != compressor_object)
		vm_object_reference(object);

	vm_object_lock(object);
	vm_map_unlock(map);

	pg_offset = 0;

	/* leading guard page, if requested */
	if (fill_start) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		pg_offset += PAGE_SIZE_64;
	}

	kma_prot = VM_PROT_READ | VM_PROT_WRITE;

	if (flags & KMA_VAONLY) {
		pg_offset = fill_start + fill_size;
	} else {
		/* insert and pmap-enter each wired page */
		for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
			if (wired_page_list == NULL)
				panic("kernel_memory_allocate: wired_page_list == NULL");

			mem = wired_page_list;
			wired_page_list = (vm_page_t)mem->pageq.next;
			mem->pageq.next = NULL;
			mem->wire_count++;

			vm_page_insert(mem, object, offset + pg_offset);

			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;

			PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
					   PMAP_OPTIONS_NOWAIT, pe_result);

			if (pe_result == KERN_RESOURCE_SHORTAGE) {
				/*
				 * retry the mapping without NOWAIT; PMAP_ENTER
				 * may block, so the object lock must be dropped
				 */
				vm_object_unlock(object);

				PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

				vm_object_lock(object);
			}
			if (flags & KMA_NOENCRYPT) {
				bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);

				pmap_set_noencrypt(mem->phys_page);
			}
		}
	}
	/* trailing guard page, if requested */
	if ((fill_start + fill_size) < map_size) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
	}
	/* all pre-grabbed pages must have been consumed exactly */
	if (guard_page_list || wired_page_list)
		panic("kernel_memory_allocate: non empty list\n");

	if (! (flags & KMA_VAONLY)) {
		vm_page_lockspin_queues();
		vm_page_wire_count += wired_page_count;
		vm_page_unlock_queues();
	}

	vm_object_unlock(object);

	/*
	 * now that the pages are wired, we no longer have to fear coalesce
	 */
	if (object == kernel_object || object == compressor_object)
		vm_map_simplify(map, map_addr);
	else
		vm_object_deallocate(object);

	/*
	 *	Return the memory, not zeroed.
	 */
	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;

out:
	/* release any pages grabbed before the failure */
	if (guard_page_list)
		vm_page_free_list(guard_page_list, FALSE);

	if (wired_page_list)
		vm_page_free_list(wired_page_list, FALSE);

	return kr;
}
520
/*
 *	kernel_memory_populate:
 *
 *	Back an already-reserved kernel VA range [addr, addr+size) with
 *	wired pages.  With KMA_COMPRESSOR the pages are inserted into
 *	compressor_object and stay busy until mapped; otherwise
 *	KMA_KOBJECT is required and the pages back kernel_object
 *	(any other object panics -- see comment below).  The two flags
 *	are mutually exclusive.
 */
kern_return_t
kernel_memory_populate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		page_list = NULL;
	int			page_count = 0;
	int			i;

	page_count = (int) (size / PAGE_SIZE_64);

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {

		/* grab all pages up front; the compressor path always waits */
		for (i = 0; i < page_count; i++) {
			for (;;) {
				mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				VM_PAGE_WAIT();
			}
			mem->pageq.next = (queue_entry_t) page_list;
			page_list = mem;
		}
		/* compressor_object is indexed by VA */
		offset = addr;
		object = compressor_object;

		vm_object_lock(object);

		for (pg_offset = 0;
		     pg_offset < size;
		     pg_offset += PAGE_SIZE_64) {

			mem = page_list;
			page_list = (vm_page_t) mem->pageq.next;
			mem->pageq.next = NULL;

			vm_page_insert(mem, object, offset + pg_offset);
			assert(mem->busy);

			PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
					   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
					   0, TRUE, PMAP_OPTIONS_NOWAIT, pe_result);

			if (pe_result == KERN_RESOURCE_SHORTAGE) {
				/*
				 * retry the mapping without NOWAIT; PMAP_ENTER
				 * may block, so drop the object lock first
				 */
				vm_object_unlock(object);

				PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
					   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);

				vm_object_lock(object);
			}
			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;
			mem->compressor = TRUE;
		}
		vm_object_unlock(object);

		return KERN_SUCCESS;
	}

	/* non-compressor path: grab pages honoring KMA_LOMEM / KMA_NOPAGEWAIT */
	for (i = 0; i < page_count; i++) {
		for (;;) {
			if (flags & KMA_LOMEM)
				mem = vm_page_grablo();
			else
				mem = vm_page_grab();

			if (mem != VM_PAGE_NULL)
				break;

			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			if ((flags & KMA_LOMEM) &&
			    (vm_lopage_needed == TRUE)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			VM_PAGE_WAIT();
		}
		mem->pageq.next = (queue_entry_t) page_list;
		page_list = mem;
	}
	if (flags & KMA_KOBJECT) {
		/* kernel_object is indexed by VA */
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		/*
		 * If it's not the kernel object, we need to:
		 *      lock map;
		 *      lookup entry;
		 *      lock object;
		 *      take reference on object;
		 *      unlock map;
		 */
		panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		if (page_list == NULL)
			panic("kernel_memory_populate: page_list == NULL");

		mem = page_list;
		page_list = (vm_page_t) mem->pageq.next;
		mem->pageq.next = NULL;

		mem->wire_count++;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
				   PMAP_OPTIONS_NOWAIT, pe_result);

		if (pe_result == KERN_RESOURCE_SHORTAGE) {
			/*
			 * retry the mapping without NOWAIT; PMAP_ENTER
			 * may block, so drop the object lock first
			 */
			vm_object_unlock(object);

			PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

			vm_object_lock(object);
		}
		if (flags & KMA_NOENCRYPT) {
			bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
			pmap_set_noencrypt(mem->phys_page);
		}
	}
	vm_page_lock_queues();
	vm_page_wire_count += page_count;
	vm_page_unlock_queues();

	vm_object_unlock(object);

	return KERN_SUCCESS;

out:
	/* release any pages grabbed before the failure */
	if (page_list)
		vm_page_free_list(page_list, FALSE);

	return kr;
}
689
690
/*
 *	kernel_memory_depopulate:
 *
 *	Undo kernel_memory_populate(): strip the wired pages backing
 *	[addr, addr+size) out of compressor_object (KMA_COMPRESSOR) or
 *	kernel_object (KMA_KOBJECT) and free them.  Exactly one of the
 *	two flags must be set; any other object panics (see below).
 *	The VA range itself remains reserved in the map.
 */
void
kernel_memory_depopulate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	vm_page_t		mem;
	vm_page_t		local_freeq = NULL;

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {
		offset = addr;
		object = compressor_object;

		vm_object_lock(object);
	} else if (flags & KMA_KOBJECT) {
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		offset = 0;
		object = NULL;
		/*
		 * If it's not the kernel object, we need to:
		 *      lock map;
		 *      lookup entry;
		 *      lock object;
		 *      unlock map;
		 */
		panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}
	/*
	 * Revoke access first so nothing can touch the pages while they
	 * are torn down.  offset == addr for both supported objects, so
	 * this covers the right VA range.
	 */
	pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		mem = vm_page_lookup(object, offset + pg_offset);

		assert(mem);

		/* drop all pmap mappings of this physical page */
		pmap_disconnect(mem->phys_page);

		mem->busy = TRUE;

		assert(mem->tabled);
		vm_page_remove(mem, TRUE);
		assert(mem->busy);

		/* page must not be on any queue before we chain it locally */
		assert(mem->pageq.next == NULL &&
		       mem->pageq.prev == NULL);
		mem->pageq.next = (queue_entry_t)local_freeq;
		local_freeq = mem;
	}
	vm_object_unlock(object);

	if (local_freeq)
		vm_page_free_list(local_freeq, TRUE);
}
757
758 /*
759 * kmem_alloc:
760 *
761 * Allocate wired-down memory in the kernel's address map
762 * or a submap. The memory is not zero-filled.
763 */
764
765 kern_return_t
766 kmem_alloc(
767 vm_map_t map,
768 vm_offset_t *addrp,
769 vm_size_t size)
770 {
771 kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0);
772 TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
773 return kr;
774 }
775
776 /*
777 * kmem_realloc:
778 *
779 * Reallocate wired-down memory in the kernel's address map
780 * or a submap. Newly allocated pages are not zeroed.
781 * This can only be used on regions allocated with kmem_alloc.
782 *
783 * If successful, the pages in the old region are mapped twice.
784 * The old region is unchanged. Use kmem_free to get rid of it.
785 */
786 kern_return_t
787 kmem_realloc(
788 vm_map_t map,
789 vm_offset_t oldaddr,
790 vm_size_t oldsize,
791 vm_offset_t *newaddrp,
792 vm_size_t newsize)
793 {
794 vm_object_t object;
795 vm_object_offset_t offset;
796 vm_map_offset_t oldmapmin;
797 vm_map_offset_t oldmapmax;
798 vm_map_offset_t newmapaddr;
799 vm_map_size_t oldmapsize;
800 vm_map_size_t newmapsize;
801 vm_map_entry_t oldentry;
802 vm_map_entry_t newentry;
803 vm_page_t mem;
804 kern_return_t kr;
805
806 oldmapmin = vm_map_trunc_page(oldaddr,
807 VM_MAP_PAGE_MASK(map));
808 oldmapmax = vm_map_round_page(oldaddr + oldsize,
809 VM_MAP_PAGE_MASK(map));
810 oldmapsize = oldmapmax - oldmapmin;
811 newmapsize = vm_map_round_page(newsize,
812 VM_MAP_PAGE_MASK(map));
813
814
815 /*
816 * Find the VM object backing the old region.
817 */
818
819 vm_map_lock(map);
820
821 if (!vm_map_lookup_entry(map, oldmapmin, &oldentry))
822 panic("kmem_realloc");
823 object = oldentry->object.vm_object;
824
825 /*
826 * Increase the size of the object and
827 * fill in the new region.
828 */
829
830 vm_object_reference(object);
831 /* by grabbing the object lock before unlocking the map */
832 /* we guarantee that we will panic if more than one */
833 /* attempt is made to realloc a kmem_alloc'd area */
834 vm_object_lock(object);
835 vm_map_unlock(map);
836 if (object->vo_size != oldmapsize)
837 panic("kmem_realloc");
838 object->vo_size = newmapsize;
839 vm_object_unlock(object);
840
841 /* allocate the new pages while expanded portion of the */
842 /* object is still not mapped */
843 kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
844 vm_object_round_page(newmapsize-oldmapsize));
845
846 /*
847 * Find space for the new region.
848 */
849
850 kr = vm_map_find_space(map, &newmapaddr, newmapsize,
851 (vm_map_offset_t) 0, 0, &newentry);
852 if (kr != KERN_SUCCESS) {
853 vm_object_lock(object);
854 for(offset = oldmapsize;
855 offset < newmapsize; offset += PAGE_SIZE) {
856 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
857 VM_PAGE_FREE(mem);
858 }
859 }
860 object->vo_size = oldmapsize;
861 vm_object_unlock(object);
862 vm_object_deallocate(object);
863 return kr;
864 }
865 newentry->object.vm_object = object;
866 newentry->offset = 0;
867 assert (newentry->wired_count == 0);
868
869
870 /* add an extra reference in case we have someone doing an */
871 /* unexpected deallocate */
872 vm_object_reference(object);
873 vm_map_unlock(map);
874
875 kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize, VM_PROT_DEFAULT, FALSE);
876 if (KERN_SUCCESS != kr) {
877 vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0);
878 vm_object_lock(object);
879 for(offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
880 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
881 VM_PAGE_FREE(mem);
882 }
883 }
884 object->vo_size = oldmapsize;
885 vm_object_unlock(object);
886 vm_object_deallocate(object);
887 return (kr);
888 }
889 vm_object_deallocate(object);
890
891 *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
892 return KERN_SUCCESS;
893 }
894
895 /*
896 * kmem_alloc_kobject:
897 *
898 * Allocate wired-down memory in the kernel's address map
899 * or a submap. The memory is not zero-filled.
900 *
901 * The memory is allocated in the kernel_object.
902 * It may not be copied with vm_map_copy, and
903 * it may not be reallocated with kmem_realloc.
904 */
905
906 kern_return_t
907 kmem_alloc_kobject(
908 vm_map_t map,
909 vm_offset_t *addrp,
910 vm_size_t size)
911 {
912 return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT);
913 }
914
915 /*
916 * kmem_alloc_aligned:
917 *
918 * Like kmem_alloc_kobject, except that the memory is aligned.
919 * The size should be a power-of-2.
920 */
921
922 kern_return_t
923 kmem_alloc_aligned(
924 vm_map_t map,
925 vm_offset_t *addrp,
926 vm_size_t size)
927 {
928 if ((size & (size - 1)) != 0)
929 panic("kmem_alloc_aligned: size not aligned");
930 return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT);
931 }
932
933 /*
934 * kmem_alloc_pageable:
935 *
936 * Allocate pageable memory in the kernel's address map.
937 */
938
/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable (unwired) memory in the kernel's address map.
 *	No physical pages are allocated here; the range is backed by a
 *	null object and pages are provided on first touch.
 */
kern_return_t
kmem_alloc_pageable(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	vm_map_offset_t map_addr;
	vm_map_size_t	map_size;
	kern_return_t kr;

#ifndef normal
	/*
	 * NOTE(review): "normal" does not appear to be #defined anywhere
	 * visible, so this branch is always taken and the search hint
	 * skips the first page of the map.  Looks like debug scaffolding;
	 * confirm before changing.
	 */
	map_addr = (vm_map_min(map)) + 0x1000;
#else
	map_addr = vm_map_min(map);
#endif
	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));

	/* map_addr is only a hint: VM_FLAGS_ANYWHERE lets the map choose */
	kr = vm_map_enter(map, &map_addr, map_size,
			  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS)
		return kr;

	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;
}
968
969 /*
970 * kmem_free:
971 *
972 * Release a region of kernel virtual memory allocated
973 * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
974 * and return the physical pages associated with that region.
975 */
976
977 void
978 kmem_free(
979 vm_map_t map,
980 vm_offset_t addr,
981 vm_size_t size)
982 {
983 kern_return_t kr;
984
985 assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
986
987 TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);
988
989 if(size == 0) {
990 #if MACH_ASSERT
991 printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n",map,(uint64_t)addr);
992 #endif
993 return;
994 }
995
996 kr = vm_map_remove(map,
997 vm_map_trunc_page(addr,
998 VM_MAP_PAGE_MASK(map)),
999 vm_map_round_page(addr + size,
1000 VM_MAP_PAGE_MASK(map)),
1001 VM_MAP_REMOVE_KUNWIRE);
1002 if (kr != KERN_SUCCESS)
1003 panic("kmem_free");
1004 }
1005
1006 /*
1007 * Allocate new pages in an object.
1008 */
1009
1010 kern_return_t
1011 kmem_alloc_pages(
1012 register vm_object_t object,
1013 register vm_object_offset_t offset,
1014 register vm_object_size_t size)
1015 {
1016 vm_object_size_t alloc_size;
1017
1018 alloc_size = vm_object_round_page(size);
1019 vm_object_lock(object);
1020 while (alloc_size) {
1021 register vm_page_t mem;
1022
1023
1024 /*
1025 * Allocate a page
1026 */
1027 while (VM_PAGE_NULL ==
1028 (mem = vm_page_alloc(object, offset))) {
1029 vm_object_unlock(object);
1030 VM_PAGE_WAIT();
1031 vm_object_lock(object);
1032 }
1033 mem->busy = FALSE;
1034
1035 alloc_size -= PAGE_SIZE;
1036 offset += PAGE_SIZE;
1037 }
1038 vm_object_unlock(object);
1039 return KERN_SUCCESS;
1040 }
1041
1042 /*
1043 * Remap wired pages in an object into a new region.
1044 * The object is assumed to be mapped into the kernel map or
1045 * a submap.
1046 */
void
kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection)
{

	vm_map_offset_t		map_start;
	vm_map_offset_t		map_end;

	/*
	 *	Mark the pmap region as not pageable.
	 */
	map_start = vm_map_trunc_page(start,
				      VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
				    VM_MAP_PAGE_MASK(kernel_map));

	pmap_pageable(kernel_pmap, map_start, map_end, FALSE);

	/* walk the range one page at a time, advancing the object offset */
	while (map_start < map_end) {
		register vm_page_t	mem;

		vm_object_lock(object);

		/*
		 *	Find a page
		 */
		if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			panic("kmem_remap_pages");

		/*
		 *	Wire it down (again)
		 */
		vm_page_lockspin_queues();
		vm_page_wire(mem);
		vm_page_unlock_queues();
		vm_object_unlock(object);

		/*
		 * ENCRYPTED SWAP:
		 * The page is supposed to be wired now, so it
		 * shouldn't be encrypted at this point.  It can
		 * safely be entered in the page table.
		 */
		ASSERT_PAGE_DECRYPTED(mem);

		/*
		 *	Enter it in the kernel pmap.  The page isn't busy,
		 *	but this shouldn't be a problem because it is wired.
		 */

		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER(kernel_pmap, map_start, mem, protection, VM_PROT_NONE, 0, TRUE);

		map_start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
}
1110
1111 /*
1112 * kmem_suballoc:
1113 *
1114 * Allocates a map to manage a subrange
1115 * of the kernel virtual address space.
1116 *
1117 * Arguments are as follows:
1118 *
1119 * parent Map to take range from
1120 * addr Address of start of range (IN/OUT)
1121 * size Size of range to find
1122 * pageable Can region be paged
1123 * anywhere Can region be located anywhere in map
1124 * new_map Pointer to new submap
1125 */
kern_return_t
kmem_suballoc(
	vm_map_t	parent,
	vm_offset_t	*addr,
	vm_size_t	size,
	boolean_t	pageable,
	int		flags,
	vm_map_t	*new_map)
{
	vm_map_t	map;
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(parent));

	/*
	 *	Need reference on submap object because it is internal
	 *	to the vm_system.  vm_object_enter will never be called
	 *	on it (usual source of reference for vm_map_enter).
	 */
	vm_object_reference(vm_submap_object);

	/* honor a fixed address if the caller didn't ask for "anywhere" */
	map_addr = ((flags & VM_FLAGS_ANYWHERE)
		    ? vm_map_min(parent)
		    : vm_map_trunc_page(*addr,
					VM_MAP_PAGE_MASK(parent)));

	kr = vm_map_enter(parent, &map_addr, map_size,
			  (vm_map_offset_t) 0, flags,
			  vm_submap_object, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}

	/* the submap shares the parent's pmap; take a reference on it */
	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
	if (map == VM_MAP_NULL)
		panic("kmem_suballoc: vm_map_create failed");	/* "can't happen" */
	/* inherit the parent map's page size */
	vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));

	kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * See comment preceding vm_map_submap().
		 */
		vm_map_remove(parent, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS);
		vm_map_deallocate(map);	/* also removes ref to pmap */
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}
	*addr = CAST_DOWN(vm_offset_t, map_addr);
	*new_map = map;
	return (KERN_SUCCESS);
}
1185
1186 /*
1187 * kmem_init:
1188 *
1189 * Initialize the kernel's virtual memory map, taking
1190 * into account all memory allocated up to this time.
1191 */
void
kmem_init(
	vm_offset_t	start,
	vm_offset_t	end)
{
	vm_map_offset_t map_start;
	vm_map_offset_t map_end;

	map_start = vm_map_trunc_page(start,
				      VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
				    VM_MAP_PAGE_MASK(kernel_map));

	/* the kernel map spans from the bottom of kernel/kext VA to "end" */
	kernel_map = vm_map_create(pmap_kernel(),VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			    map_end, FALSE);
	/*
	 *	Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		vm_map_offset_t map_addr;
		kern_return_t kr;

		/* carve out [VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_start) with
		 * a fixed, no-access entry so nothing else allocates there */
		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
		kr = vm_map_enter(kernel_map,
			&map_addr,
		    	(vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			(vm_map_offset_t) 0,
			VM_FLAGS_FIXED | VM_FLAGS_NO_PMAP_CHECK,
			VM_OBJECT_NULL,
			(vm_object_offset_t) 0, FALSE,
			VM_PROT_NONE, VM_PROT_NONE,
			VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
			      (uint64_t) start, (uint64_t) end,
			      (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			      (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			      kr);
		}
	}

	/*
	 * Set the default global user wire limit which limits the amount of
	 * memory that can be locked via mlock().  We set this to the total
	 * amount of memory that are potentially usable by a user app (max_mem)
	 * minus a certain amount.  This can be overridden via a sysctl.
	 */
	vm_global_no_user_wire_amount = MIN(max_mem*20/100,
					    VM_NOT_USER_WIREABLE);
	vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;

	/* the default per user limit is the same as the global limit */
	vm_user_wire_limit = vm_global_user_wire_limit;
}
1247
1248
1249 /*
1250 * Routine: copyinmap
1251 * Purpose:
1252 * Like copyin, except that fromaddr is an address
1253 * in the specified VM map. This implementation
1254 * is incomplete; it handles the current user map
1255 * and the kernel map/submaps.
1256 */
1257 kern_return_t
1258 copyinmap(
1259 vm_map_t map,
1260 vm_map_offset_t fromaddr,
1261 void *todata,
1262 vm_size_t length)
1263 {
1264 kern_return_t kr = KERN_SUCCESS;
1265 vm_map_t oldmap;
1266
1267 if (vm_map_pmap(map) == pmap_kernel())
1268 {
1269 /* assume a correct copy */
1270 memcpy(todata, CAST_DOWN(void *, fromaddr), length);
1271 }
1272 else if (current_map() == map)
1273 {
1274 if (copyin(fromaddr, todata, length) != 0)
1275 kr = KERN_INVALID_ADDRESS;
1276 }
1277 else
1278 {
1279 vm_map_reference(map);
1280 oldmap = vm_map_switch(map);
1281 if (copyin(fromaddr, todata, length) != 0)
1282 kr = KERN_INVALID_ADDRESS;
1283 vm_map_switch(oldmap);
1284 vm_map_deallocate(map);
1285 }
1286 return kr;
1287 }
1288
1289 /*
1290 * Routine: copyoutmap
1291 * Purpose:
1292 * Like copyout, except that toaddr is an address
1293 * in the specified VM map. This implementation
1294 * is incomplete; it handles the current user map
1295 * and the kernel map/submaps.
1296 */
1297 kern_return_t
1298 copyoutmap(
1299 vm_map_t map,
1300 void *fromdata,
1301 vm_map_address_t toaddr,
1302 vm_size_t length)
1303 {
1304 if (vm_map_pmap(map) == pmap_kernel()) {
1305 /* assume a correct copy */
1306 memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
1307 return KERN_SUCCESS;
1308 }
1309
1310 if (current_map() != map)
1311 return KERN_NOT_SUPPORTED;
1312
1313 if (copyout(fromdata, toaddr, length) != 0)
1314 return KERN_INVALID_ADDRESS;
1315
1316 return KERN_SUCCESS;
1317 }
1318
1319
/*
 * Check whether the address range [off, off+len) in `map` is backed
 * (possibly through submaps and shadow chains) by the same pager at
 * the same backing-store offset as the I/O described by
 * (pager, file_off).  Based on the visible return paths:
 *   KERN_SUCCESS         - no mapping of this pager overlaps the I/O
 *   KERN_ALREADY_WAITING - the mapping and the I/O coincide exactly
 *                          at the starting offset
 *   KERN_FAILURE         - a partial/misaligned overlap was found
 * NOTE(review): `original_offset` is assigned but never read; it
 * appears to be vestigial.
 */
kern_return_t
vm_conflict_check(
	vm_map_t		map,
	vm_map_offset_t	off,
	vm_map_size_t		len,
	memory_object_t	pager,
	vm_object_offset_t	file_off)
{
	vm_map_entry_t		entry;
	vm_object_t		obj;
	vm_object_offset_t	obj_off;
	vm_map_t		base_map;
	vm_map_offset_t		base_offset;
	vm_map_offset_t		original_offset;
	kern_return_t		kr;
	vm_map_size_t		local_len;

	base_map = map;
	base_offset = off;
	original_offset = off;
	kr = KERN_SUCCESS;
	vm_map_lock(map);
	while(vm_map_lookup_entry(map, off, &entry)) {
		local_len = len;

		if (entry->object.vm_object == VM_OBJECT_NULL) {
			/* No backing object here: nothing to conflict with. */
			vm_map_unlock(map);
			return KERN_SUCCESS;
		}
		if (entry->is_sub_map) {
			vm_map_t	old_map;

			/*
			 * Descend into the submap.  Lock the submap before
			 * unlocking the parent (hand-over-hand) and translate
			 * off into the submap's address space, then retry the
			 * lookup there.
			 */
			old_map = map;
			vm_map_lock(entry->object.sub_map);
			map = entry->object.sub_map;
			off = entry->offset + (off - entry->vme_start);
			vm_map_unlock(old_map);
			continue;
		}
		obj = entry->object.vm_object;
		obj_off = (off - entry->vme_start) + entry->offset;
		/*
		 * Walk down the shadow chain to the bottom object, which
		 * is the one that actually talks to the pager; accumulate
		 * the shadow offsets as we go.
		 */
		while(obj->shadow) {
			obj_off += obj->vo_shadow_offset;
			obj = obj->shadow;
		}
		if((obj->pager_created) && (obj->pager == pager)) {
			if(((obj->paging_offset) + obj_off) == file_off) {
				/*
				 * Exact match with the I/O's backing-store
				 * offset.  Only acceptable at the very start
				 * of the range being checked.
				 */
				if(off != base_offset) {
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				kr = KERN_ALREADY_WAITING;
			} else {
				vm_object_offset_t	obj_off_aligned;
				vm_object_offset_t	file_off_aligned;

				/* Compare at page granularity. */
				obj_off_aligned = obj_off & ~PAGE_MASK;
				file_off_aligned = file_off & ~PAGE_MASK;

				if (file_off_aligned == (obj->paging_offset + obj_off_aligned)) {
					/*
					 * the target map and the file offset start in the same page
					 * but are not identical...
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off < (obj->paging_offset + obj_off_aligned)) &&
				    ((file_off + len) > (obj->paging_offset + obj_off_aligned))) {
					/*
					 * some portion of the tail of the I/O will fall
					 * within the encompass of the target map
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off_aligned > (obj->paging_offset + obj_off)) &&
				    (file_off_aligned < (obj->paging_offset + obj_off) + len)) {
					/*
					 * the beginning page of the file offset falls within
					 * the target map's encompass
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
			}
		} else if(kr != KERN_SUCCESS) {
			/*
			 * A previous entry matched exactly but this one is
			 * backed differently: treat the mix as a conflict.
			 */
			vm_map_unlock(map);
			return KERN_FAILURE;
		}

		if(len <= ((entry->vme_end - entry->vme_start) -
			   (off - entry->vme_start))) {
			/* The entry covers the rest of the range: done. */
			vm_map_unlock(map);
			return kr;
		} else {
			/* Consume the portion covered by this entry. */
			len -= (entry->vme_end - entry->vme_start) -
				(off - entry->vme_start);
		}
		/*
		 * Advance past the checked portion and restart the lookup
		 * from the top-level map (we may currently hold a submap).
		 */
		base_offset = base_offset + (local_len - len);
		file_off = file_off + (local_len - len);
		off = base_offset;
		if(map != base_map) {
			vm_map_unlock(map);
			vm_map_lock(base_map);
			map = base_map;
		}
	}

	/* Ran off the end of the mapped entries: no further conflicts. */
	vm_map_unlock(map);
	return kr;
}