/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_kern.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>
#include <vm/cpm.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <sys/kdebug.h>

/*
 *	Variables exported by this module.
 */

vm_map_t	kernel_map;
vm_map_t	kernel_pageable_map;

extern boolean_t vm_kernel_ready;

/*
 * Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size);

extern void kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection);

kern_return_t
kmem_alloc_contig(
	vm_map_t		map,
	vm_offset_t		*addrp,
	vm_size_t		size,
	vm_offset_t		mask,
	ppnum_t			max_pnum,
	ppnum_t			pnum_mask,
	int			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		map_addr;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, i;
	vm_map_entry_t		entry;
	vm_page_t		m, pages;
	kern_return_t		kr;

	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT)))
		return KERN_INVALID_ARGUMENT;

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t)mask;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Allocate a new object (if necessary) and the reference we
	 *	will be donating to the map entry.  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		return kr;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object) ?
		map_addr : 0;

	/* Take an extra object ref in case the map entry gets deleted */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

	if (kr != KERN_SUCCESS) {
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr,
						VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size,
						VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		*addrp = 0;
		return kr;
	}

	vm_object_lock(object);
	for (i = 0; i < map_size; i += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
		m->busy = FALSE;
		vm_page_insert(m, object, offset + i);
	}
	vm_object_unlock(object);

	kr = vm_map_wire(map,
			 vm_map_trunc_page(map_addr,
					   VM_MAP_PAGE_MASK(map)),
			 vm_map_round_page(map_addr + map_size,
					   VM_MAP_PAGE_MASK(map)),
			 VM_PROT_DEFAULT,
			 FALSE);
	if (kr != KERN_SUCCESS) {
		if (object == kernel_object) {
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + map_size);
			vm_object_unlock(object);
		}
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr,
						VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size,
						VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		return kr;
	}
	vm_object_deallocate(object);

	if (object == kernel_object)
		vm_map_simplify(map, map_addr);

	*addrp = (vm_offset_t) map_addr;
	assert((vm_map_offset_t) *addrp == map_addr);
	return KERN_SUCCESS;
}
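
/*
 * Illustrative sketch (not part of the build): how a caller might use
 * kmem_alloc_contig() to obtain a wired, physically contiguous buffer
 * that a 32-bit DMA engine can address.  The function name, the 4GB
 * physical ceiling, and the zero alignment masks are hypothetical
 * values chosen for the example.
 */
#if 0
static kern_return_t
example_alloc_contig_under_4gb(vm_offset_t *bufp, vm_size_t size)
{
	/* highest physical page number the device can address: just below 4GB */
	ppnum_t	max_pnum = (ppnum_t)((0x100000000ULL >> PAGE_SHIFT) - 1);

	return kmem_alloc_contig(kernel_map, bufp, size,
				 0,		/* no extra virtual alignment */
				 max_pnum,	/* physical upper bound */
				 0,		/* no physical alignment constraint */
				 0);		/* no KMA_* options */
}
#endif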

/*
 *	Master entry point for allocating kernel memory.
 *	NOTE: this routine is _never_ interrupt safe.
 *
 *	map	: map to allocate into
 *	addrp	: pointer to start address of new memory
 *	size	: size of memory requested
 *	flags	: options
 *		  KMA_HERE		*addrp is base address, else "anywhere"
 *		  KMA_NOPAGEWAIT	don't wait for pages if unavailable
 *		  KMA_KOBJECT		use kernel_object
 *		  KMA_LOMEM		support for 32-bit devices in a 64-bit world;
 *					if set and a low-memory pool is available,
 *					grab pages from it (this also implies
 *					KMA_NOPAGEWAIT)
 */

kern_return_t
kernel_memory_allocate(
	register vm_map_t	map,
	register vm_offset_t	*addrp,
	register vm_size_t	size,
	register vm_offset_t	mask,
	int			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_object_offset_t	pg_offset;
	vm_map_entry_t		entry = NULL;
	vm_map_offset_t		map_addr, fill_start;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, fill_size;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		guard_page_list = NULL;
	vm_page_t		wired_page_list = NULL;
	int			guard_page_count = 0;
	int			wired_page_count = 0;
	int			i;
	int			vm_alloc_flags;
	vm_prot_t		kma_prot;

	if (! vm_kernel_ready) {
		panic("kernel_memory_allocate: VM is not ready");
	}

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t) mask;
	vm_alloc_flags = 0;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down
	 * limit raised to 2GB with 128GB max physical limit
	 */
	if (map_size > (1ULL << 31)) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/*
	 * Guard pages:
	 *
	 * Guard pages are implemented as fictitious pages.  By placing guard pages
	 * on either end of a stack, they can help detect cases where a thread walks
	 * off either end of its stack.  They are allocated and set up here, and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages.  If those were requested, then back it out of fill_size
	 * since vm_map_find_space() takes just the actual size not including
	 * guard pages.  Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */

	fill_start = 0;
	fill_size = map_size;

	if (flags & KMA_GUARD_FIRST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
		fill_start += PAGE_SIZE_64;
		fill_size -= PAGE_SIZE_64;
		if (map_size < fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	if (flags & KMA_GUARD_LAST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
		fill_size -= PAGE_SIZE_64;
		if (map_size <= fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
	assert(wired_page_count * PAGE_SIZE_64 == fill_size);

	for (i = 0; i < guard_page_count; i++) {
		for (;;) {
			mem = vm_page_grab_guard();

			if (mem != VM_PAGE_NULL)
				break;
			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			vm_page_more_fictitious();
		}
		mem->pageq.next = (queue_entry_t)guard_page_list;
		guard_page_list = mem;
	}

	if (! (flags & KMA_VAONLY)) {
		for (i = 0; i < wired_page_count; i++) {
			uint64_t	unavailable;

			for (;;) {
				if (flags & KMA_LOMEM)
					mem = vm_page_grablo();
				else
					mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				if (flags & KMA_NOPAGEWAIT) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;

				if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				VM_PAGE_WAIT();
			}
			mem->pageq.next = (queue_entry_t)wired_page_list;
			wired_page_list = mem;
		}
	}

	/*
	 *	Allocate a new object (if necessary).  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else if ((flags & KMA_COMPRESSOR) != 0) {
		object = compressor_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr,
			       fill_size, map_mask,
			       vm_alloc_flags, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		goto out;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object || object == compressor_object) ?
		map_addr : 0;

	if (object != compressor_object)
		entry->wired_count++;

	if (flags & KMA_PERMANENT)
		entry->permanent = TRUE;

	if (object != kernel_object && object != compressor_object)
		vm_object_reference(object);

	vm_object_lock(object);
	vm_map_unlock(map);

	pg_offset = 0;

	if (fill_start) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		pg_offset += PAGE_SIZE_64;
	}

	kma_prot = VM_PROT_READ | VM_PROT_WRITE;

	if (flags & KMA_VAONLY) {
		pg_offset = fill_start + fill_size;
	} else {
		for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
			if (wired_page_list == NULL)
				panic("kernel_memory_allocate: wired_page_list == NULL");

			mem = wired_page_list;
			wired_page_list = (vm_page_t)mem->pageq.next;
			mem->pageq.next = NULL;
			mem->wire_count++;

			vm_page_insert(mem, object, offset + pg_offset);

			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;

			PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
					   PMAP_OPTIONS_NOWAIT, pe_result);

			if (pe_result == KERN_RESOURCE_SHORTAGE) {
				vm_object_unlock(object);

				PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

				vm_object_lock(object);
			}
			if (flags & KMA_NOENCRYPT) {
				bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);

				pmap_set_noencrypt(mem->phys_page);
			}
		}
	}
	if ((fill_start + fill_size) < map_size) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
	}
	if (guard_page_list || wired_page_list)
		panic("kernel_memory_allocate: non empty list\n");

	if (! (flags & KMA_VAONLY)) {
		vm_page_lockspin_queues();
		vm_page_wire_count += wired_page_count;
		vm_page_unlock_queues();
	}

	vm_object_unlock(object);

	/*
	 * now that the pages are wired, we no longer have to fear coalescing
	 */
	if (object == kernel_object || object == compressor_object)
		vm_map_simplify(map, map_addr);
	else
		vm_object_deallocate(object);

	/*
	 * Return the memory, not zeroed.
	 */
	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;

out:
	if (guard_page_list)
		vm_page_free_list(guard_page_list, FALSE);

	if (wired_page_list)
		vm_page_free_list(wired_page_list, FALSE);

	return kr;
}
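
/*
 * Illustrative sketch (not part of the build): a kernel-stack-style call to
 * kernel_memory_allocate() that asks for a guard page at each end of the
 * wired region, backed by kernel_object.  As the guard-page comment above
 * describes, the requested size must include the space consumed by the
 * guard pages; the function name and data size here are hypothetical.
 */
#if 0
static kern_return_t
example_guarded_allocation(vm_offset_t *addrp, vm_size_t data_size)
{
	vm_size_t	total = data_size + 2 * PAGE_SIZE;	/* guard + data + guard */

	return kernel_memory_allocate(kernel_map, addrp, total, 0,
				      KMA_KOBJECT | KMA_KSTACK |
				      KMA_GUARD_FIRST | KMA_GUARD_LAST);
}
#endif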

kern_return_t
kernel_memory_populate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		page_list = NULL;
	int			page_count = 0;
	int			i;

	page_count = (int) (size / PAGE_SIZE_64);

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {

		for (i = 0; i < page_count; i++) {
			for (;;) {
				mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				VM_PAGE_WAIT();
			}
			mem->pageq.next = (queue_entry_t) page_list;
			page_list = mem;
		}
		offset = addr;
		object = compressor_object;

		vm_object_lock(object);

		for (pg_offset = 0;
		     pg_offset < size;
		     pg_offset += PAGE_SIZE_64) {

			mem = page_list;
			page_list = (vm_page_t) mem->pageq.next;
			mem->pageq.next = NULL;

			vm_page_insert(mem, object, offset + pg_offset);
			assert(mem->busy);

			PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
					   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
					   0, TRUE, PMAP_OPTIONS_NOWAIT, pe_result);

			if (pe_result == KERN_RESOURCE_SHORTAGE) {

				vm_object_unlock(object);

				PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
					   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);

				vm_object_lock(object);
			}
			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;
			mem->compressor = TRUE;
		}
		vm_object_unlock(object);

		return KERN_SUCCESS;
	}

	for (i = 0; i < page_count; i++) {
		for (;;) {
			if (flags & KMA_LOMEM)
				mem = vm_page_grablo();
			else
				mem = vm_page_grab();

			if (mem != VM_PAGE_NULL)
				break;

			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			if ((flags & KMA_LOMEM) &&
			    (vm_lopage_needed == TRUE)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			VM_PAGE_WAIT();
		}
		mem->pageq.next = (queue_entry_t) page_list;
		page_list = mem;
	}
	if (flags & KMA_KOBJECT) {
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		/*
		 * If it's not the kernel object, we need to:
		 *	lock map;
		 *	lookup entry;
		 *	lock object;
		 *	take reference on object;
		 *	unlock map;
		 */
		panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		if (page_list == NULL)
			panic("kernel_memory_populate: page_list == NULL");

		mem = page_list;
		page_list = (vm_page_t) mem->pageq.next;
		mem->pageq.next = NULL;

		mem->wire_count++;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
				   PMAP_OPTIONS_NOWAIT, pe_result);

		if (pe_result == KERN_RESOURCE_SHORTAGE) {

			vm_object_unlock(object);

			PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

			vm_object_lock(object);
		}
		if (flags & KMA_NOENCRYPT) {
			bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
			pmap_set_noencrypt(mem->phys_page);
		}
	}
	vm_page_lock_queues();
	vm_page_wire_count += page_count;
	vm_page_unlock_queues();

	vm_object_unlock(object);

	return KERN_SUCCESS;

out:
	if (page_list)
		vm_page_free_list(page_list, FALSE);

	return kr;
}


void
kernel_memory_depopulate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	vm_page_t		mem;
	vm_page_t		local_freeq = NULL;

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {
		offset = addr;
		object = compressor_object;

		vm_object_lock(object);
	} else if (flags & KMA_KOBJECT) {
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		offset = 0;
		object = NULL;
		/*
		 * If it's not the kernel object, we need to:
		 *	lock map;
		 *	lookup entry;
		 *	lock object;
		 *	unlock map;
		 */
		panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}
	pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		mem = vm_page_lookup(object, offset + pg_offset);

		assert(mem);

		pmap_disconnect(mem->phys_page);

		mem->busy = TRUE;

		assert(mem->tabled);
		vm_page_remove(mem, TRUE);
		assert(mem->busy);

		assert(mem->pageq.next == NULL &&
		       mem->pageq.prev == NULL);
		mem->pageq.next = (queue_entry_t)local_freeq;
		local_freeq = mem;
	}
	vm_object_unlock(object);

	if (local_freeq)
		vm_page_free_list(local_freeq, TRUE);
}

/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

kern_return_t
kmem_alloc(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0);
	TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
	return kr;
}

/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t
kmem_realloc(
	vm_map_t	map,
	vm_offset_t	oldaddr,
	vm_size_t	oldsize,
	vm_offset_t	*newaddrp,
	vm_size_t	newsize)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		oldmapmin;
	vm_map_offset_t		oldmapmax;
	vm_map_offset_t		newmapaddr;
	vm_map_size_t		oldmapsize;
	vm_map_size_t		newmapsize;
	vm_map_entry_t		oldentry;
	vm_map_entry_t		newentry;
	vm_page_t		mem;
	kern_return_t		kr;

	oldmapmin = vm_map_trunc_page(oldaddr,
				      VM_MAP_PAGE_MASK(map));
	oldmapmax = vm_map_round_page(oldaddr + oldsize,
				      VM_MAP_PAGE_MASK(map));
	oldmapsize = oldmapmax - oldmapmin;
	newmapsize = vm_map_round_page(newsize,
				       VM_MAP_PAGE_MASK(map));


	/*
	 *	Find the VM object backing the old region.
	 */

	vm_map_lock(map);

	if (!vm_map_lookup_entry(map, oldmapmin, &oldentry))
		panic("kmem_realloc");
	object = oldentry->object.vm_object;

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */

	vm_object_reference(object);
	/* by grabbing the object lock before unlocking the map */
	/* we guarantee that we will panic if more than one     */
	/* attempt is made to realloc a kmem_alloc'd area       */
	vm_object_lock(object);
	vm_map_unlock(map);
	if (object->vo_size != oldmapsize)
		panic("kmem_realloc");
	object->vo_size = newmapsize;
	vm_object_unlock(object);

	/* allocate the new pages while expanded portion of the */
	/* object is still not mapped */
	kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
			 vm_object_round_page(newmapsize-oldmapsize));

	/*
	 *	Find space for the new region.
	 */

	kr = vm_map_find_space(map, &newmapaddr, newmapsize,
			       (vm_map_offset_t) 0, 0, &newentry);
	if (kr != KERN_SUCCESS) {
		vm_object_lock(object);
		for (offset = oldmapsize;
		     offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return kr;
	}
	newentry->object.vm_object = object;
	newentry->offset = 0;
	assert(newentry->wired_count == 0);


	/* add an extra reference in case we have someone doing an */
	/* unexpected deallocate */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize, VM_PROT_DEFAULT, FALSE);
	if (KERN_SUCCESS != kr) {
		vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0);
		vm_object_lock(object);
		for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return (kr);
	}
	vm_object_deallocate(object);

	*newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
	return KERN_SUCCESS;
}

/*
 *	kmem_alloc_kobject:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_kobject(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT);
}

/*
 *	kmem_alloc_aligned:
 *
 *	Like kmem_alloc_kobject, except that the memory is aligned.
 *	The size should be a power-of-2.
 */

kern_return_t
kmem_alloc_aligned(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	if ((size & (size - 1)) != 0)
		panic("kmem_alloc_aligned: size not aligned");
	return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT);
}

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	vm_map_offset_t map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

#ifndef normal
	map_addr = (vm_map_min(map)) + PAGE_SIZE;
#else
	map_addr = vm_map_min(map);
#endif
	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));

	kr = vm_map_enter(map, &map_addr, map_size,
			  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS)
		return kr;

	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
 *	and return the physical pages associated with that region.
 */

void
kmem_free(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size)
{
	kern_return_t kr;

	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);

	TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);

	if (size == 0) {
#if MACH_ASSERT
		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
#endif
		return;
	}

	kr = vm_map_remove(map,
			   vm_map_trunc_page(addr,
					     VM_MAP_PAGE_MASK(map)),
			   vm_map_round_page(addr + size,
					     VM_MAP_PAGE_MASK(map)),
			   VM_MAP_REMOVE_KUNWIRE);
	if (kr != KERN_SUCCESS)
		panic("kmem_free");
}
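
/*
 * Illustrative sketch (not part of the build): the usual pairing of
 * kmem_alloc() with kmem_free().  kmem_alloc() does not zero the memory,
 * so the caller clears it if it needs zeroed memory, and the region is
 * normally released with the same size it was allocated with.  The
 * function name and size used here are hypothetical.
 */
#if 0
static void
example_kmem_alloc_and_free(void)
{
	vm_offset_t	buf;
	vm_size_t	size = 4 * PAGE_SIZE;

	if (kmem_alloc(kernel_map, &buf, size) != KERN_SUCCESS)
		return;

	bzero((void *) buf, size);	/* caller zeroes; kmem_alloc() does not */

	/* ... use the buffer ... */

	kmem_free(kernel_map, buf, size);
}
#endif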

/*
 *	Allocate new pages in an object.
 */

kern_return_t
kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size)
{
	vm_object_size_t		alloc_size;

	alloc_size = vm_object_round_page(size);
	vm_object_lock(object);
	while (alloc_size) {
		register vm_page_t	mem;


		/*
		 *	Allocate a page
		 */
		while (VM_PAGE_NULL ==
		       (mem = vm_page_alloc(object, offset))) {
			vm_object_unlock(object);
			VM_PAGE_WAIT();
			vm_object_lock(object);
		}
		mem->busy = FALSE;

		alloc_size -= PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);
	return KERN_SUCCESS;
}

/*
 *	Remap wired pages in an object into a new region.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection)
{

	vm_map_offset_t			map_start;
	vm_map_offset_t			map_end;

	/*
	 *	Mark the pmap region as not pageable.
	 */
	map_start = vm_map_trunc_page(start,
				      VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
				    VM_MAP_PAGE_MASK(kernel_map));

	pmap_pageable(kernel_pmap, map_start, map_end, FALSE);

	while (map_start < map_end) {
		register vm_page_t	mem;

		vm_object_lock(object);

		/*
		 *	Find a page
		 */
		if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			panic("kmem_remap_pages");

		/*
		 *	Wire it down (again)
		 */
		vm_page_lockspin_queues();
		vm_page_wire(mem);
		vm_page_unlock_queues();
		vm_object_unlock(object);

		/*
		 * ENCRYPTED SWAP:
		 * The page is supposed to be wired now, so it
		 * shouldn't be encrypted at this point.  It can
		 * safely be entered in the page table.
		 */
		ASSERT_PAGE_DECRYPTED(mem);

		/*
		 *	Enter it in the kernel pmap.  The page isn't busy,
		 *	but this shouldn't be a problem because it is wired.
		 */

		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER(kernel_pmap, map_start, mem, protection, VM_PROT_NONE, 0, TRUE);

		map_start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	addr		Address of start of range (IN/OUT)
 *	size		Size of range to find
 *	pageable	Can region be paged
 *	anywhere	Can region be located anywhere in map
 *	new_map		Pointer to new submap
 */
kern_return_t
kmem_suballoc(
	vm_map_t	parent,
	vm_offset_t	*addr,
	vm_size_t	size,
	boolean_t	pageable,
	int		flags,
	vm_map_t	*new_map)
{
	vm_map_t	map;
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(parent));

	/*
	 *	Need reference on submap object because it is internal
	 *	to the vm_system.  vm_object_enter will never be called
	 *	on it (usual source of reference for vm_map_enter).
	 */
	vm_object_reference(vm_submap_object);

	map_addr = ((flags & VM_FLAGS_ANYWHERE)
		    ? vm_map_min(parent)
		    : vm_map_trunc_page(*addr,
					VM_MAP_PAGE_MASK(parent)));

	kr = vm_map_enter(parent, &map_addr, map_size,
			  (vm_map_offset_t) 0, flags,
			  vm_submap_object, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}

	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
	if (map == VM_MAP_NULL)
		panic("kmem_suballoc: vm_map_create failed");	/* "can't happen" */
	/* inherit the parent map's page size */
	vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));

	kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * See comment preceding vm_map_submap().
		 */
		vm_map_remove(parent, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS);
		vm_map_deallocate(map);	/* also removes ref to pmap */
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}
	*addr = CAST_DOWN(vm_offset_t, map_addr);
	*new_map = map;
	return (KERN_SUCCESS);
}
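
/*
 * Illustrative sketch (not part of the build): carving a pageable submap
 * out of the kernel map with kmem_suballoc(), in the style of the
 * kernel_pageable_map exported by this file.  The function name and the
 * choice of VM_FLAGS_ANYWHERE placement are illustrative.
 */
#if 0
static vm_map_t
example_create_pageable_submap(vm_size_t size)
{
	vm_offset_t	addr = 0;
	vm_map_t	submap;

	if (kmem_suballoc(kernel_map, &addr, size,
			  TRUE,			/* pageable */
			  VM_FLAGS_ANYWHERE,	/* let the parent pick the range */
			  &submap) != KERN_SUCCESS)
		return VM_MAP_NULL;

	return submap;
}
#endif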

/*
 *	kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
void
kmem_init(
	vm_offset_t	start,
	vm_offset_t	end)
{
	vm_map_offset_t map_start;
	vm_map_offset_t map_end;

	map_start = vm_map_trunc_page(start,
				      VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
				    VM_MAP_PAGE_MASK(kernel_map));

	kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
				   map_end, FALSE);
	/*
	 *	Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		vm_map_offset_t map_addr;
		kern_return_t kr;

		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
		kr = vm_map_enter(kernel_map,
				  &map_addr,
				  (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
				  (vm_map_offset_t) 0,
				  VM_FLAGS_FIXED | VM_FLAGS_NO_PMAP_CHECK,
				  VM_OBJECT_NULL,
				  (vm_object_offset_t) 0, FALSE,
				  VM_PROT_NONE, VM_PROT_NONE,
				  VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
			      (uint64_t) start, (uint64_t) end,
			      (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			      (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			      kr);
		}
	}

	/*
	 * Set the default global user wire limit, which limits the amount of
	 * memory that can be locked via mlock().  We set this to the total
	 * amount of memory that is potentially usable by a user app (max_mem)
	 * minus a certain amount.  This can be overridden via a sysctl.
	 */
	vm_global_no_user_wire_amount = MIN(max_mem*20/100,
					    VM_NOT_USER_WIREABLE);
	vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;

	/* the default per-user limit is the same as the global limit */
	vm_user_wire_limit = vm_global_user_wire_limit;
}


/*
 *	Routine:	copyinmap
 *	Purpose:
 *		Like copyin, except that fromaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyinmap(
	vm_map_t		map,
	vm_map_offset_t		fromaddr,
	void			*todata,
	vm_size_t		length)
{
	kern_return_t	kr = KERN_SUCCESS;
	vm_map_t	oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
	} else if (current_map() == map) {
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}

/*
 *	Routine:	copyoutmap
 *	Purpose:
 *		Like copyout, except that toaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyoutmap(
	vm_map_t		map,
	void			*fromdata,
	vm_map_address_t	toaddr,
	vm_size_t		length)
{
	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
		return KERN_SUCCESS;
	}

	if (current_map() != map)
		return KERN_NOT_SUPPORTED;

	if (copyout(fromdata, toaddr, length) != 0)
		return KERN_INVALID_ADDRESS;

	return KERN_SUCCESS;
}


kern_return_t
vm_conflict_check(
	vm_map_t		map,
	vm_map_offset_t		off,
	vm_map_size_t		len,
	memory_object_t		pager,
	vm_object_offset_t	file_off)
{
	vm_map_entry_t		entry;
	vm_object_t		obj;
	vm_object_offset_t	obj_off;
	vm_map_t		base_map;
	vm_map_offset_t		base_offset;
	vm_map_offset_t		original_offset;
	kern_return_t		kr;
	vm_map_size_t		local_len;

	base_map = map;
	base_offset = off;
	original_offset = off;
	kr = KERN_SUCCESS;
	vm_map_lock(map);
	while (vm_map_lookup_entry(map, off, &entry)) {
		local_len = len;

		if (entry->object.vm_object == VM_OBJECT_NULL) {
			vm_map_unlock(map);
			return KERN_SUCCESS;
		}
		if (entry->is_sub_map) {
			vm_map_t	old_map;

			old_map = map;
			vm_map_lock(entry->object.sub_map);
			map = entry->object.sub_map;
			off = entry->offset + (off - entry->vme_start);
			vm_map_unlock(old_map);
			continue;
		}
		obj = entry->object.vm_object;
		obj_off = (off - entry->vme_start) + entry->offset;
		while (obj->shadow) {
			obj_off += obj->vo_shadow_offset;
			obj = obj->shadow;
		}
		if ((obj->pager_created) && (obj->pager == pager)) {
			if (((obj->paging_offset) + obj_off) == file_off) {
				if (off != base_offset) {
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				kr = KERN_ALREADY_WAITING;
			} else {
				vm_object_offset_t	obj_off_aligned;
				vm_object_offset_t	file_off_aligned;

				obj_off_aligned = obj_off & ~PAGE_MASK;
				file_off_aligned = file_off & ~PAGE_MASK;

				if (file_off_aligned == (obj->paging_offset + obj_off_aligned)) {
					/*
					 * the target map and the file offset start in the same page
					 * but are not identical...
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off < (obj->paging_offset + obj_off_aligned)) &&
				    ((file_off + len) > (obj->paging_offset + obj_off_aligned))) {
					/*
					 * some portion of the tail of the I/O will fall
					 * within the encompass of the target map
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off_aligned > (obj->paging_offset + obj_off)) &&
				    (file_off_aligned < (obj->paging_offset + obj_off) + len)) {
					/*
					 * the beginning page of the file offset falls within
					 * the target map's encompass
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
			}
		} else if (kr != KERN_SUCCESS) {
			vm_map_unlock(map);
			return KERN_FAILURE;
		}

		if (len <= ((entry->vme_end - entry->vme_start) -
			    (off - entry->vme_start))) {
			vm_map_unlock(map);
			return kr;
		} else {
			len -= (entry->vme_end - entry->vme_start) -
			       (off - entry->vme_start);
		}
		base_offset = base_offset + (local_len - len);
		file_off = file_off + (local_len - len);
		off = base_offset;
		if (map != base_map) {
			vm_map_unlock(map);
			vm_map_lock(base_map);
			map = base_map;
		}
	}

	vm_map_unlock(map);
	return kr;
}