]> git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/vm/vm_kern.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / osfmk / vm / vm_kern.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_kern.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Kernel memory management.
64 */
65
66#include <mach/kern_return.h>
67#include <mach/vm_param.h>
68#include <kern/assert.h>
69#include <kern/thread.h>
70#include <vm/vm_kern.h>
71#include <vm/vm_map.h>
72#include <vm/vm_object.h>
73#include <vm/vm_page.h>
74#include <vm/vm_compressor.h>
75#include <vm/vm_pageout.h>
76#include <kern/misc_protos.h>
77#include <vm/cpm.h>
78#include <kern/ledger.h>
79#include <kern/bits.h>
80#include <kern/startup.h>
81
82#include <string.h>
83
84#include <libkern/OSDebug.h>
85#include <libkern/crypto/sha2.h>
86#include <libkern/section_keywords.h>
87#include <sys/kdebug.h>
88
89#include <san/kasan.h>
90
91/*
92 * Variables exported by this module.
93 */
94
95SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
96vm_map_t kernel_pageable_map;
97
98/*
99 * Forward declarations for internal functions.
100 */
101extern kern_return_t kmem_alloc_pages(
102 vm_object_t object,
103 vm_object_offset_t offset,
104 vm_object_size_t size);
105
106kern_return_t
107kmem_alloc_contig(
108 vm_map_t map,
109 vm_offset_t *addrp,
110 vm_size_t size,
111 vm_offset_t mask,
112 ppnum_t max_pnum,
113 ppnum_t pnum_mask,
114 kma_flags_t flags,
115 vm_tag_t tag)
116{
117 vm_object_t object;
118 vm_object_offset_t offset;
119 vm_map_offset_t map_addr;
120 vm_map_offset_t map_mask;
121 vm_map_size_t map_size, i;
122 vm_map_entry_t entry;
123 vm_page_t m, pages;
124 kern_return_t kr;
125
126 assert(VM_KERN_MEMORY_NONE != tag);
127
128 if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) {
129 return KERN_INVALID_ARGUMENT;
130 }
131
132 map_size = vm_map_round_page(size,
133 VM_MAP_PAGE_MASK(map));
134 map_mask = (vm_map_offset_t)mask;
135
136 /* Check for zero allocation size (either directly or via overflow) */
137 if (map_size == 0) {
138 *addrp = 0;
139 return KERN_INVALID_ARGUMENT;
140 }
141
142 /*
143 * Allocate a new object (if necessary) and the reference we
144 * will be donating to the map entry. We must do this before
145 * locking the map, or risk deadlock with the default pager.
146 */
147 if ((flags & KMA_KOBJECT) != 0) {
148 object = kernel_object;
149 vm_object_reference(object);
150 } else {
151 object = vm_object_allocate(map_size);
152 }
153
154 kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0,
155 VM_MAP_KERNEL_FLAGS_NONE, tag, &entry);
156 if (KERN_SUCCESS != kr) {
157 vm_object_deallocate(object);
158 return kr;
159 }
160
161 if (object == kernel_object) {
162 offset = map_addr;
163 } else {
164 offset = 0;
165 }
166 VME_OBJECT_SET(entry, object);
167 VME_OFFSET_SET(entry, offset);
168
169 /* Take an extra object ref in case the map entry gets deleted */
170 vm_object_reference(object);
171 vm_map_unlock(map);
172
173 kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
174
175 if (kr != KERN_SUCCESS) {
176 vm_map_remove(map,
177 vm_map_trunc_page(map_addr,
178 VM_MAP_PAGE_MASK(map)),
179 vm_map_round_page(map_addr + map_size,
180 VM_MAP_PAGE_MASK(map)),
181 VM_MAP_REMOVE_NO_FLAGS);
182 vm_object_deallocate(object);
183 *addrp = 0;
184 return kr;
185 }
186
187 vm_object_lock(object);
188 for (i = 0; i < map_size; i += PAGE_SIZE) {
189 m = pages;
190 pages = NEXT_PAGE(m);
191 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
192 m->vmp_busy = FALSE;
193 vm_page_insert(m, object, offset + i);
194 }
195 vm_object_unlock(object);
196
197 kr = vm_map_wire_kernel(map,
198 vm_map_trunc_page(map_addr,
199 VM_MAP_PAGE_MASK(map)),
200 vm_map_round_page(map_addr + map_size,
201 VM_MAP_PAGE_MASK(map)),
202 VM_PROT_DEFAULT, tag,
203 FALSE);
204
205 if (kr != KERN_SUCCESS) {
206 if (object == kernel_object) {
207 vm_object_lock(object);
208 vm_object_page_remove(object, offset, offset + map_size);
209 vm_object_unlock(object);
210 }
211 vm_map_remove(map,
212 vm_map_trunc_page(map_addr,
213 VM_MAP_PAGE_MASK(map)),
214 vm_map_round_page(map_addr + map_size,
215 VM_MAP_PAGE_MASK(map)),
216 VM_MAP_REMOVE_NO_FLAGS);
217 vm_object_deallocate(object);
218 return kr;
219 }
220 vm_object_deallocate(object);
221
222 if (object == kernel_object) {
223 vm_map_simplify(map, map_addr);
224 vm_tag_update_size(tag, map_size);
225 }
226 *addrp = (vm_offset_t) map_addr;
227 assert((vm_map_offset_t) *addrp == map_addr);
228
229 return KERN_SUCCESS;
230}
231
232/*
233 * Master entry point for allocating kernel memory.
234 * NOTE: this routine is _never_ interrupt safe.
235 *
236 * map : map to allocate into
237 * addrp : pointer to start address of new memory
238 * size : size of memory requested
239 * flags : options
240 * KMA_HERE *addrp is base address, else "anywhere"
241 * KMA_NOPAGEWAIT don't wait for pages if unavailable
242 * KMA_KOBJECT use kernel_object
243 * KMA_LOMEM support for 32 bit devices in a 64 bit world
244 * if set and a lomemory pool is available
245 * grab pages from it... this also implies
246 * KMA_NOPAGEWAIT
247 */
248
249kern_return_t
250kernel_memory_allocate(
251 vm_map_t map,
252 vm_offset_t *addrp,
253 vm_size_t size,
254 vm_offset_t mask,
255 kma_flags_t flags,
256 vm_tag_t tag)
257{
258 vm_object_t object;
259 vm_object_offset_t offset;
260 vm_object_offset_t pg_offset;
261 vm_map_entry_t entry = NULL;
262 vm_map_offset_t map_addr, fill_start;
263 vm_map_offset_t map_mask;
264 vm_map_size_t map_size, fill_size;
265 kern_return_t kr, pe_result;
266 vm_page_t mem;
267 vm_page_t guard_page_list = NULL;
268 vm_page_t wired_page_list = NULL;
269 int guard_page_count = 0;
270 int wired_page_count = 0;
271 int vm_alloc_flags;
272 vm_map_kernel_flags_t vmk_flags;
273 vm_prot_t kma_prot;
274
275 if (startup_phase < STARTUP_SUB_KMEM) {
276 panic("kernel_memory_allocate: VM is not ready");
277 }
278
279 map_size = vm_map_round_page(size,
280 VM_MAP_PAGE_MASK(map));
281 map_mask = (vm_map_offset_t) mask;
282
283 vm_alloc_flags = 0; //VM_MAKE_TAG(tag);
284 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
285
286 /* Check for zero allocation size (either directly or via overflow) */
287 if (map_size == 0) {
288 *addrp = 0;
289 return KERN_INVALID_ARGUMENT;
290 }
291
292 /*
293 * limit the size of a single extent of wired memory
294 * to try and limit the damage to the system if
295 * too many pages get wired down
296 * limit raised to 2GB with 128GB max physical limit,
297 * but scaled by installed memory above this
298 */
299 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
300 map_size > MAX(1ULL << 31, sane_size / 64)) {
301 return KERN_RESOURCE_SHORTAGE;
302 }
303
304 /*
305 * Guard pages:
306 *
307 * Guard pages are implemented as ficticious pages. By placing guard pages
308 * on either end of a stack, they can help detect cases where a thread walks
309 * off either end of its stack. They are allocated and set up here and attempts
310 * to access those pages are trapped in vm_fault_page().
311 *
312 * The map_size we were passed may include extra space for
313 * guard pages. If those were requested, then back it out of fill_size
314 * since vm_map_find_space() takes just the actual size not including
315 * guard pages. Similarly, fill_start indicates where the actual pages
316 * will begin in the range.
317 */
318
319 fill_start = 0;
320 fill_size = map_size;
321
322 if (flags & KMA_GUARD_FIRST) {
323 vmk_flags.vmkf_guard_before = TRUE;
324 fill_start += PAGE_SIZE_64;
325 fill_size -= PAGE_SIZE_64;
326 if (map_size < fill_start + fill_size) {
327 /* no space for a guard page */
328 *addrp = 0;
329 return KERN_INVALID_ARGUMENT;
330 }
331 guard_page_count++;
332 }
333 if (flags & KMA_GUARD_LAST) {
334 vmk_flags.vmkf_guard_after = TRUE;
335 fill_size -= PAGE_SIZE_64;
336 if (map_size <= fill_start + fill_size) {
337 /* no space for a guard page */
338 *addrp = 0;
339 return KERN_INVALID_ARGUMENT;
340 }
341 guard_page_count++;
342 }
343 wired_page_count = (int) (fill_size / PAGE_SIZE_64);
344 assert(wired_page_count * PAGE_SIZE_64 == fill_size);
345
346#if DEBUG || DEVELOPMENT
347 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
348 size, 0, 0, 0);
349#endif
350
351 for (int i = 0; i < guard_page_count; i++) {
352 mem = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
353 if (mem == VM_PAGE_NULL) {
354 kr = KERN_RESOURCE_SHORTAGE;
355 goto out;
356 }
357 mem->vmp_snext = guard_page_list;
358 guard_page_list = mem;
359 }
360
361 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
362 kr = vm_page_alloc_list(wired_page_count, flags,
363 &wired_page_list);
364 if (kr != KERN_SUCCESS) {
365 goto out;
366 }
367 }
368
369 /*
370 * Allocate a new object (if necessary). We must do this before
371 * locking the map, or risk deadlock with the default pager.
372 */
373 if ((flags & KMA_KOBJECT) != 0) {
374 object = kernel_object;
375 vm_object_reference(object);
376 } else if ((flags & KMA_COMPRESSOR) != 0) {
377 object = compressor_object;
378 vm_object_reference(object);
379 } else {
380 object = vm_object_allocate(map_size);
381 }
382
383 if (flags & KMA_ATOMIC) {
384 vmk_flags.vmkf_atomic_entry = TRUE;
385 }
386
387 if (flags & KMA_KHEAP) {
388 vm_alloc_flags |= VM_MAP_FIND_LAST_FREE;
389 }
390
391 kr = vm_map_find_space(map, &map_addr,
392 fill_size, map_mask,
393 vm_alloc_flags, vmk_flags, tag, &entry);
394 if (KERN_SUCCESS != kr) {
395 vm_object_deallocate(object);
396 goto out;
397 }
398
399 if (object == kernel_object || object == compressor_object) {
400 offset = map_addr;
401 } else {
402 offset = 0;
403 }
404 VME_OBJECT_SET(entry, object);
405 VME_OFFSET_SET(entry, offset);
406
407 if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
408 entry->wired_count++;
409 }
410
411 if (flags & KMA_PERMANENT) {
412 entry->permanent = TRUE;
413 }
414
415 if (object != kernel_object && object != compressor_object) {
416 vm_object_reference(object);
417 }
418
419 vm_object_lock(object);
420 vm_map_unlock(map);
421
422 pg_offset = 0;
423
424 if (fill_start) {
425 if (guard_page_list == NULL) {
426 panic("kernel_memory_allocate: guard_page_list == NULL");
427 }
428
429 mem = guard_page_list;
430 guard_page_list = mem->vmp_snext;
431 mem->vmp_snext = NULL;
432
433 vm_page_insert(mem, object, offset + pg_offset);
434
435 mem->vmp_busy = FALSE;
436 pg_offset += PAGE_SIZE_64;
437 }
438
439 kma_prot = VM_PROT_READ | VM_PROT_WRITE;
440
441#if KASAN
442 if (!(flags & KMA_VAONLY)) {
443 /* for VAONLY mappings we notify in populate only */
444 kasan_notify_address(map_addr, size);
445 }
446#endif
447
448 if (flags & (KMA_VAONLY | KMA_PAGEABLE)) {
449 pg_offset = fill_start + fill_size;
450 } else {
451 for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
452 if (wired_page_list == NULL) {
453 panic("kernel_memory_allocate: wired_page_list == NULL");
454 }
455
456 mem = wired_page_list;
457 wired_page_list = mem->vmp_snext;
458 mem->vmp_snext = NULL;
459
460 assert(mem->vmp_wire_count == 0);
461 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
462
463 mem->vmp_q_state = VM_PAGE_IS_WIRED;
464 mem->vmp_wire_count++;
465 if (__improbable(mem->vmp_wire_count == 0)) {
466 panic("kernel_memory_allocate(%p): wire_count overflow",
467 mem);
468 }
469
470 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
471
472 mem->vmp_busy = FALSE;
473 mem->vmp_pmapped = TRUE;
474 mem->vmp_wpmapped = TRUE;
475
476 PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset,
477 0, /* fault_phys_offset */
478 mem,
479 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
480 PMAP_OPTIONS_NOWAIT, pe_result);
481
482 if (pe_result == KERN_RESOURCE_SHORTAGE) {
483 vm_object_unlock(object);
484
485 PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
486 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
487 pe_result);
488
489 vm_object_lock(object);
490 }
491
492 assert(pe_result == KERN_SUCCESS);
493
494 if (flags & KMA_NOENCRYPT) {
495 bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
496
497 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
498 }
499 }
500 if (kernel_object == object) {
501 vm_tag_update_size(tag, fill_size);
502 }
503 }
504 if ((fill_start + fill_size) < map_size) {
505 if (guard_page_list == NULL) {
506 panic("kernel_memory_allocate: guard_page_list == NULL");
507 }
508
509 mem = guard_page_list;
510 guard_page_list = mem->vmp_snext;
511 mem->vmp_snext = NULL;
512
513 vm_page_insert(mem, object, offset + pg_offset);
514
515 mem->vmp_busy = FALSE;
516 }
517 if (guard_page_list || wired_page_list) {
518 panic("kernel_memory_allocate: non empty list\n");
519 }
520
521 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
522 vm_page_lockspin_queues();
523 vm_page_wire_count += wired_page_count;
524 vm_page_unlock_queues();
525 }
526
527 vm_object_unlock(object);
528
529 /*
530 * now that the pages are wired, we no longer have to fear coalesce
531 */
532 if (object == kernel_object || object == compressor_object) {
533 vm_map_simplify(map, map_addr);
534 } else {
535 vm_object_deallocate(object);
536 }
537
538#if DEBUG || DEVELOPMENT
539 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
540 wired_page_count, 0, 0, 0);
541#endif
542 /*
543 * Return the memory, not zeroed.
544 */
545 *addrp = CAST_DOWN(vm_offset_t, map_addr);
546 return KERN_SUCCESS;
547
548out:
549 if (guard_page_list) {
550 vm_page_free_list(guard_page_list, FALSE);
551 }
552
553 if (wired_page_list) {
554 vm_page_free_list(wired_page_list, FALSE);
555 }
556
557#if DEBUG || DEVELOPMENT
558 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
559 wired_page_count, 0, 0, 0);
560#endif
561 return kr;
562}
563
564void
565kernel_memory_populate_with_pages(
566 vm_map_t map,
567 vm_offset_t addr,
568 vm_size_t size,
569 vm_page_t page_list,
570 kma_flags_t flags,
571 vm_tag_t tag)
572{
573 vm_object_t object;
574 kern_return_t pe_result;
575 vm_page_t mem;
576 int page_count = atop_64(size);
577
578 if (flags & KMA_COMPRESSOR) {
579 panic("%s(%p,0x%llx,0x%llx,0x%x): KMA_COMPRESSOR", __func__,
580 map, (uint64_t) addr, (uint64_t) size, flags);
581 }
582
583 if (flags & KMA_KOBJECT) {
584 object = kernel_object;
585
586 vm_object_lock(object);
587 } else {
588 /*
589 * If it's not the kernel object, we need to:
590 * lock map;
591 * lookup entry;
592 * lock object;
593 * take reference on object;
594 * unlock map;
595 */
596 panic("%s(%p,0x%llx,0x%llx,0x%x): !KMA_KOBJECT", __func__,
597 map, (uint64_t) addr, (uint64_t) size, flags);
598 }
599
600 for (vm_object_offset_t pg_offset = 0;
601 pg_offset < size;
602 pg_offset += PAGE_SIZE_64) {
603 if (page_list == NULL) {
604 panic("%s: page_list too short", __func__);
605 }
606
607 mem = page_list;
608 page_list = mem->vmp_snext;
609 mem->vmp_snext = NULL;
610
611 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
612 mem->vmp_q_state = VM_PAGE_IS_WIRED;
613 mem->vmp_wire_count++;
614 if (mem->vmp_wire_count == 0) {
615 panic("%s(%p): wire_count overflow", __func__, mem);
616 }
617
618 vm_page_insert_wired(mem, object, addr + pg_offset, tag);
619
620 mem->vmp_busy = FALSE;
621 mem->vmp_pmapped = TRUE;
622 mem->vmp_wpmapped = TRUE;
623
624 PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset,
625 0, /* fault_phys_offset */
626 mem,
627 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
628 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
629 PMAP_OPTIONS_NOWAIT, pe_result);
630
631 if (pe_result == KERN_RESOURCE_SHORTAGE) {
632 vm_object_unlock(object);
633
634 PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
635 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
636 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
637 pe_result);
638
639 vm_object_lock(object);
640 }
641
642 assert(pe_result == KERN_SUCCESS);
643
644 if (flags & KMA_NOENCRYPT) {
645 __nosan_bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
646 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
647 }
648 }
649 if (page_list) {
650 panic("%s: page_list too long", __func__);
651 }
652 vm_object_unlock(object);
653
654 vm_page_lockspin_queues();
655 vm_page_wire_count += page_count;
656 vm_page_unlock_queues();
657 vm_tag_update_size(tag, size);
658
659#if KASAN
660 if (map == compressor_map) {
661 kasan_notify_address_nopoison(addr, size);
662 } else {
663 kasan_notify_address(addr, size);
664 }
665#endif
666}
667
668kern_return_t
669kernel_memory_populate(
670 vm_map_t map,
671 vm_offset_t addr,
672 vm_size_t size,
673 kma_flags_t flags,
674 vm_tag_t tag)
675{
676 vm_object_t object;
677 vm_object_offset_t offset, pg_offset;
678 kern_return_t kr = KERN_SUCCESS;
679 vm_page_t mem;
680 vm_page_t page_list = NULL;
681 int page_count = atop_64(size);
682
683#if DEBUG || DEVELOPMENT
684 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
685 size, 0, 0, 0);
686#endif
687
688 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
689
690 if (flags & KMA_COMPRESSOR) {
691 pg_offset = page_count * PAGE_SIZE_64;
692
693 do {
694 for (;;) {
695 mem = vm_page_grab();
696
697 if (mem != VM_PAGE_NULL) {
698 break;
699 }
700
701 VM_PAGE_WAIT();
702 }
703 if (KMA_ZERO & flags) {
704 vm_page_zero_fill(mem);
705 }
706 mem->vmp_snext = page_list;
707 page_list = mem;
708
709 pg_offset -= PAGE_SIZE_64;
710
711 kr = pmap_enter_options(kernel_pmap,
712 addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
713 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
714 PMAP_OPTIONS_INTERNAL, NULL);
715 assert(kr == KERN_SUCCESS);
716 } while (pg_offset);
717
718 offset = addr;
719 object = compressor_object;
720
721 vm_object_lock(object);
722
723 for (pg_offset = 0;
724 pg_offset < size;
725 pg_offset += PAGE_SIZE_64) {
726 mem = page_list;
727 page_list = mem->vmp_snext;
728 mem->vmp_snext = NULL;
729
730 vm_page_insert(mem, object, offset + pg_offset);
731 assert(mem->vmp_busy);
732
733 mem->vmp_busy = FALSE;
734 mem->vmp_pmapped = TRUE;
735 mem->vmp_wpmapped = TRUE;
736 mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
737 }
738 vm_object_unlock(object);
739
740#if KASAN
741 if (map == compressor_map) {
742 kasan_notify_address_nopoison(addr, size);
743 } else {
744 kasan_notify_address(addr, size);
745 }
746#endif
747
748#if DEBUG || DEVELOPMENT
749 task_t task = current_task();
750 if (task != NULL) {
751 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_count);
752 }
753#endif
754 } else {
755 kr = vm_page_alloc_list(page_count, flags, &page_list);
756 if (kr == KERN_SUCCESS) {
757 kernel_memory_populate_with_pages(map, addr, size,
758 page_list, flags, tag);
759 }
760 }
761
762#if DEBUG || DEVELOPMENT
763 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
764 page_count, 0, 0, 0);
765#endif
766 return kr;
767}
768
769
770void
771kernel_memory_depopulate(
772 vm_map_t map,
773 vm_offset_t addr,
774 vm_size_t size,
775 kma_flags_t flags,
776 vm_tag_t tag)
777{
778 vm_object_t object;
779 vm_object_offset_t offset, pg_offset;
780 vm_page_t mem;
781 vm_page_t local_freeq = NULL;
782 unsigned int pages_unwired;
783
784 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
785
786 if (flags & KMA_COMPRESSOR) {
787 offset = addr;
788 object = compressor_object;
789
790 vm_object_lock(object);
791 } else if (flags & KMA_KOBJECT) {
792 offset = addr;
793 object = kernel_object;
794 vm_object_lock(object);
795 } else {
796 offset = 0;
797 object = NULL;
798 /*
799 * If it's not the kernel object, we need to:
800 * lock map;
801 * lookup entry;
802 * lock object;
803 * unlock map;
804 */
805 panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
806 "!KMA_KOBJECT",
807 map, (uint64_t) addr, (uint64_t) size, flags);
808 }
809 pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);
810
811 for (pg_offset = 0, pages_unwired = 0;
812 pg_offset < size;
813 pg_offset += PAGE_SIZE_64) {
814 mem = vm_page_lookup(object, offset + pg_offset);
815
816 assert(mem);
817
818 if (mem->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
819 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
820 pages_unwired++;
821 }
822
823 mem->vmp_busy = TRUE;
824
825 assert(mem->vmp_tabled);
826 vm_page_remove(mem, TRUE);
827 assert(mem->vmp_busy);
828
829 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
830 assert((mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
831 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
832
833 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
834 mem->vmp_snext = local_freeq;
835 local_freeq = mem;
836 }
837 vm_object_unlock(object);
838
839
840 if (local_freeq) {
841 vm_page_free_list(local_freeq, TRUE);
842 if (pages_unwired != 0) {
843 vm_page_lockspin_queues();
844 vm_page_wire_count -= pages_unwired;
845 vm_page_unlock_queues();
846 vm_tag_update_size(tag, -ptoa_64(pages_unwired));
847 }
848 }
849}
850
851/*
852 * kmem_alloc:
853 *
854 * Allocate wired-down memory in the kernel's address map
855 * or a submap. The memory is not zero-filled.
856 */
857
858kern_return_t
859kmem_alloc_external(
860 vm_map_t map,
861 vm_offset_t *addrp,
862 vm_size_t size)
863{
864 return kmem_alloc(map, addrp, size, vm_tag_bt());
865}
866
867
868kern_return_t
869kmem_alloc(
870 vm_map_t map,
871 vm_offset_t *addrp,
872 vm_size_t size,
873 vm_tag_t tag)
874{
875 return kmem_alloc_flags(map, addrp, size, tag, 0);
876}
877
878kern_return_t
879kmem_alloc_flags(
880 vm_map_t map,
881 vm_offset_t *addrp,
882 vm_size_t size,
883 vm_tag_t tag,
884 kma_flags_t flags)
885{
886 kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, flags, tag);
887 if (kr == KERN_SUCCESS) {
888 TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
889 }
890 return kr;
891}
892
893/*
894 * kmem_realloc:
895 *
896 * Reallocate wired-down memory in the kernel's address map
897 * or a submap. Newly allocated pages are not zeroed.
898 * This can only be used on regions allocated with kmem_alloc.
899 *
900 * If successful, the pages in the old region are mapped twice.
901 * The old region is unchanged. Use kmem_free to get rid of it.
902 */
903kern_return_t
904kmem_realloc(
905 vm_map_t map,
906 vm_offset_t oldaddr,
907 vm_size_t oldsize,
908 vm_offset_t *newaddrp,
909 vm_size_t newsize,
910 vm_tag_t tag)
911{
912 vm_object_t object;
913 vm_object_offset_t offset;
914 vm_map_offset_t oldmapmin;
915 vm_map_offset_t oldmapmax;
916 vm_map_offset_t newmapaddr;
917 vm_map_size_t oldmapsize;
918 vm_map_size_t newmapsize;
919 vm_map_entry_t oldentry;
920 vm_map_entry_t newentry;
921 vm_page_t mem;
922 kern_return_t kr;
923
924 oldmapmin = vm_map_trunc_page(oldaddr,
925 VM_MAP_PAGE_MASK(map));
926 oldmapmax = vm_map_round_page(oldaddr + oldsize,
927 VM_MAP_PAGE_MASK(map));
928 oldmapsize = oldmapmax - oldmapmin;
929 newmapsize = vm_map_round_page(newsize,
930 VM_MAP_PAGE_MASK(map));
931 if (newmapsize < newsize) {
932 /* overflow */
933 *newaddrp = 0;
934 return KERN_INVALID_ARGUMENT;
935 }
936
937 /*
938 * Find the VM object backing the old region.
939 */
940
941 vm_map_lock(map);
942
943 if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
944 panic("kmem_realloc");
945 }
946 object = VME_OBJECT(oldentry);
947
948 /*
949 * Increase the size of the object and
950 * fill in the new region.
951 */
952
953 vm_object_reference(object);
954 /* by grabbing the object lock before unlocking the map */
955 /* we guarantee that we will panic if more than one */
956 /* attempt is made to realloc a kmem_alloc'd area */
957 vm_object_lock(object);
958 vm_map_unlock(map);
959 if (object->vo_size != oldmapsize) {
960 panic("kmem_realloc");
961 }
962 object->vo_size = newmapsize;
963 vm_object_unlock(object);
964
965 /* allocate the new pages while expanded portion of the */
966 /* object is still not mapped */
967 kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
968 vm_object_round_page(newmapsize - oldmapsize));
969
970 /*
971 * Find space for the new region.
972 */
973
974 kr = vm_map_find_space(map, &newmapaddr, newmapsize,
975 (vm_map_offset_t) 0, 0,
976 VM_MAP_KERNEL_FLAGS_NONE,
977 tag,
978 &newentry);
979 if (kr != KERN_SUCCESS) {
980 vm_object_lock(object);
981 for (offset = oldmapsize;
982 offset < newmapsize; offset += PAGE_SIZE) {
983 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
984 VM_PAGE_FREE(mem);
985 }
986 }
987 object->vo_size = oldmapsize;
988 vm_object_unlock(object);
989 vm_object_deallocate(object);
990 return kr;
991 }
992 VME_OBJECT_SET(newentry, object);
993 VME_OFFSET_SET(newentry, 0);
994 assert(newentry->wired_count == 0);
995
996
997 /* add an extra reference in case we have someone doing an */
998 /* unexpected deallocate */
999 vm_object_reference(object);
1000 vm_map_unlock(map);
1001
1002 kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
1003 VM_PROT_DEFAULT, tag, FALSE);
1004 if (KERN_SUCCESS != kr) {
1005 vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, VM_MAP_REMOVE_NO_FLAGS);
1006 vm_object_lock(object);
1007 for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
1008 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
1009 VM_PAGE_FREE(mem);
1010 }
1011 }
1012 object->vo_size = oldmapsize;
1013 vm_object_unlock(object);
1014 vm_object_deallocate(object);
1015 return kr;
1016 }
1017 vm_object_deallocate(object);
1018
1019 if (kernel_object == object) {
1020 vm_tag_update_size(tag, newmapsize);
1021 }
1022
1023 *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
1024 return KERN_SUCCESS;
1025}
1026
1027/*
1028 * kmem_alloc_kobject:
1029 *
1030 * Allocate wired-down memory in the kernel's address map
1031 * or a submap. The memory is not zero-filled.
1032 *
1033 * The memory is allocated in the kernel_object.
1034 * It may not be copied with vm_map_copy, and
1035 * it may not be reallocated with kmem_realloc.
1036 */
1037
1038kern_return_t
1039kmem_alloc_kobject_external(
1040 vm_map_t map,
1041 vm_offset_t *addrp,
1042 vm_size_t size)
1043{
1044 return kmem_alloc_kobject(map, addrp, size, vm_tag_bt());
1045}
1046
1047kern_return_t
1048kmem_alloc_kobject(
1049 vm_map_t map,
1050 vm_offset_t *addrp,
1051 vm_size_t size,
1052 vm_tag_t tag)
1053{
1054 return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag);
1055}
1056
1057/*
1058 * kmem_alloc_aligned:
1059 *
1060 * Like kmem_alloc_kobject, except that the memory is aligned.
1061 * The size should be a power-of-2.
1062 */
1063
1064kern_return_t
1065kmem_alloc_aligned(
1066 vm_map_t map,
1067 vm_offset_t *addrp,
1068 vm_size_t size,
1069 vm_tag_t tag)
1070{
1071 if ((size & (size - 1)) != 0) {
1072 panic("kmem_alloc_aligned: size not aligned");
1073 }
1074 return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag);
1075}
1076
1077/*
1078 * kmem_alloc_pageable:
1079 *
1080 * Allocate pageable memory in the kernel's address map.
1081 */
1082
1083kern_return_t
1084kmem_alloc_pageable_external(
1085 vm_map_t map,
1086 vm_offset_t *addrp,
1087 vm_size_t size)
1088{
1089 return kmem_alloc_pageable(map, addrp, size, vm_tag_bt());
1090}
1091
1092kern_return_t
1093kmem_alloc_pageable(
1094 vm_map_t map,
1095 vm_offset_t *addrp,
1096 vm_size_t size,
1097 vm_tag_t tag)
1098{
1099 vm_map_offset_t map_addr;
1100 vm_map_size_t map_size;
1101 kern_return_t kr;
1102
1103#ifndef normal
1104 map_addr = (vm_map_min(map)) + PAGE_SIZE;
1105#else
1106 map_addr = vm_map_min(map);
1107#endif
1108 map_size = vm_map_round_page(size,
1109 VM_MAP_PAGE_MASK(map));
1110 if (map_size < size) {
1111 /* overflow */
1112 *addrp = 0;
1113 return KERN_INVALID_ARGUMENT;
1114 }
1115
1116 kr = vm_map_enter(map, &map_addr, map_size,
1117 (vm_map_offset_t) 0,
1118 VM_FLAGS_ANYWHERE,
1119 VM_MAP_KERNEL_FLAGS_NONE,
1120 tag,
1121 VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
1122 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1123
1124 if (kr != KERN_SUCCESS) {
1125 return kr;
1126 }
1127
1128#if KASAN
1129 kasan_notify_address(map_addr, map_size);
1130#endif
1131 *addrp = CAST_DOWN(vm_offset_t, map_addr);
1132 return KERN_SUCCESS;
1133}
1134
1135/*
1136 * kmem_free:
1137 *
1138 * Release a region of kernel virtual memory allocated
1139 * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
1140 * and return the physical pages associated with that region.
1141 */
1142
1143void
1144kmem_free(
1145 vm_map_t map,
1146 vm_offset_t addr,
1147 vm_size_t size)
1148{
1149 kern_return_t kr;
1150
1151 assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1152
1153 TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);
1154
1155 if (size == 0) {
1156#if MACH_ASSERT
1157 printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
1158#endif
1159 return;
1160 }
1161
1162 kr = vm_map_remove(map,
1163 vm_map_trunc_page(addr,
1164 VM_MAP_PAGE_MASK(map)),
1165 vm_map_round_page(addr + size,
1166 VM_MAP_PAGE_MASK(map)),
1167 VM_MAP_REMOVE_KUNWIRE);
1168 if (kr != KERN_SUCCESS) {
1169 panic("kmem_free");
1170 }
1171}
1172
1173/*
1174 * Allocate new pages in an object.
1175 */
1176
1177kern_return_t
1178kmem_alloc_pages(
1179 vm_object_t object,
1180 vm_object_offset_t offset,
1181 vm_object_size_t size)
1182{
1183 vm_object_size_t alloc_size;
1184
1185 alloc_size = vm_object_round_page(size);
1186 vm_object_lock(object);
1187 while (alloc_size) {
1188 vm_page_t mem;
1189
1190
1191 /*
1192 * Allocate a page
1193 */
1194 while (VM_PAGE_NULL ==
1195 (mem = vm_page_alloc(object, offset))) {
1196 vm_object_unlock(object);
1197 VM_PAGE_WAIT();
1198 vm_object_lock(object);
1199 }
1200 mem->vmp_busy = FALSE;
1201
1202 alloc_size -= PAGE_SIZE;
1203 offset += PAGE_SIZE;
1204 }
1205 vm_object_unlock(object);
1206 return KERN_SUCCESS;
1207}
1208
1209/*
1210 * kmem_suballoc:
1211 *
1212 * Allocates a map to manage a subrange
1213 * of the kernel virtual address space.
1214 *
1215 * Arguments are as follows:
1216 *
1217 * parent Map to take range from
1218 * addr Address of start of range (IN/OUT)
1219 * size Size of range to find
1220 * pageable Can region be paged
1221 * anywhere Can region be located anywhere in map
1222 * new_map Pointer to new submap
1223 */
1224kern_return_t
1225kmem_suballoc(
1226 vm_map_t parent,
1227 vm_offset_t *addr,
1228 vm_size_t size,
1229 boolean_t pageable,
1230 int flags,
1231 vm_map_kernel_flags_t vmk_flags,
1232 vm_tag_t tag,
1233 vm_map_t *new_map)
1234{
1235 vm_map_t map;
1236 vm_map_offset_t map_addr;
1237 vm_map_size_t map_size;
1238 kern_return_t kr;
1239
1240 map_size = vm_map_round_page(size,
1241 VM_MAP_PAGE_MASK(parent));
1242 if (map_size < size) {
1243 /* overflow */
1244 *addr = 0;
1245 return KERN_INVALID_ARGUMENT;
1246 }
1247
1248 /*
1249 * Need reference on submap object because it is internal
1250 * to the vm_system. vm_object_enter will never be called
1251 * on it (usual source of reference for vm_map_enter).
1252 */
1253 vm_object_reference(vm_submap_object);
1254
1255 map_addr = ((flags & VM_FLAGS_ANYWHERE)
1256 ? vm_map_min(parent)
1257 : vm_map_trunc_page(*addr,
1258 VM_MAP_PAGE_MASK(parent)));
1259
1260 kr = vm_map_enter(parent, &map_addr, map_size,
1261 (vm_map_offset_t) 0, flags, vmk_flags, tag,
1262 vm_submap_object, (vm_object_offset_t) 0, FALSE,
1263 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1264 if (kr != KERN_SUCCESS) {
1265 vm_object_deallocate(vm_submap_object);
1266 return kr;
1267 }
1268
1269 pmap_reference(vm_map_pmap(parent));
1270 map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
1271 if (map == VM_MAP_NULL) {
1272 panic("kmem_suballoc: vm_map_create failed"); /* "can't happen" */
1273 }
1274 /* inherit the parent map's page size */
1275 vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));
1276
1277 kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
1278 if (kr != KERN_SUCCESS) {
1279 /*
1280 * See comment preceding vm_map_submap().
1281 */
1282 vm_map_remove(parent, map_addr, map_addr + map_size,
1283 VM_MAP_REMOVE_NO_FLAGS);
1284 vm_map_deallocate(map); /* also removes ref to pmap */
1285 vm_object_deallocate(vm_submap_object);
1286 return kr;
1287 }
1288 *addr = CAST_DOWN(vm_offset_t, map_addr);
1289 *new_map = map;
1290 return KERN_SUCCESS;
1291}
1292/*
1293 * The default percentage of memory that can be mlocked is scaled based on the total
1294 * amount of memory in the system. These percentages are caclulated
1295 * offline and stored in this table. We index this table by
1296 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
1297 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
1298 *
1299 * Note that these values were picked for mac.
1300 * If we ever have very large memory config arm devices, we may want to revisit
1301 * since the kernel overhead is smaller there due to the larger page size.
1302 */
1303
1304/* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
1305#define VM_USER_WIREABLE_MIN_CONFIG 32
1306static vm_map_size_t wire_limit_percents[] =
1307{ 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
1308
1309/*
1310 * Sets the default global user wire limit which limits the amount of
1311 * memory that can be locked via mlock() based on the above algorithm..
1312 * This can be overridden via a sysctl.
1313 */
1314static void
1315kmem_set_user_wire_limits(void)
1316{
1317 uint64_t available_mem_log;
1318 uint64_t max_wire_percent;
1319 size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
1320 sizeof(vm_map_size_t);
1321 vm_map_size_t limit;
1322 uint64_t config_memsize = max_mem;
1323#if defined(XNU_TARGET_OS_OSX)
1324 config_memsize = max_mem_actual;
1325#endif /* defined(XNU_TARGET_OS_OSX) */
1326
1327 available_mem_log = bit_floor(config_memsize);
1328
1329 if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
1330 available_mem_log = 0;
1331 } else {
1332 available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
1333 }
1334 if (available_mem_log >= wire_limit_percents_length) {
1335 available_mem_log = wire_limit_percents_length - 1;
1336 }
1337 max_wire_percent = wire_limit_percents[available_mem_log];
1338
1339 limit = config_memsize * max_wire_percent / 100;
1340 /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
1341 if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
1342 limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
1343 }
1344
1345 vm_global_user_wire_limit = limit;
1346 /* the default per task limit is the same as the global limit */
1347 vm_per_task_user_wire_limit = limit;
1348 vm_add_wire_count_over_global_limit = 0;
1349 vm_add_wire_count_over_user_limit = 0;
1350}
1351
1352
1353/*
1354 * kmem_init:
1355 *
1356 * Initialize the kernel's virtual memory map, taking
1357 * into account all memory allocated up to this time.
1358 */
1359__startup_func
1360void
1361kmem_init(
1362 vm_offset_t start,
1363 vm_offset_t end)
1364{
1365 vm_map_offset_t map_start;
1366 vm_map_offset_t map_end;
1367 vm_map_kernel_flags_t vmk_flags;
1368
1369 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1370 vmk_flags.vmkf_permanent = TRUE;
1371 vmk_flags.vmkf_no_pmap_check = TRUE;
1372
1373 map_start = vm_map_trunc_page(start,
1374 VM_MAP_PAGE_MASK(kernel_map));
1375 map_end = vm_map_round_page(end,
1376 VM_MAP_PAGE_MASK(kernel_map));
1377
1378#if defined(__arm__) || defined(__arm64__)
1379 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1380 VM_MAX_KERNEL_ADDRESS, FALSE);
1381 /*
1382 * Reserve virtual memory allocated up to this time.
1383 */
1384 {
1385 unsigned int region_select = 0;
1386 vm_map_offset_t region_start;
1387 vm_map_size_t region_size;
1388 vm_map_offset_t map_addr;
1389 kern_return_t kr;
1390
1391 while (pmap_virtual_region(region_select, &region_start, &region_size)) {
1392 map_addr = region_start;
1393 kr = vm_map_enter(kernel_map, &map_addr,
1394 vm_map_round_page(region_size,
1395 VM_MAP_PAGE_MASK(kernel_map)),
1396 (vm_map_offset_t) 0,
1397 VM_FLAGS_FIXED,
1398 vmk_flags,
1399 VM_KERN_MEMORY_NONE,
1400 VM_OBJECT_NULL,
1401 (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
1402 VM_INHERIT_DEFAULT);
1403
1404 if (kr != KERN_SUCCESS) {
1405 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
1406 (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
1407 (uint64_t) region_size, kr);
1408 }
1409
1410 region_select++;
1411 }
1412 }
1413#else
1414 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1415 map_end, FALSE);
1416 /*
1417 * Reserve virtual memory allocated up to this time.
1418 */
1419 if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
1420 vm_map_offset_t map_addr;
1421 kern_return_t kr;
1422
1423 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1424 vmk_flags.vmkf_no_pmap_check = TRUE;
1425
1426 map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1427 kr = vm_map_enter(kernel_map,
1428 &map_addr,
1429 (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1430 (vm_map_offset_t) 0,
1431 VM_FLAGS_FIXED,
1432 vmk_flags,
1433 VM_KERN_MEMORY_NONE,
1434 VM_OBJECT_NULL,
1435 (vm_object_offset_t) 0, FALSE,
1436 VM_PROT_NONE, VM_PROT_NONE,
1437 VM_INHERIT_DEFAULT);
1438
1439 if (kr != KERN_SUCCESS) {
1440 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
1441 (uint64_t) start, (uint64_t) end,
1442 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1443 (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1444 kr);
1445 }
1446 }
1447#endif
1448
1449 kmem_set_user_wire_limits();
1450}
1451
1452/*
1453 * Routine: copyinmap
1454 * Purpose:
1455 * Like copyin, except that fromaddr is an address
1456 * in the specified VM map. This implementation
1457 * is incomplete; it handles the current user map
1458 * and the kernel map/submaps.
1459 */
1460kern_return_t
1461copyinmap(
1462 vm_map_t map,
1463 vm_map_offset_t fromaddr,
1464 void *todata,
1465 vm_size_t length)
1466{
1467 kern_return_t kr = KERN_SUCCESS;
1468 vm_map_t oldmap;
1469
1470 if (vm_map_pmap(map) == pmap_kernel()) {
1471 /* assume a correct copy */
1472 memcpy(todata, CAST_DOWN(void *, fromaddr), length);
1473 } else if (current_map() == map) {
1474 if (copyin(fromaddr, todata, length) != 0) {
1475 kr = KERN_INVALID_ADDRESS;
1476 }
1477 } else {
1478 vm_map_reference(map);
1479 oldmap = vm_map_switch(map);
1480 if (copyin(fromaddr, todata, length) != 0) {
1481 kr = KERN_INVALID_ADDRESS;
1482 }
1483 vm_map_switch(oldmap);
1484 vm_map_deallocate(map);
1485 }
1486 return kr;
1487}
1488
1489/*
1490 * Routine: copyoutmap
1491 * Purpose:
1492 * Like copyout, except that toaddr is an address
1493 * in the specified VM map.
1494 */
1495kern_return_t
1496copyoutmap(
1497 vm_map_t map,
1498 void *fromdata,
1499 vm_map_address_t toaddr,
1500 vm_size_t length)
1501{
1502 kern_return_t kr = KERN_SUCCESS;
1503 vm_map_t oldmap;
1504
1505 if (vm_map_pmap(map) == pmap_kernel()) {
1506 /* assume a correct copy */
1507 memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
1508 } else if (current_map() == map) {
1509 if (copyout(fromdata, toaddr, length) != 0) {
1510 kr = KERN_INVALID_ADDRESS;
1511 }
1512 } else {
1513 vm_map_reference(map);
1514 oldmap = vm_map_switch(map);
1515 if (copyout(fromdata, toaddr, length) != 0) {
1516 kr = KERN_INVALID_ADDRESS;
1517 }
1518 vm_map_switch(oldmap);
1519 vm_map_deallocate(map);
1520 }
1521 return kr;
1522}
1523
1524/*
1525 * Routine: copyoutmap_atomic{32, 64}
1526 * Purpose:
1527 * Like copyoutmap, except that the operation is atomic.
1528 * Takes in value rather than *fromdata pointer.
1529 */
1530kern_return_t
1531copyoutmap_atomic32(
1532 vm_map_t map,
1533 uint32_t value,
1534 vm_map_address_t toaddr)
1535{
1536 kern_return_t kr = KERN_SUCCESS;
1537 vm_map_t oldmap;
1538
1539 if (vm_map_pmap(map) == pmap_kernel()) {
1540 /* assume a correct toaddr */
1541 *(uint32_t *)toaddr = value;
1542 } else if (current_map() == map) {
1543 if (copyout_atomic32(value, toaddr) != 0) {
1544 kr = KERN_INVALID_ADDRESS;
1545 }
1546 } else {
1547 vm_map_reference(map);
1548 oldmap = vm_map_switch(map);
1549 if (copyout_atomic32(value, toaddr) != 0) {
1550 kr = KERN_INVALID_ADDRESS;
1551 }
1552 vm_map_switch(oldmap);
1553 vm_map_deallocate(map);
1554 }
1555 return kr;
1556}
1557
1558kern_return_t
1559copyoutmap_atomic64(
1560 vm_map_t map,
1561 uint64_t value,
1562 vm_map_address_t toaddr)
1563{
1564 kern_return_t kr = KERN_SUCCESS;
1565 vm_map_t oldmap;
1566
1567 if (vm_map_pmap(map) == pmap_kernel()) {
1568 /* assume a correct toaddr */
1569 *(uint64_t *)toaddr = value;
1570 } else if (current_map() == map) {
1571 if (copyout_atomic64(value, toaddr) != 0) {
1572 kr = KERN_INVALID_ADDRESS;
1573 }
1574 } else {
1575 vm_map_reference(map);
1576 oldmap = vm_map_switch(map);
1577 if (copyout_atomic64(value, toaddr) != 0) {
1578 kr = KERN_INVALID_ADDRESS;
1579 }
1580 vm_map_switch(oldmap);
1581 vm_map_deallocate(map);
1582 }
1583 return kr;
1584}
1585
1586/*
1587 *
1588 * The following two functions are to be used when exposing kernel
1589 * addresses to userspace via any of the various debug or info
1590 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
1591 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
1592 * are exported to KEXTs.
1593 *
1594 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
1595 */
1596
1597static void
1598vm_kernel_addrhash_internal(
1599 vm_offset_t addr,
1600 vm_offset_t *hash_addr,
1601 uint64_t salt)
1602{
1603 assert(salt != 0);
1604
1605 if (addr == 0) {
1606 *hash_addr = 0;
1607 return;
1608 }
1609
1610 if (VM_KERNEL_IS_SLID(addr)) {
1611 *hash_addr = VM_KERNEL_UNSLIDE(addr);
1612 return;
1613 }
1614
1615 vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
1616 SHA256_CTX sha_ctx;
1617
1618 SHA256_Init(&sha_ctx);
1619 SHA256_Update(&sha_ctx, &salt, sizeof(salt));
1620 SHA256_Update(&sha_ctx, &addr, sizeof(addr));
1621 SHA256_Final(sha_digest, &sha_ctx);
1622
1623 *hash_addr = sha_digest[0];
1624}
1625
1626void
1627vm_kernel_addrhash_external(
1628 vm_offset_t addr,
1629 vm_offset_t *hash_addr)
1630{
1631 return vm_kernel_addrhash_internal(addr, hash_addr, vm_kernel_addrhash_salt_ext);
1632}
1633
1634vm_offset_t
1635vm_kernel_addrhash(vm_offset_t addr)
1636{
1637 vm_offset_t hash_addr;
1638 vm_kernel_addrhash_internal(addr, &hash_addr, vm_kernel_addrhash_salt);
1639 return hash_addr;
1640}
1641
1642void
1643vm_kernel_addrhide(
1644 vm_offset_t addr,
1645 vm_offset_t *hide_addr)
1646{
1647 *hide_addr = VM_KERNEL_ADDRHIDE(addr);
1648}
1649
1650/*
1651 * vm_kernel_addrperm_external:
1652 * vm_kernel_unslide_or_perm_external:
1653 *
1654 * Use these macros when exposing an address to userspace that could come from
1655 * either kernel text/data *or* the heap.
1656 */
1657void
1658vm_kernel_addrperm_external(
1659 vm_offset_t addr,
1660 vm_offset_t *perm_addr)
1661{
1662 if (VM_KERNEL_IS_SLID(addr)) {
1663 *perm_addr = VM_KERNEL_UNSLIDE(addr);
1664 } else if (VM_KERNEL_ADDRESS(addr)) {
1665 *perm_addr = addr + vm_kernel_addrperm_ext;
1666 } else {
1667 *perm_addr = addr;
1668 }
1669}
1670
1671void
1672vm_kernel_unslide_or_perm_external(
1673 vm_offset_t addr,
1674 vm_offset_t *up_addr)
1675{
1676 vm_kernel_addrperm_external(addr, up_addr);
1677}
1678
1679void
1680vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
1681{
1682 if (ptr & ((1ul << params.vmpp_shift) - 1)) {
1683 panic("pointer %p can't be packed: low %d bits aren't 0",
1684 (void *)ptr, params.vmpp_shift);
1685 } else if (ptr <= params.vmpp_base) {
1686 panic("pointer %p can't be packed: below base %p",
1687 (void *)ptr, (void *)params.vmpp_base);
1688 } else {
1689 panic("pointer %p can't be packed: maximum encodable pointer is %p",
1690 (void *)ptr, (void *)vm_packing_max_packable(params));
1691 }
1692}
1693
1694void
1695vm_packing_verify_range(
1696 const char *subsystem,
1697 vm_offset_t min_address,
1698 vm_offset_t max_address,
1699 vm_packing_params_t params)
1700{
1701 if (min_address > max_address) {
1702 panic("%s: %s range invalid min:%p > max:%p",
1703 __func__, subsystem, (void *)min_address, (void *)max_address);
1704 }
1705
1706 if (!params.vmpp_base_relative) {
1707 return;
1708 }
1709
1710 if (min_address <= params.vmpp_base) {
1711 panic("%s: %s range invalid min:%p <= base:%p",
1712 __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
1713 }
1714
1715 if (max_address > vm_packing_max_packable(params)) {
1716 panic("%s: %s range invalid max:%p >= max packable:%p",
1717 __func__, subsystem, (void *)max_address,
1718 (void *)vm_packing_max_packable(params));
1719 }
1720}