1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_kern.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Kernel memory management.
64 */
65
66 #include <mach/kern_return.h>
67 #include <mach/vm_param.h>
68 #include <kern/assert.h>
69 #include <kern/thread.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_compressor.h>
75 #include <vm/vm_pageout.h>
76 #include <kern/misc_protos.h>
77 #include <vm/cpm.h>
78 #include <kern/ledger.h>
79 #include <kern/bits.h>
80 #include <kern/startup.h>
81
82 #include <string.h>
83
84 #include <libkern/OSDebug.h>
85 #include <libkern/crypto/sha2.h>
86 #include <libkern/section_keywords.h>
87 #include <sys/kdebug.h>
88
89 #include <san/kasan.h>
90
91 /*
92 * Variables exported by this module.
93 */
94
95 SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
96 vm_map_t kernel_pageable_map;
97
98 /*
99 * Forward declarations for internal functions.
100 */
101 extern kern_return_t kmem_alloc_pages(
102 vm_object_t object,
103 vm_object_offset_t offset,
104 vm_object_size_t size);
105
106 kern_return_t
107 kmem_alloc_contig(
108 vm_map_t map,
109 vm_offset_t *addrp,
110 vm_size_t size,
111 vm_offset_t mask,
112 ppnum_t max_pnum,
113 ppnum_t pnum_mask,
114 int flags,
115 vm_tag_t tag)
116 {
117 vm_object_t object;
118 vm_object_offset_t offset;
119 vm_map_offset_t map_addr;
120 vm_map_offset_t map_mask;
121 vm_map_size_t map_size, i;
122 vm_map_entry_t entry;
123 vm_page_t m, pages;
124 kern_return_t kr;
125
126 assert(VM_KERN_MEMORY_NONE != tag);
127
128 if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) {
129 return KERN_INVALID_ARGUMENT;
130 }
131
132 map_size = vm_map_round_page(size,
133 VM_MAP_PAGE_MASK(map));
134 map_mask = (vm_map_offset_t)mask;
135
136 /* Check for zero allocation size (either directly or via overflow) */
137 if (map_size == 0) {
138 *addrp = 0;
139 return KERN_INVALID_ARGUMENT;
140 }
141
142 /*
143 * Allocate a new object (if necessary) and the reference we
144 * will be donating to the map entry. We must do this before
145 * locking the map, or risk deadlock with the default pager.
146 */
147 if ((flags & KMA_KOBJECT) != 0) {
148 object = kernel_object;
149 vm_object_reference(object);
150 } else {
151 object = vm_object_allocate(map_size);
152 }
153
154 kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0,
155 VM_MAP_KERNEL_FLAGS_NONE, tag, &entry);
156 if (KERN_SUCCESS != kr) {
157 vm_object_deallocate(object);
158 return kr;
159 }
160
161 if (object == kernel_object) {
162 offset = map_addr;
163 } else {
164 offset = 0;
165 }
166 VME_OBJECT_SET(entry, object);
167 VME_OFFSET_SET(entry, offset);
168
169 /* Take an extra object ref in case the map entry gets deleted */
170 vm_object_reference(object);
171 vm_map_unlock(map);
172
173 kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
174
175 if (kr != KERN_SUCCESS) {
176 vm_map_remove(map,
177 vm_map_trunc_page(map_addr,
178 VM_MAP_PAGE_MASK(map)),
179 vm_map_round_page(map_addr + map_size,
180 VM_MAP_PAGE_MASK(map)),
181 VM_MAP_REMOVE_NO_FLAGS);
182 vm_object_deallocate(object);
183 *addrp = 0;
184 return kr;
185 }
186
187 vm_object_lock(object);
188 for (i = 0; i < map_size; i += PAGE_SIZE) {
189 m = pages;
190 pages = NEXT_PAGE(m);
191 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
192 m->vmp_busy = FALSE;
193 vm_page_insert(m, object, offset + i);
194 }
195 vm_object_unlock(object);
196
197 kr = vm_map_wire_kernel(map,
198 vm_map_trunc_page(map_addr,
199 VM_MAP_PAGE_MASK(map)),
200 vm_map_round_page(map_addr + map_size,
201 VM_MAP_PAGE_MASK(map)),
202 VM_PROT_DEFAULT, tag,
203 FALSE);
204
205 if (kr != KERN_SUCCESS) {
206 if (object == kernel_object) {
207 vm_object_lock(object);
208 vm_object_page_remove(object, offset, offset + map_size);
209 vm_object_unlock(object);
210 }
211 vm_map_remove(map,
212 vm_map_trunc_page(map_addr,
213 VM_MAP_PAGE_MASK(map)),
214 vm_map_round_page(map_addr + map_size,
215 VM_MAP_PAGE_MASK(map)),
216 VM_MAP_REMOVE_NO_FLAGS);
217 vm_object_deallocate(object);
218 return kr;
219 }
220 vm_object_deallocate(object);
221
222 if (object == kernel_object) {
223 vm_map_simplify(map, map_addr);
224 vm_tag_update_size(tag, map_size);
225 }
226 *addrp = (vm_offset_t) map_addr;
227 assert((vm_map_offset_t) *addrp == map_addr);
228
229 return KERN_SUCCESS;
230 }
231
232 /*
233 * Master entry point for allocating kernel memory.
234 * NOTE: this routine is _never_ interrupt safe.
235 *
236 * map : map to allocate into
237 * addrp : pointer to start address of new memory
238 * size : size of memory requested
239 * flags : options
240 * KMA_HERE *addrp is base address, else "anywhere"
241 * KMA_NOPAGEWAIT don't wait for pages if unavailable
242 * KMA_KOBJECT use kernel_object
243 * KMA_LOMEM support for 32 bit devices in a 64 bit world
244 * if set and a low-memory pool is available
245 * grab pages from it... this also implies
246 * KMA_NOPAGEWAIT
247 */
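/*
 * Illustrative call (a sketch only; the size, mask and tag below are
 * placeholders chosen for the example):
 *
 *	vm_offset_t addr;
 *	kern_return_t kr;
 *
 *	kr = kernel_memory_allocate(kernel_map, &addr, 4 * PAGE_SIZE, 0,
 *	    KMA_KOBJECT | KMA_ZERO, VM_KERN_MEMORY_KALLOC);
 *	if (kr == KERN_SUCCESS) {
 *		... addr is wired, zero-filled and backed by kernel_object ...
 *		kmem_free(kernel_map, addr, 4 * PAGE_SIZE);
 *	}
 */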
248
249 kern_return_t
250 kernel_memory_allocate(
251 vm_map_t map,
252 vm_offset_t *addrp,
253 vm_size_t size,
254 vm_offset_t mask,
255 int flags,
256 vm_tag_t tag)
257 {
258 vm_object_t object;
259 vm_object_offset_t offset;
260 vm_object_offset_t pg_offset;
261 vm_map_entry_t entry = NULL;
262 vm_map_offset_t map_addr, fill_start;
263 vm_map_offset_t map_mask;
264 vm_map_size_t map_size, fill_size;
265 kern_return_t kr, pe_result;
266 vm_page_t mem;
267 vm_page_t guard_page_list = NULL;
268 vm_page_t wired_page_list = NULL;
269 int guard_page_count = 0;
270 int wired_page_count = 0;
271 int page_grab_count = 0;
272 int i;
273 int vm_alloc_flags;
274 vm_map_kernel_flags_t vmk_flags;
275 vm_prot_t kma_prot;
276 #if DEVELOPMENT || DEBUG
277 task_t task = current_task();
278 #endif /* DEVELOPMENT || DEBUG */
279
280 if (startup_phase < STARTUP_SUB_KMEM) {
281 panic("kernel_memory_allocate: VM is not ready");
282 }
283
284 map_size = vm_map_round_page(size,
285 VM_MAP_PAGE_MASK(map));
286 map_mask = (vm_map_offset_t) mask;
287
288 vm_alloc_flags = 0; //VM_MAKE_TAG(tag);
289 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
290
291 /* Check for zero allocation size (either directly or via overflow) */
292 if (map_size == 0) {
293 *addrp = 0;
294 return KERN_INVALID_ARGUMENT;
295 }
296
297 /*
298 * limit the size of a single extent of wired memory
299 * to try and limit the damage to the system if
300 * too many pages get wired down
301 * limit raised to 2GB with 128GB max physical limit,
302 * but scaled by installed memory above this
303 */
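/*
 * For example, assuming sane_size roughly tracks installed physical
 * memory: at or below 128GB the 2GB floor (1ULL << 31) governs, while
 * a 512GB configuration permits up to 512GB / 64 = 8GB per request.
 */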
304 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
305 map_size > MAX(1ULL << 31, sane_size / 64)) {
306 return KERN_RESOURCE_SHORTAGE;
307 }
308
309 /*
310 * Guard pages:
311 *
312 * Guard pages are implemented as fictitious pages. By placing guard pages
313 * on either end of a stack, they can help detect cases where a thread walks
314 * off either end of its stack. They are allocated and set up here and attempts
315 * to access those pages are trapped in vm_fault_page().
316 *
317 * The map_size we were passed may include extra space for
318 * guard pages. If those were requested, then back it out of fill_size
319 * since vm_map_find_space() takes just the actual size not including
320 * guard pages. Similarly, fill_start indicates where the actual pages
321 * will begin in the range.
322 */
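/*
 * Worked example: with both KMA_GUARD_FIRST and KMA_GUARD_LAST set and a
 * map_size of 6 pages, the adjustments below give fill_start = PAGE_SIZE,
 * fill_size = 4 pages, guard_page_count = 2 and wired_page_count = 4:
 *
 *	[ guard | wired | wired | wired | wired | guard ]
 *	        ^ fill_start                    ^ fill_start + fill_size
 */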
323
324 fill_start = 0;
325 fill_size = map_size;
326
327 if (flags & KMA_GUARD_FIRST) {
328 vmk_flags.vmkf_guard_before = TRUE;
329 fill_start += PAGE_SIZE_64;
330 fill_size -= PAGE_SIZE_64;
331 if (map_size < fill_start + fill_size) {
332 /* no space for a guard page */
333 *addrp = 0;
334 return KERN_INVALID_ARGUMENT;
335 }
336 guard_page_count++;
337 }
338 if (flags & KMA_GUARD_LAST) {
339 vmk_flags.vmkf_guard_after = TRUE;
340 fill_size -= PAGE_SIZE_64;
341 if (map_size <= fill_start + fill_size) {
342 /* no space for a guard page */
343 *addrp = 0;
344 return KERN_INVALID_ARGUMENT;
345 }
346 guard_page_count++;
347 }
348 wired_page_count = (int) (fill_size / PAGE_SIZE_64);
349 assert(wired_page_count * PAGE_SIZE_64 == fill_size);
350
351 #if DEBUG || DEVELOPMENT
352 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, size, 0, 0, 0);
353 #endif
354
355 for (i = 0; i < guard_page_count; i++) {
356 for (;;) {
357 mem = vm_page_grab_guard();
358
359 if (mem != VM_PAGE_NULL) {
360 break;
361 }
362 if (flags & KMA_NOPAGEWAIT) {
363 kr = KERN_RESOURCE_SHORTAGE;
364 goto out;
365 }
366 vm_page_more_fictitious();
367 }
368 mem->vmp_snext = guard_page_list;
369 guard_page_list = mem;
370 }
371
372 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
373 for (i = 0; i < wired_page_count; i++) {
374 for (;;) {
375 if (flags & KMA_LOMEM) {
376 mem = vm_page_grablo();
377 } else {
378 mem = vm_page_grab();
379 }
380
381 if (mem != VM_PAGE_NULL) {
382 break;
383 }
384
385 if (flags & KMA_NOPAGEWAIT) {
386 kr = KERN_RESOURCE_SHORTAGE;
387 goto out;
388 }
389 if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
390 kr = KERN_RESOURCE_SHORTAGE;
391 goto out;
392 }
393
394 /* VM privileged threads should have waited in vm_page_grab() and not get here. */
395 assert(!(current_thread()->options & TH_OPT_VMPRIV));
396
397 uint64_t unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;
398 if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
399 kr = KERN_RESOURCE_SHORTAGE;
400 goto out;
401 }
402 VM_PAGE_WAIT();
403 }
404 page_grab_count++;
405 if (KMA_ZERO & flags) {
406 vm_page_zero_fill(mem);
407 }
408 mem->vmp_snext = wired_page_list;
409 wired_page_list = mem;
410 }
411 }
412
413 /*
414 * Allocate a new object (if necessary). We must do this before
415 * locking the map, or risk deadlock with the default pager.
416 */
417 if ((flags & KMA_KOBJECT) != 0) {
418 object = kernel_object;
419 vm_object_reference(object);
420 } else if ((flags & KMA_COMPRESSOR) != 0) {
421 object = compressor_object;
422 vm_object_reference(object);
423 } else {
424 object = vm_object_allocate(map_size);
425 }
426
427 if (flags & KMA_ATOMIC) {
428 vmk_flags.vmkf_atomic_entry = TRUE;
429 }
430
431 if (flags & KMA_KHEAP) {
432 vm_alloc_flags |= VM_MAP_FIND_LAST_FREE;
433 }
434
435 kr = vm_map_find_space(map, &map_addr,
436 fill_size, map_mask,
437 vm_alloc_flags, vmk_flags, tag, &entry);
438 if (KERN_SUCCESS != kr) {
439 vm_object_deallocate(object);
440 goto out;
441 }
442
443 if (object == kernel_object || object == compressor_object) {
444 offset = map_addr;
445 } else {
446 offset = 0;
447 }
448 VME_OBJECT_SET(entry, object);
449 VME_OFFSET_SET(entry, offset);
450
451 if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
452 entry->wired_count++;
453 }
454
455 if (flags & KMA_PERMANENT) {
456 entry->permanent = TRUE;
457 }
458
459 if (object != kernel_object && object != compressor_object) {
460 vm_object_reference(object);
461 }
462
463 vm_object_lock(object);
464 vm_map_unlock(map);
465
466 pg_offset = 0;
467
468 if (fill_start) {
469 if (guard_page_list == NULL) {
470 panic("kernel_memory_allocate: guard_page_list == NULL");
471 }
472
473 mem = guard_page_list;
474 guard_page_list = mem->vmp_snext;
475 mem->vmp_snext = NULL;
476
477 vm_page_insert(mem, object, offset + pg_offset);
478
479 mem->vmp_busy = FALSE;
480 pg_offset += PAGE_SIZE_64;
481 }
482
483 kma_prot = VM_PROT_READ | VM_PROT_WRITE;
484
485 #if KASAN
486 if (!(flags & KMA_VAONLY)) {
487 /* for VAONLY mappings we notify in populate only */
488 kasan_notify_address(map_addr, size);
489 }
490 #endif
491
492 if (flags & (KMA_VAONLY | KMA_PAGEABLE)) {
493 pg_offset = fill_start + fill_size;
494 } else {
495 for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
496 if (wired_page_list == NULL) {
497 panic("kernel_memory_allocate: wired_page_list == NULL");
498 }
499
500 mem = wired_page_list;
501 wired_page_list = mem->vmp_snext;
502 mem->vmp_snext = NULL;
503
504 assert(mem->vmp_wire_count == 0);
505 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
506
507 mem->vmp_q_state = VM_PAGE_IS_WIRED;
508 mem->vmp_wire_count++;
509 if (__improbable(mem->vmp_wire_count == 0)) {
510 panic("kernel_memory_allocate(%p): wire_count overflow",
511 mem);
512 }
513
514 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
515
516 mem->vmp_busy = FALSE;
517 mem->vmp_pmapped = TRUE;
518 mem->vmp_wpmapped = TRUE;
519
520 PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset,
521 0, /* fault_phys_offset */
522 mem,
523 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
524 PMAP_OPTIONS_NOWAIT, pe_result);
525
526 if (pe_result == KERN_RESOURCE_SHORTAGE) {
527 vm_object_unlock(object);
528
529 PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
530 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
531 pe_result);
532
533 vm_object_lock(object);
534 }
535
536 assert(pe_result == KERN_SUCCESS);
537
538 if (flags & KMA_NOENCRYPT) {
539 bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
540
541 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
542 }
543 }
544 if (kernel_object == object) {
545 vm_tag_update_size(tag, fill_size);
546 }
547 }
548 if ((fill_start + fill_size) < map_size) {
549 if (guard_page_list == NULL) {
550 panic("kernel_memory_allocate: guard_page_list == NULL");
551 }
552
553 mem = guard_page_list;
554 guard_page_list = mem->vmp_snext;
555 mem->vmp_snext = NULL;
556
557 vm_page_insert(mem, object, offset + pg_offset);
558
559 mem->vmp_busy = FALSE;
560 }
561 if (guard_page_list || wired_page_list) {
562 panic("kernel_memory_allocate: non empty list\n");
563 }
564
565 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
566 vm_page_lockspin_queues();
567 vm_page_wire_count += wired_page_count;
568 vm_page_unlock_queues();
569 }
570
571 vm_object_unlock(object);
572
573 /*
574 * now that the pages are wired, we no longer have to fear coalescing
575 */
576 if (object == kernel_object || object == compressor_object) {
577 vm_map_simplify(map, map_addr);
578 } else {
579 vm_object_deallocate(object);
580 }
581
582 #if DEBUG || DEVELOPMENT
583 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
584 if (task != NULL) {
585 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
586 }
587 #endif
588
589 /*
590 * Return the memory, not zeroed.
591 */
592 *addrp = CAST_DOWN(vm_offset_t, map_addr);
593 return KERN_SUCCESS;
594
595 out:
596 if (guard_page_list) {
597 vm_page_free_list(guard_page_list, FALSE);
598 }
599
600 if (wired_page_list) {
601 vm_page_free_list(wired_page_list, FALSE);
602 }
603
604 #if DEBUG || DEVELOPMENT
605 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
606 if (task != NULL && kr == KERN_SUCCESS) {
607 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
608 }
609 #endif
610
611 return kr;
612 }
613
614 kern_return_t
615 kernel_memory_populate(
616 vm_map_t map,
617 vm_offset_t addr,
618 vm_size_t size,
619 int flags,
620 vm_tag_t tag)
621 {
622 vm_object_t object;
623 vm_object_offset_t offset, pg_offset;
624 kern_return_t kr, pe_result;
625 vm_page_t mem;
626 vm_page_t page_list = NULL;
627 int page_count = 0;
628 int page_grab_count = 0;
629 int i;
630
631 #if DEBUG || DEVELOPMENT
632 task_t task = current_task();
633 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, size, 0, 0, 0);
634 #endif
635
636 page_count = (int) (size / PAGE_SIZE_64);
637
638 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
639
640 if (flags & KMA_COMPRESSOR) {
641 pg_offset = page_count * PAGE_SIZE_64;
642
643 do {
644 for (;;) {
645 mem = vm_page_grab();
646
647 if (mem != VM_PAGE_NULL) {
648 break;
649 }
650
651 VM_PAGE_WAIT();
652 }
653 page_grab_count++;
654 if (KMA_ZERO & flags) {
655 vm_page_zero_fill(mem);
656 }
657 mem->vmp_snext = page_list;
658 page_list = mem;
659
660 pg_offset -= PAGE_SIZE_64;
661
662 kr = pmap_enter_options(kernel_pmap,
663 addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
664 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
665 PMAP_OPTIONS_INTERNAL, NULL);
666 assert(kr == KERN_SUCCESS);
667 } while (pg_offset);
668
669 offset = addr;
670 object = compressor_object;
671
672 vm_object_lock(object);
673
674 for (pg_offset = 0;
675 pg_offset < size;
676 pg_offset += PAGE_SIZE_64) {
677 mem = page_list;
678 page_list = mem->vmp_snext;
679 mem->vmp_snext = NULL;
680
681 vm_page_insert(mem, object, offset + pg_offset);
682 assert(mem->vmp_busy);
683
684 mem->vmp_busy = FALSE;
685 mem->vmp_pmapped = TRUE;
686 mem->vmp_wpmapped = TRUE;
687 mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
688 }
689 vm_object_unlock(object);
690
691 #if KASAN
692 if (map == compressor_map) {
693 kasan_notify_address_nopoison(addr, size);
694 } else {
695 kasan_notify_address(addr, size);
696 }
697 #endif
698
699 #if DEBUG || DEVELOPMENT
700 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
701 if (task != NULL) {
702 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
703 }
704 #endif
705 return KERN_SUCCESS;
706 }
707
708 for (i = 0; i < page_count; i++) {
709 for (;;) {
710 if (flags & KMA_LOMEM) {
711 mem = vm_page_grablo();
712 } else {
713 mem = vm_page_grab();
714 }
715
716 if (mem != VM_PAGE_NULL) {
717 break;
718 }
719
720 if (flags & KMA_NOPAGEWAIT) {
721 kr = KERN_RESOURCE_SHORTAGE;
722 goto out;
723 }
724 if ((flags & KMA_LOMEM) &&
725 (vm_lopage_needed == TRUE)) {
726 kr = KERN_RESOURCE_SHORTAGE;
727 goto out;
728 }
729 VM_PAGE_WAIT();
730 }
731 page_grab_count++;
732 if (KMA_ZERO & flags) {
733 vm_page_zero_fill(mem);
734 }
735 mem->vmp_snext = page_list;
736 page_list = mem;
737 }
738 if (flags & KMA_KOBJECT) {
739 offset = addr;
740 object = kernel_object;
741
742 vm_object_lock(object);
743 } else {
744 /*
745 * If it's not the kernel object, we need to:
746 * lock map;
747 * lookup entry;
748 * lock object;
749 * take reference on object;
750 * unlock map;
751 */
752 panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
753 "!KMA_KOBJECT",
754 map, (uint64_t) addr, (uint64_t) size, flags);
755 }
756
757 for (pg_offset = 0;
758 pg_offset < size;
759 pg_offset += PAGE_SIZE_64) {
760 if (page_list == NULL) {
761 panic("kernel_memory_populate: page_list == NULL");
762 }
763
764 mem = page_list;
765 page_list = mem->vmp_snext;
766 mem->vmp_snext = NULL;
767
768 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
769 mem->vmp_q_state = VM_PAGE_IS_WIRED;
770 mem->vmp_wire_count++;
771 if (__improbable(mem->vmp_wire_count == 0)) {
772 panic("kernel_memory_populate(%p): wire_count overflow", mem);
773 }
774
775 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
776
777 mem->vmp_busy = FALSE;
778 mem->vmp_pmapped = TRUE;
779 mem->vmp_wpmapped = TRUE;
780
781 PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset,
782 0, /* fault_phys_offset */
783 mem,
784 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
785 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
786 PMAP_OPTIONS_NOWAIT, pe_result);
787
788 if (pe_result == KERN_RESOURCE_SHORTAGE) {
789 vm_object_unlock(object);
790
791 PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
792 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
793 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
794 pe_result);
795
796 vm_object_lock(object);
797 }
798
799 assert(pe_result == KERN_SUCCESS);
800
801 if (flags & KMA_NOENCRYPT) {
802 bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
803 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
804 }
805 }
806 vm_object_unlock(object);
807
808 vm_page_lockspin_queues();
809 vm_page_wire_count += page_count;
810 vm_page_unlock_queues();
811 vm_tag_update_size(tag, ptoa_64(page_count));
812
813 #if DEBUG || DEVELOPMENT
814 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
815 if (task != NULL) {
816 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
817 }
818 #endif
819
820 #if KASAN
821 if (map == compressor_map) {
822 kasan_notify_address_nopoison(addr, size);
823 } else {
824 kasan_notify_address(addr, size);
825 }
826 #endif
827 return KERN_SUCCESS;
828
829 out:
830 if (page_list) {
831 vm_page_free_list(page_list, FALSE);
832 }
833
834 #if DEBUG || DEVELOPMENT
835 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
836 if (task != NULL && kr == KERN_SUCCESS) {
837 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
838 }
839 #endif
840
841 return kr;
842 }
843
844
845 void
846 kernel_memory_depopulate(
847 vm_map_t map,
848 vm_offset_t addr,
849 vm_size_t size,
850 int flags,
851 vm_tag_t tag)
852 {
853 vm_object_t object;
854 vm_object_offset_t offset, pg_offset;
855 vm_page_t mem;
856 vm_page_t local_freeq = NULL;
857 unsigned int pages_unwired;
858
859 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
860
861 if (flags & KMA_COMPRESSOR) {
862 offset = addr;
863 object = compressor_object;
864
865 vm_object_lock(object);
866 } else if (flags & KMA_KOBJECT) {
867 offset = addr;
868 object = kernel_object;
869 vm_object_lock(object);
870 } else {
871 offset = 0;
872 object = NULL;
873 /*
874 * If it's not the kernel object, we need to:
875 * lock map;
876 * lookup entry;
877 * lock object;
878 * unlock map;
879 */
880 panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
881 "!KMA_KOBJECT",
882 map, (uint64_t) addr, (uint64_t) size, flags);
883 }
884 pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);
885
886 for (pg_offset = 0, pages_unwired = 0;
887 pg_offset < size;
888 pg_offset += PAGE_SIZE_64) {
889 mem = vm_page_lookup(object, offset + pg_offset);
890
891 assert(mem);
892
893 if (mem->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
894 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
895 pages_unwired++;
896 }
897
898 mem->vmp_busy = TRUE;
899
900 assert(mem->vmp_tabled);
901 vm_page_remove(mem, TRUE);
902 assert(mem->vmp_busy);
903
904 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
905 assert((mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
906 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
907
908 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
909 mem->vmp_snext = local_freeq;
910 local_freeq = mem;
911 }
912 vm_object_unlock(object);
913
914
915 if (local_freeq) {
916 vm_page_free_list(local_freeq, TRUE);
917 if (pages_unwired != 0) {
918 vm_page_lockspin_queues();
919 vm_page_wire_count -= pages_unwired;
920 vm_page_unlock_queues();
921 vm_tag_update_size(tag, -ptoa_64(pages_unwired));
922 }
923 }
924 }
925
926 /*
927 * kmem_alloc:
928 *
929 * Allocate wired-down memory in the kernel's address map
930 * or a submap. The memory is not zero-filled.
931 */
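/*
 * Typical use (sketch; the size and tag are illustrative):
 *
 *	vm_offset_t buf;
 *	kern_return_t kr;
 *
 *	kr = kmem_alloc(kernel_map, &buf, 16 * PAGE_SIZE, VM_KERN_MEMORY_DIAG);
 *	if (kr == KERN_SUCCESS) {
 *		... buf is wired and mapped read/write ...
 *		kmem_free(kernel_map, buf, 16 * PAGE_SIZE);
 *	}
 */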
932
933 kern_return_t
934 kmem_alloc_external(
935 vm_map_t map,
936 vm_offset_t *addrp,
937 vm_size_t size)
938 {
939 return kmem_alloc(map, addrp, size, vm_tag_bt());
940 }
941
942
943 kern_return_t
944 kmem_alloc(
945 vm_map_t map,
946 vm_offset_t *addrp,
947 vm_size_t size,
948 vm_tag_t tag)
949 {
950 return kmem_alloc_flags(map, addrp, size, tag, 0);
951 }
952
953 kern_return_t
954 kmem_alloc_flags(
955 vm_map_t map,
956 vm_offset_t *addrp,
957 vm_size_t size,
958 vm_tag_t tag,
959 int flags)
960 {
961 kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, flags, tag);
962 if (kr == KERN_SUCCESS) {
963 TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
964 }
965 return kr;
966 }
967
968 /*
969 * kmem_realloc:
970 *
971 * Reallocate wired-down memory in the kernel's address map
972 * or a submap. Newly allocated pages are not zeroed.
973 * This can only be used on regions allocated with kmem_alloc.
974 *
975 * If successful, the pages in the old region are mapped twice.
976 * The old region is unchanged. Use kmem_free to get rid of it.
977 */
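/*
 * Illustrative growth pattern (sketch; oldaddr, oldsize, newsize and tag
 * are placeholders):
 *
 *	vm_offset_t newaddr;
 *	kr = kmem_realloc(kernel_map, oldaddr, oldsize, &newaddr, newsize, tag);
 *	if (kr == KERN_SUCCESS) {
 *		... switch users of the region over to newaddr ...
 *		kmem_free(kernel_map, oldaddr, oldsize);
 *	}
 */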
978 kern_return_t
979 kmem_realloc(
980 vm_map_t map,
981 vm_offset_t oldaddr,
982 vm_size_t oldsize,
983 vm_offset_t *newaddrp,
984 vm_size_t newsize,
985 vm_tag_t tag)
986 {
987 vm_object_t object;
988 vm_object_offset_t offset;
989 vm_map_offset_t oldmapmin;
990 vm_map_offset_t oldmapmax;
991 vm_map_offset_t newmapaddr;
992 vm_map_size_t oldmapsize;
993 vm_map_size_t newmapsize;
994 vm_map_entry_t oldentry;
995 vm_map_entry_t newentry;
996 vm_page_t mem;
997 kern_return_t kr;
998
999 oldmapmin = vm_map_trunc_page(oldaddr,
1000 VM_MAP_PAGE_MASK(map));
1001 oldmapmax = vm_map_round_page(oldaddr + oldsize,
1002 VM_MAP_PAGE_MASK(map));
1003 oldmapsize = oldmapmax - oldmapmin;
1004 newmapsize = vm_map_round_page(newsize,
1005 VM_MAP_PAGE_MASK(map));
1006 if (newmapsize < newsize) {
1007 /* overflow */
1008 *newaddrp = 0;
1009 return KERN_INVALID_ARGUMENT;
1010 }
1011
1012 /*
1013 * Find the VM object backing the old region.
1014 */
1015
1016 vm_map_lock(map);
1017
1018 if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
1019 panic("kmem_realloc");
1020 }
1021 object = VME_OBJECT(oldentry);
1022
1023 /*
1024 * Increase the size of the object and
1025 * fill in the new region.
1026 */
1027
1028 vm_object_reference(object);
1029 /* by grabbing the object lock before unlocking the map */
1030 /* we guarantee that we will panic if more than one */
1031 /* attempt is made to realloc a kmem_alloc'd area */
1032 vm_object_lock(object);
1033 vm_map_unlock(map);
1034 if (object->vo_size != oldmapsize) {
1035 panic("kmem_realloc");
1036 }
1037 object->vo_size = newmapsize;
1038 vm_object_unlock(object);
1039
1040 /* allocate the new pages while expanded portion of the */
1041 /* object is still not mapped */
1042 kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
1043 vm_object_round_page(newmapsize - oldmapsize));
1044
1045 /*
1046 * Find space for the new region.
1047 */
1048
1049 kr = vm_map_find_space(map, &newmapaddr, newmapsize,
1050 (vm_map_offset_t) 0, 0,
1051 VM_MAP_KERNEL_FLAGS_NONE,
1052 tag,
1053 &newentry);
1054 if (kr != KERN_SUCCESS) {
1055 vm_object_lock(object);
1056 for (offset = oldmapsize;
1057 offset < newmapsize; offset += PAGE_SIZE) {
1058 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
1059 VM_PAGE_FREE(mem);
1060 }
1061 }
1062 object->vo_size = oldmapsize;
1063 vm_object_unlock(object);
1064 vm_object_deallocate(object);
1065 return kr;
1066 }
1067 VME_OBJECT_SET(newentry, object);
1068 VME_OFFSET_SET(newentry, 0);
1069 assert(newentry->wired_count == 0);
1070
1071
1072 /* add an extra reference in case we have someone doing an */
1073 /* unexpected deallocate */
1074 vm_object_reference(object);
1075 vm_map_unlock(map);
1076
1077 kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
1078 VM_PROT_DEFAULT, tag, FALSE);
1079 if (KERN_SUCCESS != kr) {
1080 vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, VM_MAP_REMOVE_NO_FLAGS);
1081 vm_object_lock(object);
1082 for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
1083 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
1084 VM_PAGE_FREE(mem);
1085 }
1086 }
1087 object->vo_size = oldmapsize;
1088 vm_object_unlock(object);
1089 vm_object_deallocate(object);
1090 return kr;
1091 }
1092 vm_object_deallocate(object);
1093
1094 if (kernel_object == object) {
1095 vm_tag_update_size(tag, newmapsize);
1096 }
1097
1098 *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
1099 return KERN_SUCCESS;
1100 }
1101
1102 /*
1103 * kmem_alloc_kobject:
1104 *
1105 * Allocate wired-down memory in the kernel's address map
1106 * or a submap. The memory is not zero-filled.
1107 *
1108 * The memory is allocated in the kernel_object.
1109 * It may not be copied with vm_map_copy, and
1110 * it may not be reallocated with kmem_realloc.
1111 */
1112
1113 kern_return_t
1114 kmem_alloc_kobject_external(
1115 vm_map_t map,
1116 vm_offset_t *addrp,
1117 vm_size_t size)
1118 {
1119 return kmem_alloc_kobject(map, addrp, size, vm_tag_bt());
1120 }
1121
1122 kern_return_t
1123 kmem_alloc_kobject(
1124 vm_map_t map,
1125 vm_offset_t *addrp,
1126 vm_size_t size,
1127 vm_tag_t tag)
1128 {
1129 return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag);
1130 }
1131
1132 /*
1133 * kmem_alloc_aligned:
1134 *
1135 * Like kmem_alloc_kobject, except that the memory is aligned.
1136 * The size should be a power-of-2.
1137 */
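/*
 * For example (sketch): a 64KB request passes a mask of 0xFFFF down to
 * kernel_memory_allocate(), so the returned address is 64KB-aligned:
 *
 *	vm_offset_t addr;
 *	kr = kmem_alloc_aligned(kernel_map, &addr, 64 * 1024, tag);
 */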
1138
1139 kern_return_t
1140 kmem_alloc_aligned(
1141 vm_map_t map,
1142 vm_offset_t *addrp,
1143 vm_size_t size,
1144 vm_tag_t tag)
1145 {
1146 if ((size & (size - 1)) != 0) {
1147 panic("kmem_alloc_aligned: size not aligned");
1148 }
1149 return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag);
1150 }
1151
1152 /*
1153 * kmem_alloc_pageable:
1154 *
1155 * Allocate pageable memory in the kernel's address map.
1156 */
1157
1158 kern_return_t
1159 kmem_alloc_pageable_external(
1160 vm_map_t map,
1161 vm_offset_t *addrp,
1162 vm_size_t size)
1163 {
1164 return kmem_alloc_pageable(map, addrp, size, vm_tag_bt());
1165 }
1166
1167 kern_return_t
1168 kmem_alloc_pageable(
1169 vm_map_t map,
1170 vm_offset_t *addrp,
1171 vm_size_t size,
1172 vm_tag_t tag)
1173 {
1174 vm_map_offset_t map_addr;
1175 vm_map_size_t map_size;
1176 kern_return_t kr;
1177
1178 #ifndef normal
1179 map_addr = (vm_map_min(map)) + PAGE_SIZE;
1180 #else
1181 map_addr = vm_map_min(map);
1182 #endif
1183 map_size = vm_map_round_page(size,
1184 VM_MAP_PAGE_MASK(map));
1185 if (map_size < size) {
1186 /* overflow */
1187 *addrp = 0;
1188 return KERN_INVALID_ARGUMENT;
1189 }
1190
1191 kr = vm_map_enter(map, &map_addr, map_size,
1192 (vm_map_offset_t) 0,
1193 VM_FLAGS_ANYWHERE,
1194 VM_MAP_KERNEL_FLAGS_NONE,
1195 tag,
1196 VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
1197 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1198
1199 if (kr != KERN_SUCCESS) {
1200 return kr;
1201 }
1202
1203 #if KASAN
1204 kasan_notify_address(map_addr, map_size);
1205 #endif
1206 *addrp = CAST_DOWN(vm_offset_t, map_addr);
1207 return KERN_SUCCESS;
1208 }
1209
1210 /*
1211 * kmem_free:
1212 *
1213 * Release a region of kernel virtual memory allocated
1214 * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
1215 * and return the physical pages associated with that region.
1216 */
1217
1218 void
1219 kmem_free(
1220 vm_map_t map,
1221 vm_offset_t addr,
1222 vm_size_t size)
1223 {
1224 kern_return_t kr;
1225
1226 assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1227
1228 TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);
1229
1230 if (size == 0) {
1231 #if MACH_ASSERT
1232 printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
1233 #endif
1234 return;
1235 }
1236
1237 kr = vm_map_remove(map,
1238 vm_map_trunc_page(addr,
1239 VM_MAP_PAGE_MASK(map)),
1240 vm_map_round_page(addr + size,
1241 VM_MAP_PAGE_MASK(map)),
1242 VM_MAP_REMOVE_KUNWIRE);
1243 if (kr != KERN_SUCCESS) {
1244 panic("kmem_free");
1245 }
1246 }
1247
1248 /*
1249 * Allocate new pages in an object.
1250 */
1251
1252 kern_return_t
1253 kmem_alloc_pages(
1254 vm_object_t object,
1255 vm_object_offset_t offset,
1256 vm_object_size_t size)
1257 {
1258 vm_object_size_t alloc_size;
1259
1260 alloc_size = vm_object_round_page(size);
1261 vm_object_lock(object);
1262 while (alloc_size) {
1263 vm_page_t mem;
1264
1265
1266 /*
1267 * Allocate a page
1268 */
1269 while (VM_PAGE_NULL ==
1270 (mem = vm_page_alloc(object, offset))) {
1271 vm_object_unlock(object);
1272 VM_PAGE_WAIT();
1273 vm_object_lock(object);
1274 }
1275 mem->vmp_busy = FALSE;
1276
1277 alloc_size -= PAGE_SIZE;
1278 offset += PAGE_SIZE;
1279 }
1280 vm_object_unlock(object);
1281 return KERN_SUCCESS;
1282 }
1283
1284 /*
1285 * kmem_suballoc:
1286 *
1287 * Allocates a map to manage a subrange
1288 * of the kernel virtual address space.
1289 *
1290 * Arguments are as follows:
1291 *
1292 * parent Map to take range from
1293 * addr Address of start of range (IN/OUT)
1294 * size Size of range to find
1295 * pageable Can region be paged
1296 * flags VM_FLAGS_ANYWHERE allows region to be located anywhere in map
1297 * new_map Pointer to new submap
1298 */
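/*
 * Example (sketch; the size, flags and tag are illustrative): carve a
 * 64MB pageable submap out of the kernel map.
 *
 *	vm_offset_t base = 0;
 *	vm_map_t submap;
 *	kr = kmem_suballoc(kernel_map, &base, 64 * 1024 * 1024, TRUE,
 *	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE, &submap);
 */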
1299 kern_return_t
1300 kmem_suballoc(
1301 vm_map_t parent,
1302 vm_offset_t *addr,
1303 vm_size_t size,
1304 boolean_t pageable,
1305 int flags,
1306 vm_map_kernel_flags_t vmk_flags,
1307 vm_tag_t tag,
1308 vm_map_t *new_map)
1309 {
1310 vm_map_t map;
1311 vm_map_offset_t map_addr;
1312 vm_map_size_t map_size;
1313 kern_return_t kr;
1314
1315 map_size = vm_map_round_page(size,
1316 VM_MAP_PAGE_MASK(parent));
1317 if (map_size < size) {
1318 /* overflow */
1319 *addr = 0;
1320 return KERN_INVALID_ARGUMENT;
1321 }
1322
1323 /*
1324 * Need reference on submap object because it is internal
1325 * to the vm_system. vm_object_enter will never be called
1326 * on it (usual source of reference for vm_map_enter).
1327 */
1328 vm_object_reference(vm_submap_object);
1329
1330 map_addr = ((flags & VM_FLAGS_ANYWHERE)
1331 ? vm_map_min(parent)
1332 : vm_map_trunc_page(*addr,
1333 VM_MAP_PAGE_MASK(parent)));
1334
1335 kr = vm_map_enter(parent, &map_addr, map_size,
1336 (vm_map_offset_t) 0, flags, vmk_flags, tag,
1337 vm_submap_object, (vm_object_offset_t) 0, FALSE,
1338 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1339 if (kr != KERN_SUCCESS) {
1340 vm_object_deallocate(vm_submap_object);
1341 return kr;
1342 }
1343
1344 pmap_reference(vm_map_pmap(parent));
1345 map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
1346 if (map == VM_MAP_NULL) {
1347 panic("kmem_suballoc: vm_map_create failed"); /* "can't happen" */
1348 }
1349 /* inherit the parent map's page size */
1350 vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));
1351
1352 kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
1353 if (kr != KERN_SUCCESS) {
1354 /*
1355 * See comment preceding vm_map_submap().
1356 */
1357 vm_map_remove(parent, map_addr, map_addr + map_size,
1358 VM_MAP_REMOVE_NO_FLAGS);
1359 vm_map_deallocate(map); /* also removes ref to pmap */
1360 vm_object_deallocate(vm_submap_object);
1361 return kr;
1362 }
1363 *addr = CAST_DOWN(vm_offset_t, map_addr);
1364 *new_map = map;
1365 return KERN_SUCCESS;
1366 }
1367 /*
1368 * The default percentage of memory that can be mlocked is scaled based on the total
1369 * amount of memory in the system. These percentages are calculated
1370 * offline and stored in this table. We index this table by
1371 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
1372 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
1373 *
1374 * Note that these values were picked for mac.
1375 * If we ever have very large memory config arm devices, we may want to revisit
1376 * since the kernel overhead is smaller there due to the larger page size.
1377 */
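/*
 * Worked example (assuming config_memsize reflects installed memory):
 * a 16GB machine gives log2(16GB) = 34, index 34 - 32 = 2, so
 * wire_limit_percents[2] = 76, i.e. roughly 12.2GB may be mlocked before
 * the VM_NOT_USER_WIREABLE_MAX cap below is considered.
 */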
1378
1379 /* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
1380 #define VM_USER_WIREABLE_MIN_CONFIG 32
1381 static vm_map_size_t wire_limit_percents[] =
1382 { 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
1383
1384 /*
1385 * Sets the default global user wire limit which limits the amount of
1386 * memory that can be locked via mlock() based on the above algorithm.
1387 * This can be overridden via a sysctl.
1388 */
1389 static void
1390 kmem_set_user_wire_limits(void)
1391 {
1392 uint64_t available_mem_log;
1393 uint64_t max_wire_percent;
1394 size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
1395 sizeof(vm_map_size_t);
1396 vm_map_size_t limit;
1397 uint64_t config_memsize = max_mem;
1398 #if defined(XNU_TARGET_OS_OSX)
1399 config_memsize = max_mem_actual;
1400 #endif /* defined(XNU_TARGET_OS_OSX) */
1401
1402 available_mem_log = bit_floor(config_memsize);
1403
1404 if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
1405 available_mem_log = 0;
1406 } else {
1407 available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
1408 }
1409 if (available_mem_log >= wire_limit_percents_length) {
1410 available_mem_log = wire_limit_percents_length - 1;
1411 }
1412 max_wire_percent = wire_limit_percents[available_mem_log];
1413
1414 limit = config_memsize * max_wire_percent / 100;
1415 /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
1416 if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
1417 limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
1418 }
1419
1420 vm_global_user_wire_limit = limit;
1421 /* the default per task limit is the same as the global limit */
1422 vm_per_task_user_wire_limit = limit;
1423 vm_add_wire_count_over_global_limit = 0;
1424 vm_add_wire_count_over_user_limit = 0;
1425 }
1426
1427
1428 /*
1429 * kmem_init:
1430 *
1431 * Initialize the kernel's virtual memory map, taking
1432 * into account all memory allocated up to this time.
1433 */
1434 __startup_func
1435 void
1436 kmem_init(
1437 vm_offset_t start,
1438 vm_offset_t end)
1439 {
1440 vm_map_offset_t map_start;
1441 vm_map_offset_t map_end;
1442 vm_map_kernel_flags_t vmk_flags;
1443
1444 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1445 vmk_flags.vmkf_permanent = TRUE;
1446 vmk_flags.vmkf_no_pmap_check = TRUE;
1447
1448 map_start = vm_map_trunc_page(start,
1449 VM_MAP_PAGE_MASK(kernel_map));
1450 map_end = vm_map_round_page(end,
1451 VM_MAP_PAGE_MASK(kernel_map));
1452
1453 #if defined(__arm__) || defined(__arm64__)
1454 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1455 VM_MAX_KERNEL_ADDRESS, FALSE);
1456 /*
1457 * Reserve virtual memory allocated up to this time.
1458 */
1459 {
1460 unsigned int region_select = 0;
1461 vm_map_offset_t region_start;
1462 vm_map_size_t region_size;
1463 vm_map_offset_t map_addr;
1464 kern_return_t kr;
1465
1466 while (pmap_virtual_region(region_select, &region_start, &region_size)) {
1467 map_addr = region_start;
1468 kr = vm_map_enter(kernel_map, &map_addr,
1469 vm_map_round_page(region_size,
1470 VM_MAP_PAGE_MASK(kernel_map)),
1471 (vm_map_offset_t) 0,
1472 VM_FLAGS_FIXED,
1473 vmk_flags,
1474 VM_KERN_MEMORY_NONE,
1475 VM_OBJECT_NULL,
1476 (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
1477 VM_INHERIT_DEFAULT);
1478
1479 if (kr != KERN_SUCCESS) {
1480 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
1481 (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
1482 (uint64_t) region_size, kr);
1483 }
1484
1485 region_select++;
1486 }
1487 }
1488 #else
1489 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1490 map_end, FALSE);
1491 /*
1492 * Reserve virtual memory allocated up to this time.
1493 */
1494 if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
1495 vm_map_offset_t map_addr;
1496 kern_return_t kr;
1497
1498 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1499 vmk_flags.vmkf_no_pmap_check = TRUE;
1500
1501 map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1502 kr = vm_map_enter(kernel_map,
1503 &map_addr,
1504 (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1505 (vm_map_offset_t) 0,
1506 VM_FLAGS_FIXED,
1507 vmk_flags,
1508 VM_KERN_MEMORY_NONE,
1509 VM_OBJECT_NULL,
1510 (vm_object_offset_t) 0, FALSE,
1511 VM_PROT_NONE, VM_PROT_NONE,
1512 VM_INHERIT_DEFAULT);
1513
1514 if (kr != KERN_SUCCESS) {
1515 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
1516 (uint64_t) start, (uint64_t) end,
1517 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1518 (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1519 kr);
1520 }
1521 }
1522 #endif
1523
1524 kmem_set_user_wire_limits();
1525 }
1526
1527 /*
1528 * Routine: copyinmap
1529 * Purpose:
1530 * Like copyin, except that fromaddr is an address
1531 * in the specified VM map. This implementation
1532 * is incomplete; it handles the current user map
1533 * and the kernel map/submaps.
1534 */
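/*
 * Example (sketch; task_map and uaddr are hypothetical):
 *
 *	uint64_t value;
 *	if (copyinmap(task_map, uaddr, &value, sizeof(value)) != KERN_SUCCESS) {
 *		... uaddr was not readable in task_map ...
 *	}
 */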
1535 kern_return_t
1536 copyinmap(
1537 vm_map_t map,
1538 vm_map_offset_t fromaddr,
1539 void *todata,
1540 vm_size_t length)
1541 {
1542 kern_return_t kr = KERN_SUCCESS;
1543 vm_map_t oldmap;
1544
1545 if (vm_map_pmap(map) == pmap_kernel()) {
1546 /* assume a correct copy */
1547 memcpy(todata, CAST_DOWN(void *, fromaddr), length);
1548 } else if (current_map() == map) {
1549 if (copyin(fromaddr, todata, length) != 0) {
1550 kr = KERN_INVALID_ADDRESS;
1551 }
1552 } else {
1553 vm_map_reference(map);
1554 oldmap = vm_map_switch(map);
1555 if (copyin(fromaddr, todata, length) != 0) {
1556 kr = KERN_INVALID_ADDRESS;
1557 }
1558 vm_map_switch(oldmap);
1559 vm_map_deallocate(map);
1560 }
1561 return kr;
1562 }
1563
1564 /*
1565 * Routine: copyoutmap
1566 * Purpose:
1567 * Like copyout, except that toaddr is an address
1568 * in the specified VM map.
1569 */
1570 kern_return_t
1571 copyoutmap(
1572 vm_map_t map,
1573 void *fromdata,
1574 vm_map_address_t toaddr,
1575 vm_size_t length)
1576 {
1577 kern_return_t kr = KERN_SUCCESS;
1578 vm_map_t oldmap;
1579
1580 if (vm_map_pmap(map) == pmap_kernel()) {
1581 /* assume a correct copy */
1582 memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
1583 } else if (current_map() == map) {
1584 if (copyout(fromdata, toaddr, length) != 0) {
1585 kr = KERN_INVALID_ADDRESS;
1586 }
1587 } else {
1588 vm_map_reference(map);
1589 oldmap = vm_map_switch(map);
1590 if (copyout(fromdata, toaddr, length) != 0) {
1591 kr = KERN_INVALID_ADDRESS;
1592 }
1593 vm_map_switch(oldmap);
1594 vm_map_deallocate(map);
1595 }
1596 return kr;
1597 }
1598
1599 /*
1600 *
1601 * The following two functions are to be used when exposing kernel
1602 * addresses to userspace via any of the various debug or info
1603 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
1604 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
1605 * are exported to KEXTs.
1606 *
1607 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
1608 */
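/*
 * Example (sketch) of how a kext might obfuscate a heap pointer before
 * logging it (ptr is a placeholder):
 *
 *	vm_offset_t obfuscated;
 *	vm_kernel_addrperm_external((vm_offset_t)ptr, &obfuscated);
 *	printf("object at 0x%lx\n", (unsigned long)obfuscated);
 */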
1609
1610 static void
1611 vm_kernel_addrhash_internal(
1612 vm_offset_t addr,
1613 vm_offset_t *hash_addr,
1614 uint64_t salt)
1615 {
1616 assert(salt != 0);
1617
1618 if (addr == 0) {
1619 *hash_addr = 0;
1620 return;
1621 }
1622
1623 if (VM_KERNEL_IS_SLID(addr)) {
1624 *hash_addr = VM_KERNEL_UNSLIDE(addr);
1625 return;
1626 }
1627
1628 vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
1629 SHA256_CTX sha_ctx;
1630
1631 SHA256_Init(&sha_ctx);
1632 SHA256_Update(&sha_ctx, &salt, sizeof(salt));
1633 SHA256_Update(&sha_ctx, &addr, sizeof(addr));
1634 SHA256_Final(sha_digest, &sha_ctx);
1635
1636 *hash_addr = sha_digest[0];
1637 }
1638
1639 void
1640 vm_kernel_addrhash_external(
1641 vm_offset_t addr,
1642 vm_offset_t *hash_addr)
1643 {
1644 return vm_kernel_addrhash_internal(addr, hash_addr, vm_kernel_addrhash_salt_ext);
1645 }
1646
1647 vm_offset_t
1648 vm_kernel_addrhash(vm_offset_t addr)
1649 {
1650 vm_offset_t hash_addr;
1651 vm_kernel_addrhash_internal(addr, &hash_addr, vm_kernel_addrhash_salt);
1652 return hash_addr;
1653 }
1654
1655 void
1656 vm_kernel_addrhide(
1657 vm_offset_t addr,
1658 vm_offset_t *hide_addr)
1659 {
1660 *hide_addr = VM_KERNEL_ADDRHIDE(addr);
1661 }
1662
1663 /*
1664 * vm_kernel_addrperm_external:
1665 * vm_kernel_unslide_or_perm_external:
1666 *
1667 * Use these macros when exposing an address to userspace that could come from
1668 * either kernel text/data *or* the heap.
1669 */
1670 void
1671 vm_kernel_addrperm_external(
1672 vm_offset_t addr,
1673 vm_offset_t *perm_addr)
1674 {
1675 if (VM_KERNEL_IS_SLID(addr)) {
1676 *perm_addr = VM_KERNEL_UNSLIDE(addr);
1677 } else if (VM_KERNEL_ADDRESS(addr)) {
1678 *perm_addr = addr + vm_kernel_addrperm_ext;
1679 } else {
1680 *perm_addr = addr;
1681 }
1682 }
1683
1684 void
1685 vm_kernel_unslide_or_perm_external(
1686 vm_offset_t addr,
1687 vm_offset_t *up_addr)
1688 {
1689 vm_kernel_addrperm_external(addr, up_addr);
1690 }
1691
1692 void
1693 vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
1694 {
1695 if (ptr & ((1ul << params.vmpp_shift) - 1)) {
1696 panic("pointer %p can't be packed: low %d bits aren't 0",
1697 (void *)ptr, params.vmpp_shift);
1698 } else if (ptr <= params.vmpp_base) {
1699 panic("pointer %p can't be packed: below base %p",
1700 (void *)ptr, (void *)params.vmpp_base);
1701 } else {
1702 panic("pointer %p can't be packed: maximum encodable pointer is %p",
1703 (void *)ptr, (void *)vm_packing_max_packable(params));
1704 }
1705 }
1706
1707 void
1708 vm_packing_verify_range(
1709 const char *subsystem,
1710 vm_offset_t min_address,
1711 vm_offset_t max_address,
1712 vm_packing_params_t params)
1713 {
1714 if (min_address > max_address) {
1715 panic("%s: %s range invalid min:%p > max:%p",
1716 __func__, subsystem, (void *)min_address, (void *)max_address);
1717 }
1718
1719 if (!params.vmpp_base_relative) {
1720 return;
1721 }
1722
1723 if (min_address <= params.vmpp_base) {
1724 panic("%s: %s range invalid min:%p <= base:%p",
1725 __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
1726 }
1727
1728 if (max_address > vm_packing_max_packable(params)) {
1729 panic("%s: %s range invalid max:%p >= max packable:%p",
1730 __func__, subsystem, (void *)max_address,
1731 (void *)vm_packing_max_packable(params));
1732 }
1733 }