[apple/xnu.git] / osfmk / vm / vm_kern.c
1c79356b 1/*
f427ee49 2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
0a7de745 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
0a7de745 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
0a7de745 31/*
1c79356b
A
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
0a7de745 35 *
1c79356b
A
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
0a7de745 41 *
1c79356b
A
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
0a7de745 45 *
1c79356b 46 * Carnegie Mellon requests users of this software to return to
0a7de745 47 *
1c79356b
A
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
0a7de745 52 *
1c79356b
A
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_kern.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Kernel memory management.
64 */
65
1c79356b
A
66#include <mach/kern_return.h>
67#include <mach/vm_param.h>
68#include <kern/assert.h>
1c79356b
A
69#include <kern/thread.h>
70#include <vm/vm_kern.h>
71#include <vm/vm_map.h>
72#include <vm/vm_object.h>
73#include <vm/vm_page.h>
5ba3f43e 74#include <vm/vm_compressor.h>
1c79356b
A
75#include <vm/vm_pageout.h>
76#include <kern/misc_protos.h>
77#include <vm/cpm.h>
0a7de745 78#include <kern/ledger.h>
4ba76501 79#include <kern/bits.h>
f427ee49 80#include <kern/startup.h>
1c79356b
A
81
82#include <string.h>
2d21ac55
A
83
84#include <libkern/OSDebug.h>
5ba3f43e 85#include <libkern/crypto/sha2.h>
cb323159 86#include <libkern/section_keywords.h>
2d21ac55
A
87#include <sys/kdebug.h>
88
5ba3f43e
A
89#include <san/kasan.h>
90
1c79356b
A
91/*
92 * Variables exported by this module.
93 */
94
cb323159
A
95SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
96vm_map_t kernel_pageable_map;
1c79356b
A
97
98/*
99 * Forward declarations for internal functions.
100 */
101extern kern_return_t kmem_alloc_pages(
0a7de745
A
102 vm_object_t object,
103 vm_object_offset_t offset,
104 vm_object_size_t size);
1c79356b 105
1c79356b
A
106kern_return_t
107kmem_alloc_contig(
0a7de745
A
108 vm_map_t map,
109 vm_offset_t *addrp,
110 vm_size_t size,
111 vm_offset_t mask,
112 ppnum_t max_pnum,
113 ppnum_t pnum_mask,
c3c9b80d 114 kma_flags_t flags,
3e170ce0 115 vm_tag_t tag)
1c79356b 116{
0a7de745
A
117 vm_object_t object;
118 vm_object_offset_t offset;
119 vm_map_offset_t map_addr;
120 vm_map_offset_t map_mask;
121 vm_map_size_t map_size, i;
122 vm_map_entry_t entry;
123 vm_page_t m, pages;
124 kern_return_t kr;
125
126 assert(VM_KERN_MEMORY_NONE != tag);
127
128 if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) {
1c79356b 129 return KERN_INVALID_ARGUMENT;
0a7de745 130 }
316670eb 131
39236c6e 132 map_size = vm_map_round_page(size,
0a7de745 133 VM_MAP_PAGE_MASK(map));
316670eb 134 map_mask = (vm_map_offset_t)mask;
0a7de745 135
316670eb
A
136 /* Check for zero allocation size (either directly or via overflow) */
137 if (map_size == 0) {
1c79356b
A
138 *addrp = 0;
139 return KERN_INVALID_ARGUMENT;
140 }
141
91447636
A
142 /*
143 * Allocate a new object (if necessary) and the reference we
144 * will be donating to the map entry. We must do this before
145 * locking the map, or risk deadlock with the default pager.
146 */
147 if ((flags & KMA_KOBJECT) != 0) {
148 object = kernel_object;
149 vm_object_reference(object);
1c79356b 150 } else {
91447636 151 object = vm_object_allocate(map_size);
1c79356b
A
152 }
153
5ba3f43e 154 kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0,
0a7de745 155 VM_MAP_KERNEL_FLAGS_NONE, tag, &entry);
91447636
A
156 if (KERN_SUCCESS != kr) {
157 vm_object_deallocate(object);
1c79356b
A
158 return kr;
159 }
160
3e170ce0
A
161 if (object == kernel_object) {
162 offset = map_addr;
163 } else {
164 offset = 0;
165 }
166 VME_OBJECT_SET(entry, object);
167 VME_OFFSET_SET(entry, offset);
91447636
A
168
169 /* Take an extra object ref in case the map entry gets deleted */
170 vm_object_reference(object);
1c79356b
A
171 vm_map_unlock(map);
172
b0d623f7 173 kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
1c79356b
A
174
175 if (kr != KERN_SUCCESS) {
39236c6e 176 vm_map_remove(map,
0a7de745
A
177 vm_map_trunc_page(map_addr,
178 VM_MAP_PAGE_MASK(map)),
179 vm_map_round_page(map_addr + map_size,
180 VM_MAP_PAGE_MASK(map)),
181 VM_MAP_REMOVE_NO_FLAGS);
91447636 182 vm_object_deallocate(object);
1c79356b
A
183 *addrp = 0;
184 return kr;
185 }
186
187 vm_object_lock(object);
91447636 188 for (i = 0; i < map_size; i += PAGE_SIZE) {
1c79356b
A
189 m = pages;
190 pages = NEXT_PAGE(m);
0c530ab8 191 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
d9a64523 192 m->vmp_busy = FALSE;
1c79356b
A
193 vm_page_insert(m, object, offset + i);
194 }
195 vm_object_unlock(object);
196
5ba3f43e 197 kr = vm_map_wire_kernel(map,
0a7de745
A
198 vm_map_trunc_page(map_addr,
199 VM_MAP_PAGE_MASK(map)),
200 vm_map_round_page(map_addr + map_size,
201 VM_MAP_PAGE_MASK(map)),
202 VM_PROT_DEFAULT, tag,
203 FALSE);
3e170ce0 204
39236c6e 205 if (kr != KERN_SUCCESS) {
1c79356b
A
206 if (object == kernel_object) {
207 vm_object_lock(object);
91447636 208 vm_object_page_remove(object, offset, offset + map_size);
1c79356b
A
209 vm_object_unlock(object);
210 }
39236c6e 211 vm_map_remove(map,
0a7de745
A
212 vm_map_trunc_page(map_addr,
213 VM_MAP_PAGE_MASK(map)),
214 vm_map_round_page(map_addr + map_size,
215 VM_MAP_PAGE_MASK(map)),
216 VM_MAP_REMOVE_NO_FLAGS);
91447636 217 vm_object_deallocate(object);
1c79356b
A
218 return kr;
219 }
91447636
A
220 vm_object_deallocate(object);
221
5ba3f43e 222 if (object == kernel_object) {
91447636 223 vm_map_simplify(map, map_addr);
0a7de745
A
224 vm_tag_update_size(tag, map_size);
225 }
b0d623f7
A
226 *addrp = (vm_offset_t) map_addr;
227 assert((vm_map_offset_t) *addrp == map_addr);
5ba3f43e 228
1c79356b
A
229 return KERN_SUCCESS;
230}
231
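/*
 * Illustrative sketch (editor's addition, not original xnu source): one way a
 * caller might ask kmem_alloc_contig() above for wired, physically contiguous
 * memory whose pages all sit below the 4GB physical boundary.  The helper
 * name and the vm_tag_bt() tag are assumptions made for the example only.
 */
static __unused kern_return_t
example_kmem_alloc_contig_low(
	vm_offset_t     *addrp,
	vm_size_t       size)
{
	/* highest acceptable physical page: the last page under 4GB */
	ppnum_t max_pnum = (ppnum_t) atop_64(0x100000000ULL - 1);

	return kmem_alloc_contig(kernel_map, addrp, size,
	    0,                  /* no extra virtual alignment */
	    max_pnum,           /* physical ceiling for every page */
	    0,                  /* no physical alignment mask */
	    KMA_KOBJECT,        /* back the range with kernel_object */
	    vm_tag_bt());       /* tag by backtrace, as the _external paths do */
}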
232/*
233 * Master entry point for allocating kernel memory.
234 * NOTE: this routine is _never_ interrupt safe.
235 *
236 * map : map to allocate into
237 * addrp : pointer to start address of new memory
238 * size : size of memory requested
239 * flags : options
240 * KMA_HERE *addrp is base address, else "anywhere"
241 * KMA_NOPAGEWAIT don't wait for pages if unavailable
242 * KMA_KOBJECT use kernel_object
0c530ab8
A
243 * KMA_LOMEM support for 32 bit devices in a 64 bit world
244 * if set and a lomemory pool is available
245 * grab pages from it... this also implies
246 * KMA_NOPAGEWAIT
1c79356b
A
247 */
248
249kern_return_t
250kernel_memory_allocate(
0a7de745
A
251 vm_map_t map,
252 vm_offset_t *addrp,
253 vm_size_t size,
254 vm_offset_t mask,
c3c9b80d
A
255 kma_flags_t flags,
256 vm_tag_t tag)
1c79356b 257{
0a7de745
A
258 vm_object_t object;
259 vm_object_offset_t offset;
260 vm_object_offset_t pg_offset;
261 vm_map_entry_t entry = NULL;
262 vm_map_offset_t map_addr, fill_start;
263 vm_map_offset_t map_mask;
264 vm_map_size_t map_size, fill_size;
265 kern_return_t kr, pe_result;
266 vm_page_t mem;
267 vm_page_t guard_page_list = NULL;
268 vm_page_t wired_page_list = NULL;
269 int guard_page_count = 0;
270 int wired_page_count = 0;
0a7de745
A
271 int vm_alloc_flags;
272 vm_map_kernel_flags_t vmk_flags;
273 vm_prot_t kma_prot;
0a7de745 274
f427ee49 275 if (startup_phase < STARTUP_SUB_KMEM) {
2d21ac55
A
276 panic("kernel_memory_allocate: VM is not ready");
277 }
1c79356b 278
39236c6e 279 map_size = vm_map_round_page(size,
0a7de745 280 VM_MAP_PAGE_MASK(map));
91447636 281 map_mask = (vm_map_offset_t) mask;
3e170ce0 282
5ba3f43e
A
283 vm_alloc_flags = 0; //VM_MAKE_TAG(tag);
284 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2d21ac55 285
316670eb
A
286 /* Check for zero allocation size (either directly or via overflow) */
287 if (map_size == 0) {
288 *addrp = 0;
289 return KERN_INVALID_ARGUMENT;
290 }
b0d623f7
A
291
292 /*
293 * limit the size of a single extent of wired memory
294 * to try and limit the damage to the system if
295 * too many pages get wired down
4bd07ac2
A
296 * limit raised to 2GB with 128GB max physical limit,
297 * but scaled by installed memory above this
b0d623f7 298 */
0a7de745
A
299 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
300 map_size > MAX(1ULL << 31, sane_size / 64)) {
301 return KERN_RESOURCE_SHORTAGE;
302 }
b0d623f7 303
2d21ac55
A
304 /*
305 * Guard pages:
306 *
 307 * Guard pages are implemented as fictitious pages. By placing guard pages
308 * on either end of a stack, they can help detect cases where a thread walks
309 * off either end of its stack. They are allocated and set up here and attempts
310 * to access those pages are trapped in vm_fault_page().
311 *
312 * The map_size we were passed may include extra space for
313 * guard pages. If those were requested, then back it out of fill_size
314 * since vm_map_find_space() takes just the actual size not including
315 * guard pages. Similarly, fill_start indicates where the actual pages
316 * will begin in the range.
317 */
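	/*
	 * Worked example (editor's addition): a caller building a 16KB guarded
	 * stack passes size = 16KB + 2 * PAGE_SIZE along with
	 * KMA_GUARD_FIRST | KMA_GUARD_LAST.  With 4KB pages, map_size = 24KB;
	 * the adjustments below yield fill_start = 4KB and fill_size = 16KB,
	 * so wired_page_count = 4 and the first and last pages of the range
	 * become fictitious guard pages.
	 */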
318
319 fill_start = 0;
320 fill_size = map_size;
b0d623f7 321
2d21ac55 322 if (flags & KMA_GUARD_FIRST) {
5ba3f43e 323 vmk_flags.vmkf_guard_before = TRUE;
2d21ac55
A
324 fill_start += PAGE_SIZE_64;
325 fill_size -= PAGE_SIZE_64;
326 if (map_size < fill_start + fill_size) {
327 /* no space for a guard page */
328 *addrp = 0;
329 return KERN_INVALID_ARGUMENT;
330 }
b0d623f7 331 guard_page_count++;
2d21ac55
A
332 }
333 if (flags & KMA_GUARD_LAST) {
5ba3f43e 334 vmk_flags.vmkf_guard_after = TRUE;
2d21ac55
A
335 fill_size -= PAGE_SIZE_64;
336 if (map_size <= fill_start + fill_size) {
337 /* no space for a guard page */
338 *addrp = 0;
339 return KERN_INVALID_ARGUMENT;
340 }
b0d623f7
A
341 guard_page_count++;
342 }
343 wired_page_count = (int) (fill_size / PAGE_SIZE_64);
344 assert(wired_page_count * PAGE_SIZE_64 == fill_size);
345
d9a64523 346#if DEBUG || DEVELOPMENT
c3c9b80d
A
347 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
348 size, 0, 0, 0);
d9a64523
A
349#endif
350
c3c9b80d
A
351 for (int i = 0; i < guard_page_count; i++) {
352 mem = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
353 if (mem == VM_PAGE_NULL) {
354 kr = KERN_RESOURCE_SHORTAGE;
355 goto out;
b0d623f7 356 }
d9a64523 357 mem->vmp_snext = guard_page_list;
b0d623f7
A
358 guard_page_list = mem;
359 }
360
d9a64523 361 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
c3c9b80d
A
362 kr = vm_page_alloc_list(wired_page_count, flags,
363 &wired_page_list);
364 if (kr != KERN_SUCCESS) {
365 goto out;
b0d623f7 366 }
39236c6e 367 }
91447636
A
368
369 /*
370 * Allocate a new object (if necessary). We must do this before
371 * locking the map, or risk deadlock with the default pager.
372 */
373 if ((flags & KMA_KOBJECT) != 0) {
1c79356b 374 object = kernel_object;
91447636 375 vm_object_reference(object);
39236c6e
A
376 } else if ((flags & KMA_COMPRESSOR) != 0) {
377 object = compressor_object;
378 vm_object_reference(object);
91447636
A
379 } else {
380 object = vm_object_allocate(map_size);
1c79356b 381 }
91447636 382
0a7de745 383 if (flags & KMA_ATOMIC) {
5ba3f43e 384 vmk_flags.vmkf_atomic_entry = TRUE;
0a7de745 385 }
5ba3f43e 386
f427ee49
A
387 if (flags & KMA_KHEAP) {
388 vm_alloc_flags |= VM_MAP_FIND_LAST_FREE;
389 }
390
2d21ac55 391 kr = vm_map_find_space(map, &map_addr,
0a7de745
A
392 fill_size, map_mask,
393 vm_alloc_flags, vmk_flags, tag, &entry);
91447636
A
394 if (KERN_SUCCESS != kr) {
395 vm_object_deallocate(object);
b0d623f7 396 goto out;
1c79356b 397 }
2d21ac55 398
3e170ce0
A
399 if (object == kernel_object || object == compressor_object) {
400 offset = map_addr;
401 } else {
402 offset = 0;
403 }
404 VME_OBJECT_SET(entry, object);
405 VME_OFFSET_SET(entry, offset);
0a7de745
A
406
407 if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
39236c6e 408 entry->wired_count++;
0a7de745 409 }
b0d623f7 410
0a7de745 411 if (flags & KMA_PERMANENT) {
b0d623f7 412 entry->permanent = TRUE;
0a7de745 413 }
b0d623f7 414
0a7de745 415 if (object != kernel_object && object != compressor_object) {
b0d623f7 416 vm_object_reference(object);
0a7de745 417 }
1c79356b
A
418
419 vm_object_lock(object);
b0d623f7 420 vm_map_unlock(map);
1c79356b 421
b0d623f7
A
422 pg_offset = 0;
423
424 if (fill_start) {
0a7de745 425 if (guard_page_list == NULL) {
b0d623f7 426 panic("kernel_memory_allocate: guard_page_list == NULL");
0a7de745 427 }
b0d623f7
A
428
429 mem = guard_page_list;
d9a64523
A
430 guard_page_list = mem->vmp_snext;
431 mem->vmp_snext = NULL;
b0d623f7
A
432
433 vm_page_insert(mem, object, offset + pg_offset);
2d21ac55 434
d9a64523 435 mem->vmp_busy = FALSE;
b0d623f7 436 pg_offset += PAGE_SIZE_64;
2d21ac55 437 }
316670eb
A
438
439 kma_prot = VM_PROT_READ | VM_PROT_WRITE;
440
5ba3f43e
A
441#if KASAN
442 if (!(flags & KMA_VAONLY)) {
443 /* for VAONLY mappings we notify in populate only */
444 kasan_notify_address(map_addr, size);
445 }
446#endif
447
d9a64523 448 if (flags & (KMA_VAONLY | KMA_PAGEABLE)) {
39236c6e
A
449 pg_offset = fill_start + fill_size;
450 } else {
0a7de745
A
451 for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
452 if (wired_page_list == NULL) {
453 panic("kernel_memory_allocate: wired_page_list == NULL");
454 }
2d21ac55 455
0a7de745
A
456 mem = wired_page_list;
457 wired_page_list = mem->vmp_snext;
458 mem->vmp_snext = NULL;
39037602 459
0a7de745
A
460 assert(mem->vmp_wire_count == 0);
461 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
39037602 462
0a7de745
A
463 mem->vmp_q_state = VM_PAGE_IS_WIRED;
464 mem->vmp_wire_count++;
465 if (__improbable(mem->vmp_wire_count == 0)) {
466 panic("kernel_memory_allocate(%p): wire_count overflow",
467 mem);
468 }
2d21ac55 469
0a7de745 470 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
0c530ab8 471
0a7de745
A
472 mem->vmp_busy = FALSE;
473 mem->vmp_pmapped = TRUE;
474 mem->vmp_wpmapped = TRUE;
b0d623f7 475
f427ee49
A
476 PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset,
477 0, /* fault_phys_offset */
478 mem,
0a7de745
A
479 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
480 PMAP_OPTIONS_NOWAIT, pe_result);
39236c6e 481
0a7de745
A
482 if (pe_result == KERN_RESOURCE_SHORTAGE) {
483 vm_object_unlock(object);
0b4c1975 484
0a7de745
A
485 PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
486 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
487 pe_result);
39236c6e 488
0a7de745
A
489 vm_object_lock(object);
490 }
5ba3f43e 491
0a7de745 492 assert(pe_result == KERN_SUCCESS);
5ba3f43e 493
0a7de745
A
494 if (flags & KMA_NOENCRYPT) {
495 bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
0b4c1975 496
0a7de745
A
497 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
498 }
499 }
500 if (kernel_object == object) {
501 vm_tag_update_size(tag, fill_size);
0b4c1975 502 }
39236c6e 503 }
b0d623f7 504 if ((fill_start + fill_size) < map_size) {
0a7de745 505 if (guard_page_list == NULL) {
b0d623f7 506 panic("kernel_memory_allocate: guard_page_list == NULL");
0a7de745 507 }
1c79356b 508
b0d623f7 509 mem = guard_page_list;
d9a64523
A
510 guard_page_list = mem->vmp_snext;
511 mem->vmp_snext = NULL;
b0d623f7
A
512
513 vm_page_insert(mem, object, offset + pg_offset);
2d21ac55 514
d9a64523 515 mem->vmp_busy = FALSE;
1c79356b 516 }
0a7de745 517 if (guard_page_list || wired_page_list) {
b0d623f7 518 panic("kernel_memory_allocate: non empty list\n");
0a7de745 519 }
2d21ac55 520
d9a64523 521 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
0a7de745
A
522 vm_page_lockspin_queues();
523 vm_page_wire_count += wired_page_count;
524 vm_page_unlock_queues();
39236c6e 525 }
2d21ac55 526
b0d623f7
A
527 vm_object_unlock(object);
528
529 /*
 530 * now that the pages are wired, we no longer have to fear coalescing
531 */
0a7de745 532 if (object == kernel_object || object == compressor_object) {
91447636 533 vm_map_simplify(map, map_addr);
0a7de745 534 } else {
b0d623f7 535 vm_object_deallocate(object);
0a7de745 536 }
1c79356b 537
d9a64523 538#if DEBUG || DEVELOPMENT
c3c9b80d
A
539 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
540 wired_page_count, 0, 0, 0);
d9a64523 541#endif
1c79356b
A
542 /*
543 * Return the memory, not zeroed.
544 */
91447636 545 *addrp = CAST_DOWN(vm_offset_t, map_addr);
1c79356b 546 return KERN_SUCCESS;
2d21ac55 547
b0d623f7 548out:
0a7de745 549 if (guard_page_list) {
b0d623f7 550 vm_page_free_list(guard_page_list, FALSE);
0a7de745 551 }
b0d623f7 552
0a7de745 553 if (wired_page_list) {
b0d623f7 554 vm_page_free_list(wired_page_list, FALSE);
0a7de745 555 }
b0d623f7 556
d9a64523 557#if DEBUG || DEVELOPMENT
c3c9b80d
A
558 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
559 wired_page_count, 0, 0, 0);
d9a64523 560#endif
b0d623f7 561 return kr;
1c79356b
A
562}
563
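/*
 * Illustrative sketch (editor's addition, not original xnu source): a guarded,
 * wired, kernel-stack style allocation using the flags documented above.  The
 * helper name is hypothetical, and the size handed to kernel_memory_allocate()
 * must already include the two guard pages.
 */
static __unused kern_return_t
example_alloc_guarded_region(
	vm_offset_t     *addrp,
	vm_size_t       usable_size)
{
	vm_size_t       total = usable_size + 2 * PAGE_SIZE;
	kern_return_t   kr;

	kr = kernel_memory_allocate(kernel_map, addrp, total, 0,
	    KMA_KOBJECT | KMA_KSTACK | KMA_GUARD_FIRST | KMA_GUARD_LAST,
	    vm_tag_bt());
	/*
	 * On success *addrp is the start of the whole range, i.e. the leading
	 * guard page; the usable pages begin at *addrp + PAGE_SIZE.
	 */
	return kr;
}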
c3c9b80d
A
564void
565kernel_memory_populate_with_pages(
0a7de745
A
566 vm_map_t map,
567 vm_offset_t addr,
568 vm_size_t size,
c3c9b80d
A
569 vm_page_t page_list,
570 kma_flags_t flags,
3e170ce0 571 vm_tag_t tag)
39236c6e 572{
c3c9b80d
A
573 vm_object_t object;
574 kern_return_t pe_result;
575 vm_page_t mem;
576 int page_count = atop_64(size);
39236c6e
A
577
578 if (flags & KMA_COMPRESSOR) {
c3c9b80d
A
579 panic("%s(%p,0x%llx,0x%llx,0x%x): KMA_COMPRESSOR", __func__,
580 map, (uint64_t) addr, (uint64_t) size, flags);
39236c6e
A
581 }
582
39236c6e 583 if (flags & KMA_KOBJECT) {
39236c6e
A
584 object = kernel_object;
585
586 vm_object_lock(object);
587 } else {
588 /*
589 * If it's not the kernel object, we need to:
590 * lock map;
591 * lookup entry;
592 * lock object;
593 * take reference on object;
594 * unlock map;
595 */
c3c9b80d 596 panic("%s(%p,0x%llx,0x%llx,0x%x): !KMA_KOBJECT", __func__,
0a7de745 597 map, (uint64_t) addr, (uint64_t) size, flags);
39236c6e
A
598 }
599
c3c9b80d 600 for (vm_object_offset_t pg_offset = 0;
0a7de745
A
601 pg_offset < size;
602 pg_offset += PAGE_SIZE_64) {
603 if (page_list == NULL) {
c3c9b80d 604 panic("%s: page_list too short", __func__);
0a7de745 605 }
39236c6e
A
606
607 mem = page_list;
d9a64523
A
608 page_list = mem->vmp_snext;
609 mem->vmp_snext = NULL;
610
611 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
612 mem->vmp_q_state = VM_PAGE_IS_WIRED;
613 mem->vmp_wire_count++;
c3c9b80d
A
614 if (mem->vmp_wire_count == 0) {
615 panic("%s(%p): wire_count overflow", __func__, mem);
39037602 616 }
39236c6e 617
c3c9b80d 618 vm_page_insert_wired(mem, object, addr + pg_offset, tag);
39236c6e 619
d9a64523
A
620 mem->vmp_busy = FALSE;
621 mem->vmp_pmapped = TRUE;
622 mem->vmp_wpmapped = TRUE;
39236c6e 623
f427ee49
A
624 PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset,
625 0, /* fault_phys_offset */
626 mem,
0a7de745
A
627 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
628 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
629 PMAP_OPTIONS_NOWAIT, pe_result);
39236c6e
A
630
631 if (pe_result == KERN_RESOURCE_SHORTAGE) {
39236c6e
A
632 vm_object_unlock(object);
633
634 PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
0a7de745
A
635 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
636 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
637 pe_result);
39236c6e
A
638
639 vm_object_lock(object);
640 }
5ba3f43e
A
641
642 assert(pe_result == KERN_SUCCESS);
643
39236c6e 644 if (flags & KMA_NOENCRYPT) {
c3c9b80d 645 __nosan_bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
39037602 646 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
39236c6e
A
647 }
648 }
c3c9b80d
A
649 if (page_list) {
650 panic("%s: page_list too long", __func__);
651 }
f427ee49
A
652 vm_object_unlock(object);
653
d9a64523 654 vm_page_lockspin_queues();
39236c6e
A
655 vm_page_wire_count += page_count;
656 vm_page_unlock_queues();
c3c9b80d 657 vm_tag_update_size(tag, size);
d9a64523 658
5ba3f43e
A
659#if KASAN
660 if (map == compressor_map) {
661 kasan_notify_address_nopoison(addr, size);
662 } else {
663 kasan_notify_address(addr, size);
664 }
665#endif
c3c9b80d 666}
39236c6e 667
c3c9b80d
A
668kern_return_t
669kernel_memory_populate(
670 vm_map_t map,
671 vm_offset_t addr,
672 vm_size_t size,
673 kma_flags_t flags,
674 vm_tag_t tag)
675{
676 vm_object_t object;
677 vm_object_offset_t offset, pg_offset;
678 kern_return_t kr = KERN_SUCCESS;
679 vm_page_t mem;
680 vm_page_t page_list = NULL;
681 int page_count = atop_64(size);
39236c6e 682
d9a64523 683#if DEBUG || DEVELOPMENT
c3c9b80d
A
684 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
685 size, 0, 0, 0);
d9a64523
A
686#endif
687
c3c9b80d
A
688 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
689
690 if (flags & KMA_COMPRESSOR) {
691 pg_offset = page_count * PAGE_SIZE_64;
692
693 do {
694 for (;;) {
695 mem = vm_page_grab();
696
697 if (mem != VM_PAGE_NULL) {
698 break;
699 }
700
701 VM_PAGE_WAIT();
702 }
703 if (KMA_ZERO & flags) {
704 vm_page_zero_fill(mem);
705 }
706 mem->vmp_snext = page_list;
707 page_list = mem;
708
709 pg_offset -= PAGE_SIZE_64;
710
711 kr = pmap_enter_options(kernel_pmap,
712 addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
713 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
714 PMAP_OPTIONS_INTERNAL, NULL);
715 assert(kr == KERN_SUCCESS);
716 } while (pg_offset);
717
718 offset = addr;
719 object = compressor_object;
720
721 vm_object_lock(object);
722
723 for (pg_offset = 0;
724 pg_offset < size;
725 pg_offset += PAGE_SIZE_64) {
726 mem = page_list;
727 page_list = mem->vmp_snext;
728 mem->vmp_snext = NULL;
729
730 vm_page_insert(mem, object, offset + pg_offset);
731 assert(mem->vmp_busy);
732
733 mem->vmp_busy = FALSE;
734 mem->vmp_pmapped = TRUE;
735 mem->vmp_wpmapped = TRUE;
736 mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
737 }
738 vm_object_unlock(object);
739
740#if KASAN
741 if (map == compressor_map) {
742 kasan_notify_address_nopoison(addr, size);
743 } else {
744 kasan_notify_address(addr, size);
745 }
746#endif
747
748#if DEBUG || DEVELOPMENT
749 task_t task = current_task();
750 if (task != NULL) {
751 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_count);
752 }
753#endif
754 } else {
755 kr = vm_page_alloc_list(page_count, flags, &page_list);
756 if (kr == KERN_SUCCESS) {
757 kernel_memory_populate_with_pages(map, addr, size,
758 page_list, flags, tag);
759 }
760 }
761
762#if DEBUG || DEVELOPMENT
763 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
764 page_count, 0, 0, 0);
765#endif
39236c6e
A
766 return kr;
767}
768
769
770void
771kernel_memory_depopulate(
f427ee49
A
772 vm_map_t map,
773 vm_offset_t addr,
774 vm_size_t size,
c3c9b80d 775 kma_flags_t flags,
f427ee49 776 vm_tag_t tag)
39236c6e 777{
f427ee49
A
778 vm_object_t object;
779 vm_object_offset_t offset, pg_offset;
780 vm_page_t mem;
781 vm_page_t local_freeq = NULL;
782 unsigned int pages_unwired;
39236c6e 783
0a7de745 784 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
39236c6e
A
785
786 if (flags & KMA_COMPRESSOR) {
787 offset = addr;
788 object = compressor_object;
789
790 vm_object_lock(object);
791 } else if (flags & KMA_KOBJECT) {
792 offset = addr;
793 object = kernel_object;
39236c6e
A
794 vm_object_lock(object);
795 } else {
796 offset = 0;
797 object = NULL;
0a7de745
A
798 /*
799 * If it's not the kernel object, we need to:
800 * lock map;
801 * lookup entry;
802 * lock object;
803 * unlock map;
804 */
39236c6e 805 panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
0a7de745
A
806 "!KMA_KOBJECT",
807 map, (uint64_t) addr, (uint64_t) size, flags);
39236c6e
A
808 }
809 pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);
810
f427ee49 811 for (pg_offset = 0, pages_unwired = 0;
0a7de745
A
812 pg_offset < size;
813 pg_offset += PAGE_SIZE_64) {
39236c6e
A
814 mem = vm_page_lookup(object, offset + pg_offset);
815
816 assert(mem);
0a7de745
A
817
818 if (mem->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
39037602 819 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
f427ee49 820 pages_unwired++;
0a7de745 821 }
39236c6e 822
d9a64523 823 mem->vmp_busy = TRUE;
39236c6e 824
d9a64523 825 assert(mem->vmp_tabled);
39236c6e 826 vm_page_remove(mem, TRUE);
d9a64523 827 assert(mem->vmp_busy);
39236c6e 828
d9a64523
A
829 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
830 assert((mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
f427ee49 831 (mem->vmp_q_state == VM_PAGE_IS_WIRED));
39037602 832
d9a64523
A
833 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
834 mem->vmp_snext = local_freeq;
39236c6e
A
835 local_freeq = mem;
836 }
837 vm_object_unlock(object);
838
f427ee49 839
0a7de745 840 if (local_freeq) {
39236c6e 841 vm_page_free_list(local_freeq, TRUE);
f427ee49
A
842 if (pages_unwired != 0) {
843 vm_page_lockspin_queues();
844 vm_page_wire_count -= pages_unwired;
845 vm_page_unlock_queues();
846 vm_tag_update_size(tag, -ptoa_64(pages_unwired));
847 }
0a7de745 848 }
39236c6e
A
849}
850
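/*
 * Illustrative sketch (editor's addition, not original xnu source): the
 * reserve-then-populate pattern the two routines above support.  Virtual
 * space is reserved with KMA_VAONLY and only backed with wired pages when a
 * chunk is actually needed; kernel_memory_depopulate() later releases the
 * pages while keeping the VA reservation.  Names and flag choices here are
 * example assumptions.
 */
static __unused kern_return_t
example_reserve_and_populate(
	vm_offset_t     *basep,
	vm_size_t       reserve_size,
	vm_size_t       chunk_size)
{
	kern_return_t   kr;

	/* reserve VA only: no physical pages yet */
	kr = kernel_memory_allocate(kernel_map, basep, reserve_size, 0,
	    KMA_KOBJECT | KMA_VAONLY, vm_tag_bt());
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* back the first chunk with wired, zero-filled pages */
	kr = kernel_memory_populate(kernel_map, *basep, chunk_size,
	    KMA_KOBJECT | KMA_ZERO, vm_tag_bt());
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* ... use the chunk ... then give the pages back, keeping the VA */
	kernel_memory_depopulate(kernel_map, *basep, chunk_size,
	    KMA_KOBJECT, vm_tag_bt());
	return KERN_SUCCESS;
}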
1c79356b
A
851/*
852 * kmem_alloc:
853 *
854 * Allocate wired-down memory in the kernel's address map
855 * or a submap. The memory is not zero-filled.
856 */
857
858kern_return_t
3e170ce0 859kmem_alloc_external(
0a7de745
A
860 vm_map_t map,
861 vm_offset_t *addrp,
862 vm_size_t size)
1c79356b 863{
0a7de745 864 return kmem_alloc(map, addrp, size, vm_tag_bt());
3e170ce0
A
865}
866
39037602 867
3e170ce0
A
868kern_return_t
869kmem_alloc(
0a7de745
A
870 vm_map_t map,
871 vm_offset_t *addrp,
872 vm_size_t size,
873 vm_tag_t tag)
39037602
A
874{
875 return kmem_alloc_flags(map, addrp, size, tag, 0);
876}
877
878kern_return_t
879kmem_alloc_flags(
0a7de745
A
880 vm_map_t map,
881 vm_offset_t *addrp,
882 vm_size_t size,
883 vm_tag_t tag,
c3c9b80d 884 kma_flags_t flags)
3e170ce0 885{
39037602 886 kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, flags, tag);
f427ee49
A
887 if (kr == KERN_SUCCESS) {
888 TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
889 }
2d21ac55 890 return kr;
1c79356b
A
891}
892
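/*
 * Illustrative sketch (editor's addition): kmem_alloc() is the common entry
 * point; kmem_alloc_flags() lets a caller add kma_flags_t bits, for instance
 * KMA_ZERO to request zero-filled pages.  The helper name and the use of
 * vm_tag_bt() for the tag are example assumptions.
 */
static __unused kern_return_t
example_kmem_alloc_zeroed(
	vm_offset_t     *addrp,
	vm_size_t       size)
{
	return kmem_alloc_flags(kernel_map, addrp, size, vm_tag_bt(), KMA_ZERO);
}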
893/*
894 * kmem_realloc:
895 *
896 * Reallocate wired-down memory in the kernel's address map
897 * or a submap. Newly allocated pages are not zeroed.
898 * This can only be used on regions allocated with kmem_alloc.
899 *
900 * If successful, the pages in the old region are mapped twice.
901 * The old region is unchanged. Use kmem_free to get rid of it.
902 */
903kern_return_t
904kmem_realloc(
0a7de745
A
905 vm_map_t map,
906 vm_offset_t oldaddr,
907 vm_size_t oldsize,
908 vm_offset_t *newaddrp,
909 vm_size_t newsize,
910 vm_tag_t tag)
1c79356b 911{
0a7de745
A
912 vm_object_t object;
913 vm_object_offset_t offset;
914 vm_map_offset_t oldmapmin;
915 vm_map_offset_t oldmapmax;
916 vm_map_offset_t newmapaddr;
917 vm_map_size_t oldmapsize;
918 vm_map_size_t newmapsize;
919 vm_map_entry_t oldentry;
920 vm_map_entry_t newentry;
921 vm_page_t mem;
922 kern_return_t kr;
1c79356b 923
39236c6e 924 oldmapmin = vm_map_trunc_page(oldaddr,
0a7de745 925 VM_MAP_PAGE_MASK(map));
39236c6e 926 oldmapmax = vm_map_round_page(oldaddr + oldsize,
0a7de745 927 VM_MAP_PAGE_MASK(map));
91447636 928 oldmapsize = oldmapmax - oldmapmin;
39236c6e 929 newmapsize = vm_map_round_page(newsize,
0a7de745 930 VM_MAP_PAGE_MASK(map));
5c9f4661
A
931 if (newmapsize < newsize) {
932 /* overflow */
933 *newaddrp = 0;
934 return KERN_INVALID_ARGUMENT;
935 }
1c79356b
A
936
937 /*
938 * Find the VM object backing the old region.
939 */
940
b4c24cb9
A
941 vm_map_lock(map);
942
0a7de745 943 if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
1c79356b 944 panic("kmem_realloc");
0a7de745 945 }
3e170ce0 946 object = VME_OBJECT(oldentry);
1c79356b
A
947
948 /*
949 * Increase the size of the object and
950 * fill in the new region.
951 */
952
953 vm_object_reference(object);
b4c24cb9
A
954 /* by grabbing the object lock before unlocking the map */
955 /* we guarantee that we will panic if more than one */
956 /* attempt is made to realloc a kmem_alloc'd area */
1c79356b 957 vm_object_lock(object);
b4c24cb9 958 vm_map_unlock(map);
0a7de745 959 if (object->vo_size != oldmapsize) {
1c79356b 960 panic("kmem_realloc");
0a7de745 961 }
6d2010ae 962 object->vo_size = newmapsize;
1c79356b
A
963 vm_object_unlock(object);
964
b4c24cb9
A
965 /* allocate the new pages while expanded portion of the */
966 /* object is still not mapped */
91447636 967 kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
0a7de745 968 vm_object_round_page(newmapsize - oldmapsize));
1c79356b
A
969
970 /*
b4c24cb9 971 * Find space for the new region.
1c79356b
A
972 */
973
91447636 974 kr = vm_map_find_space(map, &newmapaddr, newmapsize,
0a7de745
A
975 (vm_map_offset_t) 0, 0,
976 VM_MAP_KERNEL_FLAGS_NONE,
977 tag,
978 &newentry);
b4c24cb9
A
979 if (kr != KERN_SUCCESS) {
980 vm_object_lock(object);
0a7de745 981 for (offset = oldmapsize;
91447636 982 offset < newmapsize; offset += PAGE_SIZE) {
0a7de745 983 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
b0d623f7 984 VM_PAGE_FREE(mem);
b4c24cb9
A
985 }
986 }
6d2010ae 987 object->vo_size = oldmapsize;
b4c24cb9
A
988 vm_object_unlock(object);
989 vm_object_deallocate(object);
990 return kr;
991 }
3e170ce0
A
992 VME_OBJECT_SET(newentry, object);
993 VME_OFFSET_SET(newentry, 0);
3e170ce0 994 assert(newentry->wired_count == 0);
b4c24cb9 995
0a7de745 996
b4c24cb9
A
997 /* add an extra reference in case we have someone doing an */
998 /* unexpected deallocate */
999 vm_object_reference(object);
1c79356b
A
1000 vm_map_unlock(map);
1001
5ba3f43e 1002 kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
0a7de745 1003 VM_PROT_DEFAULT, tag, FALSE);
91447636 1004 if (KERN_SUCCESS != kr) {
d9a64523 1005 vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, VM_MAP_REMOVE_NO_FLAGS);
b4c24cb9 1006 vm_object_lock(object);
0a7de745
A
1007 for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
1008 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
b0d623f7 1009 VM_PAGE_FREE(mem);
b4c24cb9
A
1010 }
1011 }
6d2010ae 1012 object->vo_size = oldmapsize;
b4c24cb9
A
1013 vm_object_unlock(object);
1014 vm_object_deallocate(object);
0a7de745 1015 return kr;
b4c24cb9
A
1016 }
1017 vm_object_deallocate(object);
1c79356b 1018
0a7de745
A
1019 if (kernel_object == object) {
1020 vm_tag_update_size(tag, newmapsize);
1021 }
5ba3f43e 1022
91447636 1023 *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
1c79356b
A
1024 return KERN_SUCCESS;
1025}
1026
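/*
 * Illustrative sketch (editor's addition): per the comment above, a
 * successful kmem_realloc() leaves the old mapping intact, so the caller has
 * to kmem_free() the old range itself.  Names are hypothetical.
 */
static __unused kern_return_t
example_grow_buffer(
	vm_offset_t     *bufp,
	vm_size_t       oldsize,
	vm_size_t       newsize)
{
	vm_offset_t     newbuf;
	kern_return_t   kr;

	kr = kmem_realloc(kernel_map, *bufp, oldsize, &newbuf, newsize,
	    vm_tag_bt());
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	kmem_free(kernel_map, *bufp, oldsize);   /* drop the old mapping */
	*bufp = newbuf;
	return KERN_SUCCESS;
}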
1027/*
b0d623f7 1028 * kmem_alloc_kobject:
1c79356b
A
1029 *
1030 * Allocate wired-down memory in the kernel's address map
1031 * or a submap. The memory is not zero-filled.
1032 *
1033 * The memory is allocated in the kernel_object.
1034 * It may not be copied with vm_map_copy, and
1035 * it may not be reallocated with kmem_realloc.
1036 */
1037
1038kern_return_t
3e170ce0 1039kmem_alloc_kobject_external(
0a7de745
A
1040 vm_map_t map,
1041 vm_offset_t *addrp,
1042 vm_size_t size)
1c79356b 1043{
0a7de745 1044 return kmem_alloc_kobject(map, addrp, size, vm_tag_bt());
3e170ce0
A
1045}
1046
1047kern_return_t
1048kmem_alloc_kobject(
0a7de745
A
1049 vm_map_t map,
1050 vm_offset_t *addrp,
1051 vm_size_t size,
3e170ce0
A
1052 vm_tag_t tag)
1053{
1054 return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag);
1c79356b
A
1055}
1056
1057/*
1058 * kmem_alloc_aligned:
1059 *
b0d623f7 1060 * Like kmem_alloc_kobject, except that the memory is aligned.
1c79356b
A
1061 * The size should be a power-of-2.
1062 */
1063
1064kern_return_t
1065kmem_alloc_aligned(
0a7de745
A
1066 vm_map_t map,
1067 vm_offset_t *addrp,
1068 vm_size_t size,
3e170ce0 1069 vm_tag_t tag)
1c79356b 1070{
0a7de745 1071 if ((size & (size - 1)) != 0) {
1c79356b 1072 panic("kmem_alloc_aligned: size not aligned");
0a7de745 1073 }
3e170ce0 1074 return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag);
1c79356b
A
1075}
1076
1077/*
1078 * kmem_alloc_pageable:
1079 *
1080 * Allocate pageable memory in the kernel's address map.
1081 */
1082
1083kern_return_t
3e170ce0 1084kmem_alloc_pageable_external(
0a7de745
A
1085 vm_map_t map,
1086 vm_offset_t *addrp,
1087 vm_size_t size)
3e170ce0 1088{
0a7de745 1089 return kmem_alloc_pageable(map, addrp, size, vm_tag_bt());
3e170ce0
A
1090}
1091
1092kern_return_t
1093kmem_alloc_pageable(
0a7de745
A
1094 vm_map_t map,
1095 vm_offset_t *addrp,
1096 vm_size_t size,
3e170ce0 1097 vm_tag_t tag)
1c79356b 1098{
91447636 1099 vm_map_offset_t map_addr;
0a7de745 1100 vm_map_size_t map_size;
1c79356b
A
1101 kern_return_t kr;
1102
1103#ifndef normal
fe8ab488 1104 map_addr = (vm_map_min(map)) + PAGE_SIZE;
1c79356b 1105#else
91447636 1106 map_addr = vm_map_min(map);
1c79356b 1107#endif
39236c6e 1108 map_size = vm_map_round_page(size,
0a7de745 1109 VM_MAP_PAGE_MASK(map));
5c9f4661
A
1110 if (map_size < size) {
1111 /* overflow */
1112 *addrp = 0;
1113 return KERN_INVALID_ARGUMENT;
1114 }
91447636
A
1115
1116 kr = vm_map_enter(map, &map_addr, map_size,
0a7de745
A
1117 (vm_map_offset_t) 0,
1118 VM_FLAGS_ANYWHERE,
1119 VM_MAP_KERNEL_FLAGS_NONE,
1120 tag,
1121 VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
1122 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1123
1124 if (kr != KERN_SUCCESS) {
1c79356b 1125 return kr;
0a7de745 1126 }
1c79356b 1127
5ba3f43e
A
1128#if KASAN
1129 kasan_notify_address(map_addr, map_size);
1130#endif
91447636 1131 *addrp = CAST_DOWN(vm_offset_t, map_addr);
1c79356b
A
1132 return KERN_SUCCESS;
1133}
1134
1135/*
1136 * kmem_free:
1137 *
1138 * Release a region of kernel virtual memory allocated
b0d623f7 1139 * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
1c79356b
A
1140 * and return the physical pages associated with that region.
1141 */
1142
1143void
1144kmem_free(
0a7de745
A
1145 vm_map_t map,
1146 vm_offset_t addr,
1147 vm_size_t size)
1c79356b
A
1148{
1149 kern_return_t kr;
1150
b0d623f7
A
1151 assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1152
2d21ac55
A
1153 TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);
1154
0a7de745 1155 if (size == 0) {
b0d623f7 1156#if MACH_ASSERT
0a7de745 1157 printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
b0d623f7
A
1158#endif
1159 return;
1160 }
1161
39236c6e 1162 kr = vm_map_remove(map,
0a7de745
A
1163 vm_map_trunc_page(addr,
1164 VM_MAP_PAGE_MASK(map)),
1165 vm_map_round_page(addr + size,
1166 VM_MAP_PAGE_MASK(map)),
1167 VM_MAP_REMOVE_KUNWIRE);
1168 if (kr != KERN_SUCCESS) {
1c79356b 1169 panic("kmem_free");
0a7de745 1170 }
1c79356b
A
1171}
1172
1173/*
b4c24cb9 1174 * Allocate new pages in an object.
1c79356b
A
1175 */
1176
1177kern_return_t
1178kmem_alloc_pages(
0a7de745
A
1179 vm_object_t object,
1180 vm_object_offset_t offset,
1181 vm_object_size_t size)
1c79356b 1182{
0a7de745 1183 vm_object_size_t alloc_size;
1c79356b 1184
91447636 1185 alloc_size = vm_object_round_page(size);
0a7de745 1186 vm_object_lock(object);
91447636 1187 while (alloc_size) {
0a7de745 1188 vm_page_t mem;
1c79356b 1189
1c79356b 1190
0a7de745
A
1191 /*
1192 * Allocate a page
1193 */
1194 while (VM_PAGE_NULL ==
1195 (mem = vm_page_alloc(object, offset))) {
1196 vm_object_unlock(object);
1197 VM_PAGE_WAIT();
1198 vm_object_lock(object);
1199 }
1200 mem->vmp_busy = FALSE;
1c79356b 1201
0a7de745
A
1202 alloc_size -= PAGE_SIZE;
1203 offset += PAGE_SIZE;
1c79356b 1204 }
b4c24cb9 1205 vm_object_unlock(object);
1c79356b
A
1206 return KERN_SUCCESS;
1207}
1208
1c79356b
A
1209/*
1210 * kmem_suballoc:
1211 *
1212 * Allocates a map to manage a subrange
1213 * of the kernel virtual address space.
1214 *
1215 * Arguments are as follows:
1216 *
1217 * parent Map to take range from
1218 * addr Address of start of range (IN/OUT)
1219 * size Size of range to find
1220 * pageable Can region be paged
1221 * anywhere Can region be located anywhere in map
1222 * new_map Pointer to new submap
1223 */
1224kern_return_t
1225kmem_suballoc(
0a7de745
A
1226 vm_map_t parent,
1227 vm_offset_t *addr,
1228 vm_size_t size,
1229 boolean_t pageable,
1230 int flags,
5ba3f43e
A
1231 vm_map_kernel_flags_t vmk_flags,
1232 vm_tag_t tag,
0a7de745 1233 vm_map_t *new_map)
1c79356b 1234{
0a7de745
A
1235 vm_map_t map;
1236 vm_map_offset_t map_addr;
1237 vm_map_size_t map_size;
1238 kern_return_t kr;
1c79356b 1239
39236c6e 1240 map_size = vm_map_round_page(size,
0a7de745 1241 VM_MAP_PAGE_MASK(parent));
5c9f4661
A
1242 if (map_size < size) {
1243 /* overflow */
1244 *addr = 0;
1245 return KERN_INVALID_ARGUMENT;
1246 }
1c79356b
A
1247
1248 /*
1249 * Need reference on submap object because it is internal
1250 * to the vm_system. vm_object_enter will never be called
1251 * on it (usual source of reference for vm_map_enter).
1252 */
1253 vm_object_reference(vm_submap_object);
1254
39236c6e 1255 map_addr = ((flags & VM_FLAGS_ANYWHERE)
0a7de745
A
1256 ? vm_map_min(parent)
1257 : vm_map_trunc_page(*addr,
1258 VM_MAP_PAGE_MASK(parent)));
91447636
A
1259
1260 kr = vm_map_enter(parent, &map_addr, map_size,
0a7de745
A
1261 (vm_map_offset_t) 0, flags, vmk_flags, tag,
1262 vm_submap_object, (vm_object_offset_t) 0, FALSE,
1263 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1c79356b
A
1264 if (kr != KERN_SUCCESS) {
1265 vm_object_deallocate(vm_submap_object);
0a7de745 1266 return kr;
1c79356b
A
1267 }
1268
1269 pmap_reference(vm_map_pmap(parent));
91447636 1270 map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
0a7de745
A
1271 if (map == VM_MAP_NULL) {
1272 panic("kmem_suballoc: vm_map_create failed"); /* "can't happen" */
1273 }
39236c6e
A
1274 /* inherit the parent map's page size */
1275 vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));
1c79356b 1276
91447636 1277 kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
1c79356b
A
1278 if (kr != KERN_SUCCESS) {
1279 /*
1280 * See comment preceding vm_map_submap().
1281 */
d9a64523 1282 vm_map_remove(parent, map_addr, map_addr + map_size,
0a7de745
A
1283 VM_MAP_REMOVE_NO_FLAGS);
1284 vm_map_deallocate(map); /* also removes ref to pmap */
1c79356b 1285 vm_object_deallocate(vm_submap_object);
0a7de745 1286 return kr;
1c79356b 1287 }
91447636 1288 *addr = CAST_DOWN(vm_offset_t, map_addr);
1c79356b 1289 *new_map = map;
0a7de745 1290 return KERN_SUCCESS;
1c79356b 1291}
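/*
 * Illustrative sketch (editor's addition, not original xnu source): carving a
 * pageable submap out of kernel_map, the way subsystem maps (e.g. the IPC or
 * compressor maps) are typically set up at boot.  The helper name, size, and
 * tag choice are example assumptions.
 */
static __unused kern_return_t
example_create_submap(vm_size_t submap_size, vm_map_t *new_map)
{
	vm_offset_t submap_base = 0;

	return kmem_suballoc(kernel_map, &submap_base, submap_size,
	    TRUE,                       /* pageable */
	    VM_FLAGS_ANYWHERE,          /* let the parent pick the range */
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,
	    new_map);
}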
4ba76501
A
1292/*
1293 * The default percentage of memory that can be mlocked is scaled based on the total
 1294 * amount of memory in the system. These percentages are calculated
1295 * offline and stored in this table. We index this table by
1296 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
1297 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
1298 *
 1299 * Note that these values were picked for the Mac.
1300 * If we ever have very large memory config arm devices, we may want to revisit
1301 * since the kernel overhead is smaller there due to the larger page size.
1302 */
1303
1304/* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
1305#define VM_USER_WIREABLE_MIN_CONFIG 32
1306static vm_map_size_t wire_limit_percents[] =
1307{ 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
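/*
 * Worked example (editor's addition): on a machine with 16GB (2^34 bytes) of
 * configured memory, available_mem_log in kmem_set_user_wire_limits() below
 * works out to 34, so the table index is 34 - VM_USER_WIREABLE_MIN_CONFIG = 2,
 * max_wire_percent = wire_limit_percents[2] = 76, and the default limit is
 * 16GB * 76 / 100, roughly 12.16GB, before the VM_NOT_USER_WIREABLE_MAX
 * adjustment is applied.
 */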
1308
1309/*
1310 * Sets the default global user wire limit which limits the amount of
 1311 * memory that can be locked via mlock(), based on the algorithm above.
1312 * This can be overridden via a sysctl.
1313 */
1314static void
1315kmem_set_user_wire_limits(void)
1316{
1317 uint64_t available_mem_log;
1318 uint64_t max_wire_percent;
1319 size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
1320 sizeof(vm_map_size_t);
1321 vm_map_size_t limit;
f427ee49
A
1322 uint64_t config_memsize = max_mem;
1323#if defined(XNU_TARGET_OS_OSX)
1324 config_memsize = max_mem_actual;
1325#endif /* defined(XNU_TARGET_OS_OSX) */
1326
1327 available_mem_log = bit_floor(config_memsize);
4ba76501
A
1328
1329 if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
1330 available_mem_log = 0;
1331 } else {
1332 available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
1333 }
1334 if (available_mem_log >= wire_limit_percents_length) {
1335 available_mem_log = wire_limit_percents_length - 1;
1336 }
1337 max_wire_percent = wire_limit_percents[available_mem_log];
1338
f427ee49 1339 limit = config_memsize * max_wire_percent / 100;
4ba76501 1340 /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
f427ee49
A
1341 if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
1342 limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
4ba76501
A
1343 }
1344
1345 vm_global_user_wire_limit = limit;
1346 /* the default per task limit is the same as the global limit */
1347 vm_per_task_user_wire_limit = limit;
f427ee49
A
1348 vm_add_wire_count_over_global_limit = 0;
1349 vm_add_wire_count_over_user_limit = 0;
4ba76501
A
1350}
1351
1c79356b
A
1352
1353/*
1354 * kmem_init:
1355 *
1356 * Initialize the kernel's virtual memory map, taking
1357 * into account all memory allocated up to this time.
1358 */
f427ee49 1359__startup_func
1c79356b
A
1360void
1361kmem_init(
0a7de745
A
1362 vm_offset_t start,
1363 vm_offset_t end)
1c79356b 1364{
91447636
A
1365 vm_map_offset_t map_start;
1366 vm_map_offset_t map_end;
5ba3f43e
A
1367 vm_map_kernel_flags_t vmk_flags;
1368
1369 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1370 vmk_flags.vmkf_permanent = TRUE;
1371 vmk_flags.vmkf_no_pmap_check = TRUE;
91447636 1372
39236c6e 1373 map_start = vm_map_trunc_page(start,
0a7de745 1374 VM_MAP_PAGE_MASK(kernel_map));
39236c6e 1375 map_end = vm_map_round_page(end,
0a7de745 1376 VM_MAP_PAGE_MASK(kernel_map));
91447636 1377
0a7de745
A
1378#if defined(__arm__) || defined(__arm64__)
1379 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1380 VM_MAX_KERNEL_ADDRESS, FALSE);
5ba3f43e
A
1381 /*
1382 * Reserve virtual memory allocated up to this time.
1383 */
1384 {
0a7de745
A
1385 unsigned int region_select = 0;
1386 vm_map_offset_t region_start;
1387 vm_map_size_t region_size;
5ba3f43e
A
1388 vm_map_offset_t map_addr;
1389 kern_return_t kr;
1390
1391 while (pmap_virtual_region(region_select, &region_start, &region_size)) {
5ba3f43e
A
1392 map_addr = region_start;
1393 kr = vm_map_enter(kernel_map, &map_addr,
0a7de745
A
1394 vm_map_round_page(region_size,
1395 VM_MAP_PAGE_MASK(kernel_map)),
1396 (vm_map_offset_t) 0,
1397 VM_FLAGS_FIXED,
1398 vmk_flags,
1399 VM_KERN_MEMORY_NONE,
1400 VM_OBJECT_NULL,
1401 (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
1402 VM_INHERIT_DEFAULT);
5ba3f43e
A
1403
1404 if (kr != KERN_SUCCESS) {
1405 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
0a7de745
A
1406 (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
1407 (uint64_t) region_size, kr);
1408 }
5ba3f43e
A
1409
1410 region_select++;
0a7de745 1411 }
5ba3f43e
A
1412 }
1413#else
0a7de745
A
1414 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1415 map_end, FALSE);
1c79356b
A
1416 /*
1417 * Reserve virtual memory allocated up to this time.
1418 */
6d2010ae 1419 if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
91447636 1420 vm_map_offset_t map_addr;
6d2010ae 1421 kern_return_t kr;
0a7de745 1422
5ba3f43e
A
1423 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1424 vmk_flags.vmkf_no_pmap_check = TRUE;
1425
6d2010ae
A
1426 map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1427 kr = vm_map_enter(kernel_map,
0a7de745
A
1428 &map_addr,
1429 (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1430 (vm_map_offset_t) 0,
1431 VM_FLAGS_FIXED,
1432 vmk_flags,
1433 VM_KERN_MEMORY_NONE,
1434 VM_OBJECT_NULL,
1435 (vm_object_offset_t) 0, FALSE,
1436 VM_PROT_NONE, VM_PROT_NONE,
1437 VM_INHERIT_DEFAULT);
1438
6d2010ae
A
1439 if (kr != KERN_SUCCESS) {
1440 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
0a7de745
A
1441 (uint64_t) start, (uint64_t) end,
1442 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1443 (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1444 kr);
1445 }
1c79356b 1446 }
5ba3f43e 1447#endif
6d2010ae 1448
4ba76501 1449 kmem_set_user_wire_limits();
1c79356b
A
1450}
1451
1c79356b
A
1452/*
1453 * Routine: copyinmap
1454 * Purpose:
1455 * Like copyin, except that fromaddr is an address
1456 * in the specified VM map. This implementation
1457 * is incomplete; it handles the current user map
1458 * and the kernel map/submaps.
1459 */
91447636 1460kern_return_t
1c79356b 1461copyinmap(
0a7de745
A
1462 vm_map_t map,
1463 vm_map_offset_t fromaddr,
1464 void *todata,
1465 vm_size_t length)
1c79356b 1466{
0a7de745 1467 kern_return_t kr = KERN_SUCCESS;
91447636
A
1468 vm_map_t oldmap;
1469
0a7de745 1470 if (vm_map_pmap(map) == pmap_kernel()) {
1c79356b 1471 /* assume a correct copy */
91447636 1472 memcpy(todata, CAST_DOWN(void *, fromaddr), length);
0a7de745
A
1473 } else if (current_map() == map) {
1474 if (copyin(fromaddr, todata, length) != 0) {
91447636 1475 kr = KERN_INVALID_ADDRESS;
0a7de745
A
1476 }
1477 } else {
91447636
A
1478 vm_map_reference(map);
1479 oldmap = vm_map_switch(map);
0a7de745 1480 if (copyin(fromaddr, todata, length) != 0) {
91447636 1481 kr = KERN_INVALID_ADDRESS;
0a7de745 1482 }
91447636
A
1483 vm_map_switch(oldmap);
1484 vm_map_deallocate(map);
1485 }
1486 return kr;
1c79356b
A
1487}
1488
1489/*
1490 * Routine: copyoutmap
1491 * Purpose:
1492 * Like copyout, except that toaddr is an address
f427ee49 1493 * in the specified VM map.
1c79356b 1494 */
91447636 1495kern_return_t
1c79356b 1496copyoutmap(
0a7de745
A
1497 vm_map_t map,
1498 void *fromdata,
1499 vm_map_address_t toaddr,
1500 vm_size_t length)
1c79356b 1501{
f427ee49
A
1502 kern_return_t kr = KERN_SUCCESS;
1503 vm_map_t oldmap;
1504
1c79356b
A
1505 if (vm_map_pmap(map) == pmap_kernel()) {
1506 /* assume a correct copy */
91447636 1507 memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
f427ee49
A
1508 } else if (current_map() == map) {
1509 if (copyout(fromdata, toaddr, length) != 0) {
1510 kr = KERN_INVALID_ADDRESS;
1511 }
1512 } else {
1513 vm_map_reference(map);
1514 oldmap = vm_map_switch(map);
1515 if (copyout(fromdata, toaddr, length) != 0) {
1516 kr = KERN_INVALID_ADDRESS;
1517 }
1518 vm_map_switch(oldmap);
1519 vm_map_deallocate(map);
0a7de745 1520 }
f427ee49 1521 return kr;
1c79356b 1522}
9bccf70c 1523
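/*
 * Illustrative sketch (editor's addition, not original xnu source): using
 * copyoutmap() above to hand a small result to a task whose map may not be
 * the current one.  The helper name and parameters are hypothetical.
 */
static __unused kern_return_t
example_copyout_result(
	vm_map_t        user_map,
	vm_map_address_t user_buf,
	uint32_t        result)
{
	/* copyoutmap() temporarily switches to user_map if it isn't current */
	return copyoutmap(user_map, &result, user_buf, sizeof(result));
}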
c3c9b80d
A
1524/*
1525 * Routine: copyoutmap_atomic{32, 64}
1526 * Purpose:
1527 * Like copyoutmap, except that the operation is atomic.
1528 * Takes in value rather than *fromdata pointer.
1529 */
1530kern_return_t
1531copyoutmap_atomic32(
1532 vm_map_t map,
1533 uint32_t value,
1534 vm_map_address_t toaddr)
1535{
1536 kern_return_t kr = KERN_SUCCESS;
1537 vm_map_t oldmap;
1538
1539 if (vm_map_pmap(map) == pmap_kernel()) {
1540 /* assume a correct toaddr */
1541 *(uint32_t *)toaddr = value;
1542 } else if (current_map() == map) {
1543 if (copyout_atomic32(value, toaddr) != 0) {
1544 kr = KERN_INVALID_ADDRESS;
1545 }
1546 } else {
1547 vm_map_reference(map);
1548 oldmap = vm_map_switch(map);
1549 if (copyout_atomic32(value, toaddr) != 0) {
1550 kr = KERN_INVALID_ADDRESS;
1551 }
1552 vm_map_switch(oldmap);
1553 vm_map_deallocate(map);
1554 }
1555 return kr;
1556}
1557
1558kern_return_t
1559copyoutmap_atomic64(
1560 vm_map_t map,
1561 uint64_t value,
1562 vm_map_address_t toaddr)
1563{
1564 kern_return_t kr = KERN_SUCCESS;
1565 vm_map_t oldmap;
1566
1567 if (vm_map_pmap(map) == pmap_kernel()) {
1568 /* assume a correct toaddr */
1569 *(uint64_t *)toaddr = value;
1570 } else if (current_map() == map) {
1571 if (copyout_atomic64(value, toaddr) != 0) {
1572 kr = KERN_INVALID_ADDRESS;
1573 }
1574 } else {
1575 vm_map_reference(map);
1576 oldmap = vm_map_switch(map);
1577 if (copyout_atomic64(value, toaddr) != 0) {
1578 kr = KERN_INVALID_ADDRESS;
1579 }
1580 vm_map_switch(oldmap);
1581 vm_map_deallocate(map);
1582 }
1583 return kr;
1584}
1585
3e170ce0
A
1586/*
1587 *
1588 * The following two functions are to be used when exposing kernel
1589 * addresses to userspace via any of the various debug or info
1590 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
1591 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
1592 * are exported to KEXTs.
1593 *
1594 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
1595 */
1596
5ba3f43e
A
1597static void
1598vm_kernel_addrhash_internal(
3e170ce0 1599 vm_offset_t addr,
5ba3f43e
A
1600 vm_offset_t *hash_addr,
1601 uint64_t salt)
3e170ce0 1602{
5ba3f43e
A
1603 assert(salt != 0);
1604
3e170ce0 1605 if (addr == 0) {
5ba3f43e 1606 *hash_addr = 0;
3e170ce0
A
1607 return;
1608 }
1609
5ba3f43e
A
1610 if (VM_KERNEL_IS_SLID(addr)) {
1611 *hash_addr = VM_KERNEL_UNSLIDE(addr);
1612 return;
1613 }
1614
0a7de745 1615 vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
5ba3f43e
A
1616 SHA256_CTX sha_ctx;
1617
1618 SHA256_Init(&sha_ctx);
1619 SHA256_Update(&sha_ctx, &salt, sizeof(salt));
1620 SHA256_Update(&sha_ctx, &addr, sizeof(addr));
1621 SHA256_Final(sha_digest, &sha_ctx);
1622
1623 *hash_addr = sha_digest[0];
1624}
1625
1626void
1627vm_kernel_addrhash_external(
1628 vm_offset_t addr,
1629 vm_offset_t *hash_addr)
1630{
1631 return vm_kernel_addrhash_internal(addr, hash_addr, vm_kernel_addrhash_salt_ext);
1632}
1633
1634vm_offset_t
1635vm_kernel_addrhash(vm_offset_t addr)
1636{
1637 vm_offset_t hash_addr;
1638 vm_kernel_addrhash_internal(addr, &hash_addr, vm_kernel_addrhash_salt);
1639 return hash_addr;
1640}
1641
1642void
1643vm_kernel_addrhide(
1644 vm_offset_t addr,
1645 vm_offset_t *hide_addr)
1646{
1647 *hide_addr = VM_KERNEL_ADDRHIDE(addr);
3e170ce0
A
1648}
1649
1650/*
5ba3f43e 1651 * vm_kernel_addrperm_external:
3e170ce0
A
1652 * vm_kernel_unslide_or_perm_external:
1653 *
5ba3f43e 1654 * Use these macros when exposing an address to userspace that could come from
3e170ce0
A
1655 * either kernel text/data *or* the heap.
1656 */
1657void
5ba3f43e 1658vm_kernel_addrperm_external(
3e170ce0 1659 vm_offset_t addr,
5ba3f43e 1660 vm_offset_t *perm_addr)
3e170ce0 1661{
39037602 1662 if (VM_KERNEL_IS_SLID(addr)) {
5ba3f43e
A
1663 *perm_addr = VM_KERNEL_UNSLIDE(addr);
1664 } else if (VM_KERNEL_ADDRESS(addr)) {
1665 *perm_addr = addr + vm_kernel_addrperm_ext;
1666 } else {
1667 *perm_addr = addr;
3e170ce0 1668 }
5ba3f43e 1669}
3e170ce0 1670
5ba3f43e
A
1671void
1672vm_kernel_unslide_or_perm_external(
1673 vm_offset_t addr,
1674 vm_offset_t *up_addr)
1675{
3e170ce0 1676 vm_kernel_addrperm_external(addr, up_addr);
3e170ce0 1677}
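/*
 * Illustrative sketch (editor's addition): sanitizing a kernel pointer before
 * it is reported to userspace, per the note above.  Inside the kernel the
 * macro forms in vm_param.h are preferred; the function forms used here are
 * the ones exported to kexts.  The helper name is hypothetical.
 */
static __unused void
example_report_pointer(vm_offset_t kaddr)
{
	vm_offset_t safe_addr;

	vm_kernel_addrperm_external(kaddr, &safe_addr);
	printf("object at 0x%lx\n", (unsigned long) safe_addr);
}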
f427ee49
A
1678
1679void
1680vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
1681{
1682 if (ptr & ((1ul << params.vmpp_shift) - 1)) {
1683 panic("pointer %p can't be packed: low %d bits aren't 0",
1684 (void *)ptr, params.vmpp_shift);
1685 } else if (ptr <= params.vmpp_base) {
1686 panic("pointer %p can't be packed: below base %p",
1687 (void *)ptr, (void *)params.vmpp_base);
1688 } else {
1689 panic("pointer %p can't be packed: maximum encodable pointer is %p",
1690 (void *)ptr, (void *)vm_packing_max_packable(params));
1691 }
1692}
1693
1694void
1695vm_packing_verify_range(
1696 const char *subsystem,
1697 vm_offset_t min_address,
1698 vm_offset_t max_address,
1699 vm_packing_params_t params)
1700{
1701 if (min_address > max_address) {
1702 panic("%s: %s range invalid min:%p > max:%p",
1703 __func__, subsystem, (void *)min_address, (void *)max_address);
1704 }
1705
1706 if (!params.vmpp_base_relative) {
1707 return;
1708 }
1709
1710 if (min_address <= params.vmpp_base) {
1711 panic("%s: %s range invalid min:%p <= base:%p",
1712 __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
1713 }
1714
1715 if (max_address > vm_packing_max_packable(params)) {
1716 panic("%s: %s range invalid max:%p >= max packable:%p",
1717 __func__, subsystem, (void *)max_address,
1718 (void *)vm_packing_max_packable(params));
1719 }
1720}