]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_kern.c
xnu-6153.101.6.tar.gz
[apple/xnu.git] / osfmk / vm / vm_kern.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
0a7de745 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
0a7de745 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
0a7de745 31/*
1c79356b
A
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
0a7de745 35 *
1c79356b
A
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
0a7de745 41 *
1c79356b
A
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
0a7de745 45 *
1c79356b 46 * Carnegie Mellon requests users of this software to return to
0a7de745 47 *
1c79356b
A
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
0a7de745 52 *
1c79356b
A
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_kern.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Kernel memory management.
64 */
65
1c79356b
A
66#include <mach/kern_return.h>
67#include <mach/vm_param.h>
68#include <kern/assert.h>
1c79356b
A
69#include <kern/thread.h>
70#include <vm/vm_kern.h>
71#include <vm/vm_map.h>
72#include <vm/vm_object.h>
73#include <vm/vm_page.h>
5ba3f43e 74#include <vm/vm_compressor.h>
1c79356b
A
75#include <vm/vm_pageout.h>
76#include <kern/misc_protos.h>
77#include <vm/cpm.h>
0a7de745 78#include <kern/ledger.h>
4ba76501 79#include <kern/bits.h>
1c79356b
A
80
81#include <string.h>
2d21ac55
A
82
83#include <libkern/OSDebug.h>
5ba3f43e 84#include <libkern/crypto/sha2.h>
cb323159 85#include <libkern/section_keywords.h>
2d21ac55
A
86#include <sys/kdebug.h>
87
5ba3f43e
A
88#include <san/kasan.h>
89
1c79356b
A
90/*
91 * Variables exported by this module.
92 */
93
cb323159
A
94SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
95vm_map_t kernel_pageable_map;
1c79356b 96
2d21ac55
A
97extern boolean_t vm_kernel_ready;
98
1c79356b
A
99/*
100 * Forward declarations for internal functions.
101 */
102extern kern_return_t kmem_alloc_pages(
0a7de745
A
103 vm_object_t object,
104 vm_object_offset_t offset,
105 vm_object_size_t size);
1c79356b 106
1c79356b
A
107kern_return_t
108kmem_alloc_contig(
0a7de745
A
109 vm_map_t map,
110 vm_offset_t *addrp,
111 vm_size_t size,
112 vm_offset_t mask,
113 ppnum_t max_pnum,
114 ppnum_t pnum_mask,
115 int flags,
3e170ce0 116 vm_tag_t tag)
1c79356b 117{
0a7de745
A
118 vm_object_t object;
119 vm_object_offset_t offset;
120 vm_map_offset_t map_addr;
121 vm_map_offset_t map_mask;
122 vm_map_size_t map_size, i;
123 vm_map_entry_t entry;
124 vm_page_t m, pages;
125 kern_return_t kr;
126
127 assert(VM_KERN_MEMORY_NONE != tag);
128
129 if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) {
1c79356b 130 return KERN_INVALID_ARGUMENT;
0a7de745 131 }
316670eb 132
39236c6e 133 map_size = vm_map_round_page(size,
0a7de745 134 VM_MAP_PAGE_MASK(map));
316670eb 135 map_mask = (vm_map_offset_t)mask;
0a7de745 136
316670eb
A
137 /* Check for zero allocation size (either directly or via overflow) */
138 if (map_size == 0) {
1c79356b
A
139 *addrp = 0;
140 return KERN_INVALID_ARGUMENT;
141 }
142
91447636
A
143 /*
144 * Allocate a new object (if necessary) and the reference we
145 * will be donating to the map entry. We must do this before
146 * locking the map, or risk deadlock with the default pager.
147 */
148 if ((flags & KMA_KOBJECT) != 0) {
149 object = kernel_object;
150 vm_object_reference(object);
1c79356b 151 } else {
91447636 152 object = vm_object_allocate(map_size);
1c79356b
A
153 }
154
5ba3f43e 155 kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0,
0a7de745 156 VM_MAP_KERNEL_FLAGS_NONE, tag, &entry);
91447636
A
157 if (KERN_SUCCESS != kr) {
158 vm_object_deallocate(object);
1c79356b
A
159 return kr;
160 }
161
3e170ce0
A
162 if (object == kernel_object) {
163 offset = map_addr;
164 } else {
165 offset = 0;
166 }
167 VME_OBJECT_SET(entry, object);
168 VME_OFFSET_SET(entry, offset);
91447636
A
169
170 /* Take an extra object ref in case the map entry gets deleted */
171 vm_object_reference(object);
1c79356b
A
172 vm_map_unlock(map);
173
b0d623f7 174 kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);
1c79356b
A
175
176 if (kr != KERN_SUCCESS) {
39236c6e 177 vm_map_remove(map,
0a7de745
A
178 vm_map_trunc_page(map_addr,
179 VM_MAP_PAGE_MASK(map)),
180 vm_map_round_page(map_addr + map_size,
181 VM_MAP_PAGE_MASK(map)),
182 VM_MAP_REMOVE_NO_FLAGS);
91447636 183 vm_object_deallocate(object);
1c79356b
A
184 *addrp = 0;
185 return kr;
186 }
187
188 vm_object_lock(object);
91447636 189 for (i = 0; i < map_size; i += PAGE_SIZE) {
1c79356b
A
190 m = pages;
191 pages = NEXT_PAGE(m);
0c530ab8 192 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
d9a64523 193 m->vmp_busy = FALSE;
1c79356b
A
194 vm_page_insert(m, object, offset + i);
195 }
196 vm_object_unlock(object);
197
5ba3f43e 198 kr = vm_map_wire_kernel(map,
0a7de745
A
199 vm_map_trunc_page(map_addr,
200 VM_MAP_PAGE_MASK(map)),
201 vm_map_round_page(map_addr + map_size,
202 VM_MAP_PAGE_MASK(map)),
203 VM_PROT_DEFAULT, tag,
204 FALSE);
3e170ce0 205
39236c6e 206 if (kr != KERN_SUCCESS) {
1c79356b
A
207 if (object == kernel_object) {
208 vm_object_lock(object);
91447636 209 vm_object_page_remove(object, offset, offset + map_size);
1c79356b
A
210 vm_object_unlock(object);
211 }
39236c6e 212 vm_map_remove(map,
0a7de745
A
213 vm_map_trunc_page(map_addr,
214 VM_MAP_PAGE_MASK(map)),
215 vm_map_round_page(map_addr + map_size,
216 VM_MAP_PAGE_MASK(map)),
217 VM_MAP_REMOVE_NO_FLAGS);
91447636 218 vm_object_deallocate(object);
1c79356b
A
219 return kr;
220 }
91447636
A
221 vm_object_deallocate(object);
222
5ba3f43e 223 if (object == kernel_object) {
91447636 224 vm_map_simplify(map, map_addr);
0a7de745
A
225 vm_tag_update_size(tag, map_size);
226 }
b0d623f7
A
227 *addrp = (vm_offset_t) map_addr;
228 assert((vm_map_offset_t) *addrp == map_addr);
5ba3f43e 229
1c79356b
A
230 return KERN_SUCCESS;
231}
232
233/*
234 * Master entry point for allocating kernel memory.
235 * NOTE: this routine is _never_ interrupt safe.
236 *
237 * map : map to allocate into
238 * addrp : pointer to start address of new memory
239 * size : size of memory requested
240 * flags : options
241 * KMA_HERE *addrp is base address, else "anywhere"
242 * KMA_NOPAGEWAIT don't wait for pages if unavailable
243 * KMA_KOBJECT use kernel_object
0c530ab8
A
244 * KMA_LOMEM support for 32 bit devices in a 64 bit world
245 * if set and a lomemory pool is available
246 * grab pages from it... this also implies
247 * KMA_NOPAGEWAIT
1c79356b
A
248 */
249
250kern_return_t
251kernel_memory_allocate(
0a7de745
A
252 vm_map_t map,
253 vm_offset_t *addrp,
254 vm_size_t size,
255 vm_offset_t mask,
256 int flags,
3e170ce0 257 vm_tag_t tag)
1c79356b 258{
0a7de745
A
259 vm_object_t object;
260 vm_object_offset_t offset;
261 vm_object_offset_t pg_offset;
262 vm_map_entry_t entry = NULL;
263 vm_map_offset_t map_addr, fill_start;
264 vm_map_offset_t map_mask;
265 vm_map_size_t map_size, fill_size;
266 kern_return_t kr, pe_result;
267 vm_page_t mem;
268 vm_page_t guard_page_list = NULL;
269 vm_page_t wired_page_list = NULL;
270 int guard_page_count = 0;
271 int wired_page_count = 0;
272 int page_grab_count = 0;
273 int i;
274 int vm_alloc_flags;
275 vm_map_kernel_flags_t vmk_flags;
276 vm_prot_t kma_prot;
277#if DEVELOPMENT || DEBUG
278 task_t task = current_task();
279#endif /* DEVELOPMENT || DEBUG */
280
281 if (!vm_kernel_ready) {
2d21ac55
A
282 panic("kernel_memory_allocate: VM is not ready");
283 }
1c79356b 284
39236c6e 285 map_size = vm_map_round_page(size,
0a7de745 286 VM_MAP_PAGE_MASK(map));
91447636 287 map_mask = (vm_map_offset_t) mask;
3e170ce0 288
5ba3f43e
A
289 vm_alloc_flags = 0; //VM_MAKE_TAG(tag);
290 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2d21ac55 291
316670eb
A
292 /* Check for zero allocation size (either directly or via overflow) */
293 if (map_size == 0) {
294 *addrp = 0;
295 return KERN_INVALID_ARGUMENT;
296 }
b0d623f7
A
297
298 /*
299 * limit the size of a single extent of wired memory
300 * to try and limit the damage to the system if
301 * too many pages get wired down
4bd07ac2
A
302 * limit raised to 2GB with 128GB max physical limit,
303 * but scaled by installed memory above this
b0d623f7 304 */
0a7de745
A
305 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
306 map_size > MAX(1ULL << 31, sane_size / 64)) {
307 return KERN_RESOURCE_SHORTAGE;
308 }
b0d623f7 309
2d21ac55
A
310 /*
311 * Guard pages:
312 *
313 * Guard pages are implemented as ficticious pages. By placing guard pages
314 * on either end of a stack, they can help detect cases where a thread walks
315 * off either end of its stack. They are allocated and set up here and attempts
316 * to access those pages are trapped in vm_fault_page().
317 *
318 * The map_size we were passed may include extra space for
319 * guard pages. If those were requested, then back it out of fill_size
320 * since vm_map_find_space() takes just the actual size not including
321 * guard pages. Similarly, fill_start indicates where the actual pages
322 * will begin in the range.
323 */
324
325 fill_start = 0;
326 fill_size = map_size;
b0d623f7 327
2d21ac55 328 if (flags & KMA_GUARD_FIRST) {
5ba3f43e 329 vmk_flags.vmkf_guard_before = TRUE;
2d21ac55
A
330 fill_start += PAGE_SIZE_64;
331 fill_size -= PAGE_SIZE_64;
332 if (map_size < fill_start + fill_size) {
333 /* no space for a guard page */
334 *addrp = 0;
335 return KERN_INVALID_ARGUMENT;
336 }
b0d623f7 337 guard_page_count++;
2d21ac55
A
338 }
339 if (flags & KMA_GUARD_LAST) {
5ba3f43e 340 vmk_flags.vmkf_guard_after = TRUE;
2d21ac55
A
341 fill_size -= PAGE_SIZE_64;
342 if (map_size <= fill_start + fill_size) {
343 /* no space for a guard page */
344 *addrp = 0;
345 return KERN_INVALID_ARGUMENT;
346 }
b0d623f7
A
347 guard_page_count++;
348 }
349 wired_page_count = (int) (fill_size / PAGE_SIZE_64);
350 assert(wired_page_count * PAGE_SIZE_64 == fill_size);
351
d9a64523
A
352#if DEBUG || DEVELOPMENT
353 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, size, 0, 0, 0);
354#endif
355
b0d623f7
A
356 for (i = 0; i < guard_page_count; i++) {
357 for (;;) {
358 mem = vm_page_grab_guard();
359
0a7de745 360 if (mem != VM_PAGE_NULL) {
b0d623f7 361 break;
0a7de745 362 }
b0d623f7
A
363 if (flags & KMA_NOPAGEWAIT) {
364 kr = KERN_RESOURCE_SHORTAGE;
365 goto out;
366 }
367 vm_page_more_fictitious();
368 }
d9a64523 369 mem->vmp_snext = guard_page_list;
b0d623f7
A
370 guard_page_list = mem;
371 }
372
d9a64523 373 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
0a7de745 374 for (i = 0; i < wired_page_count; i++) {
0a7de745
A
375 for (;;) {
376 if (flags & KMA_LOMEM) {
377 mem = vm_page_grablo();
378 } else {
379 mem = vm_page_grab();
380 }
b0d623f7 381
0a7de745
A
382 if (mem != VM_PAGE_NULL) {
383 break;
384 }
385
386 if (flags & KMA_NOPAGEWAIT) {
387 kr = KERN_RESOURCE_SHORTAGE;
388 goto out;
389 }
390 if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
391 kr = KERN_RESOURCE_SHORTAGE;
392 goto out;
393 }
0a7de745 394
cb323159
A
395 /* VM privileged threads should have waited in vm_page_grab() and not get here. */
396 assert(!(current_thread()->options & TH_OPT_VMPRIV));
397
398 uint64_t unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;
0a7de745
A
399 if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
400 kr = KERN_RESOURCE_SHORTAGE;
401 goto out;
402 }
403 VM_PAGE_WAIT();
0b4c1975 404 }
0a7de745
A
405 page_grab_count++;
406 if (KMA_ZERO & flags) {
407 vm_page_zero_fill(mem);
b0d623f7 408 }
0a7de745
A
409 mem->vmp_snext = wired_page_list;
410 wired_page_list = mem;
b0d623f7 411 }
39236c6e 412 }
91447636
A
413
414 /*
415 * Allocate a new object (if necessary). We must do this before
416 * locking the map, or risk deadlock with the default pager.
417 */
418 if ((flags & KMA_KOBJECT) != 0) {
1c79356b 419 object = kernel_object;
91447636 420 vm_object_reference(object);
39236c6e
A
421 } else if ((flags & KMA_COMPRESSOR) != 0) {
422 object = compressor_object;
423 vm_object_reference(object);
91447636
A
424 } else {
425 object = vm_object_allocate(map_size);
1c79356b 426 }
91447636 427
0a7de745 428 if (flags & KMA_ATOMIC) {
5ba3f43e 429 vmk_flags.vmkf_atomic_entry = TRUE;
0a7de745 430 }
5ba3f43e 431
2d21ac55 432 kr = vm_map_find_space(map, &map_addr,
0a7de745
A
433 fill_size, map_mask,
434 vm_alloc_flags, vmk_flags, tag, &entry);
91447636
A
435 if (KERN_SUCCESS != kr) {
436 vm_object_deallocate(object);
b0d623f7 437 goto out;
1c79356b 438 }
2d21ac55 439
3e170ce0
A
440 if (object == kernel_object || object == compressor_object) {
441 offset = map_addr;
442 } else {
443 offset = 0;
444 }
445 VME_OBJECT_SET(entry, object);
446 VME_OFFSET_SET(entry, offset);
0a7de745
A
447
448 if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
39236c6e 449 entry->wired_count++;
0a7de745 450 }
b0d623f7 451
0a7de745 452 if (flags & KMA_PERMANENT) {
b0d623f7 453 entry->permanent = TRUE;
0a7de745 454 }
b0d623f7 455
0a7de745 456 if (object != kernel_object && object != compressor_object) {
b0d623f7 457 vm_object_reference(object);
0a7de745 458 }
1c79356b
A
459
460 vm_object_lock(object);
b0d623f7 461 vm_map_unlock(map);
1c79356b 462
b0d623f7
A
463 pg_offset = 0;
464
465 if (fill_start) {
0a7de745 466 if (guard_page_list == NULL) {
b0d623f7 467 panic("kernel_memory_allocate: guard_page_list == NULL");
0a7de745 468 }
b0d623f7
A
469
470 mem = guard_page_list;
d9a64523
A
471 guard_page_list = mem->vmp_snext;
472 mem->vmp_snext = NULL;
b0d623f7
A
473
474 vm_page_insert(mem, object, offset + pg_offset);
2d21ac55 475
d9a64523 476 mem->vmp_busy = FALSE;
b0d623f7 477 pg_offset += PAGE_SIZE_64;
2d21ac55 478 }
316670eb
A
479
480 kma_prot = VM_PROT_READ | VM_PROT_WRITE;
481
5ba3f43e
A
482#if KASAN
483 if (!(flags & KMA_VAONLY)) {
484 /* for VAONLY mappings we notify in populate only */
485 kasan_notify_address(map_addr, size);
486 }
487#endif
488
d9a64523 489 if (flags & (KMA_VAONLY | KMA_PAGEABLE)) {
39236c6e
A
490 pg_offset = fill_start + fill_size;
491 } else {
0a7de745
A
492 for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
493 if (wired_page_list == NULL) {
494 panic("kernel_memory_allocate: wired_page_list == NULL");
495 }
2d21ac55 496
0a7de745
A
497 mem = wired_page_list;
498 wired_page_list = mem->vmp_snext;
499 mem->vmp_snext = NULL;
39037602 500
0a7de745
A
501 assert(mem->vmp_wire_count == 0);
502 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
39037602 503
0a7de745
A
504 mem->vmp_q_state = VM_PAGE_IS_WIRED;
505 mem->vmp_wire_count++;
506 if (__improbable(mem->vmp_wire_count == 0)) {
507 panic("kernel_memory_allocate(%p): wire_count overflow",
508 mem);
509 }
2d21ac55 510
0a7de745 511 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
0c530ab8 512
0a7de745
A
513 mem->vmp_busy = FALSE;
514 mem->vmp_pmapped = TRUE;
515 mem->vmp_wpmapped = TRUE;
b0d623f7 516
0a7de745
A
517 PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem,
518 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
519 PMAP_OPTIONS_NOWAIT, pe_result);
39236c6e 520
0a7de745
A
521 if (pe_result == KERN_RESOURCE_SHORTAGE) {
522 vm_object_unlock(object);
0b4c1975 523
0a7de745
A
524 PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
525 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
526 pe_result);
39236c6e 527
0a7de745
A
528 vm_object_lock(object);
529 }
5ba3f43e 530
0a7de745 531 assert(pe_result == KERN_SUCCESS);
5ba3f43e 532
0a7de745
A
533 if (flags & KMA_NOENCRYPT) {
534 bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
0b4c1975 535
0a7de745
A
536 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
537 }
538 }
539 if (kernel_object == object) {
540 vm_tag_update_size(tag, fill_size);
0b4c1975 541 }
39236c6e 542 }
b0d623f7 543 if ((fill_start + fill_size) < map_size) {
0a7de745 544 if (guard_page_list == NULL) {
b0d623f7 545 panic("kernel_memory_allocate: guard_page_list == NULL");
0a7de745 546 }
1c79356b 547
b0d623f7 548 mem = guard_page_list;
d9a64523
A
549 guard_page_list = mem->vmp_snext;
550 mem->vmp_snext = NULL;
b0d623f7
A
551
552 vm_page_insert(mem, object, offset + pg_offset);
2d21ac55 553
d9a64523 554 mem->vmp_busy = FALSE;
1c79356b 555 }
0a7de745 556 if (guard_page_list || wired_page_list) {
b0d623f7 557 panic("kernel_memory_allocate: non empty list\n");
0a7de745 558 }
2d21ac55 559
d9a64523 560 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
0a7de745
A
561 vm_page_lockspin_queues();
562 vm_page_wire_count += wired_page_count;
563 vm_page_unlock_queues();
39236c6e 564 }
2d21ac55 565
b0d623f7
A
566 vm_object_unlock(object);
567
568 /*
569 * now that the pages are wired, we no longer have to fear coalesce
570 */
0a7de745 571 if (object == kernel_object || object == compressor_object) {
91447636 572 vm_map_simplify(map, map_addr);
0a7de745 573 } else {
b0d623f7 574 vm_object_deallocate(object);
0a7de745 575 }
1c79356b 576
d9a64523
A
577#if DEBUG || DEVELOPMENT
578 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
579 if (task != NULL) {
580 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
581 }
d9a64523
A
582#endif
583
1c79356b
A
584 /*
585 * Return the memory, not zeroed.
586 */
91447636 587 *addrp = CAST_DOWN(vm_offset_t, map_addr);
1c79356b 588 return KERN_SUCCESS;
2d21ac55 589
b0d623f7 590out:
0a7de745 591 if (guard_page_list) {
b0d623f7 592 vm_page_free_list(guard_page_list, FALSE);
0a7de745 593 }
b0d623f7 594
0a7de745 595 if (wired_page_list) {
b0d623f7 596 vm_page_free_list(wired_page_list, FALSE);
0a7de745 597 }
b0d623f7 598
d9a64523
A
599#if DEBUG || DEVELOPMENT
600 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
601 if (task != NULL && kr == KERN_SUCCESS) {
602 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
603 }
d9a64523
A
604#endif
605
b0d623f7 606 return kr;
1c79356b
A
607}
608
39236c6e
A
609kern_return_t
610kernel_memory_populate(
0a7de745
A
611 vm_map_t map,
612 vm_offset_t addr,
613 vm_size_t size,
614 int flags,
3e170ce0 615 vm_tag_t tag)
39236c6e 616{
0a7de745
A
617 vm_object_t object;
618 vm_object_offset_t offset, pg_offset;
619 kern_return_t kr, pe_result;
620 vm_page_t mem;
621 vm_page_t page_list = NULL;
622 int page_count = 0;
623 int page_grab_count = 0;
624 int i;
39236c6e 625
d9a64523 626#if DEBUG || DEVELOPMENT
0a7de745 627 task_t task = current_task();
d9a64523
A
628 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, size, 0, 0, 0);
629#endif
630
39236c6e
A
631 page_count = (int) (size / PAGE_SIZE_64);
632
0a7de745 633 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
39236c6e
A
634
635 if (flags & KMA_COMPRESSOR) {
3e170ce0
A
636 pg_offset = page_count * PAGE_SIZE_64;
637
638 do {
39236c6e
A
639 for (;;) {
640 mem = vm_page_grab();
641
0a7de745 642 if (mem != VM_PAGE_NULL) {
39236c6e 643 break;
0a7de745
A
644 }
645
39236c6e
A
646 VM_PAGE_WAIT();
647 }
d9a64523 648 page_grab_count++;
0a7de745
A
649 if (KMA_ZERO & flags) {
650 vm_page_zero_fill(mem);
651 }
d9a64523 652 mem->vmp_snext = page_list;
39236c6e 653 page_list = mem;
3e170ce0
A
654
655 pg_offset -= PAGE_SIZE_64;
656
657 kr = pmap_enter_options(kernel_pmap,
0a7de745
A
658 addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
659 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
660 PMAP_OPTIONS_INTERNAL, NULL);
3e170ce0 661 assert(kr == KERN_SUCCESS);
3e170ce0
A
662 } while (pg_offset);
663
39236c6e
A
664 offset = addr;
665 object = compressor_object;
666
667 vm_object_lock(object);
668
669 for (pg_offset = 0;
0a7de745
A
670 pg_offset < size;
671 pg_offset += PAGE_SIZE_64) {
39236c6e 672 mem = page_list;
d9a64523
A
673 page_list = mem->vmp_snext;
674 mem->vmp_snext = NULL;
39236c6e
A
675
676 vm_page_insert(mem, object, offset + pg_offset);
d9a64523 677 assert(mem->vmp_busy);
39236c6e 678
d9a64523
A
679 mem->vmp_busy = FALSE;
680 mem->vmp_pmapped = TRUE;
681 mem->vmp_wpmapped = TRUE;
682 mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
39236c6e
A
683 }
684 vm_object_unlock(object);
685
5ba3f43e
A
686#if KASAN
687 if (map == compressor_map) {
688 kasan_notify_address_nopoison(addr, size);
689 } else {
690 kasan_notify_address(addr, size);
691 }
692#endif
d9a64523
A
693
694#if DEBUG || DEVELOPMENT
695 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
696 if (task != NULL) {
697 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
698 }
d9a64523 699#endif
39236c6e
A
700 return KERN_SUCCESS;
701 }
702
703 for (i = 0; i < page_count; i++) {
704 for (;;) {
0a7de745 705 if (flags & KMA_LOMEM) {
39236c6e 706 mem = vm_page_grablo();
0a7de745 707 } else {
39236c6e 708 mem = vm_page_grab();
0a7de745
A
709 }
710
711 if (mem != VM_PAGE_NULL) {
39236c6e 712 break;
0a7de745 713 }
39236c6e
A
714
715 if (flags & KMA_NOPAGEWAIT) {
716 kr = KERN_RESOURCE_SHORTAGE;
717 goto out;
718 }
719 if ((flags & KMA_LOMEM) &&
720 (vm_lopage_needed == TRUE)) {
721 kr = KERN_RESOURCE_SHORTAGE;
722 goto out;
723 }
724 VM_PAGE_WAIT();
725 }
d9a64523 726 page_grab_count++;
0a7de745
A
727 if (KMA_ZERO & flags) {
728 vm_page_zero_fill(mem);
729 }
d9a64523 730 mem->vmp_snext = page_list;
39236c6e
A
731 page_list = mem;
732 }
733 if (flags & KMA_KOBJECT) {
734 offset = addr;
735 object = kernel_object;
736
737 vm_object_lock(object);
738 } else {
739 /*
740 * If it's not the kernel object, we need to:
741 * lock map;
742 * lookup entry;
743 * lock object;
744 * take reference on object;
745 * unlock map;
746 */
747 panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
0a7de745
A
748 "!KMA_KOBJECT",
749 map, (uint64_t) addr, (uint64_t) size, flags);
39236c6e
A
750 }
751
752 for (pg_offset = 0;
0a7de745
A
753 pg_offset < size;
754 pg_offset += PAGE_SIZE_64) {
755 if (page_list == NULL) {
39236c6e 756 panic("kernel_memory_populate: page_list == NULL");
0a7de745 757 }
39236c6e
A
758
759 mem = page_list;
d9a64523
A
760 page_list = mem->vmp_snext;
761 mem->vmp_snext = NULL;
762
763 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
764 mem->vmp_q_state = VM_PAGE_IS_WIRED;
765 mem->vmp_wire_count++;
766 if (__improbable(mem->vmp_wire_count == 0)) {
767 panic("kernel_memory_populate(%p): wire_count overflow", mem);
39037602 768 }
39236c6e 769
3e170ce0 770 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
39236c6e 771
d9a64523
A
772 mem->vmp_busy = FALSE;
773 mem->vmp_pmapped = TRUE;
774 mem->vmp_wpmapped = TRUE;
39236c6e
A
775
776 PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
0a7de745
A
777 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
778 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
779 PMAP_OPTIONS_NOWAIT, pe_result);
39236c6e
A
780
781 if (pe_result == KERN_RESOURCE_SHORTAGE) {
39236c6e
A
782 vm_object_unlock(object);
783
784 PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
0a7de745
A
785 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
786 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
787 pe_result);
39236c6e
A
788
789 vm_object_lock(object);
790 }
5ba3f43e
A
791
792 assert(pe_result == KERN_SUCCESS);
793
39236c6e
A
794 if (flags & KMA_NOENCRYPT) {
795 bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
39037602 796 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
39236c6e
A
797 }
798 }
d9a64523 799 vm_page_lockspin_queues();
39236c6e
A
800 vm_page_wire_count += page_count;
801 vm_page_unlock_queues();
802
d9a64523
A
803#if DEBUG || DEVELOPMENT
804 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
805 if (task != NULL) {
806 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
807 }
d9a64523
A
808#endif
809
0a7de745
A
810 if (kernel_object == object) {
811 vm_tag_update_size(tag, size);
812 }
5ba3f43e 813
39236c6e
A
814 vm_object_unlock(object);
815
5ba3f43e
A
816#if KASAN
817 if (map == compressor_map) {
818 kasan_notify_address_nopoison(addr, size);
819 } else {
820 kasan_notify_address(addr, size);
821 }
822#endif
39236c6e
A
823 return KERN_SUCCESS;
824
825out:
0a7de745 826 if (page_list) {
39236c6e 827 vm_page_free_list(page_list, FALSE);
0a7de745 828 }
39236c6e 829
d9a64523
A
830#if DEBUG || DEVELOPMENT
831 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
832 if (task != NULL && kr == KERN_SUCCESS) {
833 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
834 }
d9a64523
A
835#endif
836
39236c6e
A
837 return kr;
838}
839
840
841void
842kernel_memory_depopulate(
0a7de745
A
843 vm_map_t map,
844 vm_offset_t addr,
845 vm_size_t size,
846 int flags)
39236c6e 847{
0a7de745
A
848 vm_object_t object;
849 vm_object_offset_t offset, pg_offset;
850 vm_page_t mem;
851 vm_page_t local_freeq = NULL;
39236c6e 852
0a7de745 853 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
39236c6e
A
854
855 if (flags & KMA_COMPRESSOR) {
856 offset = addr;
857 object = compressor_object;
858
859 vm_object_lock(object);
860 } else if (flags & KMA_KOBJECT) {
861 offset = addr;
862 object = kernel_object;
39236c6e
A
863 vm_object_lock(object);
864 } else {
865 offset = 0;
866 object = NULL;
0a7de745
A
867 /*
868 * If it's not the kernel object, we need to:
869 * lock map;
870 * lookup entry;
871 * lock object;
872 * unlock map;
873 */
39236c6e 874 panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
0a7de745
A
875 "!KMA_KOBJECT",
876 map, (uint64_t) addr, (uint64_t) size, flags);
39236c6e
A
877 }
878 pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);
879
880 for (pg_offset = 0;
0a7de745
A
881 pg_offset < size;
882 pg_offset += PAGE_SIZE_64) {
39236c6e
A
883 mem = vm_page_lookup(object, offset + pg_offset);
884
885 assert(mem);
0a7de745
A
886
887 if (mem->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
39037602 888 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
0a7de745 889 }
39236c6e 890
d9a64523 891 mem->vmp_busy = TRUE;
39236c6e 892
d9a64523 893 assert(mem->vmp_tabled);
39236c6e 894 vm_page_remove(mem, TRUE);
d9a64523 895 assert(mem->vmp_busy);
39236c6e 896
d9a64523
A
897 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
898 assert((mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
0a7de745 899 (mem->vmp_q_state == VM_PAGE_NOT_ON_Q));
39037602 900
d9a64523
A
901 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
902 mem->vmp_snext = local_freeq;
39236c6e
A
903 local_freeq = mem;
904 }
905 vm_object_unlock(object);
906
0a7de745 907 if (local_freeq) {
39236c6e 908 vm_page_free_list(local_freeq, TRUE);
0a7de745 909 }
39236c6e
A
910}
911
1c79356b
A
912/*
913 * kmem_alloc:
914 *
915 * Allocate wired-down memory in the kernel's address map
916 * or a submap. The memory is not zero-filled.
917 */
918
919kern_return_t
3e170ce0 920kmem_alloc_external(
0a7de745
A
921 vm_map_t map,
922 vm_offset_t *addrp,
923 vm_size_t size)
1c79356b 924{
0a7de745 925 return kmem_alloc(map, addrp, size, vm_tag_bt());
3e170ce0
A
926}
927
39037602 928
3e170ce0
A
929kern_return_t
930kmem_alloc(
0a7de745
A
931 vm_map_t map,
932 vm_offset_t *addrp,
933 vm_size_t size,
934 vm_tag_t tag)
39037602
A
935{
936 return kmem_alloc_flags(map, addrp, size, tag, 0);
937}
938
939kern_return_t
940kmem_alloc_flags(
0a7de745
A
941 vm_map_t map,
942 vm_offset_t *addrp,
943 vm_size_t size,
944 vm_tag_t tag,
945 int flags)
3e170ce0 946{
39037602 947 kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, flags, tag);
2d21ac55
A
948 TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
949 return kr;
1c79356b
A
950}
951
952/*
953 * kmem_realloc:
954 *
955 * Reallocate wired-down memory in the kernel's address map
956 * or a submap. Newly allocated pages are not zeroed.
957 * This can only be used on regions allocated with kmem_alloc.
958 *
959 * If successful, the pages in the old region are mapped twice.
960 * The old region is unchanged. Use kmem_free to get rid of it.
961 */
962kern_return_t
963kmem_realloc(
0a7de745
A
964 vm_map_t map,
965 vm_offset_t oldaddr,
966 vm_size_t oldsize,
967 vm_offset_t *newaddrp,
968 vm_size_t newsize,
969 vm_tag_t tag)
1c79356b 970{
0a7de745
A
971 vm_object_t object;
972 vm_object_offset_t offset;
973 vm_map_offset_t oldmapmin;
974 vm_map_offset_t oldmapmax;
975 vm_map_offset_t newmapaddr;
976 vm_map_size_t oldmapsize;
977 vm_map_size_t newmapsize;
978 vm_map_entry_t oldentry;
979 vm_map_entry_t newentry;
980 vm_page_t mem;
981 kern_return_t kr;
1c79356b 982
39236c6e 983 oldmapmin = vm_map_trunc_page(oldaddr,
0a7de745 984 VM_MAP_PAGE_MASK(map));
39236c6e 985 oldmapmax = vm_map_round_page(oldaddr + oldsize,
0a7de745 986 VM_MAP_PAGE_MASK(map));
91447636 987 oldmapsize = oldmapmax - oldmapmin;
39236c6e 988 newmapsize = vm_map_round_page(newsize,
0a7de745 989 VM_MAP_PAGE_MASK(map));
5c9f4661
A
990 if (newmapsize < newsize) {
991 /* overflow */
992 *newaddrp = 0;
993 return KERN_INVALID_ARGUMENT;
994 }
1c79356b
A
995
996 /*
997 * Find the VM object backing the old region.
998 */
999
b4c24cb9
A
1000 vm_map_lock(map);
1001
0a7de745 1002 if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
1c79356b 1003 panic("kmem_realloc");
0a7de745 1004 }
3e170ce0 1005 object = VME_OBJECT(oldentry);
1c79356b
A
1006
1007 /*
1008 * Increase the size of the object and
1009 * fill in the new region.
1010 */
1011
1012 vm_object_reference(object);
b4c24cb9
A
1013 /* by grabbing the object lock before unlocking the map */
1014 /* we guarantee that we will panic if more than one */
1015 /* attempt is made to realloc a kmem_alloc'd area */
1c79356b 1016 vm_object_lock(object);
b4c24cb9 1017 vm_map_unlock(map);
0a7de745 1018 if (object->vo_size != oldmapsize) {
1c79356b 1019 panic("kmem_realloc");
0a7de745 1020 }
6d2010ae 1021 object->vo_size = newmapsize;
1c79356b
A
1022 vm_object_unlock(object);
1023
b4c24cb9
A
1024 /* allocate the new pages while expanded portion of the */
1025 /* object is still not mapped */
91447636 1026 kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
0a7de745 1027 vm_object_round_page(newmapsize - oldmapsize));
1c79356b
A
1028
1029 /*
b4c24cb9 1030 * Find space for the new region.
1c79356b
A
1031 */
1032
91447636 1033 kr = vm_map_find_space(map, &newmapaddr, newmapsize,
0a7de745
A
1034 (vm_map_offset_t) 0, 0,
1035 VM_MAP_KERNEL_FLAGS_NONE,
1036 tag,
1037 &newentry);
b4c24cb9
A
1038 if (kr != KERN_SUCCESS) {
1039 vm_object_lock(object);
0a7de745 1040 for (offset = oldmapsize;
91447636 1041 offset < newmapsize; offset += PAGE_SIZE) {
0a7de745 1042 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
b0d623f7 1043 VM_PAGE_FREE(mem);
b4c24cb9
A
1044 }
1045 }
6d2010ae 1046 object->vo_size = oldmapsize;
b4c24cb9
A
1047 vm_object_unlock(object);
1048 vm_object_deallocate(object);
1049 return kr;
1050 }
3e170ce0
A
1051 VME_OBJECT_SET(newentry, object);
1052 VME_OFFSET_SET(newentry, 0);
3e170ce0 1053 assert(newentry->wired_count == 0);
b4c24cb9 1054
0a7de745 1055
b4c24cb9
A
1056 /* add an extra reference in case we have someone doing an */
1057 /* unexpected deallocate */
1058 vm_object_reference(object);
1c79356b
A
1059 vm_map_unlock(map);
1060
5ba3f43e 1061 kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
0a7de745 1062 VM_PROT_DEFAULT, tag, FALSE);
91447636 1063 if (KERN_SUCCESS != kr) {
d9a64523 1064 vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, VM_MAP_REMOVE_NO_FLAGS);
b4c24cb9 1065 vm_object_lock(object);
0a7de745
A
1066 for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
1067 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
b0d623f7 1068 VM_PAGE_FREE(mem);
b4c24cb9
A
1069 }
1070 }
6d2010ae 1071 object->vo_size = oldmapsize;
b4c24cb9
A
1072 vm_object_unlock(object);
1073 vm_object_deallocate(object);
0a7de745 1074 return kr;
b4c24cb9
A
1075 }
1076 vm_object_deallocate(object);
1c79356b 1077
0a7de745
A
1078 if (kernel_object == object) {
1079 vm_tag_update_size(tag, newmapsize);
1080 }
5ba3f43e 1081
91447636 1082 *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
1c79356b
A
1083 return KERN_SUCCESS;
1084}
1085
1086/*
b0d623f7 1087 * kmem_alloc_kobject:
1c79356b
A
1088 *
1089 * Allocate wired-down memory in the kernel's address map
1090 * or a submap. The memory is not zero-filled.
1091 *
1092 * The memory is allocated in the kernel_object.
1093 * It may not be copied with vm_map_copy, and
1094 * it may not be reallocated with kmem_realloc.
1095 */
1096
1097kern_return_t
3e170ce0 1098kmem_alloc_kobject_external(
0a7de745
A
1099 vm_map_t map,
1100 vm_offset_t *addrp,
1101 vm_size_t size)
1c79356b 1102{
0a7de745 1103 return kmem_alloc_kobject(map, addrp, size, vm_tag_bt());
3e170ce0
A
1104}
1105
1106kern_return_t
1107kmem_alloc_kobject(
0a7de745
A
1108 vm_map_t map,
1109 vm_offset_t *addrp,
1110 vm_size_t size,
3e170ce0
A
1111 vm_tag_t tag)
1112{
1113 return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag);
1c79356b
A
1114}
1115
1116/*
1117 * kmem_alloc_aligned:
1118 *
b0d623f7 1119 * Like kmem_alloc_kobject, except that the memory is aligned.
1c79356b
A
1120 * The size should be a power-of-2.
1121 */
1122
1123kern_return_t
1124kmem_alloc_aligned(
0a7de745
A
1125 vm_map_t map,
1126 vm_offset_t *addrp,
1127 vm_size_t size,
3e170ce0 1128 vm_tag_t tag)
1c79356b 1129{
0a7de745 1130 if ((size & (size - 1)) != 0) {
1c79356b 1131 panic("kmem_alloc_aligned: size not aligned");
0a7de745 1132 }
3e170ce0 1133 return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag);
1c79356b
A
1134}
1135
1136/*
1137 * kmem_alloc_pageable:
1138 *
1139 * Allocate pageable memory in the kernel's address map.
1140 */
1141
1142kern_return_t
3e170ce0 1143kmem_alloc_pageable_external(
0a7de745
A
1144 vm_map_t map,
1145 vm_offset_t *addrp,
1146 vm_size_t size)
3e170ce0 1147{
0a7de745 1148 return kmem_alloc_pageable(map, addrp, size, vm_tag_bt());
3e170ce0
A
1149}
1150
1151kern_return_t
1152kmem_alloc_pageable(
0a7de745
A
1153 vm_map_t map,
1154 vm_offset_t *addrp,
1155 vm_size_t size,
3e170ce0 1156 vm_tag_t tag)
1c79356b 1157{
91447636 1158 vm_map_offset_t map_addr;
0a7de745 1159 vm_map_size_t map_size;
1c79356b
A
1160 kern_return_t kr;
1161
1162#ifndef normal
fe8ab488 1163 map_addr = (vm_map_min(map)) + PAGE_SIZE;
1c79356b 1164#else
91447636 1165 map_addr = vm_map_min(map);
1c79356b 1166#endif
39236c6e 1167 map_size = vm_map_round_page(size,
0a7de745 1168 VM_MAP_PAGE_MASK(map));
5c9f4661
A
1169 if (map_size < size) {
1170 /* overflow */
1171 *addrp = 0;
1172 return KERN_INVALID_ARGUMENT;
1173 }
91447636
A
1174
1175 kr = vm_map_enter(map, &map_addr, map_size,
0a7de745
A
1176 (vm_map_offset_t) 0,
1177 VM_FLAGS_ANYWHERE,
1178 VM_MAP_KERNEL_FLAGS_NONE,
1179 tag,
1180 VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
1181 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1182
1183 if (kr != KERN_SUCCESS) {
1c79356b 1184 return kr;
0a7de745 1185 }
1c79356b 1186
5ba3f43e
A
1187#if KASAN
1188 kasan_notify_address(map_addr, map_size);
1189#endif
91447636 1190 *addrp = CAST_DOWN(vm_offset_t, map_addr);
1c79356b
A
1191 return KERN_SUCCESS;
1192}
1193
1194/*
1195 * kmem_free:
1196 *
1197 * Release a region of kernel virtual memory allocated
b0d623f7 1198 * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
1c79356b
A
1199 * and return the physical pages associated with that region.
1200 */
1201
1202void
1203kmem_free(
0a7de745
A
1204 vm_map_t map,
1205 vm_offset_t addr,
1206 vm_size_t size)
1c79356b
A
1207{
1208 kern_return_t kr;
1209
b0d623f7
A
1210 assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1211
2d21ac55
A
1212 TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);
1213
0a7de745 1214 if (size == 0) {
b0d623f7 1215#if MACH_ASSERT
0a7de745 1216 printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
b0d623f7
A
1217#endif
1218 return;
1219 }
1220
39236c6e 1221 kr = vm_map_remove(map,
0a7de745
A
1222 vm_map_trunc_page(addr,
1223 VM_MAP_PAGE_MASK(map)),
1224 vm_map_round_page(addr + size,
1225 VM_MAP_PAGE_MASK(map)),
1226 VM_MAP_REMOVE_KUNWIRE);
1227 if (kr != KERN_SUCCESS) {
1c79356b 1228 panic("kmem_free");
0a7de745 1229 }
1c79356b
A
1230}
1231
1232/*
b4c24cb9 1233 * Allocate new pages in an object.
1c79356b
A
1234 */
1235
1236kern_return_t
1237kmem_alloc_pages(
0a7de745
A
1238 vm_object_t object,
1239 vm_object_offset_t offset,
1240 vm_object_size_t size)
1c79356b 1241{
0a7de745 1242 vm_object_size_t alloc_size;
1c79356b 1243
91447636 1244 alloc_size = vm_object_round_page(size);
0a7de745 1245 vm_object_lock(object);
91447636 1246 while (alloc_size) {
0a7de745 1247 vm_page_t mem;
1c79356b 1248
1c79356b 1249
0a7de745
A
1250 /*
1251 * Allocate a page
1252 */
1253 while (VM_PAGE_NULL ==
1254 (mem = vm_page_alloc(object, offset))) {
1255 vm_object_unlock(object);
1256 VM_PAGE_WAIT();
1257 vm_object_lock(object);
1258 }
1259 mem->vmp_busy = FALSE;
1c79356b 1260
0a7de745
A
1261 alloc_size -= PAGE_SIZE;
1262 offset += PAGE_SIZE;
1c79356b 1263 }
b4c24cb9 1264 vm_object_unlock(object);
1c79356b
A
1265 return KERN_SUCCESS;
1266}
1267
1c79356b
A
1268/*
1269 * kmem_suballoc:
1270 *
1271 * Allocates a map to manage a subrange
1272 * of the kernel virtual address space.
1273 *
1274 * Arguments are as follows:
1275 *
1276 * parent Map to take range from
1277 * addr Address of start of range (IN/OUT)
1278 * size Size of range to find
1279 * pageable Can region be paged
1280 * anywhere Can region be located anywhere in map
1281 * new_map Pointer to new submap
1282 */
1283kern_return_t
1284kmem_suballoc(
0a7de745
A
1285 vm_map_t parent,
1286 vm_offset_t *addr,
1287 vm_size_t size,
1288 boolean_t pageable,
1289 int flags,
5ba3f43e
A
1290 vm_map_kernel_flags_t vmk_flags,
1291 vm_tag_t tag,
0a7de745 1292 vm_map_t *new_map)
1c79356b 1293{
0a7de745
A
1294 vm_map_t map;
1295 vm_map_offset_t map_addr;
1296 vm_map_size_t map_size;
1297 kern_return_t kr;
1c79356b 1298
39236c6e 1299 map_size = vm_map_round_page(size,
0a7de745 1300 VM_MAP_PAGE_MASK(parent));
5c9f4661
A
1301 if (map_size < size) {
1302 /* overflow */
1303 *addr = 0;
1304 return KERN_INVALID_ARGUMENT;
1305 }
1c79356b
A
1306
1307 /*
1308 * Need reference on submap object because it is internal
1309 * to the vm_system. vm_object_enter will never be called
1310 * on it (usual source of reference for vm_map_enter).
1311 */
1312 vm_object_reference(vm_submap_object);
1313
39236c6e 1314 map_addr = ((flags & VM_FLAGS_ANYWHERE)
0a7de745
A
1315 ? vm_map_min(parent)
1316 : vm_map_trunc_page(*addr,
1317 VM_MAP_PAGE_MASK(parent)));
91447636
A
1318
1319 kr = vm_map_enter(parent, &map_addr, map_size,
0a7de745
A
1320 (vm_map_offset_t) 0, flags, vmk_flags, tag,
1321 vm_submap_object, (vm_object_offset_t) 0, FALSE,
1322 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1c79356b
A
1323 if (kr != KERN_SUCCESS) {
1324 vm_object_deallocate(vm_submap_object);
0a7de745 1325 return kr;
1c79356b
A
1326 }
1327
1328 pmap_reference(vm_map_pmap(parent));
91447636 1329 map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
0a7de745
A
1330 if (map == VM_MAP_NULL) {
1331 panic("kmem_suballoc: vm_map_create failed"); /* "can't happen" */
1332 }
39236c6e
A
1333 /* inherit the parent map's page size */
1334 vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));
1c79356b 1335
91447636 1336 kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
1c79356b
A
1337 if (kr != KERN_SUCCESS) {
1338 /*
1339 * See comment preceding vm_map_submap().
1340 */
d9a64523 1341 vm_map_remove(parent, map_addr, map_addr + map_size,
0a7de745
A
1342 VM_MAP_REMOVE_NO_FLAGS);
1343 vm_map_deallocate(map); /* also removes ref to pmap */
1c79356b 1344 vm_object_deallocate(vm_submap_object);
0a7de745 1345 return kr;
1c79356b 1346 }
91447636 1347 *addr = CAST_DOWN(vm_offset_t, map_addr);
1c79356b 1348 *new_map = map;
0a7de745 1349 return KERN_SUCCESS;
1c79356b 1350}
4ba76501
A
1351/*
1352 * The default percentage of memory that can be mlocked is scaled based on the total
1353 * amount of memory in the system. These percentages are calculated
1354 * offline and stored in this table. We index this table by
1355 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
1356 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
1357 *
1358 * Note that these values were picked for mac.
1359 * If we ever have very large memory config arm devices, we may want to revisit
1360 * since the kernel overhead is smaller there due to the larger page size.
1361 */
1362
1363/* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
1364#define VM_USER_WIREABLE_MIN_CONFIG 32
1365static vm_map_size_t wire_limit_percents[] =
1366{ 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
1367
1368/*
1369 * Sets the default global user wire limit which limits the amount of
1370 * memory that can be locked via mlock() based on the above algorithm.
1371 * This can be overridden via a sysctl.
1372 */
1373static void
1374kmem_set_user_wire_limits(void)
1375{
1376 uint64_t available_mem_log;
1377 uint64_t max_wire_percent;
1378 size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
1379 sizeof(vm_map_size_t);
1380 vm_map_size_t limit;
1381 available_mem_log = bit_floor(max_mem);
1382
1383 if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
1384 available_mem_log = 0;
1385 } else {
1386 available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
1387 }
1388 if (available_mem_log >= wire_limit_percents_length) {
1389 available_mem_log = wire_limit_percents_length - 1;
1390 }
1391 max_wire_percent = wire_limit_percents[available_mem_log];
1392
1393 limit = max_mem * max_wire_percent / 100;
1394 /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
1395 if (max_mem - limit > VM_NOT_USER_WIREABLE_MAX) {
1396 limit = max_mem - VM_NOT_USER_WIREABLE_MAX;
1397 }
1398
1399 vm_global_user_wire_limit = limit;
1400 /* the default per task limit is the same as the global limit */
1401 vm_per_task_user_wire_limit = limit;
1402}
1403
1c79356b
A
1404
1405/*
1406 * kmem_init:
1407 *
1408 * Initialize the kernel's virtual memory map, taking
1409 * into account all memory allocated up to this time.
1410 */
1411void
1412kmem_init(
0a7de745
A
1413 vm_offset_t start,
1414 vm_offset_t end)
1c79356b 1415{
91447636
A
1416 vm_map_offset_t map_start;
1417 vm_map_offset_t map_end;
5ba3f43e
A
1418 vm_map_kernel_flags_t vmk_flags;
1419
1420 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1421 vmk_flags.vmkf_permanent = TRUE;
1422 vmk_flags.vmkf_no_pmap_check = TRUE;
91447636 1423
39236c6e 1424 map_start = vm_map_trunc_page(start,
0a7de745 1425 VM_MAP_PAGE_MASK(kernel_map));
39236c6e 1426 map_end = vm_map_round_page(end,
0a7de745 1427 VM_MAP_PAGE_MASK(kernel_map));
91447636 1428
0a7de745
A
1429#if defined(__arm__) || defined(__arm64__)
1430 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1431 VM_MAX_KERNEL_ADDRESS, FALSE);
5ba3f43e
A
1432 /*
1433 * Reserve virtual memory allocated up to this time.
1434 */
1435 {
0a7de745
A
1436 unsigned int region_select = 0;
1437 vm_map_offset_t region_start;
1438 vm_map_size_t region_size;
5ba3f43e
A
1439 vm_map_offset_t map_addr;
1440 kern_return_t kr;
1441
1442 while (pmap_virtual_region(region_select, &region_start, &region_size)) {
5ba3f43e
A
1443 map_addr = region_start;
1444 kr = vm_map_enter(kernel_map, &map_addr,
0a7de745
A
1445 vm_map_round_page(region_size,
1446 VM_MAP_PAGE_MASK(kernel_map)),
1447 (vm_map_offset_t) 0,
1448 VM_FLAGS_FIXED,
1449 vmk_flags,
1450 VM_KERN_MEMORY_NONE,
1451 VM_OBJECT_NULL,
1452 (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
1453 VM_INHERIT_DEFAULT);
5ba3f43e
A
1454
1455 if (kr != KERN_SUCCESS) {
1456 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
0a7de745
A
1457 (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
1458 (uint64_t) region_size, kr);
1459 }
5ba3f43e
A
1460
1461 region_select++;
0a7de745 1462 }
5ba3f43e
A
1463 }
1464#else
0a7de745
A
1465 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1466 map_end, FALSE);
1c79356b
A
1467 /*
1468 * Reserve virtual memory allocated up to this time.
1469 */
6d2010ae 1470 if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
91447636 1471 vm_map_offset_t map_addr;
6d2010ae 1472 kern_return_t kr;
0a7de745 1473
5ba3f43e
A
1474 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1475 vmk_flags.vmkf_no_pmap_check = TRUE;
1476
6d2010ae
A
1477 map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1478 kr = vm_map_enter(kernel_map,
0a7de745
A
1479 &map_addr,
1480 (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1481 (vm_map_offset_t) 0,
1482 VM_FLAGS_FIXED,
1483 vmk_flags,
1484 VM_KERN_MEMORY_NONE,
1485 VM_OBJECT_NULL,
1486 (vm_object_offset_t) 0, FALSE,
1487 VM_PROT_NONE, VM_PROT_NONE,
1488 VM_INHERIT_DEFAULT);
1489
6d2010ae
A
1490 if (kr != KERN_SUCCESS) {
1491 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
0a7de745
A
1492 (uint64_t) start, (uint64_t) end,
1493 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1494 (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1495 kr);
1496 }
1c79356b 1497 }
5ba3f43e 1498#endif
6d2010ae 1499
4ba76501 1500 kmem_set_user_wire_limits();
1c79356b
A
1501}
1502
1c79356b
A
1503/*
1504 * Routine: copyinmap
1505 * Purpose:
1506 * Like copyin, except that fromaddr is an address
1507 * in the specified VM map. This implementation
1508 * is incomplete; it handles the current user map
1509 * and the kernel map/submaps.
1510 */
91447636 1511kern_return_t
1c79356b 1512copyinmap(
0a7de745
A
1513 vm_map_t map,
1514 vm_map_offset_t fromaddr,
1515 void *todata,
1516 vm_size_t length)
1c79356b 1517{
0a7de745 1518 kern_return_t kr = KERN_SUCCESS;
91447636
A
1519 vm_map_t oldmap;
1520
0a7de745 1521 if (vm_map_pmap(map) == pmap_kernel()) {
1c79356b 1522 /* assume a correct copy */
91447636 1523 memcpy(todata, CAST_DOWN(void *, fromaddr), length);
0a7de745
A
1524 } else if (current_map() == map) {
1525 if (copyin(fromaddr, todata, length) != 0) {
91447636 1526 kr = KERN_INVALID_ADDRESS;
0a7de745
A
1527 }
1528 } else {
91447636
A
1529 vm_map_reference(map);
1530 oldmap = vm_map_switch(map);
0a7de745 1531 if (copyin(fromaddr, todata, length) != 0) {
91447636 1532 kr = KERN_INVALID_ADDRESS;
0a7de745 1533 }
91447636
A
1534 vm_map_switch(oldmap);
1535 vm_map_deallocate(map);
1536 }
1537 return kr;
1c79356b
A
1538}
1539
1540/*
1541 * Routine: copyoutmap
1542 * Purpose:
1543 * Like copyout, except that toaddr is an address
1544 * in the specified VM map. This implementation
1545 * is incomplete; it handles the current user map
1546 * and the kernel map/submaps.
1547 */
91447636 1548kern_return_t
1c79356b 1549copyoutmap(
0a7de745
A
1550 vm_map_t map,
1551 void *fromdata,
1552 vm_map_address_t toaddr,
1553 vm_size_t length)
1c79356b
A
1554{
1555 if (vm_map_pmap(map) == pmap_kernel()) {
1556 /* assume a correct copy */
91447636
A
1557 memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
1558 return KERN_SUCCESS;
1c79356b
A
1559 }
1560
0a7de745 1561 if (current_map() != map) {
91447636 1562 return KERN_NOT_SUPPORTED;
0a7de745 1563 }
91447636 1564
0a7de745 1565 if (copyout(fromdata, toaddr, length) != 0) {
91447636 1566 return KERN_INVALID_ADDRESS;
0a7de745 1567 }
1c79356b 1568
91447636 1569 return KERN_SUCCESS;
1c79356b 1570}
9bccf70c 1571
3e170ce0
A
1572/*
1573 *
1574 * The following two functions are to be used when exposing kernel
1575 * addresses to userspace via any of the various debug or info
1576 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
1577 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
1578 * are exported to KEXTs.
1579 *
1580 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
1581 */
1582
5ba3f43e
A
1583static void
1584vm_kernel_addrhash_internal(
3e170ce0 1585 vm_offset_t addr,
5ba3f43e
A
1586 vm_offset_t *hash_addr,
1587 uint64_t salt)
3e170ce0 1588{
5ba3f43e
A
1589 assert(salt != 0);
1590
3e170ce0 1591 if (addr == 0) {
5ba3f43e 1592 *hash_addr = 0;
3e170ce0
A
1593 return;
1594 }
1595
5ba3f43e
A
1596 if (VM_KERNEL_IS_SLID(addr)) {
1597 *hash_addr = VM_KERNEL_UNSLIDE(addr);
1598 return;
1599 }
1600
0a7de745 1601 vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
5ba3f43e
A
1602 SHA256_CTX sha_ctx;
1603
1604 SHA256_Init(&sha_ctx);
1605 SHA256_Update(&sha_ctx, &salt, sizeof(salt));
1606 SHA256_Update(&sha_ctx, &addr, sizeof(addr));
1607 SHA256_Final(sha_digest, &sha_ctx);
1608
1609 *hash_addr = sha_digest[0];
1610}
1611
1612void
1613vm_kernel_addrhash_external(
1614 vm_offset_t addr,
1615 vm_offset_t *hash_addr)
1616{
1617 return vm_kernel_addrhash_internal(addr, hash_addr, vm_kernel_addrhash_salt_ext);
1618}
1619
1620vm_offset_t
1621vm_kernel_addrhash(vm_offset_t addr)
1622{
1623 vm_offset_t hash_addr;
1624 vm_kernel_addrhash_internal(addr, &hash_addr, vm_kernel_addrhash_salt);
1625 return hash_addr;
1626}
1627
1628void
1629vm_kernel_addrhide(
1630 vm_offset_t addr,
1631 vm_offset_t *hide_addr)
1632{
1633 *hide_addr = VM_KERNEL_ADDRHIDE(addr);
3e170ce0
A
1634}
1635
1636/*
5ba3f43e 1637 * vm_kernel_addrperm_external:
3e170ce0
A
1638 * vm_kernel_unslide_or_perm_external:
1639 *
5ba3f43e 1640 * Use these functions when exposing an address to userspace that could come from
3e170ce0
A
1641 * either kernel text/data *or* the heap.
1642 */
1643void
5ba3f43e 1644vm_kernel_addrperm_external(
3e170ce0 1645 vm_offset_t addr,
5ba3f43e 1646 vm_offset_t *perm_addr)
3e170ce0 1647{
39037602 1648 if (VM_KERNEL_IS_SLID(addr)) {
5ba3f43e
A
1649 *perm_addr = VM_KERNEL_UNSLIDE(addr);
1650 } else if (VM_KERNEL_ADDRESS(addr)) {
1651 *perm_addr = addr + vm_kernel_addrperm_ext;
1652 } else {
1653 *perm_addr = addr;
3e170ce0 1654 }
5ba3f43e 1655}
3e170ce0 1656
5ba3f43e
A
1657void
1658vm_kernel_unslide_or_perm_external(
1659 vm_offset_t addr,
1660 vm_offset_t *up_addr)
1661{
3e170ce0 1662 vm_kernel_addrperm_external(addr, up_addr);
3e170ce0 1663}