/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *  File:   vm/vm_kern.c
 *  Author: Avadis Tevanian, Jr., Michael Wayne Young
 *  Date:   1985
 *
 *  Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_compressor.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>
#include <vm/cpm.h>
#include <kern/ledger.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <libkern/crypto/sha2.h>
#include <libkern/section_keywords.h>
#include <sys/kdebug.h>

#include <san/kasan.h>

/*
 * Variables exported by this module.
 */

SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
vm_map_t kernel_pageable_map;

extern boolean_t vm_kernel_ready;

/*
 * Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
    vm_object_t         object,
    vm_object_offset_t  offset,
    vm_object_size_t    size);

kern_return_t
kmem_alloc_contig(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size,
    vm_offset_t     mask,
    ppnum_t         max_pnum,
    ppnum_t         pnum_mask,
    int             flags,
    vm_tag_t        tag)
{
    vm_object_t         object;
    vm_object_offset_t  offset;
    vm_map_offset_t     map_addr;
    vm_map_offset_t     map_mask;
    vm_map_size_t       map_size, i;
    vm_map_entry_t      entry;
    vm_page_t           m, pages;
    kern_return_t       kr;

    assert(VM_KERN_MEMORY_NONE != tag);

    if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) {
        return KERN_INVALID_ARGUMENT;
    }

    map_size = vm_map_round_page(size,
        VM_MAP_PAGE_MASK(map));
    map_mask = (vm_map_offset_t)mask;

    /* Check for zero allocation size (either directly or via overflow) */
    if (map_size == 0) {
        *addrp = 0;
        return KERN_INVALID_ARGUMENT;
    }

    /*
     * Allocate a new object (if necessary) and the reference we
     * will be donating to the map entry.  We must do this before
     * locking the map, or risk deadlock with the default pager.
     */
    if ((flags & KMA_KOBJECT) != 0) {
        object = kernel_object;
        vm_object_reference(object);
    } else {
        object = vm_object_allocate(map_size);
    }

    kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0,
        VM_MAP_KERNEL_FLAGS_NONE, tag, &entry);
    if (KERN_SUCCESS != kr) {
        vm_object_deallocate(object);
        return kr;
    }

    if (object == kernel_object) {
        offset = map_addr;
    } else {
        offset = 0;
    }
    VME_OBJECT_SET(entry, object);
    VME_OFFSET_SET(entry, offset);

    /* Take an extra object ref in case the map entry gets deleted */
    vm_object_reference(object);
    vm_map_unlock(map);

    kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

    if (kr != KERN_SUCCESS) {
        vm_map_remove(map,
            vm_map_trunc_page(map_addr,
            VM_MAP_PAGE_MASK(map)),
            vm_map_round_page(map_addr + map_size,
            VM_MAP_PAGE_MASK(map)),
            VM_MAP_REMOVE_NO_FLAGS);
        vm_object_deallocate(object);
        *addrp = 0;
        return kr;
    }

    vm_object_lock(object);
    for (i = 0; i < map_size; i += PAGE_SIZE) {
        m = pages;
        pages = NEXT_PAGE(m);
        *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
        m->vmp_busy = FALSE;
        vm_page_insert(m, object, offset + i);
    }
    vm_object_unlock(object);

    kr = vm_map_wire_kernel(map,
        vm_map_trunc_page(map_addr,
        VM_MAP_PAGE_MASK(map)),
        vm_map_round_page(map_addr + map_size,
        VM_MAP_PAGE_MASK(map)),
        VM_PROT_DEFAULT, tag,
        FALSE);

    if (kr != KERN_SUCCESS) {
        if (object == kernel_object) {
            vm_object_lock(object);
            vm_object_page_remove(object, offset, offset + map_size);
            vm_object_unlock(object);
        }
        vm_map_remove(map,
            vm_map_trunc_page(map_addr,
            VM_MAP_PAGE_MASK(map)),
            vm_map_round_page(map_addr + map_size,
            VM_MAP_PAGE_MASK(map)),
            VM_MAP_REMOVE_NO_FLAGS);
        vm_object_deallocate(object);
        return kr;
    }
    vm_object_deallocate(object);

    if (object == kernel_object) {
        vm_map_simplify(map, map_addr);
        vm_tag_update_size(tag, map_size);
    }
    *addrp = (vm_offset_t) map_addr;
    assert((vm_map_offset_t) *addrp == map_addr);

    return KERN_SUCCESS;
}

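/*
 * Illustrative example (a sketch, not part of the original source): how a
 * caller might request physically contiguous, wired memory below a physical
 * page-number ceiling with kmem_alloc_contig() above.  The 16-page size, the
 * 4 GB ceiling, and the helper name are assumptions made for illustration.
 */
#if 0   /* illustrative only -- not compiled */
static kern_return_t
example_alloc_contig_buffer(vm_offset_t *bufp)
{
    /* 16 pages, physically contiguous, wired, restricted to pages below 4 GB */
    vm_size_t     size     = 16 * PAGE_SIZE;
    ppnum_t       max_pnum = (ppnum_t)((1ULL << 32) >> PAGE_SHIFT);
    kern_return_t kr;

    kr = kmem_alloc_contig(kernel_map, bufp, size,
        /* mask */ 0, max_pnum, /* pnum_mask */ 0,
        /* flags */ 0, vm_tag_bt());
    if (kr != KERN_SUCCESS) {
        return kr;      /* e.g. KERN_RESOURCE_SHORTAGE from cpm_allocate() */
    }
    /* ... use the buffer ... */
    kmem_free(kernel_map, *bufp, size);
    return KERN_SUCCESS;
}
#endif
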
/*
 * Master entry point for allocating kernel memory.
 * NOTE: this routine is _never_ interrupt safe.
 *
 * map   : map to allocate into
 * addrp : pointer to start address of new memory
 * size  : size of memory requested
 * flags : options
 *         KMA_HERE        *addrp is base address, else "anywhere"
 *         KMA_NOPAGEWAIT  don't wait for pages if unavailable
 *         KMA_KOBJECT     use kernel_object
 *         KMA_LOMEM       support for 32 bit devices in a 64 bit world
 *                         if set and a lomemory pool is available
 *                         grab pages from it... this also implies
 *                         KMA_NOPAGEWAIT
 */

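/*
 * Illustrative example (a sketch, not part of the original source): how the
 * flags documented above combine for a wired, zero-filled allocation in the
 * kernel object with guard pages on both ends.  Note that the requested size
 * must already include the two guard pages; the helper name and sizes are
 * assumptions made for illustration.
 */
#if 0   /* illustrative only -- not compiled */
static kern_return_t
example_guarded_allocation(vm_offset_t *addrp)
{
    /* four usable pages plus one guard page at each end */
    vm_size_t size = (4 + 2) * PAGE_SIZE;

    return kernel_memory_allocate(kernel_map, addrp, size,
        /* mask */ 0,
        KMA_KOBJECT | KMA_ZERO | KMA_GUARD_FIRST | KMA_GUARD_LAST,
        vm_tag_bt());
}
#endif
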
249kern_return_t
250kernel_memory_allocate(
0a7de745
A
251 vm_map_t map,
252 vm_offset_t *addrp,
253 vm_size_t size,
254 vm_offset_t mask,
255 int flags,
3e170ce0 256 vm_tag_t tag)
1c79356b 257{
0a7de745
A
258 vm_object_t object;
259 vm_object_offset_t offset;
260 vm_object_offset_t pg_offset;
261 vm_map_entry_t entry = NULL;
262 vm_map_offset_t map_addr, fill_start;
263 vm_map_offset_t map_mask;
264 vm_map_size_t map_size, fill_size;
265 kern_return_t kr, pe_result;
266 vm_page_t mem;
267 vm_page_t guard_page_list = NULL;
268 vm_page_t wired_page_list = NULL;
269 int guard_page_count = 0;
270 int wired_page_count = 0;
271 int page_grab_count = 0;
272 int i;
273 int vm_alloc_flags;
274 vm_map_kernel_flags_t vmk_flags;
275 vm_prot_t kma_prot;
276#if DEVELOPMENT || DEBUG
277 task_t task = current_task();
278#endif /* DEVELOPMENT || DEBUG */
279
280 if (!vm_kernel_ready) {
2d21ac55
A
281 panic("kernel_memory_allocate: VM is not ready");
282 }
1c79356b 283
39236c6e 284 map_size = vm_map_round_page(size,
0a7de745 285 VM_MAP_PAGE_MASK(map));
91447636 286 map_mask = (vm_map_offset_t) mask;
3e170ce0 287
5ba3f43e
A
288 vm_alloc_flags = 0; //VM_MAKE_TAG(tag);
289 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
2d21ac55 290
316670eb
A
291 /* Check for zero allocation size (either directly or via overflow) */
292 if (map_size == 0) {
293 *addrp = 0;
294 return KERN_INVALID_ARGUMENT;
295 }
b0d623f7
A
296
297 /*
298 * limit the size of a single extent of wired memory
299 * to try and limit the damage to the system if
300 * too many pages get wired down
4bd07ac2
A
301 * limit raised to 2GB with 128GB max physical limit,
302 * but scaled by installed memory above this
b0d623f7 303 */
0a7de745
A
304 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
305 map_size > MAX(1ULL << 31, sane_size / 64)) {
306 return KERN_RESOURCE_SHORTAGE;
307 }
b0d623f7 308
    /*
     * Guard pages:
     *
     * Guard pages are implemented as fictitious pages.  By placing guard pages
     * on either end of a stack, they can help detect cases where a thread walks
     * off either end of its stack.  They are allocated and set up here and attempts
     * to access those pages are trapped in vm_fault_page().
     *
     * The map_size we were passed may include extra space for
     * guard pages.  If those were requested, then back it out of fill_size
     * since vm_map_find_space() takes just the actual size not including
     * guard pages.  Similarly, fill_start indicates where the actual pages
     * will begin in the range.
     */
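    /*
     * Illustrative arithmetic (not in the original source): with
     * KMA_GUARD_FIRST | KMA_GUARD_LAST and a map_size of 3 pages, the code
     * below yields fill_start = PAGE_SIZE and fill_size = PAGE_SIZE, so one
     * page is actually wired (wired_page_count = 1) and the first and last
     * pages of the range become fictitious guard pages.
     */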
323
324 fill_start = 0;
325 fill_size = map_size;
b0d623f7 326
2d21ac55 327 if (flags & KMA_GUARD_FIRST) {
5ba3f43e 328 vmk_flags.vmkf_guard_before = TRUE;
2d21ac55
A
329 fill_start += PAGE_SIZE_64;
330 fill_size -= PAGE_SIZE_64;
331 if (map_size < fill_start + fill_size) {
332 /* no space for a guard page */
333 *addrp = 0;
334 return KERN_INVALID_ARGUMENT;
335 }
b0d623f7 336 guard_page_count++;
2d21ac55
A
337 }
338 if (flags & KMA_GUARD_LAST) {
5ba3f43e 339 vmk_flags.vmkf_guard_after = TRUE;
2d21ac55
A
340 fill_size -= PAGE_SIZE_64;
341 if (map_size <= fill_start + fill_size) {
342 /* no space for a guard page */
343 *addrp = 0;
344 return KERN_INVALID_ARGUMENT;
345 }
b0d623f7
A
346 guard_page_count++;
347 }
348 wired_page_count = (int) (fill_size / PAGE_SIZE_64);
349 assert(wired_page_count * PAGE_SIZE_64 == fill_size);
350
d9a64523
A
351#if DEBUG || DEVELOPMENT
352 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, size, 0, 0, 0);
353#endif
354
b0d623f7
A
355 for (i = 0; i < guard_page_count; i++) {
356 for (;;) {
357 mem = vm_page_grab_guard();
358
0a7de745 359 if (mem != VM_PAGE_NULL) {
b0d623f7 360 break;
0a7de745 361 }
b0d623f7
A
362 if (flags & KMA_NOPAGEWAIT) {
363 kr = KERN_RESOURCE_SHORTAGE;
364 goto out;
365 }
366 vm_page_more_fictitious();
367 }
d9a64523 368 mem->vmp_snext = guard_page_list;
b0d623f7
A
369 guard_page_list = mem;
370 }
371
d9a64523 372 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
0a7de745 373 for (i = 0; i < wired_page_count; i++) {
0a7de745
A
374 for (;;) {
375 if (flags & KMA_LOMEM) {
376 mem = vm_page_grablo();
377 } else {
378 mem = vm_page_grab();
379 }
b0d623f7 380
0a7de745
A
381 if (mem != VM_PAGE_NULL) {
382 break;
383 }
384
385 if (flags & KMA_NOPAGEWAIT) {
386 kr = KERN_RESOURCE_SHORTAGE;
387 goto out;
388 }
389 if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
390 kr = KERN_RESOURCE_SHORTAGE;
391 goto out;
392 }
0a7de745 393
cb323159
A
394 /* VM privileged threads should have waited in vm_page_grab() and not get here. */
395 assert(!(current_thread()->options & TH_OPT_VMPRIV));
396
397 uint64_t unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;
0a7de745
A
398 if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
399 kr = KERN_RESOURCE_SHORTAGE;
400 goto out;
401 }
402 VM_PAGE_WAIT();
0b4c1975 403 }
0a7de745
A
404 page_grab_count++;
405 if (KMA_ZERO & flags) {
406 vm_page_zero_fill(mem);
b0d623f7 407 }
0a7de745
A
408 mem->vmp_snext = wired_page_list;
409 wired_page_list = mem;
b0d623f7 410 }
39236c6e 411 }
91447636
A
412
413 /*
414 * Allocate a new object (if necessary). We must do this before
415 * locking the map, or risk deadlock with the default pager.
416 */
417 if ((flags & KMA_KOBJECT) != 0) {
1c79356b 418 object = kernel_object;
91447636 419 vm_object_reference(object);
39236c6e
A
420 } else if ((flags & KMA_COMPRESSOR) != 0) {
421 object = compressor_object;
422 vm_object_reference(object);
91447636
A
423 } else {
424 object = vm_object_allocate(map_size);
1c79356b 425 }
91447636 426
0a7de745 427 if (flags & KMA_ATOMIC) {
5ba3f43e 428 vmk_flags.vmkf_atomic_entry = TRUE;
0a7de745 429 }
5ba3f43e 430
2d21ac55 431 kr = vm_map_find_space(map, &map_addr,
0a7de745
A
432 fill_size, map_mask,
433 vm_alloc_flags, vmk_flags, tag, &entry);
91447636
A
434 if (KERN_SUCCESS != kr) {
435 vm_object_deallocate(object);
b0d623f7 436 goto out;
1c79356b 437 }
2d21ac55 438
3e170ce0
A
439 if (object == kernel_object || object == compressor_object) {
440 offset = map_addr;
441 } else {
442 offset = 0;
443 }
444 VME_OBJECT_SET(entry, object);
445 VME_OFFSET_SET(entry, offset);
0a7de745
A
446
447 if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
39236c6e 448 entry->wired_count++;
0a7de745 449 }
b0d623f7 450
0a7de745 451 if (flags & KMA_PERMANENT) {
b0d623f7 452 entry->permanent = TRUE;
0a7de745 453 }
b0d623f7 454
0a7de745 455 if (object != kernel_object && object != compressor_object) {
b0d623f7 456 vm_object_reference(object);
0a7de745 457 }
1c79356b
A
458
459 vm_object_lock(object);
b0d623f7 460 vm_map_unlock(map);
1c79356b 461
b0d623f7
A
462 pg_offset = 0;
463
464 if (fill_start) {
0a7de745 465 if (guard_page_list == NULL) {
b0d623f7 466 panic("kernel_memory_allocate: guard_page_list == NULL");
0a7de745 467 }
b0d623f7
A
468
469 mem = guard_page_list;
d9a64523
A
470 guard_page_list = mem->vmp_snext;
471 mem->vmp_snext = NULL;
b0d623f7
A
472
473 vm_page_insert(mem, object, offset + pg_offset);
2d21ac55 474
d9a64523 475 mem->vmp_busy = FALSE;
b0d623f7 476 pg_offset += PAGE_SIZE_64;
2d21ac55 477 }
316670eb
A
478
479 kma_prot = VM_PROT_READ | VM_PROT_WRITE;
480
5ba3f43e
A
481#if KASAN
482 if (!(flags & KMA_VAONLY)) {
483 /* for VAONLY mappings we notify in populate only */
484 kasan_notify_address(map_addr, size);
485 }
486#endif
487
d9a64523 488 if (flags & (KMA_VAONLY | KMA_PAGEABLE)) {
39236c6e
A
489 pg_offset = fill_start + fill_size;
490 } else {
0a7de745
A
491 for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
492 if (wired_page_list == NULL) {
493 panic("kernel_memory_allocate: wired_page_list == NULL");
494 }
2d21ac55 495
0a7de745
A
496 mem = wired_page_list;
497 wired_page_list = mem->vmp_snext;
498 mem->vmp_snext = NULL;
39037602 499
0a7de745
A
500 assert(mem->vmp_wire_count == 0);
501 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
39037602 502
0a7de745
A
503 mem->vmp_q_state = VM_PAGE_IS_WIRED;
504 mem->vmp_wire_count++;
505 if (__improbable(mem->vmp_wire_count == 0)) {
506 panic("kernel_memory_allocate(%p): wire_count overflow",
507 mem);
508 }
2d21ac55 509
0a7de745 510 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
0c530ab8 511
0a7de745
A
512 mem->vmp_busy = FALSE;
513 mem->vmp_pmapped = TRUE;
514 mem->vmp_wpmapped = TRUE;
b0d623f7 515
0a7de745
A
516 PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem,
517 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
518 PMAP_OPTIONS_NOWAIT, pe_result);
39236c6e 519
0a7de745
A
520 if (pe_result == KERN_RESOURCE_SHORTAGE) {
521 vm_object_unlock(object);
0b4c1975 522
0a7de745
A
523 PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
524 kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
525 pe_result);
39236c6e 526
0a7de745
A
527 vm_object_lock(object);
528 }
5ba3f43e 529
0a7de745 530 assert(pe_result == KERN_SUCCESS);
5ba3f43e 531
0a7de745
A
532 if (flags & KMA_NOENCRYPT) {
533 bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
0b4c1975 534
0a7de745
A
535 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
536 }
537 }
538 if (kernel_object == object) {
539 vm_tag_update_size(tag, fill_size);
0b4c1975 540 }
39236c6e 541 }
b0d623f7 542 if ((fill_start + fill_size) < map_size) {
0a7de745 543 if (guard_page_list == NULL) {
b0d623f7 544 panic("kernel_memory_allocate: guard_page_list == NULL");
0a7de745 545 }
1c79356b 546
b0d623f7 547 mem = guard_page_list;
d9a64523
A
548 guard_page_list = mem->vmp_snext;
549 mem->vmp_snext = NULL;
b0d623f7
A
550
551 vm_page_insert(mem, object, offset + pg_offset);
2d21ac55 552
d9a64523 553 mem->vmp_busy = FALSE;
1c79356b 554 }
0a7de745 555 if (guard_page_list || wired_page_list) {
b0d623f7 556 panic("kernel_memory_allocate: non empty list\n");
0a7de745 557 }
2d21ac55 558
d9a64523 559 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
0a7de745
A
560 vm_page_lockspin_queues();
561 vm_page_wire_count += wired_page_count;
562 vm_page_unlock_queues();
39236c6e 563 }
2d21ac55 564
b0d623f7
A
565 vm_object_unlock(object);
566
567 /*
568 * now that the pages are wired, we no longer have to fear coalesce
569 */
0a7de745 570 if (object == kernel_object || object == compressor_object) {
91447636 571 vm_map_simplify(map, map_addr);
0a7de745 572 } else {
b0d623f7 573 vm_object_deallocate(object);
0a7de745 574 }
1c79356b 575
d9a64523
A
576#if DEBUG || DEVELOPMENT
577 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
578 if (task != NULL) {
579 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
580 }
d9a64523
A
581#endif
582
1c79356b
A
583 /*
584 * Return the memory, not zeroed.
585 */
91447636 586 *addrp = CAST_DOWN(vm_offset_t, map_addr);
1c79356b 587 return KERN_SUCCESS;
2d21ac55 588
b0d623f7 589out:
0a7de745 590 if (guard_page_list) {
b0d623f7 591 vm_page_free_list(guard_page_list, FALSE);
0a7de745 592 }
b0d623f7 593
0a7de745 594 if (wired_page_list) {
b0d623f7 595 vm_page_free_list(wired_page_list, FALSE);
0a7de745 596 }
b0d623f7 597
d9a64523
A
598#if DEBUG || DEVELOPMENT
599 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
600 if (task != NULL && kr == KERN_SUCCESS) {
601 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
602 }
d9a64523
A
603#endif
604
b0d623f7 605 return kr;
1c79356b
A
606}
607
39236c6e
A
608kern_return_t
609kernel_memory_populate(
0a7de745
A
610 vm_map_t map,
611 vm_offset_t addr,
612 vm_size_t size,
613 int flags,
3e170ce0 614 vm_tag_t tag)
39236c6e 615{
0a7de745
A
616 vm_object_t object;
617 vm_object_offset_t offset, pg_offset;
618 kern_return_t kr, pe_result;
619 vm_page_t mem;
620 vm_page_t page_list = NULL;
621 int page_count = 0;
622 int page_grab_count = 0;
623 int i;
39236c6e 624
d9a64523 625#if DEBUG || DEVELOPMENT
0a7de745 626 task_t task = current_task();
d9a64523
A
627 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, size, 0, 0, 0);
628#endif
629
39236c6e
A
630 page_count = (int) (size / PAGE_SIZE_64);
631
0a7de745 632 assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
39236c6e
A
633
634 if (flags & KMA_COMPRESSOR) {
3e170ce0
A
635 pg_offset = page_count * PAGE_SIZE_64;
636
637 do {
39236c6e
A
638 for (;;) {
639 mem = vm_page_grab();
640
0a7de745 641 if (mem != VM_PAGE_NULL) {
39236c6e 642 break;
0a7de745
A
643 }
644
39236c6e
A
645 VM_PAGE_WAIT();
646 }
d9a64523 647 page_grab_count++;
0a7de745
A
648 if (KMA_ZERO & flags) {
649 vm_page_zero_fill(mem);
650 }
d9a64523 651 mem->vmp_snext = page_list;
39236c6e 652 page_list = mem;
3e170ce0
A
653
654 pg_offset -= PAGE_SIZE_64;
655
656 kr = pmap_enter_options(kernel_pmap,
0a7de745
A
657 addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
658 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
659 PMAP_OPTIONS_INTERNAL, NULL);
3e170ce0 660 assert(kr == KERN_SUCCESS);
3e170ce0
A
661 } while (pg_offset);
662
39236c6e
A
663 offset = addr;
664 object = compressor_object;
665
666 vm_object_lock(object);
667
668 for (pg_offset = 0;
0a7de745
A
669 pg_offset < size;
670 pg_offset += PAGE_SIZE_64) {
39236c6e 671 mem = page_list;
d9a64523
A
672 page_list = mem->vmp_snext;
673 mem->vmp_snext = NULL;
39236c6e
A
674
675 vm_page_insert(mem, object, offset + pg_offset);
d9a64523 676 assert(mem->vmp_busy);
39236c6e 677
d9a64523
A
678 mem->vmp_busy = FALSE;
679 mem->vmp_pmapped = TRUE;
680 mem->vmp_wpmapped = TRUE;
681 mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
39236c6e
A
682 }
683 vm_object_unlock(object);
684
5ba3f43e
A
685#if KASAN
686 if (map == compressor_map) {
687 kasan_notify_address_nopoison(addr, size);
688 } else {
689 kasan_notify_address(addr, size);
690 }
691#endif
d9a64523
A
692
693#if DEBUG || DEVELOPMENT
694 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
695 if (task != NULL) {
696 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
697 }
d9a64523 698#endif
39236c6e
A
699 return KERN_SUCCESS;
700 }
701
702 for (i = 0; i < page_count; i++) {
703 for (;;) {
0a7de745 704 if (flags & KMA_LOMEM) {
39236c6e 705 mem = vm_page_grablo();
0a7de745 706 } else {
39236c6e 707 mem = vm_page_grab();
0a7de745
A
708 }
709
710 if (mem != VM_PAGE_NULL) {
39236c6e 711 break;
0a7de745 712 }
39236c6e
A
713
714 if (flags & KMA_NOPAGEWAIT) {
715 kr = KERN_RESOURCE_SHORTAGE;
716 goto out;
717 }
718 if ((flags & KMA_LOMEM) &&
719 (vm_lopage_needed == TRUE)) {
720 kr = KERN_RESOURCE_SHORTAGE;
721 goto out;
722 }
723 VM_PAGE_WAIT();
724 }
d9a64523 725 page_grab_count++;
0a7de745
A
726 if (KMA_ZERO & flags) {
727 vm_page_zero_fill(mem);
728 }
d9a64523 729 mem->vmp_snext = page_list;
39236c6e
A
730 page_list = mem;
731 }
732 if (flags & KMA_KOBJECT) {
733 offset = addr;
734 object = kernel_object;
735
736 vm_object_lock(object);
737 } else {
738 /*
739 * If it's not the kernel object, we need to:
740 * lock map;
741 * lookup entry;
742 * lock object;
743 * take reference on object;
744 * unlock map;
745 */
746 panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
0a7de745
A
747 "!KMA_KOBJECT",
748 map, (uint64_t) addr, (uint64_t) size, flags);
39236c6e
A
749 }
750
751 for (pg_offset = 0;
0a7de745
A
752 pg_offset < size;
753 pg_offset += PAGE_SIZE_64) {
754 if (page_list == NULL) {
39236c6e 755 panic("kernel_memory_populate: page_list == NULL");
0a7de745 756 }
39236c6e
A
757
758 mem = page_list;
d9a64523
A
759 page_list = mem->vmp_snext;
760 mem->vmp_snext = NULL;
761
762 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
763 mem->vmp_q_state = VM_PAGE_IS_WIRED;
764 mem->vmp_wire_count++;
765 if (__improbable(mem->vmp_wire_count == 0)) {
766 panic("kernel_memory_populate(%p): wire_count overflow", mem);
39037602 767 }
39236c6e 768
3e170ce0 769 vm_page_insert_wired(mem, object, offset + pg_offset, tag);
39236c6e 770
d9a64523
A
771 mem->vmp_busy = FALSE;
772 mem->vmp_pmapped = TRUE;
773 mem->vmp_wpmapped = TRUE;
39236c6e
A
774
775 PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
0a7de745
A
776 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
777 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
778 PMAP_OPTIONS_NOWAIT, pe_result);
39236c6e
A
779
780 if (pe_result == KERN_RESOURCE_SHORTAGE) {
39236c6e
A
781 vm_object_unlock(object);
782
783 PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
0a7de745
A
784 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
785 ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
786 pe_result);
39236c6e
A
787
788 vm_object_lock(object);
789 }
5ba3f43e
A
790
791 assert(pe_result == KERN_SUCCESS);
792
39236c6e
A
793 if (flags & KMA_NOENCRYPT) {
794 bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
39037602 795 pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
39236c6e
A
796 }
797 }
d9a64523 798 vm_page_lockspin_queues();
39236c6e
A
799 vm_page_wire_count += page_count;
800 vm_page_unlock_queues();
801
d9a64523
A
802#if DEBUG || DEVELOPMENT
803 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
804 if (task != NULL) {
805 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
806 }
d9a64523
A
807#endif
808
0a7de745
A
809 if (kernel_object == object) {
810 vm_tag_update_size(tag, size);
811 }
5ba3f43e 812
39236c6e
A
813 vm_object_unlock(object);
814
5ba3f43e
A
815#if KASAN
816 if (map == compressor_map) {
817 kasan_notify_address_nopoison(addr, size);
818 } else {
819 kasan_notify_address(addr, size);
820 }
821#endif
39236c6e
A
822 return KERN_SUCCESS;
823
824out:
0a7de745 825 if (page_list) {
39236c6e 826 vm_page_free_list(page_list, FALSE);
0a7de745 827 }
39236c6e 828
d9a64523
A
829#if DEBUG || DEVELOPMENT
830 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
0a7de745
A
831 if (task != NULL && kr == KERN_SUCCESS) {
832 ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
833 }
d9a64523
A
834#endif
835
39236c6e
A
836 return kr;
837}


void
kernel_memory_depopulate(
    vm_map_t        map,
    vm_offset_t     addr,
    vm_size_t       size,
    int             flags)
{
    vm_object_t         object;
    vm_object_offset_t  offset, pg_offset;
    vm_page_t           mem;
    vm_page_t           local_freeq = NULL;

    assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));

    if (flags & KMA_COMPRESSOR) {
        offset = addr;
        object = compressor_object;

        vm_object_lock(object);
    } else if (flags & KMA_KOBJECT) {
        offset = addr;
        object = kernel_object;
        vm_object_lock(object);
    } else {
        offset = 0;
        object = NULL;
        /*
         * If it's not the kernel object, we need to:
         *      lock map;
         *      lookup entry;
         *      lock object;
         *      unlock map;
         */
        panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
            "!KMA_KOBJECT",
            map, (uint64_t) addr, (uint64_t) size, flags);
    }
    pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);

    for (pg_offset = 0;
        pg_offset < size;
        pg_offset += PAGE_SIZE_64) {
        mem = vm_page_lookup(object, offset + pg_offset);

        assert(mem);

        if (mem->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR) {
            pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
        }

        mem->vmp_busy = TRUE;

        assert(mem->vmp_tabled);
        vm_page_remove(mem, TRUE);
        assert(mem->vmp_busy);

        assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
        assert((mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
            (mem->vmp_q_state == VM_PAGE_NOT_ON_Q));

        mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
        mem->vmp_snext = local_freeq;
        local_freeq = mem;
    }
    vm_object_unlock(object);

    if (local_freeq) {
        vm_page_free_list(local_freeq, TRUE);
    }
}

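/*
 * Illustrative example (a sketch, not part of the original source): pairing a
 * VA-only kernel_object allocation with kernel_memory_populate() and
 * kernel_memory_depopulate() to commit and release physical pages on demand.
 * The helper name and sizes are assumptions made for illustration.
 */
#if 0   /* illustrative only -- not compiled */
static void
example_populate_on_demand(void)
{
    vm_offset_t va;
    vm_size_t   size = 8 * PAGE_SIZE;

    /* reserve virtual space in the kernel object without backing pages */
    if (kernel_memory_allocate(kernel_map, &va, size, 0,
        KMA_KOBJECT | KMA_VAONLY, vm_tag_bt()) != KERN_SUCCESS) {
        return;
    }

    /* later: wire physical pages behind part of the range... */
    kernel_memory_populate(kernel_map, va, PAGE_SIZE, KMA_KOBJECT, vm_tag_bt());

    /* ...and give them back without releasing the virtual range */
    kernel_memory_depopulate(kernel_map, va, PAGE_SIZE, KMA_KOBJECT);

    kmem_free(kernel_map, va, size);
}
#endif
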
/*
 * kmem_alloc:
 *
 * Allocate wired-down memory in the kernel's address map
 * or a submap.  The memory is not zero-filled.
 */

kern_return_t
kmem_alloc_external(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size)
{
    return kmem_alloc(map, addrp, size, vm_tag_bt());
}


kern_return_t
kmem_alloc(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size,
    vm_tag_t        tag)
{
    return kmem_alloc_flags(map, addrp, size, tag, 0);
}

kern_return_t
kmem_alloc_flags(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size,
    vm_tag_t        tag,
    int             flags)
{
    kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, flags, tag);
    TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
    return kr;
}

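/*
 * Illustrative example (a sketch, not part of the original source): the usual
 * kmem_alloc()/kmem_free() lifecycle for a wired scratch buffer.  The tag
 * comes from vm_tag_bt() as in the *_external wrappers above; the helper name
 * and size are assumptions made for illustration.
 */
#if 0   /* illustrative only -- not compiled */
static kern_return_t
example_scratch_buffer(void)
{
    vm_offset_t   buf;
    vm_size_t     size = 2 * PAGE_SIZE;
    kern_return_t kr;

    kr = kmem_alloc(kernel_map, &buf, size, vm_tag_bt());
    if (kr != KERN_SUCCESS) {
        return kr;
    }
    bzero((void *)buf, size);       /* kmem_alloc does not zero-fill */
    /* ... use the buffer ... */
    kmem_free(kernel_map, buf, size);
    return KERN_SUCCESS;
}
#endif
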
/*
 * kmem_realloc:
 *
 * Reallocate wired-down memory in the kernel's address map
 * or a submap.  Newly allocated pages are not zeroed.
 * This can only be used on regions allocated with kmem_alloc.
 *
 * If successful, the pages in the old region are mapped twice.
 * The old region is unchanged.  Use kmem_free to get rid of it.
 */
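/*
 * Illustrative example (a sketch, not part of the original source): per the
 * comment above, a successful kmem_realloc() leaves the old mapping intact,
 * so the caller is expected to kmem_free() the old region afterwards.  The
 * helper name is an assumption made for illustration.
 */
#if 0   /* illustrative only -- not compiled */
static kern_return_t
example_grow_region(vm_offset_t *addrp, vm_size_t oldsize, vm_size_t newsize)
{
    vm_offset_t   newaddr;
    kern_return_t kr;

    kr = kmem_realloc(kernel_map, *addrp, oldsize, &newaddr, newsize, vm_tag_bt());
    if (kr != KERN_SUCCESS) {
        return kr;
    }
    /* the old region is still mapped; release it once the new one is in use */
    kmem_free(kernel_map, *addrp, oldsize);
    *addrp = newaddr;
    return KERN_SUCCESS;
}
#endif
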
961kern_return_t
962kmem_realloc(
0a7de745
A
963 vm_map_t map,
964 vm_offset_t oldaddr,
965 vm_size_t oldsize,
966 vm_offset_t *newaddrp,
967 vm_size_t newsize,
968 vm_tag_t tag)
1c79356b 969{
0a7de745
A
970 vm_object_t object;
971 vm_object_offset_t offset;
972 vm_map_offset_t oldmapmin;
973 vm_map_offset_t oldmapmax;
974 vm_map_offset_t newmapaddr;
975 vm_map_size_t oldmapsize;
976 vm_map_size_t newmapsize;
977 vm_map_entry_t oldentry;
978 vm_map_entry_t newentry;
979 vm_page_t mem;
980 kern_return_t kr;
1c79356b 981
39236c6e 982 oldmapmin = vm_map_trunc_page(oldaddr,
0a7de745 983 VM_MAP_PAGE_MASK(map));
39236c6e 984 oldmapmax = vm_map_round_page(oldaddr + oldsize,
0a7de745 985 VM_MAP_PAGE_MASK(map));
91447636 986 oldmapsize = oldmapmax - oldmapmin;
39236c6e 987 newmapsize = vm_map_round_page(newsize,
0a7de745 988 VM_MAP_PAGE_MASK(map));
5c9f4661
A
989 if (newmapsize < newsize) {
990 /* overflow */
991 *newaddrp = 0;
992 return KERN_INVALID_ARGUMENT;
993 }
1c79356b
A
994
995 /*
996 * Find the VM object backing the old region.
997 */
998
b4c24cb9
A
999 vm_map_lock(map);
1000
0a7de745 1001 if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) {
1c79356b 1002 panic("kmem_realloc");
0a7de745 1003 }
3e170ce0 1004 object = VME_OBJECT(oldentry);
1c79356b
A
1005
1006 /*
1007 * Increase the size of the object and
1008 * fill in the new region.
1009 */
1010
1011 vm_object_reference(object);
b4c24cb9
A
1012 /* by grabbing the object lock before unlocking the map */
1013 /* we guarantee that we will panic if more than one */
1014 /* attempt is made to realloc a kmem_alloc'd area */
1c79356b 1015 vm_object_lock(object);
b4c24cb9 1016 vm_map_unlock(map);
0a7de745 1017 if (object->vo_size != oldmapsize) {
1c79356b 1018 panic("kmem_realloc");
0a7de745 1019 }
6d2010ae 1020 object->vo_size = newmapsize;
1c79356b
A
1021 vm_object_unlock(object);
1022
b4c24cb9
A
1023 /* allocate the new pages while expanded portion of the */
1024 /* object is still not mapped */
91447636 1025 kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
0a7de745 1026 vm_object_round_page(newmapsize - oldmapsize));
1c79356b
A
1027
1028 /*
b4c24cb9 1029 * Find space for the new region.
1c79356b
A
1030 */
1031
91447636 1032 kr = vm_map_find_space(map, &newmapaddr, newmapsize,
0a7de745
A
1033 (vm_map_offset_t) 0, 0,
1034 VM_MAP_KERNEL_FLAGS_NONE,
1035 tag,
1036 &newentry);
b4c24cb9
A
1037 if (kr != KERN_SUCCESS) {
1038 vm_object_lock(object);
0a7de745 1039 for (offset = oldmapsize;
91447636 1040 offset < newmapsize; offset += PAGE_SIZE) {
0a7de745 1041 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
b0d623f7 1042 VM_PAGE_FREE(mem);
b4c24cb9
A
1043 }
1044 }
6d2010ae 1045 object->vo_size = oldmapsize;
b4c24cb9
A
1046 vm_object_unlock(object);
1047 vm_object_deallocate(object);
1048 return kr;
1049 }
3e170ce0
A
1050 VME_OBJECT_SET(newentry, object);
1051 VME_OFFSET_SET(newentry, 0);
3e170ce0 1052 assert(newentry->wired_count == 0);
b4c24cb9 1053
0a7de745 1054
b4c24cb9
A
1055 /* add an extra reference in case we have someone doing an */
1056 /* unexpected deallocate */
1057 vm_object_reference(object);
1c79356b
A
1058 vm_map_unlock(map);
1059
5ba3f43e 1060 kr = vm_map_wire_kernel(map, newmapaddr, newmapaddr + newmapsize,
0a7de745 1061 VM_PROT_DEFAULT, tag, FALSE);
91447636 1062 if (KERN_SUCCESS != kr) {
d9a64523 1063 vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, VM_MAP_REMOVE_NO_FLAGS);
b4c24cb9 1064 vm_object_lock(object);
0a7de745
A
1065 for (offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) {
1066 if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
b0d623f7 1067 VM_PAGE_FREE(mem);
b4c24cb9
A
1068 }
1069 }
6d2010ae 1070 object->vo_size = oldmapsize;
b4c24cb9
A
1071 vm_object_unlock(object);
1072 vm_object_deallocate(object);
0a7de745 1073 return kr;
b4c24cb9
A
1074 }
1075 vm_object_deallocate(object);
1c79356b 1076
0a7de745
A
1077 if (kernel_object == object) {
1078 vm_tag_update_size(tag, newmapsize);
1079 }
5ba3f43e 1080
91447636 1081 *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
1c79356b
A
1082 return KERN_SUCCESS;
1083}

/*
 * kmem_alloc_kobject:
 *
 * Allocate wired-down memory in the kernel's address map
 * or a submap.  The memory is not zero-filled.
 *
 * The memory is allocated in the kernel_object.
 * It may not be copied with vm_map_copy, and
 * it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_kobject_external(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size)
{
    return kmem_alloc_kobject(map, addrp, size, vm_tag_bt());
}

kern_return_t
kmem_alloc_kobject(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size,
    vm_tag_t        tag)
{
    return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT, tag);
}

/*
 * kmem_alloc_aligned:
 *
 * Like kmem_alloc_kobject, except that the memory is aligned.
 * The size should be a power-of-2.
 */

kern_return_t
kmem_alloc_aligned(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size,
    vm_tag_t        tag)
{
    if ((size & (size - 1)) != 0) {
        panic("kmem_alloc_aligned: size not aligned");
    }
    return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT, tag);
}

/*
 * kmem_alloc_pageable:
 *
 * Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable_external(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size)
{
    return kmem_alloc_pageable(map, addrp, size, vm_tag_bt());
}

kern_return_t
kmem_alloc_pageable(
    vm_map_t        map,
    vm_offset_t     *addrp,
    vm_size_t       size,
    vm_tag_t        tag)
{
    vm_map_offset_t map_addr;
    vm_map_size_t   map_size;
    kern_return_t   kr;

#ifndef normal
    map_addr = (vm_map_min(map)) + PAGE_SIZE;
#else
    map_addr = vm_map_min(map);
#endif
    map_size = vm_map_round_page(size,
        VM_MAP_PAGE_MASK(map));
    if (map_size < size) {
        /* overflow */
        *addrp = 0;
        return KERN_INVALID_ARGUMENT;
    }

    kr = vm_map_enter(map, &map_addr, map_size,
        (vm_map_offset_t) 0,
        VM_FLAGS_ANYWHERE,
        VM_MAP_KERNEL_FLAGS_NONE,
        tag,
        VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
        VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

    if (kr != KERN_SUCCESS) {
        return kr;
    }

#if KASAN
    kasan_notify_address(map_addr, map_size);
#endif
    *addrp = CAST_DOWN(vm_offset_t, map_addr);
    return KERN_SUCCESS;
}

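/*
 * Illustrative example (a sketch, not part of the original source):
 * kmem_alloc_aligned() requires a power-of-two size (it panics otherwise) and
 * uses size - 1 as the alignment mask, so the returned address is aligned to
 * the allocation size.  The helper name and size are assumptions.
 */
#if 0   /* illustrative only -- not compiled */
static kern_return_t
example_aligned_allocation(vm_offset_t *addrp)
{
    /* 32 KB allocation; the returned address is 32 KB aligned */
    return kmem_alloc_aligned(kernel_map, addrp, 32 * 1024, vm_tag_bt());
}
#endif
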
/*
 * kmem_free:
 *
 * Release a region of kernel virtual memory allocated
 * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
 * and return the physical pages associated with that region.
 */

void
kmem_free(
    vm_map_t        map,
    vm_offset_t     addr,
    vm_size_t       size)
{
    kern_return_t kr;

    assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);

    TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);

    if (size == 0) {
#if MACH_ASSERT
        printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
#endif
        return;
    }

    kr = vm_map_remove(map,
        vm_map_trunc_page(addr,
        VM_MAP_PAGE_MASK(map)),
        vm_map_round_page(addr + size,
        VM_MAP_PAGE_MASK(map)),
        VM_MAP_REMOVE_KUNWIRE);
    if (kr != KERN_SUCCESS) {
        panic("kmem_free");
    }
}

/*
 * Allocate new pages in an object.
 */

kern_return_t
kmem_alloc_pages(
    vm_object_t         object,
    vm_object_offset_t  offset,
    vm_object_size_t    size)
{
    vm_object_size_t    alloc_size;

    alloc_size = vm_object_round_page(size);
    vm_object_lock(object);
    while (alloc_size) {
        vm_page_t mem;


        /*
         * Allocate a page
         */
        while (VM_PAGE_NULL ==
            (mem = vm_page_alloc(object, offset))) {
            vm_object_unlock(object);
            VM_PAGE_WAIT();
            vm_object_lock(object);
        }
        mem->vmp_busy = FALSE;

        alloc_size -= PAGE_SIZE;
        offset += PAGE_SIZE;
    }
    vm_object_unlock(object);
    return KERN_SUCCESS;
}

/*
 * kmem_suballoc:
 *
 * Allocates a map to manage a subrange
 * of the kernel virtual address space.
 *
 * Arguments are as follows:
 *
 * parent       Map to take range from
 * addr         Address of start of range (IN/OUT)
 * size         Size of range to find
 * pageable     Can region be paged
 * anywhere     Can region be located anywhere in map
 * new_map      Pointer to new submap
 */
kern_return_t
kmem_suballoc(
    vm_map_t        parent,
    vm_offset_t     *addr,
    vm_size_t       size,
    boolean_t       pageable,
    int             flags,
    vm_map_kernel_flags_t vmk_flags,
    vm_tag_t        tag,
    vm_map_t        *new_map)
{
    vm_map_t        map;
    vm_map_offset_t map_addr;
    vm_map_size_t   map_size;
    kern_return_t   kr;

    map_size = vm_map_round_page(size,
        VM_MAP_PAGE_MASK(parent));
    if (map_size < size) {
        /* overflow */
        *addr = 0;
        return KERN_INVALID_ARGUMENT;
    }

    /*
     * Need reference on submap object because it is internal
     * to the vm_system.  vm_object_enter will never be called
     * on it (usual source of reference for vm_map_enter).
     */
    vm_object_reference(vm_submap_object);

    map_addr = ((flags & VM_FLAGS_ANYWHERE)
        ? vm_map_min(parent)
        : vm_map_trunc_page(*addr,
        VM_MAP_PAGE_MASK(parent)));

    kr = vm_map_enter(parent, &map_addr, map_size,
        (vm_map_offset_t) 0, flags, vmk_flags, tag,
        vm_submap_object, (vm_object_offset_t) 0, FALSE,
        VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
    if (kr != KERN_SUCCESS) {
        vm_object_deallocate(vm_submap_object);
        return kr;
    }

    pmap_reference(vm_map_pmap(parent));
    map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
    if (map == VM_MAP_NULL) {
        panic("kmem_suballoc: vm_map_create failed");   /* "can't happen" */
    }
    /* inherit the parent map's page size */
    vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));

    kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
    if (kr != KERN_SUCCESS) {
        /*
         * See comment preceding vm_map_submap().
         */
        vm_map_remove(parent, map_addr, map_addr + map_size,
            VM_MAP_REMOVE_NO_FLAGS);
        vm_map_deallocate(map);         /* also removes ref to pmap */
        vm_object_deallocate(vm_submap_object);
        return kr;
    }
    *addr = CAST_DOWN(vm_offset_t, map_addr);
    *new_map = map;
    return KERN_SUCCESS;
}

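/*
 * Illustrative example (a sketch, not part of the original source): carving a
 * pageable submap out of kernel_map with kmem_suballoc() as described above.
 * The 32 MB size, flag choices, and names are assumptions made for
 * illustration.
 */
#if 0   /* illustrative only -- not compiled */
static vm_map_t
example_create_submap(void)
{
    vm_map_t      submap;
    vm_offset_t   base = 0;
    vm_size_t     size = 32 * 1024 * 1024;
    kern_return_t kr;

    kr = kmem_suballoc(kernel_map, &base, size,
        /* pageable */ TRUE, VM_FLAGS_ANYWHERE,
        VM_MAP_KERNEL_FLAGS_NONE, vm_tag_bt(), &submap);

    return (kr == KERN_SUCCESS) ? submap : VM_MAP_NULL;
}
#endif
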
1351/*
1352 * kmem_init:
1353 *
1354 * Initialize the kernel's virtual memory map, taking
1355 * into account all memory allocated up to this time.
1356 */
1357void
1358kmem_init(
0a7de745
A
1359 vm_offset_t start,
1360 vm_offset_t end)
1c79356b 1361{
91447636
A
1362 vm_map_offset_t map_start;
1363 vm_map_offset_t map_end;
5ba3f43e
A
1364 vm_map_kernel_flags_t vmk_flags;
1365
1366 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1367 vmk_flags.vmkf_permanent = TRUE;
1368 vmk_flags.vmkf_no_pmap_check = TRUE;
91447636 1369
39236c6e 1370 map_start = vm_map_trunc_page(start,
0a7de745 1371 VM_MAP_PAGE_MASK(kernel_map));
39236c6e 1372 map_end = vm_map_round_page(end,
0a7de745 1373 VM_MAP_PAGE_MASK(kernel_map));
91447636 1374
0a7de745
A
1375#if defined(__arm__) || defined(__arm64__)
1376 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1377 VM_MAX_KERNEL_ADDRESS, FALSE);
5ba3f43e
A
1378 /*
1379 * Reserve virtual memory allocated up to this time.
1380 */
1381 {
0a7de745
A
1382 unsigned int region_select = 0;
1383 vm_map_offset_t region_start;
1384 vm_map_size_t region_size;
5ba3f43e
A
1385 vm_map_offset_t map_addr;
1386 kern_return_t kr;
1387
1388 while (pmap_virtual_region(region_select, &region_start, &region_size)) {
5ba3f43e
A
1389 map_addr = region_start;
1390 kr = vm_map_enter(kernel_map, &map_addr,
0a7de745
A
1391 vm_map_round_page(region_size,
1392 VM_MAP_PAGE_MASK(kernel_map)),
1393 (vm_map_offset_t) 0,
1394 VM_FLAGS_FIXED,
1395 vmk_flags,
1396 VM_KERN_MEMORY_NONE,
1397 VM_OBJECT_NULL,
1398 (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
1399 VM_INHERIT_DEFAULT);
5ba3f43e
A
1400
1401 if (kr != KERN_SUCCESS) {
1402 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
0a7de745
A
1403 (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
1404 (uint64_t) region_size, kr);
1405 }
5ba3f43e
A
1406
1407 region_select++;
0a7de745 1408 }
5ba3f43e
A
1409 }
1410#else
0a7de745
A
1411 kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1412 map_end, FALSE);
1c79356b
A
1413 /*
1414 * Reserve virtual memory allocated up to this time.
1415 */
6d2010ae 1416 if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
91447636 1417 vm_map_offset_t map_addr;
6d2010ae 1418 kern_return_t kr;
0a7de745 1419
5ba3f43e
A
1420 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1421 vmk_flags.vmkf_no_pmap_check = TRUE;
1422
6d2010ae
A
1423 map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1424 kr = vm_map_enter(kernel_map,
0a7de745
A
1425 &map_addr,
1426 (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1427 (vm_map_offset_t) 0,
1428 VM_FLAGS_FIXED,
1429 vmk_flags,
1430 VM_KERN_MEMORY_NONE,
1431 VM_OBJECT_NULL,
1432 (vm_object_offset_t) 0, FALSE,
1433 VM_PROT_NONE, VM_PROT_NONE,
1434 VM_INHERIT_DEFAULT);
1435
6d2010ae
A
1436 if (kr != KERN_SUCCESS) {
1437 panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
0a7de745
A
1438 (uint64_t) start, (uint64_t) end,
1439 (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
1440 (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
1441 kr);
1442 }
1c79356b 1443 }
5ba3f43e 1444#endif
6d2010ae 1445
2d21ac55
A
1446 /*
1447 * Set the default global user wire limit which limits the amount of
b0d623f7
A
1448 * memory that can be locked via mlock(). We set this to the total
1449 * amount of memory that are potentially usable by a user app (max_mem)
1450 * minus a certain amount. This can be overridden via a sysctl.
2d21ac55 1451 */
0a7de745
A
1452 vm_global_no_user_wire_amount = MIN(max_mem * 20 / 100,
1453 VM_NOT_USER_WIREABLE);
b0d623f7 1454 vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;
0a7de745 1455
b0d623f7
A
1456 /* the default per user limit is the same as the global limit */
1457 vm_user_wire_limit = vm_global_user_wire_limit;
1c79356b
A
1458}
1459

/*
 * Routine:     copyinmap
 * Purpose:
 *      Like copyin, except that fromaddr is an address
 *      in the specified VM map.  This implementation
 *      is incomplete; it handles the current user map
 *      and the kernel map/submaps.
 */
kern_return_t
copyinmap(
    vm_map_t        map,
    vm_map_offset_t fromaddr,
    void            *todata,
    vm_size_t       length)
{
    kern_return_t   kr = KERN_SUCCESS;
    vm_map_t        oldmap;

    if (vm_map_pmap(map) == pmap_kernel()) {
        /* assume a correct copy */
        memcpy(todata, CAST_DOWN(void *, fromaddr), length);
    } else if (current_map() == map) {
        if (copyin(fromaddr, todata, length) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyin(fromaddr, todata, length) != 0) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}

/*
 * Routine:     copyoutmap
 * Purpose:
 *      Like copyout, except that toaddr is an address
 *      in the specified VM map.  This implementation
 *      is incomplete; it handles the current user map
 *      and the kernel map/submaps.
 */
kern_return_t
copyoutmap(
    vm_map_t            map,
    void                *fromdata,
    vm_map_address_t    toaddr,
    vm_size_t           length)
{
    if (vm_map_pmap(map) == pmap_kernel()) {
        /* assume a correct copy */
        memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
        return KERN_SUCCESS;
    }

    if (current_map() != map) {
        return KERN_NOT_SUPPORTED;
    }

    if (copyout(fromdata, toaddr, length) != 0) {
        return KERN_INVALID_ADDRESS;
    }

    return KERN_SUCCESS;
}

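/*
 * Illustrative example (a sketch, not part of the original source): using
 * copyoutmap() to copy a small value into a task's map, which per the comment
 * above only works for the current user map or the kernel map.  The helper
 * name and value are assumptions made for illustration.
 */
#if 0   /* illustrative only -- not compiled */
static kern_return_t
example_copyout_value(vm_map_t user_map, vm_map_address_t uaddr)
{
    uint64_t      value = 0xfeedfaceULL;
    kern_return_t kr;

    kr = copyoutmap(user_map, &value, uaddr, sizeof(value));
    if (kr == KERN_NOT_SUPPORTED) {
        /* copyoutmap only handles the current user map and the kernel map */
    }
    return kr;
}
#endif
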
/*
 *
 * The following two functions are to be used when exposing kernel
 * addresses to userspace via any of the various debug or info
 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
 * are exported to KEXTs.
 *
 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
 */

static void
vm_kernel_addrhash_internal(
    vm_offset_t addr,
    vm_offset_t *hash_addr,
    uint64_t salt)
{
    assert(salt != 0);

    if (addr == 0) {
        *hash_addr = 0;
        return;
    }

    if (VM_KERNEL_IS_SLID(addr)) {
        *hash_addr = VM_KERNEL_UNSLIDE(addr);
        return;
    }

    vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
    SHA256_CTX sha_ctx;

    SHA256_Init(&sha_ctx);
    SHA256_Update(&sha_ctx, &salt, sizeof(salt));
    SHA256_Update(&sha_ctx, &addr, sizeof(addr));
    SHA256_Final(sha_digest, &sha_ctx);

    *hash_addr = sha_digest[0];
}

void
vm_kernel_addrhash_external(
    vm_offset_t addr,
    vm_offset_t *hash_addr)
{
    return vm_kernel_addrhash_internal(addr, hash_addr, vm_kernel_addrhash_salt_ext);
}

vm_offset_t
vm_kernel_addrhash(vm_offset_t addr)
{
    vm_offset_t hash_addr;
    vm_kernel_addrhash_internal(addr, &hash_addr, vm_kernel_addrhash_salt);
    return hash_addr;
}

void
vm_kernel_addrhide(
    vm_offset_t addr,
    vm_offset_t *hide_addr)
{
    *hide_addr = VM_KERNEL_ADDRHIDE(addr);
}

/*
 * vm_kernel_addrperm_external:
 * vm_kernel_unslide_or_perm_external:
 *
 * Use these macros when exposing an address to userspace that could come from
 * either kernel text/data *or* the heap.
 */
void
vm_kernel_addrperm_external(
    vm_offset_t addr,
    vm_offset_t *perm_addr)
{
    if (VM_KERNEL_IS_SLID(addr)) {
        *perm_addr = VM_KERNEL_UNSLIDE(addr);
    } else if (VM_KERNEL_ADDRESS(addr)) {
        *perm_addr = addr + vm_kernel_addrperm_ext;
    } else {
        *perm_addr = addr;
    }
}

void
vm_kernel_unslide_or_perm_external(
    vm_offset_t addr,
    vm_offset_t *up_addr)
{
    vm_kernel_addrperm_external(addr, up_addr);
}
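
/*
 * Illustrative example (a sketch, not part of the original source): hashing a
 * heap pointer before exposing it in a log line, per the guidance in the
 * comment block above.  The NOTE above says kernel-internal code should use
 * the macro versions in vm_param.h; the direct call below just illustrates
 * the transformation a kext would apply.  The helper name and log format are
 * assumptions made for illustration.
 */
#if 0   /* illustrative only -- not compiled */
static void
example_log_object_address(void *obj)
{
    vm_offset_t hashed;

    /* never expose the raw kernel pointer; hash it first */
    vm_kernel_addrhash_external((vm_offset_t) obj, &hashed);
    printf("object handle: 0x%lx\n", (unsigned long) hashed);
}
#endif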