]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_map.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / osfmk / vm / vm_map.c
CommitLineData
1c79356b 1/*
f427ee49 2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5ba3f43e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
5ba3f43e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
5ba3f43e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
5ba3f43e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
5ba3f43e 31/*
1c79356b
A
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
5ba3f43e 35 *
1c79356b
A
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
5ba3f43e 41 *
1c79356b
A
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
5ba3f43e 45 *
1c79356b 46 * Carnegie Mellon requests users of this software to return to
5ba3f43e 47 *
1c79356b
A
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
5ba3f43e 52 *
1c79356b
A
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
1c79356b 66#include <mach_assert.h>
fe8ab488
A
67
68#include <vm/vm_options.h>
69
91447636 70#include <libkern/OSAtomic.h>
1c79356b
A
71
72#include <mach/kern_return.h>
73#include <mach/port.h>
74#include <mach/vm_attributes.h>
75#include <mach/vm_param.h>
76#include <mach/vm_behavior.h>
55e303ae 77#include <mach/vm_statistics.h>
91447636 78#include <mach/memory_object.h>
0c530ab8 79#include <mach/mach_vm.h>
91447636 80#include <machine/cpu_capabilities.h>
2d21ac55 81#include <mach/sdt.h>
91447636 82
1c79356b 83#include <kern/assert.h>
39037602 84#include <kern/backtrace.h>
c3c9b80d 85#include <kern/counter.h>
d9a64523 86#include <kern/exc_guard.h>
91447636 87#include <kern/kalloc.h>
f427ee49 88#include <kern/zalloc_internal.h>
91447636
A
89
90#include <vm/cpm.h>
d9a64523 91#include <vm/vm_compressor.h>
39236c6e 92#include <vm/vm_compressor_pager.h>
1c79356b
A
93#include <vm/vm_init.h>
94#include <vm/vm_fault.h>
95#include <vm/vm_map.h>
96#include <vm/vm_object.h>
97#include <vm/vm_page.h>
b0d623f7 98#include <vm/vm_pageout.h>
d9a64523 99#include <vm/pmap.h>
1c79356b
A
100#include <vm/vm_kern.h>
101#include <ipc/ipc_port.h>
102#include <kern/sched_prim.h>
103#include <kern/misc_protos.h>
1c79356b 104
91447636
A
105#include <mach/vm_map_server.h>
106#include <mach/mach_host_server.h>
2d21ac55 107#include <vm/vm_protos.h>
b0d623f7 108#include <vm/vm_purgeable_internal.h>
91447636 109
91447636 110#include <vm/vm_protos.h>
2d21ac55 111#include <vm/vm_shared_region.h>
6d2010ae 112#include <vm/vm_map_store.h>
91447636 113
5ba3f43e
A
114#include <san/kasan.h>
115
d9a64523 116#include <sys/codesign.h>
f427ee49
A
117#include <sys/mman.h>
118
d9a64523
A
119#include <libkern/section_keywords.h>
120#if DEVELOPMENT || DEBUG
121extern int proc_selfcsflags(void);
f427ee49 122int panic_on_unsigned_execute = 0;
d9a64523
A
123#endif /* DEVELOPMENT || DEBUG */
124
f427ee49
A
125#if MACH_ASSERT
126int debug4k_filter = 0;
127char debug4k_proc_name[1024] = "";
128int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
129int debug4k_panic_on_misaligned_sharing = 0;
130const char *debug4k_category_name[] = {
131 "error", /* 0 */
132 "life", /* 1 */
133 "load", /* 2 */
134 "fault", /* 3 */
135 "copy", /* 4 */
136 "share", /* 5 */
137 "adjust", /* 6 */
138 "pmap", /* 7 */
139 "mementry", /* 8 */
140 "iokit", /* 9 */
141 "upl", /* 10 */
142 "exc", /* 11 */
143 "vfs" /* 12 */
144};
145#endif /* MACH_ASSERT */
146int debug4k_no_cow_copyin = 0;
147
148
5ba3f43e 149#if __arm64__
d9a64523
A
150extern const int fourk_binary_compatibility_unsafe;
151extern const int fourk_binary_compatibility_allow_wx;
5ba3f43e 152#endif /* __arm64__ */
39037602
A
153extern int proc_selfpid(void);
154extern char *proc_name_address(void *p);
155
156#if VM_MAP_DEBUG_APPLE_PROTECT
157int vm_map_debug_apple_protect = 0;
158#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
159#if VM_MAP_DEBUG_FOURK
160int vm_map_debug_fourk = 0;
161#endif /* VM_MAP_DEBUG_FOURK */
3e170ce0 162
d9a64523
A
163SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
164int vm_map_executable_immutable_verbose = 0;
5ba3f43e 165
cb323159
A
166os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
167
0a7de745 168extern u_int32_t random(void); /* from <libkern/libkern.h> */
1c79356b
A
169/* Internal prototypes
170 */
2d21ac55 171
91447636 172static void vm_map_simplify_range(
0a7de745
A
173 vm_map_t map,
174 vm_map_offset_t start,
175 vm_map_offset_t end); /* forward */
176
177static boolean_t vm_map_range_check(
178 vm_map_t map,
179 vm_map_offset_t start,
180 vm_map_offset_t end,
181 vm_map_entry_t *entry);
182
183static vm_map_entry_t _vm_map_entry_create(
184 struct vm_map_header *map_header, boolean_t map_locked);
185
186static void _vm_map_entry_dispose(
187 struct vm_map_header *map_header,
188 vm_map_entry_t entry);
189
190static void vm_map_pmap_enter(
191 vm_map_t map,
192 vm_map_offset_t addr,
193 vm_map_offset_t end_addr,
194 vm_object_t object,
195 vm_object_offset_t offset,
196 vm_prot_t protection);
197
198static void _vm_map_clip_end(
199 struct vm_map_header *map_header,
200 vm_map_entry_t entry,
201 vm_map_offset_t end);
202
203static void _vm_map_clip_start(
204 struct vm_map_header *map_header,
205 vm_map_entry_t entry,
206 vm_map_offset_t start);
207
208static void vm_map_entry_delete(
209 vm_map_t map,
210 vm_map_entry_t entry);
211
212static kern_return_t vm_map_delete(
213 vm_map_t map,
214 vm_map_offset_t start,
215 vm_map_offset_t end,
216 int flags,
217 vm_map_t zap_map);
218
219static void vm_map_copy_insert(
220 vm_map_t map,
221 vm_map_entry_t after_where,
222 vm_map_copy_t copy);
223
224static kern_return_t vm_map_copy_overwrite_unaligned(
225 vm_map_t dst_map,
226 vm_map_entry_t entry,
227 vm_map_copy_t copy,
39236c6e 228 vm_map_address_t start,
0a7de745 229 boolean_t discard_on_success);
1c79356b 230
0a7de745
A
231static kern_return_t vm_map_copy_overwrite_aligned(
232 vm_map_t dst_map,
233 vm_map_entry_t tmp_entry,
234 vm_map_copy_t copy,
2d21ac55 235 vm_map_offset_t start,
0a7de745 236 pmap_t pmap);
1c79356b 237
0a7de745
A
238static kern_return_t vm_map_copyin_kernel_buffer(
239 vm_map_t src_map,
2d21ac55 240 vm_map_address_t src_addr,
0a7de745
A
241 vm_map_size_t len,
242 boolean_t src_destroy,
243 vm_map_copy_t *copy_result); /* OUT */
244
245static kern_return_t vm_map_copyout_kernel_buffer(
246 vm_map_t map,
247 vm_map_address_t *addr, /* IN/OUT */
248 vm_map_copy_t copy,
39037602 249 vm_map_size_t copy_size,
0a7de745
A
250 boolean_t overwrite,
251 boolean_t consume_on_success);
252
253static void vm_map_fork_share(
254 vm_map_t old_map,
255 vm_map_entry_t old_entry,
256 vm_map_t new_map);
257
258static boolean_t vm_map_fork_copy(
259 vm_map_t old_map,
260 vm_map_entry_t *old_entry_p,
261 vm_map_t new_map,
262 int vm_map_copyin_flags);
263
264static kern_return_t vm_map_wire_nested(
265 vm_map_t map,
266 vm_map_offset_t start,
267 vm_map_offset_t end,
268 vm_prot_t caller_prot,
269 vm_tag_t tag,
270 boolean_t user_wire,
271 pmap_t map_pmap,
272 vm_map_offset_t pmap_addr,
273 ppnum_t *physpage_p);
274
275static kern_return_t vm_map_unwire_nested(
276 vm_map_t map,
277 vm_map_offset_t start,
278 vm_map_offset_t end,
279 boolean_t user_wire,
280 pmap_t map_pmap,
281 vm_map_offset_t pmap_addr);
282
283static kern_return_t vm_map_overwrite_submap_recurse(
284 vm_map_t dst_map,
285 vm_map_offset_t dst_addr,
286 vm_map_size_t dst_size);
287
288static kern_return_t vm_map_copy_overwrite_nested(
289 vm_map_t dst_map,
290 vm_map_offset_t dst_addr,
291 vm_map_copy_t copy,
292 boolean_t interruptible,
293 pmap_t pmap,
294 boolean_t discard_on_success);
295
296static kern_return_t vm_map_remap_extract(
297 vm_map_t map,
298 vm_map_offset_t addr,
299 vm_map_size_t size,
300 boolean_t copy,
301 struct vm_map_header *map_header,
302 vm_prot_t *cur_protection,
303 vm_prot_t *max_protection,
304 vm_inherit_t inheritance,
0a7de745
A
305 vm_map_kernel_flags_t vmk_flags);
306
307static kern_return_t vm_map_remap_range_allocate(
308 vm_map_t map,
309 vm_map_address_t *address,
310 vm_map_size_t size,
311 vm_map_offset_t mask,
312 int flags,
313 vm_map_kernel_flags_t vmk_flags,
314 vm_tag_t tag,
315 vm_map_entry_t *map_entry);
316
317static void vm_map_region_look_for_page(
318 vm_map_t map,
2d21ac55 319 vm_map_offset_t va,
0a7de745
A
320 vm_object_t object,
321 vm_object_offset_t offset,
2d21ac55 322 int max_refcnt,
f427ee49 323 unsigned short depth,
39236c6e
A
324 vm_region_extended_info_t extended,
325 mach_msg_type_number_t count);
91447636 326
0a7de745
A
327static int vm_map_region_count_obj_refs(
328 vm_map_entry_t entry,
329 vm_object_t object);
1c79356b 330
b0d623f7 331
0a7de745
A
332static kern_return_t vm_map_willneed(
333 vm_map_t map,
334 vm_map_offset_t start,
335 vm_map_offset_t end);
b0d623f7 336
0a7de745
A
337static kern_return_t vm_map_reuse_pages(
338 vm_map_t map,
339 vm_map_offset_t start,
340 vm_map_offset_t end);
b0d623f7 341
0a7de745
A
342static kern_return_t vm_map_reusable_pages(
343 vm_map_t map,
344 vm_map_offset_t start,
345 vm_map_offset_t end);
b0d623f7 346
0a7de745
A
347static kern_return_t vm_map_can_reuse(
348 vm_map_t map,
349 vm_map_offset_t start,
350 vm_map_offset_t end);
b0d623f7 351
3e170ce0 352#if MACH_ASSERT
0a7de745
A
353static kern_return_t vm_map_pageout(
354 vm_map_t map,
355 vm_map_offset_t start,
356 vm_map_offset_t end);
3e170ce0 357#endif /* MACH_ASSERT */
6d2010ae 358
f427ee49
A
359kern_return_t vm_map_corpse_footprint_collect(
360 vm_map_t old_map,
361 vm_map_entry_t old_entry,
362 vm_map_t new_map);
363void vm_map_corpse_footprint_collect_done(
364 vm_map_t new_map);
365void vm_map_corpse_footprint_destroy(
0a7de745 366 vm_map_t map);
f427ee49
A
367kern_return_t vm_map_corpse_footprint_query_page_info(
368 vm_map_t map,
369 vm_map_offset_t va,
370 int *disposition_p);
371void vm_map_footprint_query_page_info(
372 vm_map_t map,
373 vm_map_entry_t map_entry,
374 vm_map_offset_t curr_s_offset,
375 int *disposition_p);
376
377static const struct vm_map_entry vm_map_entry_template = {
378 .behavior = VM_BEHAVIOR_DEFAULT,
379 .inheritance = VM_INHERIT_DEFAULT,
380};
d9a64523 381
5ba3f43e
A
382pid_t find_largest_process_vm_map_entries(void);
383
1c79356b
A
384/*
385 * Macros to copy a vm_map_entry. We must be careful to correctly
386 * manage the wired page count. vm_map_entry_copy() creates a new
387 * map entry to the same memory - the wired count in the new entry
388 * must be set to zero. vm_map_entry_copy_full() creates a new
389 * entry that is identical to the old entry. This preserves the
390 * wire count; it's used for map splitting and zone changing in
391 * vm_map_copyout.
392 */
316670eb 393
f427ee49
A
394static inline void
395vm_map_entry_copy_pmap_cs_assoc(
396 vm_map_t map __unused,
397 vm_map_entry_t new __unused,
398 vm_map_entry_t old __unused)
399{
f427ee49
A
400 /* when pmap_cs is not enabled, assert as a sanity check */
401 assert(new->pmap_cs_associated == FALSE);
f427ee49 402}
d9a64523
A
403
404/*
405 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
f427ee49
A
406 * But for security reasons on some platforms, we don't want the
407 * new mapping to be "used for jit", so we reset the flag here.
d9a64523 408 */
f427ee49
A
409static inline void
410vm_map_entry_copy_code_signing(
411 vm_map_t map,
412 vm_map_entry_t new,
413 vm_map_entry_t old __unused)
414{
415 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
416 assert(new->used_for_jit == old->used_for_jit);
417 } else {
418 new->used_for_jit = FALSE;
419 }
420}
d9a64523 421
f427ee49
A
422static inline void
423vm_map_entry_copy(
424 vm_map_t map,
425 vm_map_entry_t new,
426 vm_map_entry_t old)
427{
428 boolean_t _vmec_reserved = new->from_reserved_zone;
429 *new = *old;
430 new->is_shared = FALSE;
431 new->needs_wakeup = FALSE;
432 new->in_transition = FALSE;
433 new->wired_count = 0;
434 new->user_wired_count = 0;
435 new->permanent = FALSE;
436 vm_map_entry_copy_code_signing(map, new, old);
437 vm_map_entry_copy_pmap_cs_assoc(map, new, old);
438 new->from_reserved_zone = _vmec_reserved;
439 if (new->iokit_acct) {
440 assertf(!new->use_pmap, "old %p new %p\n", old, new);
441 new->iokit_acct = FALSE;
442 new->use_pmap = TRUE;
443 }
444 new->vme_resilient_codesign = FALSE;
445 new->vme_resilient_media = FALSE;
446 new->vme_atomic = FALSE;
447 new->vme_no_copy_on_read = FALSE;
448}
d9a64523 449
f427ee49
A
450static inline void
451vm_map_entry_copy_full(
452 vm_map_entry_t new,
453 vm_map_entry_t old)
454{
455 boolean_t _vmecf_reserved = new->from_reserved_zone;
456 *new = *old;
457 new->from_reserved_zone = _vmecf_reserved;
458}
1c79356b 459
cb323159
A
460/*
461 * Normal lock_read_to_write() returns FALSE/0 on failure.
462 * These functions evaluate to zero on success and non-zero value on failure.
463 */
464__attribute__((always_inline))
465int
466vm_map_lock_read_to_write(vm_map_t map)
467{
468 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
469 DTRACE_VM(vm_map_lock_upgrade);
470 return 0;
471 }
472 return 1;
473}
474
475__attribute__((always_inline))
476boolean_t
477vm_map_try_lock(vm_map_t map)
478{
479 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
480 DTRACE_VM(vm_map_lock_w);
481 return TRUE;
482 }
483 return FALSE;
484}
485
486__attribute__((always_inline))
487boolean_t
488vm_map_try_lock_read(vm_map_t map)
489{
490 if (lck_rw_try_lock_shared(&(map)->lock)) {
491 DTRACE_VM(vm_map_lock_r);
492 return TRUE;
493 }
494 return FALSE;
495}
496
f427ee49
A
497/*
498 * Routines to get the page size the caller should
499 * use while inspecting the target address space.
500 * Use the "_safely" variant if the caller is dealing with a user-provided
501 * array whose size depends on the page size, to avoid any overflow or
502 * underflow of a user-allocated buffer.
503 */
504int
505vm_self_region_page_shift_safely(
506 vm_map_t target_map)
507{
508 int effective_page_shift = 0;
509
510 if (PAGE_SIZE == (4096)) {
511 /* x86_64 and 4k watches: always use 4k */
512 return PAGE_SHIFT;
513 }
514 /* did caller provide an explicit page size for this thread to use? */
515 effective_page_shift = thread_self_region_page_shift();
516 if (effective_page_shift) {
517 /* use the explicitly-provided page size */
518 return effective_page_shift;
519 }
520 /* no explicit page size: use the caller's page size... */
521 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
522 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
523 /* page size match: safe to use */
524 return effective_page_shift;
525 }
526 /* page size mismatch */
527 return -1;
528}
529int
530vm_self_region_page_shift(
531 vm_map_t target_map)
532{
533 int effective_page_shift;
534
535 effective_page_shift = vm_self_region_page_shift_safely(target_map);
536 if (effective_page_shift == -1) {
537 /* no safe value but OK to guess for caller */
538 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
539 VM_MAP_PAGE_SHIFT(target_map));
540 }
541 return effective_page_shift;
542}
543
544
2d21ac55
A
545/*
546 * Decide if we want to allow processes to execute from their data or stack areas.
5ba3f43e 547 * override_nx() returns true if we do. Data/stack execution can be enabled independently
2d21ac55
A
548 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
549 * or allow_stack_exec to enable data execution for that type of data area for that particular
550 * ABI (or both by or'ing the flags together). These are initialized in the architecture
5ba3f43e
A
551 * specific pmap files since the default behavior varies according to architecture. The
552 * main reason it varies is because of the need to provide binary compatibility with old
553 * applications that were written before these restrictions came into being. In the old
554 * days, an app could execute anything it could read, but this has slowly been tightened
2d21ac55
A
555 * up over time. The default behavior is:
556 *
557 * 32-bit PPC apps may execute from both stack and data areas
558 * 32-bit Intel apps may execute from data areas but not stack
559 * 64-bit PPC/Intel apps may not execute from either data or stack
560 *
561 * An application on any architecture may override these defaults by explicitly
5ba3f43e 562 * adding PROT_EXEC permission to the page in question with the mprotect(2)
2d21ac55 563 * system call. This code here just determines what happens when an app tries to
0a7de745 564 * execute from a page that lacks execute permission.
2d21ac55
A
565 *
566 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
6d2010ae
A
567 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
568 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
569 * execution from data areas for a particular binary even if the arch normally permits it. As
570 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
571 * to support some complicated use cases, notably browsers with out-of-process plugins that
572 * are not all NX-safe.
2d21ac55
A
573 */
574
575extern int allow_data_exec, allow_stack_exec;
576
577int
578override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
579{
580 int current_abi;
581
0a7de745
A
582 if (map->pmap == kernel_pmap) {
583 return FALSE;
584 }
3e170ce0 585
2d21ac55
A
586 /*
587 * Determine if the app is running in 32 or 64 bit mode.
588 */
589
0a7de745 590 if (vm_map_is_64bit(map)) {
2d21ac55 591 current_abi = VM_ABI_64;
0a7de745 592 } else {
2d21ac55 593 current_abi = VM_ABI_32;
0a7de745 594 }
2d21ac55
A
595
596 /*
5ba3f43e 597 * Determine if we should allow the execution based on whether it's a
2d21ac55
A
598 * stack or data area and the current architecture.
599 */
600
0a7de745 601 if (user_tag == VM_MEMORY_STACK) {
2d21ac55 602 return allow_stack_exec & current_abi;
0a7de745 603 }
2d21ac55 604
6d2010ae 605 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
2d21ac55
A
606}
607
608
1c79356b
A
609/*
610 * Virtual memory maps provide for the mapping, protection,
611 * and sharing of virtual memory objects. In addition,
612 * this module provides for an efficient virtual copy of
613 * memory from one map to another.
614 *
615 * Synchronization is required prior to most operations.
616 *
617 * Maps consist of an ordered doubly-linked list of simple
618 * entries; a single hint is used to speed up lookups.
619 *
620 * Sharing maps have been deleted from this version of Mach.
621 * All shared objects are now mapped directly into the respective
622 * maps. This requires a change in the copy on write strategy;
623 * the asymmetric (delayed) strategy is used for shared temporary
624 * objects instead of the symmetric (shadow) strategy. All maps
625 * are now "top level" maps (either task map, kernel map or submap
5ba3f43e 626 * of the kernel map).
1c79356b
A
627 *
628 * Since portions of maps are specified by start/end addresses,
629 * which may not align with existing map entries, all
630 * routines merely "clip" entries to these start/end values.
631 * [That is, an entry is split into two, bordering at a
632 * start or end value.] Note that these clippings may not
633 * always be necessary (as the two resulting entries are then
634 * not changed); however, the clipping is done for convenience.
635 * No attempt is currently made to "glue back together" two
636 * abutting entries.
637 *
638 * The symmetric (shadow) copy strategy implements virtual copy
639 * by copying VM object references from one map to
640 * another, and then marking both regions as copy-on-write.
641 * It is important to note that only one writeable reference
642 * to a VM object region exists in any map when this strategy
643 * is used -- this means that shadow object creation can be
644 * delayed until a write operation occurs. The asymmetric (delayed)
645 * strategy allows multiple maps to have writeable references to
646 * the same region of a vm object, and hence cannot delay creating
647 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
648 * Copying of permanent objects is completely different; see
649 * vm_object_copy_strategically() in vm_object.c.
650 */
651
f427ee49
A
652static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone; /* zone for vm_map structures */
653static SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
654static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone; /* zone for vm_map_copy structures */
655
656SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_zone; /* zone for vm_map_entry structures */
657SECURITY_READ_ONLY_LATE(zone_t) vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
658
659#define VM_MAP_ZONE_NAME "maps"
660#define VM_MAP_ZFLAGS ( \
661 ZC_NOENCRYPT | \
662 ZC_NOGC | \
663 ZC_NOGZALLOC | \
664 ZC_ALLOW_FOREIGN)
665
666#define VME_RESERVED_ZONE_NAME "Reserved VM map entries"
667#define VM_MAP_RESERVED_ZFLAGS ( \
668 ZC_NOENCRYPT | \
669 ZC_ALLOW_FOREIGN | \
670 ZC_NOCALLOUT | \
671 ZC_NOGZALLOC | \
672 ZC_KASAN_NOQUARANTINE | \
673 ZC_NOGC)
674
675#define VM_MAP_HOLES_ZONE_NAME "VM map holes"
676#define VM_MAP_HOLES_ZFLAGS ( \
677 ZC_NOENCRYPT | \
678 ZC_NOGC | \
679 ZC_NOGZALLOC | \
680 ZC_ALLOW_FOREIGN)
1c79356b 681
f427ee49
A
682/*
683 * Asserts that a vm_map_copy object is coming from the
684 * vm_map_copy_zone to ensure that it isn't a fake constructed
685 * anywhere else.
686 */
687static inline void
688vm_map_copy_require(struct vm_map_copy *copy)
689{
690 zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
691}
1c79356b
A
692
693/*
c3c9b80d
A
694 * vm_map_require:
695 *
696 * Ensures that the argument is memory allocated from the genuine
697 * vm map zone. (See zone_id_require_allow_foreign).
1c79356b 698 */
c3c9b80d
A
699void
700vm_map_require(vm_map_t map)
701{
702 zone_id_require_allow_foreign(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
703}
1c79356b 704
f427ee49
A
705static __startup_data vm_offset_t map_data;
706static __startup_data vm_size_t map_data_size;
707static __startup_data vm_offset_t kentry_data;
708static __startup_data vm_size_t kentry_data_size;
709static __startup_data vm_offset_t map_holes_data;
710static __startup_data vm_size_t map_holes_data_size;
1c79356b 711
f427ee49 712#if XNU_TARGET_OS_OSX
b0d623f7 713#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
f427ee49
A
714#else /* XNU_TARGET_OS_OSX */
715#define NO_COALESCE_LIMIT 0
716#endif /* XNU_TARGET_OS_OSX */
1c79356b 717
55e303ae 718/* Skip acquiring locks if we're in the midst of a kernel core dump */
b0d623f7 719unsigned int not_in_kdp = 1;
55e303ae 720
6d2010ae
A
721unsigned int vm_map_set_cache_attr_count = 0;
722
723kern_return_t
724vm_map_set_cache_attr(
0a7de745
A
725 vm_map_t map,
726 vm_map_offset_t va)
6d2010ae 727{
0a7de745
A
728 vm_map_entry_t map_entry;
729 vm_object_t object;
730 kern_return_t kr = KERN_SUCCESS;
6d2010ae
A
731
732 vm_map_lock_read(map);
733
734 if (!vm_map_lookup_entry(map, va, &map_entry) ||
735 map_entry->is_sub_map) {
736 /*
737 * that memory is not properly mapped
738 */
739 kr = KERN_INVALID_ARGUMENT;
740 goto done;
741 }
3e170ce0 742 object = VME_OBJECT(map_entry);
6d2010ae
A
743
744 if (object == VM_OBJECT_NULL) {
745 /*
746 * there should be a VM object here at this point
747 */
748 kr = KERN_INVALID_ARGUMENT;
749 goto done;
750 }
751 vm_object_lock(object);
752 object->set_cache_attr = TRUE;
753 vm_object_unlock(object);
754
755 vm_map_set_cache_attr_count++;
756done:
757 vm_map_unlock_read(map);
758
759 return kr;
760}
761
762
593a1d5f
A
763#if CONFIG_CODE_DECRYPTION
764/*
765 * vm_map_apple_protected:
5ba3f43e 766 * This remaps the requested part of the object with an object backed by
593a1d5f
A
767 * the decrypting pager.
768 * crypt_info contains entry points and session data for the crypt module.
769 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
770 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
771 */
0c530ab8
A
772kern_return_t
773vm_map_apple_protected(
0a7de745
A
774 vm_map_t map,
775 vm_map_offset_t start,
776 vm_map_offset_t end,
777 vm_object_offset_t crypto_backing_offset,
f427ee49
A
778 struct pager_crypt_info *crypt_info,
779 uint32_t cryptid)
0c530ab8 780{
0a7de745
A
781 boolean_t map_locked;
782 kern_return_t kr;
783 vm_map_entry_t map_entry;
3e170ce0 784 struct vm_map_entry tmp_entry;
0a7de745
A
785 memory_object_t unprotected_mem_obj;
786 vm_object_t protected_object;
787 vm_map_offset_t map_addr;
788 vm_map_offset_t start_aligned, end_aligned;
789 vm_object_offset_t crypto_start, crypto_end;
790 int vm_flags;
5ba3f43e 791 vm_map_kernel_flags_t vmk_flags;
c3c9b80d 792 boolean_t cache_pager;
5ba3f43e
A
793
794 vm_flags = 0;
795 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
0c530ab8 796
3e170ce0
A
797 map_locked = FALSE;
798 unprotected_mem_obj = MEMORY_OBJECT_NULL;
0c530ab8 799
3e170ce0
A
800 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
801 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
802 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
803 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
b0d623f7 804
5ba3f43e
A
805#if __arm64__
806 /*
807 * "start" and "end" might be 4K-aligned but not 16K-aligned,
808 * so we might have to loop and establish up to 3 mappings:
809 *
810 * + the first 16K-page, which might overlap with the previous
811 * 4K-aligned mapping,
812 * + the center,
813 * + the last 16K-page, which might overlap with the next
814 * 4K-aligned mapping.
815 * Each of these mapping might be backed by a vnode pager (if
816 * properly page-aligned) or a "fourk_pager", itself backed by a
817 * vnode pager (if 4K-aligned but not page-aligned).
818 */
5ba3f43e 819#endif /* __arm64__ */
b0d623f7 820
3e170ce0
A
821 map_addr = start_aligned;
822 for (map_addr = start_aligned;
0a7de745
A
823 map_addr < end;
824 map_addr = tmp_entry.vme_end) {
3e170ce0
A
825 vm_map_lock(map);
826 map_locked = TRUE;
b0d623f7 827
3e170ce0
A
828 /* lookup the protected VM object */
829 if (!vm_map_lookup_entry(map,
0a7de745
A
830 map_addr,
831 &map_entry) ||
3e170ce0 832 map_entry->is_sub_map ||
f427ee49 833 VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
3e170ce0
A
834 /* that memory is not properly mapped */
835 kr = KERN_INVALID_ARGUMENT;
836 goto done;
837 }
b0d623f7 838
f427ee49
A
839 /* ensure mapped memory is mapped as executable except
840 * except for model decryption flow */
841 if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
842 !(map_entry->protection & VM_PROT_EXECUTE)) {
843 kr = KERN_INVALID_ARGUMENT;
844 goto done;
845 }
846
3e170ce0
A
847 /* get the protected object to be decrypted */
848 protected_object = VME_OBJECT(map_entry);
849 if (protected_object == VM_OBJECT_NULL) {
850 /* there should be a VM object here at this point */
851 kr = KERN_INVALID_ARGUMENT;
852 goto done;
853 }
854 /* ensure protected object stays alive while map is unlocked */
855 vm_object_reference(protected_object);
856
857 /* limit the map entry to the area we want to cover */
858 vm_map_clip_start(map, map_entry, start_aligned);
859 vm_map_clip_end(map, map_entry, end_aligned);
860
861 tmp_entry = *map_entry;
862 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
863 vm_map_unlock(map);
864 map_locked = FALSE;
865
866 /*
867 * This map entry might be only partially encrypted
868 * (if not fully "page-aligned").
869 */
870 crypto_start = 0;
871 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
872 if (tmp_entry.vme_start < start) {
873 if (tmp_entry.vme_start != start_aligned) {
874 kr = KERN_INVALID_ADDRESS;
875 }
876 crypto_start += (start - tmp_entry.vme_start);
877 }
878 if (tmp_entry.vme_end > end) {
879 if (tmp_entry.vme_end != end_aligned) {
880 kr = KERN_INVALID_ADDRESS;
881 }
882 crypto_end -= (tmp_entry.vme_end - end);
883 }
884
885 /*
886 * This "extra backing offset" is needed to get the decryption
887 * routine to use the right key. It adjusts for the possibly
888 * relative offset of an interposed "4K" pager...
889 */
890 if (crypto_backing_offset == (vm_object_offset_t) -1) {
891 crypto_backing_offset = VME_OFFSET(&tmp_entry);
892 }
0c530ab8 893
c3c9b80d
A
894 cache_pager = TRUE;
895#if XNU_TARGET_OS_OSX
896 if (vm_map_is_alien(map)) {
897 cache_pager = FALSE;
898 }
899#endif /* XNU_TARGET_OS_OSX */
900
3e170ce0
A
901 /*
902 * Lookup (and create if necessary) the protected memory object
903 * matching that VM object.
904 * If successful, this also grabs a reference on the memory object,
905 * to guarantee that it doesn't go away before we get a chance to map
906 * it.
907 */
908 unprotected_mem_obj = apple_protect_pager_setup(
909 protected_object,
910 VME_OFFSET(&tmp_entry),
911 crypto_backing_offset,
912 crypt_info,
913 crypto_start,
c3c9b80d
A
914 crypto_end,
915 cache_pager);
3e170ce0
A
916
917 /* release extra ref on protected object */
918 vm_object_deallocate(protected_object);
919
920 if (unprotected_mem_obj == NULL) {
921 kr = KERN_FAILURE;
922 goto done;
923 }
924
925 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
5ba3f43e
A
926 /* can overwrite an immutable mapping */
927 vmk_flags.vmkf_overwrite_immutable = TRUE;
928#if __arm64__
929 if (tmp_entry.used_for_jit &&
930 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
0a7de745 931 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
5ba3f43e
A
932 fourk_binary_compatibility_unsafe &&
933 fourk_binary_compatibility_allow_wx) {
934 printf("** FOURK_COMPAT [%d]: "
0a7de745
A
935 "allowing write+execute at 0x%llx\n",
936 proc_selfpid(), tmp_entry.vme_start);
5ba3f43e
A
937 vmk_flags.vmkf_map_jit = TRUE;
938 }
939#endif /* __arm64__ */
3e170ce0
A
940
941 /* map this memory object in place of the current one */
942 map_addr = tmp_entry.vme_start;
943 kr = vm_map_enter_mem_object(map,
0a7de745
A
944 &map_addr,
945 (tmp_entry.vme_end -
946 tmp_entry.vme_start),
947 (mach_vm_offset_t) 0,
948 vm_flags,
949 vmk_flags,
950 VM_KERN_MEMORY_NONE,
951 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
952 0,
953 TRUE,
954 tmp_entry.protection,
955 tmp_entry.max_protection,
956 tmp_entry.inheritance);
5ba3f43e 957 assertf(kr == KERN_SUCCESS,
0a7de745 958 "kr = 0x%x\n", kr);
5ba3f43e 959 assertf(map_addr == tmp_entry.vme_start,
0a7de745
A
960 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
961 (uint64_t)map_addr,
962 (uint64_t) tmp_entry.vme_start,
963 &tmp_entry);
3e170ce0
A
964
965#if VM_MAP_DEBUG_APPLE_PROTECT
39037602
A
966 if (vm_map_debug_apple_protect) {
967 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
0a7de745
A
968 " backing:[object:%p,offset:0x%llx,"
969 "crypto_backing_offset:0x%llx,"
970 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
971 map,
972 (uint64_t) map_addr,
973 (uint64_t) (map_addr + (tmp_entry.vme_end -
974 tmp_entry.vme_start)),
975 unprotected_mem_obj,
976 protected_object,
977 VME_OFFSET(&tmp_entry),
978 crypto_backing_offset,
979 crypto_start,
980 crypto_end);
39037602 981 }
3e170ce0 982#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
5ba3f43e 983
3e170ce0
A
984 /*
985 * Release the reference obtained by
986 * apple_protect_pager_setup().
987 * The mapping (if it succeeded) is now holding a reference on
988 * the memory object.
989 */
990 memory_object_deallocate(unprotected_mem_obj);
991 unprotected_mem_obj = MEMORY_OBJECT_NULL;
992
993 /* continue with next map entry */
994 crypto_backing_offset += (tmp_entry.vme_end -
0a7de745 995 tmp_entry.vme_start);
3e170ce0
A
996 crypto_backing_offset -= crypto_start;
997 }
998 kr = KERN_SUCCESS;
0c530ab8
A
999
1000done:
1001 if (map_locked) {
3e170ce0 1002 vm_map_unlock(map);
0c530ab8
A
1003 }
1004 return kr;
1005}
0a7de745 1006#endif /* CONFIG_CODE_DECRYPTION */
0c530ab8
A
1007
1008
f427ee49
A
1009LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
1010LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
1011LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);
b0d623f7 1012
f427ee49 1013#if XNU_TARGET_OS_OSX
d9a64523 1014int malloc_no_cow = 0;
f427ee49
A
1015#else /* XNU_TARGET_OS_OSX */
1016int malloc_no_cow = 1;
1017#endif /* XNU_TARGET_OS_OSX */
d9a64523 1018uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
4ba76501
A
1019#if DEBUG
1020int vm_check_map_sanity = 0;
1021#endif
b0d623f7 1022
593a1d5f
A
1023/*
1024 * vm_map_init:
1025 *
1026 * Initialize the vm_map module. Must be called before
1027 * any other vm_map routines.
1028 *
1029 * Map and entry structures are allocated from zones -- we must
1030 * initialize those zones.
1031 *
1032 * There are three zones of interest:
1033 *
1034 * vm_map_zone: used to allocate maps.
1035 * vm_map_entry_zone: used to allocate map entries.
7ddcb079 1036 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
593a1d5f
A
1037 *
1038 * The kernel allocates map entries from a special zone that is initially
1039 * "crammed" with memory. It would be difficult (perhaps impossible) for
1040 * the kernel to allocate more memory to a entry zone when it became
1041 * empty since the very act of allocating memory implies the creation
1042 * of a new entry.
1043 */
f427ee49 1044__startup_func
1c79356b 1045void
f427ee49 1046vm_map_init(void)
1c79356b 1047{
316670eb
A
1048 const char *mez_name = "VM map entries";
1049
f427ee49
A
1050
1051#if MACH_ASSERT
1052 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1053 sizeof(debug4k_filter));
1054#endif /* MACH_ASSERT */
1055
c3c9b80d
A
1056 vm_map_zone = zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1057 VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);
f427ee49
A
1058
1059 vm_map_entry_zone = zone_create(mez_name, sizeof(struct vm_map_entry),
1060 ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT);
3e170ce0 1061
1c79356b 1062 /*
f427ee49
A
1063 * Don't quarantine because we always need elements available
1064 * Disallow GC on this zone... to aid the GC.
1c79356b 1065 */
f427ee49
A
1066 vm_map_entry_reserved_zone = zone_create_ext(VME_RESERVED_ZONE_NAME,
1067 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
c3c9b80d 1068 ZONE_ID_ANY, NULL);
f427ee49
A
1069
1070 vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1071 ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1072
1073 vm_map_holes_zone = zone_create(VM_MAP_HOLES_ZONE_NAME,
1074 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS);
3e170ce0 1075
5ba3f43e 1076 /*
3e170ce0
A
1077 * Add the stolen memory to zones, adjust zone size and stolen counts.
1078 */
c3c9b80d
A
1079 zone_cram_foreign(vm_map_zone, map_data, map_data_size);
1080 zone_cram_foreign(vm_map_entry_reserved_zone, kentry_data, kentry_data_size);
1081 zone_cram_foreign(vm_map_holes_zone, map_holes_data, map_holes_data_size);
5ba3f43e 1082
0a7de745
A
1083 /*
1084 * Since these are covered by zones, remove them from stolen page accounting.
1085 */
3e170ce0
A
1086 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1087
39037602
A
1088#if VM_MAP_DEBUG_APPLE_PROTECT
1089 PE_parse_boot_argn("vm_map_debug_apple_protect",
0a7de745
A
1090 &vm_map_debug_apple_protect,
1091 sizeof(vm_map_debug_apple_protect));
39037602
A
1092#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1093#if VM_MAP_DEBUG_APPLE_FOURK
1094 PE_parse_boot_argn("vm_map_debug_fourk",
0a7de745
A
1095 &vm_map_debug_fourk,
1096 sizeof(vm_map_debug_fourk));
39037602 1097#endif /* VM_MAP_DEBUG_FOURK */
5ba3f43e 1098 PE_parse_boot_argn("vm_map_executable_immutable",
0a7de745
A
1099 &vm_map_executable_immutable,
1100 sizeof(vm_map_executable_immutable));
d9a64523 1101 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
0a7de745
A
1102 &vm_map_executable_immutable_verbose,
1103 sizeof(vm_map_executable_immutable_verbose));
d9a64523
A
1104
1105 PE_parse_boot_argn("malloc_no_cow",
0a7de745
A
1106 &malloc_no_cow,
1107 sizeof(malloc_no_cow));
d9a64523
A
1108 if (malloc_no_cow) {
1109 vm_memory_malloc_no_cow_mask = 0ULL;
1110 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1111 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
0a7de745 1112 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
d9a64523
A
1113 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1114// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1115// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1116 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1117 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1118 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1119 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1120// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1121 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
0a7de745
A
1122 &vm_memory_malloc_no_cow_mask,
1123 sizeof(vm_memory_malloc_no_cow_mask));
d9a64523 1124 }
4ba76501
A
1125
1126#if DEBUG
1127 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1128 if (vm_check_map_sanity) {
1129 kprintf("VM sanity checking enabled\n");
1130 } else {
1131 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1132 }
1133#endif /* DEBUG */
f427ee49
A
1134
1135#if DEVELOPMENT || DEBUG
1136 PE_parse_boot_argn("panic_on_unsigned_execute",
1137 &panic_on_unsigned_execute,
1138 sizeof(panic_on_unsigned_execute));
1139#endif /* DEVELOPMENT || DEBUG */
1c79356b
A
1140}
1141
f427ee49
A
1142__startup_func
1143static void
1144vm_map_steal_memory(void)
1c79356b 1145{
f427ee49 1146 uint16_t kentry_initial_pages;
c3c9b80d 1147 uint16_t zone_foreign_pages;
7ddcb079 1148
f427ee49
A
1149 map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
1150 sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);
1c79356b 1151
1c79356b 1152 /*
7ddcb079
A
1153 * kentry_initial_pages corresponds to the number of kernel map entries
1154 * required during bootstrap until the asynchronous replenishment
1155 * scheme is activated and/or entries are available from the general
1156 * map entry pool.
1c79356b 1157 */
c3c9b80d
A
1158#if defined(__LP64__)
1159 kentry_initial_pages = (uint16_t)atop(16 * 4096);
7ddcb079
A
1160#else
1161 kentry_initial_pages = 6;
1c79356b 1162#endif
316670eb
A
1163
1164#if CONFIG_GZALLOC
1165 /* If using the guard allocator, reserve more memory for the kernel
1166 * reserved map entry pool.
0a7de745
A
1167 */
1168 if (gzalloc_enabled()) {
316670eb 1169 kentry_initial_pages *= 1024;
0a7de745 1170 }
316670eb 1171#endif
c3c9b80d
A
1172 if (PE_parse_boot_argn("zone_foreign_pages", &zone_foreign_pages,
1173 sizeof(zone_foreign_pages))) {
1174 kentry_initial_pages = zone_foreign_pages;
1175 }
316670eb 1176
f427ee49
A
1177 kentry_data_size = zone_get_foreign_alloc_size(VME_RESERVED_ZONE_NAME,
1178 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
1179 kentry_initial_pages);
3e170ce0 1180
f427ee49
A
1181 map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
1182 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1183 kentry_initial_pages);
1184
1185 /*
1186 * Steal a contiguous range of memory so that a simple range check
1187 * can validate foreign addresses being freed/crammed to these
1188 * zones
1189 */
1190 vm_size_t total_size;
1191 if (os_add3_overflow(map_data_size, kentry_data_size,
1192 map_holes_data_size, &total_size)) {
1193 panic("vm_map_steal_memory: overflow in amount of memory requested");
1194 }
1195 map_data = zone_foreign_mem_init(total_size);
1196 kentry_data = map_data + map_data_size;
1197 map_holes_data = kentry_data + kentry_data_size;
1c79356b 1198}
f427ee49 1199STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1c79356b 1200
5ba3f43e
A
1201boolean_t vm_map_supports_hole_optimization = FALSE;
1202
3e170ce0 1203void
0a7de745
A
1204vm_kernel_reserved_entry_init(void)
1205{
c3c9b80d 1206 zone_replenish_configure(vm_map_entry_reserved_zone);
5ba3f43e
A
1207
1208 /*
1209 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1210 */
c3c9b80d 1211 zone_replenish_configure(vm_map_holes_zone);
5ba3f43e 1212 vm_map_supports_hole_optimization = TRUE;
3e170ce0
A
1213}
1214
1215void
1216vm_map_disable_hole_optimization(vm_map_t map)
1217{
0a7de745 1218 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
3e170ce0
A
1219
1220 if (map->holelistenabled) {
d9a64523 1221 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
1222
1223 while (hole_entry != NULL) {
3e170ce0
A
1224 next_hole_entry = hole_entry->vme_next;
1225
1226 hole_entry->vme_next = NULL;
1227 hole_entry->vme_prev = NULL;
1228 zfree(vm_map_holes_zone, hole_entry);
1229
1230 if (next_hole_entry == head_entry) {
1231 hole_entry = NULL;
1232 } else {
1233 hole_entry = next_hole_entry;
1234 }
1235 }
1236
1237 map->holes_list = NULL;
1238 map->holelistenabled = FALSE;
1239
1240 map->first_free = vm_map_first_entry(map);
1241 SAVE_HINT_HOLE_WRITE(map, NULL);
1242 }
1243}
1244
1245boolean_t
0a7de745
A
1246vm_kernel_map_is_kernel(vm_map_t map)
1247{
1248 return map->pmap == kernel_pmap;
7ddcb079
A
1249}
1250
1c79356b
A
1251/*
1252 * vm_map_create:
1253 *
1254 * Creates and returns a new empty VM map with
1255 * the given physical map structure, and having
1256 * the given lower and upper address bounds.
1257 */
3e170ce0 1258
1c79356b
A
1259vm_map_t
1260vm_map_create(
0a7de745
A
1261 pmap_t pmap,
1262 vm_map_offset_t min,
1263 vm_map_offset_t max,
1264 boolean_t pageable)
d9a64523
A
1265{
1266 int options;
1267
1268 options = 0;
1269 if (pageable) {
1270 options |= VM_MAP_CREATE_PAGEABLE;
1271 }
1272 return vm_map_create_options(pmap, min, max, options);
1273}
1274
1275vm_map_t
1276vm_map_create_options(
0a7de745
A
1277 pmap_t pmap,
1278 vm_map_offset_t min,
d9a64523 1279 vm_map_offset_t max,
0a7de745 1280 int options)
1c79356b 1281{
0a7de745
A
1282 vm_map_t result;
1283 struct vm_map_links *hole_entry = NULL;
1c79356b 1284
d9a64523
A
1285 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1286 /* unknown option */
1287 return VM_MAP_NULL;
1288 }
1289
1c79356b 1290 result = (vm_map_t) zalloc(vm_map_zone);
0a7de745 1291 if (result == VM_MAP_NULL) {
1c79356b 1292 panic("vm_map_create");
0a7de745 1293 }
1c79356b
A
1294
1295 vm_map_first_entry(result) = vm_map_to_entry(result);
1296 vm_map_last_entry(result) = vm_map_to_entry(result);
1297 result->hdr.nentries = 0;
d9a64523
A
1298 if (options & VM_MAP_CREATE_PAGEABLE) {
1299 result->hdr.entries_pageable = TRUE;
1300 } else {
1301 result->hdr.entries_pageable = FALSE;
1302 }
1c79356b 1303
0a7de745 1304 vm_map_store_init( &(result->hdr));
5ba3f43e 1305
39236c6e
A
1306 result->hdr.page_shift = PAGE_SHIFT;
1307
1c79356b 1308 result->size = 0;
0a7de745 1309 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
2d21ac55 1310 result->user_wire_size = 0;
f427ee49 1311#if XNU_TARGET_OS_OSX
5ba3f43e 1312 result->vmmap_high_start = 0;
cb323159
A
1313#endif
1314 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1c79356b
A
1315 result->pmap = pmap;
1316 result->min_offset = min;
1317 result->max_offset = max;
1318 result->wiring_required = FALSE;
1319 result->no_zero_fill = FALSE;
316670eb 1320 result->mapped_in_other_pmaps = FALSE;
1c79356b 1321 result->wait_for_space = FALSE;
b0d623f7 1322 result->switch_protect = FALSE;
6d2010ae
A
1323 result->disable_vmentry_reuse = FALSE;
1324 result->map_disallow_data_exec = FALSE;
39037602 1325 result->is_nested_map = FALSE;
a39ff7e2 1326 result->map_disallow_new_exec = FALSE;
ea3f0419 1327 result->terminated = FALSE;
f427ee49 1328 result->cs_enforcement = FALSE;
6d2010ae 1329 result->highest_entry_end = 0;
1c79356b
A
1330 result->first_free = vm_map_to_entry(result);
1331 result->hint = vm_map_to_entry(result);
0a7de745 1332 result->jit_entry_exists = FALSE;
f427ee49
A
1333 result->is_alien = FALSE;
1334 result->reserved_regions = FALSE;
c3c9b80d 1335 result->single_jit = FALSE;
3e170ce0 1336
d9a64523
A
1337 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1338 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1339 result->has_corpse_footprint = TRUE;
1340 result->holelistenabled = FALSE;
1341 result->vmmap_corpse_footprint = NULL;
1342 } else {
1343 result->has_corpse_footprint = FALSE;
1344 if (vm_map_supports_hole_optimization) {
1345 hole_entry = zalloc(vm_map_holes_zone);
3e170ce0 1346
d9a64523 1347 hole_entry->start = min;
5ba3f43e 1348#if defined(__arm__) || defined(__arm64__)
d9a64523 1349 hole_entry->end = result->max_offset;
5ba3f43e 1350#else
d9a64523 1351 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
5ba3f43e 1352#endif
d9a64523
A
1353 result->holes_list = result->hole_hint = hole_entry;
1354 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1355 result->holelistenabled = TRUE;
1356 } else {
1357 result->holelistenabled = FALSE;
1358 }
3e170ce0
A
1359 }
1360
1c79356b 1361 vm_map_lock_init(result);
b0d623f7 1362 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
5ba3f43e 1363
0a7de745 1364 return result;
1c79356b
A
1365}
1366
f427ee49
A
1367vm_map_size_t
1368vm_map_adjusted_size(vm_map_t map)
1369{
1370 struct vm_reserved_region *regions = NULL;
1371 size_t num_regions = 0;
1372 mach_vm_size_t reserved_size = 0, map_size = 0;
1373
1374 if (map == NULL || (map->size == 0)) {
1375 return 0;
1376 }
1377
1378 map_size = map->size;
1379
1380 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1381 /*
1382 * No special reserved regions or not an exotic map or the task
1383 * is terminating and these special regions might have already
1384 * been deallocated.
1385 */
1386 return map_size;
1387 }
1388
1389 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
1390 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1391
1392 while (num_regions) {
1393 reserved_size += regions[--num_regions].vmrr_size;
1394 }
1395
1396 /*
1397 * There are a few places where the map is being switched out due to
1398 * 'termination' without that bit being set (e.g. exec and corpse purging).
1399 * In those cases, we could have the map's regions being deallocated on
1400 * a core while some accounting process is trying to get the map's size.
1401 * So this assert can't be enabled till all those places are uniform in
1402 * their use of the 'map->terminated' bit.
1403 *
1404 * assert(map_size >= reserved_size);
1405 */
1406
1407 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1408}
1409
1c79356b
A
1410/*
1411 * vm_map_entry_create: [ internal use only ]
1412 *
1413 * Allocates a VM map entry for insertion in the
1414 * given map (or map copy). No fields are filled.
1415 */
0a7de745 1416#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1c79356b 1417
0a7de745 1418#define vm_map_copy_entry_create(copy, map_locked) \
7ddcb079
A
1419 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1420unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1c79356b 1421
91447636 1422static vm_map_entry_t
1c79356b 1423_vm_map_entry_create(
0a7de745 1424 struct vm_map_header *map_header, boolean_t __unused map_locked)
1c79356b 1425{
0a7de745
A
1426 zone_t zone;
1427 vm_map_entry_t entry;
1c79356b 1428
7ddcb079
A
1429 zone = vm_map_entry_zone;
1430
1431 assert(map_header->entries_pageable ? !map_locked : TRUE);
1432
1433 if (map_header->entries_pageable) {
1434 entry = (vm_map_entry_t) zalloc(zone);
0a7de745 1435 } else {
f427ee49 1436 entry = (vm_map_entry_t) zalloc_noblock(zone);
7ddcb079
A
1437
1438 if (entry == VM_MAP_ENTRY_NULL) {
1439 zone = vm_map_entry_reserved_zone;
1440 entry = (vm_map_entry_t) zalloc(zone);
1441 OSAddAtomic(1, &reserved_zalloc_count);
0a7de745 1442 } else {
7ddcb079 1443 OSAddAtomic(1, &nonreserved_zalloc_count);
0a7de745 1444 }
7ddcb079 1445 }
1c79356b 1446
0a7de745 1447 if (entry == VM_MAP_ENTRY_NULL) {
1c79356b 1448 panic("vm_map_entry_create");
0a7de745 1449 }
f427ee49 1450 *entry = vm_map_entry_template;
7ddcb079
A
1451 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1452
0a7de745
A
1453 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1454#if MAP_ENTRY_CREATION_DEBUG
39236c6e 1455 entry->vme_creation_maphdr = map_header;
39037602 1456 backtrace(&entry->vme_creation_bt[0],
cb323159 1457 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
316670eb 1458#endif
0a7de745 1459 return entry;
1c79356b
A
1460}
1461
1462/*
1463 * vm_map_entry_dispose: [ internal use only ]
1464 *
1465 * Inverse of vm_map_entry_create.
2d21ac55 1466 *
0a7de745 1467 * write map lock held so no need to
2d21ac55 1468 * do anything special to insure correctness
0a7de745 1469 * of the stores
1c79356b 1470 */
0a7de745 1471#define vm_map_entry_dispose(map, entry) \
6d2010ae 1472 _vm_map_entry_dispose(&(map)->hdr, (entry))
1c79356b 1473
f427ee49 1474#define vm_map_copy_entry_dispose(copy, entry) \
1c79356b
A
1475 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1476
91447636 1477static void
1c79356b 1478_vm_map_entry_dispose(
0a7de745
A
1479 struct vm_map_header *map_header,
1480 vm_map_entry_t entry)
1c79356b 1481{
0a7de745 1482 zone_t zone;
1c79356b 1483
0a7de745 1484 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
2d21ac55 1485 zone = vm_map_entry_zone;
0a7de745 1486 } else {
7ddcb079 1487 zone = vm_map_entry_reserved_zone;
0a7de745 1488 }
7ddcb079
A
1489
1490 if (!map_header->entries_pageable) {
0a7de745 1491 if (zone == vm_map_entry_zone) {
7ddcb079 1492 OSAddAtomic(-1, &nonreserved_zalloc_count);
0a7de745 1493 } else {
7ddcb079 1494 OSAddAtomic(-1, &reserved_zalloc_count);
0a7de745 1495 }
7ddcb079 1496 }
1c79356b 1497
91447636 1498 zfree(zone, entry);
1c79356b
A
1499}
1500
#if MACH_ASSERT
/* when FALSE, first_free_is_valid() reports TRUE without checking */
static boolean_t first_free_check = FALSE;

/*
 * Assertion helper: validate the map's "first_free" hint through the
 * map store layer, but only when first_free_check is enabled.
 */
boolean_t
first_free_is_valid(
	vm_map_t        map)
{
	return first_free_check ? first_free_is_valid_store(map) : TRUE;
}
#endif /* MACH_ASSERT */
1c79356b 1514
1c79356b 1515
0a7de745 1516#define vm_map_copy_entry_link(copy, after_where, entry) \
6d2010ae 1517 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1c79356b 1518
0a7de745 1519#define vm_map_copy_entry_unlink(copy, entry) \
6d2010ae 1520 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1c79356b 1521
1c79356b
A
1522/*
1523 * vm_map_destroy:
1524 *
1525 * Actually destroy a map.
1526 */
1527void
1528vm_map_destroy(
0a7de745
A
1529 vm_map_t map,
1530 int flags)
5ba3f43e 1531{
1c79356b 1532 vm_map_lock(map);
2d21ac55 1533
3e170ce0
A
1534 /* final cleanup: no need to unnest shared region */
1535 flags |= VM_MAP_REMOVE_NO_UNNESTING;
5ba3f43e
A
1536 /* final cleanup: ok to remove immutable mappings */
1537 flags |= VM_MAP_REMOVE_IMMUTABLE;
d9a64523
A
1538 /* final cleanup: allow gaps in range */
1539 flags |= VM_MAP_REMOVE_GAPS_OK;
3e170ce0 1540
2d21ac55
A
1541 /* clean up regular map entries */
1542 (void) vm_map_delete(map, map->min_offset, map->max_offset,
0a7de745 1543 flags, VM_MAP_NULL);
f427ee49
A
1544 /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1545#if !defined(__arm__)
2d21ac55 1546 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
0a7de745 1547 flags, VM_MAP_NULL);
f427ee49 1548#endif /* !__arm__ */
6d2010ae 1549
3e170ce0 1550 vm_map_disable_hole_optimization(map);
d9a64523
A
1551 vm_map_corpse_footprint_destroy(map);
1552
1c79356b
A
1553 vm_map_unlock(map);
1554
2d21ac55 1555 assert(map->hdr.nentries == 0);
5ba3f43e 1556
0a7de745 1557 if (map->pmap) {
55e303ae 1558 pmap_destroy(map->pmap);
0a7de745 1559 }
1c79356b 1560
39037602
A
1561 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1562 /*
1563 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1564 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1565 * structure or kalloc'ed via lck_mtx_init.
1566 * An example is s_lock_ext within struct _vm_map.
1567 *
1568 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1569 * can add another tag to detect embedded vs alloc'ed indirect external
1570 * mutexes but that'll be additional checks in the lock path and require
1571 * updating dependencies for the old vs new tag.
1572 *
1573 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1574 * just when lock debugging is ON, we choose to forego explicitly destroying
1575 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1576 * count on vm_map_lck_grp, which has no serious side-effect.
1577 */
1578 } else {
1579 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1580 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1581 }
1582
91447636 1583 zfree(vm_map_zone, map);
1c79356b
A
1584}
1585
5ba3f43e
A
1586/*
1587 * Returns pid of the task with the largest number of VM map entries.
1588 * Used in the zone-map-exhaustion jetsam path.
1589 */
1590pid_t
1591find_largest_process_vm_map_entries(void)
1592{
1593 pid_t victim_pid = -1;
1594 int max_vm_map_entries = 0;
1595 task_t task = TASK_NULL;
1596 queue_head_t *task_list = &tasks;
1597
1598 lck_mtx_lock(&tasks_threads_lock);
1599 queue_iterate(task_list, task, task_t, tasks) {
0a7de745 1600 if (task == kernel_task || !task->active) {
5ba3f43e 1601 continue;
0a7de745 1602 }
5ba3f43e
A
1603
1604 vm_map_t task_map = task->map;
1605 if (task_map != VM_MAP_NULL) {
1606 int task_vm_map_entries = task_map->hdr.nentries;
1607 if (task_vm_map_entries > max_vm_map_entries) {
1608 max_vm_map_entries = task_vm_map_entries;
1609 victim_pid = pid_from_task(task);
1610 }
1611 }
1612 }
1613 lck_mtx_unlock(&tasks_threads_lock);
1614
1615 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1616 return victim_pid;
1617}
1618
1c79356b 1619
1c79356b
A
1620/*
1621 * vm_map_lookup_entry: [ internal use only ]
1622 *
5ba3f43e
A
1623 * Calls into the vm map store layer to find the map
1624 * entry containing (or immediately preceding) the
6d2010ae 1625 * specified address in the given map; the entry is returned
1c79356b
A
1626 * in the "entry" parameter. The boolean
1627 * result indicates whether the address is
1628 * actually contained in the map.
1629 */
1630boolean_t
1631vm_map_lookup_entry(
0a7de745
A
1632 vm_map_t map,
1633 vm_map_offset_t address,
1634 vm_map_entry_t *entry) /* OUT */
1c79356b 1635{
0a7de745 1636 return vm_map_store_lookup_entry( map, address, entry );
1c79356b
A
1637}
1638
1639/*
1640 * Routine: vm_map_find_space
1641 * Purpose:
1642 * Allocate a range in the specified virtual address map,
1643 * returning the entry allocated for that range.
1644 * Used by kmem_alloc, etc.
1645 *
1646 * The map must be NOT be locked. It will be returned locked
1647 * on KERN_SUCCESS, unlocked on failure.
1648 *
1649 * If an entry is allocated, the object/offset fields
1650 * are initialized to zero.
f427ee49
A
1651 *
1652 * If VM_MAP_FIND_LAST_FREE flag is set, allocate from end of map. This
1653 * is currently only used for allocating memory for zones backing
1654 * one of the kalloc heaps.(rdar://65832263)
1c79356b
A
1655 */
1656kern_return_t
1657vm_map_find_space(
f427ee49 1658 vm_map_t map,
0a7de745
A
1659 vm_map_offset_t *address, /* OUT */
1660 vm_map_size_t size,
1661 vm_map_offset_t mask,
f427ee49 1662 int flags,
0a7de745
A
1663 vm_map_kernel_flags_t vmk_flags,
1664 vm_tag_t tag,
1665 vm_map_entry_t *o_entry) /* OUT */
1c79356b 1666{
f427ee49
A
1667 vm_map_entry_t entry, new_entry, hole_entry;
1668 vm_map_offset_t start;
1669 vm_map_offset_t end;
91447636
A
1670
1671 if (size == 0) {
1672 *address = 0;
1673 return KERN_INVALID_ARGUMENT;
1674 }
1c79356b 1675
7ddcb079 1676 new_entry = vm_map_entry_create(map, FALSE);
f427ee49 1677 vm_map_lock(map);
1c79356b 1678
f427ee49
A
1679 if (flags & VM_MAP_FIND_LAST_FREE) {
1680 assert(!map->disable_vmentry_reuse);
1681 /* TODO: Make backward lookup generic and support guard pages */
1682 assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
1683 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
1c79356b 1684
f427ee49
A
1685 /* Allocate space from end of map */
1686 vm_map_store_find_last_free(map, &entry);
1c79356b 1687
f427ee49
A
1688 if (!entry) {
1689 goto noSpace;
1690 }
3e170ce0 1691
f427ee49
A
1692 if (entry == vm_map_to_entry(map)) {
1693 end = map->max_offset;
1694 } else {
1695 end = entry->vme_start;
1696 }
1697
1698 while (TRUE) {
1699 vm_map_entry_t prev;
1700
1701 start = end - size;
1702
1703 if ((start < map->min_offset) || end < start) {
1704 goto noSpace;
3e170ce0
A
1705 }
1706
f427ee49
A
1707 prev = entry->vme_prev;
1708 entry = prev;
1709
1710 if (prev == vm_map_to_entry(map)) {
1711 break;
0a7de745 1712 }
1c79356b 1713
f427ee49
A
1714 if (prev->vme_end <= start) {
1715 break;
1716 }
1c79356b 1717
f427ee49
A
1718 /*
1719 * Didn't fit -- move to the next entry.
1720 */
1721
1722 end = entry->vme_start;
1723 }
1724 } else {
1725 if (vmk_flags.vmkf_guard_after) {
1726 /* account for the back guard page in the size */
1727 size += VM_MAP_PAGE_SIZE(map);
1728 }
1c79356b
A
1729
1730 /*
f427ee49
A
1731 * Look for the first possible address; if there's already
1732 * something at this address, we have to start after it.
1c79356b
A
1733 */
1734
f427ee49
A
1735 if (map->disable_vmentry_reuse == TRUE) {
1736 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1737 } else {
1738 if (map->holelistenabled) {
1739 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
5ba3f43e 1740
f427ee49
A
1741 if (hole_entry == NULL) {
1742 /*
1743 * No more space in the map?
1744 */
1745 goto noSpace;
1746 }
1c79356b 1747
f427ee49
A
1748 entry = hole_entry;
1749 start = entry->vme_start;
1750 } else {
1751 assert(first_free_is_valid(map));
1752 if ((entry = map->first_free) == vm_map_to_entry(map)) {
1753 start = map->min_offset;
1754 } else {
1755 start = entry->vme_end;
1756 }
1757 }
1c79356b
A
1758 }
1759
f427ee49
A
1760 /*
1761 * In any case, the "entry" always precedes
1762 * the proposed new region throughout the loop:
1763 */
1764
1765 while (TRUE) {
1766 vm_map_entry_t next;
1c79356b 1767
3e170ce0 1768 /*
f427ee49
A
1769 * Find the end of the proposed new region.
1770 * Be sure we didn't go beyond the end, or
1771 * wrap around the address.
3e170ce0 1772 */
1c79356b 1773
f427ee49
A
1774 if (vmk_flags.vmkf_guard_before) {
1775 /* reserve space for the front guard page */
1776 start += VM_MAP_PAGE_SIZE(map);
0a7de745 1777 }
f427ee49 1778 end = ((start + mask) & ~mask);
3e170ce0 1779
f427ee49
A
1780 if (end < start) {
1781 goto noSpace;
0a7de745 1782 }
f427ee49
A
1783 start = end;
1784 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1785 end += size;
1786 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 1787
f427ee49
A
1788 if ((end > map->max_offset) || (end < start)) {
1789 goto noSpace;
1790 }
1c79356b 1791
f427ee49 1792 next = entry->vme_next;
3e170ce0 1793
f427ee49
A
1794 if (map->holelistenabled) {
1795 if (entry->vme_end >= end) {
1796 break;
1797 }
1798 } else {
3e170ce0 1799 /*
f427ee49
A
1800 * If there are no more entries, we must win.
1801 *
1802 * OR
1803 *
1804 * If there is another entry, it must be
1805 * after the end of the potential new region.
3e170ce0 1806 */
f427ee49
A
1807
1808 if (next == vm_map_to_entry(map)) {
1809 break;
1810 }
1811
1812 if (next->vme_start >= end) {
1813 break;
1814 }
3e170ce0 1815 }
f427ee49
A
1816
1817 /*
1818 * Didn't fit -- move to the next entry.
1819 */
1820
1821 entry = next;
1822
1823 if (map->holelistenabled) {
1824 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1825 /*
1826 * Wrapped around
1827 */
1828 goto noSpace;
1829 }
1830 start = entry->vme_start;
1831 } else {
1832 start = entry->vme_end;
1833 }
1834 }
1835
1836 if (vmk_flags.vmkf_guard_before) {
1837 /* go back for the front guard page */
1838 start -= VM_MAP_PAGE_SIZE(map);
3e170ce0
A
1839 }
1840 }
1841
1842 if (map->holelistenabled) {
1843 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1844 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1845 }
1c79356b
A
1846 }
1847
1848 /*
1849 * At this point,
1850 * "start" and "end" should define the endpoints of the
1851 * available new range, and
1852 * "entry" should refer to the region before the new
1853 * range, and
1854 *
1855 * the map should be locked.
1856 */
1857
1858 *address = start;
1859
e2d2fc5c 1860 assert(start < end);
1c79356b
A
1861 new_entry->vme_start = start;
1862 new_entry->vme_end = end;
1863 assert(page_aligned(new_entry->vme_start));
1864 assert(page_aligned(new_entry->vme_end));
39236c6e 1865 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
0a7de745 1866 VM_MAP_PAGE_MASK(map)));
39236c6e 1867 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
0a7de745 1868 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1869
1870 new_entry->is_shared = FALSE;
1871 new_entry->is_sub_map = FALSE;
fe8ab488 1872 new_entry->use_pmap = TRUE;
3e170ce0
A
1873 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1874 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1c79356b
A
1875
1876 new_entry->needs_copy = FALSE;
1877
1878 new_entry->inheritance = VM_INHERIT_DEFAULT;
1879 new_entry->protection = VM_PROT_DEFAULT;
1880 new_entry->max_protection = VM_PROT_ALL;
1881 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1882 new_entry->wired_count = 0;
1883 new_entry->user_wired_count = 0;
1884
1885 new_entry->in_transition = FALSE;
1886 new_entry->needs_wakeup = FALSE;
2d21ac55 1887 new_entry->no_cache = FALSE;
b0d623f7 1888 new_entry->permanent = FALSE;
39236c6e
A
1889 new_entry->superpage_size = FALSE;
1890 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1891 new_entry->map_aligned = TRUE;
1892 } else {
1893 new_entry->map_aligned = FALSE;
1894 }
2d21ac55 1895
3e170ce0 1896 new_entry->used_for_jit = FALSE;
d9a64523 1897 new_entry->pmap_cs_associated = FALSE;
b0d623f7 1898 new_entry->zero_wired_pages = FALSE;
fe8ab488 1899 new_entry->iokit_acct = FALSE;
3e170ce0
A
1900 new_entry->vme_resilient_codesign = FALSE;
1901 new_entry->vme_resilient_media = FALSE;
0a7de745 1902 if (vmk_flags.vmkf_atomic_entry) {
39037602 1903 new_entry->vme_atomic = TRUE;
0a7de745 1904 } else {
39037602 1905 new_entry->vme_atomic = FALSE;
0a7de745 1906 }
1c79356b 1907
5ba3f43e 1908 VME_ALIAS_SET(new_entry, tag);
0c530ab8 1909
1c79356b
A
1910 /*
1911 * Insert the new entry into the list
1912 */
1913
d9a64523 1914 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1c79356b
A
1915
1916 map->size += size;
1917
1918 /*
1919 * Update the lookup hint
1920 */
0c530ab8 1921 SAVE_HINT_MAP_WRITE(map, new_entry);
1c79356b
A
1922
1923 *o_entry = new_entry;
0a7de745 1924 return KERN_SUCCESS;
f427ee49
A
1925
1926noSpace:
1927
1928 vm_map_entry_dispose(map, new_entry);
1929 vm_map_unlock(map);
1930 return KERN_NO_SPACE;
1c79356b
A
1931}
1932
1933int vm_map_pmap_enter_print = FALSE;
1934int vm_map_pmap_enter_enable = FALSE;
1935
1936/*
91447636 1937 * Routine: vm_map_pmap_enter [internal only]
1c79356b
A
1938 *
1939 * Description:
1940 * Force pages from the specified object to be entered into
1941 * the pmap at the specified address if they are present.
1942 * As soon as a page not found in the object the scan ends.
1943 *
1944 * Returns:
5ba3f43e 1945 * Nothing.
1c79356b
A
1946 *
1947 * In/out conditions:
1948 * The source map should not be locked on entry.
1949 */
fe8ab488 1950__unused static void
1c79356b 1951vm_map_pmap_enter(
0a7de745
A
1952 vm_map_t map,
1953 vm_map_offset_t addr,
1954 vm_map_offset_t end_addr,
1955 vm_object_t object,
1956 vm_object_offset_t offset,
1957 vm_prot_t protection)
1c79356b 1958{
0a7de745
A
1959 int type_of_fault;
1960 kern_return_t kr;
d9a64523 1961 struct vm_object_fault_info fault_info = {};
0b4e3aa0 1962
0a7de745 1963 if (map->pmap == 0) {
55e303ae 1964 return;
0a7de745 1965 }
55e303ae 1966
f427ee49
A
1967 assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);
1968
1c79356b 1969 while (addr < end_addr) {
0a7de745 1970 vm_page_t m;
1c79356b 1971
fe8ab488
A
1972
1973 /*
0a7de745 1974 * TODO:
fe8ab488
A
1975 * From vm_map_enter(), we come into this function without the map
1976 * lock held or the object lock held.
1977 * We haven't taken a reference on the object either.
1978 * We should do a proper lookup on the map to make sure
1979 * that things are sane before we go locking objects that
1980 * could have been deallocated from under us.
1981 */
1982
1c79356b 1983 vm_object_lock(object);
1c79356b
A
1984
1985 m = vm_page_lookup(object, offset);
5ba3f43e 1986
d9a64523 1987 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
0a7de745 1988 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
1c79356b
A
1989 vm_object_unlock(object);
1990 return;
1991 }
1992
1c79356b
A
1993 if (vm_map_pmap_enter_print) {
1994 printf("vm_map_pmap_enter:");
2d21ac55 1995 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
0a7de745 1996 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 1997 }
2d21ac55 1998 type_of_fault = DBG_CACHE_HIT_FAULT;
d9a64523 1999 kr = vm_fault_enter(m, map->pmap,
f427ee49
A
2000 addr,
2001 PAGE_SIZE, 0,
2002 protection, protection,
0a7de745
A
2003 VM_PAGE_WIRED(m),
2004 FALSE, /* change_wiring */
2005 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2006 &fault_info,
2007 NULL, /* need_retry */
2008 &type_of_fault);
1c79356b 2009
1c79356b
A
2010 vm_object_unlock(object);
2011
2012 offset += PAGE_SIZE_64;
2013 addr += PAGE_SIZE;
2014 }
2015}
2016
91447636 2017boolean_t vm_map_pmap_is_empty(
0a7de745
A
2018 vm_map_t map,
2019 vm_map_offset_t start,
91447636 2020 vm_map_offset_t end);
0a7de745
A
2021boolean_t
2022vm_map_pmap_is_empty(
2023 vm_map_t map,
2024 vm_map_offset_t start,
2025 vm_map_offset_t end)
91447636 2026{
2d21ac55
A
2027#ifdef MACHINE_PMAP_IS_EMPTY
2028 return pmap_is_empty(map->pmap, start, end);
0a7de745
A
2029#else /* MACHINE_PMAP_IS_EMPTY */
2030 vm_map_offset_t offset;
2031 ppnum_t phys_page;
91447636
A
2032
2033 if (map->pmap == NULL) {
2034 return TRUE;
2035 }
2d21ac55 2036
91447636 2037 for (offset = start;
0a7de745
A
2038 offset < end;
2039 offset += PAGE_SIZE) {
91447636
A
2040 phys_page = pmap_find_phys(map->pmap, offset);
2041 if (phys_page) {
2042 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
0a7de745
A
2043 "page %d at 0x%llx\n",
2044 map, (long long)start, (long long)end,
2045 phys_page, (long long)offset);
91447636
A
2046 return FALSE;
2047 }
2048 }
2049 return TRUE;
0a7de745 2050#endif /* MACHINE_PMAP_IS_EMPTY */
91447636
A
2051}
2052
0a7de745 2053#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
316670eb
A
2054kern_return_t
2055vm_map_random_address_for_size(
0a7de745
A
2056 vm_map_t map,
2057 vm_map_offset_t *address,
2058 vm_map_size_t size)
316670eb 2059{
0a7de745
A
2060 kern_return_t kr = KERN_SUCCESS;
2061 int tries = 0;
2062 vm_map_offset_t random_addr = 0;
316670eb
A
2063 vm_map_offset_t hole_end;
2064
0a7de745
A
2065 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2066 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2067 vm_map_size_t vm_hole_size = 0;
2068 vm_map_size_t addr_space_size;
316670eb
A
2069
2070 addr_space_size = vm_map_max(map) - vm_map_min(map);
2071
f427ee49 2072 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
316670eb
A
2073
2074 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
c3c9b80d
A
2075 if (startup_phase < STARTUP_SUB_ZALLOC) {
2076 random_addr = (vm_map_offset_t)early_random();
2077 } else {
2078 random_addr = (vm_map_offset_t)random();
2079 }
2080 random_addr <<= VM_MAP_PAGE_SHIFT(map);
39236c6e 2081 random_addr = vm_map_trunc_page(
0a7de745 2082 vm_map_min(map) + (random_addr % addr_space_size),
39236c6e 2083 VM_MAP_PAGE_MASK(map));
316670eb
A
2084
2085 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2086 if (prev_entry == vm_map_to_entry(map)) {
2087 next_entry = vm_map_first_entry(map);
2088 } else {
2089 next_entry = prev_entry->vme_next;
2090 }
2091 if (next_entry == vm_map_to_entry(map)) {
2092 hole_end = vm_map_max(map);
2093 } else {
2094 hole_end = next_entry->vme_start;
2095 }
2096 vm_hole_size = hole_end - random_addr;
2097 if (vm_hole_size >= size) {
2098 *address = random_addr;
2099 break;
2100 }
2101 }
2102 tries++;
2103 }
2104
2105 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2106 kr = KERN_NO_SPACE;
2107 }
2108 return kr;
2109}
2110
d9a64523
A
2111static boolean_t
2112vm_memory_malloc_no_cow(
2113 int alias)
2114{
2115 uint64_t alias_mask;
2116
cb323159
A
2117 if (alias > 63) {
2118 return FALSE;
2119 }
2120
d9a64523
A
2121 alias_mask = 1ULL << alias;
2122 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2123 return TRUE;
2124 }
2125 return FALSE;
2126}
2127
1c79356b
A
2128/*
2129 * Routine: vm_map_enter
2130 *
2131 * Description:
2132 * Allocate a range in the specified virtual address map.
2133 * The resulting range will refer to memory defined by
2134 * the given memory object and offset into that object.
2135 *
2136 * Arguments are as defined in the vm_map call.
2137 */
91447636
A
2138static unsigned int vm_map_enter_restore_successes = 0;
2139static unsigned int vm_map_enter_restore_failures = 0;
1c79356b
A
2140kern_return_t
2141vm_map_enter(
0a7de745
A
2142 vm_map_t map,
2143 vm_map_offset_t *address, /* IN/OUT */
2144 vm_map_size_t size,
2145 vm_map_offset_t mask,
2146 int flags,
2147 vm_map_kernel_flags_t vmk_flags,
2148 vm_tag_t alias,
2149 vm_object_t object,
2150 vm_object_offset_t offset,
2151 boolean_t needs_copy,
2152 vm_prot_t cur_protection,
2153 vm_prot_t max_protection,
2154 vm_inherit_t inheritance)
1c79356b 2155{
0a7de745
A
2156 vm_map_entry_t entry, new_entry;
2157 vm_map_offset_t start, tmp_start, tmp_offset;
2158 vm_map_offset_t end, tmp_end;
2159 vm_map_offset_t tmp2_start, tmp2_end;
2160 vm_map_offset_t desired_empty_end;
2161 vm_map_offset_t step;
2162 kern_return_t result = KERN_SUCCESS;
2163 vm_map_t zap_old_map = VM_MAP_NULL;
2164 vm_map_t zap_new_map = VM_MAP_NULL;
2165 boolean_t map_locked = FALSE;
2166 boolean_t pmap_empty = TRUE;
2167 boolean_t new_mapping_established = FALSE;
2168 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2169 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2170 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2171 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2172 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2173 boolean_t is_submap = vmk_flags.vmkf_submap;
c3c9b80d 2174 boolean_t permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
cb323159 2175 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
0a7de745
A
2176 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2177 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
f427ee49 2178 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
0a7de745
A
2179 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2180 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2181 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2182 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2183 vm_tag_t user_alias;
2184 vm_map_offset_t effective_min_offset, effective_max_offset;
2185 kern_return_t kr;
2186 boolean_t clear_map_aligned = FALSE;
2187 vm_map_entry_t hole_entry;
2188 vm_map_size_t chunk_size = 0;
593a1d5f 2189
5ba3f43e
A
2190 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2191
a39ff7e2
A
2192 if (flags & VM_FLAGS_4GB_CHUNK) {
2193#if defined(__LP64__)
2194 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2195#else /* __LP64__ */
2196 chunk_size = ANON_CHUNK_SIZE;
2197#endif /* __LP64__ */
2198 } else {
2199 chunk_size = ANON_CHUNK_SIZE;
2200 }
2201
b0d623f7
A
2202 if (superpage_size) {
2203 switch (superpage_size) {
2204 /*
2205 * Note that the current implementation only supports
2206 * a single size for superpages, SUPERPAGE_SIZE, per
2207 * architecture. As soon as more sizes are supposed
2208 * to be supported, SUPERPAGE_SIZE has to be replaced
2209 * with a lookup of the size depending on superpage_size.
2210 */
2211#ifdef __x86_64__
0a7de745
A
2212 case SUPERPAGE_SIZE_ANY:
2213 /* handle it like 2 MB and round up to page size */
2214 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
f427ee49 2215 OS_FALLTHROUGH;
0a7de745
A
2216 case SUPERPAGE_SIZE_2MB:
2217 break;
b0d623f7 2218#endif
0a7de745
A
2219 default:
2220 return KERN_INVALID_ARGUMENT;
b0d623f7 2221 }
0a7de745
A
2222 mask = SUPERPAGE_SIZE - 1;
2223 if (size & (SUPERPAGE_SIZE - 1)) {
b0d623f7 2224 return KERN_INVALID_ARGUMENT;
0a7de745
A
2225 }
2226 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
b0d623f7
A
2227 }
2228
6d2010ae 2229
d9a64523
A
2230 if ((cur_protection & VM_PROT_WRITE) &&
2231 (cur_protection & VM_PROT_EXECUTE) &&
f427ee49
A
2232#if XNU_TARGET_OS_OSX
2233 map->pmap != kernel_pmap &&
d9a64523 2234 (cs_process_global_enforcement() ||
0a7de745
A
2235 (vmk_flags.vmkf_cs_enforcement_override
2236 ? vmk_flags.vmkf_cs_enforcement
f427ee49
A
2237 : (vm_map_cs_enforcement(map)
2238#if __arm64__
2239 || !VM_MAP_IS_EXOTIC(map)
2240#endif /* __arm64__ */
2241 ))) &&
2242#endif /* XNU_TARGET_OS_OSX */
f427ee49
A
2243 (VM_MAP_POLICY_WX_FAIL(map) ||
2244 VM_MAP_POLICY_WX_STRIP_X(map)) &&
d9a64523 2245 !entry_for_jit) {
f427ee49
A
2246 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2247
d9a64523 2248 DTRACE_VM3(cs_wx,
0a7de745
A
2249 uint64_t, 0,
2250 uint64_t, 0,
2251 vm_prot_t, cur_protection);
f427ee49 2252 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
0a7de745
A
2253 proc_selfpid(),
2254 (current_task()->bsd_info
2255 ? proc_name_address(current_task()->bsd_info)
2256 : "?"),
f427ee49
A
2257 __FUNCTION__,
2258 (vm_protect_wx_fail ? "failing" : "turning off execute"));
d9a64523 2259 cur_protection &= ~VM_PROT_EXECUTE;
f427ee49
A
2260 if (vm_protect_wx_fail) {
2261 return KERN_PROTECTION_FAILURE;
2262 }
5ba3f43e 2263 }
1c79356b 2264
a39ff7e2
A
2265 /*
2266 * If the task has requested executable lockdown,
2267 * deny any new executable mapping.
2268 */
2269 if (map->map_disallow_new_exec == TRUE) {
2270 if (cur_protection & VM_PROT_EXECUTE) {
2271 return KERN_PROTECTION_FAILURE;
2272 }
2273 }
2274
cb323159
A
2275 if (resilient_codesign) {
2276 assert(!is_submap);
f427ee49
A
2277 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
2278 if ((cur_protection | max_protection) & reject_prot) {
3e170ce0
A
2279 return KERN_PROTECTION_FAILURE;
2280 }
2281 }
2282
cb323159
A
2283 if (resilient_media) {
2284 assert(!is_submap);
2285// assert(!needs_copy);
2286 if (object != VM_OBJECT_NULL &&
2287 !object->internal) {
2288 /*
2289 * This mapping is directly backed by an external
2290 * memory manager (e.g. a vnode pager for a file):
2291 * we would not have any safe place to inject
2292 * a zero-filled page if an actual page is not
2293 * available, without possibly impacting the actual
2294 * contents of the mapped object (e.g. the file),
2295 * so we can't provide any media resiliency here.
2296 */
2297 return KERN_INVALID_ARGUMENT;
2298 }
2299 }
2300
2d21ac55
A
2301 if (is_submap) {
2302 if (purgable) {
2303 /* submaps can not be purgeable */
2304 return KERN_INVALID_ARGUMENT;
2305 }
2306 if (object == VM_OBJECT_NULL) {
2307 /* submaps can not be created lazily */
2308 return KERN_INVALID_ARGUMENT;
2309 }
2310 }
5ba3f43e 2311 if (vmk_flags.vmkf_already) {
2d21ac55
A
2312 /*
2313 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2314 * is already present. For it to be meaningful, the requested
2315 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2316 * we shouldn't try and remove what was mapped there first
2317 * (!VM_FLAGS_OVERWRITE).
2318 */
2319 if ((flags & VM_FLAGS_ANYWHERE) ||
2320 (flags & VM_FLAGS_OVERWRITE)) {
2321 return KERN_INVALID_ARGUMENT;
2322 }
2323 }
2324
6d2010ae 2325 effective_min_offset = map->min_offset;
b0d623f7 2326
5ba3f43e 2327 if (vmk_flags.vmkf_beyond_max) {
2d21ac55 2328 /*
b0d623f7 2329 * Allow an insertion beyond the map's max offset.
2d21ac55 2330 */
f427ee49 2331#if !defined(__arm__)
0a7de745 2332 if (vm_map_is_64bit(map)) {
2d21ac55 2333 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
0a7de745
A
2334 } else
2335#endif /* __arm__ */
2336 effective_max_offset = 0x00000000FFFFF000ULL;
2d21ac55 2337 } else {
f427ee49 2338#if XNU_TARGET_OS_OSX
cb323159
A
2339 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2340 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2341 } else {
2342 effective_max_offset = map->max_offset;
2343 }
f427ee49 2344#else /* XNU_TARGET_OS_OSX */
2d21ac55 2345 effective_max_offset = map->max_offset;
f427ee49 2346#endif /* XNU_TARGET_OS_OSX */
2d21ac55
A
2347 }
2348
2349 if (size == 0 ||
f427ee49 2350 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
91447636
A
2351 *address = 0;
2352 return KERN_INVALID_ARGUMENT;
2353 }
2354
3e170ce0
A
2355 if (map->pmap == kernel_pmap) {
2356 user_alias = VM_KERN_MEMORY_NONE;
2357 } else {
2358 user_alias = alias;
2359 }
2d21ac55 2360
0a7de745
A
2361 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2362 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2363 }
2364
2365#define RETURN(value) { result = value; goto BailOut; }
1c79356b 2366
f427ee49
A
2367 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2368 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2369 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2370 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2371 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2372 }
91447636 2373
f427ee49
A
2374 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2375 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
39236c6e
A
2376 /*
2377 * In most cases, the caller rounds the size up to the
2378 * map's page size.
2379 * If we get a size that is explicitly not map-aligned here,
2380 * we'll have to respect the caller's wish and mark the
2381 * mapping as "not map-aligned" to avoid tripping the
2382 * map alignment checks later.
2383 */
2384 clear_map_aligned = TRUE;
2385 }
5ba3f43e 2386 if (!anywhere &&
f427ee49 2387 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
fe8ab488
A
2388 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2389 /*
2390 * We've been asked to map at a fixed address and that
2391 * address is not aligned to the map's specific alignment.
2392 * The caller should know what it's doing (i.e. most likely
2393 * mapping some fragmented copy map, transferring memory from
2394 * a VM map with a different alignment), so clear map_aligned
2395 * for this new VM map entry and proceed.
2396 */
2397 clear_map_aligned = TRUE;
2398 }
39236c6e 2399
91447636
A
2400 /*
2401 * Only zero-fill objects are allowed to be purgable.
2402 * LP64todo - limit purgable objects to 32-bits for now
2403 */
2404 if (purgable &&
2405 (offset != 0 ||
0a7de745
A
2406 (object != VM_OBJECT_NULL &&
2407 (object->vo_size != size ||
2408 object->purgable == VM_PURGABLE_DENY))
2409 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
91447636 2410 return KERN_INVALID_ARGUMENT;
0a7de745 2411 }
91447636
A
2412
2413 if (!anywhere && overwrite) {
2414 /*
2415 * Create a temporary VM map to hold the old mappings in the
2416 * affected area while we create the new one.
2417 * This avoids releasing the VM map lock in
2418 * vm_map_entry_delete() and allows atomicity
2419 * when we want to replace some mappings with a new one.
2420 * It also allows us to restore the old VM mappings if the
2421 * new mapping fails.
2422 */
2423 zap_old_map = vm_map_create(PMAP_NULL,
0a7de745
A
2424 *address,
2425 *address + size,
2426 map->hdr.entries_pageable);
39236c6e 2427 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 2428 vm_map_disable_hole_optimization(zap_old_map);
91447636
A
2429 }
2430
0a7de745 2431StartAgain:;
1c79356b
A
2432
2433 start = *address;
2434
2435 if (anywhere) {
2436 vm_map_lock(map);
91447636 2437 map_locked = TRUE;
5ba3f43e 2438
316670eb 2439 if (entry_for_jit) {
f427ee49
A
2440 if (map->jit_entry_exists &&
2441 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
316670eb
A
2442 result = KERN_INVALID_ARGUMENT;
2443 goto BailOut;
2444 }
f427ee49
A
2445 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2446 random_address = TRUE;
2447 }
39037602
A
2448 }
2449
2450 if (random_address) {
316670eb
A
2451 /*
2452 * Get a random start address.
2453 */
2454 result = vm_map_random_address_for_size(map, address, size);
2455 if (result != KERN_SUCCESS) {
2456 goto BailOut;
2457 }
2458 start = *address;
6d2010ae 2459 }
f427ee49 2460#if XNU_TARGET_OS_OSX
5ba3f43e 2461 else if ((start == 0 || start == vm_map_min(map)) &&
0a7de745
A
2462 !map->disable_vmentry_reuse &&
2463 map->vmmap_high_start != 0) {
5ba3f43e
A
2464 start = map->vmmap_high_start;
2465 }
f427ee49 2466#endif /* XNU_TARGET_OS_OSX */
1c79356b 2467
316670eb 2468
1c79356b
A
2469 /*
2470 * Calculate the first possible address.
2471 */
2472
0a7de745 2473 if (start < effective_min_offset) {
2d21ac55 2474 start = effective_min_offset;
0a7de745
A
2475 }
2476 if (start > effective_max_offset) {
1c79356b 2477 RETURN(KERN_NO_SPACE);
0a7de745 2478 }
1c79356b
A
2479
2480 /*
2481 * Look for the first possible address;
2482 * if there's already something at this
2483 * address, we have to start after it.
2484 */
2485
0a7de745 2486 if (map->disable_vmentry_reuse == TRUE) {
6d2010ae 2487 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1c79356b 2488 } else {
3e170ce0 2489 if (map->holelistenabled) {
d9a64523 2490 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
2491
2492 if (hole_entry == NULL) {
2493 /*
2494 * No more space in the map?
2495 */
2496 result = KERN_NO_SPACE;
2497 goto BailOut;
2498 } else {
3e170ce0
A
2499 boolean_t found_hole = FALSE;
2500
2501 do {
2502 if (hole_entry->vme_start >= start) {
2503 start = hole_entry->vme_start;
2504 found_hole = TRUE;
2505 break;
2506 }
2507
2508 if (hole_entry->vme_end > start) {
2509 found_hole = TRUE;
2510 break;
2511 }
2512 hole_entry = hole_entry->vme_next;
d9a64523 2513 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
3e170ce0
A
2514
2515 if (found_hole == FALSE) {
2516 result = KERN_NO_SPACE;
2517 goto BailOut;
2518 }
2519
2520 entry = hole_entry;
6d2010ae 2521
0a7de745 2522 if (start == 0) {
3e170ce0 2523 start += PAGE_SIZE_64;
0a7de745 2524 }
3e170ce0 2525 }
6d2010ae 2526 } else {
3e170ce0
A
2527 assert(first_free_is_valid(map));
2528
2529 entry = map->first_free;
2530
2531 if (entry == vm_map_to_entry(map)) {
6d2010ae 2532 entry = NULL;
3e170ce0 2533 } else {
0a7de745
A
2534 if (entry->vme_next == vm_map_to_entry(map)) {
2535 /*
2536 * Hole at the end of the map.
2537 */
3e170ce0 2538 entry = NULL;
0a7de745
A
2539 } else {
2540 if (start < (entry->vme_next)->vme_start) {
3e170ce0
A
2541 start = entry->vme_end;
2542 start = vm_map_round_page(start,
0a7de745 2543 VM_MAP_PAGE_MASK(map));
3e170ce0
A
2544 } else {
2545 /*
2546 * Need to do a lookup.
2547 */
2548 entry = NULL;
2549 }
0a7de745 2550 }
3e170ce0
A
2551 }
2552
2553 if (entry == NULL) {
0a7de745 2554 vm_map_entry_t tmp_entry;
3e170ce0
A
2555 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2556 assert(!entry_for_jit);
2557 start = tmp_entry->vme_end;
39236c6e 2558 start = vm_map_round_page(start,
0a7de745 2559 VM_MAP_PAGE_MASK(map));
6d2010ae 2560 }
3e170ce0 2561 entry = tmp_entry;
316670eb 2562 }
6d2010ae 2563 }
1c79356b
A
2564 }
2565
2566 /*
2567 * In any case, the "entry" always precedes
2568 * the proposed new region throughout the
2569 * loop:
2570 */
2571
2572 while (TRUE) {
0a7de745 2573 vm_map_entry_t next;
1c79356b 2574
2d21ac55 2575 /*
1c79356b
A
2576 * Find the end of the proposed new region.
2577 * Be sure we didn't go beyond the end, or
2578 * wrap around the address.
2579 */
2580
2581 end = ((start + mask) & ~mask);
39236c6e 2582 end = vm_map_round_page(end,
0a7de745
A
2583 VM_MAP_PAGE_MASK(map));
2584 if (end < start) {
1c79356b 2585 RETURN(KERN_NO_SPACE);
0a7de745 2586 }
1c79356b 2587 start = end;
39236c6e 2588 assert(VM_MAP_PAGE_ALIGNED(start,
0a7de745 2589 VM_MAP_PAGE_MASK(map)));
1c79356b
A
2590 end += size;
2591
d9a64523
A
2592 /* We want an entire page of empty space, but don't increase the allocation size. */
2593 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2594
2595 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
1c79356b 2596 if (map->wait_for_space) {
fe8ab488 2597 assert(!keep_map_locked);
2d21ac55 2598 if (size <= (effective_max_offset -
0a7de745 2599 effective_min_offset)) {
1c79356b 2600 assert_wait((event_t)map,
0a7de745 2601 THREAD_ABORTSAFE);
1c79356b 2602 vm_map_unlock(map);
91447636
A
2603 map_locked = FALSE;
2604 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2605 goto StartAgain;
2606 }
2607 }
2608 RETURN(KERN_NO_SPACE);
2609 }
2610
1c79356b 2611 next = entry->vme_next;
1c79356b 2612
3e170ce0 2613 if (map->holelistenabled) {
0a7de745 2614 if (entry->vme_end >= desired_empty_end) {
3e170ce0 2615 break;
0a7de745 2616 }
3e170ce0
A
2617 } else {
2618 /*
2619 * If there are no more entries, we must win.
2620 *
2621 * OR
2622 *
2623 * If there is another entry, it must be
2624 * after the end of the potential new region.
2625 */
1c79356b 2626
0a7de745 2627 if (next == vm_map_to_entry(map)) {
3e170ce0 2628 break;
0a7de745 2629 }
3e170ce0 2630
0a7de745 2631 if (next->vme_start >= desired_empty_end) {
3e170ce0 2632 break;
0a7de745 2633 }
3e170ce0 2634 }
1c79356b
A
2635
2636 /*
2637 * Didn't fit -- move to the next entry.
2638 */
2639
2640 entry = next;
3e170ce0
A
2641
2642 if (map->holelistenabled) {
d9a64523 2643 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
3e170ce0
A
2644 /*
2645 * Wrapped around
2646 */
2647 result = KERN_NO_SPACE;
2648 goto BailOut;
2649 }
2650 start = entry->vme_start;
2651 } else {
2652 start = entry->vme_end;
2653 }
2654
39236c6e 2655 start = vm_map_round_page(start,
0a7de745 2656 VM_MAP_PAGE_MASK(map));
1c79356b 2657 }
3e170ce0
A
2658
2659 if (map->holelistenabled) {
2660 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2661 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2662 }
2663 }
2664
1c79356b 2665 *address = start;
39236c6e 2666 assert(VM_MAP_PAGE_ALIGNED(*address,
0a7de745 2667 VM_MAP_PAGE_MASK(map)));
1c79356b 2668 } else {
f427ee49
A
2669 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2670 !overwrite &&
2671 user_alias == VM_MEMORY_REALLOC) {
2672 /*
2673 * Force realloc() to switch to a new allocation,
2674 * to prevent 4k-fragmented virtual ranges.
2675 */
2676// DEBUG4K_ERROR("no realloc in place");
2677 return KERN_NO_SPACE;
2678 }
2679
1c79356b
A
2680 /*
2681 * Verify that:
2682 * the address doesn't itself violate
2683 * the mask requirement.
2684 */
2685
2686 vm_map_lock(map);
91447636 2687 map_locked = TRUE;
0a7de745 2688 if ((start & mask) != 0) {
1c79356b 2689 RETURN(KERN_NO_SPACE);
0a7de745 2690 }
1c79356b
A
2691
2692 /*
2693 * ... the address is within bounds
2694 */
2695
2696 end = start + size;
2697
2d21ac55
A
2698 if ((start < effective_min_offset) ||
2699 (end > effective_max_offset) ||
1c79356b
A
2700 (start >= end)) {
2701 RETURN(KERN_INVALID_ADDRESS);
2702 }
2703
91447636 2704 if (overwrite && zap_old_map != VM_MAP_NULL) {
5ba3f43e 2705 int remove_flags;
91447636
A
2706 /*
2707 * Fixed mapping and "overwrite" flag: attempt to
2708 * remove all existing mappings in the specified
2709 * address range, saving them in our "zap_old_map".
2710 */
5ba3f43e
A
2711 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2712 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2713 if (vmk_flags.vmkf_overwrite_immutable) {
2714 /* we can overwrite immutable mappings */
2715 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2716 }
91447636 2717 (void) vm_map_delete(map, start, end,
0a7de745
A
2718 remove_flags,
2719 zap_old_map);
91447636
A
2720 }
2721
1c79356b
A
2722 /*
2723 * ... the starting address isn't allocated
2724 */
2725
2d21ac55 2726 if (vm_map_lookup_entry(map, start, &entry)) {
0a7de745 2727 if (!(vmk_flags.vmkf_already)) {
2d21ac55
A
2728 RETURN(KERN_NO_SPACE);
2729 }
2730 /*
2731 * Check if what's already there is what we want.
2732 */
2733 tmp_start = start;
2734 tmp_offset = offset;
2735 if (entry->vme_start < start) {
2736 tmp_start -= start - entry->vme_start;
2737 tmp_offset -= start - entry->vme_start;
2d21ac55
A
2738 }
2739 for (; entry->vme_start < end;
0a7de745 2740 entry = entry->vme_next) {
4a3eedf9
A
2741 /*
2742 * Check if the mapping's attributes
2743 * match the existing map entry.
2744 */
2d21ac55
A
2745 if (entry == vm_map_to_entry(map) ||
2746 entry->vme_start != tmp_start ||
2747 entry->is_sub_map != is_submap ||
3e170ce0 2748 VME_OFFSET(entry) != tmp_offset ||
2d21ac55
A
2749 entry->needs_copy != needs_copy ||
2750 entry->protection != cur_protection ||
2751 entry->max_protection != max_protection ||
2752 entry->inheritance != inheritance ||
fe8ab488 2753 entry->iokit_acct != iokit_acct ||
3e170ce0 2754 VME_ALIAS(entry) != alias) {
2d21ac55
A
2755 /* not the same mapping ! */
2756 RETURN(KERN_NO_SPACE);
2757 }
4a3eedf9
A
2758 /*
2759 * Check if the same object is being mapped.
2760 */
2761 if (is_submap) {
3e170ce0 2762 if (VME_SUBMAP(entry) !=
4a3eedf9
A
2763 (vm_map_t) object) {
2764 /* not the same submap */
2765 RETURN(KERN_NO_SPACE);
2766 }
2767 } else {
3e170ce0 2768 if (VME_OBJECT(entry) != object) {
4a3eedf9
A
2769 /* not the same VM object... */
2770 vm_object_t obj2;
2771
3e170ce0 2772 obj2 = VME_OBJECT(entry);
4a3eedf9 2773 if ((obj2 == VM_OBJECT_NULL ||
0a7de745 2774 obj2->internal) &&
4a3eedf9 2775 (object == VM_OBJECT_NULL ||
0a7de745 2776 object->internal)) {
4a3eedf9
A
2777 /*
2778 * ... but both are
2779 * anonymous memory,
2780 * so equivalent.
2781 */
2782 } else {
2783 RETURN(KERN_NO_SPACE);
2784 }
2785 }
2786 }
2787
2d21ac55
A
2788 tmp_offset += entry->vme_end - entry->vme_start;
2789 tmp_start += entry->vme_end - entry->vme_start;
2790 if (entry->vme_end >= end) {
2791 /* reached the end of our mapping */
2792 break;
2793 }
2794 }
2795 /* it all matches: let's use what's already there ! */
2796 RETURN(KERN_MEMORY_PRESENT);
2797 }
1c79356b
A
2798
2799 /*
2800 * ... the next region doesn't overlap the
2801 * end point.
2802 */
2803
2804 if ((entry->vme_next != vm_map_to_entry(map)) &&
0a7de745 2805 (entry->vme_next->vme_start < end)) {
1c79356b 2806 RETURN(KERN_NO_SPACE);
0a7de745 2807 }
1c79356b
A
2808 }
2809
2810 /*
2811 * At this point,
2812 * "start" and "end" should define the endpoints of the
2813 * available new range, and
2814 * "entry" should refer to the region before the new
2815 * range, and
2816 *
2817 * the map should be locked.
2818 */
2819
2820 /*
2821 * See whether we can avoid creating a new entry (and object) by
2822 * extending one of our neighbors. [So far, we only attempt to
91447636
A
2823 * extend from below.] Note that we can never extend/join
2824 * purgable objects because they need to remain distinct
2825 * entities in order to implement their "volatile object"
2826 * semantics.
1c79356b
A
2827 */
2828
d9a64523
A
2829 if (purgable ||
2830 entry_for_jit ||
2831 vm_memory_malloc_no_cow(user_alias)) {
91447636
A
2832 if (object == VM_OBJECT_NULL) {
2833 object = vm_object_allocate(size);
2834 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
d9a64523 2835 object->true_share = FALSE;
316670eb 2836 if (purgable) {
fe8ab488 2837 task_t owner;
316670eb 2838 object->purgable = VM_PURGABLE_NONVOLATILE;
fe8ab488
A
2839 if (map->pmap == kernel_pmap) {
2840 /*
2841 * Purgeable mappings made in a kernel
2842 * map are "owned" by the kernel itself
2843 * rather than the current user task
2844 * because they're likely to be used by
2845 * more than this user task (see
2846 * execargs_purgeable_allocate(), for
2847 * example).
2848 */
2849 owner = kernel_task;
2850 } else {
2851 owner = current_task();
2852 }
d9a64523 2853 assert(object->vo_owner == NULL);
fe8ab488
A
2854 assert(object->resident_page_count == 0);
2855 assert(object->wired_page_count == 0);
2856 vm_object_lock(object);
2857 vm_purgeable_nonvolatile_enqueue(object, owner);
2858 vm_object_unlock(object);
316670eb 2859 }
91447636
A
2860 offset = (vm_object_offset_t)0;
2861 }
f427ee49
A
2862 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
2863 /* no coalescing if address space uses sub-pages */
2d21ac55 2864 } else if ((is_submap == FALSE) &&
0a7de745
A
2865 (object == VM_OBJECT_NULL) &&
2866 (entry != vm_map_to_entry(map)) &&
2867 (entry->vme_end == start) &&
2868 (!entry->is_shared) &&
2869 (!entry->is_sub_map) &&
2870 (!entry->in_transition) &&
2871 (!entry->needs_wakeup) &&
2872 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2873 (entry->protection == cur_protection) &&
2874 (entry->max_protection == max_protection) &&
2875 (entry->inheritance == inheritance) &&
2876 ((user_alias == VM_MEMORY_REALLOC) ||
2877 (VME_ALIAS(entry) == alias)) &&
2878 (entry->no_cache == no_cache) &&
2879 (entry->permanent == permanent) &&
2880 /* no coalescing for immutable executable mappings */
2881 !((entry->protection & VM_PROT_EXECUTE) &&
2882 entry->permanent) &&
2883 (!entry->superpage_size && !superpage_size) &&
2884 /*
2885 * No coalescing if not map-aligned, to avoid propagating
2886 * that condition any further than needed:
2887 */
2888 (!entry->map_aligned || !clear_map_aligned) &&
2889 (!entry->zero_wired_pages) &&
2890 (!entry->used_for_jit && !entry_for_jit) &&
2891 (!entry->pmap_cs_associated) &&
2892 (entry->iokit_acct == iokit_acct) &&
2893 (!entry->vme_resilient_codesign) &&
2894 (!entry->vme_resilient_media) &&
2895 (!entry->vme_atomic) &&
cb323159 2896 (entry->vme_no_copy_on_read == no_copy_on_read) &&
0a7de745
A
2897
2898 ((entry->vme_end - entry->vme_start) + size <=
2899 (user_alias == VM_MEMORY_REALLOC ?
2900 ANON_CHUNK_SIZE :
2901 NO_COALESCE_LIMIT)) &&
2902
2903 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3e170ce0 2904 if (vm_object_coalesce(VME_OBJECT(entry),
0a7de745
A
2905 VM_OBJECT_NULL,
2906 VME_OFFSET(entry),
2907 (vm_object_offset_t) 0,
2908 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2909 (vm_map_size_t)(end - entry->vme_end))) {
1c79356b
A
2910 /*
2911 * Coalesced the two objects - can extend
2912 * the previous map entry to include the
2913 * new range.
2914 */
2915 map->size += (end - entry->vme_end);
e2d2fc5c 2916 assert(entry->vme_start < end);
39236c6e 2917 assert(VM_MAP_PAGE_ALIGNED(end,
0a7de745
A
2918 VM_MAP_PAGE_MASK(map)));
2919 if (__improbable(vm_debug_events)) {
3e170ce0 2920 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
0a7de745 2921 }
1c79356b 2922 entry->vme_end = end;
3e170ce0
A
2923 if (map->holelistenabled) {
2924 vm_map_store_update_first_free(map, entry, TRUE);
2925 } else {
2926 vm_map_store_update_first_free(map, map->first_free, TRUE);
2927 }
fe8ab488 2928 new_mapping_established = TRUE;
1c79356b
A
2929 RETURN(KERN_SUCCESS);
2930 }
2931 }
2932
b0d623f7
A
2933 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2934 new_entry = NULL;
2935
0a7de745 2936 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
b0d623f7
A
2937 tmp2_end = tmp2_start + step;
2938 /*
2939 * Create a new entry
b0d623f7
A
2940 *
2941 * XXX FBDP
2942 * The reserved "page zero" in each process's address space can
a39ff7e2 2943 * be arbitrarily large. Splitting it into separate objects and
b0d623f7
A
2944 * therefore different VM map entries serves no purpose and just
2945 * slows down operations on the VM map, so let's not split the
a39ff7e2 2946 * allocation into chunks if the max protection is NONE. That
b0d623f7
A
2947 * memory should never be accessible, so it will never get to the
2948 * default pager.
2949 */
2950 tmp_start = tmp2_start;
2951 if (object == VM_OBJECT_NULL &&
a39ff7e2 2952 size > chunk_size &&
b0d623f7 2953 max_protection != VM_PROT_NONE &&
0a7de745 2954 superpage_size == 0) {
a39ff7e2 2955 tmp_end = tmp_start + chunk_size;
0a7de745 2956 } else {
b0d623f7 2957 tmp_end = tmp2_end;
0a7de745 2958 }
b0d623f7 2959 do {
f427ee49
A
2960 new_entry = vm_map_entry_insert(map,
2961 entry, tmp_start, tmp_end,
a991bd8d
A
2962 object, offset, vmk_flags,
2963 needs_copy, FALSE, FALSE,
f427ee49
A
2964 cur_protection, max_protection,
2965 VM_BEHAVIOR_DEFAULT,
2966 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
2967 VM_INHERIT_NONE : inheritance),
2968 0,
2969 no_cache,
2970 permanent,
2971 no_copy_on_read,
2972 superpage_size,
2973 clear_map_aligned,
2974 is_submap,
2975 entry_for_jit,
2976 alias,
2977 translated_allow_execute);
3e170ce0
A
2978
2979 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
6d2010ae 2980
f427ee49
A
2981 if (resilient_codesign) {
2982 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
2983 if (!((cur_protection | max_protection) & reject_prot)) {
2984 new_entry->vme_resilient_codesign = TRUE;
2985 }
3e170ce0
A
2986 }
2987
2988 if (resilient_media &&
cb323159
A
2989 (object == VM_OBJECT_NULL ||
2990 object->internal)) {
3e170ce0
A
2991 new_entry->vme_resilient_media = TRUE;
2992 }
2993
fe8ab488
A
2994 assert(!new_entry->iokit_acct);
2995 if (!is_submap &&
2996 object != VM_OBJECT_NULL &&
d9a64523 2997 (object->purgable != VM_PURGABLE_DENY ||
0a7de745 2998 object->vo_ledger_tag)) {
fe8ab488
A
2999 assert(new_entry->use_pmap);
3000 assert(!new_entry->iokit_acct);
3001 /*
3002 * Turn off pmap accounting since
d9a64523 3003 * purgeable (or tagged) objects have their
fe8ab488
A
3004 * own ledgers.
3005 */
3006 new_entry->use_pmap = FALSE;
3007 } else if (!is_submap &&
0a7de745
A
3008 iokit_acct &&
3009 object != VM_OBJECT_NULL &&
3010 object->internal) {
fe8ab488
A
3011 /* alternate accounting */
3012 assert(!new_entry->iokit_acct);
3013 assert(new_entry->use_pmap);
3014 new_entry->iokit_acct = TRUE;
3015 new_entry->use_pmap = FALSE;
ecc0ceb4
A
3016 DTRACE_VM4(
3017 vm_map_iokit_mapped_region,
3018 vm_map_t, map,
3019 vm_map_offset_t, new_entry->vme_start,
3020 vm_map_offset_t, new_entry->vme_end,
3021 int, VME_ALIAS(new_entry));
fe8ab488
A
3022 vm_map_iokit_mapped_region(
3023 map,
3024 (new_entry->vme_end -
0a7de745 3025 new_entry->vme_start));
fe8ab488
A
3026 } else if (!is_submap) {
3027 assert(!new_entry->iokit_acct);
3028 assert(new_entry->use_pmap);
3029 }
3030
b0d623f7 3031 if (is_submap) {
0a7de745
A
3032 vm_map_t submap;
3033 boolean_t submap_is_64bit;
3034 boolean_t use_pmap;
b0d623f7 3035
fe8ab488
A
3036 assert(new_entry->is_sub_map);
3037 assert(!new_entry->use_pmap);
3038 assert(!new_entry->iokit_acct);
b0d623f7
A
3039 submap = (vm_map_t) object;
3040 submap_is_64bit = vm_map_is_64bit(submap);
cb323159 3041 use_pmap = vmk_flags.vmkf_nested_pmap;
fe8ab488 3042#ifndef NO_NESTED_PMAP
b0d623f7 3043 if (use_pmap && submap->pmap == NULL) {
316670eb 3044 ledger_t ledger = map->pmap->ledger;
b0d623f7 3045 /* we need a sub pmap to nest... */
cb323159
A
3046 submap->pmap = pmap_create_options(ledger, 0,
3047 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
b0d623f7
A
3048 if (submap->pmap == NULL) {
3049 /* let's proceed without nesting... */
3050 }
0a7de745 3051#if defined(__arm__) || defined(__arm64__)
5ba3f43e
A
3052 else {
3053 pmap_set_nested(submap->pmap);
3054 }
3055#endif
2d21ac55 3056 }
b0d623f7 3057 if (use_pmap && submap->pmap != NULL) {
f427ee49
A
3058 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3059 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3060 kr = KERN_FAILURE;
3061 } else {
3062 kr = pmap_nest(map->pmap,
3063 submap->pmap,
3064 tmp_start,
3065 tmp_end - tmp_start);
3066 }
b0d623f7
A
3067 if (kr != KERN_SUCCESS) {
3068 printf("vm_map_enter: "
0a7de745
A
3069 "pmap_nest(0x%llx,0x%llx) "
3070 "error 0x%x\n",
3071 (long long)tmp_start,
3072 (long long)tmp_end,
3073 kr);
b0d623f7
A
3074 } else {
3075 /* we're now nested ! */
3076 new_entry->use_pmap = TRUE;
3077 pmap_empty = FALSE;
3078 }
3079 }
fe8ab488 3080#endif /* NO_NESTED_PMAP */
2d21ac55 3081 }
b0d623f7
A
3082 entry = new_entry;
3083
3084 if (superpage_size) {
3085 vm_page_t pages, m;
3086 vm_object_t sp_object;
5ba3f43e 3087 vm_object_offset_t sp_offset;
b0d623f7 3088
3e170ce0 3089 VME_OFFSET_SET(entry, 0);
b0d623f7
A
3090
3091 /* allocate one superpage */
0a7de745 3092 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
2d21ac55 3093 if (kr != KERN_SUCCESS) {
3e170ce0
A
3094 /* deallocate whole range... */
3095 new_mapping_established = TRUE;
3096 /* ... but only up to "tmp_end" */
3097 size -= end - tmp_end;
b0d623f7
A
3098 RETURN(kr);
3099 }
3100
3101 /* create one vm_object per superpage */
3102 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3103 sp_object->phys_contiguous = TRUE;
0a7de745 3104 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3e170ce0 3105 VME_OBJECT_SET(entry, sp_object);
fe8ab488 3106 assert(entry->use_pmap);
b0d623f7
A
3107
3108 /* enter the base pages into the object */
3109 vm_object_lock(sp_object);
5ba3f43e 3110 for (sp_offset = 0;
0a7de745
A
3111 sp_offset < SUPERPAGE_SIZE;
3112 sp_offset += PAGE_SIZE) {
b0d623f7 3113 m = pages;
39037602 3114 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
3115 pages = NEXT_PAGE(m);
3116 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5ba3f43e 3117 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
2d21ac55 3118 }
b0d623f7 3119 vm_object_unlock(sp_object);
2d21ac55 3120 }
5ba3f43e 3121 } while (tmp_end != tmp2_end &&
0a7de745
A
3122 (tmp_start = tmp_end) &&
3123 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3124 tmp_end + chunk_size : tmp2_end));
b0d623f7 3125 }
91447636 3126
91447636 3127 new_mapping_established = TRUE;
1c79356b 3128
fe8ab488
A
3129BailOut:
3130 assert(map_locked == TRUE);
2d21ac55 3131
593a1d5f
A
3132 if (result == KERN_SUCCESS) {
3133 vm_prot_t pager_prot;
3134 memory_object_t pager;
91447636 3135
fe8ab488 3136#if DEBUG
593a1d5f 3137 if (pmap_empty &&
5ba3f43e 3138 !(vmk_flags.vmkf_no_pmap_check)) {
593a1d5f 3139 assert(vm_map_pmap_is_empty(map,
0a7de745
A
3140 *address,
3141 *address + size));
593a1d5f 3142 }
fe8ab488 3143#endif /* DEBUG */
593a1d5f
A
3144
3145 /*
3146 * For "named" VM objects, let the pager know that the
3147 * memory object is being mapped. Some pagers need to keep
3148 * track of this, to know when they can reclaim the memory
3149 * object, for example.
3150 * VM calls memory_object_map() for each mapping (specifying
3151 * the protection of each mapping) and calls
3152 * memory_object_last_unmap() when all the mappings are gone.
3153 */
3154 pager_prot = max_protection;
3155 if (needs_copy) {
3156 /*
3157 * Copy-On-Write mapping: won't modify
3158 * the memory object.
3159 */
3160 pager_prot &= ~VM_PROT_WRITE;
3161 }
3162 if (!is_submap &&
3163 object != VM_OBJECT_NULL &&
3164 object->named &&
3165 object->pager != MEMORY_OBJECT_NULL) {
3166 vm_object_lock(object);
3167 pager = object->pager;
3168 if (object->named &&
3169 pager != MEMORY_OBJECT_NULL) {
3170 assert(object->pager_ready);
3171 vm_object_mapping_wait(object, THREAD_UNINT);
3172 vm_object_mapping_begin(object);
3173 vm_object_unlock(object);
3174
3175 kr = memory_object_map(pager, pager_prot);
3176 assert(kr == KERN_SUCCESS);
3177
3178 vm_object_lock(object);
3179 vm_object_mapping_end(object);
3180 }
3181 vm_object_unlock(object);
3182 }
fe8ab488
A
3183 }
3184
3185 assert(map_locked == TRUE);
3186
3187 if (!keep_map_locked) {
3188 vm_map_unlock(map);
3189 map_locked = FALSE;
3190 }
3191
3192 /*
3193 * We can't hold the map lock if we enter this block.
3194 */
3195
3196 if (result == KERN_SUCCESS) {
fe8ab488
A
3197 /* Wire down the new entry if the user
3198 * requested all new map entries be wired.
3199 */
0a7de745 3200 if ((map->wiring_required) || (superpage_size)) {
fe8ab488
A
3201 assert(!keep_map_locked);
3202 pmap_empty = FALSE; /* pmap won't be empty */
5ba3f43e 3203 kr = vm_map_wire_kernel(map, start, end,
0a7de745
A
3204 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3205 TRUE);
fe8ab488
A
3206 result = kr;
3207 }
3208
3209 }
3210
3211 if (result != KERN_SUCCESS) {
91447636
A
3212 if (new_mapping_established) {
3213 /*
3214 * We have to get rid of the new mappings since we
3215 * won't make them available to the user.
3216 * Try and do that atomically, to minimize the risk
3217 * that someone else create new mappings that range.
3218 */
3219 zap_new_map = vm_map_create(PMAP_NULL,
0a7de745
A
3220 *address,
3221 *address + size,
3222 map->hdr.entries_pageable);
39236c6e 3223 vm_map_set_page_shift(zap_new_map,
0a7de745 3224 VM_MAP_PAGE_SHIFT(map));
3e170ce0
A
3225 vm_map_disable_hole_optimization(zap_new_map);
3226
91447636
A
3227 if (!map_locked) {
3228 vm_map_lock(map);
3229 map_locked = TRUE;
3230 }
0a7de745
A
3231 (void) vm_map_delete(map, *address, *address + size,
3232 (VM_MAP_REMOVE_SAVE_ENTRIES |
3233 VM_MAP_REMOVE_NO_MAP_ALIGN),
3234 zap_new_map);
91447636
A
3235 }
3236 if (zap_old_map != VM_MAP_NULL &&
3237 zap_old_map->hdr.nentries != 0) {
0a7de745 3238 vm_map_entry_t entry1, entry2;
91447636
A
3239
3240 /*
3241 * The new mapping failed. Attempt to restore
3242 * the old mappings, saved in the "zap_old_map".
3243 */
3244 if (!map_locked) {
3245 vm_map_lock(map);
3246 map_locked = TRUE;
3247 }
3248
3249 /* first check if the coast is still clear */
3250 start = vm_map_first_entry(zap_old_map)->vme_start;
3251 end = vm_map_last_entry(zap_old_map)->vme_end;
3252 if (vm_map_lookup_entry(map, start, &entry1) ||
3253 vm_map_lookup_entry(map, end, &entry2) ||
3254 entry1 != entry2) {
3255 /*
3256 * Part of that range has already been
3257 * re-mapped: we can't restore the old
3258 * mappings...
3259 */
3260 vm_map_enter_restore_failures++;
3261 } else {
3262 /*
3263 * Transfer the saved map entries from
3264 * "zap_old_map" to the original "map",
3265 * inserting them all after "entry1".
3266 */
3267 for (entry2 = vm_map_first_entry(zap_old_map);
0a7de745
A
3268 entry2 != vm_map_to_entry(zap_old_map);
3269 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
3270 vm_map_size_t entry_size;
3271
3272 entry_size = (entry2->vme_end -
0a7de745 3273 entry2->vme_start);
6d2010ae 3274 vm_map_store_entry_unlink(zap_old_map,
0a7de745 3275 entry2);
2d21ac55 3276 zap_old_map->size -= entry_size;
d9a64523 3277 vm_map_store_entry_link(map, entry1, entry2,
0a7de745 3278 VM_MAP_KERNEL_FLAGS_NONE);
2d21ac55 3279 map->size += entry_size;
91447636
A
3280 entry1 = entry2;
3281 }
3282 if (map->wiring_required) {
3283 /*
3284 * XXX TODO: we should rewire the
3285 * old pages here...
3286 */
3287 }
3288 vm_map_enter_restore_successes++;
3289 }
3290 }
3291 }
3292
fe8ab488
A
3293 /*
3294 * The caller is responsible for releasing the lock if it requested to
3295 * keep the map locked.
3296 */
3297 if (map_locked && !keep_map_locked) {
91447636
A
3298 vm_map_unlock(map);
3299 }
3300
3301 /*
3302 * Get rid of the "zap_maps" and all the map entries that
3303 * they may still contain.
3304 */
3305 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 3306 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
3307 zap_old_map = VM_MAP_NULL;
3308 }
3309 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 3310 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
3311 zap_new_map = VM_MAP_NULL;
3312 }
3313
3314 return result;
1c79356b 3315
0a7de745 3316#undef RETURN
1c79356b
A
3317}
3318
5ba3f43e
A
3319#if __arm64__
3320extern const struct memory_object_pager_ops fourk_pager_ops;
3321kern_return_t
3322vm_map_enter_fourk(
0a7de745
A
3323 vm_map_t map,
3324 vm_map_offset_t *address, /* IN/OUT */
3325 vm_map_size_t size,
3326 vm_map_offset_t mask,
3327 int flags,
3328 vm_map_kernel_flags_t vmk_flags,
3329 vm_tag_t alias,
3330 vm_object_t object,
3331 vm_object_offset_t offset,
3332 boolean_t needs_copy,
3333 vm_prot_t cur_protection,
3334 vm_prot_t max_protection,
3335 vm_inherit_t inheritance)
91447636 3336{
0a7de745
A
3337 vm_map_entry_t entry, new_entry;
3338 vm_map_offset_t start, fourk_start;
3339 vm_map_offset_t end, fourk_end;
3340 vm_map_size_t fourk_size;
3341 kern_return_t result = KERN_SUCCESS;
3342 vm_map_t zap_old_map = VM_MAP_NULL;
3343 vm_map_t zap_new_map = VM_MAP_NULL;
3344 boolean_t map_locked = FALSE;
3345 boolean_t pmap_empty = TRUE;
3346 boolean_t new_mapping_established = FALSE;
3347 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3348 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3349 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3350 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3351 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3352 boolean_t is_submap = vmk_flags.vmkf_submap;
3353 boolean_t permanent = vmk_flags.vmkf_permanent;
cb323159 3354 boolean_t no_copy_on_read = vmk_flags.vmkf_permanent;
0a7de745 3355 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
5ba3f43e 3356// boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
f427ee49 3357 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
0a7de745
A
3358 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3359 vm_map_offset_t effective_min_offset, effective_max_offset;
3360 kern_return_t kr;
3361 boolean_t clear_map_aligned = FALSE;
3362 memory_object_t fourk_mem_obj;
3363 vm_object_t fourk_object;
3364 vm_map_offset_t fourk_pager_offset;
3365 int fourk_pager_index_start, fourk_pager_index_num;
3366 int cur_idx;
3367 boolean_t fourk_copy;
3368 vm_object_t copy_object;
3369 vm_object_offset_t copy_offset;
5ba3f43e 3370
f427ee49
A
3371 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3372 panic("%s:%d\n", __FUNCTION__, __LINE__);
3373 }
5ba3f43e
A
3374 fourk_mem_obj = MEMORY_OBJECT_NULL;
3375 fourk_object = VM_OBJECT_NULL;
6d2010ae 3376
5ba3f43e
A
3377 if (superpage_size) {
3378 return KERN_NOT_SUPPORTED;
3379 }
91447636 3380
d9a64523
A
3381 if ((cur_protection & VM_PROT_WRITE) &&
3382 (cur_protection & VM_PROT_EXECUTE) &&
f427ee49
A
3383#if XNU_TARGET_OS_OSX
3384 map->pmap != kernel_pmap &&
3385 (vm_map_cs_enforcement(map)
3386#if __arm64__
3387 || !VM_MAP_IS_EXOTIC(map)
3388#endif /* __arm64__ */
3389 ) &&
3390#endif /* XNU_TARGET_OS_OSX */
d9a64523
A
3391 !entry_for_jit) {
3392 DTRACE_VM3(cs_wx,
0a7de745
A
3393 uint64_t, 0,
3394 uint64_t, 0,
3395 vm_prot_t, cur_protection);
d9a64523 3396 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
0a7de745
A
3397 "turning off execute\n",
3398 proc_selfpid(),
3399 (current_task()->bsd_info
3400 ? proc_name_address(current_task()->bsd_info)
3401 : "?"),
3402 __FUNCTION__);
d9a64523 3403 cur_protection &= ~VM_PROT_EXECUTE;
5ba3f43e 3404 }
5ba3f43e 3405
a39ff7e2
A
3406 /*
3407 * If the task has requested executable lockdown,
3408 * deny any new executable mapping.
3409 */
3410 if (map->map_disallow_new_exec == TRUE) {
3411 if (cur_protection & VM_PROT_EXECUTE) {
3412 return KERN_PROTECTION_FAILURE;
3413 }
3414 }
3415
5ba3f43e
A
3416 if (is_submap) {
3417 return KERN_NOT_SUPPORTED;
3418 }
3419 if (vmk_flags.vmkf_already) {
3420 return KERN_NOT_SUPPORTED;
3421 }
3422 if (purgable || entry_for_jit) {
3423 return KERN_NOT_SUPPORTED;
3424 }
3425
3426 effective_min_offset = map->min_offset;
3427
3428 if (vmk_flags.vmkf_beyond_max) {
3429 return KERN_NOT_SUPPORTED;
3430 } else {
3431 effective_max_offset = map->max_offset;
3432 }
3433
3434 if (size == 0 ||
3435 (offset & FOURK_PAGE_MASK) != 0) {
3436 *address = 0;
2d21ac55 3437 return KERN_INVALID_ARGUMENT;
3e170ce0 3438 }
5ba3f43e 3439
0a7de745 3440#define RETURN(value) { result = value; goto BailOut; }
5ba3f43e
A
3441
3442 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3443 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3444
3445 if (!anywhere && overwrite) {
3446 return KERN_NOT_SUPPORTED;
3447 }
3448 if (!anywhere && overwrite) {
3449 /*
3450 * Create a temporary VM map to hold the old mappings in the
3451 * affected area while we create the new one.
3452 * This avoids releasing the VM map lock in
3453 * vm_map_entry_delete() and allows atomicity
3454 * when we want to replace some mappings with a new one.
3455 * It also allows us to restore the old VM mappings if the
3456 * new mapping fails.
3457 */
3458 zap_old_map = vm_map_create(PMAP_NULL,
0a7de745
A
3459 *address,
3460 *address + size,
3461 map->hdr.entries_pageable);
5ba3f43e
A
3462 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3463 vm_map_disable_hole_optimization(zap_old_map);
3e170ce0 3464 }
593a1d5f 3465
5ba3f43e
A
3466 fourk_start = *address;
3467 fourk_size = size;
3468 fourk_end = fourk_start + fourk_size;
2d21ac55 3469
5ba3f43e
A
3470 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3471 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3472 size = end - start;
39236c6e 3473
5ba3f43e
A
3474 if (anywhere) {
3475 return KERN_NOT_SUPPORTED;
3476 } else {
3477 /*
3478 * Verify that:
3479 * the address doesn't itself violate
3480 * the mask requirement.
3481 */
3482
3483 vm_map_lock(map);
3484 map_locked = TRUE;
3485 if ((start & mask) != 0) {
3486 RETURN(KERN_NO_SPACE);
6d2010ae 3487 }
5ba3f43e
A
3488
3489 /*
3490 * ... the address is within bounds
3491 */
3492
3493 end = start + size;
3494
3495 if ((start < effective_min_offset) ||
3496 (end > effective_max_offset) ||
3497 (start >= end)) {
3498 RETURN(KERN_INVALID_ADDRESS);
22ba694c 3499 }
5ba3f43e
A
3500
3501 if (overwrite && zap_old_map != VM_MAP_NULL) {
3502 /*
3503 * Fixed mapping and "overwrite" flag: attempt to
3504 * remove all existing mappings in the specified
3505 * address range, saving them in our "zap_old_map".
3506 */
3507 (void) vm_map_delete(map, start, end,
0a7de745
A
3508 (VM_MAP_REMOVE_SAVE_ENTRIES |
3509 VM_MAP_REMOVE_NO_MAP_ALIGN),
3510 zap_old_map);
3e170ce0 3511 }
2d21ac55 3512
5ba3f43e
A
3513 /*
3514 * ... the starting address isn't allocated
3515 */
3516 if (vm_map_lookup_entry(map, start, &entry)) {
3517 vm_object_t cur_object, shadow_object;
3518
3519 /*
3520 * We might already some 4K mappings
3521 * in a 16K page here.
3522 */
3523
3524 if (entry->vme_end - entry->vme_start
3525 != SIXTEENK_PAGE_SIZE) {
3526 RETURN(KERN_NO_SPACE);
3527 }
3528 if (entry->is_sub_map) {
3529 RETURN(KERN_NO_SPACE);
3530 }
3531 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3532 RETURN(KERN_NO_SPACE);
3533 }
3534
3535 /* go all the way down the shadow chain */
3536 cur_object = VME_OBJECT(entry);
3537 vm_object_lock(cur_object);
3538 while (cur_object->shadow != VM_OBJECT_NULL) {
3539 shadow_object = cur_object->shadow;
3540 vm_object_lock(shadow_object);
3541 vm_object_unlock(cur_object);
3542 cur_object = shadow_object;
3543 shadow_object = VM_OBJECT_NULL;
3544 }
3545 if (cur_object->internal ||
3546 cur_object->pager == NULL) {
3547 vm_object_unlock(cur_object);
3548 RETURN(KERN_NO_SPACE);
3549 }
3550 if (cur_object->pager->mo_pager_ops
3551 != &fourk_pager_ops) {
3552 vm_object_unlock(cur_object);
3553 RETURN(KERN_NO_SPACE);
3554 }
3555 fourk_object = cur_object;
3556 fourk_mem_obj = fourk_object->pager;
3557
3558 /* keep the "4K" object alive */
3559 vm_object_reference_locked(fourk_object);
f427ee49 3560 memory_object_reference(fourk_mem_obj);
5ba3f43e
A
3561 vm_object_unlock(fourk_object);
3562
3563 /* merge permissions */
3564 entry->protection |= cur_protection;
3565 entry->max_protection |= max_protection;
3566 if ((entry->protection & (VM_PROT_WRITE |
0a7de745 3567 VM_PROT_EXECUTE)) ==
5ba3f43e
A
3568 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3569 fourk_binary_compatibility_unsafe &&
3570 fourk_binary_compatibility_allow_wx) {
3571 /* write+execute: need to be "jit" */
3572 entry->used_for_jit = TRUE;
3573 }
5ba3f43e
A
3574 goto map_in_fourk_pager;
3575 }
3576
3577 /*
3578 * ... the next region doesn't overlap the
3579 * end point.
3580 */
3581
3582 if ((entry->vme_next != vm_map_to_entry(map)) &&
3583 (entry->vme_next->vme_start < end)) {
3584 RETURN(KERN_NO_SPACE);
3585 }
3586 }
3587
3588 /*
3589 * At this point,
3590 * "start" and "end" should define the endpoints of the
3591 * available new range, and
3592 * "entry" should refer to the region before the new
3593 * range, and
3594 *
3595 * the map should be locked.
3596 */
3597
3598 /* create a new "4K" pager */
3599 fourk_mem_obj = fourk_pager_create();
3600 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3601 assert(fourk_object);
3602
3603 /* keep the "4" object alive */
3604 vm_object_reference(fourk_object);
3605
3606 /* create a "copy" object, to map the "4K" object copy-on-write */
3607 fourk_copy = TRUE;
3608 result = vm_object_copy_strategically(fourk_object,
0a7de745
A
3609 0,
3610 end - start,
3611 &copy_object,
3612 &copy_offset,
3613 &fourk_copy);
5ba3f43e
A
3614 assert(result == KERN_SUCCESS);
3615 assert(copy_object != VM_OBJECT_NULL);
3616 assert(copy_offset == 0);
3617
5ba3f43e
A
3618 /* map the "4K" pager's copy object */
3619 new_entry =
0a7de745
A
3620 vm_map_entry_insert(map, entry,
3621 vm_map_trunc_page(start,
3622 VM_MAP_PAGE_MASK(map)),
3623 vm_map_round_page(end,
3624 VM_MAP_PAGE_MASK(map)),
3625 copy_object,
3626 0, /* offset */
a991bd8d 3627 vmk_flags,
0a7de745 3628 FALSE, /* needs_copy */
cb323159
A
3629 FALSE,
3630 FALSE,
0a7de745
A
3631 cur_protection, max_protection,
3632 VM_BEHAVIOR_DEFAULT,
f427ee49
A
3633 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3634 VM_INHERIT_NONE : inheritance),
0a7de745
A
3635 0,
3636 no_cache,
3637 permanent,
cb323159 3638 no_copy_on_read,
0a7de745
A
3639 superpage_size,
3640 clear_map_aligned,
3641 is_submap,
3642 FALSE, /* jit */
f427ee49
A
3643 alias,
3644 translated_allow_execute);
5ba3f43e
A
3645 entry = new_entry;
3646
3647#if VM_MAP_DEBUG_FOURK
3648 if (vm_map_debug_fourk) {
3649 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
0a7de745
A
3650 map,
3651 (uint64_t) entry->vme_start,
3652 (uint64_t) entry->vme_end,
3653 fourk_mem_obj);
5ba3f43e
A
3654 }
3655#endif /* VM_MAP_DEBUG_FOURK */
3656
3657 new_mapping_established = TRUE;
3658
3659map_in_fourk_pager:
3660 /* "map" the original "object" where it belongs in the "4K" pager */
3661 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3662 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3663 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3664 fourk_pager_index_num = 4;
3665 } else {
3666 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3667 }
3668 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3669 fourk_pager_index_num = 4 - fourk_pager_index_start;
3670 }
3671 for (cur_idx = 0;
0a7de745
A
3672 cur_idx < fourk_pager_index_num;
3673 cur_idx++) {
3674 vm_object_t old_object;
3675 vm_object_offset_t old_offset;
5ba3f43e
A
3676
3677 kr = fourk_pager_populate(fourk_mem_obj,
0a7de745
A
3678 TRUE, /* overwrite */
3679 fourk_pager_index_start + cur_idx,
3680 object,
3681 (object
3682 ? (offset +
3683 (cur_idx * FOURK_PAGE_SIZE))
3684 : 0),
3685 &old_object,
3686 &old_offset);
5ba3f43e
A
3687#if VM_MAP_DEBUG_FOURK
3688 if (vm_map_debug_fourk) {
3689 if (old_object == (vm_object_t) -1 &&
3690 old_offset == (vm_object_offset_t) -1) {
3691 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
0a7de745
A
3692 "pager [%p:0x%llx] "
3693 "populate[%d] "
3694 "[object:%p,offset:0x%llx]\n",
3695 map,
3696 (uint64_t) entry->vme_start,
3697 (uint64_t) entry->vme_end,
3698 fourk_mem_obj,
3699 VME_OFFSET(entry),
3700 fourk_pager_index_start + cur_idx,
3701 object,
3702 (object
3703 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3704 : 0));
5ba3f43e
A
3705 } else {
3706 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
0a7de745
A
3707 "pager [%p:0x%llx] "
3708 "populate[%d] [object:%p,offset:0x%llx] "
3709 "old [%p:0x%llx]\n",
3710 map,
3711 (uint64_t) entry->vme_start,
3712 (uint64_t) entry->vme_end,
3713 fourk_mem_obj,
3714 VME_OFFSET(entry),
3715 fourk_pager_index_start + cur_idx,
3716 object,
3717 (object
3718 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3719 : 0),
3720 old_object,
3721 old_offset);
5ba3f43e
A
3722 }
3723 }
3724#endif /* VM_MAP_DEBUG_FOURK */
3725
3726 assert(kr == KERN_SUCCESS);
3727 if (object != old_object &&
3728 object != VM_OBJECT_NULL &&
3729 object != (vm_object_t) -1) {
3730 vm_object_reference(object);
3731 }
3732 if (object != old_object &&
3733 old_object != VM_OBJECT_NULL &&
3734 old_object != (vm_object_t) -1) {
3735 vm_object_deallocate(old_object);
3736 }
3737 }
3738
3739BailOut:
3740 assert(map_locked == TRUE);
3741
5ba3f43e
A
3742 if (result == KERN_SUCCESS) {
3743 vm_prot_t pager_prot;
3744 memory_object_t pager;
3745
3746#if DEBUG
3747 if (pmap_empty &&
3748 !(vmk_flags.vmkf_no_pmap_check)) {
3749 assert(vm_map_pmap_is_empty(map,
0a7de745
A
3750 *address,
3751 *address + size));
5ba3f43e
A
3752 }
3753#endif /* DEBUG */
3754
3755 /*
3756 * For "named" VM objects, let the pager know that the
3757 * memory object is being mapped. Some pagers need to keep
3758 * track of this, to know when they can reclaim the memory
3759 * object, for example.
3760 * VM calls memory_object_map() for each mapping (specifying
3761 * the protection of each mapping) and calls
3762 * memory_object_last_unmap() when all the mappings are gone.
3763 */
3764 pager_prot = max_protection;
3765 if (needs_copy) {
3766 /*
3767 * Copy-On-Write mapping: won't modify
3768 * the memory object.
3769 */
3770 pager_prot &= ~VM_PROT_WRITE;
3771 }
3772 if (!is_submap &&
3773 object != VM_OBJECT_NULL &&
3774 object->named &&
3775 object->pager != MEMORY_OBJECT_NULL) {
3776 vm_object_lock(object);
3777 pager = object->pager;
3778 if (object->named &&
3779 pager != MEMORY_OBJECT_NULL) {
3780 assert(object->pager_ready);
3781 vm_object_mapping_wait(object, THREAD_UNINT);
3782 vm_object_mapping_begin(object);
3783 vm_object_unlock(object);
3784
3785 kr = memory_object_map(pager, pager_prot);
3786 assert(kr == KERN_SUCCESS);
3787
3788 vm_object_lock(object);
3789 vm_object_mapping_end(object);
3790 }
3791 vm_object_unlock(object);
3792 }
3793 if (!is_submap &&
3794 fourk_object != VM_OBJECT_NULL &&
3795 fourk_object->named &&
3796 fourk_object->pager != MEMORY_OBJECT_NULL) {
3797 vm_object_lock(fourk_object);
3798 pager = fourk_object->pager;
3799 if (fourk_object->named &&
3800 pager != MEMORY_OBJECT_NULL) {
3801 assert(fourk_object->pager_ready);
3802 vm_object_mapping_wait(fourk_object,
0a7de745 3803 THREAD_UNINT);
5ba3f43e
A
3804 vm_object_mapping_begin(fourk_object);
3805 vm_object_unlock(fourk_object);
3806
3807 kr = memory_object_map(pager, VM_PROT_READ);
3808 assert(kr == KERN_SUCCESS);
3809
3810 vm_object_lock(fourk_object);
3811 vm_object_mapping_end(fourk_object);
3812 }
3813 vm_object_unlock(fourk_object);
3814 }
3815 }
3816
f427ee49
A
3817 if (fourk_object != VM_OBJECT_NULL) {
3818 vm_object_deallocate(fourk_object);
3819 fourk_object = VM_OBJECT_NULL;
3820 memory_object_deallocate(fourk_mem_obj);
3821 fourk_mem_obj = MEMORY_OBJECT_NULL;
3822 }
3823
5ba3f43e
A
3824 assert(map_locked == TRUE);
3825
3826 if (!keep_map_locked) {
3827 vm_map_unlock(map);
3828 map_locked = FALSE;
3829 }
3830
3831 /*
3832 * We can't hold the map lock if we enter this block.
3833 */
3834
3835 if (result == KERN_SUCCESS) {
5ba3f43e
A
3836 /* Wire down the new entry if the user
3837 * requested all new map entries be wired.
3838 */
0a7de745 3839 if ((map->wiring_required) || (superpage_size)) {
5ba3f43e
A
3840 assert(!keep_map_locked);
3841 pmap_empty = FALSE; /* pmap won't be empty */
3842 kr = vm_map_wire_kernel(map, start, end,
0a7de745
A
3843 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3844 TRUE);
5ba3f43e
A
3845 result = kr;
3846 }
3847
3848 }
3849
3850 if (result != KERN_SUCCESS) {
3851 if (new_mapping_established) {
3852 /*
3853 * We have to get rid of the new mappings since we
3854 * won't make them available to the user.
3855 * Try and do that atomically, to minimize the risk
3856 * that someone else create new mappings that range.
3857 */
3858 zap_new_map = vm_map_create(PMAP_NULL,
0a7de745
A
3859 *address,
3860 *address + size,
3861 map->hdr.entries_pageable);
5ba3f43e 3862 vm_map_set_page_shift(zap_new_map,
0a7de745 3863 VM_MAP_PAGE_SHIFT(map));
5ba3f43e
A
3864 vm_map_disable_hole_optimization(zap_new_map);
3865
3866 if (!map_locked) {
3867 vm_map_lock(map);
3868 map_locked = TRUE;
3869 }
0a7de745
A
3870 (void) vm_map_delete(map, *address, *address + size,
3871 (VM_MAP_REMOVE_SAVE_ENTRIES |
3872 VM_MAP_REMOVE_NO_MAP_ALIGN),
3873 zap_new_map);
5ba3f43e
A
3874 }
3875 if (zap_old_map != VM_MAP_NULL &&
3876 zap_old_map->hdr.nentries != 0) {
0a7de745 3877 vm_map_entry_t entry1, entry2;
5ba3f43e
A
3878
3879 /*
3880 * The new mapping failed. Attempt to restore
3881 * the old mappings, saved in the "zap_old_map".
3882 */
3883 if (!map_locked) {
3884 vm_map_lock(map);
3885 map_locked = TRUE;
3886 }
3887
3888 /* first check if the coast is still clear */
3889 start = vm_map_first_entry(zap_old_map)->vme_start;
3890 end = vm_map_last_entry(zap_old_map)->vme_end;
3891 if (vm_map_lookup_entry(map, start, &entry1) ||
3892 vm_map_lookup_entry(map, end, &entry2) ||
3893 entry1 != entry2) {
3894 /*
3895 * Part of that range has already been
3896 * re-mapped: we can't restore the old
3897 * mappings...
3898 */
3899 vm_map_enter_restore_failures++;
3900 } else {
3901 /*
3902 * Transfer the saved map entries from
3903 * "zap_old_map" to the original "map",
3904 * inserting them all after "entry1".
3905 */
3906 for (entry2 = vm_map_first_entry(zap_old_map);
0a7de745
A
3907 entry2 != vm_map_to_entry(zap_old_map);
3908 entry2 = vm_map_first_entry(zap_old_map)) {
5ba3f43e
A
3909 vm_map_size_t entry_size;
3910
3911 entry_size = (entry2->vme_end -
0a7de745 3912 entry2->vme_start);
5ba3f43e 3913 vm_map_store_entry_unlink(zap_old_map,
0a7de745 3914 entry2);
5ba3f43e 3915 zap_old_map->size -= entry_size;
d9a64523 3916 vm_map_store_entry_link(map, entry1, entry2,
0a7de745 3917 VM_MAP_KERNEL_FLAGS_NONE);
5ba3f43e
A
3918 map->size += entry_size;
3919 entry1 = entry2;
3920 }
3921 if (map->wiring_required) {
3922 /*
3923 * XXX TODO: we should rewire the
3924 * old pages here...
3925 */
3926 }
3927 vm_map_enter_restore_successes++;
3928 }
3929 }
3930 }
3931
3932 /*
3933 * The caller is responsible for releasing the lock if it requested to
3934 * keep the map locked.
3935 */
3936 if (map_locked && !keep_map_locked) {
3937 vm_map_unlock(map);
3938 }
3939
3940 /*
3941 * Get rid of the "zap_maps" and all the map entries that
3942 * they may still contain.
3943 */
3944 if (zap_old_map != VM_MAP_NULL) {
3945 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3946 zap_old_map = VM_MAP_NULL;
3947 }
3948 if (zap_new_map != VM_MAP_NULL) {
3949 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3950 zap_new_map = VM_MAP_NULL;
3951 }
3952
3953 return result;
3954
0a7de745 3955#undef RETURN
5ba3f43e
A
3956}
3957#endif /* __arm64__ */
3958
/*
 * Statistics for the prefault optimization: one counter for pages that
 * were successfully pre-entered in the pmap, one for the number of times
 * prefaulting gave up early.
 */
int64_t vm_prefault_nb_pages = 0, vm_prefault_nb_bailout = 0;
3964
3965static kern_return_t
3966vm_map_enter_mem_object_helper(
0a7de745
A
3967 vm_map_t target_map,
3968 vm_map_offset_t *address,
3969 vm_map_size_t initial_size,
3970 vm_map_offset_t mask,
3971 int flags,
3972 vm_map_kernel_flags_t vmk_flags,
3973 vm_tag_t tag,
3974 ipc_port_t port,
3975 vm_object_offset_t offset,
3976 boolean_t copy,
3977 vm_prot_t cur_protection,
3978 vm_prot_t max_protection,
3979 vm_inherit_t inheritance,
3980 upl_page_list_ptr_t page_list,
3981 unsigned int page_list_count)
5ba3f43e 3982{
0a7de745
A
3983 vm_map_address_t map_addr;
3984 vm_map_size_t map_size;
3985 vm_object_t object;
3986 vm_object_size_t size;
3987 kern_return_t result;
3988 boolean_t mask_cur_protection, mask_max_protection;
3989 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3990 vm_map_offset_t offset_in_mapping = 0;
5ba3f43e 3991#if __arm64__
0a7de745 3992 boolean_t fourk = vmk_flags.vmkf_fourk;
5ba3f43e
A
3993#endif /* __arm64__ */
3994
f427ee49
A
3995 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
3996 /* XXX TODO4K prefaulting depends on page size... */
3997 try_prefault = FALSE;
3998 }
3999
5ba3f43e
A
4000 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4001
4002 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4003 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4004 cur_protection &= ~VM_PROT_IS_MASK;
4005 max_protection &= ~VM_PROT_IS_MASK;
4006
4007 /*
4008 * Check arguments for validity
4009 */
4010 if ((target_map == VM_MAP_NULL) ||
4011 (cur_protection & ~VM_PROT_ALL) ||
4012 (max_protection & ~VM_PROT_ALL) ||
4013 (inheritance > VM_INHERIT_LAST_VALID) ||
4014 (try_prefault && (copy || !page_list)) ||
4015 initial_size == 0) {
4016 return KERN_INVALID_ARGUMENT;
4017 }
4018
4019#if __arm64__
f427ee49
A
4020 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4021 /* no "fourk" if map is using a sub-page page size */
4022 fourk = FALSE;
4023 }
5ba3f43e
A
4024 if (fourk) {
4025 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4026 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4027 } else
4028#endif /* __arm64__ */
4029 {
4030 map_addr = vm_map_trunc_page(*address,
0a7de745 4031 VM_MAP_PAGE_MASK(target_map));
5ba3f43e 4032 map_size = vm_map_round_page(initial_size,
0a7de745 4033 VM_MAP_PAGE_MASK(target_map));
5ba3f43e
A
4034 }
4035 size = vm_object_round_page(initial_size);
4036
4037 /*
4038 * Find the vm object (if any) corresponding to this port.
4039 */
4040 if (!IP_VALID(port)) {
4041 object = VM_OBJECT_NULL;
4042 offset = 0;
4043 copy = FALSE;
4044 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
0a7de745 4045 vm_named_entry_t named_entry;
f427ee49 4046 vm_object_offset_t data_offset;
5ba3f43e 4047
ea3f0419 4048 named_entry = (vm_named_entry_t) ip_get_kobject(port);
5ba3f43e
A
4049
4050 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4051 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
f427ee49 4052 data_offset = named_entry->data_offset;
5ba3f43e 4053 offset += named_entry->data_offset;
f427ee49
A
4054 } else {
4055 data_offset = 0;
5ba3f43e
A
4056 }
4057
4058 /* a few checks to make sure user is obeying rules */
4059 if (size == 0) {
0a7de745 4060 if (offset >= named_entry->size) {
5ba3f43e 4061 return KERN_INVALID_RIGHT;
0a7de745 4062 }
5ba3f43e
A
4063 size = named_entry->size - offset;
4064 }
4065 if (mask_max_protection) {
4066 max_protection &= named_entry->protection;
4067 }
4068 if (mask_cur_protection) {
4069 cur_protection &= named_entry->protection;
4070 }
4071 if ((named_entry->protection & max_protection) !=
0a7de745 4072 max_protection) {
5ba3f43e 4073 return KERN_INVALID_RIGHT;
0a7de745 4074 }
5ba3f43e 4075 if ((named_entry->protection & cur_protection) !=
0a7de745 4076 cur_protection) {
5ba3f43e 4077 return KERN_INVALID_RIGHT;
0a7de745 4078 }
5ba3f43e
A
4079 if (offset + size < offset) {
4080 /* overflow */
4081 return KERN_INVALID_ARGUMENT;
4082 }
4083 if (named_entry->size < (offset + initial_size)) {
4084 return KERN_INVALID_ARGUMENT;
4085 }
4086
4087 if (named_entry->is_copy) {
4088 /* for a vm_map_copy, we can only map it whole */
4089 if ((size != named_entry->size) &&
4090 (vm_map_round_page(size,
0a7de745
A
4091 VM_MAP_PAGE_MASK(target_map)) ==
4092 named_entry->size)) {
5ba3f43e 4093 /* XXX FBDP use the rounded size... */
39236c6e
A
4094 size = vm_map_round_page(
4095 size,
4096 VM_MAP_PAGE_MASK(target_map));
4097 }
39236c6e
A
4098 }
4099
2d21ac55
A
4100 /* the callers parameter offset is defined to be the */
4101 /* offset from beginning of named entry offset in object */
4102 offset = offset + named_entry->offset;
5ba3f43e 4103
0a7de745
A
4104 if (!VM_MAP_PAGE_ALIGNED(size,
4105 VM_MAP_PAGE_MASK(target_map))) {
39236c6e
A
4106 /*
4107 * Let's not map more than requested;
4108 * vm_map_enter() will handle this "not map-aligned"
4109 * case.
4110 */
4111 map_size = size;
4112 }
4113
2d21ac55
A
4114 named_entry_lock(named_entry);
4115 if (named_entry->is_sub_map) {
0a7de745 4116 vm_map_t submap;
2d21ac55 4117
3e170ce0 4118 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4119 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4120 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4121 }
4122
2d21ac55 4123 submap = named_entry->backing.map;
2d21ac55 4124 vm_map_reference(submap);
2d21ac55
A
4125 named_entry_unlock(named_entry);
4126
5ba3f43e
A
4127 vmk_flags.vmkf_submap = TRUE;
4128
2d21ac55 4129 result = vm_map_enter(target_map,
0a7de745
A
4130 &map_addr,
4131 map_size,
4132 mask,
4133 flags,
4134 vmk_flags,
4135 tag,
4136 (vm_object_t)(uintptr_t) submap,
4137 offset,
4138 copy,
4139 cur_protection,
4140 max_protection,
4141 inheritance);
2d21ac55
A
4142 if (result != KERN_SUCCESS) {
4143 vm_map_deallocate(submap);
4144 } else {
4145 /*
4146 * No need to lock "submap" just to check its
4147 * "mapped" flag: that flag is never reset
4148 * once it's been set and if we race, we'll
4149 * just end up setting it twice, which is OK.
4150 */
316670eb
A
4151 if (submap->mapped_in_other_pmaps == FALSE &&
4152 vm_map_pmap(submap) != PMAP_NULL &&
4153 vm_map_pmap(submap) !=
4154 vm_map_pmap(target_map)) {
2d21ac55 4155 /*
316670eb
A
4156 * This submap is being mapped in a map
4157 * that uses a different pmap.
4158 * Set its "mapped_in_other_pmaps" flag
5ba3f43e 4159 * to indicate that we now need to
316670eb
A
4160 * remove mappings from all pmaps rather
4161 * than just the submap's pmap.
2d21ac55
A
4162 */
4163 vm_map_lock(submap);
316670eb 4164 submap->mapped_in_other_pmaps = TRUE;
2d21ac55
A
4165 vm_map_unlock(submap);
4166 }
4167 *address = map_addr;
4168 }
4169 return result;
39236c6e 4170 } else if (named_entry->is_copy) {
0a7de745
A
4171 kern_return_t kr;
4172 vm_map_copy_t copy_map;
4173 vm_map_entry_t copy_entry;
4174 vm_map_offset_t copy_addr;
f427ee49
A
4175 vm_map_copy_t target_copy_map;
4176 vm_map_offset_t overmap_start, overmap_end;
4177 vm_map_offset_t trimmed_start;
4178 vm_map_size_t target_size;
39236c6e
A
4179
4180 if (flags & ~(VM_FLAGS_FIXED |
0a7de745
A
4181 VM_FLAGS_ANYWHERE |
4182 VM_FLAGS_OVERWRITE |
4183 VM_FLAGS_RETURN_4K_DATA_ADDR |
4184 VM_FLAGS_RETURN_DATA_ADDR |
4185 VM_FLAGS_ALIAS_MASK)) {
39236c6e
A
4186 named_entry_unlock(named_entry);
4187 return KERN_INVALID_ARGUMENT;
4188 }
4189
39236c6e
A
4190 copy_map = named_entry->backing.copy;
4191 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4192 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4193 /* unsupported type; should not happen */
4194 printf("vm_map_enter_mem_object: "
0a7de745
A
4195 "memory_entry->backing.copy "
4196 "unsupported type 0x%x\n",
4197 copy_map->type);
39236c6e
A
4198 named_entry_unlock(named_entry);
4199 return KERN_INVALID_ARGUMENT;
4200 }
4201
f427ee49
A
4202 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4203 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4204 }
4205
4206 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4207 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4208 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4209 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4210 offset_in_mapping &= ~((signed)(0xFFF));
4211 }
4212 }
4213
4214 target_copy_map = VM_MAP_COPY_NULL;
4215 target_size = copy_map->size;
4216 overmap_start = 0;
4217 overmap_end = 0;
4218 trimmed_start = 0;
4219 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4220 DEBUG4K_ADJUST("adjusting...\n");
4221 kr = vm_map_copy_adjust_to_target(
4222 copy_map,
4223 offset /* includes data_offset */,
4224 initial_size,
4225 target_map,
4226 copy,
4227 &target_copy_map,
4228 &overmap_start,
4229 &overmap_end,
4230 &trimmed_start);
4231 if (kr != KERN_SUCCESS) {
4232 named_entry_unlock(named_entry);
4233 return kr;
4234 }
4235 target_size = target_copy_map->size;
4236 if (trimmed_start >= data_offset) {
4237 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4238 } else {
4239 data_offset -= trimmed_start;
4240 }
4241 } else {
4242 target_copy_map = copy_map;
4243 }
4244
39236c6e
A
4245 /* reserve a contiguous range */
4246 kr = vm_map_enter(target_map,
0a7de745 4247 &map_addr,
f427ee49 4248 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
0a7de745
A
4249 mask,
4250 flags & (VM_FLAGS_ANYWHERE |
4251 VM_FLAGS_OVERWRITE |
4252 VM_FLAGS_RETURN_4K_DATA_ADDR |
4253 VM_FLAGS_RETURN_DATA_ADDR),
4254 vmk_flags,
4255 tag,
4256 VM_OBJECT_NULL,
4257 0,
4258 FALSE, /* copy */
4259 cur_protection,
4260 max_protection,
4261 inheritance);
39236c6e 4262 if (kr != KERN_SUCCESS) {
f427ee49
A
4263 DEBUG4K_ERROR("kr 0x%x\n", kr);
4264 if (target_copy_map != copy_map) {
4265 vm_map_copy_discard(target_copy_map);
4266 target_copy_map = VM_MAP_COPY_NULL;
4267 }
39236c6e
A
4268 named_entry_unlock(named_entry);
4269 return kr;
4270 }
4271
4272 copy_addr = map_addr;
4273
f427ee49
A
4274 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4275 copy_entry != vm_map_copy_to_entry(target_copy_map);
0a7de745
A
4276 copy_entry = copy_entry->vme_next) {
4277 int remap_flags;
4278 vm_map_kernel_flags_t vmk_remap_flags;
4279 vm_map_t copy_submap;
4280 vm_object_t copy_object;
4281 vm_map_size_t copy_size;
4282 vm_object_offset_t copy_offset;
4283 int copy_vm_alias;
39236c6e 4284
5ba3f43e
A
4285 remap_flags = 0;
4286 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4287
813fb2f6 4288 copy_object = VME_OBJECT(copy_entry);
3e170ce0 4289 copy_offset = VME_OFFSET(copy_entry);
39236c6e 4290 copy_size = (copy_entry->vme_end -
0a7de745 4291 copy_entry->vme_start);
39037602
A
4292 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4293 if (copy_vm_alias == 0) {
4294 /*
4295 * Caller does not want a specific
4296 * alias for this new mapping: use
4297 * the alias of the original mapping.
4298 */
4299 copy_vm_alias = VME_ALIAS(copy_entry);
4300 }
39236c6e
A
4301
4302 /* sanity check */
fe8ab488
A
4303 if ((copy_addr + copy_size) >
4304 (map_addr +
f427ee49 4305 overmap_start + overmap_end +
0a7de745 4306 named_entry->size /* XXX full size */)) {
39236c6e
A
4307 /* over-mapping too much !? */
4308 kr = KERN_INVALID_ARGUMENT;
f427ee49 4309 DEBUG4K_ERROR("kr 0x%x\n", kr);
39236c6e
A
4310 /* abort */
4311 break;
4312 }
4313
4314 /* take a reference on the object */
4315 if (copy_entry->is_sub_map) {
5ba3f43e 4316 vmk_remap_flags.vmkf_submap = TRUE;
3e170ce0 4317 copy_submap = VME_SUBMAP(copy_entry);
39236c6e
A
4318 vm_map_lock(copy_submap);
4319 vm_map_reference(copy_submap);
4320 vm_map_unlock(copy_submap);
d9a64523 4321 copy_object = (vm_object_t)(uintptr_t) copy_submap;
813fb2f6 4322 } else if (!copy &&
0a7de745
A
4323 copy_object != VM_OBJECT_NULL &&
4324 (copy_entry->needs_copy ||
4325 copy_object->shadowed ||
4326 (!copy_object->true_share &&
4327 !copy_entry->is_shared &&
4328 copy_object->vo_size > copy_size))) {
813fb2f6
A
4329 /*
4330 * We need to resolve our side of this
4331 * "symmetric" copy-on-write now; we
4332 * need a new object to map and share,
4333 * instead of the current one which
4334 * might still be shared with the
4335 * original mapping.
4336 *
4337 * Note: A "vm_map_copy_t" does not
4338 * have a lock but we're protected by
4339 * the named entry's lock here.
4340 */
4341 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4342 VME_OBJECT_SHADOW(copy_entry, copy_size);
4343 if (!copy_entry->needs_copy &&
4344 copy_entry->protection & VM_PROT_WRITE) {
4345 vm_prot_t prot;
4346
4347 prot = copy_entry->protection & ~VM_PROT_WRITE;
4348 vm_object_pmap_protect(copy_object,
0a7de745
A
4349 copy_offset,
4350 copy_size,
4351 PMAP_NULL,
f427ee49 4352 PAGE_SIZE,
0a7de745
A
4353 0,
4354 prot);
813fb2f6
A
4355 }
4356
4357 copy_entry->needs_copy = FALSE;
4358 copy_entry->is_shared = TRUE;
4359 copy_object = VME_OBJECT(copy_entry);
4360 copy_offset = VME_OFFSET(copy_entry);
4361 vm_object_lock(copy_object);
4362 vm_object_reference_locked(copy_object);
4363 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4364 /* we're about to make a shared mapping of this object */
4365 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4366 copy_object->true_share = TRUE;
4367 }
4368 vm_object_unlock(copy_object);
39236c6e 4369 } else {
813fb2f6
A
4370 /*
4371 * We already have the right object
4372 * to map.
4373 */
3e170ce0 4374 copy_object = VME_OBJECT(copy_entry);
39236c6e
A
4375 vm_object_reference(copy_object);
4376 }
4377
4378 /* over-map the object into destination */
4379 remap_flags |= flags;
4380 remap_flags |= VM_FLAGS_FIXED;
4381 remap_flags |= VM_FLAGS_OVERWRITE;
4382 remap_flags &= ~VM_FLAGS_ANYWHERE;
813fb2f6
A
4383 if (!copy && !copy_entry->is_sub_map) {
4384 /*
4385 * copy-on-write should have been
4386 * resolved at this point, or we would
4387 * end up sharing instead of copying.
4388 */
4389 assert(!copy_entry->needs_copy);
4390 }
f427ee49 4391#if XNU_TARGET_OS_OSX
d9a64523
A
4392 if (copy_entry->used_for_jit) {
4393 vmk_remap_flags.vmkf_map_jit = TRUE;
4394 }
f427ee49
A
4395#endif /* XNU_TARGET_OS_OSX */
4396
4397 assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4398 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
39236c6e 4399 kr = vm_map_enter(target_map,
0a7de745
A
4400 &copy_addr,
4401 copy_size,
4402 (vm_map_offset_t) 0,
4403 remap_flags,
4404 vmk_remap_flags,
f427ee49 4405 (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
0a7de745
A
4406 copy_object,
4407 copy_offset,
c3c9b80d
A
4408 ((copy_object == NULL)
4409 ? FALSE
4410 : (copy || copy_entry->needs_copy)),
0a7de745
A
4411 cur_protection,
4412 max_protection,
4413 inheritance);
39236c6e 4414 if (kr != KERN_SUCCESS) {
f427ee49 4415 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
39236c6e
A
4416 if (copy_entry->is_sub_map) {
4417 vm_map_deallocate(copy_submap);
4418 } else {
4419 vm_object_deallocate(copy_object);
4420 }
4421 /* abort */
4422 break;
4423 }
4424
4425 /* next mapping */
4426 copy_addr += copy_size;
4427 }
5ba3f43e 4428
39236c6e 4429 if (kr == KERN_SUCCESS) {
3e170ce0 4430 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4431 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4432 *address = map_addr + offset_in_mapping;
4433 } else {
4434 *address = map_addr;
4435 }
f427ee49
A
4436 if (overmap_start) {
4437 *address += overmap_start;
4438 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
fe8ab488 4439 }
39236c6e
A
4440 }
4441 named_entry_unlock(named_entry);
f427ee49
A
4442 if (target_copy_map != copy_map) {
4443 vm_map_copy_discard(target_copy_map);
4444 target_copy_map = VM_MAP_COPY_NULL;
4445 }
39236c6e
A
4446
4447 if (kr != KERN_SUCCESS) {
0a7de745 4448 if (!(flags & VM_FLAGS_OVERWRITE)) {
39236c6e
A
4449 /* deallocate the contiguous range */
4450 (void) vm_deallocate(target_map,
0a7de745
A
4451 map_addr,
4452 map_size);
39236c6e
A
4453 }
4454 }
4455
4456 return kr;
f427ee49
A
4457 }
4458
4459 if (named_entry->is_object) {
0a7de745
A
4460 unsigned int access;
4461 vm_prot_t protections;
4462 unsigned int wimg_mode;
5ba3f43e
A
4463
4464 /* we are mapping a VM object */
4465
4466 protections = named_entry->protection & VM_PROT_ALL;
4467 access = GET_MAP_MEM(named_entry->protection);
4468
3e170ce0 4469 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4470 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
f427ee49 4471 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
0a7de745 4472 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
3e170ce0 4473 offset_in_mapping &= ~((signed)(0xFFF));
0a7de745 4474 }
f427ee49
A
4475 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4476 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
5ba3f43e 4477 }
39236c6e 4478
f427ee49 4479 object = vm_named_entry_to_vm_object(named_entry);
2d21ac55 4480 assert(object != VM_OBJECT_NULL);
5ba3f43e 4481 vm_object_lock(object);
2d21ac55 4482 named_entry_unlock(named_entry);
5ba3f43e
A
4483
4484 vm_object_reference_locked(object);
4485
4486 wimg_mode = object->wimg_bits;
0a7de745
A
4487 vm_prot_to_wimg(access, &wimg_mode);
4488 if (object->wimg_bits != wimg_mode) {
5ba3f43e 4489 vm_object_change_wimg_mode(object, wimg_mode);
0a7de745 4490 }
5ba3f43e
A
4491
4492 vm_object_unlock(object);
f427ee49
A
4493 } else {
4494 panic("invalid VM named entry %p", named_entry);
2d21ac55
A
4495 }
4496 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4497 /*
4498 * JMM - This is temporary until we unify named entries
4499 * and raw memory objects.
4500 *
4501 * Detected fake ip_kotype for a memory object. In
4502 * this case, the port isn't really a port at all, but
4503 * instead is just a raw memory object.
4504 */
3e170ce0 4505 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4506 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4507 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4508 }
4509
5ba3f43e 4510 object = memory_object_to_vm_object((memory_object_t)port);
0a7de745 4511 if (object == VM_OBJECT_NULL) {
2d21ac55 4512 return KERN_INVALID_OBJECT;
0a7de745 4513 }
5ba3f43e 4514 vm_object_reference(object);
2d21ac55
A
4515
4516 /* wait for object (if any) to be ready */
4517 if (object != VM_OBJECT_NULL) {
4518 if (object == kernel_object) {
4519 printf("Warning: Attempt to map kernel object"
0a7de745 4520 " by a non-private kernel entity\n");
2d21ac55
A
4521 return KERN_INVALID_OBJECT;
4522 }
b0d623f7 4523 if (!object->pager_ready) {
2d21ac55 4524 vm_object_lock(object);
b0d623f7
A
4525
4526 while (!object->pager_ready) {
4527 vm_object_wait(object,
0a7de745
A
4528 VM_OBJECT_EVENT_PAGER_READY,
4529 THREAD_UNINT);
b0d623f7
A
4530 vm_object_lock(object);
4531 }
4532 vm_object_unlock(object);
2d21ac55 4533 }
2d21ac55
A
4534 }
4535 } else {
4536 return KERN_INVALID_OBJECT;
4537 }
4538
593a1d5f
A
4539 if (object != VM_OBJECT_NULL &&
4540 object->named &&
4541 object->pager != MEMORY_OBJECT_NULL &&
4542 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4543 memory_object_t pager;
0a7de745
A
4544 vm_prot_t pager_prot;
4545 kern_return_t kr;
593a1d5f
A
4546
4547 /*
4548 * For "named" VM objects, let the pager know that the
4549 * memory object is being mapped. Some pagers need to keep
4550 * track of this, to know when they can reclaim the memory
4551 * object, for example.
4552 * VM calls memory_object_map() for each mapping (specifying
4553 * the protection of each mapping) and calls
4554 * memory_object_last_unmap() when all the mappings are gone.
4555 */
4556 pager_prot = max_protection;
4557 if (copy) {
4558 /*
4559 * Copy-On-Write mapping: won't modify the
4560 * memory object.
4561 */
4562 pager_prot &= ~VM_PROT_WRITE;
4563 }
4564 vm_object_lock(object);
4565 pager = object->pager;
4566 if (object->named &&
4567 pager != MEMORY_OBJECT_NULL &&
4568 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4569 assert(object->pager_ready);
4570 vm_object_mapping_wait(object, THREAD_UNINT);
4571 vm_object_mapping_begin(object);
4572 vm_object_unlock(object);
4573
4574 kr = memory_object_map(pager, pager_prot);
4575 assert(kr == KERN_SUCCESS);
4576
4577 vm_object_lock(object);
4578 vm_object_mapping_end(object);
4579 }
4580 vm_object_unlock(object);
4581 }
4582
2d21ac55
A
4583 /*
4584 * Perform the copy if requested
4585 */
4586
4587 if (copy) {
0a7de745
A
4588 vm_object_t new_object;
4589 vm_object_offset_t new_offset;
2d21ac55 4590
3e170ce0 4591 result = vm_object_copy_strategically(object, offset,
0a7de745
A
4592 map_size,
4593 &new_object, &new_offset,
4594 &copy);
2d21ac55
A
4595
4596
4597 if (result == KERN_MEMORY_RESTART_COPY) {
4598 boolean_t success;
4599 boolean_t src_needs_copy;
4600
4601 /*
4602 * XXX
4603 * We currently ignore src_needs_copy.
4604 * This really is the issue of how to make
4605 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4606 * non-kernel users to use. Solution forthcoming.
4607 * In the meantime, since we don't allow non-kernel
4608 * memory managers to specify symmetric copy,
4609 * we won't run into problems here.
4610 */
4611 new_object = object;
4612 new_offset = offset;
4613 success = vm_object_copy_quickly(&new_object,
0a7de745
A
4614 new_offset,
4615 map_size,
4616 &src_needs_copy,
4617 &copy);
2d21ac55
A
4618 assert(success);
4619 result = KERN_SUCCESS;
4620 }
4621 /*
4622 * Throw away the reference to the
4623 * original object, as it won't be mapped.
4624 */
4625
4626 vm_object_deallocate(object);
4627
3e170ce0 4628 if (result != KERN_SUCCESS) {
2d21ac55 4629 return result;
3e170ce0 4630 }
2d21ac55
A
4631
4632 object = new_object;
4633 offset = new_offset;
4634 }
4635
fe8ab488 4636 /*
5ba3f43e 4637 * If non-kernel users want to try to prefault pages, the mapping and prefault
fe8ab488
A
4638 * needs to be atomic.
4639 */
5ba3f43e
A
4640 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4641 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4642
4643#if __arm64__
4644 if (fourk) {
4645 /* map this object in a "4K" pager */
4646 result = vm_map_enter_fourk(target_map,
0a7de745
A
4647 &map_addr,
4648 map_size,
4649 (vm_map_offset_t) mask,
4650 flags,
4651 vmk_flags,
4652 tag,
4653 object,
4654 offset,
4655 copy,
4656 cur_protection,
4657 max_protection,
4658 inheritance);
5ba3f43e
A
4659 } else
4660#endif /* __arm64__ */
3e170ce0
A
4661 {
4662 result = vm_map_enter(target_map,
0a7de745
A
4663 &map_addr, map_size,
4664 (vm_map_offset_t)mask,
4665 flags,
4666 vmk_flags,
4667 tag,
4668 object, offset,
4669 copy,
4670 cur_protection, max_protection,
4671 inheritance);
4672 }
4673 if (result != KERN_SUCCESS) {
2d21ac55 4674 vm_object_deallocate(object);
0a7de745 4675 }
39236c6e 4676
fe8ab488
A
4677 /*
4678 * Try to prefault, and do not forget to release the vm map lock.
4679 */
4680 if (result == KERN_SUCCESS && try_prefault) {
4681 mach_vm_address_t va = map_addr;
4682 kern_return_t kr = KERN_SUCCESS;
4683 unsigned int i = 0;
39037602
A
4684 int pmap_options;
4685
5ba3f43e 4686 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
39037602
A
4687 if (object->internal) {
4688 pmap_options |= PMAP_OPTIONS_INTERNAL;
4689 }
fe8ab488
A
4690
4691 for (i = 0; i < page_list_count; ++i) {
5ba3f43e
A
4692 if (!UPL_VALID_PAGE(page_list, i)) {
4693 if (kernel_prefault) {
4694 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4695 result = KERN_MEMORY_ERROR;
4696 break;
4697 }
4698 } else {
fe8ab488
A
4699 /*
4700 * If this function call failed, we should stop
4701 * trying to optimize, other calls are likely
4702 * going to fail too.
4703 *
4704 * We are not gonna report an error for such
4705 * failure though. That's an optimization, not
4706 * something critical.
4707 */
4708 kr = pmap_enter_options(target_map->pmap,
0a7de745
A
4709 va, UPL_PHYS_PAGE(page_list, i),
4710 cur_protection, VM_PROT_NONE,
4711 0, TRUE, pmap_options, NULL);
fe8ab488
A
4712 if (kr != KERN_SUCCESS) {
4713 OSIncrementAtomic64(&vm_prefault_nb_bailout);
5ba3f43e
A
4714 if (kernel_prefault) {
4715 result = kr;
4716 }
3e170ce0 4717 break;
fe8ab488
A
4718 }
4719 OSIncrementAtomic64(&vm_prefault_nb_pages);
4720 }
4721
4722 /* Next virtual address */
4723 va += PAGE_SIZE;
4724 }
5ba3f43e
A
4725 if (vmk_flags.vmkf_keep_map_locked) {
4726 vm_map_unlock(target_map);
4727 }
fe8ab488
A
4728 }
4729
3e170ce0 4730 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
0a7de745 4731 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
4732 *address = map_addr + offset_in_mapping;
4733 } else {
4734 *address = map_addr;
4735 }
2d21ac55
A
4736 return result;
4737}
4738
fe8ab488
A
4739kern_return_t
4740vm_map_enter_mem_object(
0a7de745
A
4741 vm_map_t target_map,
4742 vm_map_offset_t *address,
4743 vm_map_size_t initial_size,
4744 vm_map_offset_t mask,
4745 int flags,
4746 vm_map_kernel_flags_t vmk_flags,
4747 vm_tag_t tag,
4748 ipc_port_t port,
4749 vm_object_offset_t offset,
4750 boolean_t copy,
4751 vm_prot_t cur_protection,
4752 vm_prot_t max_protection,
4753 vm_inherit_t inheritance)
fe8ab488 4754{
5ba3f43e
A
4755 kern_return_t ret;
4756
4757 ret = vm_map_enter_mem_object_helper(target_map,
0a7de745
A
4758 address,
4759 initial_size,
4760 mask,
4761 flags,
4762 vmk_flags,
4763 tag,
4764 port,
4765 offset,
4766 copy,
4767 cur_protection,
4768 max_protection,
4769 inheritance,
4770 NULL,
4771 0);
5ba3f43e
A
4772
4773#if KASAN
4774 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4775 kasan_notify_address(*address, initial_size);
4776 }
4777#endif
4778
4779 return ret;
fe8ab488 4780}
b0d623f7 4781
fe8ab488
A
4782kern_return_t
4783vm_map_enter_mem_object_prefault(
0a7de745
A
4784 vm_map_t target_map,
4785 vm_map_offset_t *address,
4786 vm_map_size_t initial_size,
4787 vm_map_offset_t mask,
4788 int flags,
4789 vm_map_kernel_flags_t vmk_flags,
4790 vm_tag_t tag,
4791 ipc_port_t port,
4792 vm_object_offset_t offset,
4793 vm_prot_t cur_protection,
4794 vm_prot_t max_protection,
4795 upl_page_list_ptr_t page_list,
4796 unsigned int page_list_count)
fe8ab488 4797{
5ba3f43e
A
4798 kern_return_t ret;
4799
4800 ret = vm_map_enter_mem_object_helper(target_map,
0a7de745
A
4801 address,
4802 initial_size,
4803 mask,
4804 flags,
4805 vmk_flags,
4806 tag,
4807 port,
4808 offset,
4809 FALSE,
4810 cur_protection,
4811 max_protection,
4812 VM_INHERIT_DEFAULT,
4813 page_list,
4814 page_list_count);
5ba3f43e
A
4815
4816#if KASAN
4817 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4818 kasan_notify_address(*address, initial_size);
4819 }
4820#endif
4821
4822 return ret;
fe8ab488 4823}
b0d623f7
A
4824
4825
4826kern_return_t
4827vm_map_enter_mem_object_control(
0a7de745
A
4828 vm_map_t target_map,
4829 vm_map_offset_t *address,
4830 vm_map_size_t initial_size,
4831 vm_map_offset_t mask,
4832 int flags,
4833 vm_map_kernel_flags_t vmk_flags,
4834 vm_tag_t tag,
4835 memory_object_control_t control,
4836 vm_object_offset_t offset,
4837 boolean_t copy,
4838 vm_prot_t cur_protection,
4839 vm_prot_t max_protection,
4840 vm_inherit_t inheritance)
b0d623f7 4841{
0a7de745
A
4842 vm_map_address_t map_addr;
4843 vm_map_size_t map_size;
4844 vm_object_t object;
4845 vm_object_size_t size;
4846 kern_return_t result;
4847 memory_object_t pager;
4848 vm_prot_t pager_prot;
4849 kern_return_t kr;
5ba3f43e 4850#if __arm64__
0a7de745 4851 boolean_t fourk = vmk_flags.vmkf_fourk;
5ba3f43e 4852#endif /* __arm64__ */
b0d623f7
A
4853
4854 /*
4855 * Check arguments for validity
4856 */
4857 if ((target_map == VM_MAP_NULL) ||
4858 (cur_protection & ~VM_PROT_ALL) ||
4859 (max_protection & ~VM_PROT_ALL) ||
4860 (inheritance > VM_INHERIT_LAST_VALID) ||
3e170ce0 4861 initial_size == 0) {
b0d623f7 4862 return KERN_INVALID_ARGUMENT;
3e170ce0 4863 }
b0d623f7 4864
5ba3f43e 4865#if __arm64__
f427ee49
A
4866 if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
4867 fourk = FALSE;
4868 }
4869
5ba3f43e
A
4870 if (fourk) {
4871 map_addr = vm_map_trunc_page(*address,
0a7de745 4872 FOURK_PAGE_MASK);
5ba3f43e 4873 map_size = vm_map_round_page(initial_size,
0a7de745 4874 FOURK_PAGE_MASK);
5ba3f43e
A
4875 } else
4876#endif /* __arm64__ */
3e170ce0
A
4877 {
4878 map_addr = vm_map_trunc_page(*address,
0a7de745 4879 VM_MAP_PAGE_MASK(target_map));
3e170ce0 4880 map_size = vm_map_round_page(initial_size,
0a7de745 4881 VM_MAP_PAGE_MASK(target_map));
3e170ce0
A
4882 }
4883 size = vm_object_round_page(initial_size);
b0d623f7
A
4884
4885 object = memory_object_control_to_vm_object(control);
4886
0a7de745 4887 if (object == VM_OBJECT_NULL) {
b0d623f7 4888 return KERN_INVALID_OBJECT;
0a7de745 4889 }
b0d623f7
A
4890
4891 if (object == kernel_object) {
4892 printf("Warning: Attempt to map kernel object"
0a7de745 4893 " by a non-private kernel entity\n");
b0d623f7
A
4894 return KERN_INVALID_OBJECT;
4895 }
4896
4897 vm_object_lock(object);
4898 object->ref_count++;
b0d623f7
A
4899
4900 /*
4901 * For "named" VM objects, let the pager know that the
4902 * memory object is being mapped. Some pagers need to keep
4903 * track of this, to know when they can reclaim the memory
4904 * object, for example.
4905 * VM calls memory_object_map() for each mapping (specifying
4906 * the protection of each mapping) and calls
4907 * memory_object_last_unmap() when all the mappings are gone.
4908 */
4909 pager_prot = max_protection;
4910 if (copy) {
4911 pager_prot &= ~VM_PROT_WRITE;
4912 }
4913 pager = object->pager;
4914 if (object->named &&
4915 pager != MEMORY_OBJECT_NULL &&
4916 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4917 assert(object->pager_ready);
4918 vm_object_mapping_wait(object, THREAD_UNINT);
4919 vm_object_mapping_begin(object);
4920 vm_object_unlock(object);
4921
4922 kr = memory_object_map(pager, pager_prot);
4923 assert(kr == KERN_SUCCESS);
4924
4925 vm_object_lock(object);
4926 vm_object_mapping_end(object);
4927 }
4928 vm_object_unlock(object);
4929
4930 /*
4931 * Perform the copy if requested
4932 */
4933
4934 if (copy) {
0a7de745
A
4935 vm_object_t new_object;
4936 vm_object_offset_t new_offset;
b0d623f7
A
4937
4938 result = vm_object_copy_strategically(object, offset, size,
0a7de745
A
4939 &new_object, &new_offset,
4940 &copy);
b0d623f7
A
4941
4942
4943 if (result == KERN_MEMORY_RESTART_COPY) {
4944 boolean_t success;
4945 boolean_t src_needs_copy;
4946
4947 /*
4948 * XXX
4949 * We currently ignore src_needs_copy.
4950 * This really is the issue of how to make
4951 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4952 * non-kernel users to use. Solution forthcoming.
4953 * In the meantime, since we don't allow non-kernel
4954 * memory managers to specify symmetric copy,
4955 * we won't run into problems here.
4956 */
4957 new_object = object;
4958 new_offset = offset;
4959 success = vm_object_copy_quickly(&new_object,
0a7de745
A
4960 new_offset, size,
4961 &src_needs_copy,
4962 &copy);
b0d623f7
A
4963 assert(success);
4964 result = KERN_SUCCESS;
4965 }
4966 /*
4967 * Throw away the reference to the
4968 * original object, as it won't be mapped.
4969 */
4970
4971 vm_object_deallocate(object);
4972
3e170ce0 4973 if (result != KERN_SUCCESS) {
b0d623f7 4974 return result;
3e170ce0 4975 }
b0d623f7
A
4976
4977 object = new_object;
4978 offset = new_offset;
4979 }
4980
5ba3f43e
A
4981#if __arm64__
4982 if (fourk) {
4983 result = vm_map_enter_fourk(target_map,
0a7de745
A
4984 &map_addr,
4985 map_size,
4986 (vm_map_offset_t)mask,
4987 flags,
4988 vmk_flags,
4989 tag,
4990 object, offset,
4991 copy,
4992 cur_protection, max_protection,
4993 inheritance);
5ba3f43e
A
4994 } else
4995#endif /* __arm64__ */
3e170ce0
A
4996 {
4997 result = vm_map_enter(target_map,
0a7de745
A
4998 &map_addr, map_size,
4999 (vm_map_offset_t)mask,
5000 flags,
5001 vmk_flags,
5002 tag,
5003 object, offset,
5004 copy,
5005 cur_protection, max_protection,
5006 inheritance);
5007 }
5008 if (result != KERN_SUCCESS) {
b0d623f7 5009 vm_object_deallocate(object);
0a7de745 5010 }
b0d623f7
A
5011 *address = map_addr;
5012
5013 return result;
5014}
5015
5016
0a7de745 5017#if VM_CPM
2d21ac55
A
5018
5019#ifdef MACH_ASSERT
0a7de745 5020extern pmap_paddr_t avail_start, avail_end;
2d21ac55
A
5021#endif
5022
5023/*
5024 * Allocate memory in the specified map, with the caveat that
5025 * the memory is physically contiguous. This call may fail
5026 * if the system can't find sufficient contiguous memory.
5027 * This call may cause or lead to heart-stopping amounts of
5028 * paging activity.
5029 *
5030 * Memory obtained from this call should be freed in the
5031 * normal way, viz., via vm_deallocate.
5032 */
5033kern_return_t
5034vm_map_enter_cpm(
0a7de745
A
5035 vm_map_t map,
5036 vm_map_offset_t *addr,
5037 vm_map_size_t size,
5038 int flags)
2d21ac55 5039{
0a7de745
A
5040 vm_object_t cpm_obj;
5041 pmap_t pmap;
5042 vm_page_t m, pages;
5043 kern_return_t kr;
5044 vm_map_offset_t va, start, end, offset;
5045#if MACH_ASSERT
5046 vm_map_offset_t prev_addr = 0;
5047#endif /* MACH_ASSERT */
5048
5049 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3e170ce0
A
5050 vm_tag_t tag;
5051
f427ee49
A
5052 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5053 /* XXX TODO4K do we need to support this? */
5054 *addr = 0;
5055 return KERN_NOT_SUPPORTED;
5056 }
5057
3e170ce0 5058 VM_GET_FLAGS_ALIAS(flags, tag);
2d21ac55 5059
2d21ac55
A
5060 if (size == 0) {
5061 *addr = 0;
5062 return KERN_SUCCESS;
5063 }
0a7de745 5064 if (anywhere) {
2d21ac55 5065 *addr = vm_map_min(map);
0a7de745 5066 } else {
39236c6e 5067 *addr = vm_map_trunc_page(*addr,
0a7de745
A
5068 VM_MAP_PAGE_MASK(map));
5069 }
39236c6e 5070 size = vm_map_round_page(size,
0a7de745 5071 VM_MAP_PAGE_MASK(map));
2d21ac55
A
5072
5073 /*
5074 * LP64todo - cpm_allocate should probably allow
5075 * allocations of >4GB, but not with the current
5076 * algorithm, so just cast down the size for now.
5077 */
0a7de745 5078 if (size > VM_MAX_ADDRESS) {
2d21ac55 5079 return KERN_RESOURCE_SHORTAGE;
0a7de745 5080 }
2d21ac55 5081 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
0a7de745 5082 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
2d21ac55 5083 return kr;
0a7de745 5084 }
2d21ac55
A
5085
5086 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5087 assert(cpm_obj != VM_OBJECT_NULL);
5088 assert(cpm_obj->internal);
316670eb 5089 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2d21ac55
A
5090 assert(cpm_obj->can_persist == FALSE);
5091 assert(cpm_obj->pager_created == FALSE);
5092 assert(cpm_obj->pageout == FALSE);
5093 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
5094
5095 /*
5096 * Insert pages into object.
5097 */
5098
5099 vm_object_lock(cpm_obj);
5100 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5101 m = pages;
5102 pages = NEXT_PAGE(m);
0c530ab8 5103 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636 5104
d9a64523
A
5105 assert(!m->vmp_gobbled);
5106 assert(!m->vmp_wanted);
5107 assert(!m->vmp_pageout);
5108 assert(!m->vmp_tabled);
b0d623f7 5109 assert(VM_PAGE_WIRED(m));
d9a64523 5110 assert(m->vmp_busy);
0a7de745 5111 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
91447636 5112
d9a64523 5113 m->vmp_busy = FALSE;
91447636
A
5114 vm_page_insert(m, cpm_obj, offset);
5115 }
5116 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5117 vm_object_unlock(cpm_obj);
5118
5119 /*
5120 * Hang onto a reference on the object in case a
5121 * multi-threaded application for some reason decides
5122 * to deallocate the portion of the address space into
5123 * which we will insert this object.
5124 *
5125 * Unfortunately, we must insert the object now before
5126 * we can talk to the pmap module about which addresses
5127 * must be wired down. Hence, the race with a multi-
5128 * threaded app.
5129 */
5130 vm_object_reference(cpm_obj);
5131
5132 /*
5133 * Insert object into map.
5134 */
5135
5136 kr = vm_map_enter(
2d21ac55
A
5137 map,
5138 addr,
5139 size,
5140 (vm_map_offset_t)0,
5141 flags,
5ba3f43e 5142 VM_MAP_KERNEL_FLAGS_NONE,
2d21ac55
A
5143 cpm_obj,
5144 (vm_object_offset_t)0,
5145 FALSE,
5146 VM_PROT_ALL,
5147 VM_PROT_ALL,
5148 VM_INHERIT_DEFAULT);
91447636
A
5149
5150 if (kr != KERN_SUCCESS) {
5151 /*
5152 * A CPM object doesn't have can_persist set,
5153 * so all we have to do is deallocate it to
5154 * free up these pages.
5155 */
5156 assert(cpm_obj->pager_created == FALSE);
5157 assert(cpm_obj->can_persist == FALSE);
5158 assert(cpm_obj->pageout == FALSE);
5159 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5160 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5161 vm_object_deallocate(cpm_obj); /* kill creation ref */
5162 }
5163
5164 /*
5165 * Inform the physical mapping system that the
5166 * range of addresses may not fault, so that
5167 * page tables and such can be locked down as well.
5168 */
5169 start = *addr;
5170 end = start + size;
5171 pmap = vm_map_pmap(map);
5172 pmap_pageable(pmap, start, end, FALSE);
5173
5174 /*
5175 * Enter each page into the pmap, to avoid faults.
5176 * Note that this loop could be coded more efficiently,
5177 * if the need arose, rather than looking up each page
5178 * again.
5179 */
5180 for (offset = 0, va = start; offset < size;
0a7de745
A
5181 va += PAGE_SIZE, offset += PAGE_SIZE) {
5182 int type_of_fault;
2d21ac55 5183
91447636
A
5184 vm_object_lock(cpm_obj);
5185 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 5186 assert(m != VM_PAGE_NULL);
2d21ac55
A
5187
5188 vm_page_zero_fill(m);
5189
5190 type_of_fault = DBG_ZERO_FILL_FAULT;
5191
f427ee49
A
5192 vm_fault_enter(m, pmap, va,
5193 PAGE_SIZE, 0,
5194 VM_PROT_ALL, VM_PROT_WRITE,
0a7de745
A
5195 VM_PAGE_WIRED(m),
5196 FALSE, /* change_wiring */
5197 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5198 FALSE, /* no_cache */
5199 FALSE, /* cs_bypass */
5200 0, /* user_tag */
5201 0, /* pmap_options */
5202 NULL, /* need_retry */
5203 &type_of_fault);
2d21ac55
A
5204
5205 vm_object_unlock(cpm_obj);
91447636
A
5206 }
5207
0a7de745 5208#if MACH_ASSERT
91447636
A
5209 /*
5210 * Verify ordering in address space.
5211 */
5212 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5213 vm_object_lock(cpm_obj);
5214 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5215 vm_object_unlock(cpm_obj);
0a7de745 5216 if (m == VM_PAGE_NULL) {
316670eb 5217 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
0a7de745
A
5218 cpm_obj, (uint64_t)offset);
5219 }
d9a64523
A
5220 assert(m->vmp_tabled);
5221 assert(!m->vmp_busy);
5222 assert(!m->vmp_wanted);
5223 assert(!m->vmp_fictitious);
5224 assert(!m->vmp_private);
5225 assert(!m->vmp_absent);
5226 assert(!m->vmp_error);
5227 assert(!m->vmp_cleaning);
5228 assert(!m->vmp_laundry);
5229 assert(!m->vmp_precious);
5230 assert(!m->vmp_clustered);
91447636 5231 if (offset != 0) {
39037602 5232 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
316670eb 5233 printf("start 0x%llx end 0x%llx va 0x%llx\n",
0a7de745 5234 (uint64_t)start, (uint64_t)end, (uint64_t)va);
316670eb
A
5235 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5236 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
91447636
A
5237 panic("vm_allocate_cpm: pages not contig!");
5238 }
5239 }
39037602 5240 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
91447636 5241 }
0a7de745 5242#endif /* MACH_ASSERT */
91447636
A
5243
5244 vm_object_deallocate(cpm_obj); /* kill extra ref */
5245
5246 return kr;
5247}
5248
5249
0a7de745 5250#else /* VM_CPM */
91447636
A
5251
5252/*
5253 * Interface is defined in all cases, but unless the kernel
5254 * is built explicitly for this option, the interface does
5255 * nothing.
5256 */
5257
5258kern_return_t
5259vm_map_enter_cpm(
0a7de745
A
5260 __unused vm_map_t map,
5261 __unused vm_map_offset_t *addr,
5262 __unused vm_map_size_t size,
5263 __unused int flags)
91447636
A
5264{
5265 return KERN_FAILURE;
5266}
5267#endif /* VM_CPM */
5268
b0d623f7
A
5269/* Not used without nested pmaps */
5270#ifndef NO_NESTED_PMAP
2d21ac55
A
5271/*
5272 * Clip and unnest a portion of a nested submap mapping.
5273 */
b0d623f7
A
5274
5275
2d21ac55
A
5276static void
5277vm_map_clip_unnest(
0a7de745
A
5278 vm_map_t map,
5279 vm_map_entry_t entry,
5280 vm_map_offset_t start_unnest,
5281 vm_map_offset_t end_unnest)
2d21ac55 5282{
b0d623f7
A
5283 vm_map_offset_t old_start_unnest = start_unnest;
5284 vm_map_offset_t old_end_unnest = end_unnest;
5285
2d21ac55 5286 assert(entry->is_sub_map);
3e170ce0 5287 assert(VME_SUBMAP(entry) != NULL);
fe8ab488 5288 assert(entry->use_pmap);
2d21ac55 5289
b0d623f7
A
5290 /*
5291 * Query the platform for the optimal unnest range.
5292 * DRK: There's some duplication of effort here, since
5293 * callers may have adjusted the range to some extent. This
5294 * routine was introduced to support 1GiB subtree nesting
5295 * for x86 platforms, which can also nest on 2MiB boundaries
5296 * depending on size/alignment.
5297 */
5298 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
39037602
A
5299 assert(VME_SUBMAP(entry)->is_nested_map);
5300 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5301 log_unnest_badness(map,
0a7de745
A
5302 old_start_unnest,
5303 old_end_unnest,
5304 VME_SUBMAP(entry)->is_nested_map,
5305 (entry->vme_start +
5306 VME_SUBMAP(entry)->lowest_unnestable_start -
5307 VME_OFFSET(entry)));
b0d623f7
A
5308 }
5309
2d21ac55
A
5310 if (entry->vme_start > start_unnest ||
5311 entry->vme_end < end_unnest) {
5312 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
0a7de745
A
5313 "bad nested entry: start=0x%llx end=0x%llx\n",
5314 (long long)start_unnest, (long long)end_unnest,
5315 (long long)entry->vme_start, (long long)entry->vme_end);
2d21ac55 5316 }
b0d623f7 5317
2d21ac55
A
5318 if (start_unnest > entry->vme_start) {
5319 _vm_map_clip_start(&map->hdr,
0a7de745
A
5320 entry,
5321 start_unnest);
3e170ce0
A
5322 if (map->holelistenabled) {
5323 vm_map_store_update_first_free(map, NULL, FALSE);
5324 } else {
5325 vm_map_store_update_first_free(map, map->first_free, FALSE);
5326 }
2d21ac55
A
5327 }
5328 if (entry->vme_end > end_unnest) {
5329 _vm_map_clip_end(&map->hdr,
0a7de745
A
5330 entry,
5331 end_unnest);
3e170ce0
A
5332 if (map->holelistenabled) {
5333 vm_map_store_update_first_free(map, NULL, FALSE);
5334 } else {
5335 vm_map_store_update_first_free(map, map->first_free, FALSE);
5336 }
2d21ac55
A
5337 }
5338
5339 pmap_unnest(map->pmap,
0a7de745
A
5340 entry->vme_start,
5341 entry->vme_end - entry->vme_start);
cb323159 5342 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
2d21ac55
A
5343 /* clean up parent map/maps */
5344 vm_map_submap_pmap_clean(
5345 map, entry->vme_start,
5346 entry->vme_end,
3e170ce0
A
5347 VME_SUBMAP(entry),
5348 VME_OFFSET(entry));
2d21ac55
A
5349 }
5350 entry->use_pmap = FALSE;
3e170ce0
A
5351 if ((map->pmap != kernel_pmap) &&
5352 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5353 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
316670eb 5354 }
2d21ac55 5355}
0a7de745 5356#endif /* NO_NESTED_PMAP */
2d21ac55 5357
1c79356b
A
5358/*
5359 * vm_map_clip_start: [ internal use only ]
5360 *
5361 * Asserts that the given entry begins at or after
5362 * the specified address; if necessary,
5363 * it splits the entry into two.
5364 */
e2d2fc5c 5365void
2d21ac55 5366vm_map_clip_start(
0a7de745
A
5367 vm_map_t map,
5368 vm_map_entry_t entry,
5369 vm_map_offset_t startaddr)
2d21ac55 5370{
0c530ab8 5371#ifndef NO_NESTED_PMAP
fe8ab488
A
5372 if (entry->is_sub_map &&
5373 entry->use_pmap &&
2d21ac55 5374 startaddr >= entry->vme_start) {
0a7de745 5375 vm_map_offset_t start_unnest, end_unnest;
2d21ac55
A
5376
5377 /*
5378 * Make sure "startaddr" is no longer in a nested range
5379 * before we clip. Unnest only the minimum range the platform
5380 * can handle.
b0d623f7
A
5381 * vm_map_clip_unnest may perform additional adjustments to
5382 * the unnest range.
2d21ac55 5383 */
f427ee49
A
5384 start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
5385 end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
2d21ac55
A
5386 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5387 }
5388#endif /* NO_NESTED_PMAP */
5389 if (startaddr > entry->vme_start) {
3e170ce0 5390 if (VME_OBJECT(entry) &&
2d21ac55 5391 !entry->is_sub_map &&
3e170ce0 5392 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55 5393 pmap_remove(map->pmap,
0a7de745
A
5394 (addr64_t)(entry->vme_start),
5395 (addr64_t)(entry->vme_end));
2d21ac55 5396 }
39037602
A
5397 if (entry->vme_atomic) {
5398 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5ba3f43e 5399 }
d9a64523
A
5400
5401 DTRACE_VM5(
5402 vm_map_clip_start,
5403 vm_map_t, map,
5404 vm_map_offset_t, entry->vme_start,
5405 vm_map_offset_t, entry->vme_end,
5406 vm_map_offset_t, startaddr,
5407 int, VME_ALIAS(entry));
5408
2d21ac55 5409 _vm_map_clip_start(&map->hdr, entry, startaddr);
3e170ce0
A
5410 if (map->holelistenabled) {
5411 vm_map_store_update_first_free(map, NULL, FALSE);
5412 } else {
5413 vm_map_store_update_first_free(map, map->first_free, FALSE);
5414 }
2d21ac55
A
5415 }
5416}
5417
1c79356b
A
5418
/*
 *	vm_map_copy_clip_start:
 *
 *	Same idea as vm_map_clip_start, but for an entry that lives
 *	in a vm_map_copy: split the entry (via the copy's header) only
 *	when "startaddr" lies after the entry's start.
 *	Note: "entry" and "startaddr" are evaluated more than once.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr)                      \
	MACRO_BEGIN                                                         \
	if ((entry)->vme_start < (startaddr)) {                             \
	        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr));   \
	}                                                                   \
	MACRO_END
5424
5425/*
5426 * This routine is called only when it is known that
5427 * the entry must be split.
5428 */
91447636 5429static void
1c79356b 5430_vm_map_clip_start(
0a7de745
A
5431 struct vm_map_header *map_header,
5432 vm_map_entry_t entry,
5433 vm_map_offset_t start)
1c79356b 5434{
0a7de745 5435 vm_map_entry_t new_entry;
1c79356b
A
5436
5437 /*
5438 * Split off the front portion --
5439 * note that we must insert the new
5440 * entry BEFORE this one, so that
5441 * this entry has the specified starting
5442 * address.
5443 */
5444
fe8ab488
A
5445 if (entry->map_aligned) {
5446 assert(VM_MAP_PAGE_ALIGNED(start,
0a7de745 5447 VM_MAP_HDR_PAGE_MASK(map_header)));
fe8ab488
A
5448 }
5449
7ddcb079 5450 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
5451 vm_map_entry_copy_full(new_entry, entry);
5452
5453 new_entry->vme_end = start;
e2d2fc5c 5454 assert(new_entry->vme_start < new_entry->vme_end);
3e170ce0 5455 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
e2d2fc5c 5456 assert(start < entry->vme_end);
1c79356b
A
5457 entry->vme_start = start;
5458
6d2010ae 5459 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b 5460
0a7de745 5461 if (entry->is_sub_map) {
3e170ce0 5462 vm_map_reference(VME_SUBMAP(new_entry));
0a7de745 5463 } else {
3e170ce0 5464 vm_object_reference(VME_OBJECT(new_entry));
0a7de745 5465 }
1c79356b
A
5466}
5467
5468
5469/*
5470 * vm_map_clip_end: [ internal use only ]
5471 *
5472 * Asserts that the given entry ends at or before
5473 * the specified address; if necessary,
5474 * it splits the entry into two.
5475 */
e2d2fc5c 5476void
2d21ac55 5477vm_map_clip_end(
0a7de745
A
5478 vm_map_t map,
5479 vm_map_entry_t entry,
5480 vm_map_offset_t endaddr)
2d21ac55
A
5481{
5482 if (endaddr > entry->vme_end) {
5483 /*
5484 * Within the scope of this clipping, limit "endaddr" to
5485 * the end of this map entry...
5486 */
5487 endaddr = entry->vme_end;
5488 }
5489#ifndef NO_NESTED_PMAP
fe8ab488 5490 if (entry->is_sub_map && entry->use_pmap) {
0a7de745 5491 vm_map_offset_t start_unnest, end_unnest;
2d21ac55
A
5492
5493 /*
5494 * Make sure the range between the start of this entry and
5495 * the new "endaddr" is no longer nested before we clip.
5496 * Unnest only the minimum range the platform can handle.
b0d623f7
A
5497 * vm_map_clip_unnest may perform additional adjustments to
5498 * the unnest range.
2d21ac55
A
5499 */
5500 start_unnest = entry->vme_start;
5501 end_unnest =
f427ee49
A
5502 (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
5503 ~(pmap_shared_region_size_min(map->pmap) - 1);
2d21ac55
A
5504 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5505 }
5506#endif /* NO_NESTED_PMAP */
5507 if (endaddr < entry->vme_end) {
3e170ce0 5508 if (VME_OBJECT(entry) &&
2d21ac55 5509 !entry->is_sub_map &&
3e170ce0 5510 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55 5511 pmap_remove(map->pmap,
0a7de745
A
5512 (addr64_t)(entry->vme_start),
5513 (addr64_t)(entry->vme_end));
2d21ac55 5514 }
39037602
A
5515 if (entry->vme_atomic) {
5516 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5517 }
d9a64523
A
5518 DTRACE_VM5(
5519 vm_map_clip_end,
5520 vm_map_t, map,
5521 vm_map_offset_t, entry->vme_start,
5522 vm_map_offset_t, entry->vme_end,
5523 vm_map_offset_t, endaddr,
5524 int, VME_ALIAS(entry));
5525
2d21ac55 5526 _vm_map_clip_end(&map->hdr, entry, endaddr);
3e170ce0
A
5527 if (map->holelistenabled) {
5528 vm_map_store_update_first_free(map, NULL, FALSE);
5529 } else {
5530 vm_map_store_update_first_free(map, map->first_free, FALSE);
5531 }
2d21ac55
A
5532 }
5533}
0c530ab8 5534
1c79356b
A
5535
/*
 *	vm_map_copy_clip_end:
 *
 *	Same idea as vm_map_clip_end, but for an entry that lives
 *	in a vm_map_copy: split the entry (via the copy's header) only
 *	when "endaddr" lies before the entry's end.
 *	Note: "entry" and "endaddr" are evaluated more than once.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr)                          \
	MACRO_BEGIN                                                         \
	if ((entry)->vme_end > (endaddr)) {                                 \
	        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr));       \
	}                                                                   \
	MACRO_END
5541
5542/*
5543 * This routine is called only when it is known that
5544 * the entry must be split.
5545 */
91447636 5546static void
1c79356b 5547_vm_map_clip_end(
0a7de745
A
5548 struct vm_map_header *map_header,
5549 vm_map_entry_t entry,
5550 vm_map_offset_t end)
1c79356b 5551{
0a7de745 5552 vm_map_entry_t new_entry;
1c79356b
A
5553
5554 /*
5555 * Create a new entry and insert it
5556 * AFTER the specified entry
5557 */
5558
fe8ab488
A
5559 if (entry->map_aligned) {
5560 assert(VM_MAP_PAGE_ALIGNED(end,
0a7de745 5561 VM_MAP_HDR_PAGE_MASK(map_header)));
fe8ab488
A
5562 }
5563
7ddcb079 5564 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
5565 vm_map_entry_copy_full(new_entry, entry);
5566
e2d2fc5c 5567 assert(entry->vme_start < end);
1c79356b 5568 new_entry->vme_start = entry->vme_end = end;
3e170ce0 5569 VME_OFFSET_SET(new_entry,
0a7de745 5570 VME_OFFSET(new_entry) + (end - entry->vme_start));
e2d2fc5c 5571 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 5572
6d2010ae 5573 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b 5574
0a7de745 5575 if (entry->is_sub_map) {
3e170ce0 5576 vm_map_reference(VME_SUBMAP(new_entry));
0a7de745 5577 } else {
3e170ce0 5578 vm_object_reference(VME_OBJECT(new_entry));
0a7de745 5579 }
1c79356b
A
5580}
5581
5582
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Clamps the starting and ending region addresses so that
 *	they fall within the valid range of the map:
 *	"start" is raised to vm_map_min(map) if below it, "end" is
 *	lowered to vm_map_max(map) if above it, and "start" is then
 *	pulled back to "end" if it still lies beyond it.
 *
 *	"start" and "end" are assigned to, so they must be lvalues;
 *	every argument may be evaluated more than once.
 */
#define VM_MAP_RANGE_CHECK(map, start, end)     \
	MACRO_BEGIN                             \
	if ((start) < vm_map_min(map)) {        \
	        (start) = vm_map_min(map);      \
	}                                       \
	if ((end) > vm_map_max(map)) {          \
	        (end) = vm_map_max(map);        \
	}                                       \
	if ((start) > (end)) {                  \
	        (start) = (end);                \
	}                                       \
	MACRO_END
1c79356b
A
5598
5599/*
5600 * vm_map_range_check: [ internal use only ]
5ba3f43e 5601 *
1c79356b
A
5602 * Check that the region defined by the specified start and
5603 * end addresses are wholly contained within a single map
5604 * entry or set of adjacent map entries of the spacified map,
5605 * i.e. the specified region contains no unmapped space.
5606 * If any or all of the region is unmapped, FALSE is returned.
5607 * Otherwise, TRUE is returned and if the output argument 'entry'
5608 * is not NULL it points to the map entry containing the start
5609 * of the region.
5610 *
5611 * The map is locked for reading on entry and is left locked.
5612 */
91447636 5613static boolean_t
1c79356b 5614vm_map_range_check(
0a7de745
A
5615 vm_map_t map,
5616 vm_map_offset_t start,
5617 vm_map_offset_t end,
5618 vm_map_entry_t *entry)
1c79356b 5619{
0a7de745
A
5620 vm_map_entry_t cur;
5621 vm_map_offset_t prev;
1c79356b
A
5622
5623 /*
0a7de745 5624 * Basic sanity checks first
1c79356b 5625 */
0a7de745
A
5626 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5627 return FALSE;
5628 }
1c79356b
A
5629
5630 /*
0a7de745 5631 * Check first if the region starts within a valid
1c79356b
A
5632 * mapping for the map.
5633 */
0a7de745
A
5634 if (!vm_map_lookup_entry(map, start, &cur)) {
5635 return FALSE;
5636 }
1c79356b
A
5637
5638 /*
5ba3f43e 5639 * Optimize for the case that the region is contained
1c79356b
A
5640 * in a single map entry.
5641 */
0a7de745 5642 if (entry != (vm_map_entry_t *) NULL) {
1c79356b 5643 *entry = cur;
0a7de745
A
5644 }
5645 if (end <= cur->vme_end) {
5646 return TRUE;
5647 }
1c79356b
A
5648
5649 /*
0a7de745
A
5650 * If the region is not wholly contained within a
5651 * single entry, walk the entries looking for holes.
1c79356b
A
5652 */
5653 prev = cur->vme_end;
5654 cur = cur->vme_next;
5655 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
0a7de745
A
5656 if (end <= cur->vme_end) {
5657 return TRUE;
5658 }
1c79356b
A
5659 prev = cur->vme_end;
5660 cur = cur->vme_next;
5661 }
0a7de745 5662 return FALSE;
1c79356b
A
5663}
5664
5665/*
5666 * vm_map_submap: [ kernel use only ]
5667 *
5668 * Mark the given range as handled by a subordinate map.
5669 *
5670 * This range must have been created with vm_map_find using
5671 * the vm_submap_object, and no other operations may have been
5672 * performed on this range prior to calling vm_map_submap.
5673 *
5674 * Only a limited number of operations can be performed
5675 * within this rage after calling vm_map_submap:
5676 * vm_fault
5677 * [Don't try vm_map_copyin!]
5678 *
5679 * To remove a submapping, one must first remove the
5680 * range from the superior map, and then destroy the
5681 * submap (if desired). [Better yet, don't try it.]
5682 */
5683kern_return_t
5684vm_map_submap(
0a7de745
A
5685 vm_map_t map,
5686 vm_map_offset_t start,
5687 vm_map_offset_t end,
5688 vm_map_t submap,
5689 vm_map_offset_t offset,
0c530ab8 5690#ifdef NO_NESTED_PMAP
91447636 5691 __unused
0a7de745
A
5692#endif /* NO_NESTED_PMAP */
5693 boolean_t use_pmap)
1c79356b 5694{
0a7de745
A
5695 vm_map_entry_t entry;
5696 kern_return_t result = KERN_INVALID_ARGUMENT;
5697 vm_object_t object;
1c79356b
A
5698
5699 vm_map_lock(map);
5700
0a7de745 5701 if (!vm_map_lookup_entry(map, start, &entry)) {
1c79356b 5702 entry = entry->vme_next;
2d21ac55 5703 }
1c79356b 5704
2d21ac55
A
5705 if (entry == vm_map_to_entry(map) ||
5706 entry->is_sub_map) {
1c79356b
A
5707 vm_map_unlock(map);
5708 return KERN_INVALID_ARGUMENT;
5709 }
5710
2d21ac55 5711 vm_map_clip_start(map, entry, start);
1c79356b
A
5712 vm_map_clip_end(map, entry, end);
5713
5714 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5715 (!entry->is_sub_map) &&
3e170ce0 5716 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
1c79356b
A
5717 (object->resident_page_count == 0) &&
5718 (object->copy == VM_OBJECT_NULL) &&
5719 (object->shadow == VM_OBJECT_NULL) &&
5720 (!object->pager_created)) {
3e170ce0
A
5721 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5722 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
2d21ac55
A
5723 vm_object_deallocate(object);
5724 entry->is_sub_map = TRUE;
fe8ab488 5725 entry->use_pmap = FALSE;
3e170ce0 5726 VME_SUBMAP_SET(entry, submap);
2d21ac55 5727 vm_map_reference(submap);
316670eb
A
5728 if (submap->mapped_in_other_pmaps == FALSE &&
5729 vm_map_pmap(submap) != PMAP_NULL &&
5730 vm_map_pmap(submap) != vm_map_pmap(map)) {
5731 /*
5732 * This submap is being mapped in a map
5733 * that uses a different pmap.
5734 * Set its "mapped_in_other_pmaps" flag
5ba3f43e 5735 * to indicate that we now need to
316670eb
A
5736 * remove mappings from all pmaps rather
5737 * than just the submap's pmap.
5738 */
5739 submap->mapped_in_other_pmaps = TRUE;
5740 }
2d21ac55 5741
0c530ab8 5742#ifndef NO_NESTED_PMAP
2d21ac55
A
5743 if (use_pmap) {
5744 /* nest if platform code will allow */
0a7de745 5745 if (submap->pmap == NULL) {
316670eb 5746 ledger_t ledger = map->pmap->ledger;
cb323159
A
5747 submap->pmap = pmap_create_options(ledger,
5748 (vm_map_size_t) 0, 0);
0a7de745 5749 if (submap->pmap == PMAP_NULL) {
2d21ac55 5750 vm_map_unlock(map);
0a7de745 5751 return KERN_NO_SPACE;
55e303ae 5752 }
0a7de745 5753#if defined(__arm__) || defined(__arm64__)
5ba3f43e
A
5754 pmap_set_nested(submap->pmap);
5755#endif
55e303ae 5756 }
2d21ac55 5757 result = pmap_nest(map->pmap,
0a7de745
A
5758 (VME_SUBMAP(entry))->pmap,
5759 (addr64_t)start,
0a7de745
A
5760 (uint64_t)(end - start));
5761 if (result) {
2d21ac55 5762 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
0a7de745 5763 }
2d21ac55
A
5764 entry->use_pmap = TRUE;
5765 }
0a7de745 5766#else /* NO_NESTED_PMAP */
2d21ac55 5767 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0a7de745 5768#endif /* NO_NESTED_PMAP */
2d21ac55 5769 result = KERN_SUCCESS;
1c79356b
A
5770 }
5771 vm_map_unlock(map);
5772
0a7de745 5773 return result;
1c79356b
A
5774}
5775
5776/*
5777 * vm_map_protect:
5778 *
5779 * Sets the protection of the specified address
5780 * region in the target map. If "set_max" is
5781 * specified, the maximum protection is to be set;
5782 * otherwise, only the current protection is affected.
5783 */
5784kern_return_t
5785vm_map_protect(
0a7de745
A
5786 vm_map_t map,
5787 vm_map_offset_t start,
5788 vm_map_offset_t end,
5789 vm_prot_t new_prot,
5790 boolean_t set_max)
39037602 5791{
0a7de745
A
5792 vm_map_entry_t current;
5793 vm_map_offset_t prev;
5794 vm_map_entry_t entry;
5795 vm_prot_t new_max;
5796 int pmap_options = 0;
5797 kern_return_t kr;
1c79356b 5798
5c9f4661 5799 if (new_prot & VM_PROT_COPY) {
0a7de745
A
5800 vm_map_offset_t new_start;
5801 vm_prot_t cur_prot, max_prot;
5802 vm_map_kernel_flags_t kflags;
5c9f4661
A
5803
5804 /* LP64todo - see below */
5805 if (start >= map->max_offset) {
5806 return KERN_INVALID_ADDRESS;
5807 }
5808
d9a64523 5809 if ((new_prot & VM_PROT_EXECUTE) &&
f427ee49
A
5810 map->pmap != kernel_pmap &&
5811 (vm_map_cs_enforcement(map)
5812#if XNU_TARGET_OS_OSX && __arm64__
5813 || !VM_MAP_IS_EXOTIC(map)
5814#endif /* XNU_TARGET_OS_OSX && __arm64__ */
5815 ) &&
5816 VM_MAP_POLICY_WX_FAIL(map)) {
d9a64523 5817 DTRACE_VM3(cs_wx,
0a7de745
A
5818 uint64_t, (uint64_t) start,
5819 uint64_t, (uint64_t) end,
5820 vm_prot_t, new_prot);
d9a64523 5821 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
0a7de745
A
5822 proc_selfpid(),
5823 (current_task()->bsd_info
5824 ? proc_name_address(current_task()->bsd_info)
5825 : "?"),
5826 __FUNCTION__);
d9a64523
A
5827 return KERN_PROTECTION_FAILURE;
5828 }
d9a64523
A
5829
5830 /*
5831 * Let vm_map_remap_extract() know that it will need to:
5832 * + make a copy of the mapping
5833 * + add VM_PROT_WRITE to the max protections
5834 * + remove any protections that are no longer allowed from the
5835 * max protections (to avoid any WRITE/EXECUTE conflict, for
5836 * example).
5837 * Note that "max_prot" is an IN/OUT parameter only for this
5838 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5839 * only.
5840 */
5841 max_prot = new_prot & VM_PROT_ALL;
c3c9b80d 5842 cur_prot = VM_PROT_NONE;
5c9f4661
A
5843 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5844 kflags.vmkf_remap_prot_copy = TRUE;
d9a64523 5845 kflags.vmkf_overwrite_immutable = TRUE;
5c9f4661
A
5846 new_start = start;
5847 kr = vm_map_remap(map,
0a7de745
A
5848 &new_start,
5849 end - start,
c3c9b80d 5850 0, /* mask */
0a7de745
A
5851 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5852 kflags,
5853 0,
5854 map,
5855 start,
c3c9b80d
A
5856 TRUE, /* copy-on-write remapping! */
5857 &cur_prot, /* IN/OUT */
5858 &max_prot, /* IN/OUT */
0a7de745 5859 VM_INHERIT_DEFAULT);
5c9f4661
A
5860 if (kr != KERN_SUCCESS) {
5861 return kr;
5862 }
5863 new_prot &= ~VM_PROT_COPY;
5864 }
5865
1c79356b
A
5866 vm_map_lock(map);
5867
91447636
A
5868 /* LP64todo - remove this check when vm_map_commpage64()
5869 * no longer has to stuff in a map_entry for the commpage
5870 * above the map's max_offset.
5871 */
5872 if (start >= map->max_offset) {
5873 vm_map_unlock(map);
0a7de745 5874 return KERN_INVALID_ADDRESS;
91447636
A
5875 }
5876
0a7de745 5877 while (1) {
b0d623f7 5878 /*
0a7de745 5879 * Lookup the entry. If it doesn't start in a valid
b0d623f7
A
5880 * entry, return an error.
5881 */
0a7de745 5882 if (!vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 5883 vm_map_unlock(map);
0a7de745 5884 return KERN_INVALID_ADDRESS;
b0d623f7
A
5885 }
5886
0a7de745 5887 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
b0d623f7
A
5888 start = SUPERPAGE_ROUND_DOWN(start);
5889 continue;
5890 }
5891 break;
0a7de745
A
5892 }
5893 if (entry->superpage_size) {
5894 end = SUPERPAGE_ROUND_UP(end);
5895 }
1c79356b
A
5896
5897 /*
5898 * Make a first pass to check for protection and address
5899 * violations.
5900 */
5901
5902 current = entry;
5903 prev = current->vme_start;
5904 while ((current != vm_map_to_entry(map)) &&
0a7de745 5905 (current->vme_start < end)) {
1c79356b
A
5906 /*
5907 * If there is a hole, return an error.
5908 */
5909 if (current->vme_start != prev) {
5910 vm_map_unlock(map);
0a7de745 5911 return KERN_INVALID_ADDRESS;
1c79356b
A
5912 }
5913
5914 new_max = current->max_protection;
5c9f4661
A
5915 if ((new_prot & new_max) != new_prot) {
5916 vm_map_unlock(map);
0a7de745 5917 return KERN_PROTECTION_FAILURE;
1c79356b 5918 }
5ba3f43e 5919
a991bd8d
A
5920 if (current->used_for_jit &&
5921 pmap_has_prot_policy(map->pmap, current->translated_allow_execute, current->protection)) {
5922 vm_map_unlock(map);
5923 return KERN_PROTECTION_FAILURE;
5924 }
5925
d9a64523
A
5926 if ((new_prot & VM_PROT_WRITE) &&
5927 (new_prot & VM_PROT_EXECUTE) &&
f427ee49
A
5928#if XNU_TARGET_OS_OSX
5929 map->pmap != kernel_pmap &&
5930 (vm_map_cs_enforcement(map)
5931#if __arm64__
5932 || !VM_MAP_IS_EXOTIC(map)
5933#endif /* __arm64__ */
5934 ) &&
5935#endif /* XNU_TARGET_OS_OSX */
d9a64523
A
5936 !(current->used_for_jit)) {
5937 DTRACE_VM3(cs_wx,
0a7de745
A
5938 uint64_t, (uint64_t) current->vme_start,
5939 uint64_t, (uint64_t) current->vme_end,
5940 vm_prot_t, new_prot);
d9a64523 5941 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
0a7de745
A
5942 proc_selfpid(),
5943 (current_task()->bsd_info
5944 ? proc_name_address(current_task()->bsd_info)
5945 : "?"),
5946 __FUNCTION__);
d9a64523 5947 new_prot &= ~VM_PROT_EXECUTE;
f427ee49
A
5948 if (VM_MAP_POLICY_WX_FAIL(map)) {
5949 vm_map_unlock(map);
5950 return KERN_PROTECTION_FAILURE;
5951 }
5ba3f43e 5952 }
593a1d5f 5953
a39ff7e2
A
5954 /*
5955 * If the task has requested executable lockdown,
5956 * deny both:
5957 * - adding executable protections OR
5958 * - adding write protections to an existing executable mapping.
5959 */
5960 if (map->map_disallow_new_exec == TRUE) {
5961 if ((new_prot & VM_PROT_EXECUTE) ||
5962 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5963 vm_map_unlock(map);
0a7de745 5964 return KERN_PROTECTION_FAILURE;
a39ff7e2
A
5965 }
5966 }
5967
1c79356b
A
5968 prev = current->vme_end;
5969 current = current->vme_next;
5970 }
39037602 5971
5ba3f43e
A
5972#if __arm64__
5973 if (end > prev &&
5974 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5975 vm_map_entry_t prev_entry;
5976
5977 prev_entry = current->vme_prev;
5978 if (prev_entry != vm_map_to_entry(map) &&
5979 !prev_entry->map_aligned &&
5980 (vm_map_round_page(prev_entry->vme_end,
0a7de745
A
5981 VM_MAP_PAGE_MASK(map))
5982 == end)) {
5ba3f43e
A
5983 /*
5984 * The last entry in our range is not "map-aligned"
5985 * but it would have reached all the way to "end"
5986 * if it had been map-aligned, so this is not really
5987 * a hole in the range and we can proceed.
5988 */
5989 prev = end;
5990 }
5991 }
5992#endif /* __arm64__ */
39037602 5993
1c79356b
A
5994 if (end > prev) {
5995 vm_map_unlock(map);
0a7de745 5996 return KERN_INVALID_ADDRESS;
1c79356b
A
5997 }
5998
5999 /*
6000 * Go back and fix up protections.
6001 * Clip to start here if the range starts within
6002 * the entry.
6003 */
6004
6005 current = entry;
2d21ac55
A
6006 if (current != vm_map_to_entry(map)) {
6007 /* clip and unnest if necessary */
6008 vm_map_clip_start(map, current, start);
1c79356b 6009 }
2d21ac55 6010
1c79356b 6011 while ((current != vm_map_to_entry(map)) &&
0a7de745
A
6012 (current->vme_start < end)) {
6013 vm_prot_t old_prot;
1c79356b
A
6014
6015 vm_map_clip_end(map, current, end);
6016
fe8ab488
A
6017 if (current->is_sub_map) {
6018 /* clipping did unnest if needed */
6019 assert(!current->use_pmap);
6020 }
2d21ac55 6021
1c79356b
A
6022 old_prot = current->protection;
6023
5c9f4661
A
6024 if (set_max) {
6025 current->max_protection = new_prot;
6026 current->protection = new_prot & old_prot;
6027 } else {
6028 current->protection = new_prot;
6029 }
1c79356b
A
6030
6031 /*
6032 * Update physical map if necessary.
5ba3f43e
A
6033 * If the request is to turn off write protection,
6034 * we won't do it for real (in pmap). This is because
6035 * it would cause copy-on-write to fail. We've already
6036 * set the new protection in the map, so if a
6037 * write-protect fault occurred, it will be fixed up
1c79356b
A
6038 * properly, COW or not.
6039 */
1c79356b 6040 if (current->protection != old_prot) {
1c79356b
A
6041 /* Look one level in we support nested pmaps */
6042 /* from mapped submaps which are direct entries */
6043 /* in our map */
0c530ab8 6044
2d21ac55 6045 vm_prot_t prot;
0c530ab8 6046
39037602
A
6047 prot = current->protection;
6048 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
0a7de745
A
6049 prot &= ~VM_PROT_WRITE;
6050 } else {
6051 assert(!VME_OBJECT(current)->code_signed);
6052 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
39037602 6053 }
2d21ac55 6054
0a7de745
A
6055 if (override_nx(map, VME_ALIAS(current)) && prot) {
6056 prot |= VM_PROT_EXECUTE;
6057 }
2d21ac55 6058
f427ee49 6059#if DEVELOPMENT || DEBUG
5ba3f43e
A
6060 if (!(old_prot & VM_PROT_EXECUTE) &&
6061 (prot & VM_PROT_EXECUTE) &&
d9a64523
A
6062 panic_on_unsigned_execute &&
6063 (proc_selfcsflags() & CS_KILL)) {
5ba3f43e
A
6064 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
6065 }
f427ee49 6066#endif /* DEVELOPMENT || DEBUG */
5ba3f43e 6067
f427ee49 6068 if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
5ba3f43e
A
6069 if (current->wired_count) {
6070 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
0a7de745 6071 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5ba3f43e
A
6072 }
6073
6074 /* If the pmap layer cares about this
6075 * protection type, force a fault for
6076 * each page so that vm_fault will
6077 * repopulate the page with the full
6078 * set of protections.
6079 */
6080 /*
6081 * TODO: We don't seem to need this,
6082 * but this is due to an internal
6083 * implementation detail of
6084 * pmap_protect. Do we want to rely
6085 * on this?
6086 */
6087 prot = VM_PROT_NONE;
6088 }
490019cf 6089
0c530ab8 6090 if (current->is_sub_map && current->use_pmap) {
5ba3f43e 6091 pmap_protect(VME_SUBMAP(current)->pmap,
0a7de745
A
6092 current->vme_start,
6093 current->vme_end,
6094 prot);
1c79356b 6095 } else {
5ba3f43e
A
6096 if (prot & VM_PROT_WRITE) {
6097 if (VME_OBJECT(current) == compressor_object) {
6098 /*
6099 * For write requests on the
6100 * compressor, we will ask the
6101 * pmap layer to prevent us from
6102 * taking a write fault when we
6103 * attempt to access the mapping
6104 * next.
6105 */
6106 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6107 }
6108 }
6109
6110 pmap_protect_options(map->pmap,
0a7de745
A
6111 current->vme_start,
6112 current->vme_end,
6113 prot,
6114 pmap_options,
6115 NULL);
1c79356b 6116 }
1c79356b
A
6117 }
6118 current = current->vme_next;
6119 }
6120
5353443c 6121 current = entry;
91447636 6122 while ((current != vm_map_to_entry(map)) &&
0a7de745 6123 (current->vme_start <= end)) {
5353443c
A
6124 vm_map_simplify_entry(map, current);
6125 current = current->vme_next;
6126 }
6127
1c79356b 6128 vm_map_unlock(map);
0a7de745 6129 return KERN_SUCCESS;
1c79356b
A
6130}
6131
6132/*
6133 * vm_map_inherit:
6134 *
6135 * Sets the inheritance of the specified address
6136 * range in the target map. Inheritance
6137 * affects how the map will be shared with
6138 * child maps at the time of vm_map_fork.
6139 */
6140kern_return_t
6141vm_map_inherit(
0a7de745
A
6142 vm_map_t map,
6143 vm_map_offset_t start,
6144 vm_map_offset_t end,
6145 vm_inherit_t new_inheritance)
1c79356b 6146{
0a7de745
A
6147 vm_map_entry_t entry;
6148 vm_map_entry_t temp_entry;
1c79356b
A
6149
6150 vm_map_lock(map);
6151
6152 VM_MAP_RANGE_CHECK(map, start, end);
6153
6154 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6155 entry = temp_entry;
0a7de745 6156 } else {
1c79356b
A
6157 temp_entry = temp_entry->vme_next;
6158 entry = temp_entry;
6159 }
6160
6161 /* first check entire range for submaps which can't support the */
6162 /* given inheritance. */
6163 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
0a7de745
A
6164 if (entry->is_sub_map) {
6165 if (new_inheritance == VM_INHERIT_COPY) {
91447636 6166 vm_map_unlock(map);
0a7de745 6167 return KERN_INVALID_ARGUMENT;
91447636 6168 }
1c79356b
A
6169 }
6170
6171 entry = entry->vme_next;
6172 }
6173
6174 entry = temp_entry;
2d21ac55
A
6175 if (entry != vm_map_to_entry(map)) {
6176 /* clip and unnest if necessary */
6177 vm_map_clip_start(map, entry, start);
6178 }
1c79356b
A
6179
6180 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6181 vm_map_clip_end(map, entry, end);
fe8ab488
A
6182 if (entry->is_sub_map) {
6183 /* clip did unnest if needed */
6184 assert(!entry->use_pmap);
6185 }
1c79356b
A
6186
6187 entry->inheritance = new_inheritance;
6188
6189 entry = entry->vme_next;
6190 }
6191
6192 vm_map_unlock(map);
0a7de745 6193 return KERN_SUCCESS;
1c79356b
A
6194}
6195
2d21ac55
A
6196/*
6197 * Update the accounting for the amount of wired memory in this map. If the user has
6198 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6199 */
6200
6201static kern_return_t
6202add_wire_counts(
0a7de745
A
6203 vm_map_t map,
6204 vm_map_entry_t entry,
6205 boolean_t user_wire)
5ba3f43e 6206{
0a7de745 6207 vm_map_size_t size;
2d21ac55
A
6208
6209 if (user_wire) {
6d2010ae 6210 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
6211
6212 /*
6213 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6214 * this map entry.
6215 */
6216
6217 if (entry->user_wired_count == 0) {
6218 size = entry->vme_end - entry->vme_start;
5ba3f43e 6219
2d21ac55
A
6220 /*
6221 * Since this is the first time the user is wiring this map entry, check to see if we're
6222 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4ba76501 6223 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value. There is also
2d21ac55
A
6224 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6225 * limit, then we fail.
6226 */
6227
4ba76501
A
6228 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6229 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
f427ee49
A
6230 if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6231 os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
6232 } else {
6233 os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
6234 }
2d21ac55 6235 return KERN_RESOURCE_SHORTAGE;
0a7de745 6236 }
2d21ac55
A
6237
6238 /*
6239 * The first time the user wires an entry, we also increment the wired_count and add this to
6240 * the total that has been wired in the map.
6241 */
6242
0a7de745 6243 if (entry->wired_count >= MAX_WIRE_COUNT) {
2d21ac55 6244 return KERN_FAILURE;
0a7de745 6245 }
2d21ac55
A
6246
6247 entry->wired_count++;
6248 map->user_wire_size += size;
6249 }
6250
0a7de745 6251 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
2d21ac55 6252 return KERN_FAILURE;
0a7de745 6253 }
2d21ac55
A
6254
6255 entry->user_wired_count++;
2d21ac55 6256 } else {
2d21ac55
A
6257 /*
6258 * The kernel's wiring the memory. Just bump the count and continue.
6259 */
6260
0a7de745 6261 if (entry->wired_count >= MAX_WIRE_COUNT) {
2d21ac55 6262 panic("vm_map_wire: too many wirings");
0a7de745 6263 }
2d21ac55
A
6264
6265 entry->wired_count++;
6266 }
6267
6268 return KERN_SUCCESS;
6269}
6270
6271/*
6272 * Update the memory wiring accounting now that the given map entry is being unwired.
6273 */
6274
6275static void
6276subtract_wire_counts(
0a7de745
A
6277 vm_map_t map,
6278 vm_map_entry_t entry,
6279 boolean_t user_wire)
5ba3f43e 6280{
2d21ac55 6281 if (user_wire) {
2d21ac55
A
6282 /*
6283 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6284 */
6285
6286 if (entry->user_wired_count == 1) {
2d21ac55
A
6287 /*
6288 * We're removing the last user wire reference. Decrement the wired_count and the total
6289 * user wired memory for this map.
6290 */
6291
6292 assert(entry->wired_count >= 1);
6293 entry->wired_count--;
6294 map->user_wire_size -= entry->vme_end - entry->vme_start;
6295 }
6296
6297 assert(entry->user_wired_count >= 1);
6298 entry->user_wired_count--;
2d21ac55 6299 } else {
2d21ac55
A
6300 /*
6301 * The kernel is unwiring the memory. Just update the count.
6302 */
6303
6304 assert(entry->wired_count >= 1);
6305 entry->wired_count--;
6306 }
6307}
6308
/*
 * Counter incremented by vm_map_wire_nested() each time it refuses to wire
 * an executable entry (just before failing with KERN_PROTECTION_FAILURE).
 */
5ba3f43e 6309int cs_executable_wire = 0;
39037602 6310
1c79356b
A
6311/*
6312 * vm_map_wire:
6313 *
6314 * Sets the pageability of the specified address range in the
6315 * target map as wired. Regions specified as not pageable require
6316 * locked-down physical memory and physical page maps. The
6317 * access_type variable indicates types of accesses that must not
6318 * generate page faults. This is checked against protection of
6319 * memory being locked-down.
6320 *
6321 * The map must not be locked, but a reference must remain to the
6322 * map throughout the call.
 *
 * Parameters:
 *   map, start, end  range [start, end) to wire; both bounds are asserted
 *                    to be aligned to the map's page mask.  An empty range
 *                    (start == end) succeeds without wiring anything.
 *   caller_prot      requested access.  It is masked with VM_PROT_ALL to
 *                    form "access_type", and is also passed unmodified to
 *                    vm_fault_wire() and to the recursive submap call.
 *   tag              forwarded to vm_fault_wire() and to the recursive call.
 *   user_wire        forwarded to add_wire_counts()/subtract_wire_counts();
 *                    also makes the wait on an in-transition entry
 *                    THREAD_ABORTSAFE instead of THREAD_UNINT.
 *   map_pmap         NULL when called from the vm_map_wire_*() wrappers;
 *                    when non-NULL it is the pmap handed to vm_fault_wire()
 *                    in place of map->pmap.
 *   pmap_addr        address handed to vm_fault_wire() together with
 *                    map_pmap.
 *   physpage_p       if non-NULL, (end - start) must equal PAGE_SIZE;
 *                    *physpage_p is zeroed before wiring starts, receives
 *                    the physical page number for the already-wired case,
 *                    and is zeroed again when the function fails through
 *                    the "done" path.
 *
 * Returns:
 *   KERN_SUCCESS             the range was wired (or was empty).
 *   KERN_INVALID_ARGUMENT    physpage_p given with a range that is not
 *                            PAGE_SIZE, or a wire-and-extract request hit a
 *                            submap, a needs_copy entry or an entry with no
 *                            object.
 *   KERN_INVALID_ADDRESS     "start" is not mapped, or a hole was found in
 *                            the range.
 *   KERN_PROTECTION_FAILURE  an entry does not allow "access_type" (for an
 *                            already-wired entry this is only enforced for
 *                            wire-and-extract), or an unwired executable
 *                            entry was rejected (see the code-signing check
 *                            in the body).
 *   KERN_FAILURE             a user wait was interrupted, or an entry
 *                            disappeared while the map was unlocked.
 *   Any other error returned by add_wire_counts(), vm_map_lookup_locked(),
 *   vm_fault_wire() or the recursive call is passed through.
 *
 * On failure through "done", whatever was wired in [start, s) is undone
 * with vm_map_unwire_nested().
6323 */
91447636 6324static kern_return_t
1c79356b 6325vm_map_wire_nested(
0a7de745
A
6326 vm_map_t map,
6327 vm_map_offset_t start,
6328 vm_map_offset_t end,
6329 vm_prot_t caller_prot,
6330 vm_tag_t tag,
6331 boolean_t user_wire,
6332 pmap_t map_pmap,
6333 vm_map_offset_t pmap_addr,
6334 ppnum_t *physpage_p)
1c79356b 6335{
0a7de745
A
6336 vm_map_entry_t entry;
6337 vm_prot_t access_type;
6338 struct vm_map_entry *first_entry, tmp_entry;
6339 vm_map_t real_map;
6340 vm_map_offset_t s, e;
6341 kern_return_t rc;
6342 boolean_t need_wakeup;
6343 boolean_t main_map = FALSE;
6344 wait_interrupt_t interruptible_state;
6345 thread_t cur_thread;
6346 unsigned int last_timestamp;
6347 vm_map_size_t size;
6348 boolean_t wire_and_extract;
f427ee49
A
6349 vm_prot_t extra_prots;
6350
/*
 * extra_prots is OR-ed into the access type passed to
 * vm_map_lookup_locked() in the submap path below.
 */
6351 extra_prots = VM_PROT_COPY;
6352 extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6353#if XNU_TARGET_OS_OSX
6354 if (map->pmap == kernel_pmap ||
6355 !vm_map_cs_enforcement(map)) {
6356 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6357 }
6358#endif /* XNU_TARGET_OS_OSX */
fe8ab488 6359
3e170ce0
A
6360 access_type = (caller_prot & VM_PROT_ALL);
6361
fe8ab488
A
6362 wire_and_extract = FALSE;
6363 if (physpage_p != NULL) {
6364 /*
6365 * The caller wants the physical page number of the
6366 * wired page. We return only one physical page number
6367 * so this works for only one page at a time.
6368 */
6369 if ((end - start) != PAGE_SIZE) {
6370 return KERN_INVALID_ARGUMENT;
6371 }
6372 wire_and_extract = TRUE;
6373 *physpage_p = 0;
6374 }
1c79356b
A
6375
6376 vm_map_lock(map);
/* NOTE(review): main_map is assigned here but never read in this function. */
0a7de745 6377 if (map_pmap == NULL) {
1c79356b 6378 main_map = TRUE;
0a7de745 6379 }
1c79356b
A
6380 last_timestamp = map->timestamp;
6381
6382 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
6383 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6384 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
f427ee49 6385
0b4e3aa0
A
6386 if (start == end) {
6387 /* We wired what the caller asked for, zero pages */
6388 vm_map_unlock(map);
6389 return KERN_SUCCESS;
6390 }
1c79356b 6391
2d21ac55
A
6392 need_wakeup = FALSE;
6393 cur_thread = current_thread();
6394
6395 s = start;
6396 rc = KERN_SUCCESS;
6397
6398 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 6399 entry = first_entry;
2d21ac55
A
6400 /*
6401 * vm_map_clip_start will be done later.
6402 * We don't want to unnest any nested submaps here !
6403 */
1c79356b
A
6404 } else {
6405 /* Start address is not in map */
2d21ac55
A
6406 rc = KERN_INVALID_ADDRESS;
6407 goto done;
1c79356b
A
6408 }
6409
2d21ac55
A
6410 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6411 /*
6412 * At this point, we have wired from "start" to "s".
6413 * We still need to wire from "s" to "end".
6414 *
6415 * "entry" hasn't been clipped, so it could start before "s"
6416 * and/or end after "end".
6417 */
6418
6419 /* "e" is how far we want to wire in this entry */
6420 e = entry->vme_end;
0a7de745 6421 if (e > end) {
2d21ac55 6422 e = end;
0a7de745 6423 }
2d21ac55 6424
1c79356b
A
6425 /*
6426 * If another thread is wiring/unwiring this entry then
6427 * block after informing other thread to wake us up.
6428 */
6429 if (entry->in_transition) {
9bccf70c
A
6430 wait_result_t wait_result;
6431
1c79356b
A
6432 /*
6433 * We have not clipped the entry. Make sure that
6434 * the start address is in range so that the lookup
6435 * below will succeed.
2d21ac55
A
6436 * "s" is the current starting point: we've already
6437 * wired from "start" to "s" and we still have
6438 * to wire from "s" to "end".
1c79356b 6439 */
1c79356b
A
6440
6441 entry->needs_wakeup = TRUE;
6442
6443 /*
6444 * wake up anybody waiting on entries that we have
6445 * already wired.
6446 */
6447 if (need_wakeup) {
6448 vm_map_entry_wakeup(map);
6449 need_wakeup = FALSE;
6450 }
6451 /*
6452 * User wiring is interruptible
6453 */
5ba3f43e 6454 wait_result = vm_map_entry_wait(map,
0a7de745
A
6455 (user_wire) ? THREAD_ABORTSAFE :
6456 THREAD_UNINT);
6457 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
6458 /*
6459 * undo the wirings we have done so far
6460 * We do not clear the needs_wakeup flag,
6461 * because we cannot tell if we were the
6462 * only one waiting.
6463 */
2d21ac55
A
6464 rc = KERN_FAILURE;
6465 goto done;
1c79356b
A
6466 }
6467
1c79356b
A
6468 /*
6469 * Cannot avoid a lookup here. reset timestamp.
6470 */
6471 last_timestamp = map->timestamp;
6472
6473 /*
6474 * The entry could have been clipped, look it up again.
6475 * Worst that can happen is that it may not exist anymore.
6476 */
6477 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
6478 /*
6479 * User: undo everything up to the previous
6480 * entry. let vm_map_unwire worry about
6481 * checking the validity of the range.
6482 */
2d21ac55
A
6483 rc = KERN_FAILURE;
6484 goto done;
1c79356b
A
6485 }
6486 entry = first_entry;
6487 continue;
6488 }
5ba3f43e 6489
2d21ac55 6490 if (entry->is_sub_map) {
0a7de745
A
6491 vm_map_offset_t sub_start;
6492 vm_map_offset_t sub_end;
6493 vm_map_offset_t local_start;
6494 vm_map_offset_t local_end;
6495 pmap_t pmap;
2d21ac55 6496
fe8ab488
A
6497 if (wire_and_extract) {
6498 /*
6499 * Wiring would result in copy-on-write
6500 * which would not be compatible with
6501 * the sharing we have with the original
6502 * provider of this memory.
6503 */
6504 rc = KERN_INVALID_ARGUMENT;
6505 goto done;
6506 }
6507
2d21ac55 6508 vm_map_clip_start(map, entry, s);
1c79356b
A
6509 vm_map_clip_end(map, entry, end);
6510
3e170ce0 6511 sub_start = VME_OFFSET(entry);
2d21ac55 6512 sub_end = entry->vme_end;
3e170ce0 6513 sub_end += VME_OFFSET(entry) - entry->vme_start;
5ba3f43e 6514
/* NOTE(review): local_end is assigned here but never read afterwards. */
1c79356b 6515 local_end = entry->vme_end;
0a7de745
A
6516 if (map_pmap == NULL) {
6517 vm_object_t object;
6518 vm_object_offset_t offset;
6519 vm_prot_t prot;
6520 boolean_t wired;
6521 vm_map_entry_t local_entry;
6522 vm_map_version_t version;
6523 vm_map_t lookup_map;
6524
6525 if (entry->use_pmap) {
3e170ce0 6526 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c
A
6527 /* ppc implementation requires that */
6528 /* submaps pmap address ranges line */
6529 /* up with parent map */
6530#ifdef notdef
6531 pmap_addr = sub_start;
6532#endif
2d21ac55 6533 pmap_addr = s;
1c79356b
A
6534 } else {
6535 pmap = map->pmap;
2d21ac55 6536 pmap_addr = s;
1c79356b 6537 }
2d21ac55 6538
1c79356b 6539 if (entry->wired_count) {
0a7de745 6540 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6541 goto done;
0a7de745 6542 }
2d21ac55
A
6543
6544 /*
6545 * The map was not unlocked:
6546 * no need to goto re-lookup.
6547 * Just go directly to next entry.
6548 */
1c79356b 6549 entry = entry->vme_next;
2d21ac55 6550 s = entry->vme_start;
1c79356b 6551 continue;
2d21ac55 6552 }
9bccf70c 6553
2d21ac55
A
6554 /* call vm_map_lookup_locked to */
6555 /* cause any needs copy to be */
6556 /* evaluated */
6557 local_start = entry->vme_start;
6558 lookup_map = map;
6559 vm_map_lock_write_to_read(map);
f427ee49
A
6560 rc = vm_map_lookup_locked(
6561 &lookup_map, local_start,
6562 (access_type | extra_prots),
6563 OBJECT_LOCK_EXCLUSIVE,
6564 &version, &object,
6565 &offset, &prot, &wired,
6566 NULL,
6567 &real_map, NULL);
6568 if (rc != KERN_SUCCESS) {
/*
 * This path returns directly rather than via "done":
 * the wiring done so far ([start, s)) is undone with
 * vm_map_unwire() after the read lock is released.
 */
2d21ac55 6569 vm_map_unlock_read(lookup_map);
4bd07ac2 6570 assert(map_pmap == NULL);
2d21ac55 6571 vm_map_unwire(map, start,
0a7de745 6572 s, user_wire);
f427ee49 6573 return rc;
2d21ac55 6574 }
316670eb 6575 vm_object_unlock(object);
0a7de745 6576 if (real_map != lookup_map) {
2d21ac55 6577 vm_map_unlock(real_map);
0a7de745 6578 }
2d21ac55
A
6579 vm_map_unlock_read(lookup_map);
6580 vm_map_lock(map);
1c79356b 6581
2d21ac55 6582 /* we unlocked, so must re-lookup */
5ba3f43e 6583 if (!vm_map_lookup_entry(map,
0a7de745
A
6584 local_start,
6585 &local_entry)) {
2d21ac55
A
6586 rc = KERN_FAILURE;
6587 goto done;
6588 }
6589
6590 /*
6591 * entry could have been "simplified",
6592 * so re-clip
6593 */
6594 entry = local_entry;
6595 assert(s == local_start);
6596 vm_map_clip_start(map, entry, s);
6597 vm_map_clip_end(map, entry, end);
6598 /* re-compute "e" */
6599 e = entry->vme_end;
0a7de745 6600 if (e > end) {
2d21ac55 6601 e = end;
0a7de745 6602 }
2d21ac55
A
6603
6604 /* did we have a change of type? */
6605 if (!entry->is_sub_map) {
6606 last_timestamp = map->timestamp;
6607 continue;
1c79356b
A
6608 }
6609 } else {
9bccf70c 6610 local_start = entry->vme_start;
2d21ac55
A
6611 pmap = map_pmap;
6612 }
6613
0a7de745 6614 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6615 goto done;
0a7de745 6616 }
2d21ac55
A
6617
6618 entry->in_transition = TRUE;
6619
6620 vm_map_unlock(map);
5ba3f43e 6621 rc = vm_map_wire_nested(VME_SUBMAP(entry),
0a7de745
A
6622 sub_start, sub_end,
6623 caller_prot, tag,
6624 user_wire, pmap, pmap_addr,
6625 NULL);
2d21ac55 6626 vm_map_lock(map);
9bccf70c 6627
1c79356b
A
6628 /*
6629 * Find the entry again. It could have been clipped
6630 * after we unlocked the map.
6631 */
9bccf70c 6632 if (!vm_map_lookup_entry(map, local_start,
0a7de745 6633 &first_entry)) {
9bccf70c 6634 panic("vm_map_wire: re-lookup failed");
0a7de745 6635 }
9bccf70c 6636 entry = first_entry;
1c79356b 6637
2d21ac55
A
6638 assert(local_start == s);
6639 /* re-compute "e" */
6640 e = entry->vme_end;
0a7de745 6641 if (e > end) {
2d21ac55 6642 e = end;
0a7de745 6643 }
2d21ac55 6644
1c79356b
A
6645 last_timestamp = map->timestamp;
6646 while ((entry != vm_map_to_entry(map)) &&
0a7de745 6647 (entry->vme_start < e)) {
1c79356b
A
6648 assert(entry->in_transition);
6649 entry->in_transition = FALSE;
6650 if (entry->needs_wakeup) {
6651 entry->needs_wakeup = FALSE;
6652 need_wakeup = TRUE;
6653 }
6654 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 6655 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
6656 }
6657 entry = entry->vme_next;
6658 }
0a7de745 6659 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6660 goto done;
1c79356b 6661 }
2d21ac55
A
6662
6663 /* no need to relookup again */
6664 s = entry->vme_start;
1c79356b
A
6665 continue;
6666 }
6667
6668 /*
6669 * If this entry is already wired then increment
6670 * the appropriate wire reference count.
6671 */
9bccf70c 6672 if (entry->wired_count) {
fe8ab488
A
6673 if ((entry->protection & access_type) != access_type) {
6674 /* found a protection problem */
6675
6676 /*
6677 * XXX FBDP
6678 * We should always return an error
6679 * in this case but since we didn't
6680 * enforce it before, let's do
6681 * it only for the new "wire_and_extract"
6682 * code path for now...
6683 */
6684 if (wire_and_extract) {
6685 rc = KERN_PROTECTION_FAILURE;
6686 goto done;
6687 }
6688 }
6689
1c79356b
A
6690 /*
6691 * entry is already wired down, get our reference
6692 * after clipping to our range.
6693 */
2d21ac55 6694 vm_map_clip_start(map, entry, s);
1c79356b 6695 vm_map_clip_end(map, entry, end);
1c79356b 6696
0a7de745 6697 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6698 goto done;
0a7de745 6699 }
2d21ac55 6700
fe8ab488 6701 if (wire_and_extract) {
0a7de745
A
6702 vm_object_t object;
6703 vm_object_offset_t offset;
6704 vm_page_t m;
fe8ab488
A
6705
6706 /*
6707 * We don't have to "wire" the page again
6708 * but we still have to "extract" its
6709 * physical page number, after some sanity
6710 * checks.
6711 */
6712 assert((entry->vme_end - entry->vme_start)
0a7de745 6713 == PAGE_SIZE);
fe8ab488
A
6714 assert(!entry->needs_copy);
6715 assert(!entry->is_sub_map);
3e170ce0 6716 assert(VME_OBJECT(entry));
fe8ab488 6717 if (((entry->vme_end - entry->vme_start)
0a7de745 6718 != PAGE_SIZE) ||
fe8ab488
A
6719 entry->needs_copy ||
6720 entry->is_sub_map ||
3e170ce0 6721 VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
6722 rc = KERN_INVALID_ARGUMENT;
6723 goto done;
6724 }
6725
3e170ce0
A
6726 object = VME_OBJECT(entry);
6727 offset = VME_OFFSET(entry);
fe8ab488
A
6728 /* need exclusive lock to update m->dirty */
6729 if (entry->protection & VM_PROT_WRITE) {
6730 vm_object_lock(object);
6731 } else {
6732 vm_object_lock_shared(object);
6733 }
6734 m = vm_page_lookup(object, offset);
6735 assert(m != VM_PAGE_NULL);
39037602
A
6736 assert(VM_PAGE_WIRED(m));
6737 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6738 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
fe8ab488
A
6739 if (entry->protection & VM_PROT_WRITE) {
6740 vm_object_lock_assert_exclusive(
39037602 6741 object);
d9a64523 6742 m->vmp_dirty = TRUE;
fe8ab488
A
6743 }
6744 } else {
6745 /* not already wired !? */
6746 *physpage_p = 0;
6747 }
6748 vm_object_unlock(object);
6749 }
6750
2d21ac55 6751 /* map was not unlocked: no need to relookup */
1c79356b 6752 entry = entry->vme_next;
2d21ac55 6753 s = entry->vme_start;
1c79356b
A
6754 continue;
6755 }
6756
6757 /*
6758 * Unwired entry or wire request transmitted via submap
6759 */
6760
5ba3f43e
A
6761 /*
6762 * Wiring would copy the pages to the shadow object.
6763 * The shadow object would not be code-signed so
6764 * attempting to execute code from these copied pages
6765 * would trigger a code-signing violation.
6766 */
d9a64523
A
6767
6768 if ((entry->protection & VM_PROT_EXECUTE)
f427ee49 6769#if XNU_TARGET_OS_OSX
d9a64523 6770 &&
eb6b6ca3 6771 map->pmap != kernel_pmap &&
f427ee49
A
6772 (vm_map_cs_enforcement(map)
6773#if __arm64__
6774 || !VM_MAP_IS_EXOTIC(map)
6775#endif /* __arm64__ */
6776 )
6777#endif /* XNU_TARGET_OS_OSX */
0a7de745 6778 ) {
5ba3f43e
A
6779#if MACH_ASSERT
6780 printf("pid %d[%s] wiring executable range from "
0a7de745
A
6781 "0x%llx to 0x%llx: rejected to preserve "
6782 "code-signing\n",
6783 proc_selfpid(),
6784 (current_task()->bsd_info
6785 ? proc_name_address(current_task()->bsd_info)
6786 : "?"),
6787 (uint64_t) entry->vme_start,
6788 (uint64_t) entry->vme_end);
5ba3f43e
A
6789#endif /* MACH_ASSERT */
6790 DTRACE_VM2(cs_executable_wire,
0a7de745
A
6791 uint64_t, (uint64_t)entry->vme_start,
6792 uint64_t, (uint64_t)entry->vme_end);
5ba3f43e
A
6793 cs_executable_wire++;
6794 rc = KERN_PROTECTION_FAILURE;
6795 goto done;
6796 }
39037602 6797
1c79356b
A
6798 /*
6799 * Perform actions of vm_map_lookup that need the write
6800 * lock on the map: create a shadow object for a
6801 * copy-on-write region, or an object for a zero-fill
6802 * region.
6803 */
6804 size = entry->vme_end - entry->vme_start;
6805 /*
6806 * If wiring a copy-on-write page, we need to copy it now
6807 * even if we're only (currently) requesting read access.
6808 * This is aggressive, but once it's wired we can't move it.
6809 */
6810 if (entry->needs_copy) {
fe8ab488
A
6811 if (wire_and_extract) {
6812 /*
6813 * We're supposed to share with the original
6814 * provider so should not be "needs_copy"
6815 */
6816 rc = KERN_INVALID_ARGUMENT;
6817 goto done;
6818 }
3e170ce0
A
6819
6820 VME_OBJECT_SHADOW(entry, size);
1c79356b 6821 entry->needs_copy = FALSE;
3e170ce0 6822 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
6823 if (wire_and_extract) {
6824 /*
6825 * We're supposed to share with the original
6826 * provider so should already have an object.
6827 */
6828 rc = KERN_INVALID_ARGUMENT;
6829 goto done;
6830 }
3e170ce0
A
6831 VME_OBJECT_SET(entry, vm_object_allocate(size));
6832 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
fe8ab488 6833 assert(entry->use_pmap);
1c79356b
A
6834 }
6835
2d21ac55 6836 vm_map_clip_start(map, entry, s);
1c79356b
A
6837 vm_map_clip_end(map, entry, end);
6838
2d21ac55 6839 /* re-compute "e" */
1c79356b 6840 e = entry->vme_end;
0a7de745 6841 if (e > end) {
2d21ac55 6842 e = end;
0a7de745 6843 }
1c79356b
A
6844
6845 /*
6846 * Check for holes and protection mismatch.
6847 * Holes: Next entry should be contiguous unless this
6848 * is the end of the region.
6849 * Protection: Access requested must be allowed, unless
6850 * wiring is by protection class
6851 */
2d21ac55
A
6852 if ((entry->vme_end < end) &&
6853 ((entry->vme_next == vm_map_to_entry(map)) ||
0a7de745 6854 (entry->vme_next->vme_start > entry->vme_end))) {
2d21ac55
A
6855 /* found a hole */
6856 rc = KERN_INVALID_ADDRESS;
6857 goto done;
6858 }
6859 if ((entry->protection & access_type) != access_type) {
6860 /* found a protection problem */
6861 rc = KERN_PROTECTION_FAILURE;
6862 goto done;
1c79356b
A
6863 }
6864
6865 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6866
0a7de745 6867 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
2d21ac55 6868 goto done;
0a7de745 6869 }
1c79356b
A
6870
6871 entry->in_transition = TRUE;
6872
6873 /*
6874 * This entry might get split once we unlock the map.
6875 * In vm_fault_wire(), we need the current range as
6876 * defined by this entry. In order for this to work
6877 * along with a simultaneous clip operation, we make a
6878 * temporary copy of this entry and use that for the
6879 * wiring. Note that the underlying objects do not
6880 * change during a clip.
6881 */
6882 tmp_entry = *entry;
6883
6884 /*
6885 * The in_transition state guarantees that the entry
6886 * (or entries for this range, if split occurred) will be
6887 * there when the map lock is acquired for the second time.
6888 */
6889 vm_map_unlock(map);
0b4e3aa0 6890
0a7de745 6891 if (!user_wire && cur_thread != THREAD_NULL) {
9bccf70c 6892 interruptible_state = thread_interrupt_level(THREAD_UNINT);
0a7de745 6893 } else {
91447636 6894 interruptible_state = THREAD_UNINT;
0a7de745 6895 }
9bccf70c 6896
0a7de745 6897 if (map_pmap) {
5ba3f43e 6898 rc = vm_fault_wire(map,
0a7de745
A
6899 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6900 physpage_p);
6901 } else {
5ba3f43e 6902 rc = vm_fault_wire(map,
0a7de745
A
6903 &tmp_entry, caller_prot, tag, map->pmap,
6904 tmp_entry.vme_start,
6905 physpage_p);
6906 }
0b4e3aa0 6907
0a7de745 6908 if (!user_wire && cur_thread != THREAD_NULL) {
9bccf70c 6909 thread_interrupt_level(interruptible_state);
0a7de745 6910 }
0b4e3aa0 6911
1c79356b
A
6912 vm_map_lock(map);
6913
0a7de745 6914 if (last_timestamp + 1 != map->timestamp) {
1c79356b
A
6915 /*
6916 * Find the entry again. It could have been clipped
6917 * after we unlocked the map.
6918 */
6919 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
0a7de745 6920 &first_entry)) {
1c79356b 6921 panic("vm_map_wire: re-lookup failed");
0a7de745 6922 }
1c79356b
A
6923
6924 entry = first_entry;
6925 }
6926
6927 last_timestamp = map->timestamp;
6928
6929 while ((entry != vm_map_to_entry(map)) &&
0a7de745 6930 (entry->vme_start < tmp_entry.vme_end)) {
1c79356b
A
6931 assert(entry->in_transition);
6932 entry->in_transition = FALSE;
6933 if (entry->needs_wakeup) {
6934 entry->needs_wakeup = FALSE;
6935 need_wakeup = TRUE;
6936 }
0a7de745 6937 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6938 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
6939 }
6940 entry = entry->vme_next;
6941 }
6942
0a7de745 6943 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 6944 goto done;
1c79356b 6945 }
2d21ac55 6946
d190cdc3
A
6947 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6948 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6949 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6950 /* found a "new" hole */
6951 s = tmp_entry.vme_end;
6952 rc = KERN_INVALID_ADDRESS;
6953 goto done;
6954 }
6955
2d21ac55 6956 s = entry->vme_start;
1c79356b 6957 } /* end while loop through map entries */
2d21ac55
A
6958
6959done:
/*
 * Common exit, reached with the map locked.  On success the range is
 * simplified; on failure everything wired so far ([start, s)) is unwired
 * after the map lock has been dropped.
 */
6960 if (rc == KERN_SUCCESS) {
6961 /* repair any damage we may have made to the VM map */
6962 vm_map_simplify_range(map, start, end);
6963 }
6964
1c79356b
A
6965 vm_map_unlock(map);
6966
6967 /*
6968 * wake up anybody waiting on entries we wired.
6969 */
0a7de745 6970 if (need_wakeup) {
1c79356b 6971 vm_map_entry_wakeup(map);
0a7de745 6972 }
1c79356b 6973
2d21ac55
A
6974 if (rc != KERN_SUCCESS) {
6975 /* undo what has been wired so far */
4bd07ac2 6976 vm_map_unwire_nested(map, start, s, user_wire,
0a7de745 6977 map_pmap, pmap_addr);
fe8ab488
A
6978 if (physpage_p) {
6979 *physpage_p = 0;
6980 }
2d21ac55
A
6981 }
6982
6983 return rc;
1c79356b
A
6984}
6985
6986kern_return_t
3e170ce0 6987vm_map_wire_external(
0a7de745
A
6988 vm_map_t map,
6989 vm_map_offset_t start,
6990 vm_map_offset_t end,
6991 vm_prot_t caller_prot,
6992 boolean_t user_wire)
1c79356b 6993{
0a7de745 6994 kern_return_t kret;
3e170ce0 6995
5ba3f43e 6996 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
0a7de745 6997 user_wire, (pmap_t)NULL, 0, NULL);
3e170ce0
A
6998 return kret;
6999}
1c79356b 7000
3e170ce0 7001kern_return_t
5ba3f43e 7002vm_map_wire_kernel(
0a7de745
A
7003 vm_map_t map,
7004 vm_map_offset_t start,
7005 vm_map_offset_t end,
7006 vm_prot_t caller_prot,
7007 vm_tag_t tag,
7008 boolean_t user_wire)
3e170ce0 7009{
0a7de745 7010 kern_return_t kret;
1c79356b 7011
5ba3f43e 7012 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
0a7de745 7013 user_wire, (pmap_t)NULL, 0, NULL);
fe8ab488
A
7014 return kret;
7015}
7016
7017kern_return_t
3e170ce0 7018vm_map_wire_and_extract_external(
0a7de745
A
7019 vm_map_t map,
7020 vm_map_offset_t start,
7021 vm_prot_t caller_prot,
7022 boolean_t user_wire,
7023 ppnum_t *physpage_p)
fe8ab488 7024{
0a7de745 7025 kern_return_t kret;
3e170ce0 7026
3e170ce0 7027 kret = vm_map_wire_nested(map,
0a7de745
A
7028 start,
7029 start + VM_MAP_PAGE_SIZE(map),
7030 caller_prot,
7031 vm_tag_bt(),
7032 user_wire,
7033 (pmap_t)NULL,
7034 0,
7035 physpage_p);
3e170ce0
A
7036 if (kret != KERN_SUCCESS &&
7037 physpage_p != NULL) {
7038 *physpage_p = 0;
7039 }
7040 return kret;
7041}
fe8ab488 7042
3e170ce0 7043kern_return_t
5ba3f43e 7044vm_map_wire_and_extract_kernel(
0a7de745
A
7045 vm_map_t map,
7046 vm_map_offset_t start,
7047 vm_prot_t caller_prot,
7048 vm_tag_t tag,
7049 boolean_t user_wire,
7050 ppnum_t *physpage_p)
3e170ce0 7051{
0a7de745 7052 kern_return_t kret;
fe8ab488
A
7053
7054 kret = vm_map_wire_nested(map,
0a7de745
A
7055 start,
7056 start + VM_MAP_PAGE_SIZE(map),
7057 caller_prot,
7058 tag,
7059 user_wire,
7060 (pmap_t)NULL,
7061 0,
7062 physpage_p);
fe8ab488
A
7063 if (kret != KERN_SUCCESS &&
7064 physpage_p != NULL) {
7065 *physpage_p = 0;
7066 }
1c79356b
A
7067 return kret;
7068}
7069
7070/*
7071 * vm_map_unwire:
7072 *
7073 * Sets the pageability of the specified address range in the target
7074 * as pageable. Regions specified must have been wired previously.
7075 *
7076 * The map must not be locked, but a reference must remain to the map
7077 * throughout the call.
7078 *
7079 * Kernel will panic on failures. User unwire ignores holes and
7080 * unwired and intransition entries to avoid losing memory by leaving
7081 * it unwired.
7082 */
91447636 7083static kern_return_t
1c79356b 7084vm_map_unwire_nested(
0a7de745
A
7085 vm_map_t map,
7086 vm_map_offset_t start,
7087 vm_map_offset_t end,
7088 boolean_t user_wire,
7089 pmap_t map_pmap,
7090 vm_map_offset_t pmap_addr)
1c79356b 7091{
0a7de745
A
7092 vm_map_entry_t entry;
7093 struct vm_map_entry *first_entry, tmp_entry;
7094 boolean_t need_wakeup;
7095 boolean_t main_map = FALSE;
7096 unsigned int last_timestamp;
1c79356b
A
7097
7098 vm_map_lock(map);
0a7de745 7099 if (map_pmap == NULL) {
1c79356b 7100 main_map = TRUE;
0a7de745 7101 }
1c79356b
A
7102 last_timestamp = map->timestamp;
7103
7104 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
7105 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7106 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 7107
2d21ac55
A
7108 if (start == end) {
7109 /* We unwired what the caller asked for: zero pages */
7110 vm_map_unlock(map);
7111 return KERN_SUCCESS;
7112 }
7113
1c79356b
A
7114 if (vm_map_lookup_entry(map, start, &first_entry)) {
7115 entry = first_entry;
2d21ac55
A
7116 /*
7117 * vm_map_clip_start will be done later.
7118 * We don't want to unnest any nested sub maps here !
7119 */
0a7de745 7120 } else {
2d21ac55
A
7121 if (!user_wire) {
7122 panic("vm_map_unwire: start not found");
7123 }
1c79356b
A
7124 /* Start address is not in map. */
7125 vm_map_unlock(map);
0a7de745 7126 return KERN_INVALID_ADDRESS;
1c79356b
A
7127 }
7128
b0d623f7
A
7129 if (entry->superpage_size) {
7130 /* superpages are always wired */
7131 vm_map_unlock(map);
7132 return KERN_INVALID_ADDRESS;
7133 }
7134
1c79356b
A
7135 need_wakeup = FALSE;
7136 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7137 if (entry->in_transition) {
7138 /*
7139 * 1)
7140 * Another thread is wiring down this entry. Note
7141 * that if it is not for the other thread we would
7142 * be unwiring an unwired entry. This is not
7143 * permitted. If we wait, we will be unwiring memory
7144 * we did not wire.
7145 *
7146 * 2)
7147 * Another thread is unwiring this entry. We did not
7148 * have a reference to it, because if we did, this
7149 * entry will not be getting unwired now.
7150 */
2d21ac55
A
7151 if (!user_wire) {
7152 /*
7153 * XXX FBDP
7154 * This could happen: there could be some
7155 * overlapping vslock/vsunlock operations
7156 * going on.
7157 * We should probably just wait and retry,
7158 * but then we have to be careful that this
5ba3f43e 7159 * entry could get "simplified" after
2d21ac55
A
7160 * "in_transition" gets unset and before
7161 * we re-lookup the entry, so we would
7162 * have to re-clip the entry to avoid
7163 * re-unwiring what we have already unwired...
7164 * See vm_map_wire_nested().
7165 *
7166 * Or we could just ignore "in_transition"
7167 * here and proceed to decement the wired
7168 * count(s) on this entry. That should be fine
7169 * as long as "wired_count" doesn't drop all
7170 * the way to 0 (and we should panic if THAT
7171 * happens).
7172 */
1c79356b 7173 panic("vm_map_unwire: in_transition entry");
2d21ac55 7174 }
1c79356b
A
7175
7176 entry = entry->vme_next;
7177 continue;
7178 }
7179
2d21ac55 7180 if (entry->is_sub_map) {
0a7de745
A
7181 vm_map_offset_t sub_start;
7182 vm_map_offset_t sub_end;
7183 vm_map_offset_t local_end;
7184 pmap_t pmap;
5ba3f43e 7185
1c79356b
A
7186 vm_map_clip_start(map, entry, start);
7187 vm_map_clip_end(map, entry, end);
7188
3e170ce0 7189 sub_start = VME_OFFSET(entry);
1c79356b 7190 sub_end = entry->vme_end - entry->vme_start;
3e170ce0 7191 sub_end += VME_OFFSET(entry);
1c79356b 7192 local_end = entry->vme_end;
0a7de745
A
7193 if (map_pmap == NULL) {
7194 if (entry->use_pmap) {
3e170ce0 7195 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c 7196 pmap_addr = sub_start;
2d21ac55 7197 } else {
1c79356b 7198 pmap = map->pmap;
9bccf70c 7199 pmap_addr = start;
2d21ac55
A
7200 }
7201 if (entry->wired_count == 0 ||
7202 (user_wire && entry->user_wired_count == 0)) {
0a7de745 7203 if (!user_wire) {
2d21ac55 7204 panic("vm_map_unwire: entry is unwired");
0a7de745 7205 }
2d21ac55
A
7206 entry = entry->vme_next;
7207 continue;
7208 }
7209
7210 /*
7211 * Check for holes
7212 * Holes: Next entry should be contiguous unless
7213 * this is the end of the region.
7214 */
5ba3f43e 7215 if (((entry->vme_end < end) &&
0a7de745
A
7216 ((entry->vme_next == vm_map_to_entry(map)) ||
7217 (entry->vme_next->vme_start
7218 > entry->vme_end)))) {
7219 if (!user_wire) {
2d21ac55 7220 panic("vm_map_unwire: non-contiguous region");
0a7de745 7221 }
1c79356b 7222/*
0a7de745
A
7223 * entry = entry->vme_next;
7224 * continue;
7225 */
2d21ac55 7226 }
1c79356b 7227
2d21ac55 7228 subtract_wire_counts(map, entry, user_wire);
1c79356b 7229
2d21ac55
A
7230 if (entry->wired_count != 0) {
7231 entry = entry->vme_next;
7232 continue;
7233 }
1c79356b 7234
2d21ac55
A
7235 entry->in_transition = TRUE;
7236 tmp_entry = *entry;/* see comment in vm_map_wire() */
7237
7238 /*
7239 * We can unlock the map now. The in_transition state
7240 * guarantees existance of the entry.
7241 */
7242 vm_map_unlock(map);
5ba3f43e 7243 vm_map_unwire_nested(VME_SUBMAP(entry),
0a7de745 7244 sub_start, sub_end, user_wire, pmap, pmap_addr);
2d21ac55 7245 vm_map_lock(map);
1c79356b 7246
0a7de745 7247 if (last_timestamp + 1 != map->timestamp) {
2d21ac55 7248 /*
5ba3f43e 7249 * Find the entry again. It could have been
2d21ac55
A
7250 * clipped or deleted after we unlocked the map.
7251 */
5ba3f43e 7252 if (!vm_map_lookup_entry(map,
0a7de745
A
7253 tmp_entry.vme_start,
7254 &first_entry)) {
7255 if (!user_wire) {
2d21ac55 7256 panic("vm_map_unwire: re-lookup failed");
0a7de745 7257 }
2d21ac55 7258 entry = first_entry->vme_next;
0a7de745 7259 } else {
2d21ac55 7260 entry = first_entry;
0a7de745 7261 }
2d21ac55
A
7262 }
7263 last_timestamp = map->timestamp;
1c79356b 7264
1c79356b 7265 /*
2d21ac55 7266 * clear transition bit for all constituent entries
5ba3f43e 7267 * that were in the original entry (saved in
2d21ac55
A
7268 * tmp_entry). Also check for waiters.
7269 */
7270 while ((entry != vm_map_to_entry(map)) &&
0a7de745 7271 (entry->vme_start < tmp_entry.vme_end)) {
2d21ac55
A
7272 assert(entry->in_transition);
7273 entry->in_transition = FALSE;
7274 if (entry->needs_wakeup) {
7275 entry->needs_wakeup = FALSE;
7276 need_wakeup = TRUE;
7277 }
7278 entry = entry->vme_next;
1c79356b 7279 }
2d21ac55 7280 continue;
1c79356b 7281 } else {
2d21ac55 7282 vm_map_unlock(map);
3e170ce0 7283 vm_map_unwire_nested(VME_SUBMAP(entry),
0a7de745
A
7284 sub_start, sub_end, user_wire, map_pmap,
7285 pmap_addr);
2d21ac55 7286 vm_map_lock(map);
1c79356b 7287
0a7de745 7288 if (last_timestamp + 1 != map->timestamp) {
2d21ac55 7289 /*
5ba3f43e 7290 * Find the entry again. It could have been
2d21ac55
A
7291 * clipped or deleted after we unlocked the map.
7292 */
5ba3f43e 7293 if (!vm_map_lookup_entry(map,
0a7de745
A
7294 tmp_entry.vme_start,
7295 &first_entry)) {
7296 if (!user_wire) {
2d21ac55 7297 panic("vm_map_unwire: re-lookup failed");
0a7de745 7298 }
2d21ac55 7299 entry = first_entry->vme_next;
0a7de745 7300 } else {
2d21ac55 7301 entry = first_entry;
0a7de745 7302 }
2d21ac55
A
7303 }
7304 last_timestamp = map->timestamp;
1c79356b
A
7305 }
7306 }
7307
7308
9bccf70c 7309 if ((entry->wired_count == 0) ||
2d21ac55 7310 (user_wire && entry->user_wired_count == 0)) {
0a7de745 7311 if (!user_wire) {
1c79356b 7312 panic("vm_map_unwire: entry is unwired");
0a7de745 7313 }
1c79356b
A
7314
7315 entry = entry->vme_next;
7316 continue;
7317 }
5ba3f43e 7318
1c79356b 7319 assert(entry->wired_count > 0 &&
0a7de745 7320 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
7321
7322 vm_map_clip_start(map, entry, start);
7323 vm_map_clip_end(map, entry, end);
7324
7325 /*
7326 * Check for holes
7327 * Holes: Next entry should be contiguous unless
7328 * this is the end of the region.
7329 */
5ba3f43e 7330 if (((entry->vme_end < end) &&
0a7de745
A
7331 ((entry->vme_next == vm_map_to_entry(map)) ||
7332 (entry->vme_next->vme_start > entry->vme_end)))) {
7333 if (!user_wire) {
1c79356b 7334 panic("vm_map_unwire: non-contiguous region");
0a7de745 7335 }
1c79356b
A
7336 entry = entry->vme_next;
7337 continue;
7338 }
7339
2d21ac55 7340 subtract_wire_counts(map, entry, user_wire);
1c79356b 7341
9bccf70c 7342 if (entry->wired_count != 0) {
1c79356b
A
7343 entry = entry->vme_next;
7344 continue;
1c79356b
A
7345 }
7346
0a7de745 7347 if (entry->zero_wired_pages) {
b0d623f7
A
7348 entry->zero_wired_pages = FALSE;
7349 }
7350
1c79356b 7351 entry->in_transition = TRUE;
0a7de745 7352 tmp_entry = *entry; /* see comment in vm_map_wire() */
1c79356b
A
7353
7354 /*
7355 * We can unlock the map now. The in_transition state
7356 * guarantees existance of the entry.
7357 */
7358 vm_map_unlock(map);
0a7de745 7359 if (map_pmap) {
5ba3f43e 7360 vm_fault_unwire(map,
0a7de745 7361 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 7362 } else {
5ba3f43e 7363 vm_fault_unwire(map,
0a7de745
A
7364 &tmp_entry, FALSE, map->pmap,
7365 tmp_entry.vme_start);
1c79356b
A
7366 }
7367 vm_map_lock(map);
7368
0a7de745 7369 if (last_timestamp + 1 != map->timestamp) {
1c79356b
A
7370 /*
7371 * Find the entry again. It could have been clipped
7372 * or deleted after we unlocked the map.
7373 */
7374 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
0a7de745
A
7375 &first_entry)) {
7376 if (!user_wire) {
2d21ac55 7377 panic("vm_map_unwire: re-lookup failed");
0a7de745 7378 }
1c79356b 7379 entry = first_entry->vme_next;
0a7de745 7380 } else {
1c79356b 7381 entry = first_entry;
0a7de745 7382 }
1c79356b
A
7383 }
7384 last_timestamp = map->timestamp;
7385
7386 /*
7387 * clear transition bit for all constituent entries that
7388 * were in the original entry (saved in tmp_entry). Also
7389 * check for waiters.
7390 */
7391 while ((entry != vm_map_to_entry(map)) &&
0a7de745 7392 (entry->vme_start < tmp_entry.vme_end)) {
1c79356b
A
7393 assert(entry->in_transition);
7394 entry->in_transition = FALSE;
7395 if (entry->needs_wakeup) {
7396 entry->needs_wakeup = FALSE;
7397 need_wakeup = TRUE;
7398 }
7399 entry = entry->vme_next;
7400 }
7401 }
91447636
A
7402
7403 /*
7404 * We might have fragmented the address space when we wired this
7405 * range of addresses. Attempt to re-coalesce these VM map entries
7406 * with their neighbors now that they're no longer wired.
7407 * Under some circumstances, address space fragmentation can
7408 * prevent VM object shadow chain collapsing, which can cause
7409 * swap space leaks.
7410 */
7411 vm_map_simplify_range(map, start, end);
7412
1c79356b
A
7413 vm_map_unlock(map);
7414 /*
7415 * wake up anybody waiting on entries that we have unwired.
7416 */
0a7de745 7417 if (need_wakeup) {
1c79356b 7418 vm_map_entry_wakeup(map);
0a7de745
A
7419 }
7420 return KERN_SUCCESS;
1c79356b
A
7421}
7422
7423kern_return_t
7424vm_map_unwire(
0a7de745
A
7425 vm_map_t map,
7426 vm_map_offset_t start,
7427 vm_map_offset_t end,
7428 boolean_t user_wire)
1c79356b 7429{
5ba3f43e 7430 return vm_map_unwire_nested(map, start, end,
0a7de745 7431 user_wire, (pmap_t)NULL, 0);
1c79356b
A
7432}
7433
7434
7435/*
7436 * vm_map_entry_delete: [ internal use only ]
7437 *
7438 * Deallocate the given entry from the target map.
5ba3f43e 7439 */
91447636 7440static void
1c79356b 7441vm_map_entry_delete(
0a7de745
A
7442 vm_map_t map,
7443 vm_map_entry_t entry)
1c79356b 7444{
0a7de745
A
7445 vm_map_offset_t s, e;
7446 vm_object_t object;
7447 vm_map_t submap;
1c79356b
A
7448
7449 s = entry->vme_start;
7450 e = entry->vme_end;
f427ee49
A
7451 assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7452 assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7453 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7454 assert(page_aligned(s));
7455 assert(page_aligned(e));
7456 }
39236c6e
A
7457 if (entry->map_aligned == TRUE) {
7458 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7459 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7460 }
1c79356b
A
7461 assert(entry->wired_count == 0);
7462 assert(entry->user_wired_count == 0);
b0d623f7 7463 assert(!entry->permanent);
1c79356b
A
7464
7465 if (entry->is_sub_map) {
7466 object = NULL;
3e170ce0 7467 submap = VME_SUBMAP(entry);
1c79356b
A
7468 } else {
7469 submap = NULL;
3e170ce0 7470 object = VME_OBJECT(entry);
1c79356b
A
7471 }
7472
6d2010ae 7473 vm_map_store_entry_unlink(map, entry);
1c79356b
A
7474 map->size -= e - s;
7475
7476 vm_map_entry_dispose(map, entry);
7477
7478 vm_map_unlock(map);
7479 /*
7480 * Deallocate the object only after removing all
7481 * pmap entries pointing to its pages.
7482 */
0a7de745 7483 if (submap) {
1c79356b 7484 vm_map_deallocate(submap);
0a7de745 7485 } else {
2d21ac55 7486 vm_object_deallocate(object);
0a7de745 7487 }
1c79356b
A
7488}
7489
7490void
7491vm_map_submap_pmap_clean(
0a7de745
A
7492 vm_map_t map,
7493 vm_map_offset_t start,
7494 vm_map_offset_t end,
7495 vm_map_t sub_map,
7496 vm_map_offset_t offset)
1c79356b 7497{
0a7de745
A
7498 vm_map_offset_t submap_start;
7499 vm_map_offset_t submap_end;
7500 vm_map_size_t remove_size;
7501 vm_map_entry_t entry;
1c79356b
A
7502
7503 submap_end = offset + (end - start);
7504 submap_start = offset;
b7266188
A
7505
7506 vm_map_lock_read(sub_map);
0a7de745 7507 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
1c79356b 7508 remove_size = (entry->vme_end - entry->vme_start);
0a7de745 7509 if (offset > entry->vme_start) {
1c79356b 7510 remove_size -= offset - entry->vme_start;
0a7de745 7511 }
5ba3f43e 7512
1c79356b 7513
0a7de745 7514 if (submap_end < entry->vme_end) {
1c79356b 7515 remove_size -=
0a7de745 7516 entry->vme_end - submap_end;
1c79356b 7517 }
0a7de745 7518 if (entry->is_sub_map) {
1c79356b
A
7519 vm_map_submap_pmap_clean(
7520 sub_map,
7521 start,
7522 start + remove_size,
3e170ce0
A
7523 VME_SUBMAP(entry),
7524 VME_OFFSET(entry));
1c79356b 7525 } else {
cb323159
A
7526 if (map->mapped_in_other_pmaps &&
7527 os_ref_get_count(&map->map_refcnt) != 0 &&
7528 VME_OBJECT(entry) != NULL) {
3e170ce0
A
7529 vm_object_pmap_protect_options(
7530 VME_OBJECT(entry),
7531 (VME_OFFSET(entry) +
0a7de745
A
7532 offset -
7533 entry->vme_start),
9bccf70c
A
7534 remove_size,
7535 PMAP_NULL,
f427ee49 7536 PAGE_SIZE,
9bccf70c 7537 entry->vme_start,
3e170ce0
A
7538 VM_PROT_NONE,
7539 PMAP_OPTIONS_REMOVE);
9bccf70c 7540 } else {
5ba3f43e 7541 pmap_remove(map->pmap,
0a7de745
A
7542 (addr64_t)start,
7543 (addr64_t)(start + remove_size));
9bccf70c 7544 }
1c79356b
A
7545 }
7546 }
7547
7548 entry = entry->vme_next;
2d21ac55 7549
0a7de745
A
7550 while ((entry != vm_map_to_entry(sub_map))
7551 && (entry->vme_start < submap_end)) {
5ba3f43e 7552 remove_size = (entry->vme_end - entry->vme_start);
0a7de745 7553 if (submap_end < entry->vme_end) {
1c79356b
A
7554 remove_size -= entry->vme_end - submap_end;
7555 }
0a7de745 7556 if (entry->is_sub_map) {
1c79356b
A
7557 vm_map_submap_pmap_clean(
7558 sub_map,
7559 (start + entry->vme_start) - offset,
7560 ((start + entry->vme_start) - offset) + remove_size,
3e170ce0
A
7561 VME_SUBMAP(entry),
7562 VME_OFFSET(entry));
1c79356b 7563 } else {
cb323159
A
7564 if (map->mapped_in_other_pmaps &&
7565 os_ref_get_count(&map->map_refcnt) != 0 &&
7566 VME_OBJECT(entry) != NULL) {
3e170ce0
A
7567 vm_object_pmap_protect_options(
7568 VME_OBJECT(entry),
7569 VME_OFFSET(entry),
9bccf70c
A
7570 remove_size,
7571 PMAP_NULL,
f427ee49 7572 PAGE_SIZE,
9bccf70c 7573 entry->vme_start,
3e170ce0
A
7574 VM_PROT_NONE,
7575 PMAP_OPTIONS_REMOVE);
9bccf70c 7576 } else {
5ba3f43e 7577 pmap_remove(map->pmap,
0a7de745
A
7578 (addr64_t)((start + entry->vme_start)
7579 - offset),
7580 (addr64_t)(((start + entry->vme_start)
7581 - offset) + remove_size));
9bccf70c 7582 }
1c79356b
A
7583 }
7584 entry = entry->vme_next;
b7266188
A
7585 }
7586 vm_map_unlock_read(sub_map);
1c79356b
A
7587 return;
7588}
7589
d9a64523
A
7590/*
7591 * virt_memory_guard_ast:
7592 *
7593 * Handle the AST callout for a virtual memory guard.
7594 * raise an EXC_GUARD exception and terminate the task
7595 * if configured to do so.
7596 */
7597void
7598virt_memory_guard_ast(
7599 thread_t thread,
7600 mach_exception_data_type_t code,
7601 mach_exception_data_type_t subcode)
7602{
7603 task_t task = thread->task;
7604 assert(task != kernel_task);
7605 assert(task == current_task());
7606 uint32_t behavior;
7607
7608 behavior = task->task_exc_guard;
7609
7610 /* Is delivery enabled */
7611 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7612 return;
7613 }
7614
7615 /* If only once, make sure we're that once */
7616 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7617 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7618
7619 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7620 break;
7621 }
7622 behavior = task->task_exc_guard;
7623 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7624 return;
7625 }
7626 }
7627
7628 /* Raise exception via corpse fork or synchronously */
7629 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7630 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7631 task_violated_guard(code, subcode, NULL);
7632 } else {
7633 task_exception_notify(EXC_GUARD, code, subcode);
7634 }
7635
7636 /* Terminate the task if desired */
7637 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7638 task_bsdtask_kill(current_task());
7639 }
7640}
7641
7642/*
7643 * vm_map_guard_exception:
7644 *
7645 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7646 *
7647 * Right now, we do this when we find nothing mapped, or a
7648 * gap in the mapping when a user address space deallocate
7649 * was requested. We report the address of the first gap found.
7650 */
7651static void
7652vm_map_guard_exception(
7653 vm_map_offset_t gap_start,
7654 unsigned reason)
7655{
7656 mach_exception_code_t code = 0;
7657 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7658 unsigned int target = 0; /* should we pass in pid associated with map? */
7659 mach_exception_data_type_t subcode = (uint64_t)gap_start;
cb323159
A
7660 boolean_t fatal = FALSE;
7661
7662 task_t task = current_task();
d9a64523
A
7663
7664 /* Can't deliver exceptions to kernel task */
cb323159 7665 if (task == kernel_task) {
d9a64523 7666 return;
0a7de745 7667 }
d9a64523
A
7668
7669 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7670 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7671 EXC_GUARD_ENCODE_TARGET(code, target);
cb323159
A
7672
7673 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7674 fatal = TRUE;
7675 }
7676 thread_guard_violation(current_thread(), code, subcode, fatal);
d9a64523
A
7677}
7678
1c79356b
A
7679/*
7680 * vm_map_delete: [ internal use only ]
7681 *
7682 * Deallocates the given address range from the target map.
7683 * Removes all user wirings. Unwires one kernel wiring if
7684 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7685 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7686 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7687 *
7688 * This routine is called with map locked and leaves map locked.
7689 */
91447636 7690static kern_return_t
1c79356b 7691vm_map_delete(
0a7de745
A
7692 vm_map_t map,
7693 vm_map_offset_t start,
7694 vm_map_offset_t end,
7695 int flags,
7696 vm_map_t zap_map)
1c79356b 7697{
0a7de745
A
7698 vm_map_entry_t entry, next;
7699 struct vm_map_entry *first_entry, tmp_entry;
7700 vm_map_offset_t s;
7701 vm_object_t object;
7702 boolean_t need_wakeup;
7703 unsigned int last_timestamp = ~0; /* unlikely value */
7704 int interruptible;
7705 vm_map_offset_t gap_start;
cb323159
A
7706 __unused vm_map_offset_t save_start = start;
7707 __unused vm_map_offset_t save_end = end;
0a7de745
A
7708 const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
7709 const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
7710
ea3f0419 7711 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
d9a64523 7712 gap_start = FIND_GAP;
0a7de745 7713 } else {
d9a64523 7714 gap_start = GAPS_OK;
0a7de745 7715 }
1c79356b 7716
5ba3f43e 7717 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
0a7de745 7718 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
7719
7720 /*
7721 * All our DMA I/O operations in IOKit are currently done by
7722 * wiring through the map entries of the task requesting the I/O.
7723 * Because of this, we must always wait for kernel wirings
7724 * to go away on the entries before deleting them.
7725 *
7726 * Any caller who wants to actually remove a kernel wiring
7727 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7728 * properly remove one wiring instead of blasting through
7729 * them all.
7730 */
7731 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7732
0a7de745 7733 while (1) {
b0d623f7
A
7734 /*
7735 * Find the start of the region, and clip it
7736 */
7737 if (vm_map_lookup_entry(map, start, &first_entry)) {
7738 entry = first_entry;
fe8ab488
A
7739 if (map == kalloc_map &&
7740 (entry->vme_start != start ||
0a7de745 7741 entry->vme_end != end)) {
fe8ab488 7742 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7743 "mismatched entry %p [0x%llx:0x%llx]\n",
7744 map,
7745 (uint64_t)start,
7746 (uint64_t)end,
7747 entry,
7748 (uint64_t)entry->vme_start,
7749 (uint64_t)entry->vme_end);
fe8ab488 7750 }
d9a64523
A
7751
7752 /*
7753 * If in a superpage, extend the range to include the start of the mapping.
7754 */
7755 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
b0d623f7
A
7756 start = SUPERPAGE_ROUND_DOWN(start);
7757 continue;
7758 }
d9a64523 7759
b0d623f7
A
7760 if (start == entry->vme_start) {
7761 /*
7762 * No need to clip. We don't want to cause
7763 * any unnecessary unnesting in this case...
7764 */
7765 } else {
fe8ab488
A
7766 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7767 entry->map_aligned &&
7768 !VM_MAP_PAGE_ALIGNED(
7769 start,
7770 VM_MAP_PAGE_MASK(map))) {
7771 /*
7772 * The entry will no longer be
7773 * map-aligned after clipping
7774 * and the caller said it's OK.
7775 */
7776 entry->map_aligned = FALSE;
7777 }
7778 if (map == kalloc_map) {
7779 panic("vm_map_delete(%p,0x%llx,0x%llx):"
0a7de745
A
7780 " clipping %p at 0x%llx\n",
7781 map,
7782 (uint64_t)start,
7783 (uint64_t)end,
7784 entry,
7785 (uint64_t)start);
fe8ab488 7786 }
b0d623f7
A
7787 vm_map_clip_start(map, entry, start);
7788 }
7789
2d21ac55 7790 /*
b0d623f7
A
7791 * Fix the lookup hint now, rather than each
7792 * time through the loop.
2d21ac55 7793 */
b0d623f7 7794 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 7795 } else {
fe8ab488 7796 if (map->pmap == kernel_pmap &&
cb323159 7797 os_ref_get_count(&map->map_refcnt) != 0) {
fe8ab488 7798 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7799 "no map entry at 0x%llx\n",
7800 map,
7801 (uint64_t)start,
7802 (uint64_t)end,
7803 (uint64_t)start);
fe8ab488 7804 }
b0d623f7 7805 entry = first_entry->vme_next;
0a7de745 7806 if (gap_start == FIND_GAP) {
d9a64523 7807 gap_start = start;
0a7de745 7808 }
2d21ac55 7809 }
b0d623f7 7810 break;
1c79356b 7811 }
0a7de745 7812 if (entry->superpage_size) {
b0d623f7 7813 end = SUPERPAGE_ROUND_UP(end);
0a7de745 7814 }
1c79356b
A
7815
7816 need_wakeup = FALSE;
7817 /*
7818 * Step through all entries in this region
7819 */
2d21ac55
A
7820 s = entry->vme_start;
7821 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7822 /*
7823 * At this point, we have deleted all the memory entries
7824 * between "start" and "s". We still need to delete
7825 * all memory entries between "s" and "end".
7826 * While we were blocked and the map was unlocked, some
7827 * new memory entries could have been re-allocated between
7828 * "start" and "s" and we don't want to mess with those.
7829 * Some of those entries could even have been re-assembled
7830 * with an entry after "s" (in vm_map_simplify_entry()), so
7831 * we may have to vm_map_clip_start() again.
7832 */
1c79356b 7833
2d21ac55
A
7834 if (entry->vme_start >= s) {
7835 /*
7836 * This entry starts on or after "s"
7837 * so no need to clip its start.
7838 */
7839 } else {
7840 /*
7841 * This entry has been re-assembled by a
7842 * vm_map_simplify_entry(). We need to
7843 * re-clip its start.
7844 */
fe8ab488
A
7845 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7846 entry->map_aligned &&
7847 !VM_MAP_PAGE_ALIGNED(s,
0a7de745 7848 VM_MAP_PAGE_MASK(map))) {
fe8ab488
A
7849 /*
7850 * The entry will no longer be map-aligned
7851 * after clipping and the caller said it's OK.
7852 */
7853 entry->map_aligned = FALSE;
7854 }
7855 if (map == kalloc_map) {
7856 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7857 "clipping %p at 0x%llx\n",
7858 map,
7859 (uint64_t)start,
7860 (uint64_t)end,
7861 entry,
7862 (uint64_t)s);
fe8ab488 7863 }
2d21ac55
A
7864 vm_map_clip_start(map, entry, s);
7865 }
7866 if (entry->vme_end <= end) {
7867 /*
7868 * This entry is going away completely, so no need
7869 * to clip and possibly cause an unnecessary unnesting.
7870 */
7871 } else {
fe8ab488
A
7872 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7873 entry->map_aligned &&
7874 !VM_MAP_PAGE_ALIGNED(end,
0a7de745 7875 VM_MAP_PAGE_MASK(map))) {
fe8ab488
A
7876 /*
7877 * The entry will no longer be map-aligned
7878 * after clipping and the caller said it's OK.
7879 */
7880 entry->map_aligned = FALSE;
7881 }
7882 if (map == kalloc_map) {
7883 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
7884 "clipping %p at 0x%llx\n",
7885 map,
7886 (uint64_t)start,
7887 (uint64_t)end,
7888 entry,
7889 (uint64_t)end);
fe8ab488 7890 }
2d21ac55
A
7891 vm_map_clip_end(map, entry, end);
7892 }
b0d623f7
A
7893
7894 if (entry->permanent) {
5ba3f43e
A
7895 if (map->pmap == kernel_pmap) {
7896 panic("%s(%p,0x%llx,0x%llx): "
0a7de745
A
7897 "attempt to remove permanent "
7898 "VM map entry "
7899 "%p [0x%llx:0x%llx]\n",
7900 __FUNCTION__,
7901 map,
7902 (uint64_t) start,
7903 (uint64_t) end,
7904 entry,
7905 (uint64_t) entry->vme_start,
7906 (uint64_t) entry->vme_end);
5ba3f43e
A
7907 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7908// printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7909 entry->permanent = FALSE;
7910 } else {
d9a64523 7911 if (vm_map_executable_immutable_verbose) {
5ba3f43e 7912 printf("%d[%s] %s(0x%llx,0x%llx): "
0a7de745
A
7913 "permanent entry [0x%llx:0x%llx] "
7914 "prot 0x%x/0x%x\n",
7915 proc_selfpid(),
7916 (current_task()->bsd_info
7917 ? proc_name_address(current_task()->bsd_info)
7918 : "?"),
7919 __FUNCTION__,
7920 (uint64_t) start,
7921 (uint64_t) end,
7922 (uint64_t)entry->vme_start,
7923 (uint64_t)entry->vme_end,
7924 entry->protection,
7925 entry->max_protection);
5ba3f43e
A
7926 }
7927 /*
7928 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7929 */
7930 DTRACE_VM5(vm_map_delete_permanent,
0a7de745
A
7931 vm_map_offset_t, entry->vme_start,
7932 vm_map_offset_t, entry->vme_end,
7933 vm_prot_t, entry->protection,
7934 vm_prot_t, entry->max_protection,
7935 int, VME_ALIAS(entry));
5ba3f43e 7936 }
b0d623f7
A
7937 }
7938
7939
1c79356b 7940 if (entry->in_transition) {
9bccf70c
A
7941 wait_result_t wait_result;
7942
1c79356b
A
7943 /*
7944 * Another thread is wiring/unwiring this entry.
7945 * Let the other thread know we are waiting.
7946 */
2d21ac55 7947 assert(s == entry->vme_start);
1c79356b
A
7948 entry->needs_wakeup = TRUE;
7949
7950 /*
7951 * wake up anybody waiting on entries that we have
7952 * already unwired/deleted.
7953 */
7954 if (need_wakeup) {
7955 vm_map_entry_wakeup(map);
7956 need_wakeup = FALSE;
7957 }
7958
9bccf70c 7959 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
7960
7961 if (interruptible &&
9bccf70c 7962 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
7963 /*
7964 * We do not clear the needs_wakeup flag,
7965 * since we cannot tell if we were the only one.
7966 */
7967 return KERN_ABORTED;
9bccf70c 7968 }
1c79356b
A
7969
7970 /*
7971 * The entry could have been clipped or it
7972 * may not exist anymore. Look it up again.
7973 */
7974 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
7975 /*
7976 * User: use the next entry
7977 */
0a7de745 7978 if (gap_start == FIND_GAP) {
d9a64523 7979 gap_start = s;
0a7de745 7980 }
1c79356b 7981 entry = first_entry->vme_next;
2d21ac55 7982 s = entry->vme_start;
1c79356b
A
7983 } else {
7984 entry = first_entry;
0c530ab8 7985 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 7986 }
9bccf70c 7987 last_timestamp = map->timestamp;
1c79356b
A
7988 continue;
7989 } /* end in_transition */
7990
7991 if (entry->wired_count) {
0a7de745 7992 boolean_t user_wire;
2d21ac55
A
7993
7994 user_wire = entry->user_wired_count > 0;
7995
1c79356b 7996 /*
0a7de745 7997 * Remove a kernel wiring if requested
1c79356b 7998 */
b0d623f7 7999 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 8000 entry->wired_count--;
b0d623f7 8001 }
5ba3f43e 8002
b0d623f7
A
8003 /*
8004 * Remove all user wirings for proper accounting
8005 */
8006 if (entry->user_wired_count > 0) {
0a7de745 8007 while (entry->user_wired_count) {
b0d623f7 8008 subtract_wire_counts(map, entry, user_wire);
0a7de745 8009 }
b0d623f7 8010 }
1c79356b
A
8011
8012 if (entry->wired_count != 0) {
2d21ac55 8013 assert(map != kernel_map);
1c79356b
A
8014 /*
8015 * Cannot continue. Typical case is when
8016 * a user thread has physical io pending on
8017 * on this page. Either wait for the
8018 * kernel wiring to go away or return an
8019 * error.
8020 */
8021 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 8022 wait_result_t wait_result;
1c79356b 8023
2d21ac55 8024 assert(s == entry->vme_start);
1c79356b 8025 entry->needs_wakeup = TRUE;
9bccf70c 8026 wait_result = vm_map_entry_wait(map,
0a7de745 8027 interruptible);
1c79356b
A
8028
8029 if (interruptible &&
2d21ac55 8030 wait_result == THREAD_INTERRUPTED) {
1c79356b 8031 /*
5ba3f43e
A
8032 * We do not clear the
8033 * needs_wakeup flag, since we
8034 * cannot tell if we were the
1c79356b 8035 * only one.
2d21ac55 8036 */
1c79356b 8037 return KERN_ABORTED;
9bccf70c 8038 }
1c79356b
A
8039
8040 /*
2d21ac55 8041 * The entry could have been clipped or
1c79356b
A
8042 * it may not exist anymore. Look it
8043 * up again.
2d21ac55 8044 */
5ba3f43e 8045 if (!vm_map_lookup_entry(map, s,
0a7de745 8046 &first_entry)) {
2d21ac55 8047 assert(map != kernel_map);
1c79356b 8048 /*
2d21ac55
A
8049 * User: use the next entry
8050 */
0a7de745 8051 if (gap_start == FIND_GAP) {
d9a64523 8052 gap_start = s;
0a7de745 8053 }
1c79356b 8054 entry = first_entry->vme_next;
2d21ac55 8055 s = entry->vme_start;
1c79356b
A
8056 } else {
8057 entry = first_entry;
0c530ab8 8058 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 8059 }
9bccf70c 8060 last_timestamp = map->timestamp;
1c79356b 8061 continue;
0a7de745 8062 } else {
1c79356b
A
8063 return KERN_FAILURE;
8064 }
8065 }
8066
8067 entry->in_transition = TRUE;
8068 /*
8069 * copy current entry. see comment in vm_map_wire()
8070 */
8071 tmp_entry = *entry;
2d21ac55 8072 assert(s == entry->vme_start);
1c79356b
A
8073
8074 /*
8075 * We can unlock the map now. The in_transition
8076 * state guarantees existence of the entry.
8077 */
8078 vm_map_unlock(map);
2d21ac55
A
8079
8080 if (tmp_entry.is_sub_map) {
8081 vm_map_t sub_map;
8082 vm_map_offset_t sub_start, sub_end;
8083 pmap_t pmap;
8084 vm_map_offset_t pmap_addr;
5ba3f43e 8085
2d21ac55 8086
3e170ce0
A
8087 sub_map = VME_SUBMAP(&tmp_entry);
8088 sub_start = VME_OFFSET(&tmp_entry);
2d21ac55 8089 sub_end = sub_start + (tmp_entry.vme_end -
0a7de745 8090 tmp_entry.vme_start);
2d21ac55
A
8091 if (tmp_entry.use_pmap) {
8092 pmap = sub_map->pmap;
8093 pmap_addr = tmp_entry.vme_start;
8094 } else {
8095 pmap = map->pmap;
8096 pmap_addr = tmp_entry.vme_start;
8097 }
8098 (void) vm_map_unwire_nested(sub_map,
0a7de745
A
8099 sub_start, sub_end,
8100 user_wire,
8101 pmap, pmap_addr);
2d21ac55 8102 } else {
3e170ce0 8103 if (VME_OBJECT(&tmp_entry) == kernel_object) {
39236c6e
A
8104 pmap_protect_options(
8105 map->pmap,
8106 tmp_entry.vme_start,
8107 tmp_entry.vme_end,
8108 VM_PROT_NONE,
8109 PMAP_OPTIONS_REMOVE,
8110 NULL);
8111 }
2d21ac55 8112 vm_fault_unwire(map, &tmp_entry,
0a7de745
A
8113 VME_OBJECT(&tmp_entry) == kernel_object,
8114 map->pmap, tmp_entry.vme_start);
2d21ac55
A
8115 }
8116
1c79356b
A
8117 vm_map_lock(map);
8118
0a7de745 8119 if (last_timestamp + 1 != map->timestamp) {
1c79356b
A
8120 /*
8121 * Find the entry again. It could have
8122 * been clipped after we unlocked the map.
8123 */
0a7de745 8124 if (!vm_map_lookup_entry(map, s, &first_entry)) {
5ba3f43e 8125 assert((map != kernel_map) &&
0a7de745
A
8126 (!entry->is_sub_map));
8127 if (gap_start == FIND_GAP) {
d9a64523 8128 gap_start = s;
0a7de745 8129 }
1c79356b 8130 first_entry = first_entry->vme_next;
2d21ac55 8131 s = first_entry->vme_start;
1c79356b 8132 } else {
0c530ab8 8133 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
8134 }
8135 } else {
0c530ab8 8136 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
8137 first_entry = entry;
8138 }
8139
8140 last_timestamp = map->timestamp;
8141
8142 entry = first_entry;
8143 while ((entry != vm_map_to_entry(map)) &&
0a7de745 8144 (entry->vme_start < tmp_entry.vme_end)) {
1c79356b
A
8145 assert(entry->in_transition);
8146 entry->in_transition = FALSE;
8147 if (entry->needs_wakeup) {
8148 entry->needs_wakeup = FALSE;
8149 need_wakeup = TRUE;
8150 }
8151 entry = entry->vme_next;
8152 }
8153 /*
8154 * We have unwired the entry(s). Go back and
8155 * delete them.
8156 */
8157 entry = first_entry;
8158 continue;
8159 }
8160
8161 /* entry is unwired */
8162 assert(entry->wired_count == 0);
8163 assert(entry->user_wired_count == 0);
8164
2d21ac55
A
8165 assert(s == entry->vme_start);
8166
8167 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8168 /*
8169 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8170 * vm_map_delete(), some map entries might have been
8171 * transferred to a "zap_map", which doesn't have a
8172 * pmap. The original pmap has already been flushed
8173 * in the vm_map_delete() call targeting the original
8174 * map, but when we get to destroying the "zap_map",
8175 * we don't have any pmap to flush, so let's just skip
8176 * all this.
8177 */
8178 } else if (entry->is_sub_map) {
f427ee49
A
8179 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8180 "map %p (%d) entry %p submap %p (%d)\n",
8181 map, VM_MAP_PAGE_SHIFT(map), entry,
8182 VME_SUBMAP(entry),
8183 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
2d21ac55 8184 if (entry->use_pmap) {
f427ee49
A
8185 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) == VM_MAP_PAGE_SHIFT(map),
8186 "map %p (%d) entry %p submap %p (%d)\n",
8187 map, VM_MAP_PAGE_SHIFT(map), entry,
8188 VME_SUBMAP(entry),
8189 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
0c530ab8 8190#ifndef NO_NESTED_PMAP
3e170ce0
A
8191 int pmap_flags;
8192
8193 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8194 /*
8195 * This is the final cleanup of the
8196 * address space being terminated.
8197 * No new mappings are expected and
8198 * we don't really need to unnest the
8199 * shared region (and lose the "global"
8200 * pmap mappings, if applicable).
8201 *
8202 * Tell the pmap layer that we're
8203 * "clean" wrt nesting.
8204 */
8205 pmap_flags = PMAP_UNNEST_CLEAN;
8206 } else {
8207 /*
8208 * We're unmapping part of the nested
8209 * shared region, so we can't keep the
8210 * nested pmap.
8211 */
8212 pmap_flags = 0;
8213 }
8214 pmap_unnest_options(
8215 map->pmap,
8216 (addr64_t)entry->vme_start,
8217 entry->vme_end - entry->vme_start,
8218 pmap_flags);
0a7de745 8219#endif /* NO_NESTED_PMAP */
cb323159
A
8220 if (map->mapped_in_other_pmaps &&
8221 os_ref_get_count(&map->map_refcnt) != 0) {
9bccf70c
A
8222 /* clean up parent map/maps */
8223 vm_map_submap_pmap_clean(
8224 map, entry->vme_start,
8225 entry->vme_end,
3e170ce0
A
8226 VME_SUBMAP(entry),
8227 VME_OFFSET(entry));
9bccf70c 8228 }
2d21ac55 8229 } else {
1c79356b
A
8230 vm_map_submap_pmap_clean(
8231 map, entry->vme_start, entry->vme_end,
3e170ce0
A
8232 VME_SUBMAP(entry),
8233 VME_OFFSET(entry));
2d21ac55 8234 }
3e170ce0 8235 } else if (VME_OBJECT(entry) != kernel_object &&
0a7de745 8236 VME_OBJECT(entry) != compressor_object) {
3e170ce0 8237 object = VME_OBJECT(entry);
cb323159
A
8238 if (map->mapped_in_other_pmaps &&
8239 os_ref_get_count(&map->map_refcnt) != 0) {
39236c6e 8240 vm_object_pmap_protect_options(
3e170ce0 8241 object, VME_OFFSET(entry),
55e303ae
A
8242 entry->vme_end - entry->vme_start,
8243 PMAP_NULL,
f427ee49 8244 PAGE_SIZE,
55e303ae 8245 entry->vme_start,
39236c6e
A
8246 VM_PROT_NONE,
8247 PMAP_OPTIONS_REMOVE);
3e170ce0 8248 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
0a7de745 8249 (map->pmap == kernel_pmap)) {
39236c6e
A
8250 /* Remove translations associated
8251 * with this range unless the entry
8252 * does not have an object, or
8253 * it's the kernel map or a descendant
8254 * since the platform could potentially
8255 * create "backdoor" mappings invisible
8256 * to the VM. It is expected that
8257 * objectless, non-kernel ranges
8258 * do not have such VM invisible
8259 * translations.
8260 */
8261 pmap_remove_options(map->pmap,
0a7de745
A
8262 (addr64_t)entry->vme_start,
8263 (addr64_t)entry->vme_end,
8264 PMAP_OPTIONS_REMOVE);
1c79356b
A
8265 }
8266 }
8267
fe8ab488
A
8268 if (entry->iokit_acct) {
8269 /* alternate accounting */
ecc0ceb4 8270 DTRACE_VM4(vm_map_iokit_unmapped_region,
0a7de745
A
8271 vm_map_t, map,
8272 vm_map_offset_t, entry->vme_start,
8273 vm_map_offset_t, entry->vme_end,
8274 int, VME_ALIAS(entry));
fe8ab488 8275 vm_map_iokit_unmapped_region(map,
0a7de745
A
8276 (entry->vme_end -
8277 entry->vme_start));
fe8ab488 8278 entry->iokit_acct = FALSE;
a39ff7e2 8279 entry->use_pmap = FALSE;
fe8ab488
A
8280 }
8281
91447636
A
8282 /*
8283 * All pmap mappings for this map entry must have been
8284 * cleared by now.
8285 */
fe8ab488 8286#if DEBUG
91447636 8287 assert(vm_map_pmap_is_empty(map,
0a7de745
A
8288 entry->vme_start,
8289 entry->vme_end));
fe8ab488 8290#endif /* DEBUG */
91447636 8291
1c79356b 8292 next = entry->vme_next;
fe8ab488
A
8293
8294 if (map->pmap == kernel_pmap &&
cb323159 8295 os_ref_get_count(&map->map_refcnt) != 0 &&
fe8ab488
A
8296 entry->vme_end < end &&
8297 (next == vm_map_to_entry(map) ||
0a7de745 8298 next->vme_start != entry->vme_end)) {
fe8ab488 8299 panic("vm_map_delete(%p,0x%llx,0x%llx): "
0a7de745
A
8300 "hole after %p at 0x%llx\n",
8301 map,
8302 (uint64_t)start,
8303 (uint64_t)end,
8304 entry,
8305 (uint64_t)entry->vme_end);
fe8ab488
A
8306 }
8307
d9a64523
A
8308 /*
8309 * If the desired range didn't end with "entry", then there is a gap if
8310 * we wrapped around to the start of the map or if "entry" and "next"
8311 * aren't contiguous.
8312 *
8313 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8314 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8315 */
8316 if (gap_start == FIND_GAP &&
8317 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8318 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8319 gap_start = entry->vme_end;
8320 }
1c79356b
A
8321 s = next->vme_start;
8322 last_timestamp = map->timestamp;
91447636 8323
5ba3f43e
A
8324 if (entry->permanent) {
8325 /*
8326 * A permanent entry can not be removed, so leave it
8327 * in place but remove all access permissions.
8328 */
8329 entry->protection = VM_PROT_NONE;
8330 entry->max_protection = VM_PROT_NONE;
8331 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
0a7de745 8332 zap_map != VM_MAP_NULL) {
2d21ac55 8333 vm_map_size_t entry_size;
91447636
A
8334 /*
8335 * The caller wants to save the affected VM map entries
8336 * into the "zap_map". The caller will take care of
8337 * these entries.
8338 */
8339 /* unlink the entry from "map" ... */
6d2010ae 8340 vm_map_store_entry_unlink(map, entry);
91447636 8341 /* ... and add it to the end of the "zap_map" */
6d2010ae 8342 vm_map_store_entry_link(zap_map,
0a7de745
A
8343 vm_map_last_entry(zap_map),
8344 entry,
8345 VM_MAP_KERNEL_FLAGS_NONE);
2d21ac55
A
8346 entry_size = entry->vme_end - entry->vme_start;
8347 map->size -= entry_size;
8348 zap_map->size += entry_size;
8349 /* we didn't unlock the map, so no timestamp increase */
8350 last_timestamp--;
91447636
A
8351 } else {
8352 vm_map_entry_delete(map, entry);
8353 /* vm_map_entry_delete unlocks the map */
8354 vm_map_lock(map);
8355 }
8356
1c79356b
A
8357 entry = next;
8358
0a7de745 8359 if (entry == vm_map_to_entry(map)) {
1c79356b
A
8360 break;
8361 }
d9a64523 8362 if (last_timestamp + 1 != map->timestamp) {
1c79356b 8363 /*
d9a64523
A
8364 * We are responsible for deleting everything
8365 * from the given space. If someone has interfered,
8366 * we pick up where we left off. Back fills should
8367 * be all right for anyone, except map_delete, and
1c79356b
A
8368 * we have to assume that the task has been fully
8369 * disabled before we get here
8370 */
0a7de745
A
8371 if (!vm_map_lookup_entry(map, s, &entry)) {
8372 entry = entry->vme_next;
d9a64523
A
8373
8374 /*
8375 * Nothing found for s. If we weren't already done, then there is a gap.
8376 */
0a7de745 8377 if (gap_start == FIND_GAP && s < end) {
d9a64523 8378 gap_start = s;
0a7de745 8379 }
2d21ac55 8380 s = entry->vme_start;
0a7de745 8381 } else {
2d21ac55 8382 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
0a7de745 8383 }
5ba3f43e
A
8384 /*
8385 * others can not only allocate behind us, we can
8386 * also see coalesce while we don't have the map lock
1c79356b 8387 */
d9a64523 8388 if (entry == vm_map_to_entry(map)) {
1c79356b
A
8389 break;
8390 }
1c79356b
A
8391 }
8392 last_timestamp = map->timestamp;
8393 }
8394
0a7de745 8395 if (map->wait_for_space) {
1c79356b 8396 thread_wakeup((event_t) map);
0a7de745 8397 }
1c79356b
A
8398 /*
8399 * wake up anybody waiting on entries that we have already deleted.
8400 */
0a7de745 8401 if (need_wakeup) {
1c79356b 8402 vm_map_entry_wakeup(map);
0a7de745 8403 }
1c79356b 8404
d9a64523
A
8405 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8406 DTRACE_VM3(kern_vm_deallocate_gap,
8407 vm_map_offset_t, gap_start,
8408 vm_map_offset_t, save_start,
8409 vm_map_offset_t, save_end);
8410 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
d9a64523
A
8411 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8412 }
8413 }
8414
1c79356b
A
8415 return KERN_SUCCESS;
8416}
8417
ea3f0419
A
8418
8419/*
8420 * vm_map_terminate:
8421 *
8422 * Clean out a task's map.
8423 */
8424kern_return_t
8425vm_map_terminate(
8426 vm_map_t map)
8427{
8428 vm_map_lock(map);
8429 map->terminated = TRUE;
8430 vm_map_unlock(map);
8431
8432 return vm_map_remove(map,
8433 map->min_offset,
8434 map->max_offset,
8435 /*
8436 * Final cleanup:
8437 * + no unnesting
8438 * + remove immutable mappings
8439 * + allow gaps in range
8440 */
8441 (VM_MAP_REMOVE_NO_UNNESTING |
8442 VM_MAP_REMOVE_IMMUTABLE |
8443 VM_MAP_REMOVE_GAPS_OK));
8444}
8445
1c79356b
A
8446/*
8447 * vm_map_remove:
8448 *
8449 * Remove the given address range from the target map.
8450 * This is the exported form of vm_map_delete.
8451 */
8452kern_return_t
8453vm_map_remove(
0a7de745
A
8454 vm_map_t map,
8455 vm_map_offset_t start,
8456 vm_map_offset_t end,
8457 boolean_t flags)
1c79356b 8458{
0a7de745 8459 kern_return_t result;
9bccf70c 8460
1c79356b
A
8461 vm_map_lock(map);
8462 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e 8463 /*
f427ee49
A
8464 * For the zone maps, the kernel controls the allocation/freeing of memory.
8465 * Any free to the zone maps should be within the bounds of the map and
39236c6e 8466 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
f427ee49 8467 * free to the zone maps into a no-op, there is a problem and we should
39236c6e
A
8468 * panic.
8469 */
f427ee49
A
8470 if ((start == end) && zone_maps_owned(start, 1)) {
8471 panic("Nothing being freed to a zone map. start = end = %p\n", (void *)start);
0a7de745 8472 }
91447636 8473 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 8474 vm_map_unlock(map);
91447636 8475
0a7de745 8476 return result;
1c79356b
A
8477}
8478
39037602
A
8479/*
8480 * vm_map_remove_locked:
8481 *
8482 * Remove the given address range from the target locked map.
8483 * This is the exported form of vm_map_delete.
8484 */
8485kern_return_t
8486vm_map_remove_locked(
0a7de745
A
8487 vm_map_t map,
8488 vm_map_offset_t start,
8489 vm_map_offset_t end,
8490 boolean_t flags)
39037602 8491{
0a7de745 8492 kern_return_t result;
39037602
A
8493
8494 VM_MAP_RANGE_CHECK(map, start, end);
8495 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
0a7de745 8496 return result;
39037602
A
8497}
8498
1c79356b 8499
d9a64523
A
8500/*
8501 * Routine: vm_map_copy_allocate
8502 *
8503 * Description:
8504 * Allocates and initializes a map copy object.
8505 */
8506static vm_map_copy_t
8507vm_map_copy_allocate(void)
8508{
8509 vm_map_copy_t new_copy;
8510
8511 new_copy = zalloc(vm_map_copy_zone);
0a7de745 8512 bzero(new_copy, sizeof(*new_copy));
d9a64523
A
8513 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8514 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8515 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8516 return new_copy;
8517}
8518
1c79356b
A
8519/*
8520 * Routine: vm_map_copy_discard
8521 *
8522 * Description:
8523 * Dispose of a map copy object (returned by
8524 * vm_map_copyin).
8525 */
8526void
8527vm_map_copy_discard(
0a7de745 8528 vm_map_copy_t copy)
1c79356b 8529{
0a7de745 8530 if (copy == VM_MAP_COPY_NULL) {
1c79356b 8531 return;
0a7de745 8532 }
1c79356b
A
8533
8534 switch (copy->type) {
8535 case VM_MAP_COPY_ENTRY_LIST:
8536 while (vm_map_copy_first_entry(copy) !=
0a7de745
A
8537 vm_map_copy_to_entry(copy)) {
8538 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
1c79356b
A
8539
8540 vm_map_copy_entry_unlink(copy, entry);
39236c6e 8541 if (entry->is_sub_map) {
3e170ce0 8542 vm_map_deallocate(VME_SUBMAP(entry));
39236c6e 8543 } else {
3e170ce0 8544 vm_object_deallocate(VME_OBJECT(entry));
39236c6e 8545 }
1c79356b
A
8546 vm_map_copy_entry_dispose(copy, entry);
8547 }
8548 break;
0a7de745 8549 case VM_MAP_COPY_OBJECT:
1c79356b
A
8550 vm_object_deallocate(copy->cpy_object);
8551 break;
1c79356b
A
8552 case VM_MAP_COPY_KERNEL_BUFFER:
8553
8554 /*
8555 * The vm_map_copy_t and possibly the data buffer were
f427ee49 8556 * allocated by a single call to kheap_alloc(), i.e. the
1c79356b
A
8557 * vm_map_copy_t was not allocated out of the zone.
8558 */
0a7de745 8559 if (copy->size > msg_ool_size_small || copy->offset) {
3e170ce0 8560 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
0a7de745
A
8561 (long long)copy->size, (long long)copy->offset);
8562 }
f427ee49 8563 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy->size);
1c79356b 8564 }
91447636 8565 zfree(vm_map_copy_zone, copy);
1c79356b
A
8566}
8567
8568/*
8569 * Routine: vm_map_copy_copy
8570 *
8571 * Description:
8572 * Move the information in a map copy object to
8573 * a new map copy object, leaving the old one
8574 * empty.
8575 *
8576 * This is used by kernel routines that need
8577 * to look at out-of-line data (in copyin form)
8578 * before deciding whether to return SUCCESS.
8579 * If the routine returns FAILURE, the original
8580 * copy object will be deallocated; therefore,
8581 * these routines must make a copy of the copy
8582 * object and leave the original empty so that
8583 * deallocation will not fail.
8584 */
8585vm_map_copy_t
8586vm_map_copy_copy(
0a7de745 8587 vm_map_copy_t copy)
1c79356b 8588{
0a7de745 8589 vm_map_copy_t new_copy;
1c79356b 8590
0a7de745 8591 if (copy == VM_MAP_COPY_NULL) {
1c79356b 8592 return VM_MAP_COPY_NULL;
0a7de745 8593 }
1c79356b
A
8594
8595 /*
8596 * Allocate a new copy object, and copy the information
8597 * from the old one into it.
8598 */
8599
8600 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
f427ee49
A
8601 memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8602#if __has_feature(ptrauth_calls)
8603 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8604 new_copy->cpy_kdata = copy->cpy_kdata;
8605 }
8606#endif
1c79356b
A
8607
8608 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8609 /*
8610 * The links in the entry chain must be
8611 * changed to point to the new copy object.
8612 */
8613 vm_map_copy_first_entry(copy)->vme_prev
0a7de745 8614 = vm_map_copy_to_entry(new_copy);
1c79356b 8615 vm_map_copy_last_entry(copy)->vme_next
0a7de745 8616 = vm_map_copy_to_entry(new_copy);
1c79356b
A
8617 }
8618
8619 /*
8620 * Change the old copy object into one that contains
8621 * nothing to be deallocated.
8622 */
8623 copy->type = VM_MAP_COPY_OBJECT;
8624 copy->cpy_object = VM_OBJECT_NULL;
8625
8626 /*
8627 * Return the new object.
8628 */
8629 return new_copy;
8630}
8631
91447636 8632static kern_return_t
1c79356b 8633vm_map_overwrite_submap_recurse(
0a7de745
A
8634 vm_map_t dst_map,
8635 vm_map_offset_t dst_addr,
8636 vm_map_size_t dst_size)
1c79356b 8637{
0a7de745
A
8638 vm_map_offset_t dst_end;
8639 vm_map_entry_t tmp_entry;
8640 vm_map_entry_t entry;
8641 kern_return_t result;
8642 boolean_t encountered_sub_map = FALSE;
1c79356b
A
8643
8644
8645
8646 /*
8647 * Verify that the destination is all writeable
8648 * initially. We have to trunc the destination
8649 * address and round the copy size or we'll end up
8650 * splitting entries in strange ways.
8651 */
8652
39236c6e 8653 dst_end = vm_map_round_page(dst_addr + dst_size,
0a7de745 8654 VM_MAP_PAGE_MASK(dst_map));
9bccf70c 8655 vm_map_lock(dst_map);
1c79356b
A
8656
8657start_pass_1:
1c79356b
A
8658 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8659 vm_map_unlock(dst_map);
0a7de745 8660 return KERN_INVALID_ADDRESS;
1c79356b
A
8661 }
8662
39236c6e 8663 vm_map_clip_start(dst_map,
0a7de745
A
8664 tmp_entry,
8665 vm_map_trunc_page(dst_addr,
8666 VM_MAP_PAGE_MASK(dst_map)));
fe8ab488
A
8667 if (tmp_entry->is_sub_map) {
8668 /* clipping did unnest if needed */
8669 assert(!tmp_entry->use_pmap);
8670 }
1c79356b
A
8671
8672 for (entry = tmp_entry;;) {
0a7de745 8673 vm_map_entry_t next;
1c79356b
A
8674
8675 next = entry->vme_next;
0a7de745
A
8676 while (entry->is_sub_map) {
8677 vm_map_offset_t sub_start;
8678 vm_map_offset_t sub_end;
8679 vm_map_offset_t local_end;
1c79356b
A
8680
8681 if (entry->in_transition) {
2d21ac55
A
8682 /*
8683 * Say that we are waiting, and wait for entry.
8684 */
0a7de745
A
8685 entry->needs_wakeup = TRUE;
8686 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8687
8688 goto start_pass_1;
8689 }
8690
8691 encountered_sub_map = TRUE;
3e170ce0 8692 sub_start = VME_OFFSET(entry);
1c79356b 8693
0a7de745 8694 if (entry->vme_end < dst_end) {
1c79356b 8695 sub_end = entry->vme_end;
0a7de745 8696 } else {
1c79356b 8697 sub_end = dst_end;
0a7de745 8698 }
1c79356b 8699 sub_end -= entry->vme_start;
3e170ce0 8700 sub_end += VME_OFFSET(entry);
1c79356b
A
8701 local_end = entry->vme_end;
8702 vm_map_unlock(dst_map);
5ba3f43e 8703
1c79356b 8704 result = vm_map_overwrite_submap_recurse(
3e170ce0 8705 VME_SUBMAP(entry),
2d21ac55
A
8706 sub_start,
8707 sub_end - sub_start);
1c79356b 8708
0a7de745 8709 if (result != KERN_SUCCESS) {
1c79356b 8710 return result;
0a7de745
A
8711 }
8712 if (dst_end <= entry->vme_end) {
1c79356b 8713 return KERN_SUCCESS;
0a7de745 8714 }
1c79356b 8715 vm_map_lock(dst_map);
0a7de745
A
8716 if (!vm_map_lookup_entry(dst_map, local_end,
8717 &tmp_entry)) {
1c79356b 8718 vm_map_unlock(dst_map);
0a7de745 8719 return KERN_INVALID_ADDRESS;
1c79356b
A
8720 }
8721 entry = tmp_entry;
8722 next = entry->vme_next;
8723 }
8724
0a7de745 8725 if (!(entry->protection & VM_PROT_WRITE)) {
1c79356b 8726 vm_map_unlock(dst_map);
0a7de745 8727 return KERN_PROTECTION_FAILURE;
1c79356b
A
8728 }
8729
8730 /*
8731 * If the entry is in transition, we must wait
8732 * for it to exit that state. Anything could happen
8733 * when we unlock the map, so start over.
8734 */
0a7de745
A
8735 if (entry->in_transition) {
8736 /*
8737 * Say that we are waiting, and wait for entry.
8738 */
8739 entry->needs_wakeup = TRUE;
8740 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8741
8742 goto start_pass_1;
8743 }
8744
8745/*
8746 * our range is contained completely within this map entry
8747 */
8748 if (dst_end <= entry->vme_end) {
8749 vm_map_unlock(dst_map);
8750 return KERN_SUCCESS;
8751 }
8752/*
8753 * check that range specified is contiguous region
8754 */
8755 if ((next == vm_map_to_entry(dst_map)) ||
8756 (next->vme_start != entry->vme_end)) {
8757 vm_map_unlock(dst_map);
0a7de745 8758 return KERN_INVALID_ADDRESS;
1c79356b
A
8759 }
8760
8761 /*
8762 * Check for permanent objects in the destination.
8763 */
3e170ce0
A
8764 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8765 ((!VME_OBJECT(entry)->internal) ||
0a7de745
A
8766 (VME_OBJECT(entry)->true_share))) {
8767 if (encountered_sub_map) {
1c79356b 8768 vm_map_unlock(dst_map);
0a7de745 8769 return KERN_FAILURE;
1c79356b
A
8770 }
8771 }
8772
8773
8774 entry = next;
8775 }/* for */
8776 vm_map_unlock(dst_map);
0a7de745 8777 return KERN_SUCCESS;
1c79356b
A
8778}
8779
8780/*
8781 * Routine: vm_map_copy_overwrite
8782 *
8783 * Description:
8784 * Copy the memory described by the map copy
8785 * object (copy; returned by vm_map_copyin) onto
8786 * the specified destination region (dst_map, dst_addr).
8787 * The destination must be writeable.
8788 *
8789 * Unlike vm_map_copyout, this routine actually
8790 * writes over previously-mapped memory. If the
8791 * previous mapping was to a permanent (user-supplied)
8792 * memory object, it is preserved.
8793 *
8794 * The attributes (protection and inheritance) of the
8795 * destination region are preserved.
8796 *
8797 * If successful, consumes the copy object.
8798 * Otherwise, the caller is responsible for it.
8799 *
8800 * Implementation notes:
8801 * To overwrite aligned temporary virtual memory, it is
8802 * sufficient to remove the previous mapping and insert
8803 * the new copy. This replacement is done either on
8804 * the whole region (if no permanent virtual memory
8805 * objects are embedded in the destination region) or
8806 * in individual map entries.
8807 *
8808 * To overwrite permanent virtual memory, it is necessary
8809 * to copy each page, as the external memory management
8810 * interface currently does not provide any optimizations.
8811 *
8812 * Unaligned memory also has to be copied. It is possible
8813 * to use 'vm_trickery' to copy the aligned data. This is
8814 * not done but not hard to implement.
8815 *
8816 * Once a page of permanent memory has been overwritten,
8817 * it is impossible to interrupt this function; otherwise,
8818 * the call would be neither atomic nor location-independent.
8819 * The kernel-state portion of a user thread must be
8820 * interruptible.
8821 *
8822 * It may be expensive to forward all requests that might
8823 * overwrite permanent memory (vm_write, vm_copy) to
8824 * uninterruptible kernel threads. This routine may be
8825 * called by interruptible threads; however, success is
8826 * not guaranteed -- if the request cannot be performed
8827 * atomically and interruptibly, an error indication is
8828 * returned.
8829 */
8830
91447636 8831static kern_return_t
1c79356b 8832vm_map_copy_overwrite_nested(
0a7de745
A
8833 vm_map_t dst_map,
8834 vm_map_address_t dst_addr,
8835 vm_map_copy_t copy,
8836 boolean_t interruptible,
8837 pmap_t pmap,
8838 boolean_t discard_on_success)
1c79356b 8839{
0a7de745
A
8840 vm_map_offset_t dst_end;
8841 vm_map_entry_t tmp_entry;
8842 vm_map_entry_t entry;
8843 kern_return_t kr;
8844 boolean_t aligned = TRUE;
8845 boolean_t contains_permanent_objects = FALSE;
8846 boolean_t encountered_sub_map = FALSE;
8847 vm_map_offset_t base_addr;
8848 vm_map_size_t copy_size;
8849 vm_map_size_t total_size;
f427ee49 8850 int copy_page_shift;
1c79356b
A
8851
8852
8853 /*
8854 * Check for null copy object.
8855 */
8856
0a7de745
A
8857 if (copy == VM_MAP_COPY_NULL) {
8858 return KERN_SUCCESS;
8859 }
1c79356b 8860
f427ee49
A
8861 /*
8862 * Assert that the vm_map_copy is coming from the right
8863 * zone and hasn't been forged
8864 */
8865 vm_map_copy_require(copy);
8866
1c79356b
A
8867 /*
8868 * Check for special kernel buffer allocated
8869 * by new_ipc_kmsg_copyin.
8870 */
8871
8872 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0a7de745
A
8873 return vm_map_copyout_kernel_buffer(
8874 dst_map, &dst_addr,
8875 copy, copy->size, TRUE, discard_on_success);
1c79356b
A
8876 }
8877
8878 /*
8879 * Only works for entry lists at the moment. Will
8880 * support page lists later.
8881 */
8882
8883 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8884
8885 if (copy->size == 0) {
0a7de745 8886 if (discard_on_success) {
6d2010ae 8887 vm_map_copy_discard(copy);
0a7de745
A
8888 }
8889 return KERN_SUCCESS;
1c79356b
A
8890 }
8891
f427ee49
A
8892 copy_page_shift = copy->cpy_hdr.page_shift;
8893
1c79356b
A
8894 /*
8895 * Verify that the destination is all writeable
8896 * initially. We have to trunc the destination
8897 * address and round the copy size or we'll end up
8898 * splitting entries in strange ways.
8899 */
8900
39236c6e 8901 if (!VM_MAP_PAGE_ALIGNED(copy->size,
0a7de745 8902 VM_MAP_PAGE_MASK(dst_map)) ||
39236c6e 8903 !VM_MAP_PAGE_ALIGNED(copy->offset,
0a7de745 8904 VM_MAP_PAGE_MASK(dst_map)) ||
39236c6e 8905 !VM_MAP_PAGE_ALIGNED(dst_addr,
f427ee49
A
8906 VM_MAP_PAGE_MASK(dst_map)) ||
8907 copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
1c79356b 8908 aligned = FALSE;
39236c6e 8909 dst_end = vm_map_round_page(dst_addr + copy->size,
0a7de745 8910 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
8911 } else {
8912 dst_end = dst_addr + copy->size;
8913 }
8914
1c79356b 8915 vm_map_lock(dst_map);
9bccf70c 8916
91447636
A
8917 /* LP64todo - remove this check when vm_map_commpage64()
8918 * no longer has to stuff in a map_entry for the commpage
8919 * above the map's max_offset.
8920 */
8921 if (dst_addr >= dst_map->max_offset) {
8922 vm_map_unlock(dst_map);
0a7de745 8923 return KERN_INVALID_ADDRESS;
91447636 8924 }
5ba3f43e 8925
9bccf70c 8926start_pass_1:
1c79356b
A
8927 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8928 vm_map_unlock(dst_map);
0a7de745 8929 return KERN_INVALID_ADDRESS;
1c79356b 8930 }
39236c6e 8931 vm_map_clip_start(dst_map,
0a7de745
A
8932 tmp_entry,
8933 vm_map_trunc_page(dst_addr,
8934 VM_MAP_PAGE_MASK(dst_map)));
1c79356b 8935 for (entry = tmp_entry;;) {
0a7de745 8936 vm_map_entry_t next = entry->vme_next;
1c79356b 8937
0a7de745
A
8938 while (entry->is_sub_map) {
8939 vm_map_offset_t sub_start;
8940 vm_map_offset_t sub_end;
8941 vm_map_offset_t local_end;
1c79356b 8942
0a7de745 8943 if (entry->in_transition) {
2d21ac55
A
8944 /*
8945 * Say that we are waiting, and wait for entry.
8946 */
0a7de745
A
8947 entry->needs_wakeup = TRUE;
8948 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
8949
8950 goto start_pass_1;
8951 }
8952
8953 local_end = entry->vme_end;
0a7de745 8954 if (!(entry->needs_copy)) {
1c79356b
A
8955 /* if needs_copy we are a COW submap */
8956 /* in such a case we just replace so */
8957 /* there is no need for the follow- */
8958 /* ing check. */
8959 encountered_sub_map = TRUE;
3e170ce0 8960 sub_start = VME_OFFSET(entry);
1c79356b 8961
0a7de745 8962 if (entry->vme_end < dst_end) {
1c79356b 8963 sub_end = entry->vme_end;
0a7de745 8964 } else {
1c79356b 8965 sub_end = dst_end;
0a7de745 8966 }
1c79356b 8967 sub_end -= entry->vme_start;
3e170ce0 8968 sub_end += VME_OFFSET(entry);
1c79356b 8969 vm_map_unlock(dst_map);
5ba3f43e 8970
1c79356b 8971 kr = vm_map_overwrite_submap_recurse(
3e170ce0 8972 VME_SUBMAP(entry),
1c79356b
A
8973 sub_start,
8974 sub_end - sub_start);
0a7de745 8975 if (kr != KERN_SUCCESS) {
1c79356b 8976 return kr;
0a7de745 8977 }
1c79356b
A
8978 vm_map_lock(dst_map);
8979 }
8980
0a7de745 8981 if (dst_end <= entry->vme_end) {
1c79356b 8982 goto start_overwrite;
0a7de745
A
8983 }
8984 if (!vm_map_lookup_entry(dst_map, local_end,
8985 &entry)) {
1c79356b 8986 vm_map_unlock(dst_map);
0a7de745 8987 return KERN_INVALID_ADDRESS;
1c79356b
A
8988 }
8989 next = entry->vme_next;
8990 }
8991
0a7de745 8992 if (!(entry->protection & VM_PROT_WRITE)) {
1c79356b 8993 vm_map_unlock(dst_map);
0a7de745 8994 return KERN_PROTECTION_FAILURE;
1c79356b
A
8995 }
8996
8997 /*
8998 * If the entry is in transition, we must wait
8999 * for it to exit that state. Anything could happen
9000 * when we unlock the map, so start over.
9001 */
0a7de745
A
9002 if (entry->in_transition) {
9003 /*
9004 * Say that we are waiting, and wait for entry.
9005 */
9006 entry->needs_wakeup = TRUE;
9007 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
9008
9009 goto start_pass_1;
9010 }
9011
9012/*
9013 * our range is contained completely within this map entry
9014 */
0a7de745 9015 if (dst_end <= entry->vme_end) {
1c79356b 9016 break;
0a7de745 9017 }
1c79356b
A
9018/*
9019 * check that range specified is contiguous region
9020 */
9021 if ((next == vm_map_to_entry(dst_map)) ||
9022 (next->vme_start != entry->vme_end)) {
9023 vm_map_unlock(dst_map);
0a7de745 9024 return KERN_INVALID_ADDRESS;
1c79356b
A
9025 }
9026
9027
9028 /*
9029 * Check for permanent objects in the destination.
9030 */
3e170ce0
A
9031 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9032 ((!VME_OBJECT(entry)->internal) ||
0a7de745 9033 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
9034 contains_permanent_objects = TRUE;
9035 }
9036
9037 entry = next;
9038 }/* for */
9039
9040start_overwrite:
9041 /*
9042 * If there are permanent objects in the destination, then
9043 * the copy cannot be interrupted.
9044 */
9045
9046 if (interruptible && contains_permanent_objects) {
9047 vm_map_unlock(dst_map);
0a7de745 9048 return KERN_FAILURE; /* XXX */
1c79356b
A
9049 }
9050
9051 /*
0a7de745 9052 *
1c79356b
A
9053 * Make a second pass, overwriting the data
9054 * At the beginning of each loop iteration,
9055 * the next entry to be overwritten is "tmp_entry"
9056 * (initially, the value returned from the lookup above),
9057 * and the starting address expected in that entry
9058 * is "start".
9059 */
9060
9061 total_size = copy->size;
0a7de745 9062 if (encountered_sub_map) {
1c79356b
A
9063 copy_size = 0;
9064 /* re-calculate tmp_entry since we've had the map */
9065 /* unlocked */
9066 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9067 vm_map_unlock(dst_map);
0a7de745 9068 return KERN_INVALID_ADDRESS;
1c79356b
A
9069 }
9070 } else {
9071 copy_size = copy->size;
9072 }
5ba3f43e 9073
1c79356b 9074 base_addr = dst_addr;
0a7de745 9075 while (TRUE) {
1c79356b
A
9076 /* deconstruct the copy object and do in parts */
9077 /* only in sub_map, interruptable case */
0a7de745
A
9078 vm_map_entry_t copy_entry;
9079 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
9080 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
9081 int nentries;
9082 int remaining_entries = 0;
9083 vm_map_offset_t new_offset = 0;
5ba3f43e 9084
1c79356b 9085 for (entry = tmp_entry; copy_size == 0;) {
0a7de745 9086 vm_map_entry_t next;
1c79356b
A
9087
9088 next = entry->vme_next;
9089
9090 /* tmp_entry and base address are moved along */
9091 /* each time we encounter a sub-map. Otherwise */
9092 /* entry can outpase tmp_entry, and the copy_size */
9093 /* may reflect the distance between them */
9094 /* if the current entry is found to be in transition */
9095 /* we will start over at the beginning or the last */
9096 /* encounter of a submap as dictated by base_addr */
9097 /* we will zero copy_size accordingly. */
9098 if (entry->in_transition) {
0a7de745
A
9099 /*
9100 * Say that we are waiting, and wait for entry.
9101 */
9102 entry->needs_wakeup = TRUE;
9103 vm_map_entry_wait(dst_map, THREAD_UNINT);
9104
9105 if (!vm_map_lookup_entry(dst_map, base_addr,
9106 &tmp_entry)) {
1c79356b 9107 vm_map_unlock(dst_map);
0a7de745 9108 return KERN_INVALID_ADDRESS;
1c79356b
A
9109 }
9110 copy_size = 0;
9111 entry = tmp_entry;
9112 continue;
9113 }
5ba3f43e 9114 if (entry->is_sub_map) {
0a7de745
A
9115 vm_map_offset_t sub_start;
9116 vm_map_offset_t sub_end;
9117 vm_map_offset_t local_end;
1c79356b 9118
0a7de745 9119 if (entry->needs_copy) {
1c79356b
A
9120 /* if this is a COW submap */
9121 /* just back the range with a */
9122 /* anonymous entry */
0a7de745 9123 if (entry->vme_end < dst_end) {
1c79356b 9124 sub_end = entry->vme_end;
0a7de745 9125 } else {
1c79356b 9126 sub_end = dst_end;
0a7de745
A
9127 }
9128 if (entry->vme_start < base_addr) {
1c79356b 9129 sub_start = base_addr;
0a7de745 9130 } else {
1c79356b 9131 sub_start = entry->vme_start;
0a7de745 9132 }
1c79356b
A
9133 vm_map_clip_end(
9134 dst_map, entry, sub_end);
9135 vm_map_clip_start(
9136 dst_map, entry, sub_start);
2d21ac55 9137 assert(!entry->use_pmap);
a39ff7e2
A
9138 assert(!entry->iokit_acct);
9139 entry->use_pmap = TRUE;
1c79356b
A
9140 entry->is_sub_map = FALSE;
9141 vm_map_deallocate(
3e170ce0 9142 VME_SUBMAP(entry));
cb323159 9143 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5ba3f43e 9144 VME_OFFSET_SET(entry, 0);
1c79356b
A
9145 entry->is_shared = FALSE;
9146 entry->needs_copy = FALSE;
5ba3f43e 9147 entry->protection = VM_PROT_DEFAULT;
1c79356b
A
9148 entry->max_protection = VM_PROT_ALL;
9149 entry->wired_count = 0;
9150 entry->user_wired_count = 0;
0a7de745
A
9151 if (entry->inheritance
9152 == VM_INHERIT_SHARE) {
2d21ac55 9153 entry->inheritance = VM_INHERIT_COPY;
0a7de745 9154 }
1c79356b
A
9155 continue;
9156 }
9157 /* first take care of any non-sub_map */
9158 /* entries to send */
0a7de745 9159 if (base_addr < entry->vme_start) {
1c79356b 9160 /* stuff to send */
5ba3f43e 9161 copy_size =
0a7de745 9162 entry->vme_start - base_addr;
1c79356b
A
9163 break;
9164 }
3e170ce0 9165 sub_start = VME_OFFSET(entry);
1c79356b 9166
0a7de745 9167 if (entry->vme_end < dst_end) {
1c79356b 9168 sub_end = entry->vme_end;
0a7de745 9169 } else {
1c79356b 9170 sub_end = dst_end;
0a7de745 9171 }
1c79356b 9172 sub_end -= entry->vme_start;
3e170ce0 9173 sub_end += VME_OFFSET(entry);
1c79356b
A
9174 local_end = entry->vme_end;
9175 vm_map_unlock(dst_map);
9176 copy_size = sub_end - sub_start;
9177
9178 /* adjust the copy object */
9179 if (total_size > copy_size) {
0a7de745
A
9180 vm_map_size_t local_size = 0;
9181 vm_map_size_t entry_size;
1c79356b 9182
2d21ac55
A
9183 nentries = 1;
9184 new_offset = copy->offset;
9185 copy_entry = vm_map_copy_first_entry(copy);
0a7de745
A
9186 while (copy_entry !=
9187 vm_map_copy_to_entry(copy)) {
5ba3f43e 9188 entry_size = copy_entry->vme_end -
0a7de745
A
9189 copy_entry->vme_start;
9190 if ((local_size < copy_size) &&
9191 ((local_size + entry_size)
2d21ac55 9192 >= copy_size)) {
5ba3f43e 9193 vm_map_copy_clip_end(copy,
0a7de745
A
9194 copy_entry,
9195 copy_entry->vme_start +
9196 (copy_size - local_size));
5ba3f43e 9197 entry_size = copy_entry->vme_end -
0a7de745 9198 copy_entry->vme_start;
2d21ac55
A
9199 local_size += entry_size;
9200 new_offset += entry_size;
9201 }
0a7de745 9202 if (local_size >= copy_size) {
2d21ac55 9203 next_copy = copy_entry->vme_next;
5ba3f43e 9204 copy_entry->vme_next =
0a7de745 9205 vm_map_copy_to_entry(copy);
5ba3f43e 9206 previous_prev =
0a7de745 9207 copy->cpy_hdr.links.prev;
2d21ac55
A
9208 copy->cpy_hdr.links.prev = copy_entry;
9209 copy->size = copy_size;
5ba3f43e 9210 remaining_entries =
0a7de745 9211 copy->cpy_hdr.nentries;
2d21ac55
A
9212 remaining_entries -= nentries;
9213 copy->cpy_hdr.nentries = nentries;
9214 break;
9215 } else {
9216 local_size += entry_size;
9217 new_offset += entry_size;
9218 nentries++;
9219 }
9220 copy_entry = copy_entry->vme_next;
9221 }
1c79356b 9222 }
5ba3f43e 9223
0a7de745 9224 if ((entry->use_pmap) && (pmap == NULL)) {
1c79356b 9225 kr = vm_map_copy_overwrite_nested(
3e170ce0 9226 VME_SUBMAP(entry),
1c79356b
A
9227 sub_start,
9228 copy,
5ba3f43e 9229 interruptible,
3e170ce0 9230 VME_SUBMAP(entry)->pmap,
6d2010ae 9231 TRUE);
1c79356b
A
9232 } else if (pmap != NULL) {
9233 kr = vm_map_copy_overwrite_nested(
3e170ce0 9234 VME_SUBMAP(entry),
1c79356b
A
9235 sub_start,
9236 copy,
6d2010ae
A
9237 interruptible, pmap,
9238 TRUE);
1c79356b
A
9239 } else {
9240 kr = vm_map_copy_overwrite_nested(
3e170ce0 9241 VME_SUBMAP(entry),
1c79356b
A
9242 sub_start,
9243 copy,
9244 interruptible,
6d2010ae
A
9245 dst_map->pmap,
9246 TRUE);
1c79356b 9247 }
0a7de745
A
9248 if (kr != KERN_SUCCESS) {
9249 if (next_copy != NULL) {
5ba3f43e 9250 copy->cpy_hdr.nentries +=
0a7de745 9251 remaining_entries;
5ba3f43e 9252 copy->cpy_hdr.links.prev->vme_next =
0a7de745 9253 next_copy;
5ba3f43e 9254 copy->cpy_hdr.links.prev
0a7de745 9255 = previous_prev;
2d21ac55 9256 copy->size = total_size;
1c79356b
A
9257 }
9258 return kr;
9259 }
9260 if (dst_end <= local_end) {
0a7de745 9261 return KERN_SUCCESS;
1c79356b
A
9262 }
9263 /* otherwise copy no longer exists, it was */
9264 /* destroyed after successful copy_overwrite */
d9a64523 9265 copy = vm_map_copy_allocate();
1c79356b
A
9266 copy->type = VM_MAP_COPY_ENTRY_LIST;
9267 copy->offset = new_offset;
f427ee49 9268 copy->cpy_hdr.page_shift = copy_page_shift;
1c79356b 9269
e2d2fc5c
A
9270 /*
9271 * XXX FBDP
9272 * this does not seem to deal with
9273 * the VM map store (R&B tree)
9274 */
9275
1c79356b
A
9276 total_size -= copy_size;
9277 copy_size = 0;
9278 /* put back remainder of copy in container */
0a7de745 9279 if (next_copy != NULL) {
2d21ac55
A
9280 copy->cpy_hdr.nentries = remaining_entries;
9281 copy->cpy_hdr.links.next = next_copy;
9282 copy->cpy_hdr.links.prev = previous_prev;
9283 copy->size = total_size;
5ba3f43e 9284 next_copy->vme_prev =
0a7de745 9285 vm_map_copy_to_entry(copy);
2d21ac55 9286 next_copy = NULL;
1c79356b
A
9287 }
9288 base_addr = local_end;
9289 vm_map_lock(dst_map);
0a7de745
A
9290 if (!vm_map_lookup_entry(dst_map,
9291 local_end, &tmp_entry)) {
1c79356b 9292 vm_map_unlock(dst_map);
0a7de745 9293 return KERN_INVALID_ADDRESS;
1c79356b
A
9294 }
9295 entry = tmp_entry;
9296 continue;
5ba3f43e 9297 }
1c79356b
A
9298 if (dst_end <= entry->vme_end) {
9299 copy_size = dst_end - base_addr;
9300 break;
9301 }
9302
9303 if ((next == vm_map_to_entry(dst_map)) ||
2d21ac55 9304 (next->vme_start != entry->vme_end)) {
1c79356b 9305 vm_map_unlock(dst_map);
0a7de745 9306 return KERN_INVALID_ADDRESS;
1c79356b
A
9307 }
9308
9309 entry = next;
9310 }/* for */
9311
9312 next_copy = NULL;
9313 nentries = 1;
9314
9315 /* adjust the copy object */
9316 if (total_size > copy_size) {
0a7de745
A
9317 vm_map_size_t local_size = 0;
9318 vm_map_size_t entry_size;
1c79356b
A
9319
9320 new_offset = copy->offset;
9321 copy_entry = vm_map_copy_first_entry(copy);
0a7de745 9322 while (copy_entry != vm_map_copy_to_entry(copy)) {
5ba3f43e 9323 entry_size = copy_entry->vme_end -
0a7de745
A
9324 copy_entry->vme_start;
9325 if ((local_size < copy_size) &&
9326 ((local_size + entry_size)
2d21ac55 9327 >= copy_size)) {
5ba3f43e 9328 vm_map_copy_clip_end(copy, copy_entry,
0a7de745
A
9329 copy_entry->vme_start +
9330 (copy_size - local_size));
5ba3f43e 9331 entry_size = copy_entry->vme_end -
0a7de745 9332 copy_entry->vme_start;
1c79356b
A
9333 local_size += entry_size;
9334 new_offset += entry_size;
9335 }
0a7de745 9336 if (local_size >= copy_size) {
1c79356b 9337 next_copy = copy_entry->vme_next;
5ba3f43e 9338 copy_entry->vme_next =
0a7de745 9339 vm_map_copy_to_entry(copy);
5ba3f43e 9340 previous_prev =
0a7de745 9341 copy->cpy_hdr.links.prev;
1c79356b
A
9342 copy->cpy_hdr.links.prev = copy_entry;
9343 copy->size = copy_size;
5ba3f43e 9344 remaining_entries =
0a7de745 9345 copy->cpy_hdr.nentries;
1c79356b
A
9346 remaining_entries -= nentries;
9347 copy->cpy_hdr.nentries = nentries;
9348 break;
9349 } else {
9350 local_size += entry_size;
9351 new_offset += entry_size;
9352 nentries++;
9353 }
9354 copy_entry = copy_entry->vme_next;
9355 }
9356 }
9357
9358 if (aligned) {
0a7de745 9359 pmap_t local_pmap;
1c79356b 9360
0a7de745 9361 if (pmap) {
1c79356b 9362 local_pmap = pmap;
0a7de745 9363 } else {
1c79356b 9364 local_pmap = dst_map->pmap;
0a7de745 9365 }
1c79356b 9366
5ba3f43e 9367 if ((kr = vm_map_copy_overwrite_aligned(
0a7de745
A
9368 dst_map, tmp_entry, copy,
9369 base_addr, local_pmap)) != KERN_SUCCESS) {
9370 if (next_copy != NULL) {
5ba3f43e 9371 copy->cpy_hdr.nentries +=
0a7de745
A
9372 remaining_entries;
9373 copy->cpy_hdr.links.prev->vme_next =
9374 next_copy;
9375 copy->cpy_hdr.links.prev =
9376 previous_prev;
1c79356b
A
9377 copy->size += copy_size;
9378 }
9379 return kr;
9380 }
9381 vm_map_unlock(dst_map);
9382 } else {
2d21ac55
A
9383 /*
9384 * Performance gain:
9385 *
9386 * if the copy and dst address are misaligned but the same
9387 * offset within the page we can copy_not_aligned the
9388 * misaligned parts and copy aligned the rest. If they are
9389 * aligned but len is unaligned we simply need to copy
9390 * the end bit unaligned. We'll need to split the misaligned
9391 * bits of the region in this case !
9392 */
9393 /* ALWAYS UNLOCKS THE dst_map MAP */
39236c6e
A
9394 kr = vm_map_copy_overwrite_unaligned(
9395 dst_map,
9396 tmp_entry,
9397 copy,
9398 base_addr,
9399 discard_on_success);
9400 if (kr != KERN_SUCCESS) {
0a7de745 9401 if (next_copy != NULL) {
1c79356b 9402 copy->cpy_hdr.nentries +=
0a7de745
A
9403 remaining_entries;
9404 copy->cpy_hdr.links.prev->vme_next =
9405 next_copy;
9406 copy->cpy_hdr.links.prev =
9407 previous_prev;
1c79356b
A
9408 copy->size += copy_size;
9409 }
9410 return kr;
9411 }
9412 }
9413 total_size -= copy_size;
0a7de745 9414 if (total_size == 0) {
1c79356b 9415 break;
0a7de745 9416 }
1c79356b
A
9417 base_addr += copy_size;
9418 copy_size = 0;
9419 copy->offset = new_offset;
0a7de745 9420 if (next_copy != NULL) {
1c79356b
A
9421 copy->cpy_hdr.nentries = remaining_entries;
9422 copy->cpy_hdr.links.next = next_copy;
9423 copy->cpy_hdr.links.prev = previous_prev;
9424 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9425 copy->size = total_size;
9426 }
9427 vm_map_lock(dst_map);
0a7de745 9428 while (TRUE) {
5ba3f43e 9429 if (!vm_map_lookup_entry(dst_map,
0a7de745 9430 base_addr, &tmp_entry)) {
1c79356b 9431 vm_map_unlock(dst_map);
0a7de745 9432 return KERN_INVALID_ADDRESS;
1c79356b 9433 }
0a7de745
A
9434 if (tmp_entry->in_transition) {
9435 entry->needs_wakeup = TRUE;
9436 vm_map_entry_wait(dst_map, THREAD_UNINT);
1c79356b
A
9437 } else {
9438 break;
9439 }
9440 }
39236c6e 9441 vm_map_clip_start(dst_map,
0a7de745
A
9442 tmp_entry,
9443 vm_map_trunc_page(base_addr,
9444 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
9445
9446 entry = tmp_entry;
9447 } /* while */
9448
9449 /*
9450 * Throw away the vm_map_copy object
9451 */
0a7de745 9452 if (discard_on_success) {
6d2010ae 9453 vm_map_copy_discard(copy);
0a7de745 9454 }
1c79356b 9455
0a7de745 9456 return KERN_SUCCESS;
1c79356b
A
9457}/* vm_map_copy_overwrite */
9458
9459kern_return_t
9460vm_map_copy_overwrite(
0a7de745
A
9461 vm_map_t dst_map,
9462 vm_map_offset_t dst_addr,
9463 vm_map_copy_t copy,
eb6b6ca3 9464 vm_map_size_t copy_size,
0a7de745 9465 boolean_t interruptible)
1c79356b 9466{
0a7de745
A
9467 vm_map_size_t head_size, tail_size;
9468 vm_map_copy_t head_copy, tail_copy;
9469 vm_map_offset_t head_addr, tail_addr;
9470 vm_map_entry_t entry;
9471 kern_return_t kr;
9472 vm_map_offset_t effective_page_mask, effective_page_size;
f427ee49 9473 int copy_page_shift;
6d2010ae
A
9474
9475 head_size = 0;
9476 tail_size = 0;
9477 head_copy = NULL;
9478 tail_copy = NULL;
9479 head_addr = 0;
9480 tail_addr = 0;
9481
9482 if (interruptible ||
9483 copy == VM_MAP_COPY_NULL ||
9484 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9485 /*
9486 * We can't split the "copy" map if we're interruptible
9487 * or if we don't have a "copy" map...
9488 */
0a7de745 9489blunt_copy:
6d2010ae 9490 return vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9491 dst_addr,
9492 copy,
9493 interruptible,
9494 (pmap_t) NULL,
9495 TRUE);
6d2010ae
A
9496 }
9497
f427ee49
A
9498 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9499 if (copy_page_shift < PAGE_SHIFT ||
9500 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9501 goto blunt_copy;
9502 }
9503
9504 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9505 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9506 } else {
9507 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9508 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9509 effective_page_mask);
9510 }
5ba3f43e
A
9511 effective_page_size = effective_page_mask + 1;
9512
eb6b6ca3 9513 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
6d2010ae
A
9514 /*
9515 * Too small to bother with optimizing...
9516 */
9517 goto blunt_copy;
9518 }
9519
5ba3f43e
A
9520 if ((dst_addr & effective_page_mask) !=
9521 (copy->offset & effective_page_mask)) {
6d2010ae
A
9522 /*
9523 * Incompatible mis-alignment of source and destination...
9524 */
9525 goto blunt_copy;
9526 }
9527
9528 /*
9529 * Proper alignment or identical mis-alignment at the beginning.
9530 * Let's try and do a small unaligned copy first (if needed)
9531 * and then an aligned copy for the rest.
9532 */
5ba3f43e 9533 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
6d2010ae 9534 head_addr = dst_addr;
5ba3f43e 9535 head_size = (effective_page_size -
0a7de745 9536 (copy->offset & effective_page_mask));
eb6b6ca3 9537 head_size = MIN(head_size, copy_size);
6d2010ae 9538 }
eb6b6ca3 9539 if (!vm_map_page_aligned(copy->offset + copy_size,
0a7de745 9540 effective_page_mask)) {
6d2010ae
A
9541 /*
9542 * Mis-alignment at the end.
9543 * Do an aligned copy up to the last page and
9544 * then an unaligned copy for the remaining bytes.
9545 */
eb6b6ca3 9546 tail_size = ((copy->offset + copy_size) &
0a7de745 9547 effective_page_mask);
eb6b6ca3
A
9548 tail_size = MIN(tail_size, copy_size);
9549 tail_addr = dst_addr + copy_size - tail_size;
5ba3f43e 9550 assert(tail_addr >= head_addr + head_size);
6d2010ae 9551 }
eb6b6ca3 9552 assert(head_size + tail_size <= copy_size);
6d2010ae 9553
eb6b6ca3 9554 if (head_size + tail_size == copy_size) {
6d2010ae
A
9555 /*
9556 * It's all unaligned, no optimization possible...
9557 */
9558 goto blunt_copy;
9559 }
9560
9561 /*
9562 * Can't optimize if there are any submaps in the
9563 * destination due to the way we free the "copy" map
9564 * progressively in vm_map_copy_overwrite_nested()
9565 * in that case.
9566 */
9567 vm_map_lock_read(dst_map);
0a7de745 9568 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6d2010ae
A
9569 vm_map_unlock_read(dst_map);
9570 goto blunt_copy;
9571 }
9572 for (;
0a7de745 9573 (entry != vm_map_copy_to_entry(copy) &&
eb6b6ca3 9574 entry->vme_start < dst_addr + copy_size);
0a7de745 9575 entry = entry->vme_next) {
6d2010ae
A
9576 if (entry->is_sub_map) {
9577 vm_map_unlock_read(dst_map);
9578 goto blunt_copy;
9579 }
9580 }
9581 vm_map_unlock_read(dst_map);
9582
9583 if (head_size) {
9584 /*
9585 * Unaligned copy of the first "head_size" bytes, to reach
9586 * a page boundary.
9587 */
5ba3f43e 9588
6d2010ae
A
9589 /*
9590 * Extract "head_copy" out of "copy".
9591 */
d9a64523 9592 head_copy = vm_map_copy_allocate();
6d2010ae 9593 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6d2010ae 9594 head_copy->cpy_hdr.entries_pageable =
0a7de745 9595 copy->cpy_hdr.entries_pageable;
6d2010ae 9596 vm_map_store_init(&head_copy->cpy_hdr);
f427ee49 9597 head_copy->cpy_hdr.page_shift = copy_page_shift;
6d2010ae 9598
5ba3f43e
A
9599 entry = vm_map_copy_first_entry(copy);
9600 if (entry->vme_end < copy->offset + head_size) {
9601 head_size = entry->vme_end - copy->offset;
9602 }
9603
6d2010ae
A
9604 head_copy->offset = copy->offset;
9605 head_copy->size = head_size;
6d2010ae
A
9606 copy->offset += head_size;
9607 copy->size -= head_size;
eb6b6ca3
A
9608 copy_size -= head_size;
9609 assert(copy_size > 0);
6d2010ae 9610
6d2010ae
A
9611 vm_map_copy_clip_end(copy, entry, copy->offset);
9612 vm_map_copy_entry_unlink(copy, entry);
9613 vm_map_copy_entry_link(head_copy,
0a7de745
A
9614 vm_map_copy_to_entry(head_copy),
9615 entry);
6d2010ae
A
9616
9617 /*
9618 * Do the unaligned copy.
9619 */
9620 kr = vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9621 head_addr,
9622 head_copy,
9623 interruptible,
9624 (pmap_t) NULL,
9625 FALSE);
9626 if (kr != KERN_SUCCESS) {
6d2010ae 9627 goto done;
0a7de745 9628 }
6d2010ae
A
9629 }
9630
9631 if (tail_size) {
9632 /*
9633 * Extract "tail_copy" out of "copy".
9634 */
d9a64523 9635 tail_copy = vm_map_copy_allocate();
6d2010ae 9636 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6d2010ae 9637 tail_copy->cpy_hdr.entries_pageable =
0a7de745 9638 copy->cpy_hdr.entries_pageable;
6d2010ae 9639 vm_map_store_init(&tail_copy->cpy_hdr);
f427ee49 9640 tail_copy->cpy_hdr.page_shift = copy_page_shift;
6d2010ae 9641
eb6b6ca3 9642 tail_copy->offset = copy->offset + copy_size - tail_size;
6d2010ae
A
9643 tail_copy->size = tail_size;
9644
9645 copy->size -= tail_size;
eb6b6ca3
A
9646 copy_size -= tail_size;
9647 assert(copy_size > 0);
6d2010ae
A
9648
9649 entry = vm_map_copy_last_entry(copy);
9650 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9651 entry = vm_map_copy_last_entry(copy);
9652 vm_map_copy_entry_unlink(copy, entry);
9653 vm_map_copy_entry_link(tail_copy,
0a7de745
A
9654 vm_map_copy_last_entry(tail_copy),
9655 entry);
6d2010ae
A
9656 }
9657
eb6b6ca3
A
9658 /*
9659 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9660 * we want to avoid TOCTOU issues w.r.t copy->size but
9661 * we don't need to change vm_map_copy_overwrite_nested()
9662 * and all other vm_map_copy_overwrite variants.
9663 *
9664 * So we assign the original copy_size that was passed into
9665 * this routine back to copy.
9666 *
9667 * This use of local 'copy_size' passed into this routine is
9668 * to try and protect against TOCTOU attacks where the kernel
9669 * has been exploited. We don't expect this to be an issue
9670 * during normal system operation.
9671 */
9672 assertf(copy->size == copy_size,
9673 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
9674 copy->size = copy_size;
9675
6d2010ae
A
9676 /*
9677 * Copy most (or possibly all) of the data.
9678 */
9679 kr = vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9680 dst_addr + head_size,
9681 copy,
9682 interruptible,
9683 (pmap_t) NULL,
9684 FALSE);
6d2010ae
A
9685 if (kr != KERN_SUCCESS) {
9686 goto done;
9687 }
9688
9689 if (tail_size) {
9690 kr = vm_map_copy_overwrite_nested(dst_map,
0a7de745
A
9691 tail_addr,
9692 tail_copy,
9693 interruptible,
9694 (pmap_t) NULL,
9695 FALSE);
6d2010ae
A
9696 }
9697
9698done:
9699 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9700 if (kr == KERN_SUCCESS) {
9701 /*
9702 * Discard all the copy maps.
9703 */
9704 if (head_copy) {
9705 vm_map_copy_discard(head_copy);
9706 head_copy = NULL;
9707 }
9708 vm_map_copy_discard(copy);
9709 if (tail_copy) {
9710 vm_map_copy_discard(tail_copy);
9711 tail_copy = NULL;
9712 }
9713 } else {
9714 /*
9715 * Re-assemble the original copy map.
9716 */
9717 if (head_copy) {
9718 entry = vm_map_copy_first_entry(head_copy);
9719 vm_map_copy_entry_unlink(head_copy, entry);
9720 vm_map_copy_entry_link(copy,
0a7de745
A
9721 vm_map_copy_to_entry(copy),
9722 entry);
6d2010ae
A
9723 copy->offset -= head_size;
9724 copy->size += head_size;
9725 vm_map_copy_discard(head_copy);
9726 head_copy = NULL;
9727 }
9728 if (tail_copy) {
9729 entry = vm_map_copy_last_entry(tail_copy);
9730 vm_map_copy_entry_unlink(tail_copy, entry);
9731 vm_map_copy_entry_link(copy,
0a7de745
A
9732 vm_map_copy_last_entry(copy),
9733 entry);
6d2010ae
A
9734 copy->size += tail_size;
9735 vm_map_copy_discard(tail_copy);
9736 tail_copy = NULL;
9737 }
9738 }
9739 return kr;
1c79356b
A
9740}
9741
9742
9743/*
91447636 9744 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
9745 *
9746 * Decription:
9747 * Physically copy unaligned data
9748 *
9749 * Implementation:
9750 * Unaligned parts of pages have to be physically copied. We use
9751 * a modified form of vm_fault_copy (which understands none-aligned
9752 * page offsets and sizes) to do the copy. We attempt to copy as
9753 * much memory in one go as possibly, however vm_fault_copy copies
9754 * within 1 memory object so we have to find the smaller of "amount left"
9755 * "source object data size" and "target object data size". With
9756 * unaligned data we don't need to split regions, therefore the source
9757 * (copy) object should be one map entry, the target range may be split
9758 * over multiple map entries however. In any event we are pessimistic
9759 * about these assumptions.
9760 *
9761 * Assumptions:
9762 * dst_map is locked on entry and is return locked on success,
9763 * unlocked on error.
9764 */
9765
91447636 9766static kern_return_t
1c79356b 9767vm_map_copy_overwrite_unaligned(
0a7de745
A
9768 vm_map_t dst_map,
9769 vm_map_entry_t entry,
9770 vm_map_copy_t copy,
9771 vm_map_offset_t start,
9772 boolean_t discard_on_success)
1c79356b 9773{
0a7de745
A
9774 vm_map_entry_t copy_entry;
9775 vm_map_entry_t copy_entry_next;
9776 vm_map_version_t version;
9777 vm_object_t dst_object;
9778 vm_object_offset_t dst_offset;
9779 vm_object_offset_t src_offset;
9780 vm_object_offset_t entry_offset;
9781 vm_map_offset_t entry_end;
9782 vm_map_size_t src_size,
9783 dst_size,
9784 copy_size,
9785 amount_left;
9786 kern_return_t kr = KERN_SUCCESS;
1c79356b 9787
5ba3f43e 9788
39236c6e
A
9789 copy_entry = vm_map_copy_first_entry(copy);
9790
1c79356b
A
9791 vm_map_lock_write_to_read(dst_map);
9792
f427ee49 9793 src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
1c79356b
A
9794 amount_left = copy->size;
9795/*
9796 * unaligned so we never clipped this entry, we need the offset into
9797 * the vm_object not just the data.
5ba3f43e 9798 */
1c79356b 9799 while (amount_left > 0) {
1c79356b
A
9800 if (entry == vm_map_to_entry(dst_map)) {
9801 vm_map_unlock_read(dst_map);
9802 return KERN_INVALID_ADDRESS;
9803 }
9804
9805 /* "start" must be within the current map entry */
0a7de745 9806 assert((start >= entry->vme_start) && (start < entry->vme_end));
1c79356b
A
9807
9808 dst_offset = start - entry->vme_start;
9809
9810 dst_size = entry->vme_end - start;
9811
9812 src_size = copy_entry->vme_end -
0a7de745 9813 (copy_entry->vme_start + src_offset);
1c79356b
A
9814
9815 if (dst_size < src_size) {
9816/*
9817 * we can only copy dst_size bytes before
9818 * we have to get the next destination entry
9819 */
9820 copy_size = dst_size;
9821 } else {
9822/*
9823 * we can only copy src_size bytes before
9824 * we have to get the next source copy entry
9825 */
9826 copy_size = src_size;
9827 }
9828
9829 if (copy_size > amount_left) {
9830 copy_size = amount_left;
9831 }
9832/*
9833 * Entry needs copy, create a shadow shadow object for
9834 * Copy on write region.
9835 */
9836 if (entry->needs_copy &&
0a7de745 9837 ((entry->protection & VM_PROT_WRITE) != 0)) {
1c79356b
A
9838 if (vm_map_lock_read_to_write(dst_map)) {
9839 vm_map_lock_read(dst_map);
9840 goto RetryLookup;
9841 }
3e170ce0 9842 VME_OBJECT_SHADOW(entry,
0a7de745
A
9843 (vm_map_size_t)(entry->vme_end
9844 - entry->vme_start));
1c79356b
A
9845 entry->needs_copy = FALSE;
9846 vm_map_lock_write_to_read(dst_map);
9847 }
3e170ce0 9848 dst_object = VME_OBJECT(entry);
1c79356b
A
9849/*
9850 * unlike with the virtual (aligned) copy we're going
9851 * to fault on it therefore we need a target object.
9852 */
0a7de745 9853 if (dst_object == VM_OBJECT_NULL) {
1c79356b
A
9854 if (vm_map_lock_read_to_write(dst_map)) {
9855 vm_map_lock_read(dst_map);
9856 goto RetryLookup;
9857 }
91447636 9858 dst_object = vm_object_allocate((vm_map_size_t)
0a7de745 9859 entry->vme_end - entry->vme_start);
cb323159 9860 VME_OBJECT_SET(entry, dst_object);
3e170ce0 9861 VME_OFFSET_SET(entry, 0);
fe8ab488 9862 assert(entry->use_pmap);
1c79356b
A
9863 vm_map_lock_write_to_read(dst_map);
9864 }
9865/*
9866 * Take an object reference and unlock map. The "entry" may
9867 * disappear or change when the map is unlocked.
9868 */
9869 vm_object_reference(dst_object);
9870 version.main_timestamp = dst_map->timestamp;
3e170ce0 9871 entry_offset = VME_OFFSET(entry);
1c79356b
A
9872 entry_end = entry->vme_end;
9873 vm_map_unlock_read(dst_map);
9874/*
9875 * Copy as much as possible in one pass
9876 */
9877 kr = vm_fault_copy(
3e170ce0
A
9878 VME_OBJECT(copy_entry),
9879 VME_OFFSET(copy_entry) + src_offset,
1c79356b
A
9880 &copy_size,
9881 dst_object,
9882 entry_offset + dst_offset,
9883 dst_map,
9884 &version,
9885 THREAD_UNINT );
9886
9887 start += copy_size;
9888 src_offset += copy_size;
9889 amount_left -= copy_size;
9890/*
9891 * Release the object reference
9892 */
9893 vm_object_deallocate(dst_object);
9894/*
9895 * If a hard error occurred, return it now
9896 */
0a7de745 9897 if (kr != KERN_SUCCESS) {
1c79356b 9898 return kr;
0a7de745 9899 }
1c79356b
A
9900
9901 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
0a7de745 9902 || amount_left == 0) {
1c79356b
A
9903/*
9904 * all done with this copy entry, dispose.
9905 */
39236c6e
A
9906 copy_entry_next = copy_entry->vme_next;
9907
9908 if (discard_on_success) {
9909 vm_map_copy_entry_unlink(copy, copy_entry);
9910 assert(!copy_entry->is_sub_map);
3e170ce0 9911 vm_object_deallocate(VME_OBJECT(copy_entry));
39236c6e
A
9912 vm_map_copy_entry_dispose(copy, copy_entry);
9913 }
1c79356b 9914
39236c6e
A
9915 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9916 amount_left) {
1c79356b
A
9917/*
9918 * not finished copying but run out of source
9919 */
9920 return KERN_INVALID_ADDRESS;
9921 }
39236c6e
A
9922
9923 copy_entry = copy_entry_next;
9924
1c79356b
A
9925 src_offset = 0;
9926 }
9927
0a7de745 9928 if (amount_left == 0) {
1c79356b 9929 return KERN_SUCCESS;
0a7de745 9930 }
1c79356b
A
9931
9932 vm_map_lock_read(dst_map);
9933 if (version.main_timestamp == dst_map->timestamp) {
9934 if (start == entry_end) {
9935/*
9936 * destination region is split. Use the version
9937 * information to avoid a lookup in the normal
9938 * case.
9939 */
9940 entry = entry->vme_next;
9941/*
9942 * should be contiguous. Fail if we encounter
9943 * a hole in the destination.
9944 */
9945 if (start != entry->vme_start) {
9946 vm_map_unlock_read(dst_map);
0a7de745 9947 return KERN_INVALID_ADDRESS;
1c79356b
A
9948 }
9949 }
9950 } else {
9951/*
9952 * Map version check failed.
9953 * we must lookup the entry because somebody
9954 * might have changed the map behind our backs.
9955 */
0a7de745
A
9956RetryLookup:
9957 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
1c79356b 9958 vm_map_unlock_read(dst_map);
0a7de745 9959 return KERN_INVALID_ADDRESS;
1c79356b
A
9960 }
9961 }
9962 }/* while */
9963
1c79356b
A
9964 return KERN_SUCCESS;
9965}/* vm_map_copy_overwrite_unaligned */
9966
9967/*
91447636 9968 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
9969 *
9970 * Description:
9971 * Does all the vm_trickery possible for whole pages.
9972 *
9973 * Implementation:
9974 *
9975 * If there are no permanent objects in the destination,
9976 * and the source and destination map entry zones match,
9977 * and the destination map entry is not shared,
9978 * then the map entries can be deleted and replaced
9979 * with those from the copy. The following code is the
9980 * basic idea of what to do, but there are lots of annoying
9981 * little details about getting protection and inheritance
9982 * right. Should add protection, inheritance, and sharing checks
9983 * to the above pass and make sure that no wiring is involved.
9984 */
9985
e2d2fc5c
A
9986int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9987int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9988int vm_map_copy_overwrite_aligned_src_large = 0;
9989
91447636 9990static kern_return_t
1c79356b 9991vm_map_copy_overwrite_aligned(
0a7de745
A
9992 vm_map_t dst_map,
9993 vm_map_entry_t tmp_entry,
9994 vm_map_copy_t copy,
9995 vm_map_offset_t start,
9996 __unused pmap_t pmap)
1c79356b 9997{
0a7de745
A
9998 vm_object_t object;
9999 vm_map_entry_t copy_entry;
10000 vm_map_size_t copy_size;
10001 vm_map_size_t size;
10002 vm_map_entry_t entry;
5ba3f43e 10003
1c79356b 10004 while ((copy_entry = vm_map_copy_first_entry(copy))
0a7de745 10005 != vm_map_copy_to_entry(copy)) {
1c79356b 10006 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
5ba3f43e 10007
1c79356b 10008 entry = tmp_entry;
fe8ab488
A
10009 if (entry->is_sub_map) {
10010 /* unnested when clipped earlier */
10011 assert(!entry->use_pmap);
10012 }
1c79356b
A
10013 if (entry == vm_map_to_entry(dst_map)) {
10014 vm_map_unlock(dst_map);
10015 return KERN_INVALID_ADDRESS;
10016 }
10017 size = (entry->vme_end - entry->vme_start);
10018 /*
10019 * Make sure that no holes popped up in the
10020 * address map, and that the protection is
10021 * still valid, in case the map was unlocked
10022 * earlier.
10023 */
10024
10025 if ((entry->vme_start != start) || ((entry->is_sub_map)
0a7de745 10026 && !entry->needs_copy)) {
1c79356b 10027 vm_map_unlock(dst_map);
0a7de745 10028 return KERN_INVALID_ADDRESS;
1c79356b
A
10029 }
10030 assert(entry != vm_map_to_entry(dst_map));
10031
10032 /*
10033 * Check protection again
10034 */
10035
0a7de745 10036 if (!(entry->protection & VM_PROT_WRITE)) {
1c79356b 10037 vm_map_unlock(dst_map);
0a7de745 10038 return KERN_PROTECTION_FAILURE;
1c79356b
A
10039 }
10040
10041 /*
10042 * Adjust to source size first
10043 */
10044
10045 if (copy_size < size) {
fe8ab488
A
10046 if (entry->map_aligned &&
10047 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
0a7de745 10048 VM_MAP_PAGE_MASK(dst_map))) {
fe8ab488
A
10049 /* no longer map-aligned */
10050 entry->map_aligned = FALSE;
10051 }
1c79356b
A
10052 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10053 size = copy_size;
10054 }
10055
10056 /*
10057 * Adjust to destination size
10058 */
10059
10060 if (size < copy_size) {
10061 vm_map_copy_clip_end(copy, copy_entry,
0a7de745 10062 copy_entry->vme_start + size);
1c79356b
A
10063 copy_size = size;
10064 }
10065
10066 assert((entry->vme_end - entry->vme_start) == size);
10067 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10068 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10069
10070 /*
10071 * If the destination contains temporary unshared memory,
10072 * we can perform the copy by throwing it away and
10073 * installing the source data.
10074 */
10075
3e170ce0 10076 object = VME_OBJECT(entry);
5ba3f43e 10077 if ((!entry->is_shared &&
0a7de745
A
10078 ((object == VM_OBJECT_NULL) ||
10079 (object->internal && !object->true_share))) ||
1c79356b 10080 entry->needs_copy) {
0a7de745
A
10081 vm_object_t old_object = VME_OBJECT(entry);
10082 vm_object_offset_t old_offset = VME_OFFSET(entry);
10083 vm_object_offset_t offset;
1c79356b
A
10084
10085 /*
10086 * Ensure that the source and destination aren't
10087 * identical
10088 */
3e170ce0
A
10089 if (old_object == VME_OBJECT(copy_entry) &&
10090 old_offset == VME_OFFSET(copy_entry)) {
1c79356b
A
10091 vm_map_copy_entry_unlink(copy, copy_entry);
10092 vm_map_copy_entry_dispose(copy, copy_entry);
10093
0a7de745 10094 if (old_object != VM_OBJECT_NULL) {
1c79356b 10095 vm_object_deallocate(old_object);
0a7de745 10096 }
1c79356b
A
10097
10098 start = tmp_entry->vme_end;
10099 tmp_entry = tmp_entry->vme_next;
10100 continue;
10101 }
10102
f427ee49 10103#if XNU_TARGET_OS_OSX
0a7de745
A
10104#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10105#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
3e170ce0
A
10106 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10107 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
e2d2fc5c
A
10108 copy_size <= __TRADEOFF1_COPY_SIZE) {
10109 /*
10110 * Virtual vs. Physical copy tradeoff #1.
10111 *
10112 * Copying only a few pages out of a large
10113 * object: do a physical copy instead of
10114 * a virtual copy, to avoid possibly keeping
10115 * the entire large object alive because of
10116 * those few copy-on-write pages.
10117 */
10118 vm_map_copy_overwrite_aligned_src_large++;
10119 goto slow_copy;
10120 }
f427ee49 10121#endif /* XNU_TARGET_OS_OSX */
e2d2fc5c 10122
3e170ce0
A
10123 if ((dst_map->pmap != kernel_pmap) &&
10124 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
0a7de745 10125 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
ebb1b9f4
A
10126 vm_object_t new_object, new_shadow;
10127
10128 /*
10129 * We're about to map something over a mapping
10130 * established by malloc()...
10131 */
3e170ce0 10132 new_object = VME_OBJECT(copy_entry);
ebb1b9f4
A
10133 if (new_object != VM_OBJECT_NULL) {
10134 vm_object_lock_shared(new_object);
10135 }
10136 while (new_object != VM_OBJECT_NULL &&
f427ee49 10137#if XNU_TARGET_OS_OSX
0a7de745
A
10138 !new_object->true_share &&
10139 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
f427ee49 10140#endif /* XNU_TARGET_OS_OSX */
0a7de745 10141 new_object->internal) {
ebb1b9f4
A
10142 new_shadow = new_object->shadow;
10143 if (new_shadow == VM_OBJECT_NULL) {
10144 break;
10145 }
10146 vm_object_lock_shared(new_shadow);
10147 vm_object_unlock(new_object);
10148 new_object = new_shadow;
10149 }
10150 if (new_object != VM_OBJECT_NULL) {
10151 if (!new_object->internal) {
10152 /*
10153 * The new mapping is backed
10154 * by an external object. We
10155 * don't want malloc'ed memory
10156 * to be replaced with such a
10157 * non-anonymous mapping, so
10158 * let's go off the optimized
10159 * path...
10160 */
e2d2fc5c 10161 vm_map_copy_overwrite_aligned_src_not_internal++;
ebb1b9f4
A
10162 vm_object_unlock(new_object);
10163 goto slow_copy;
10164 }
f427ee49 10165#if XNU_TARGET_OS_OSX
e2d2fc5c
A
10166 if (new_object->true_share ||
10167 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10168 /*
10169 * Same if there's a "true_share"
10170 * object in the shadow chain, or
10171 * an object with a non-default
10172 * (SYMMETRIC) copy strategy.
10173 */
10174 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10175 vm_object_unlock(new_object);
10176 goto slow_copy;
10177 }
f427ee49 10178#endif /* XNU_TARGET_OS_OSX */
ebb1b9f4
A
10179 vm_object_unlock(new_object);
10180 }
10181 /*
10182 * The new mapping is still backed by
10183 * anonymous (internal) memory, so it's
10184 * OK to substitute it for the original
10185 * malloc() mapping.
10186 */
10187 }
10188
1c79356b 10189 if (old_object != VM_OBJECT_NULL) {
0a7de745
A
10190 if (entry->is_sub_map) {
10191 if (entry->use_pmap) {
0c530ab8 10192#ifndef NO_NESTED_PMAP
5ba3f43e 10193 pmap_unnest(dst_map->pmap,
0a7de745
A
10194 (addr64_t)entry->vme_start,
10195 entry->vme_end - entry->vme_start);
10196#endif /* NO_NESTED_PMAP */
10197 if (dst_map->mapped_in_other_pmaps) {
9bccf70c
A
10198 /* clean up parent */
10199 /* map/maps */
2d21ac55
A
10200 vm_map_submap_pmap_clean(
10201 dst_map, entry->vme_start,
10202 entry->vme_end,
3e170ce0
A
10203 VME_SUBMAP(entry),
10204 VME_OFFSET(entry));
9bccf70c
A
10205 }
10206 } else {
10207 vm_map_submap_pmap_clean(
5ba3f43e 10208 dst_map, entry->vme_start,
9bccf70c 10209 entry->vme_end,
3e170ce0
A
10210 VME_SUBMAP(entry),
10211 VME_OFFSET(entry));
9bccf70c 10212 }
0a7de745
A
10213 vm_map_deallocate(VME_SUBMAP(entry));
10214 } else {
10215 if (dst_map->mapped_in_other_pmaps) {
39236c6e 10216 vm_object_pmap_protect_options(
3e170ce0
A
10217 VME_OBJECT(entry),
10218 VME_OFFSET(entry),
5ba3f43e 10219 entry->vme_end
2d21ac55 10220 - entry->vme_start,
9bccf70c 10221 PMAP_NULL,
f427ee49 10222 PAGE_SIZE,
9bccf70c 10223 entry->vme_start,
39236c6e
A
10224 VM_PROT_NONE,
10225 PMAP_OPTIONS_REMOVE);
9bccf70c 10226 } else {
39236c6e 10227 pmap_remove_options(
5ba3f43e
A
10228 dst_map->pmap,
10229 (addr64_t)(entry->vme_start),
39236c6e
A
10230 (addr64_t)(entry->vme_end),
10231 PMAP_OPTIONS_REMOVE);
9bccf70c 10232 }
1c79356b 10233 vm_object_deallocate(old_object);
0a7de745 10234 }
1c79356b
A
10235 }
10236
a39ff7e2
A
10237 if (entry->iokit_acct) {
10238 /* keep using iokit accounting */
10239 entry->use_pmap = FALSE;
10240 } else {
10241 /* use pmap accounting */
10242 entry->use_pmap = TRUE;
10243 }
1c79356b 10244 entry->is_sub_map = FALSE;
3e170ce0
A
10245 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10246 object = VME_OBJECT(entry);
1c79356b
A
10247 entry->needs_copy = copy_entry->needs_copy;
10248 entry->wired_count = 0;
10249 entry->user_wired_count = 0;
3e170ce0 10250 offset = VME_OFFSET(copy_entry);
5ba3f43e 10251 VME_OFFSET_SET(entry, offset);
1c79356b
A
10252
10253 vm_map_copy_entry_unlink(copy, copy_entry);
10254 vm_map_copy_entry_dispose(copy, copy_entry);
2d21ac55 10255
1c79356b 10256 /*
2d21ac55 10257 * we could try to push pages into the pmap at this point, BUT
1c79356b
A
10258 * this optimization only saved on average 2 us per page if ALL
10259 * the pages in the source were currently mapped
10260 * and ALL the pages in the dest were touched, if there were fewer
10261 * than 2/3 of the pages touched, this optimization actually cost more cycles
2d21ac55 10262 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
1c79356b
A
10263 */
10264
1c79356b
A
10265 /*
10266 * Set up for the next iteration. The map
10267 * has not been unlocked, so the next
10268 * address should be at the end of this
10269 * entry, and the next map entry should be
10270 * the one following it.
10271 */
10272
10273 start = tmp_entry->vme_end;
10274 tmp_entry = tmp_entry->vme_next;
10275 } else {
0a7de745
A
10276 vm_map_version_t version;
10277 vm_object_t dst_object;
10278 vm_object_offset_t dst_offset;
10279 kern_return_t r;
1c79356b 10280
0a7de745 10281slow_copy:
e2d2fc5c 10282 if (entry->needs_copy) {
3e170ce0 10283 VME_OBJECT_SHADOW(entry,
0a7de745
A
10284 (entry->vme_end -
10285 entry->vme_start));
e2d2fc5c
A
10286 entry->needs_copy = FALSE;
10287 }
10288
3e170ce0
A
10289 dst_object = VME_OBJECT(entry);
10290 dst_offset = VME_OFFSET(entry);
ebb1b9f4 10291
1c79356b
A
10292 /*
10293 * Take an object reference, and record
10294 * the map version information so that the
10295 * map can be safely unlocked.
10296 */
10297
ebb1b9f4
A
10298 if (dst_object == VM_OBJECT_NULL) {
10299 /*
10300 * We would usually have just taken the
10301 * optimized path above if the destination
10302 * object has not been allocated yet. But we
10303 * now disable that optimization if the copy
10304 * entry's object is not backed by anonymous
10305 * memory to avoid replacing malloc'ed
10306 * (i.e. re-usable) anonymous memory with a
10307 * not-so-anonymous mapping.
10308 * So we have to handle this case here and
10309 * allocate a new VM object for this map entry.
10310 */
10311 dst_object = vm_object_allocate(
10312 entry->vme_end - entry->vme_start);
10313 dst_offset = 0;
3e170ce0
A
10314 VME_OBJECT_SET(entry, dst_object);
10315 VME_OFFSET_SET(entry, dst_offset);
fe8ab488 10316 assert(entry->use_pmap);
ebb1b9f4
A
10317 }
10318
1c79356b
A
10319 vm_object_reference(dst_object);
10320
9bccf70c
A
10321 /* account for unlock bumping up timestamp */
10322 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
10323
10324 vm_map_unlock(dst_map);
10325
10326 /*
10327 * Copy as much as possible in one pass
10328 */
10329
10330 copy_size = size;
10331 r = vm_fault_copy(
3e170ce0
A
10332 VME_OBJECT(copy_entry),
10333 VME_OFFSET(copy_entry),
2d21ac55
A
10334 &copy_size,
10335 dst_object,
10336 dst_offset,
10337 dst_map,
10338 &version,
10339 THREAD_UNINT );
1c79356b
A
10340
10341 /*
10342 * Release the object reference
10343 */
10344
10345 vm_object_deallocate(dst_object);
10346
10347 /*
10348 * If a hard error occurred, return it now
10349 */
10350
0a7de745
A
10351 if (r != KERN_SUCCESS) {
10352 return r;
10353 }
1c79356b
A
10354
10355 if (copy_size != 0) {
10356 /*
10357 * Dispose of the copied region
10358 */
10359
10360 vm_map_copy_clip_end(copy, copy_entry,
0a7de745 10361 copy_entry->vme_start + copy_size);
1c79356b 10362 vm_map_copy_entry_unlink(copy, copy_entry);
3e170ce0 10363 vm_object_deallocate(VME_OBJECT(copy_entry));
1c79356b
A
10364 vm_map_copy_entry_dispose(copy, copy_entry);
10365 }
10366
10367 /*
10368 * Pick up in the destination map where we left off.
10369 *
10370 * Use the version information to avoid a lookup
10371 * in the normal case.
10372 */
10373
10374 start += copy_size;
10375 vm_map_lock(dst_map);
e2d2fc5c
A
10376 if (version.main_timestamp == dst_map->timestamp &&
10377 copy_size != 0) {
1c79356b
A
10378 /* We can safely use saved tmp_entry value */
10379
fe8ab488
A
10380 if (tmp_entry->map_aligned &&
10381 !VM_MAP_PAGE_ALIGNED(
10382 start,
10383 VM_MAP_PAGE_MASK(dst_map))) {
10384 /* no longer map-aligned */
10385 tmp_entry->map_aligned = FALSE;
10386 }
1c79356b
A
10387 vm_map_clip_end(dst_map, tmp_entry, start);
10388 tmp_entry = tmp_entry->vme_next;
10389 } else {
10390 /* Must do lookup of tmp_entry */
10391
10392 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10393 vm_map_unlock(dst_map);
0a7de745 10394 return KERN_INVALID_ADDRESS;
1c79356b 10395 }
fe8ab488
A
10396 if (tmp_entry->map_aligned &&
10397 !VM_MAP_PAGE_ALIGNED(
10398 start,
10399 VM_MAP_PAGE_MASK(dst_map))) {
10400 /* no longer map-aligned */
10401 tmp_entry->map_aligned = FALSE;
10402 }
1c79356b
A
10403 vm_map_clip_start(dst_map, tmp_entry, start);
10404 }
10405 }
10406 }/* while */
10407
0a7de745 10408 return KERN_SUCCESS;
1c79356b
A
10409}/* vm_map_copy_overwrite_aligned */
10410
10411/*
91447636 10412 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
10413 *
10414 * Description:
10415 * Copy in data to a kernel buffer from space in the
91447636 10416 * source map. The original space may be optionally
1c79356b
A
10417 * deallocated.
10418 *
10419 * If successful, returns a new copy object.
10420 */
91447636 10421static kern_return_t
1c79356b 10422vm_map_copyin_kernel_buffer(
0a7de745
A
10423 vm_map_t src_map,
10424 vm_map_offset_t src_addr,
10425 vm_map_size_t len,
10426 boolean_t src_destroy,
10427 vm_map_copy_t *copy_result)
1c79356b 10428{
91447636 10429 kern_return_t kr;
1c79356b 10430 vm_map_copy_t copy;
b0d623f7 10431
0a7de745 10432 if (len > msg_ool_size_small) {
3e170ce0 10433 return KERN_INVALID_ARGUMENT;
0a7de745 10434 }
1c79356b 10435
f427ee49 10436 copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
0a7de745 10437 if (copy == VM_MAP_COPY_NULL) {
1c79356b 10438 return KERN_RESOURCE_SHORTAGE;
0a7de745 10439 }
f427ee49
A
10440 copy->cpy_kdata = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
10441 if (copy->cpy_kdata == NULL) {
10442 zfree(vm_map_copy_zone, copy);
10443 return KERN_RESOURCE_SHORTAGE;
10444 }
10445
1c79356b
A
10446 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10447 copy->size = len;
10448 copy->offset = 0;
1c79356b 10449
3e170ce0 10450 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
91447636 10451 if (kr != KERN_SUCCESS) {
f427ee49
A
10452 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, len);
10453 zfree(vm_map_copy_zone, copy);
91447636 10454 return kr;
1c79356b
A
10455 }
10456 if (src_destroy) {
39236c6e
A
10457 (void) vm_map_remove(
10458 src_map,
10459 vm_map_trunc_page(src_addr,
0a7de745 10460 VM_MAP_PAGE_MASK(src_map)),
39236c6e 10461 vm_map_round_page(src_addr + len,
0a7de745 10462 VM_MAP_PAGE_MASK(src_map)),
39236c6e 10463 (VM_MAP_REMOVE_INTERRUPTIBLE |
0a7de745
A
10464 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10465 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
1c79356b
A
10466 }
10467 *copy_result = copy;
10468 return KERN_SUCCESS;
10469}
10470
10471/*
91447636 10472 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
10473 *
10474 * Description:
10475 * Copy out data from a kernel buffer into space in the
10476 * destination map. The space may be otpionally dynamically
10477 * allocated.
10478 *
10479 * If successful, consumes the copy object.
10480 * Otherwise, the caller is responsible for it.
10481 */
91447636
A
10482static int vm_map_copyout_kernel_buffer_failures = 0;
10483static kern_return_t
1c79356b 10484vm_map_copyout_kernel_buffer(
0a7de745
A
10485 vm_map_t map,
10486 vm_map_address_t *addr, /* IN/OUT */
10487 vm_map_copy_t copy,
10488 vm_map_size_t copy_size,
10489 boolean_t overwrite,
10490 boolean_t consume_on_success)
1c79356b
A
10491{
10492 kern_return_t kr = KERN_SUCCESS;
91447636 10493 thread_t thread = current_thread();
1c79356b 10494
39037602
A
10495 assert(copy->size == copy_size);
10496
3e170ce0
A
10497 /*
10498 * check for corrupted vm_map_copy structure
10499 */
0a7de745 10500 if (copy_size > msg_ool_size_small || copy->offset) {
3e170ce0 10501 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
0a7de745
A
10502 (long long)copy->size, (long long)copy->offset);
10503 }
3e170ce0 10504
1c79356b 10505 if (!overwrite) {
1c79356b
A
10506 /*
10507 * Allocate space in the target map for the data
10508 */
10509 *addr = 0;
5ba3f43e 10510 kr = vm_map_enter(map,
0a7de745
A
10511 addr,
10512 vm_map_round_page(copy_size,
10513 VM_MAP_PAGE_MASK(map)),
10514 (vm_map_offset_t) 0,
10515 VM_FLAGS_ANYWHERE,
10516 VM_MAP_KERNEL_FLAGS_NONE,
10517 VM_KERN_MEMORY_NONE,
10518 VM_OBJECT_NULL,
10519 (vm_object_offset_t) 0,
10520 FALSE,
10521 VM_PROT_DEFAULT,
10522 VM_PROT_ALL,
10523 VM_INHERIT_DEFAULT);
10524 if (kr != KERN_SUCCESS) {
91447636 10525 return kr;
0a7de745 10526 }
5ba3f43e
A
10527#if KASAN
10528 if (map->pmap == kernel_pmap) {
10529 kasan_notify_address(*addr, copy->size);
10530 }
10531#endif
1c79356b
A
10532 }
10533
10534 /*
10535 * Copyout the data from the kernel buffer to the target map.
5ba3f43e 10536 */
91447636 10537 if (thread->map == map) {
1c79356b
A
10538 /*
10539 * If the target map is the current map, just do
10540 * the copy.
10541 */
39037602
A
10542 assert((vm_size_t)copy_size == copy_size);
10543 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636 10544 kr = KERN_INVALID_ADDRESS;
1c79356b 10545 }
0a7de745 10546 } else {
1c79356b
A
10547 vm_map_t oldmap;
10548
10549 /*
10550 * If the target map is another map, assume the
10551 * target's address space identity for the duration
10552 * of the copy.
10553 */
10554 vm_map_reference(map);
10555 oldmap = vm_map_switch(map);
10556
39037602
A
10557 assert((vm_size_t)copy_size == copy_size);
10558 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636
A
10559 vm_map_copyout_kernel_buffer_failures++;
10560 kr = KERN_INVALID_ADDRESS;
1c79356b 10561 }
5ba3f43e 10562
1c79356b
A
10563 (void) vm_map_switch(oldmap);
10564 vm_map_deallocate(map);
10565 }
10566
91447636
A
10567 if (kr != KERN_SUCCESS) {
10568 /* the copy failed, clean up */
10569 if (!overwrite) {
10570 /*
10571 * Deallocate the space we allocated in the target map.
10572 */
39236c6e
A
10573 (void) vm_map_remove(
10574 map,
10575 vm_map_trunc_page(*addr,
0a7de745 10576 VM_MAP_PAGE_MASK(map)),
39236c6e 10577 vm_map_round_page((*addr +
0a7de745
A
10578 vm_map_round_page(copy_size,
10579 VM_MAP_PAGE_MASK(map))),
10580 VM_MAP_PAGE_MASK(map)),
d9a64523 10581 VM_MAP_REMOVE_NO_FLAGS);
91447636
A
10582 *addr = 0;
10583 }
10584 } else {
10585 /* copy was successful, dicard the copy structure */
39236c6e 10586 if (consume_on_success) {
f427ee49
A
10587 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy_size);
10588 zfree(vm_map_copy_zone, copy);
39236c6e 10589 }
91447636 10590 }
1c79356b 10591
91447636 10592 return kr;
1c79356b 10593}
5ba3f43e 10594
1c79356b 10595/*
0a7de745 10596 * Routine: vm_map_copy_insert [internal use only]
5ba3f43e 10597 *
1c79356b
A
10598 * Description:
10599 * Link a copy chain ("copy") into a map at the
10600 * specified location (after "where").
10601 * Side effects:
10602 * The copy chain is destroyed.
1c79356b 10603 */
d9a64523
A
10604static void
10605vm_map_copy_insert(
0a7de745
A
10606 vm_map_t map,
10607 vm_map_entry_t after_where,
10608 vm_map_copy_t copy)
d9a64523 10609{
0a7de745 10610 vm_map_entry_t entry;
d9a64523
A
10611
10612 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10613 entry = vm_map_copy_first_entry(copy);
10614 vm_map_copy_entry_unlink(copy, entry);
10615 vm_map_store_entry_link(map, after_where, entry,
0a7de745 10616 VM_MAP_KERNEL_FLAGS_NONE);
d9a64523
A
10617 after_where = entry;
10618 }
10619 zfree(vm_map_copy_zone, copy);
10620}
1c79356b 10621
39236c6e
A
10622void
10623vm_map_copy_remap(
0a7de745
A
10624 vm_map_t map,
10625 vm_map_entry_t where,
10626 vm_map_copy_t copy,
10627 vm_map_offset_t adjustment,
10628 vm_prot_t cur_prot,
10629 vm_prot_t max_prot,
10630 vm_inherit_t inheritance)
39236c6e 10631{
0a7de745 10632 vm_map_entry_t copy_entry, new_entry;
39236c6e
A
10633
10634 for (copy_entry = vm_map_copy_first_entry(copy);
0a7de745
A
10635 copy_entry != vm_map_copy_to_entry(copy);
10636 copy_entry = copy_entry->vme_next) {
39236c6e
A
10637 /* get a new VM map entry for the map */
10638 new_entry = vm_map_entry_create(map,
0a7de745 10639 !map->hdr.entries_pageable);
39236c6e 10640 /* copy the "copy entry" to the new entry */
f427ee49 10641 vm_map_entry_copy(map, new_entry, copy_entry);
39236c6e
A
10642 /* adjust "start" and "end" */
10643 new_entry->vme_start += adjustment;
10644 new_entry->vme_end += adjustment;
10645 /* clear some attributes */
10646 new_entry->inheritance = inheritance;
10647 new_entry->protection = cur_prot;
10648 new_entry->max_protection = max_prot;
10649 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10650 /* take an extra reference on the entry's "object" */
10651 if (new_entry->is_sub_map) {
fe8ab488 10652 assert(!new_entry->use_pmap); /* not nested */
3e170ce0
A
10653 vm_map_lock(VME_SUBMAP(new_entry));
10654 vm_map_reference(VME_SUBMAP(new_entry));
10655 vm_map_unlock(VME_SUBMAP(new_entry));
39236c6e 10656 } else {
3e170ce0 10657 vm_object_reference(VME_OBJECT(new_entry));
39236c6e
A
10658 }
10659 /* insert the new entry in the map */
d9a64523 10660 vm_map_store_entry_link(map, where, new_entry,
0a7de745 10661 VM_MAP_KERNEL_FLAGS_NONE);
39236c6e
A
10662 /* continue inserting the "copy entries" after the new entry */
10663 where = new_entry;
10664 }
10665}
10666
2dced7af 10667
39037602
A
10668/*
10669 * Returns true if *size matches (or is in the range of) copy->size.
10670 * Upon returning true, the *size field is updated with the actual size of the
10671 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10672 */
2dced7af
A
10673boolean_t
10674vm_map_copy_validate_size(
0a7de745
A
10675 vm_map_t dst_map,
10676 vm_map_copy_t copy,
10677 vm_map_size_t *size)
2dced7af 10678{
0a7de745 10679 if (copy == VM_MAP_COPY_NULL) {
2dced7af 10680 return FALSE;
0a7de745 10681 }
39037602
A
10682 vm_map_size_t copy_sz = copy->size;
10683 vm_map_size_t sz = *size;
2dced7af
A
10684 switch (copy->type) {
10685 case VM_MAP_COPY_OBJECT:
10686 case VM_MAP_COPY_KERNEL_BUFFER:
0a7de745 10687 if (sz == copy_sz) {
2dced7af 10688 return TRUE;
0a7de745 10689 }
2dced7af
A
10690 break;
10691 case VM_MAP_COPY_ENTRY_LIST:
10692 /*
10693 * potential page-size rounding prevents us from exactly
10694 * validating this flavor of vm_map_copy, but we can at least
10695 * assert that it's within a range.
10696 */
39037602
A
10697 if (copy_sz >= sz &&
10698 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10699 *size = copy_sz;
2dced7af 10700 return TRUE;
39037602 10701 }
2dced7af
A
10702 break;
10703 default:
10704 break;
10705 }
10706 return FALSE;
10707}
10708
39037602
A
10709/*
10710 * Routine: vm_map_copyout_size
10711 *
10712 * Description:
10713 * Copy out a copy chain ("copy") into newly-allocated
10714 * space in the destination map. Uses a prevalidated
10715 * size for the copy object (vm_map_copy_validate_size).
10716 *
10717 * If successful, consumes the copy object.
10718 * Otherwise, the caller is responsible for it.
10719 */
10720kern_return_t
10721vm_map_copyout_size(
0a7de745
A
10722 vm_map_t dst_map,
10723 vm_map_address_t *dst_addr, /* OUT */
10724 vm_map_copy_t copy,
10725 vm_map_size_t copy_size)
39037602
A
10726{
10727 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
0a7de745
A
10728 TRUE, /* consume_on_success */
10729 VM_PROT_DEFAULT,
10730 VM_PROT_ALL,
10731 VM_INHERIT_DEFAULT);
39037602 10732}
2dced7af 10733
1c79356b
A
10734/*
10735 * Routine: vm_map_copyout
10736 *
10737 * Description:
10738 * Copy out a copy chain ("copy") into newly-allocated
10739 * space in the destination map.
10740 *
10741 * If successful, consumes the copy object.
10742 * Otherwise, the caller is responsible for it.
10743 */
10744kern_return_t
10745vm_map_copyout(
0a7de745
A
10746 vm_map_t dst_map,
10747 vm_map_address_t *dst_addr, /* OUT */
10748 vm_map_copy_t copy)
39236c6e 10749{
39037602 10750 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
0a7de745
A
10751 TRUE, /* consume_on_success */
10752 VM_PROT_DEFAULT,
10753 VM_PROT_ALL,
10754 VM_INHERIT_DEFAULT);
39236c6e
A
10755}
10756
10757kern_return_t
10758vm_map_copyout_internal(
0a7de745
A
10759 vm_map_t dst_map,
10760 vm_map_address_t *dst_addr, /* OUT */
10761 vm_map_copy_t copy,
10762 vm_map_size_t copy_size,
10763 boolean_t consume_on_success,
10764 vm_prot_t cur_protection,
10765 vm_prot_t max_protection,
10766 vm_inherit_t inheritance)
1c79356b 10767{
0a7de745
A
10768 vm_map_size_t size;
10769 vm_map_size_t adjustment;
10770 vm_map_offset_t start;
10771 vm_object_offset_t vm_copy_start;
10772 vm_map_entry_t last;
10773 vm_map_entry_t entry;
10774 vm_map_entry_t hole_entry;
f427ee49 10775 vm_map_copy_t original_copy;
1c79356b
A
10776
10777 /*
10778 * Check for null copy object.
10779 */
10780
10781 if (copy == VM_MAP_COPY_NULL) {
10782 *dst_addr = 0;
0a7de745 10783 return KERN_SUCCESS;
1c79356b
A
10784 }
10785
f427ee49
A
10786 /*
10787 * Assert that the vm_map_copy is coming from the right
10788 * zone and hasn't been forged
10789 */
10790 vm_map_copy_require(copy);
10791
39037602
A
10792 if (copy->size != copy_size) {
10793 *dst_addr = 0;
10794 return KERN_FAILURE;
10795 }
10796
1c79356b
A
10797 /*
10798 * Check for special copy object, created
10799 * by vm_map_copyin_object.
10800 */
10801
10802 if (copy->type == VM_MAP_COPY_OBJECT) {
0a7de745
A
10803 vm_object_t object = copy->cpy_object;
10804 kern_return_t kr;
10805 vm_object_offset_t offset;
1c79356b 10806
91447636 10807 offset = vm_object_trunc_page(copy->offset);
39037602 10808 size = vm_map_round_page((copy_size +
0a7de745
A
10809 (vm_map_size_t)(copy->offset -
10810 offset)),
10811 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
10812 *dst_addr = 0;
10813 kr = vm_map_enter(dst_map, dst_addr, size,
0a7de745
A
10814 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10815 VM_MAP_KERNEL_FLAGS_NONE,
10816 VM_KERN_MEMORY_NONE,
10817 object, offset, FALSE,
10818 VM_PROT_DEFAULT, VM_PROT_ALL,
10819 VM_INHERIT_DEFAULT);
10820 if (kr != KERN_SUCCESS) {
10821 return kr;
10822 }
1c79356b 10823 /* Account for non-pagealigned copy object */
91447636 10824 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
0a7de745 10825 if (consume_on_success) {
39236c6e 10826 zfree(vm_map_copy_zone, copy);
0a7de745
A
10827 }
10828 return KERN_SUCCESS;
1c79356b
A
10829 }
10830
10831 /*
10832 * Check for special kernel buffer allocated
10833 * by new_ipc_kmsg_copyin.
10834 */
10835
10836 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
39037602 10837 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
0a7de745
A
10838 copy, copy_size, FALSE,
10839 consume_on_success);
1c79356b
A
10840 }
10841
f427ee49
A
10842 original_copy = copy;
10843 if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
10844 kern_return_t kr;
10845 vm_map_copy_t target_copy;
10846 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
10847
10848 target_copy = VM_MAP_COPY_NULL;
10849 DEBUG4K_ADJUST("adjusting...\n");
10850 kr = vm_map_copy_adjust_to_target(
10851 copy,
10852 0, /* offset */
10853 copy->size, /* size */
10854 dst_map,
10855 TRUE, /* copy */
10856 &target_copy,
10857 &overmap_start,
10858 &overmap_end,
10859 &trimmed_start);
10860 if (kr != KERN_SUCCESS) {
10861 DEBUG4K_COPY("adjust failed 0x%x\n", kr);
10862 return kr;
10863 }
10864 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
10865 if (target_copy != copy) {
10866 copy = target_copy;
10867 }
10868 copy_size = copy->size;
10869 }
10870
1c79356b
A
10871 /*
10872 * Find space for the data
10873 */
10874
39236c6e 10875 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
0a7de745 10876 VM_MAP_COPY_PAGE_MASK(copy));
39037602 10877 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
0a7de745
A
10878 VM_MAP_COPY_PAGE_MASK(copy))
10879 - vm_copy_start;
1c79356b 10880
39236c6e 10881
0a7de745 10882StartAgain:;
1c79356b
A
10883
10884 vm_map_lock(dst_map);
0a7de745 10885 if (dst_map->disable_vmentry_reuse == TRUE) {
6d2010ae
A
10886 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10887 last = entry;
10888 } else {
3e170ce0 10889 if (dst_map->holelistenabled) {
d9a64523 10890 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
3e170ce0
A
10891
10892 if (hole_entry == NULL) {
10893 /*
10894 * No more space in the map?
10895 */
10896 vm_map_unlock(dst_map);
0a7de745 10897 return KERN_NO_SPACE;
3e170ce0
A
10898 }
10899
10900 last = hole_entry;
10901 start = last->vme_start;
10902 } else {
10903 assert(first_free_is_valid(dst_map));
10904 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
0a7de745 10905 vm_map_min(dst_map) : last->vme_end;
3e170ce0 10906 }
39236c6e 10907 start = vm_map_round_page(start,
0a7de745 10908 VM_MAP_PAGE_MASK(dst_map));
6d2010ae 10909 }
1c79356b
A
10910
10911 while (TRUE) {
0a7de745
A
10912 vm_map_entry_t next = last->vme_next;
10913 vm_map_offset_t end = start + size;
1c79356b
A
10914
10915 if ((end > dst_map->max_offset) || (end < start)) {
10916 if (dst_map->wait_for_space) {
10917 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10918 assert_wait((event_t) dst_map,
0a7de745 10919 THREAD_INTERRUPTIBLE);
1c79356b 10920 vm_map_unlock(dst_map);
91447636 10921 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
10922 goto StartAgain;
10923 }
10924 }
10925 vm_map_unlock(dst_map);
0a7de745 10926 return KERN_NO_SPACE;
1c79356b
A
10927 }
10928
3e170ce0 10929 if (dst_map->holelistenabled) {
0a7de745 10930 if (last->vme_end >= end) {
3e170ce0 10931 break;
0a7de745 10932 }
3e170ce0
A
10933 } else {
10934 /*
10935 * If there are no more entries, we must win.
10936 *
10937 * OR
10938 *
10939 * If there is another entry, it must be
10940 * after the end of the potential new region.
10941 */
10942
0a7de745 10943 if (next == vm_map_to_entry(dst_map)) {
3e170ce0 10944 break;
0a7de745 10945 }
3e170ce0 10946
0a7de745 10947 if (next->vme_start >= end) {
3e170ce0 10948 break;
0a7de745 10949 }
3e170ce0 10950 }
1c79356b
A
10951
10952 last = next;
3e170ce0
A
10953
10954 if (dst_map->holelistenabled) {
d9a64523 10955 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
3e170ce0
A
10956 /*
10957 * Wrapped around
10958 */
10959 vm_map_unlock(dst_map);
0a7de745 10960 return KERN_NO_SPACE;
3e170ce0
A
10961 }
10962 start = last->vme_start;
10963 } else {
10964 start = last->vme_end;
10965 }
39236c6e 10966 start = vm_map_round_page(start,
0a7de745 10967 VM_MAP_PAGE_MASK(dst_map));
39236c6e
A
10968 }
10969
3e170ce0
A
10970 if (dst_map->holelistenabled) {
10971 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10972 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10973 }
10974 }
10975
10976
39236c6e 10977 adjustment = start - vm_copy_start;
0a7de745 10978 if (!consume_on_success) {
39236c6e
A
10979 /*
10980 * We're not allowed to consume "copy", so we'll have to
10981 * copy its map entries into the destination map below.
10982 * No need to re-allocate map entries from the correct
10983 * (pageable or not) zone, since we'll get new map entries
10984 * during the transfer.
10985 * We'll also adjust the map entries's "start" and "end"
10986 * during the transfer, to keep "copy"'s entries consistent
10987 * with its "offset".
10988 */
10989 goto after_adjustments;
1c79356b
A
10990 }
10991
10992 /*
10993 * Since we're going to just drop the map
10994 * entries from the copy into the destination
10995 * map, they must come from the same pool.
10996 */
10997
10998 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
10999 /*
11000 * Mismatches occur when dealing with the default
11001 * pager.
11002 */
0a7de745
A
11003 zone_t old_zone;
11004 vm_map_entry_t next, new;
2d21ac55
A
11005
11006 /*
11007 * Find the zone that the copies were allocated from
11008 */
7ddcb079 11009
2d21ac55
A
11010 entry = vm_map_copy_first_entry(copy);
11011
11012 /*
11013 * Reinitialize the copy so that vm_map_copy_entry_link
11014 * will work.
11015 */
6d2010ae 11016 vm_map_store_copy_reset(copy, entry);
2d21ac55 11017 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
11018
11019 /*
11020 * Copy each entry.
11021 */
11022 while (entry != vm_map_copy_to_entry(copy)) {
7ddcb079 11023 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
2d21ac55 11024 vm_map_entry_copy_full(new, entry);
cb323159 11025 new->vme_no_copy_on_read = FALSE;
fe8ab488
A
11026 assert(!new->iokit_acct);
11027 if (new->is_sub_map) {
11028 /* clr address space specifics */
11029 new->use_pmap = FALSE;
11030 }
2d21ac55 11031 vm_map_copy_entry_link(copy,
0a7de745
A
11032 vm_map_copy_last_entry(copy),
11033 new);
2d21ac55 11034 next = entry->vme_next;
7ddcb079 11035 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
2d21ac55
A
11036 zfree(old_zone, entry);
11037 entry = next;
11038 }
1c79356b
A
11039 }
11040
11041 /*
11042 * Adjust the addresses in the copy chain, and
11043 * reset the region attributes.
11044 */
11045
1c79356b 11046 for (entry = vm_map_copy_first_entry(copy);
0a7de745
A
11047 entry != vm_map_copy_to_entry(copy);
11048 entry = entry->vme_next) {
39236c6e
A
11049 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
11050 /*
11051 * We're injecting this copy entry into a map that
11052 * has the standard page alignment, so clear
11053 * "map_aligned" (which might have been inherited
11054 * from the original map entry).
11055 */
11056 entry->map_aligned = FALSE;
11057 }
11058
1c79356b
A
11059 entry->vme_start += adjustment;
11060 entry->vme_end += adjustment;
11061
39236c6e
A
11062 if (entry->map_aligned) {
11063 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
0a7de745 11064 VM_MAP_PAGE_MASK(dst_map)));
39236c6e 11065 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
0a7de745 11066 VM_MAP_PAGE_MASK(dst_map)));
39236c6e
A
11067 }
11068
1c79356b
A
11069 entry->inheritance = VM_INHERIT_DEFAULT;
11070 entry->protection = VM_PROT_DEFAULT;
11071 entry->max_protection = VM_PROT_ALL;
11072 entry->behavior = VM_BEHAVIOR_DEFAULT;
11073
11074 /*
11075 * If the entry is now wired,
11076 * map the pages into the destination map.
11077 */
11078 if (entry->wired_count != 0) {
39037602 11079 vm_map_offset_t va;
0a7de745 11080 vm_object_offset_t offset;
39037602 11081 vm_object_t object;
2d21ac55 11082 vm_prot_t prot;
0a7de745 11083 int type_of_fault;
1c79356b 11084
f427ee49
A
11085 /* TODO4K would need to use actual page size */
11086 assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);
11087
3e170ce0
A
11088 object = VME_OBJECT(entry);
11089 offset = VME_OFFSET(entry);
2d21ac55 11090 va = entry->vme_start;
1c79356b 11091
2d21ac55 11092 pmap_pageable(dst_map->pmap,
0a7de745
A
11093 entry->vme_start,
11094 entry->vme_end,
11095 TRUE);
1c79356b 11096
2d21ac55 11097 while (va < entry->vme_end) {
0a7de745 11098 vm_page_t m;
d9a64523 11099 struct vm_object_fault_info fault_info = {};
1c79356b 11100
2d21ac55
A
11101 /*
11102 * Look up the page in the object.
11103 * Assert that the page will be found in the
11104 * top object:
11105 * either
11106 * the object was newly created by
11107 * vm_object_copy_slowly, and has
11108 * copies of all of the pages from
11109 * the source object
11110 * or
11111 * the object was moved from the old
11112 * map entry; because the old map
11113 * entry was wired, all of the pages
11114 * were in the top-level object.
11115 * (XXX not true if we wire pages for
11116 * reading)
11117 */
11118 vm_object_lock(object);
91447636 11119
2d21ac55 11120 m = vm_page_lookup(object, offset);
b0d623f7 11121 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
0a7de745 11122 m->vmp_absent) {
2d21ac55 11123 panic("vm_map_copyout: wiring %p", m);
0a7de745 11124 }
1c79356b 11125
2d21ac55 11126 prot = entry->protection;
1c79356b 11127
3e170ce0 11128 if (override_nx(dst_map, VME_ALIAS(entry)) &&
0a7de745
A
11129 prot) {
11130 prot |= VM_PROT_EXECUTE;
11131 }
1c79356b 11132
2d21ac55 11133 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 11134
d9a64523
A
11135 fault_info.user_tag = VME_ALIAS(entry);
11136 fault_info.pmap_options = 0;
11137 if (entry->iokit_acct ||
11138 (!entry->is_sub_map && !entry->use_pmap)) {
11139 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11140 }
11141
11142 vm_fault_enter(m,
0a7de745
A
11143 dst_map->pmap,
11144 va,
f427ee49 11145 PAGE_SIZE, 0,
0a7de745
A
11146 prot,
11147 prot,
11148 VM_PAGE_WIRED(m),
11149 FALSE, /* change_wiring */
11150 VM_KERN_MEMORY_NONE, /* tag - not wiring */
11151 &fault_info,
11152 NULL, /* need_retry */
11153 &type_of_fault);
1c79356b 11154
2d21ac55 11155 vm_object_unlock(object);
1c79356b 11156
2d21ac55
A
11157 offset += PAGE_SIZE_64;
11158 va += PAGE_SIZE;
1c79356b
A
11159 }
11160 }
11161 }
11162
39236c6e
A
11163after_adjustments:
11164
1c79356b
A
11165 /*
11166 * Correct the page alignment for the result
11167 */
11168
11169 *dst_addr = start + (copy->offset - vm_copy_start);
11170
5ba3f43e
A
11171#if KASAN
11172 kasan_notify_address(*dst_addr, size);
11173#endif
11174
1c79356b
A
11175 /*
11176 * Update the hints and the map size
11177 */
11178
39236c6e
A
11179 if (consume_on_success) {
11180 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
11181 } else {
11182 SAVE_HINT_MAP_WRITE(dst_map, last);
11183 }
1c79356b
A
11184
11185 dst_map->size += size;
11186
11187 /*
11188 * Link in the copy
11189 */
11190
39236c6e
A
11191 if (consume_on_success) {
11192 vm_map_copy_insert(dst_map, last, copy);
f427ee49
A
11193 if (copy != original_copy) {
11194 vm_map_copy_discard(original_copy);
11195 original_copy = VM_MAP_COPY_NULL;
11196 }
39236c6e
A
11197 } else {
11198 vm_map_copy_remap(dst_map, last, copy, adjustment,
0a7de745
A
11199 cur_protection, max_protection,
11200 inheritance);
f427ee49
A
11201 if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
11202 vm_map_copy_discard(copy);
11203 copy = original_copy;
11204 }
39236c6e 11205 }
1c79356b 11206
f427ee49 11207
1c79356b
A
11208 vm_map_unlock(dst_map);
11209
11210 /*
11211 * XXX If wiring_required, call vm_map_pageable
11212 */
11213
0a7de745 11214 return KERN_SUCCESS;
1c79356b
A
11215}
11216
1c79356b
A
11217/*
11218 * Routine: vm_map_copyin
11219 *
11220 * Description:
2d21ac55
A
11221 * see vm_map_copyin_common. Exported via Unsupported.exports.
11222 *
11223 */
11224
11225#undef vm_map_copyin
11226
11227kern_return_t
11228vm_map_copyin(
0a7de745
A
11229 vm_map_t src_map,
11230 vm_map_address_t src_addr,
11231 vm_map_size_t len,
11232 boolean_t src_destroy,
11233 vm_map_copy_t *copy_result) /* OUT */
2d21ac55 11234{
0a7de745
A
11235 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11236 FALSE, copy_result, FALSE);
2d21ac55
A
11237}
11238
11239/*
11240 * Routine: vm_map_copyin_common
11241 *
11242 * Description:
1c79356b
A
11243 * Copy the specified region (src_addr, len) from the
11244 * source address space (src_map), possibly removing
11245 * the region from the source address space (src_destroy).
11246 *
11247 * Returns:
11248 * A vm_map_copy_t object (copy_result), suitable for
11249 * insertion into another address space (using vm_map_copyout),
11250 * copying over another address space region (using
11251 * vm_map_copy_overwrite). If the copy is unused, it
11252 * should be destroyed (using vm_map_copy_discard).
11253 *
11254 * In/out conditions:
11255 * The source map should not be locked on entry.
11256 */
11257
11258typedef struct submap_map {
0a7de745
A
11259 vm_map_t parent_map;
11260 vm_map_offset_t base_start;
11261 vm_map_offset_t base_end;
11262 vm_map_size_t base_len;
1c79356b
A
11263 struct submap_map *next;
11264} submap_map_t;
11265
11266kern_return_t
11267vm_map_copyin_common(
0a7de745 11268 vm_map_t src_map,
91447636 11269 vm_map_address_t src_addr,
0a7de745
A
11270 vm_map_size_t len,
11271 boolean_t src_destroy,
11272 __unused boolean_t src_volatile,
11273 vm_map_copy_t *copy_result, /* OUT */
11274 boolean_t use_maxprot)
4bd07ac2
A
11275{
11276 int flags;
11277
11278 flags = 0;
11279 if (src_destroy) {
11280 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11281 }
11282 if (use_maxprot) {
11283 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11284 }
11285 return vm_map_copyin_internal(src_map,
0a7de745
A
11286 src_addr,
11287 len,
11288 flags,
11289 copy_result);
4bd07ac2
A
11290}
11291kern_return_t
11292vm_map_copyin_internal(
0a7de745 11293 vm_map_t src_map,
4bd07ac2 11294 vm_map_address_t src_addr,
0a7de745
A
11295 vm_map_size_t len,
11296 int flags,
11297 vm_map_copy_t *copy_result) /* OUT */
1c79356b 11298{
0a7de745
A
11299 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11300 * in multi-level lookup, this
11301 * entry contains the actual
11302 * vm_object/offset.
11303 */
11304 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11305
11306 vm_map_offset_t src_start; /* Start of current entry --
11307 * where copy is taking place now
11308 */
11309 vm_map_offset_t src_end; /* End of entire region to be
11310 * copied */
2d21ac55 11311 vm_map_offset_t src_base;
0a7de745
A
11312 vm_map_t base_map = src_map;
11313 boolean_t map_share = FALSE;
11314 submap_map_t *parent_maps = NULL;
1c79356b 11315
0a7de745 11316 vm_map_copy_t copy; /* Resulting copy */
fe8ab488 11317 vm_map_address_t copy_addr;
0a7de745
A
11318 vm_map_size_t copy_size;
11319 boolean_t src_destroy;
11320 boolean_t use_maxprot;
11321 boolean_t preserve_purgeable;
11322 boolean_t entry_was_shared;
11323 vm_map_entry_t saved_src_entry;
4bd07ac2
A
11324
11325 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11326 return KERN_INVALID_ARGUMENT;
11327 }
5ba3f43e 11328
4bd07ac2
A
11329 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11330 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
39037602 11331 preserve_purgeable =
0a7de745 11332 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
1c79356b
A
11333
11334 /*
11335 * Check for copies of zero bytes.
11336 */
11337
11338 if (len == 0) {
11339 *copy_result = VM_MAP_COPY_NULL;
0a7de745 11340 return KERN_SUCCESS;
1c79356b
A
11341 }
11342
4a249263
A
11343 /*
11344 * Check that the end address doesn't overflow
11345 */
11346 src_end = src_addr + len;
0a7de745 11347 if (src_end < src_addr) {
4a249263 11348 return KERN_INVALID_ADDRESS;
0a7de745 11349 }
4a249263 11350
39037602
A
11351 /*
11352 * Compute (page aligned) start and end of region
11353 */
11354 src_start = vm_map_trunc_page(src_addr,
0a7de745 11355 VM_MAP_PAGE_MASK(src_map));
39037602 11356 src_end = vm_map_round_page(src_end,
0a7de745 11357 VM_MAP_PAGE_MASK(src_map));
39037602 11358
1c79356b
A
11359 /*
11360 * If the copy is sufficiently small, use a kernel buffer instead
11361 * of making a virtual copy. The theory being that the cost of
11362 * setting up VM (and taking C-O-W faults) dominates the copy costs
11363 * for small regions.
11364 */
4bd07ac2
A
11365 if ((len < msg_ool_size_small) &&
11366 !use_maxprot &&
39037602
A
11367 !preserve_purgeable &&
11368 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11369 /*
11370 * Since the "msg_ool_size_small" threshold was increased and
11371 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11372 * address space limits, we revert to doing a virtual copy if the
11373 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11374 * of the commpage would now fail when it used to work.
11375 */
11376 (src_start >= vm_map_min(src_map) &&
0a7de745
A
11377 src_start < vm_map_max(src_map) &&
11378 src_end >= vm_map_min(src_map) &&
11379 src_end < vm_map_max(src_map))) {
2d21ac55 11380 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
0a7de745
A
11381 src_destroy, copy_result);
11382 }
1c79356b 11383
1c79356b
A
11384 /*
11385 * Allocate a header element for the list.
11386 *
5ba3f43e 11387 * Use the start and end in the header to
1c79356b
A
11388 * remember the endpoints prior to rounding.
11389 */
11390
d9a64523 11391 copy = vm_map_copy_allocate();
1c79356b 11392 copy->type = VM_MAP_COPY_ENTRY_LIST;
1c79356b 11393 copy->cpy_hdr.entries_pageable = TRUE;
f427ee49 11394 copy->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(src_map);
1c79356b 11395
0a7de745 11396 vm_map_store_init( &(copy->cpy_hdr));
6d2010ae 11397
1c79356b
A
11398 copy->offset = src_addr;
11399 copy->size = len;
5ba3f43e 11400
7ddcb079 11401 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 11402
0a7de745
A
11403#define RETURN(x) \
11404 MACRO_BEGIN \
11405 vm_map_unlock(src_map); \
11406 if(src_map != base_map) \
11407 vm_map_deallocate(src_map); \
11408 if (new_entry != VM_MAP_ENTRY_NULL) \
11409 vm_map_copy_entry_dispose(copy,new_entry); \
11410 vm_map_copy_discard(copy); \
11411 { \
11412 submap_map_t *_ptr; \
11413 \
11414 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11415 parent_maps=parent_maps->next; \
11416 if (_ptr->parent_map != base_map) \
11417 vm_map_deallocate(_ptr->parent_map); \
11418 kfree(_ptr, sizeof(submap_map_t)); \
11419 } \
11420 } \
11421 MACRO_RETURN(x); \
1c79356b
A
11422 MACRO_END
11423
11424 /*
11425 * Find the beginning of the region.
11426 */
11427
0a7de745 11428 vm_map_lock(src_map);
1c79356b 11429
fe8ab488
A
11430 /*
11431 * Lookup the original "src_addr" rather than the truncated
11432 * "src_start", in case "src_start" falls in a non-map-aligned
11433 * map entry *before* the map entry that contains "src_addr"...
11434 */
0a7de745 11435 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
1c79356b 11436 RETURN(KERN_INVALID_ADDRESS);
0a7de745
A
11437 }
11438 if (!tmp_entry->is_sub_map) {
fe8ab488
A
11439 /*
11440 * ... but clip to the map-rounded "src_start" rather than
11441 * "src_addr" to preserve map-alignment. We'll adjust the
11442 * first copy entry at the end, if needed.
11443 */
1c79356b
A
11444 vm_map_clip_start(src_map, tmp_entry, src_start);
11445 }
fe8ab488
A
11446 if (src_start < tmp_entry->vme_start) {
11447 /*
11448 * Move "src_start" up to the start of the
11449 * first map entry to copy.
11450 */
11451 src_start = tmp_entry->vme_start;
11452 }
1c79356b
A
11453 /* set for later submap fix-up */
11454 copy_addr = src_start;
11455
11456 /*
11457 * Go through entries until we get to the end.
11458 */
11459
11460 while (TRUE) {
0a7de745
A
11461 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11462 vm_map_size_t src_size; /* Size of source
11463 * map entry (in both
11464 * maps)
11465 */
11466
11467 vm_object_t src_object; /* Object to copy */
11468 vm_object_offset_t src_offset;
11469
11470 boolean_t src_needs_copy; /* Should source map
11471 * be made read-only
11472 * for copy-on-write?
11473 */
11474
11475 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11476
11477 boolean_t was_wired; /* Was source wired? */
11478 vm_map_version_t version; /* Version before locks
11479 * dropped to make copy
11480 */
11481 kern_return_t result; /* Return value from
11482 * copy_strategically.
11483 */
11484 while (tmp_entry->is_sub_map) {
91447636 11485 vm_map_size_t submap_len;
1c79356b
A
11486 submap_map_t *ptr;
11487
11488 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11489 ptr->next = parent_maps;
11490 parent_maps = ptr;
11491 ptr->parent_map = src_map;
11492 ptr->base_start = src_start;
11493 ptr->base_end = src_end;
11494 submap_len = tmp_entry->vme_end - src_start;
0a7de745
A
11495 if (submap_len > (src_end - src_start)) {
11496 submap_len = src_end - src_start;
11497 }
2d21ac55 11498 ptr->base_len = submap_len;
5ba3f43e 11499
1c79356b 11500 src_start -= tmp_entry->vme_start;
3e170ce0 11501 src_start += VME_OFFSET(tmp_entry);
1c79356b 11502 src_end = src_start + submap_len;
3e170ce0 11503 src_map = VME_SUBMAP(tmp_entry);
1c79356b 11504 vm_map_lock(src_map);
9bccf70c
A
11505 /* keep an outstanding reference for all maps in */
11506 /* the parents tree except the base map */
11507 vm_map_reference(src_map);
1c79356b
A
11508 vm_map_unlock(ptr->parent_map);
11509 if (!vm_map_lookup_entry(
0a7de745 11510 src_map, src_start, &tmp_entry)) {
1c79356b 11511 RETURN(KERN_INVALID_ADDRESS);
0a7de745 11512 }
1c79356b 11513 map_share = TRUE;
0a7de745 11514 if (!tmp_entry->is_sub_map) {
2d21ac55 11515 vm_map_clip_start(src_map, tmp_entry, src_start);
0a7de745 11516 }
1c79356b
A
11517 src_entry = tmp_entry;
11518 }
2d21ac55
A
11519 /* we are now in the lowest level submap... */
11520
5ba3f43e 11521 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
3e170ce0 11522 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
55e303ae
A
11523 /* This is not supported for now. In future */
11524 /* we will need to detect the phys_contig */
11525 /* condition and then upgrade copy_slowly */
11526 /* to do physical copy from the device mem */
11527 /* based object. We can piggy-back off of */
11528 /* the was wired boolean to set-up the */
11529 /* proper handling */
0b4e3aa0
A
11530 RETURN(KERN_PROTECTION_FAILURE);
11531 }
1c79356b 11532 /*
5ba3f43e 11533 * Create a new address map entry to hold the result.
1c79356b
A
11534 * Fill in the fields from the appropriate source entries.
11535 * We must unlock the source map to do this if we need
11536 * to allocate a map entry.
11537 */
11538 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
11539 version.main_timestamp = src_map->timestamp;
11540 vm_map_unlock(src_map);
1c79356b 11541
7ddcb079 11542 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 11543
2d21ac55
A
11544 vm_map_lock(src_map);
11545 if ((version.main_timestamp + 1) != src_map->timestamp) {
11546 if (!vm_map_lookup_entry(src_map, src_start,
0a7de745 11547 &tmp_entry)) {
2d21ac55
A
11548 RETURN(KERN_INVALID_ADDRESS);
11549 }
0a7de745 11550 if (!tmp_entry->is_sub_map) {
2d21ac55 11551 vm_map_clip_start(src_map, tmp_entry, src_start);
0a7de745 11552 }
2d21ac55 11553 continue; /* restart w/ new tmp_entry */
1c79356b 11554 }
1c79356b
A
11555 }
11556
11557 /*
11558 * Verify that the region can be read.
11559 */
11560 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
0a7de745
A
11561 !use_maxprot) ||
11562 (src_entry->max_protection & VM_PROT_READ) == 0) {
1c79356b 11563 RETURN(KERN_PROTECTION_FAILURE);
0a7de745 11564 }
1c79356b
A
11565
11566 /*
11567 * Clip against the endpoints of the entire region.
11568 */
11569
11570 vm_map_clip_end(src_map, src_entry, src_end);
11571
11572 src_size = src_entry->vme_end - src_start;
3e170ce0
A
11573 src_object = VME_OBJECT(src_entry);
11574 src_offset = VME_OFFSET(src_entry);
1c79356b
A
11575 was_wired = (src_entry->wired_count != 0);
11576
f427ee49 11577 vm_map_entry_copy(src_map, new_entry, src_entry);
fe8ab488
A
11578 if (new_entry->is_sub_map) {
11579 /* clr address space specifics */
11580 new_entry->use_pmap = FALSE;
a39ff7e2
A
11581 } else {
11582 /*
11583 * We're dealing with a copy-on-write operation,
11584 * so the resulting mapping should not inherit the
11585 * original mapping's accounting settings.
11586 * "iokit_acct" should have been cleared in
11587 * vm_map_entry_copy().
11588 * "use_pmap" should be reset to its default (TRUE)
11589 * so that the new mapping gets accounted for in
11590 * the task's memory footprint.
11591 */
11592 assert(!new_entry->iokit_acct);
11593 new_entry->use_pmap = TRUE;
fe8ab488 11594 }
1c79356b
A
11595
11596 /*
11597 * Attempt non-blocking copy-on-write optimizations.
11598 */
11599
4ba76501
A
11600 /*
11601 * If we are destroying the source, and the object
11602 * is internal, we could move the object reference
11603 * from the source to the copy. The copy is
11604 * copy-on-write only if the source is.
11605 * We make another reference to the object, because
11606 * destroying the source entry will deallocate it.
11607 *
11608 * This memory transfer has to be atomic, (to prevent
11609 * the VM object from being shared or copied while
11610 * it's being moved here), so we could only do this
11611 * if we won't have to unlock the VM map until the
11612 * original mapping has been fully removed.
11613 */
1c79356b 11614
0a7de745 11615RestartCopy:
55e303ae 11616 if ((src_object == VM_OBJECT_NULL ||
f427ee49
A
11617 (!was_wired && !map_share && !tmp_entry->is_shared
11618 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
2d21ac55 11619 vm_object_copy_quickly(
cb323159 11620 VME_OBJECT_PTR(new_entry),
2d21ac55
A
11621 src_offset,
11622 src_size,
11623 &src_needs_copy,
11624 &new_entry_needs_copy)) {
1c79356b
A
11625 new_entry->needs_copy = new_entry_needs_copy;
11626
11627 /*
11628 * Handle copy-on-write obligations
11629 */
11630
11631 if (src_needs_copy && !tmp_entry->needs_copy) {
0a7de745 11632 vm_prot_t prot;
0c530ab8
A
11633
11634 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 11635
3e170ce0 11636 if (override_nx(src_map, VME_ALIAS(src_entry))
0a7de745
A
11637 && prot) {
11638 prot |= VM_PROT_EXECUTE;
11639 }
2d21ac55 11640
55e303ae
A
11641 vm_object_pmap_protect(
11642 src_object,
11643 src_offset,
11644 src_size,
0a7de745
A
11645 (src_entry->is_shared ?
11646 PMAP_NULL
11647 : src_map->pmap),
f427ee49 11648 VM_MAP_PAGE_SIZE(src_map),
55e303ae 11649 src_entry->vme_start,
0c530ab8
A
11650 prot);
11651
3e170ce0 11652 assert(tmp_entry->wired_count == 0);
55e303ae 11653 tmp_entry->needs_copy = TRUE;
1c79356b
A
11654 }
11655
11656 /*
11657 * The map has never been unlocked, so it's safe
11658 * to move to the next entry rather than doing
11659 * another lookup.
11660 */
11661
11662 goto CopySuccessful;
11663 }
11664
5ba3f43e
A
11665 entry_was_shared = tmp_entry->is_shared;
11666
1c79356b
A
11667 /*
11668 * Take an object reference, so that we may
11669 * release the map lock(s).
11670 */
11671
11672 assert(src_object != VM_OBJECT_NULL);
11673 vm_object_reference(src_object);
11674
11675 /*
11676 * Record the timestamp for later verification.
11677 * Unlock the map.
11678 */
11679
11680 version.main_timestamp = src_map->timestamp;
0a7de745 11681 vm_map_unlock(src_map); /* Increments timestamp once! */
5ba3f43e
A
11682 saved_src_entry = src_entry;
11683 tmp_entry = VM_MAP_ENTRY_NULL;
11684 src_entry = VM_MAP_ENTRY_NULL;
1c79356b
A
11685
11686 /*
11687 * Perform the copy
11688 */
11689
f427ee49
A
11690 if (was_wired ||
11691 (debug4k_no_cow_copyin &&
11692 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
0a7de745 11693CopySlowly:
1c79356b
A
11694 vm_object_lock(src_object);
11695 result = vm_object_copy_slowly(
2d21ac55
A
11696 src_object,
11697 src_offset,
11698 src_size,
11699 THREAD_UNINT,
cb323159 11700 VME_OBJECT_PTR(new_entry));
f427ee49
A
11701 VME_OFFSET_SET(new_entry,
11702 src_offset - vm_object_trunc_page(src_offset));
1c79356b 11703 new_entry->needs_copy = FALSE;
0a7de745
A
11704 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11705 (entry_was_shared || map_share)) {
11706 vm_object_t new_object;
55e303ae 11707
2d21ac55 11708 vm_object_lock_shared(src_object);
55e303ae 11709 new_object = vm_object_copy_delayed(
2d21ac55 11710 src_object,
5ba3f43e 11711 src_offset,
2d21ac55
A
11712 src_size,
11713 TRUE);
0a7de745
A
11714 if (new_object == VM_OBJECT_NULL) {
11715 goto CopySlowly;
11716 }
55e303ae 11717
3e170ce0
A
11718 VME_OBJECT_SET(new_entry, new_object);
11719 assert(new_entry->wired_count == 0);
55e303ae 11720 new_entry->needs_copy = TRUE;
fe8ab488
A
11721 assert(!new_entry->iokit_acct);
11722 assert(new_object->purgable == VM_PURGABLE_DENY);
a39ff7e2 11723 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
55e303ae 11724 result = KERN_SUCCESS;
1c79356b 11725 } else {
3e170ce0
A
11726 vm_object_offset_t new_offset;
11727 new_offset = VME_OFFSET(new_entry);
1c79356b 11728 result = vm_object_copy_strategically(src_object,
0a7de745
A
11729 src_offset,
11730 src_size,
cb323159 11731 VME_OBJECT_PTR(new_entry),
0a7de745
A
11732 &new_offset,
11733 &new_entry_needs_copy);
3e170ce0
A
11734 if (new_offset != VME_OFFSET(new_entry)) {
11735 VME_OFFSET_SET(new_entry, new_offset);
11736 }
1c79356b
A
11737
11738 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
11739 }
11740
39037602 11741 if (result == KERN_SUCCESS &&
f427ee49
A
11742 ((preserve_purgeable &&
11743 src_object->purgable != VM_PURGABLE_DENY) ||
11744 new_entry->used_for_jit)) {
11745 /*
11746 * Purgeable objects should be COPY_NONE, true share;
11747 * this should be propagated to the copy.
11748 *
11749 * Also force mappings the pmap specially protects to
11750 * be COPY_NONE; trying to COW these mappings would
11751 * change the effective protections, which could have
11752 * side effects if the pmap layer relies on the
11753 * specified protections.
11754 */
11755
0a7de745 11756 vm_object_t new_object;
39037602
A
11757
11758 new_object = VME_OBJECT(new_entry);
11759 assert(new_object != src_object);
11760 vm_object_lock(new_object);
11761 assert(new_object->ref_count == 1);
11762 assert(new_object->shadow == VM_OBJECT_NULL);
11763 assert(new_object->copy == VM_OBJECT_NULL);
d9a64523 11764 assert(new_object->vo_owner == NULL);
39037602
A
11765
11766 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
f427ee49
A
11767
11768 if (preserve_purgeable &&
11769 src_object->purgable != VM_PURGABLE_DENY) {
11770 new_object->true_share = TRUE;
11771
11772 /* start as non-volatile with no owner... */
11773 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11774 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11775 /* ... and move to src_object's purgeable state */
11776 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11777 int state;
11778 state = src_object->purgable;
11779 vm_object_purgable_control(
11780 new_object,
11781 VM_PURGABLE_SET_STATE_FROM_KERNEL,
11782 &state);
11783 }
11784 /* no pmap accounting for purgeable objects */
11785 new_entry->use_pmap = FALSE;
39037602 11786 }
f427ee49 11787
39037602
A
11788 vm_object_unlock(new_object);
11789 new_object = VM_OBJECT_NULL;
11790 }
11791
1c79356b
A
11792 if (result != KERN_SUCCESS &&
11793 result != KERN_MEMORY_RESTART_COPY) {
11794 vm_map_lock(src_map);
11795 RETURN(result);
11796 }
11797
11798 /*
11799 * Throw away the extra reference
11800 */
11801
11802 vm_object_deallocate(src_object);
11803
11804 /*
11805 * Verify that the map has not substantially
11806 * changed while the copy was being made.
11807 */
11808
9bccf70c 11809 vm_map_lock(src_map);
1c79356b 11810
5ba3f43e
A
11811 if ((version.main_timestamp + 1) == src_map->timestamp) {
11812 /* src_map hasn't changed: src_entry is still valid */
11813 src_entry = saved_src_entry;
1c79356b 11814 goto VerificationSuccessful;
5ba3f43e 11815 }
1c79356b
A
11816
11817 /*
11818 * Simple version comparison failed.
11819 *
11820 * Retry the lookup and verify that the
11821 * same object/offset are still present.
11822 *
11823 * [Note: a memory manager that colludes with
11824 * the calling task can detect that we have
11825 * cheated. While the map was unlocked, the
11826 * mapping could have been changed and restored.]
11827 */
11828
11829 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
fe8ab488 11830 if (result != KERN_MEMORY_RESTART_COPY) {
3e170ce0
A
11831 vm_object_deallocate(VME_OBJECT(new_entry));
11832 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
a39ff7e2
A
11833 /* reset accounting state */
11834 new_entry->iokit_acct = FALSE;
fe8ab488
A
11835 new_entry->use_pmap = TRUE;
11836 }
1c79356b
A
11837 RETURN(KERN_INVALID_ADDRESS);
11838 }
11839
11840 src_entry = tmp_entry;
11841 vm_map_clip_start(src_map, src_entry, src_start);
11842
91447636 11843 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
0a7de745
A
11844 !use_maxprot) ||
11845 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
1c79356b 11846 goto VerificationFailed;
0a7de745 11847 }
1c79356b 11848
39236c6e 11849 if (src_entry->vme_end < new_entry->vme_end) {
39037602
A
11850 /*
11851 * This entry might have been shortened
11852 * (vm_map_clip_end) or been replaced with
11853 * an entry that ends closer to "src_start"
11854 * than before.
11855 * Adjust "new_entry" accordingly; copying
11856 * less memory would be correct but we also
11857 * redo the copy (see below) if the new entry
11858 * no longer points at the same object/offset.
11859 */
39236c6e 11860 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
0a7de745 11861 VM_MAP_COPY_PAGE_MASK(copy)));
39236c6e
A
11862 new_entry->vme_end = src_entry->vme_end;
11863 src_size = new_entry->vme_end - src_start;
39037602
A
11864 } else if (src_entry->vme_end > new_entry->vme_end) {
11865 /*
11866 * This entry might have been extended
11867 * (vm_map_entry_simplify() or coalesce)
11868 * or been replaced with an entry that ends farther
5ba3f43e 11869 * from "src_start" than before.
39037602
A
11870 *
11871 * We've called vm_object_copy_*() only on
11872 * the previous <start:end> range, so we can't
11873 * just extend new_entry. We have to re-do
11874 * the copy based on the new entry as if it was
11875 * pointing at a different object/offset (see
11876 * "Verification failed" below).
11877 */
39236c6e 11878 }
1c79356b 11879
3e170ce0 11880 if ((VME_OBJECT(src_entry) != src_object) ||
39037602
A
11881 (VME_OFFSET(src_entry) != src_offset) ||
11882 (src_entry->vme_end > new_entry->vme_end)) {
1c79356b
A
11883 /*
11884 * Verification failed.
11885 *
11886 * Start over with this top-level entry.
11887 */
11888
0a7de745 11889VerificationFailed: ;
1c79356b 11890
3e170ce0 11891 vm_object_deallocate(VME_OBJECT(new_entry));
1c79356b
A
11892 tmp_entry = src_entry;
11893 continue;
11894 }
11895
11896 /*
11897 * Verification succeeded.
11898 */
11899
0a7de745 11900VerificationSuccessful:;
1c79356b 11901
0a7de745 11902 if (result == KERN_MEMORY_RESTART_COPY) {
1c79356b 11903 goto RestartCopy;
0a7de745 11904 }
1c79356b
A
11905
11906 /*
11907 * Copy succeeded.
11908 */
11909
0a7de745 11910CopySuccessful: ;
1c79356b
A
11911
11912 /*
11913 * Link in the new copy entry.
11914 */
11915
11916 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
0a7de745 11917 new_entry);
5ba3f43e 11918
1c79356b
A
11919 /*
11920 * Determine whether the entire region
11921 * has been copied.
11922 */
2d21ac55 11923 src_base = src_start;
1c79356b
A
11924 src_start = new_entry->vme_end;
11925 new_entry = VM_MAP_ENTRY_NULL;
11926 while ((src_start >= src_end) && (src_end != 0)) {
0a7de745 11927 submap_map_t *ptr;
fe8ab488
A
11928
11929 if (src_map == base_map) {
11930 /* back to the top */
1c79356b 11931 break;
fe8ab488
A
11932 }
11933
11934 ptr = parent_maps;
11935 assert(ptr != NULL);
11936 parent_maps = parent_maps->next;
11937
11938 /* fix up the damage we did in that submap */
11939 vm_map_simplify_range(src_map,
0a7de745
A
11940 src_base,
11941 src_end);
fe8ab488
A
11942
11943 vm_map_unlock(src_map);
11944 vm_map_deallocate(src_map);
11945 vm_map_lock(ptr->parent_map);
11946 src_map = ptr->parent_map;
11947 src_base = ptr->base_start;
11948 src_start = ptr->base_start + ptr->base_len;
11949 src_end = ptr->base_end;
11950 if (!vm_map_lookup_entry(src_map,
0a7de745
A
11951 src_start,
11952 &tmp_entry) &&
fe8ab488
A
11953 (src_end > src_start)) {
11954 RETURN(KERN_INVALID_ADDRESS);
11955 }
11956 kfree(ptr, sizeof(submap_map_t));
0a7de745 11957 if (parent_maps == NULL) {
fe8ab488 11958 map_share = FALSE;
0a7de745 11959 }
fe8ab488
A
11960 src_entry = tmp_entry->vme_prev;
11961 }
11962
11963 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11964 (src_start >= src_addr + len) &&
11965 (src_addr + len != 0)) {
11966 /*
11967 * Stop copying now, even though we haven't reached
11968 * "src_end". We'll adjust the end of the last copy
11969 * entry at the end, if needed.
11970 *
11971 * If src_map's alignment is different from the
11972 * system's page-alignment, there could be
11973 * extra non-map-aligned map entries between
11974 * the original (non-rounded) "src_addr + len"
11975 * and the rounded "src_end".
11976 * We do not want to copy those map entries since
11977 * they're not part of the copied range.
11978 */
11979 break;
1c79356b 11980 }
fe8ab488 11981
0a7de745 11982 if ((src_start >= src_end) && (src_end != 0)) {
1c79356b 11983 break;
0a7de745 11984 }
1c79356b
A
11985
11986 /*
11987 * Verify that there are no gaps in the region
11988 */
11989
11990 tmp_entry = src_entry->vme_next;
fe8ab488 11991 if ((tmp_entry->vme_start != src_start) ||
39236c6e 11992 (tmp_entry == vm_map_to_entry(src_map))) {
1c79356b 11993 RETURN(KERN_INVALID_ADDRESS);
39236c6e 11994 }
1c79356b
A
11995 }
11996
11997 /*
11998 * If the source should be destroyed, do it now, since the
5ba3f43e 11999 * copy was successful.
1c79356b
A
12000 */
12001 if (src_destroy) {
39236c6e
A
12002 (void) vm_map_delete(
12003 src_map,
12004 vm_map_trunc_page(src_addr,
0a7de745 12005 VM_MAP_PAGE_MASK(src_map)),
39236c6e
A
12006 src_end,
12007 ((src_map == kernel_map) ?
0a7de745
A
12008 VM_MAP_REMOVE_KUNWIRE :
12009 VM_MAP_REMOVE_NO_FLAGS),
39236c6e 12010 VM_MAP_NULL);
2d21ac55
A
12011 } else {
12012 /* fix up the damage we did in the base map */
39236c6e
A
12013 vm_map_simplify_range(
12014 src_map,
12015 vm_map_trunc_page(src_addr,
0a7de745 12016 VM_MAP_PAGE_MASK(src_map)),
39236c6e 12017 vm_map_round_page(src_end,
0a7de745 12018 VM_MAP_PAGE_MASK(src_map)));
1c79356b
A
12019 }
12020
12021 vm_map_unlock(src_map);
5ba3f43e 12022 tmp_entry = VM_MAP_ENTRY_NULL;
1c79356b 12023
f427ee49
A
12024 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12025 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
fe8ab488 12026 vm_map_offset_t original_start, original_offset, original_end;
5ba3f43e 12027
39236c6e
A
12028 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12029
12030 /* adjust alignment of first copy_entry's "vme_start" */
12031 tmp_entry = vm_map_copy_first_entry(copy);
12032 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12033 vm_map_offset_t adjustment;
fe8ab488
A
12034
12035 original_start = tmp_entry->vme_start;
3e170ce0 12036 original_offset = VME_OFFSET(tmp_entry);
fe8ab488
A
12037
12038 /* map-align the start of the first copy entry... */
12039 adjustment = (tmp_entry->vme_start -
0a7de745
A
12040 vm_map_trunc_page(
12041 tmp_entry->vme_start,
12042 VM_MAP_PAGE_MASK(src_map)));
fe8ab488 12043 tmp_entry->vme_start -= adjustment;
3e170ce0 12044 VME_OFFSET_SET(tmp_entry,
0a7de745 12045 VME_OFFSET(tmp_entry) - adjustment);
fe8ab488
A
12046 copy_addr -= adjustment;
12047 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12048 /* ... adjust for mis-aligned start of copy range */
39236c6e 12049 adjustment =
0a7de745
A
12050 (vm_map_trunc_page(copy->offset,
12051 PAGE_MASK) -
12052 vm_map_trunc_page(copy->offset,
12053 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
12054 if (adjustment) {
12055 assert(page_aligned(adjustment));
12056 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12057 tmp_entry->vme_start += adjustment;
3e170ce0 12058 VME_OFFSET_SET(tmp_entry,
0a7de745
A
12059 (VME_OFFSET(tmp_entry) +
12060 adjustment));
39236c6e
A
12061 copy_addr += adjustment;
12062 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12063 }
fe8ab488
A
12064
12065 /*
12066 * Assert that the adjustments haven't exposed
12067 * more than was originally copied...
12068 */
12069 assert(tmp_entry->vme_start >= original_start);
3e170ce0 12070 assert(VME_OFFSET(tmp_entry) >= original_offset);
fe8ab488
A
12071 /*
12072 * ... and that it did not adjust outside of a
12073 * single 16K page.
12074 */
12075 assert(vm_map_trunc_page(tmp_entry->vme_start,
0a7de745
A
12076 VM_MAP_PAGE_MASK(src_map)) ==
12077 vm_map_trunc_page(original_start,
12078 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
12079 }
12080
12081 /* adjust alignment of last copy_entry's "vme_end" */
12082 tmp_entry = vm_map_copy_last_entry(copy);
12083 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12084 vm_map_offset_t adjustment;
fe8ab488
A
12085
12086 original_end = tmp_entry->vme_end;
12087
12088 /* map-align the end of the last copy entry... */
12089 tmp_entry->vme_end =
0a7de745
A
12090 vm_map_round_page(tmp_entry->vme_end,
12091 VM_MAP_PAGE_MASK(src_map));
fe8ab488 12092 /* ... adjust for mis-aligned end of copy range */
39236c6e 12093 adjustment =
0a7de745
A
12094 (vm_map_round_page((copy->offset +
12095 copy->size),
12096 VM_MAP_PAGE_MASK(src_map)) -
12097 vm_map_round_page((copy->offset +
12098 copy->size),
12099 PAGE_MASK));
39236c6e
A
12100 if (adjustment) {
12101 assert(page_aligned(adjustment));
12102 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12103 tmp_entry->vme_end -= adjustment;
12104 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12105 }
fe8ab488
A
12106
12107 /*
12108 * Assert that the adjustments haven't exposed
12109 * more than was originally copied...
12110 */
12111 assert(tmp_entry->vme_end <= original_end);
12112 /*
12113 * ... and that it did not adjust outside of a
12114 * single 16K page.
12115 */
12116 assert(vm_map_round_page(tmp_entry->vme_end,
0a7de745
A
12117 VM_MAP_PAGE_MASK(src_map)) ==
12118 vm_map_round_page(original_end,
12119 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
12120 }
12121 }
12122
1c79356b
A
12123 /* Fix-up start and end points in copy. This is necessary */
12124 /* when the various entries in the copy object were picked */
12125 /* up from different sub-maps */
12126
12127 tmp_entry = vm_map_copy_first_entry(copy);
fe8ab488 12128 copy_size = 0; /* compute actual size */
1c79356b 12129 while (tmp_entry != vm_map_copy_to_entry(copy)) {
39236c6e 12130 assert(VM_MAP_PAGE_ALIGNED(
0a7de745
A
12131 copy_addr + (tmp_entry->vme_end -
12132 tmp_entry->vme_start),
f427ee49 12133 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
39236c6e 12134 assert(VM_MAP_PAGE_ALIGNED(
0a7de745 12135 copy_addr,
f427ee49 12136 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
39236c6e
A
12137
12138 /*
12139 * The copy_entries will be injected directly into the
12140 * destination map and might not be "map aligned" there...
12141 */
12142 tmp_entry->map_aligned = FALSE;
12143
5ba3f43e 12144 tmp_entry->vme_end = copy_addr +
0a7de745 12145 (tmp_entry->vme_end - tmp_entry->vme_start);
1c79356b 12146 tmp_entry->vme_start = copy_addr;
e2d2fc5c 12147 assert(tmp_entry->vme_start < tmp_entry->vme_end);
1c79356b 12148 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
fe8ab488 12149 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
1c79356b
A
12150 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12151 }
12152
fe8ab488
A
12153 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12154 copy_size < copy->size) {
12155 /*
12156 * The actual size of the VM map copy is smaller than what
12157 * was requested by the caller. This must be because some
12158 * PAGE_SIZE-sized pages are missing at the end of the last
12159 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12160 * The caller might not have been aware of those missing
12161 * pages and might not want to be aware of it, which is
12162 * fine as long as they don't try to access (and crash on)
12163 * those missing pages.
12164 * Let's adjust the size of the "copy", to avoid failing
12165 * in vm_map_copyout() or vm_map_copy_overwrite().
12166 */
12167 assert(vm_map_round_page(copy_size,
0a7de745
A
12168 VM_MAP_PAGE_MASK(src_map)) ==
12169 vm_map_round_page(copy->size,
12170 VM_MAP_PAGE_MASK(src_map)));
fe8ab488
A
12171 copy->size = copy_size;
12172 }
12173
1c79356b 12174 *copy_result = copy;
0a7de745 12175 return KERN_SUCCESS;
1c79356b 12176
0a7de745 12177#undef RETURN
1c79356b
A
12178}
12179
39236c6e
A
12180kern_return_t
12181vm_map_copy_extract(
0a7de745
A
12182 vm_map_t src_map,
12183 vm_map_address_t src_addr,
12184 vm_map_size_t len,
f427ee49 12185 boolean_t do_copy,
0a7de745 12186 vm_map_copy_t *copy_result, /* OUT */
c3c9b80d
A
12187 vm_prot_t *cur_prot, /* IN/OUT */
12188 vm_prot_t *max_prot, /* IN/OUT */
f427ee49
A
12189 vm_inherit_t inheritance,
12190 vm_map_kernel_flags_t vmk_flags)
39236c6e 12191{
0a7de745
A
12192 vm_map_copy_t copy;
12193 kern_return_t kr;
c3c9b80d 12194 vm_prot_t required_cur_prot, required_max_prot;
39236c6e
A
12195
12196 /*
12197 * Check for copies of zero bytes.
12198 */
12199
12200 if (len == 0) {
12201 *copy_result = VM_MAP_COPY_NULL;
0a7de745 12202 return KERN_SUCCESS;
39236c6e
A
12203 }
12204
12205 /*
12206 * Check that the end address doesn't overflow
12207 */
f427ee49 12208 if (src_addr + len < src_addr) {
39236c6e 12209 return KERN_INVALID_ADDRESS;
0a7de745 12210 }
39236c6e 12211
f427ee49
A
12212 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12213 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12214 }
39236c6e 12215
c3c9b80d
A
12216 required_cur_prot = *cur_prot;
12217 required_max_prot = *max_prot;
12218
39236c6e
A
12219 /*
12220 * Allocate a header element for the list.
12221 *
5ba3f43e 12222 * Use the start and end in the header to
39236c6e
A
12223 * remember the endpoints prior to rounding.
12224 */
12225
d9a64523 12226 copy = vm_map_copy_allocate();
39236c6e 12227 copy->type = VM_MAP_COPY_ENTRY_LIST;
f427ee49 12228 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
39236c6e
A
12229
12230 vm_map_store_init(&copy->cpy_hdr);
12231
12232 copy->offset = 0;
12233 copy->size = len;
12234
12235 kr = vm_map_remap_extract(src_map,
0a7de745
A
12236 src_addr,
12237 len,
c3c9b80d 12238 do_copy, /* copy */
0a7de745 12239 &copy->cpy_hdr,
c3c9b80d
A
12240 cur_prot, /* IN/OUT */
12241 max_prot, /* IN/OUT */
f427ee49
A
12242 inheritance,
12243 vmk_flags);
39236c6e
A
12244 if (kr != KERN_SUCCESS) {
12245 vm_map_copy_discard(copy);
12246 return kr;
12247 }
c3c9b80d
A
12248 if (required_cur_prot != VM_PROT_NONE) {
12249 assert((*cur_prot & required_cur_prot) == required_cur_prot);
12250 assert((*max_prot & required_max_prot) == required_max_prot);
12251 }
39236c6e
A
12252
12253 *copy_result = copy;
12254 return KERN_SUCCESS;
12255}
12256
1c79356b
A
12257/*
12258 * vm_map_copyin_object:
12259 *
12260 * Create a copy object from an object.
12261 * Our caller donates an object reference.
12262 */
12263
12264kern_return_t
12265vm_map_copyin_object(
0a7de745
A
12266 vm_object_t object,
12267 vm_object_offset_t offset, /* offset of region in object */
12268 vm_object_size_t size, /* size of region in object */
12269 vm_map_copy_t *copy_result) /* OUT */
1c79356b 12270{
0a7de745 12271 vm_map_copy_t copy; /* Resulting copy */
1c79356b
A
12272
12273 /*
12274 * We drop the object into a special copy object
12275 * that contains the object directly.
12276 */
12277
d9a64523 12278 copy = vm_map_copy_allocate();
1c79356b
A
12279 copy->type = VM_MAP_COPY_OBJECT;
12280 copy->cpy_object = object;
1c79356b
A
12281 copy->offset = offset;
12282 copy->size = size;
12283
12284 *copy_result = copy;
0a7de745 12285 return KERN_SUCCESS;
1c79356b
A
12286}
12287
91447636 12288static void
1c79356b 12289vm_map_fork_share(
0a7de745
A
12290 vm_map_t old_map,
12291 vm_map_entry_t old_entry,
12292 vm_map_t new_map)
1c79356b 12293{
0a7de745
A
12294 vm_object_t object;
12295 vm_map_entry_t new_entry;
1c79356b
A
12296
12297 /*
12298 * New sharing code. New map entry
12299 * references original object. Internal
12300 * objects use asynchronous copy algorithm for
12301 * future copies. First make sure we have
12302 * the right object. If we need a shadow,
12303 * or someone else already has one, then
12304 * make a new shadow and share it.
12305 */
5ba3f43e 12306
3e170ce0 12307 object = VME_OBJECT(old_entry);
1c79356b
A
12308 if (old_entry->is_sub_map) {
12309 assert(old_entry->wired_count == 0);
0c530ab8 12310#ifndef NO_NESTED_PMAP
0a7de745
A
12311 if (old_entry->use_pmap) {
12312 kern_return_t result;
91447636 12313
5ba3f43e 12314 result = pmap_nest(new_map->pmap,
0a7de745
A
12315 (VME_SUBMAP(old_entry))->pmap,
12316 (addr64_t)old_entry->vme_start,
0a7de745
A
12317 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12318 if (result) {
1c79356b 12319 panic("vm_map_fork_share: pmap_nest failed!");
0a7de745 12320 }
1c79356b 12321 }
0a7de745 12322#endif /* NO_NESTED_PMAP */
1c79356b 12323 } else if (object == VM_OBJECT_NULL) {
91447636 12324 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
0a7de745 12325 old_entry->vme_start));
3e170ce0
A
12326 VME_OFFSET_SET(old_entry, 0);
12327 VME_OBJECT_SET(old_entry, object);
fe8ab488 12328 old_entry->use_pmap = TRUE;
a39ff7e2 12329// assert(!old_entry->needs_copy);
1c79356b 12330 } else if (object->copy_strategy !=
0a7de745 12331 MEMORY_OBJECT_COPY_SYMMETRIC) {
1c79356b
A
12332 /*
12333 * We are already using an asymmetric
12334 * copy, and therefore we already have
12335 * the right object.
12336 */
5ba3f43e 12337
0a7de745
A
12338 assert(!old_entry->needs_copy);
12339 } else if (old_entry->needs_copy || /* case 1 */
12340 object->shadowed || /* case 2 */
12341 (!object->true_share && /* case 3 */
12342 !old_entry->is_shared &&
12343 (object->vo_size >
12344 (vm_map_size_t)(old_entry->vme_end -
12345 old_entry->vme_start)))) {
1c79356b
A
12346 /*
12347 * We need to create a shadow.
12348 * There are three cases here.
12349 * In the first case, we need to
12350 * complete a deferred symmetrical
12351 * copy that we participated in.
12352 * In the second and third cases,
12353 * we need to create the shadow so
12354 * that changes that we make to the
12355 * object do not interfere with
12356 * any symmetrical copies which
12357 * have occured (case 2) or which
12358 * might occur (case 3).
12359 *
12360 * The first case is when we had
12361 * deferred shadow object creation
12362 * via the entry->needs_copy mechanism.
12363 * This mechanism only works when
12364 * only one entry points to the source
12365 * object, and we are about to create
12366 * a second entry pointing to the
12367 * same object. The problem is that
12368 * there is no way of mapping from
12369 * an object to the entries pointing
12370 * to it. (Deferred shadow creation
12371 * works with one entry because occurs
12372 * at fault time, and we walk from the
12373 * entry to the object when handling
12374 * the fault.)
12375 *
12376 * The second case is when the object
12377 * to be shared has already been copied
12378 * with a symmetric copy, but we point
12379 * directly to the object without
12380 * needs_copy set in our entry. (This
12381 * can happen because different ranges
12382 * of an object can be pointed to by
12383 * different entries. In particular,
12384 * a single entry pointing to an object
12385 * can be split by a call to vm_inherit,
12386 * which, combined with task_create, can
12387 * result in the different entries
12388 * having different needs_copy values.)
12389 * The shadowed flag in the object allows
12390 * us to detect this case. The problem
12391 * with this case is that if this object
12392 * has or will have shadows, then we
12393 * must not perform an asymmetric copy
12394 * of this object, since such a copy
12395 * allows the object to be changed, which
12396 * will break the previous symmetrical
12397 * copies (which rely upon the object
12398 * not changing). In a sense, the shadowed
12399 * flag says "don't change this object".
12400 * We fix this by creating a shadow
12401 * object for this object, and sharing
12402 * that. This works because we are free
12403 * to change the shadow object (and thus
12404 * to use an asymmetric copy strategy);
12405 * this is also semantically correct,
12406 * since this object is temporary, and
12407 * therefore a copy of the object is
12408 * as good as the object itself. (This
12409 * is not true for permanent objects,
12410 * since the pager needs to see changes,
12411 * which won't happen if the changes
12412 * are made to a copy.)
12413 *
12414 * The third case is when the object
12415 * to be shared has parts sticking
12416 * outside of the entry we're working
12417 * with, and thus may in the future
12418 * be subject to a symmetrical copy.
12419 * (This is a preemptive version of
12420 * case 2.)
12421 */
3e170ce0 12422 VME_OBJECT_SHADOW(old_entry,
0a7de745
A
12423 (vm_map_size_t) (old_entry->vme_end -
12424 old_entry->vme_start));
5ba3f43e 12425
1c79356b
A
12426 /*
12427 * If we're making a shadow for other than
12428 * copy on write reasons, then we have
12429 * to remove write permission.
12430 */
12431
1c79356b
A
12432 if (!old_entry->needs_copy &&
12433 (old_entry->protection & VM_PROT_WRITE)) {
0a7de745 12434 vm_prot_t prot;
0c530ab8 12435
f427ee49 12436 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
5ba3f43e 12437
0c530ab8 12438 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 12439
f427ee49 12440 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
5ba3f43e 12441
0a7de745
A
12442 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12443 prot |= VM_PROT_EXECUTE;
12444 }
2d21ac55 12445
5ba3f43e 12446
316670eb 12447 if (old_map->mapped_in_other_pmaps) {
9bccf70c 12448 vm_object_pmap_protect(
3e170ce0
A
12449 VME_OBJECT(old_entry),
12450 VME_OFFSET(old_entry),
9bccf70c 12451 (old_entry->vme_end -
0a7de745 12452 old_entry->vme_start),
9bccf70c 12453 PMAP_NULL,
f427ee49 12454 PAGE_SIZE,
9bccf70c 12455 old_entry->vme_start,
0c530ab8 12456 prot);
1c79356b 12457 } else {
9bccf70c 12458 pmap_protect(old_map->pmap,
0a7de745
A
12459 old_entry->vme_start,
12460 old_entry->vme_end,
12461 prot);
1c79356b
A
12462 }
12463 }
5ba3f43e 12464
1c79356b 12465 old_entry->needs_copy = FALSE;
3e170ce0 12466 object = VME_OBJECT(old_entry);
1c79356b 12467 }
6d2010ae 12468
5ba3f43e 12469
1c79356b
A
12470 /*
12471 * If object was using a symmetric copy strategy,
12472 * change its copy strategy to the default
12473 * asymmetric copy strategy, which is copy_delay
12474 * in the non-norma case and copy_call in the
12475 * norma case. Bump the reference count for the
12476 * new entry.
12477 */
5ba3f43e 12478
0a7de745 12479 if (old_entry->is_sub_map) {
3e170ce0
A
12480 vm_map_lock(VME_SUBMAP(old_entry));
12481 vm_map_reference(VME_SUBMAP(old_entry));
12482 vm_map_unlock(VME_SUBMAP(old_entry));
1c79356b
A
12483 } else {
12484 vm_object_lock(object);
2d21ac55 12485 vm_object_reference_locked(object);
1c79356b
A
12486 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12487 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12488 }
12489 vm_object_unlock(object);
12490 }
5ba3f43e 12491
1c79356b
A
12492 /*
12493 * Clone the entry, using object ref from above.
12494 * Mark both entries as shared.
12495 */
5ba3f43e 12496
7ddcb079 12497 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
0a7de745 12498 * map or descendants */
f427ee49 12499 vm_map_entry_copy(old_map, new_entry, old_entry);
1c79356b
A
12500 old_entry->is_shared = TRUE;
12501 new_entry->is_shared = TRUE;
39037602 12502
a39ff7e2
A
12503 /*
12504 * We're dealing with a shared mapping, so the resulting mapping
12505 * should inherit some of the original mapping's accounting settings.
12506 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12507 * "use_pmap" should stay the same as before (if it hasn't been reset
12508 * to TRUE when we cleared "iokit_acct").
12509 */
12510 assert(!new_entry->iokit_acct);
12511
39037602
A
12512 /*
12513 * If old entry's inheritence is VM_INHERIT_NONE,
12514 * the new entry is for corpse fork, remove the
12515 * write permission from the new entry.
12516 */
12517 if (old_entry->inheritance == VM_INHERIT_NONE) {
39037602
A
12518 new_entry->protection &= ~VM_PROT_WRITE;
12519 new_entry->max_protection &= ~VM_PROT_WRITE;
12520 }
5ba3f43e 12521
1c79356b
A
12522 /*
12523 * Insert the entry into the new map -- we
12524 * know we're inserting at the end of the new
12525 * map.
12526 */
5ba3f43e 12527
d9a64523 12528 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
0a7de745 12529 VM_MAP_KERNEL_FLAGS_NONE);
5ba3f43e 12530
1c79356b
A
12531 /*
12532 * Update the physical map
12533 */
5ba3f43e 12534
1c79356b
A
12535 if (old_entry->is_sub_map) {
12536 /* Bill Angell pmap support goes here */
12537 } else {
12538 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
0a7de745
A
12539 old_entry->vme_end - old_entry->vme_start,
12540 old_entry->vme_start);
1c79356b
A
12541 }
12542}
12543
91447636 12544static boolean_t
1c79356b 12545vm_map_fork_copy(
0a7de745
A
12546 vm_map_t old_map,
12547 vm_map_entry_t *old_entry_p,
12548 vm_map_t new_map,
12549 int vm_map_copyin_flags)
1c79356b
A
12550{
12551 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
12552 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12553 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
12554 vm_map_copy_t copy;
12555 vm_map_entry_t last = vm_map_last_entry(new_map);
12556
12557 vm_map_unlock(old_map);
12558 /*
12559 * Use maxprot version of copyin because we
12560 * care about whether this memory can ever
12561 * be accessed, not just whether it's accessible
12562 * right now.
12563 */
39037602
A
12564 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12565 if (vm_map_copyin_internal(old_map, start, entry_size,
0a7de745 12566 vm_map_copyin_flags, &copy)
1c79356b
A
12567 != KERN_SUCCESS) {
12568 /*
12569 * The map might have changed while it
12570 * was unlocked, check it again. Skip
12571 * any blank space or permanently
12572 * unreadable region.
12573 */
12574 vm_map_lock(old_map);
12575 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 12576 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
12577 last = last->vme_next;
12578 }
12579 *old_entry_p = last;
12580
12581 /*
12582 * XXX For some error returns, want to
12583 * XXX skip to the next element. Note
12584 * that INVALID_ADDRESS and
12585 * PROTECTION_FAILURE are handled above.
12586 */
5ba3f43e 12587
1c79356b
A
12588 return FALSE;
12589 }
5ba3f43e 12590
1c79356b 12591 /*
f427ee49
A
12592 * Assert that the vm_map_copy is coming from the right
12593 * zone and hasn't been forged
1c79356b 12594 */
f427ee49 12595 vm_map_copy_require(copy);
5ba3f43e 12596
f427ee49
A
12597 /*
12598 * Insert the copy into the new map
12599 */
1c79356b 12600 vm_map_copy_insert(new_map, last, copy);
5ba3f43e 12601
1c79356b
A
12602 /*
12603 * Pick up the traversal at the end of
12604 * the copied region.
12605 */
5ba3f43e 12606
1c79356b
A
12607 vm_map_lock(old_map);
12608 start += entry_size;
0a7de745 12609 if (!vm_map_lookup_entry(old_map, start, &last)) {
1c79356b
A
12610 last = last->vme_next;
12611 } else {
2d21ac55
A
12612 if (last->vme_start == start) {
12613 /*
12614 * No need to clip here and we don't
12615 * want to cause any unnecessary
12616 * unnesting...
12617 */
12618 } else {
12619 vm_map_clip_start(old_map, last, start);
12620 }
1c79356b
A
12621 }
12622 *old_entry_p = last;
12623
12624 return TRUE;
12625}
12626
12627/*
12628 * vm_map_fork:
12629 *
12630 * Create and return a new map based on the old
12631 * map, according to the inheritance values on the
39037602 12632 * regions in that map and the options.
1c79356b
A
12633 *
12634 * The source map must not be locked.
12635 */
12636vm_map_t
12637vm_map_fork(
0a7de745
A
12638 ledger_t ledger,
12639 vm_map_t old_map,
12640 int options)
1c79356b 12641{
0a7de745
A
12642 pmap_t new_pmap;
12643 vm_map_t new_map;
12644 vm_map_entry_t old_entry;
12645 vm_map_size_t new_size = 0, entry_size;
12646 vm_map_entry_t new_entry;
12647 boolean_t src_needs_copy;
12648 boolean_t new_entry_needs_copy;
12649 boolean_t pmap_is64bit;
12650 int vm_map_copyin_flags;
12651 vm_inherit_t old_entry_inheritance;
12652 int map_create_options;
12653 kern_return_t footprint_collect_kr;
39037602
A
12654
12655 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
0a7de745
A
12656 VM_MAP_FORK_PRESERVE_PURGEABLE |
12657 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
39037602
A
12658 /* unsupported option */
12659 return VM_MAP_NULL;
12660 }
1c79356b 12661
3e170ce0 12662 pmap_is64bit =
b0d623f7 12663#if defined(__i386__) || defined(__x86_64__)
0a7de745 12664 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
5ba3f43e 12665#elif defined(__arm64__)
0a7de745 12666 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
5ba3f43e 12667#elif defined(__arm__)
0a7de745 12668 FALSE;
b0d623f7 12669#else
316670eb 12670#error Unknown architecture.
b0d623f7 12671#endif
3e170ce0 12672
cb323159
A
12673 unsigned int pmap_flags = 0;
12674 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12675#if defined(HAS_APPLE_PAC)
12676 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12677#endif
f427ee49
A
12678#if PMAP_CREATE_FORCE_4K_PAGES
12679 if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
12680 PAGE_SIZE != FOURK_PAGE_SIZE) {
12681 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
12682 }
12683#endif /* PMAP_CREATE_FORCE_4K_PAGES */
cb323159 12684 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
2d21ac55 12685
c3c9b80d 12686 vm_map_reference(old_map);
1c79356b
A
12687 vm_map_lock(old_map);
12688
d9a64523
A
12689 map_create_options = 0;
12690 if (old_map->hdr.entries_pageable) {
12691 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12692 }
12693 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12694 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12695 footprint_collect_kr = KERN_SUCCESS;
12696 }
12697 new_map = vm_map_create_options(new_pmap,
0a7de745
A
12698 old_map->min_offset,
12699 old_map->max_offset,
12700 map_create_options);
f427ee49
A
12701 /* inherit cs_enforcement */
12702 vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
5ba3f43e 12703 vm_map_lock(new_map);
39037602 12704 vm_commit_pagezero_status(new_map);
39236c6e
A
12705 /* inherit the parent map's page size */
12706 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
1c79356b 12707 for (
2d21ac55
A
12708 old_entry = vm_map_first_entry(old_map);
12709 old_entry != vm_map_to_entry(old_map);
12710 ) {
1c79356b
A
12711 entry_size = old_entry->vme_end - old_entry->vme_start;
12712
d9a64523
A
12713 old_entry_inheritance = old_entry->inheritance;
12714 /*
12715 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12716 * share VM_INHERIT_NONE entries that are not backed by a
12717 * device pager.
12718 */
12719 if (old_entry_inheritance == VM_INHERIT_NONE &&
12720 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
f427ee49 12721 (old_entry->protection & VM_PROT_READ) &&
d9a64523 12722 !(!old_entry->is_sub_map &&
0a7de745
A
12723 VME_OBJECT(old_entry) != NULL &&
12724 VME_OBJECT(old_entry)->pager != NULL &&
12725 is_device_pager_ops(
12726 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
d9a64523
A
12727 old_entry_inheritance = VM_INHERIT_SHARE;
12728 }
12729
12730 if (old_entry_inheritance != VM_INHERIT_NONE &&
12731 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12732 footprint_collect_kr == KERN_SUCCESS) {
39037602 12733 /*
d9a64523
A
12734 * The corpse won't have old_map->pmap to query
12735 * footprint information, so collect that data now
12736 * and store it in new_map->vmmap_corpse_footprint
12737 * for later autopsy.
39037602 12738 */
d9a64523 12739 footprint_collect_kr =
0a7de745
A
12740 vm_map_corpse_footprint_collect(old_map,
12741 old_entry,
12742 new_map);
d9a64523
A
12743 }
12744
12745 switch (old_entry_inheritance) {
12746 case VM_INHERIT_NONE:
12747 break;
1c79356b
A
12748
12749 case VM_INHERIT_SHARE:
12750 vm_map_fork_share(old_map, old_entry, new_map);
12751 new_size += entry_size;
12752 break;
12753
12754 case VM_INHERIT_COPY:
12755
12756 /*
12757 * Inline the copy_quickly case;
12758 * upon failure, fall back on call
12759 * to vm_map_fork_copy.
12760 */
12761
0a7de745 12762 if (old_entry->is_sub_map) {
1c79356b 12763 break;
0a7de745 12764 }
9bccf70c 12765 if ((old_entry->wired_count != 0) ||
3e170ce0 12766 ((VME_OBJECT(old_entry) != NULL) &&
0a7de745 12767 (VME_OBJECT(old_entry)->true_share))) {
1c79356b
A
12768 goto slow_vm_map_fork_copy;
12769 }
12770
7ddcb079 12771 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
f427ee49
A
12772 vm_map_entry_copy(old_map, new_entry, old_entry);
12773
12774 if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
12775 new_map->jit_entry_exists = TRUE;
12776 }
12777
fe8ab488
A
12778 if (new_entry->is_sub_map) {
12779 /* clear address space specifics */
12780 new_entry->use_pmap = FALSE;
a39ff7e2
A
12781 } else {
12782 /*
12783 * We're dealing with a copy-on-write operation,
12784 * so the resulting mapping should not inherit
12785 * the original mapping's accounting settings.
12786 * "iokit_acct" should have been cleared in
12787 * vm_map_entry_copy().
12788 * "use_pmap" should be reset to its default
12789 * (TRUE) so that the new mapping gets
12790 * accounted for in the task's memory footprint.
12791 */
12792 assert(!new_entry->iokit_acct);
12793 new_entry->use_pmap = TRUE;
fe8ab488 12794 }
1c79356b 12795
0a7de745 12796 if (!vm_object_copy_quickly(
cb323159 12797 VME_OBJECT_PTR(new_entry),
3e170ce0 12798 VME_OFFSET(old_entry),
2d21ac55 12799 (old_entry->vme_end -
0a7de745 12800 old_entry->vme_start),
2d21ac55
A
12801 &src_needs_copy,
12802 &new_entry_needs_copy)) {
1c79356b
A
12803 vm_map_entry_dispose(new_map, new_entry);
12804 goto slow_vm_map_fork_copy;
12805 }
12806
12807 /*
12808 * Handle copy-on-write obligations
12809 */
5ba3f43e 12810
1c79356b 12811 if (src_needs_copy && !old_entry->needs_copy) {
0a7de745 12812 vm_prot_t prot;
0c530ab8 12813
f427ee49 12814 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
5ba3f43e 12815
0c530ab8 12816 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 12817
3e170ce0 12818 if (override_nx(old_map, VME_ALIAS(old_entry))
0a7de745
A
12819 && prot) {
12820 prot |= VM_PROT_EXECUTE;
12821 }
2d21ac55 12822
f427ee49 12823 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
5ba3f43e 12824
1c79356b 12825 vm_object_pmap_protect(
3e170ce0
A
12826 VME_OBJECT(old_entry),
12827 VME_OFFSET(old_entry),
1c79356b 12828 (old_entry->vme_end -
0a7de745 12829 old_entry->vme_start),
5ba3f43e 12830 ((old_entry->is_shared
0a7de745
A
12831 || old_map->mapped_in_other_pmaps)
12832 ? PMAP_NULL :
12833 old_map->pmap),
f427ee49 12834 VM_MAP_PAGE_SIZE(old_map),
1c79356b 12835 old_entry->vme_start,
0c530ab8 12836 prot);
1c79356b 12837
3e170ce0 12838 assert(old_entry->wired_count == 0);
1c79356b
A
12839 old_entry->needs_copy = TRUE;
12840 }
12841 new_entry->needs_copy = new_entry_needs_copy;
5ba3f43e 12842
1c79356b
A
12843 /*
12844 * Insert the entry at the end
12845 * of the map.
12846 */
5ba3f43e 12847
d9a64523 12848 vm_map_store_entry_link(new_map,
0a7de745
A
12849 vm_map_last_entry(new_map),
12850 new_entry,
12851 VM_MAP_KERNEL_FLAGS_NONE);
1c79356b
A
12852 new_size += entry_size;
12853 break;
12854
0a7de745 12855slow_vm_map_fork_copy:
39037602
A
12856 vm_map_copyin_flags = 0;
12857 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12858 vm_map_copyin_flags |=
0a7de745 12859 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
39037602
A
12860 }
12861 if (vm_map_fork_copy(old_map,
0a7de745
A
12862 &old_entry,
12863 new_map,
12864 vm_map_copyin_flags)) {
1c79356b
A
12865 new_size += entry_size;
12866 }
12867 continue;
12868 }
12869 old_entry = old_entry->vme_next;
12870 }
12871
5ba3f43e
A
12872#if defined(__arm64__)
12873 pmap_insert_sharedpage(new_map->pmap);
f427ee49 12874#endif /* __arm64__ */
fe8ab488 12875
1c79356b 12876 new_map->size = new_size;
d9a64523
A
12877
12878 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12879 vm_map_corpse_footprint_collect_done(new_map);
12880 }
12881
f427ee49
A
12882 /* Propagate JIT entitlement for the pmap layer. */
12883 if (pmap_get_jit_entitled(old_map->pmap)) {
12884 /* Tell the pmap that it supports JIT. */
12885 pmap_set_jit_entitled(new_map->pmap);
12886 }
12887
5ba3f43e 12888 vm_map_unlock(new_map);
1c79356b
A
12889 vm_map_unlock(old_map);
12890 vm_map_deallocate(old_map);
12891
0a7de745 12892 return new_map;
1c79356b
A
12893}
12894
2d21ac55
A
12895/*
12896 * vm_map_exec:
12897 *
0a7de745 12898 * Setup the "new_map" with the proper execution environment according
2d21ac55
A
12899 * to the type of executable (platform, 64bit, chroot environment).
12900 * Map the comm page and shared region, etc...
12901 */
12902kern_return_t
12903vm_map_exec(
0a7de745
A
12904 vm_map_t new_map,
12905 task_t task,
12906 boolean_t is64bit,
12907 void *fsroot,
12908 cpu_type_t cpu,
f427ee49
A
12909 cpu_subtype_t cpu_subtype,
12910 boolean_t reslide)
2d21ac55
A
12911{
12912 SHARED_REGION_TRACE_DEBUG(
d9a64523 12913 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
0a7de745
A
12914 (void *)VM_KERNEL_ADDRPERM(current_task()),
12915 (void *)VM_KERNEL_ADDRPERM(new_map),
12916 (void *)VM_KERNEL_ADDRPERM(task),
12917 (void *)VM_KERNEL_ADDRPERM(fsroot),
12918 cpu,
12919 cpu_subtype));
39037602 12920 (void) vm_commpage_enter(new_map, task, is64bit);
f427ee49
A
12921
12922 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide);
12923
2d21ac55 12924 SHARED_REGION_TRACE_DEBUG(
d9a64523 12925 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
0a7de745
A
12926 (void *)VM_KERNEL_ADDRPERM(current_task()),
12927 (void *)VM_KERNEL_ADDRPERM(new_map),
12928 (void *)VM_KERNEL_ADDRPERM(task),
12929 (void *)VM_KERNEL_ADDRPERM(fsroot),
12930 cpu,
12931 cpu_subtype));
f427ee49
A
12932
12933 /*
12934 * Some devices have region(s) of memory that shouldn't get allocated by
12935 * user processes. The following code creates dummy vm_map_entry_t's for each
12936 * of the regions that needs to be reserved to prevent any allocations in
12937 * those regions.
12938 */
12939 kern_return_t kr = KERN_FAILURE;
12940 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
12941 vmk_flags.vmkf_permanent = TRUE;
12942 vmk_flags.vmkf_beyond_max = TRUE;
12943
12944 struct vm_reserved_region *regions = NULL;
12945 size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
12946 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
12947
12948 for (size_t i = 0; i < num_regions; ++i) {
12949 kr = vm_map_enter(
12950 new_map,
12951 &regions[i].vmrr_addr,
12952 regions[i].vmrr_size,
12953 (vm_map_offset_t)0,
12954 VM_FLAGS_FIXED,
12955 vmk_flags,
12956 VM_KERN_MEMORY_NONE,
12957 VM_OBJECT_NULL,
12958 (vm_object_offset_t)0,
12959 FALSE,
12960 VM_PROT_NONE,
12961 VM_PROT_NONE,
12962 VM_INHERIT_NONE);
12963
12964 if (kr != KERN_SUCCESS) {
12965 panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
12966 }
12967 }
12968
12969 new_map->reserved_regions = (num_regions ? TRUE : FALSE);
12970
2d21ac55
A
12971 return KERN_SUCCESS;
12972}
1c79356b 12973
c3c9b80d
A
/*
 * Statistics for the copy-on-write path of vm_map_lookup_locked() that
 * copies out of a submap.  They are updated there with plain (non-atomic)
 * arithmetic.  One group per copy method:
 *
 *   "slowly"        - the submap entry was wired (vm_object_copy_slowly())
 *   "strategically" - the object's copy strategy was not
 *                     MEMORY_OBJECT_COPY_SYMMETRIC
 *                     (vm_object_copy_strategically())
 *   "shadow"        - the object was shared copy-on-write ("needs_copy")
 *
 * Within a group:
 *   _count    number of copies set up with that method
 *   _size     running sum of the submap entry sizes involved
 *   _max      largest submap entry size seen
 *   _restart  number of times the copy was abandoned and the submap
 *             lookup retried
 *   _error    number of times the copy failed and the lookup returned
 *             the error
 */
uint64_t vm_map_lookup_locked_copy_slowly_count = 0;
uint64_t vm_map_lookup_locked_copy_slowly_size = 0;
uint64_t vm_map_lookup_locked_copy_slowly_max = 0;
uint64_t vm_map_lookup_locked_copy_slowly_restart = 0;
uint64_t vm_map_lookup_locked_copy_slowly_error = 0;
uint64_t vm_map_lookup_locked_copy_strategically_count = 0;
uint64_t vm_map_lookup_locked_copy_strategically_size = 0;
uint64_t vm_map_lookup_locked_copy_strategically_max = 0;
uint64_t vm_map_lookup_locked_copy_strategically_restart = 0;
uint64_t vm_map_lookup_locked_copy_strategically_error = 0;
uint64_t vm_map_lookup_locked_copy_shadow_count = 0;
uint64_t vm_map_lookup_locked_copy_shadow_size = 0;
uint64_t vm_map_lookup_locked_copy_shadow_max = 0;
1c79356b
A
/*
 *	vm_map_lookup_locked:
 *
 *	Finds the VM object, offset, and
 *	protection for a given virtual address in the
 *	specified map, assuming a page fault of the
 *	type specified.
 *
 *	Returns the (object, offset, protection) for
 *	this address, whether it is wired down, and whether
 *	this map has the only reference to the data in question.
 *	In order to later verify this lookup, a "version"
 *	is returned.
 *	If contended != NULL, *contended will be set to
 *	true iff the thread had to spin or block to acquire
 *	an exclusive lock.
 *
 *	The map MUST be locked by the caller and WILL be
 *	locked on exit.  In order to guarantee the
 *	existence of the returned object, it is returned
 *	locked.
 *
 *	If a lookup is requested with "write protection"
 *	specified, the map may be changed to perform virtual
 *	copying operations, although the data referenced will
 *	remain the same.
 *
 *	Parameters:
 *	  var_map           IN/OUT  map to start the lookup in.  Rewritten
 *	                            each time the lookup descends into a
 *	                            submap, and set back to the parent map
 *	                            once a submap copy-on-write has been
 *	                            resolved there.
 *	  vaddr                     faulting address; truncated to the
 *	                            smaller of the map's page mask and
 *	                            PAGE_MASK before use.
 *	  fault_type                requested access.  VM_PROT_IS_MASK,
 *	                            VM_PROT_COPY and
 *	                            VM_PROT_COPY_FAIL_IF_EXECUTABLE are
 *	                            extracted as modifiers, then the value
 *	                            is reduced to VM_PROT_ALL.
 *	  object_lock_type          OBJECT_LOCK_EXCLUSIVE locks the
 *	                            returned object exclusively; any other
 *	                            value takes a shared lock.
 *	  out_version       OUT     receives the final map's timestamp.
 *	  object, offset    OUT     backing object and offset within it.
 *	  out_prot          OUT     protection that may be granted.
 *	  wired             OUT     TRUE if the entry has a wired count.
 *	  fault_info        OUT     optional (may be NULL); filled from the
 *	                            final map entry.
 *	  real_map          OUT     starts as the input map; switched to a
 *	                            submap when that submap entry uses the
 *	                            pmap and the fault is neither a write
 *	                            nor a forced copy.
 *	  contended         OUT     optional (may be NULL); see above.
 *
 *	Returns KERN_SUCCESS, KERN_INVALID_ADDRESS when no entry covers
 *	the address, KERN_PROTECTION_FAILURE when the access is not
 *	permitted (or when a forced copy of an executable submap entry is
 *	refused), or the error from vm_object_copy_slowly() /
 *	vm_object_copy_strategically().
 */
kern_return_t
vm_map_lookup_locked(
	vm_map_t                *var_map,       /* IN/OUT */
	vm_map_offset_t         vaddr,
	vm_prot_t               fault_type,
	int                     object_lock_type,
	vm_map_version_t        *out_version,   /* OUT */
	vm_object_t             *object,        /* OUT */
	vm_object_offset_t      *offset,        /* OUT */
	vm_prot_t               *out_prot,      /* OUT */
	boolean_t               *wired,         /* OUT */
	vm_object_fault_info_t  fault_info,     /* OUT */
	vm_map_t                *real_map,      /* OUT */
	bool                    *contended)     /* OUT */
{
	vm_map_entry_t          entry;
	vm_map_t                map = *var_map;
	vm_map_t                old_map = *var_map;     /* map the lookup started in */
	vm_map_t                cow_sub_map_parent = VM_MAP_NULL; /* parent map awaiting a COW copy */
	vm_map_offset_t         cow_parent_vaddr = 0;   /* vaddr as seen by cow_sub_map_parent */
	vm_map_offset_t         old_start = 0;
	vm_map_offset_t         old_end = 0;
	vm_prot_t               prot;
	boolean_t               mask_protections;
	boolean_t               force_copy;
	boolean_t               no_force_copy_if_executable;
	boolean_t               submap_needed_copy;
	vm_prot_t               original_fault_type;
	vm_map_size_t           fault_page_mask;

	/*
	 * VM_PROT_MASK means that the caller wants us to use "fault_type"
	 * as a mask against the mapping's actual protections, not as an
	 * absolute value.
	 */
	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
	force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
	no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
	/* strip the modifier bits now that they have been recorded */
	fault_type &= VM_PROT_ALL;
	original_fault_type = fault_type;
	if (contended) {
		*contended = false;
	}

	*real_map = map;

	fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
	vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);

RetryLookup:
	/* "fault_type" may have been narrowed on a previous pass; start over */
	fault_type = original_fault_type;

	/*
	 *	If the map has an interesting hint, try it before calling
	 *	full blown lookup routine.
	 */
	entry = map->hint;

	if ((entry == vm_map_to_entry(map)) ||
	    (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
		vm_map_entry_t  tmp_entry;

		/*
		 *	Entry was either not a valid hint, or the vaddr
		 *	was not contained in the entry, so do a full lookup.
		 */
		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
			if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
				vm_map_unlock(cow_sub_map_parent);
			}
			if ((*real_map != map)
			    && (*real_map != cow_sub_map_parent)) {
				vm_map_unlock(*real_map);
			}
			return KERN_INVALID_ADDRESS;
		}

		entry = tmp_entry;
	}
	if (map == old_map) {
		old_start = entry->vme_start;
		old_end = entry->vme_end;
	}

	/*
	 *	Handle submaps.  Drop lock on upper map, submap is
	 *	returned locked.
	 */

	submap_needed_copy = FALSE;
submap_recurse:
	if (entry->is_sub_map) {
		vm_map_offset_t         local_vaddr;
		vm_map_offset_t         end_delta;
		vm_map_offset_t         start_delta;
		vm_map_entry_t          submap_entry, saved_submap_entry;
		vm_object_offset_t      submap_entry_offset;
		vm_object_size_t        submap_entry_size;
		vm_prot_t               subentry_protection;
		vm_prot_t               subentry_max_protection;
		boolean_t               subentry_no_copy_on_read;
		boolean_t               mapped_needs_copy = FALSE;
		vm_map_version_t        version;

		assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
		    "map %p (%d) entry %p submap %p (%d)\n",
		    map, VM_MAP_PAGE_SHIFT(map), entry,
		    VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));

		local_vaddr = vaddr;

		/*
		 * Read-type fault through a submap entry that uses the
		 * pmap: report the submap itself as "real_map".
		 */
		if ((entry->use_pmap &&
		    !((fault_type & VM_PROT_WRITE) ||
		    force_copy))) {
			/* if real_map equals map we unlock below */
			if ((*real_map != map) &&
			    (*real_map != cow_sub_map_parent)) {
				vm_map_unlock(*real_map);
			}
			*real_map = VME_SUBMAP(entry);
		}

		if (entry->needs_copy &&
		    ((fault_type & VM_PROT_WRITE) ||
		    force_copy)) {
			/*
			 * NOTE(review): "mapped_needs_copy" is re-initialised
			 * to FALSE every time this block is entered (both
			 * "goto RetryLookup" and "goto submap_recurse" land
			 * before it), so the "else" arm below looks
			 * unreachable -- confirm before relying on it.
			 */
			if (!mapped_needs_copy) {
				/*
				 * The parent map will be modified: upgrade
				 * its lock.  A non-zero return means the
				 * upgrade failed, so re-take the read lock
				 * and redo the whole lookup.
				 */
				if (vm_map_lock_read_to_write(map)) {
					vm_map_lock_read(map);
					*real_map = map;
					goto RetryLookup;
				}
				vm_map_lock_read(VME_SUBMAP(entry));
				*var_map = VME_SUBMAP(entry);
				cow_sub_map_parent = map;
				/* reset base to map before cow object */
				/* this is the map which will accept   */
				/* the new cow object */
				old_start = entry->vme_start;
				old_end = entry->vme_end;
				cow_parent_vaddr = vaddr;
				mapped_needs_copy = TRUE;
			} else {
				vm_map_lock_read(VME_SUBMAP(entry));
				*var_map = VME_SUBMAP(entry);
				if ((cow_sub_map_parent != map) &&
				    (*real_map != map)) {
					vm_map_unlock(map);
				}
			}
		} else {
			/*
			 * Not copying now; remember that a "needs_copy"
			 * submap was traversed so write permission can be
			 * withheld at the end.
			 */
			if (entry->needs_copy) {
				submap_needed_copy = TRUE;
			}
			vm_map_lock_read(VME_SUBMAP(entry));
			*var_map = VME_SUBMAP(entry);
			/* leave map locked if it is a target */
			/* cow sub_map above otherwise, just  */
			/* follow the maps down to the object */
			/* here we unlock knowing we are not  */
			/* revisiting the map.  */
			if ((*real_map != map) && (map != cow_sub_map_parent)) {
				vm_map_unlock_read(map);
			}
		}

		map = *var_map;

		/* calculate the offset in the submap for vaddr */
		local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
		assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
		    "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
		    (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);

RetrySubMap:
		if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
			if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
				vm_map_unlock(cow_sub_map_parent);
			}
			if ((*real_map != map)
			    && (*real_map != cow_sub_map_parent)) {
				vm_map_unlock(*real_map);
			}
			*real_map = map;
			return KERN_INVALID_ADDRESS;
		}

		/* find the attenuated shadow of the underlying object */
		/* on our target map */

		/* in english the submap object may extend beyond the     */
		/* region mapped by the entry or, may only fill a portion */
		/* of it.  For our purposes, we only care if the object   */
		/* doesn't fill.  In this case the area which will        */
		/* ultimately be clipped in the top map will only need    */
		/* to be as big as the portion of the underlying entry    */
		/* which is mapped */
		start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
		    submap_entry->vme_start - VME_OFFSET(entry) : 0;

		end_delta =
		    (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
		    submap_entry->vme_end ?
		    0 : (VME_OFFSET(entry) +
		    (old_end - old_start))
		    - submap_entry->vme_end;

		/*
		 * Shrink [old_start, old_end).  Every path that jumps back
		 * to RetrySubMap undoes this adjustment first.
		 */
		old_start += start_delta;
		old_end -= end_delta;

		if (submap_entry->is_sub_map) {
			/* nested submap: descend one more level */
			entry = submap_entry;
			vaddr = local_vaddr;
			goto submap_recurse;
		}

		if (((fault_type & VM_PROT_WRITE) ||
		    force_copy)
		    && cow_sub_map_parent) {
			vm_object_t     sub_object, copy_object;
			vm_object_offset_t copy_offset;
			vm_map_offset_t local_start;
			vm_map_offset_t local_end;
			boolean_t       object_copied = FALSE;
			vm_object_offset_t object_copied_offset = 0;
			boolean_t       object_copied_needs_copy = FALSE;
			kern_return_t   kr = KERN_SUCCESS;

			/* the submap entry is about to be modified */
			if (vm_map_lock_read_to_write(map)) {
				vm_map_lock_read(map);
				old_start -= start_delta;
				old_end += end_delta;
				goto RetrySubMap;
			}


			sub_object = VME_OBJECT(submap_entry);
			if (sub_object == VM_OBJECT_NULL) {
				/* no backing object yet: give the entry one */
				sub_object =
				    vm_object_allocate(
					(vm_map_size_t)
					(submap_entry->vme_end -
					submap_entry->vme_start));
				VME_OBJECT_SET(submap_entry, sub_object);
				VME_OFFSET_SET(submap_entry, 0);
				assert(!submap_entry->is_sub_map);
				assert(submap_entry->use_pmap);
			}
			local_start =  local_vaddr -
			    (cow_parent_vaddr - old_start);
			local_end = local_vaddr +
			    (old_end - cow_parent_vaddr);
			vm_map_clip_start(map, submap_entry, local_start);
			vm_map_clip_end(map, submap_entry, local_end);
			if (submap_entry->is_sub_map) {
				/* unnesting was done when clipping */
				assert(!submap_entry->use_pmap);
			}

			/* This is the COW case, lets connect */
			/* an entry in our space to the underlying */
			/* object in the submap, bypassing the  */
			/* submap. */
			submap_entry_offset = VME_OFFSET(submap_entry);
			submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;

			/*
			 * The caller asked for the forced copy to fail
			 * rather than make a real copy of an executable
			 * submap entry (wired, or non-symmetric strategy).
			 */
			if ((submap_entry->wired_count != 0 ||
			    sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
			    (submap_entry->protection & VM_PROT_EXECUTE) &&
			    no_force_copy_if_executable) {
//				printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
				if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
					vm_map_unlock(cow_sub_map_parent);
				}
				if ((*real_map != map)
				    && (*real_map != cow_sub_map_parent)) {
					vm_map_unlock(*real_map);
				}
				*real_map = map;
				vm_map_lock_write_to_read(map);
				kr = KERN_PROTECTION_FAILURE;
				DTRACE_VM4(submap_no_copy_executable,
				    vm_map_t, map,
				    vm_object_offset_t, submap_entry_offset,
				    vm_object_size_t, submap_entry_size,
				    int, kr);
				return kr;
			}

			if (submap_entry->wired_count != 0) {
				/*
				 * Wired entry: copy with
				 * vm_object_copy_slowly(), with the submap
				 * unlocked for the duration.  The extra
				 * reference taken here is dropped after the
				 * copy.
				 */
				vm_object_reference(sub_object);

				assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
				    "submap_entry %p offset 0x%llx\n",
				    submap_entry, VME_OFFSET(submap_entry));

				DTRACE_VM6(submap_copy_slowly,
				    vm_map_t, cow_sub_map_parent,
				    vm_map_offset_t, vaddr,
				    vm_map_t, map,
				    vm_object_size_t, submap_entry_size,
				    int, submap_entry->wired_count,
				    int, sub_object->copy_strategy);

				saved_submap_entry = submap_entry;
				version.main_timestamp = map->timestamp;
				vm_map_unlock(map); /* Increments timestamp by 1 */
				submap_entry = VM_MAP_ENTRY_NULL;

				vm_object_lock(sub_object);
				kr = vm_object_copy_slowly(sub_object,
				    submap_entry_offset,
				    submap_entry_size,
				    FALSE,
				    &copy_object);
				object_copied = TRUE;
				object_copied_offset = 0;
				/* 4k: account for extra offset in physical page */
				object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
				object_copied_needs_copy = FALSE;
				vm_object_deallocate(sub_object);

				vm_map_lock(map);

				if (kr != KERN_SUCCESS &&
				    kr != KERN_MEMORY_RESTART_COPY) {
					/* hard failure: unwind the locks and report it */
					if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
						vm_map_unlock(cow_sub_map_parent);
					}
					if ((*real_map != map)
					    && (*real_map != cow_sub_map_parent)) {
						vm_map_unlock(*real_map);
					}
					*real_map = map;
					vm_object_deallocate(copy_object);
					copy_object = VM_OBJECT_NULL;
					vm_map_lock_write_to_read(map);
					DTRACE_VM4(submap_copy_error_slowly,
					    vm_object_t, sub_object,
					    vm_object_offset_t, submap_entry_offset,
					    vm_object_size_t, submap_entry_size,
					    int, kr);
					vm_map_lookup_locked_copy_slowly_error++;
					return kr;
				}

				/*
				 * The saved entry is reused only when the copy
				 * succeeded and the timestamp advanced by
				 * exactly the one increment of our own unlock;
				 * otherwise discard the copy and look the
				 * entry up again.
				 */
				if ((kr == KERN_SUCCESS) &&
				    (version.main_timestamp + 1) == map->timestamp) {
					submap_entry = saved_submap_entry;
				} else {
					saved_submap_entry = NULL;
					old_start -= start_delta;
					old_end += end_delta;
					vm_object_deallocate(copy_object);
					copy_object = VM_OBJECT_NULL;
					vm_map_lock_write_to_read(map);
					vm_map_lookup_locked_copy_slowly_restart++;
					goto RetrySubMap;
				}
				vm_map_lookup_locked_copy_slowly_count++;
				vm_map_lookup_locked_copy_slowly_size += submap_entry_size;
				if (submap_entry_size > vm_map_lookup_locked_copy_slowly_max) {
					vm_map_lookup_locked_copy_slowly_max = submap_entry_size;
				}
			} else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
				/*
				 * Non-symmetric copy strategy: defer to
				 * vm_object_copy_strategically(), which also
				 * reports the offset into the copy and
				 * whether the copy itself still needs
				 * copying.
				 */
				submap_entry_offset = VME_OFFSET(submap_entry);
				copy_object = VM_OBJECT_NULL;
				object_copied_offset = submap_entry_offset;
				object_copied_needs_copy = FALSE;
				DTRACE_VM6(submap_copy_strategically,
				    vm_map_t, cow_sub_map_parent,
				    vm_map_offset_t, vaddr,
				    vm_map_t, map,
				    vm_object_size_t, submap_entry_size,
				    int, submap_entry->wired_count,
				    int, sub_object->copy_strategy);
				kr = vm_object_copy_strategically(
					sub_object,
					submap_entry_offset,
					submap_entry->vme_end - submap_entry->vme_start,
					&copy_object,
					&object_copied_offset,
					&object_copied_needs_copy);
				if (kr == KERN_MEMORY_RESTART_COPY) {
					old_start -= start_delta;
					old_end += end_delta;
					vm_object_deallocate(copy_object);
					copy_object = VM_OBJECT_NULL;
					vm_map_lock_write_to_read(map);
					vm_map_lookup_locked_copy_strategically_restart++;
					goto RetrySubMap;
				}
				if (kr != KERN_SUCCESS) {
					if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
						vm_map_unlock(cow_sub_map_parent);
					}
					if ((*real_map != map)
					    && (*real_map != cow_sub_map_parent)) {
						vm_map_unlock(*real_map);
					}
					*real_map = map;
					vm_object_deallocate(copy_object);
					copy_object = VM_OBJECT_NULL;
					vm_map_lock_write_to_read(map);
					DTRACE_VM4(submap_copy_error_strategically,
					    vm_object_t, sub_object,
					    vm_object_offset_t, submap_entry_offset,
					    vm_object_size_t, submap_entry_size,
					    int, kr);
					vm_map_lookup_locked_copy_strategically_error++;
					return kr;
				}
				assert(copy_object != VM_OBJECT_NULL);
				assert(copy_object != sub_object);
				object_copied = TRUE;
				vm_map_lookup_locked_copy_strategically_count++;
				vm_map_lookup_locked_copy_strategically_size += submap_entry_size;
				if (submap_entry_size > vm_map_lookup_locked_copy_strategically_max) {
					vm_map_lookup_locked_copy_strategically_max = submap_entry_size;
				}
			} else {
				/* set up shadow object */
				object_copied = FALSE;
				copy_object = sub_object;
				vm_object_lock(sub_object);
				vm_object_reference_locked(sub_object);
				sub_object->shadowed = TRUE;
				vm_object_unlock(sub_object);

				assert(submap_entry->wired_count == 0);
				submap_entry->needs_copy = TRUE;

				/* write access is removed from the existing mappings */
				prot = submap_entry->protection;
				assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
				prot = prot & ~VM_PROT_WRITE;
				assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));

				if (override_nx(old_map,
				    VME_ALIAS(submap_entry))
				    && prot) {
					prot |= VM_PROT_EXECUTE;
				}

				vm_object_pmap_protect(
					sub_object,
					VME_OFFSET(submap_entry),
					submap_entry->vme_end -
					submap_entry->vme_start,
					(submap_entry->is_shared
					|| map->mapped_in_other_pmaps) ?
					PMAP_NULL : map->pmap,
					VM_MAP_PAGE_SIZE(map),
					submap_entry->vme_start,
					prot);
				vm_map_lookup_locked_copy_shadow_count++;
				vm_map_lookup_locked_copy_shadow_size += submap_entry_size;
				if (submap_entry_size > vm_map_lookup_locked_copy_shadow_max) {
					vm_map_lookup_locked_copy_shadow_max = submap_entry_size;
				}
			}

			/*
			 * Adjust the fault offset to the submap entry.
			 */
			copy_offset = (local_vaddr -
			    submap_entry->vme_start +
			    VME_OFFSET(submap_entry));

			/* This works differently than the  */
			/* normal submap case. We go back   */
			/* to the parent of the cow map and */
			/* clip out the target portion of   */
			/* the sub_map, substituting the    */
			/* new copy object,                 */

			/* capture what is needed from submap_entry before unlocking */
			subentry_protection = submap_entry->protection;
			subentry_max_protection = submap_entry->max_protection;
			subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
			vm_map_unlock(map);
			submap_entry = NULL; /* not valid after map unlock */

			local_start = old_start;
			local_end = old_end;
			map = cow_sub_map_parent;
			*var_map = cow_sub_map_parent;
			vaddr = cow_parent_vaddr;
			cow_sub_map_parent = NULL;

			if (!vm_map_lookup_entry(map,
			    vaddr, &entry)) {
				/*
				 * NOTE(review): "cow_sub_map_parent" was set
				 * to NULL just above, so this first test can
				 * never be true here.
				 */
				if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
					vm_map_unlock(cow_sub_map_parent);
				}
				if ((*real_map != map)
				    && (*real_map != cow_sub_map_parent)) {
					vm_map_unlock(*real_map);
				}
				*real_map = map;
				vm_object_deallocate(
					copy_object);
				copy_object = VM_OBJECT_NULL;
				vm_map_lock_write_to_read(map);
				/*
				 * NOTE(review): "entry" comes from the failed
				 * lookup above, so it is not an entry that
				 * contains "vaddr" -- confirm what the probe
				 * is meant to report.
				 */
				DTRACE_VM4(submap_lookup_post_unlock,
				    uint64_t, (uint64_t)entry->vme_start,
				    uint64_t, (uint64_t)entry->vme_end,
				    vm_map_offset_t, vaddr,
				    int, object_copied);
				return KERN_INVALID_ADDRESS;
			}

			/* clip out the portion of space */
			/* mapped by the sub map which   */
			/* corresponds to the underlying */
			/* object */

			/*
			 * Clip (and unnest) the smallest nested chunk
			 * possible around the faulting address...
			 */
			local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
			local_end = local_start + pmap_shared_region_size_min(map->pmap);
			/*
			 * ... but don't go beyond the "old_start" to "old_end"
			 * range, to avoid spanning over another VM region
			 * with a possibly different VM object and/or offset.
			 */
			if (local_start < old_start) {
				local_start = old_start;
			}
			if (local_end > old_end) {
				local_end = old_end;
			}
			/*
			 * Adjust copy_offset to the start of the range.
			 */
			copy_offset -= (vaddr - local_start);

			vm_map_clip_start(map, entry, local_start);
			vm_map_clip_end(map, entry, local_end);
			if (entry->is_sub_map) {
				/* unnesting was done when clipping */
				assert(!entry->use_pmap);
			}

			/* substitute copy object for */
			/* shared map entry           */
			vm_map_deallocate(VME_SUBMAP(entry));
			assert(!entry->iokit_acct);
			entry->is_sub_map = FALSE;
			entry->use_pmap = TRUE;
			VME_OBJECT_SET(entry, copy_object);

			/* propagate the submap entry's protections */
			if (entry->protection != VM_PROT_READ) {
				/*
				 * Someone has already altered the top entry's
				 * protections via vm_protect(VM_PROT_COPY).
				 * Respect these new values and ignore the
				 * submap entry's protections.
				 */
			} else {
				/*
				 * Regular copy-on-write: propagate the submap
				 * entry's protections to the top map entry.
				 */
				entry->protection |= subentry_protection;
			}
			entry->max_protection |= subentry_max_protection;
			/* propagate no_copy_on_read */
			entry->vme_no_copy_on_read = subentry_no_copy_on_read;

			/*
			 * Write+execute on a non-JIT entry: when the map's
			 * policy says so, drop execute and log it.
			 */
			if ((entry->protection & VM_PROT_WRITE) &&
			    (entry->protection & VM_PROT_EXECUTE) &&
#if XNU_TARGET_OS_OSX
			    map->pmap != kernel_pmap &&
			    (vm_map_cs_enforcement(map)
#if __arm64__
			    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
			    ) &&
#endif /* XNU_TARGET_OS_OSX */
			    !(entry->used_for_jit) &&
			    VM_MAP_POLICY_WX_STRIP_X(map)) {
				DTRACE_VM3(cs_wx,
				    uint64_t, (uint64_t)entry->vme_start,
				    uint64_t, (uint64_t)entry->vme_end,
				    vm_prot_t, entry->protection);
				printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
				    proc_selfpid(),
				    (current_task()->bsd_info
				    ? proc_name_address(current_task()->bsd_info)
				    : "?"),
				    __FUNCTION__);
				entry->protection &= ~VM_PROT_EXECUTE;
			}

			if (object_copied) {
				/* a real copy was made: the entry maps it privately */
				VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
				entry->needs_copy = object_copied_needs_copy;
				entry->is_shared = FALSE;
			} else {
				/* still sharing the submap's object: copy on first write */
				assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
				assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
				assert(entry->wired_count == 0);
				VME_OFFSET_SET(entry, copy_offset);
				entry->needs_copy = TRUE;
				if (map != old_map) {
					entry->is_shared = TRUE;
				}
			}
			if (entry->inheritance == VM_INHERIT_SHARE) {
				entry->inheritance = VM_INHERIT_COPY;
			}

			vm_map_lock_write_to_read(map);
		} else {
			/* no copy needed here: continue with the submap's entry */
			if ((cow_sub_map_parent)
			    && (cow_sub_map_parent != *real_map)
			    && (cow_sub_map_parent != map)) {
				vm_map_unlock(cow_sub_map_parent);
			}
			entry = submap_entry;
			vaddr = local_vaddr;
		}
	}

	/*
	 *	Check whether this task is allowed to have
	 *	this page.
	 */

	prot = entry->protection;

	if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
		/*
		 * HACK -- if not a stack, then allow execution
		 */
		prot |= VM_PROT_EXECUTE;
	}

	if (mask_protections) {
		/* mask mode: only the permitted subset of the request must be non-empty */
		fault_type &= prot;
		if (fault_type == VM_PROT_NONE) {
			goto protection_failure;
		}
	}
	if (((fault_type & prot) != fault_type)
#if __arm64__
	    /* prefetch abort in execute-only page */
	    && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
#endif
	    ) {
protection_failure:
		if (*real_map != map) {
			vm_map_unlock(*real_map);
		}
		*real_map = map;

		if ((fault_type & VM_PROT_EXECUTE) && prot) {
			log_stack_execution_failure((addr64_t)vaddr, prot);
		}

		DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
		return KERN_PROTECTION_FAILURE;
	}

	/*
	 *	If this page is not pageable, we have to get
	 *	it for all possible accesses.
	 */

	*wired = (entry->wired_count != 0);
	if (*wired) {
		fault_type = prot;
	}

	/*
	 *	If the entry was copy-on-write, we either ...
	 */

	if (entry->needs_copy) {
		/*
		 *	If we want to write the page, we may as well
		 *	handle that now since we've got the map locked.
		 *
		 *	If we don't need to write the page, we just
		 *	demote the permissions allowed.
		 */

		if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
			/*
			 *	Make a new object, and place it in the
			 *	object chain.  Note that no new references
			 *	have appeared -- one just moved from the
			 *	map to the new object.
			 */

			if (vm_map_lock_read_to_write(map)) {
				vm_map_lock_read(map);
				goto RetryLookup;
			}

			if (VME_OBJECT(entry)->shadowed == FALSE) {
				vm_object_lock(VME_OBJECT(entry));
				VME_OBJECT(entry)->shadowed = TRUE;
				vm_object_unlock(VME_OBJECT(entry));
			}
			VME_OBJECT_SHADOW(entry,
			    (vm_map_size_t) (entry->vme_end -
			    entry->vme_start));
			entry->needs_copy = FALSE;

			vm_map_lock_write_to_read(map);
		}
		if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
			/*
			 *	We're attempting to read a copy-on-write
			 *	page -- don't allow writes.
			 */

			prot &= (~VM_PROT_WRITE);
		}
	}

	if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
		/*
		 * We went through a "needs_copy" submap without triggering
		 * a copy, so granting write access to the page would bypass
		 * that submap's "needs_copy".
		 */
		assert(!(fault_type & VM_PROT_WRITE));
		assert(!*wired);
		assert(!force_copy);
		// printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
		prot &= ~VM_PROT_WRITE;
	}

	/*
	 *	Create an object if necessary.
	 */
	if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
		if (vm_map_lock_read_to_write(map)) {
			vm_map_lock_read(map);
			goto RetryLookup;
		}

		VME_OBJECT_SET(entry,
		    vm_object_allocate(
			    (vm_map_size_t)(entry->vme_end -
			    entry->vme_start)));
		VME_OFFSET_SET(entry, 0);
		assert(entry->use_pmap);
		vm_map_lock_write_to_read(map);
	}

	/*
	 *	Return the object/offset from this entry.  If the entry
	 *	was copy-on-write or empty, it has been fixed up.  Also
	 *	return the protection.
	 */

	*offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
	*object = VME_OBJECT(entry);
	*out_prot = prot;
	KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);

	/* "fault_info" is optional; fill it from the final entry when given */
	if (fault_info) {
		fault_info->interruptible = THREAD_UNINT; /* for now... */
		/* ... the caller will change "interruptible" if needed */
		fault_info->cluster_size = 0;
		fault_info->user_tag = VME_ALIAS(entry);
		fault_info->pmap_options = 0;
		if (entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap)) {
			fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
		}
		fault_info->behavior = entry->behavior;
		fault_info->lo_offset = VME_OFFSET(entry);
		fault_info->hi_offset =
		    (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
		fault_info->no_cache  = entry->no_cache;
		fault_info->stealth = FALSE;
		fault_info->io_sync = FALSE;
		if (entry->used_for_jit ||
		    entry->vme_resilient_codesign) {
			fault_info->cs_bypass = TRUE;
		} else {
			fault_info->cs_bypass = FALSE;
		}
		fault_info->pmap_cs_associated = FALSE;
#if CONFIG_PMAP_CS
		if (entry->pmap_cs_associated) {
			/*
			 * The pmap layer will validate this page
			 * before allowing it to be executed from.
			 */
			fault_info->pmap_cs_associated = TRUE;
		}
#endif /* CONFIG_PMAP_CS */
		fault_info->mark_zf_absent = FALSE;
		fault_info->batch_pmap_op = FALSE;
		fault_info->resilient_media = entry->vme_resilient_media;
		fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
		if (entry->translated_allow_execute) {
			fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
		}
	}

	/*
	 *	Lock the object to prevent it from disappearing
	 */
	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
		if (contended == NULL) {
			vm_object_lock(*object);
		} else {
			/* same exclusive lock, but also report contention to the caller */
			*contended = vm_object_lock_check_contended(*object);
		}
	} else {
		vm_object_lock_shared(*object);
	}

	/*
	 *	Save the version number
	 */

	out_version->main_timestamp = map->timestamp;

	return KERN_SUCCESS;
}
13842
13843
13844/*
13845 * vm_map_verify:
13846 *
13847 * Verifies that the map in question has not changed
5ba3f43e
A
13848 * since the given version. The map has to be locked
13849 * ("shared" mode is fine) before calling this function
13850 * and it will be returned locked too.
1c79356b
A
13851 */
13852boolean_t
13853vm_map_verify(
0a7de745
A
13854 vm_map_t map,
13855 vm_map_version_t *version) /* REF */
1c79356b 13856{
0a7de745 13857 boolean_t result;
1c79356b 13858
5ba3f43e 13859 vm_map_lock_assert_held(map);
1c79356b
A
13860 result = (map->timestamp == version->main_timestamp);
13861
0a7de745 13862 return result;
1c79356b
A
13863}
13864
91447636
A
13865/*
13866 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13867 * Goes away after regular vm_region_recurse function migrates to
13868 * 64 bits
13869 * vm_region_recurse: A form of vm_region which follows the
13870 * submaps in a target map
13871 *
13872 */
13873
13874kern_return_t
13875vm_map_region_recurse_64(
0a7de745
A
13876 vm_map_t map,
13877 vm_map_offset_t *address, /* IN/OUT */
13878 vm_map_size_t *size, /* OUT */
13879 natural_t *nesting_depth, /* IN/OUT */
13880 vm_region_submap_info_64_t submap_info, /* IN/OUT */
13881 mach_msg_type_number_t *count) /* IN/OUT */
91447636 13882{
0a7de745
A
13883 mach_msg_type_number_t original_count;
13884 vm_region_extended_info_data_t extended;
13885 vm_map_entry_t tmp_entry;
13886 vm_map_offset_t user_address;
13887 unsigned int user_max_depth;
91447636
A
13888
13889 /*
13890 * "curr_entry" is the VM map entry preceding or including the
13891 * address we're looking for.
13892 * "curr_map" is the map or sub-map containing "curr_entry".
5ba3f43e 13893 * "curr_address" is the equivalent of the top map's "user_address"
6d2010ae 13894 * in the current map.
91447636
A
13895 * "curr_offset" is the cumulated offset of "curr_map" in the
13896 * target task's address space.
13897 * "curr_depth" is the depth of "curr_map" in the chain of
13898 * sub-maps.
5ba3f43e 13899 *
6d2010ae
A
13900 * "curr_max_below" and "curr_max_above" limit the range (around
13901 * "curr_address") we should take into account in the current (sub)map.
13902 * They limit the range to what's visible through the map entries
13903 * we've traversed from the top map to the current map.
0a7de745 13904 *
91447636 13905 */
0a7de745
A
13906 vm_map_entry_t curr_entry;
13907 vm_map_address_t curr_address;
13908 vm_map_offset_t curr_offset;
13909 vm_map_t curr_map;
13910 unsigned int curr_depth;
13911 vm_map_offset_t curr_max_below, curr_max_above;
13912 vm_map_offset_t curr_skip;
91447636
A
13913
13914 /*
13915 * "next_" is the same as "curr_" but for the VM region immediately
13916 * after the address we're looking for. We need to keep track of this
13917 * too because we want to return info about that region if the
13918 * address we're looking for is not mapped.
13919 */
0a7de745
A
13920 vm_map_entry_t next_entry;
13921 vm_map_offset_t next_offset;
13922 vm_map_offset_t next_address;
13923 vm_map_t next_map;
13924 unsigned int next_depth;
13925 vm_map_offset_t next_max_below, next_max_above;
13926 vm_map_offset_t next_skip;
13927
13928 boolean_t look_for_pages;
2d21ac55 13929 vm_region_submap_short_info_64_t short_info;
0a7de745 13930 boolean_t do_region_footprint;
f427ee49 13931 int effective_page_size, effective_page_shift;
c3c9b80d 13932 boolean_t submap_needed_copy;
2d21ac55 13933
91447636
A
13934 if (map == VM_MAP_NULL) {
13935 /* no address space to work on */
13936 return KERN_INVALID_ARGUMENT;
13937 }
13938
f427ee49
A
13939 effective_page_shift = vm_self_region_page_shift(map);
13940 effective_page_size = (1 << effective_page_shift);
5ba3f43e 13941
39236c6e
A
13942 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13943 /*
13944 * "info" structure is not big enough and
13945 * would overflow
13946 */
13947 return KERN_INVALID_ARGUMENT;
13948 }
5ba3f43e 13949
a39ff7e2 13950 do_region_footprint = task_self_region_footprint();
39236c6e 13951 original_count = *count;
5ba3f43e 13952
39236c6e
A
13953 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13954 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13955 look_for_pages = FALSE;
13956 short_info = (vm_region_submap_short_info_64_t) submap_info;
13957 submap_info = NULL;
2d21ac55
A
13958 } else {
13959 look_for_pages = TRUE;
39236c6e 13960 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
2d21ac55 13961 short_info = NULL;
5ba3f43e 13962
39236c6e
A
13963 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13964 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13965 }
cb323159
A
13966 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13967 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
13968 }
91447636 13969 }
5ba3f43e 13970
91447636
A
13971 user_address = *address;
13972 user_max_depth = *nesting_depth;
c3c9b80d 13973 submap_needed_copy = FALSE;
5ba3f43e 13974
3e170ce0
A
13975 if (not_in_kdp) {
13976 vm_map_lock_read(map);
13977 }
13978
13979recurse_again:
91447636
A
13980 curr_entry = NULL;
13981 curr_map = map;
6d2010ae 13982 curr_address = user_address;
91447636 13983 curr_offset = 0;
6d2010ae 13984 curr_skip = 0;
91447636 13985 curr_depth = 0;
6d2010ae
A
13986 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13987 curr_max_below = curr_address;
91447636
A
13988
13989 next_entry = NULL;
13990 next_map = NULL;
6d2010ae 13991 next_address = 0;
91447636 13992 next_offset = 0;
6d2010ae 13993 next_skip = 0;
91447636 13994 next_depth = 0;
6d2010ae
A
13995 next_max_above = (vm_map_offset_t) -1;
13996 next_max_below = (vm_map_offset_t) -1;
91447636 13997
91447636
A
13998 for (;;) {
13999 if (vm_map_lookup_entry(curr_map,
0a7de745
A
14000 curr_address,
14001 &tmp_entry)) {
91447636
A
14002 /* tmp_entry contains the address we're looking for */
14003 curr_entry = tmp_entry;
14004 } else {
6d2010ae 14005 vm_map_offset_t skip;
91447636
A
14006 /*
14007 * The address is not mapped. "tmp_entry" is the
14008 * map entry preceding the address. We want the next
14009 * one, if it exists.
14010 */
14011 curr_entry = tmp_entry->vme_next;
6d2010ae 14012
91447636 14013 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae 14014 (curr_entry->vme_start >=
0a7de745 14015 curr_address + curr_max_above)) {
91447636
A
14016 /* no next entry at this level: stop looking */
14017 if (not_in_kdp) {
14018 vm_map_unlock_read(curr_map);
14019 }
14020 curr_entry = NULL;
14021 curr_map = NULL;
3e170ce0 14022 curr_skip = 0;
91447636
A
14023 curr_offset = 0;
14024 curr_depth = 0;
6d2010ae
A
14025 curr_max_above = 0;
14026 curr_max_below = 0;
91447636
A
14027 break;
14028 }
6d2010ae
A
14029
14030 /* adjust current address and offset */
14031 skip = curr_entry->vme_start - curr_address;
14032 curr_address = curr_entry->vme_start;
3e170ce0 14033 curr_skip += skip;
6d2010ae
A
14034 curr_offset += skip;
14035 curr_max_above -= skip;
14036 curr_max_below = 0;
91447636
A
14037 }
14038
14039 /*
14040 * Is the next entry at this level closer to the address (or
14041 * deeper in the submap chain) than the one we had
14042 * so far ?
14043 */
14044 tmp_entry = curr_entry->vme_next;
14045 if (tmp_entry == vm_map_to_entry(curr_map)) {
14046 /* no next entry at this level */
6d2010ae 14047 } else if (tmp_entry->vme_start >=
0a7de745 14048 curr_address + curr_max_above) {
91447636
A
14049 /*
14050 * tmp_entry is beyond the scope of what we mapped of
14051 * this submap in the upper level: ignore it.
14052 */
14053 } else if ((next_entry == NULL) ||
0a7de745
A
14054 (tmp_entry->vme_start + curr_offset <=
14055 next_entry->vme_start + next_offset)) {
91447636
A
14056 /*
14057 * We didn't have a "next_entry" or this one is
14058 * closer to the address we're looking for:
14059 * use this "tmp_entry" as the new "next_entry".
14060 */
14061 if (next_entry != NULL) {
14062 /* unlock the last "next_map" */
14063 if (next_map != curr_map && not_in_kdp) {
14064 vm_map_unlock_read(next_map);
14065 }
14066 }
14067 next_entry = tmp_entry;
14068 next_map = curr_map;
91447636 14069 next_depth = curr_depth;
6d2010ae
A
14070 next_address = next_entry->vme_start;
14071 next_skip = curr_skip;
3e170ce0 14072 next_skip += (next_address - curr_address);
6d2010ae
A
14073 next_offset = curr_offset;
14074 next_offset += (next_address - curr_address);
14075 next_max_above = MIN(next_max_above, curr_max_above);
14076 next_max_above = MIN(next_max_above,
0a7de745 14077 next_entry->vme_end - next_address);
6d2010ae
A
14078 next_max_below = MIN(next_max_below, curr_max_below);
14079 next_max_below = MIN(next_max_below,
0a7de745 14080 next_address - next_entry->vme_start);
91447636
A
14081 }
14082
6d2010ae
A
14083 /*
14084 * "curr_max_{above,below}" allow us to keep track of the
14085 * portion of the submap that is actually mapped at this level:
14086 * the rest of that submap is irrelevant to us, since it's not
14087 * mapped here.
14088 * The relevant portion of the map starts at
3e170ce0 14089 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
6d2010ae
A
14090 */
14091 curr_max_above = MIN(curr_max_above,
0a7de745 14092 curr_entry->vme_end - curr_address);
6d2010ae 14093 curr_max_below = MIN(curr_max_below,
0a7de745 14094 curr_address - curr_entry->vme_start);
6d2010ae 14095
91447636
A
14096 if (!curr_entry->is_sub_map ||
14097 curr_depth >= user_max_depth) {
14098 /*
14099 * We hit a leaf map or we reached the maximum depth
14100 * we could, so stop looking. Keep the current map
14101 * locked.
14102 */
14103 break;
14104 }
14105
14106 /*
14107 * Get down to the next submap level.
14108 */
14109
c3c9b80d
A
14110 if (curr_entry->needs_copy) {
14111 /* everything below this is effectively copy-on-write */
14112 submap_needed_copy = TRUE;
14113 }
14114
91447636
A
14115 /*
14116 * Lock the next level and unlock the current level,
14117 * unless we need to keep it locked to access the "next_entry"
14118 * later.
14119 */
14120 if (not_in_kdp) {
3e170ce0 14121 vm_map_lock_read(VME_SUBMAP(curr_entry));
91447636
A
14122 }
14123 if (curr_map == next_map) {
14124 /* keep "next_map" locked in case we need it */
14125 } else {
14126 /* release this map */
0a7de745 14127 if (not_in_kdp) {
b0d623f7 14128 vm_map_unlock_read(curr_map);
0a7de745 14129 }
91447636
A
14130 }
14131
14132 /*
14133 * Adjust the offset. "curr_entry" maps the submap
14134 * at relative address "curr_entry->vme_start" in the
3e170ce0 14135 * curr_map but skips the first "VME_OFFSET(curr_entry)"
91447636
A
14136 * bytes of the submap.
14137 * "curr_offset" always represents the offset of a virtual
14138 * address in the curr_map relative to the absolute address
14139 * space (i.e. the top-level VM map).
14140 */
14141 curr_offset +=
0a7de745 14142 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
6d2010ae 14143 curr_address = user_address + curr_offset;
91447636 14144 /* switch to the submap */
3e170ce0 14145 curr_map = VME_SUBMAP(curr_entry);
91447636 14146 curr_depth++;
91447636
A
14147 curr_entry = NULL;
14148 }
14149
a39ff7e2
A
14150// LP64todo: all the current tools are 32bit, obviously never worked for 64b
14151// so probably should be a real 32b ID vs. ptr.
14152// Current users just check for equality
14153
91447636
A
14154 if (curr_entry == NULL) {
14155 /* no VM region contains the address... */
a39ff7e2
A
14156
14157 if (do_region_footprint && /* we want footprint numbers */
39037602
A
14158 next_entry == NULL && /* & there are no more regions */
14159 /* & we haven't already provided our fake region: */
a39ff7e2 14160 user_address <= vm_map_last_entry(map)->vme_end) {
cb323159
A
14161 ledger_amount_t ledger_resident, ledger_compressed;
14162
39037602
A
14163 /*
14164 * Add a fake memory region to account for
cb323159
A
14165 * purgeable and/or ledger-tagged memory that
14166 * counts towards this task's memory footprint,
14167 * i.e. the resident/compressed pages of non-volatile
14168 * objects owned by that task.
39037602 14169 */
cb323159
A
14170 task_ledgers_footprint(map->pmap->ledger,
14171 &ledger_resident,
14172 &ledger_compressed);
14173 if (ledger_resident + ledger_compressed == 0) {
39037602 14174 /* no purgeable memory usage to report */
a39ff7e2 14175 return KERN_INVALID_ADDRESS;
39037602
A
14176 }
14177 /* fake region to show nonvolatile footprint */
a39ff7e2
A
14178 if (look_for_pages) {
14179 submap_info->protection = VM_PROT_DEFAULT;
14180 submap_info->max_protection = VM_PROT_DEFAULT;
14181 submap_info->inheritance = VM_INHERIT_DEFAULT;
14182 submap_info->offset = 0;
14183 submap_info->user_tag = -1;
f427ee49 14184 submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
a39ff7e2 14185 submap_info->pages_shared_now_private = 0;
f427ee49 14186 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
a39ff7e2
A
14187 submap_info->pages_dirtied = submap_info->pages_resident;
14188 submap_info->ref_count = 1;
14189 submap_info->shadow_depth = 0;
14190 submap_info->external_pager = 0;
14191 submap_info->share_mode = SM_PRIVATE;
c3c9b80d
A
14192 if (submap_needed_copy) {
14193 submap_info->share_mode = SM_COW;
14194 }
a39ff7e2
A
14195 submap_info->is_submap = 0;
14196 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
f427ee49 14197 submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
a39ff7e2
A
14198 submap_info->user_wired_count = 0;
14199 submap_info->pages_reusable = 0;
14200 } else {
14201 short_info->user_tag = -1;
14202 short_info->offset = 0;
14203 short_info->protection = VM_PROT_DEFAULT;
14204 short_info->inheritance = VM_INHERIT_DEFAULT;
14205 short_info->max_protection = VM_PROT_DEFAULT;
14206 short_info->behavior = VM_BEHAVIOR_DEFAULT;
14207 short_info->user_wired_count = 0;
14208 short_info->is_submap = 0;
f427ee49 14209 short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
a39ff7e2
A
14210 short_info->external_pager = 0;
14211 short_info->shadow_depth = 0;
14212 short_info->share_mode = SM_PRIVATE;
c3c9b80d
A
14213 if (submap_needed_copy) {
14214 short_info->share_mode = SM_COW;
14215 }
a39ff7e2
A
14216 short_info->ref_count = 1;
14217 }
39037602 14218 *nesting_depth = 0;
cb323159 14219 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
a39ff7e2
A
14220// *address = user_address;
14221 *address = vm_map_last_entry(map)->vme_end;
39037602
A
14222 return KERN_SUCCESS;
14223 }
a39ff7e2 14224
91447636
A
14225 if (next_entry == NULL) {
14226 /* ... and no VM region follows it either */
14227 return KERN_INVALID_ADDRESS;
14228 }
14229 /* ... gather info about the next VM region */
14230 curr_entry = next_entry;
0a7de745 14231 curr_map = next_map; /* still locked ... */
6d2010ae
A
14232 curr_address = next_address;
14233 curr_skip = next_skip;
91447636
A
14234 curr_offset = next_offset;
14235 curr_depth = next_depth;
6d2010ae
A
14236 curr_max_above = next_max_above;
14237 curr_max_below = next_max_below;
91447636
A
14238 } else {
14239 /* we won't need "next_entry" after all */
14240 if (next_entry != NULL) {
14241 /* release "next_map" */
14242 if (next_map != curr_map && not_in_kdp) {
14243 vm_map_unlock_read(next_map);
14244 }
14245 }
14246 }
14247 next_entry = NULL;
14248 next_map = NULL;
14249 next_offset = 0;
6d2010ae 14250 next_skip = 0;
91447636 14251 next_depth = 0;
6d2010ae
A
14252 next_max_below = -1;
14253 next_max_above = -1;
91447636 14254
3e170ce0
A
14255 if (curr_entry->is_sub_map &&
14256 curr_depth < user_max_depth) {
14257 /*
14258 * We're not as deep as we could be: we must have
14259 * gone back up after not finding anything mapped
14260 * below the original top-level map entry's.
14261 * Let's move "curr_address" forward and recurse again.
14262 */
14263 user_address = curr_address;
14264 goto recurse_again;
14265 }
14266
91447636 14267 *nesting_depth = curr_depth;
6d2010ae
A
14268 *size = curr_max_above + curr_max_below;
14269 *address = user_address + curr_skip - curr_max_below;
91447636 14270
2d21ac55 14271 if (look_for_pages) {
3e170ce0 14272 submap_info->user_tag = VME_ALIAS(curr_entry);
5ba3f43e 14273 submap_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
14274 submap_info->protection = curr_entry->protection;
14275 submap_info->inheritance = curr_entry->inheritance;
14276 submap_info->max_protection = curr_entry->max_protection;
14277 submap_info->behavior = curr_entry->behavior;
14278 submap_info->user_wired_count = curr_entry->user_wired_count;
14279 submap_info->is_submap = curr_entry->is_sub_map;
f427ee49 14280 submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 14281 } else {
3e170ce0 14282 short_info->user_tag = VME_ALIAS(curr_entry);
5ba3f43e 14283 short_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
14284 short_info->protection = curr_entry->protection;
14285 short_info->inheritance = curr_entry->inheritance;
14286 short_info->max_protection = curr_entry->max_protection;
14287 short_info->behavior = curr_entry->behavior;
14288 short_info->user_wired_count = curr_entry->user_wired_count;
14289 short_info->is_submap = curr_entry->is_sub_map;
f427ee49 14290 short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 14291 }
91447636
A
14292
14293 extended.pages_resident = 0;
14294 extended.pages_swapped_out = 0;
14295 extended.pages_shared_now_private = 0;
14296 extended.pages_dirtied = 0;
39236c6e 14297 extended.pages_reusable = 0;
91447636
A
14298 extended.external_pager = 0;
14299 extended.shadow_depth = 0;
3e170ce0
A
14300 extended.share_mode = SM_EMPTY;
14301 extended.ref_count = 0;
91447636
A
14302
14303 if (not_in_kdp) {
14304 if (!curr_entry->is_sub_map) {
6d2010ae
A
14305 vm_map_offset_t range_start, range_end;
14306 range_start = MAX((curr_address - curr_max_below),
0a7de745 14307 curr_entry->vme_start);
6d2010ae 14308 range_end = MIN((curr_address + curr_max_above),
0a7de745 14309 curr_entry->vme_end);
91447636 14310 vm_map_region_walk(curr_map,
0a7de745
A
14311 range_start,
14312 curr_entry,
14313 (VME_OFFSET(curr_entry) +
14314 (range_start -
14315 curr_entry->vme_start)),
14316 range_end - range_start,
14317 &extended,
14318 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
91447636
A
14319 if (extended.external_pager &&
14320 extended.ref_count == 2 &&
14321 extended.share_mode == SM_SHARED) {
2d21ac55 14322 extended.share_mode = SM_PRIVATE;
91447636 14323 }
c3c9b80d
A
14324 if (submap_needed_copy) {
14325 extended.share_mode = SM_COW;
14326 }
91447636
A
14327 } else {
14328 if (curr_entry->use_pmap) {
2d21ac55 14329 extended.share_mode = SM_TRUESHARED;
91447636 14330 } else {
2d21ac55 14331 extended.share_mode = SM_PRIVATE;
91447636 14332 }
cb323159 14333 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
91447636
A
14334 }
14335 }
14336
2d21ac55
A
14337 if (look_for_pages) {
14338 submap_info->pages_resident = extended.pages_resident;
14339 submap_info->pages_swapped_out = extended.pages_swapped_out;
14340 submap_info->pages_shared_now_private =
0a7de745 14341 extended.pages_shared_now_private;
2d21ac55
A
14342 submap_info->pages_dirtied = extended.pages_dirtied;
14343 submap_info->external_pager = extended.external_pager;
14344 submap_info->shadow_depth = extended.shadow_depth;
14345 submap_info->share_mode = extended.share_mode;
14346 submap_info->ref_count = extended.ref_count;
5ba3f43e 14347
39236c6e
A
14348 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14349 submap_info->pages_reusable = extended.pages_reusable;
14350 }
cb323159
A
14351 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14352 submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
14353 }
2d21ac55
A
14354 } else {
14355 short_info->external_pager = extended.external_pager;
14356 short_info->shadow_depth = extended.shadow_depth;
14357 short_info->share_mode = extended.share_mode;
14358 short_info->ref_count = extended.ref_count;
14359 }
91447636
A
14360
14361 if (not_in_kdp) {
14362 vm_map_unlock_read(curr_map);
14363 }
14364
14365 return KERN_SUCCESS;
14366}
14367
1c79356b
A
14368/*
14369 * vm_region:
14370 *
14371 * User call to obtain information about a region in
14372 * a task's address map. Currently, only one flavor is
14373 * supported.
14374 *
14375 * XXX The reserved and behavior fields cannot be filled
14376 * in until the vm merge from the IK is completed, and
14377 * vm_reserve is implemented.
1c79356b
A
14378 */
14379
14380kern_return_t
91447636 14381vm_map_region(
0a7de745
A
14382 vm_map_t map,
14383 vm_map_offset_t *address, /* IN/OUT */
14384 vm_map_size_t *size, /* OUT */
14385 vm_region_flavor_t flavor, /* IN */
14386 vm_region_info_t info, /* OUT */
14387 mach_msg_type_number_t *count, /* IN/OUT */
14388 mach_port_t *object_name) /* OUT */
1c79356b 14389{
0a7de745
A
14390 vm_map_entry_t tmp_entry;
14391 vm_map_entry_t entry;
14392 vm_map_offset_t start;
1c79356b 14393
0a7de745
A
14394 if (map == VM_MAP_NULL) {
14395 return KERN_INVALID_ARGUMENT;
14396 }
1c79356b
A
14397
14398 switch (flavor) {
1c79356b 14399 case VM_REGION_BASIC_INFO:
2d21ac55 14400 /* legacy for old 32-bit objects info */
1c79356b 14401 {
0a7de745 14402 vm_region_basic_info_t basic;
91447636 14403
0a7de745
A
14404 if (*count < VM_REGION_BASIC_INFO_COUNT) {
14405 return KERN_INVALID_ARGUMENT;
14406 }
1c79356b 14407
2d21ac55
A
14408 basic = (vm_region_basic_info_t) info;
14409 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 14410
2d21ac55 14411 vm_map_lock_read(map);
1c79356b 14412
2d21ac55
A
14413 start = *address;
14414 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14415 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14416 vm_map_unlock_read(map);
0a7de745 14417 return KERN_INVALID_ADDRESS;
2d21ac55
A
14418 }
14419 } else {
14420 entry = tmp_entry;
1c79356b 14421 }
1c79356b 14422
2d21ac55 14423 start = entry->vme_start;
1c79356b 14424
3e170ce0 14425 basic->offset = (uint32_t)VME_OFFSET(entry);
2d21ac55
A
14426 basic->protection = entry->protection;
14427 basic->inheritance = entry->inheritance;
14428 basic->max_protection = entry->max_protection;
14429 basic->behavior = entry->behavior;
14430 basic->user_wired_count = entry->user_wired_count;
14431 basic->reserved = entry->is_sub_map;
14432 *address = start;
14433 *size = (entry->vme_end - start);
91447636 14434
0a7de745
A
14435 if (object_name) {
14436 *object_name = IP_NULL;
14437 }
2d21ac55
A
14438 if (entry->is_sub_map) {
14439 basic->shared = FALSE;
14440 } else {
14441 basic->shared = entry->is_shared;
14442 }
91447636 14443
2d21ac55 14444 vm_map_unlock_read(map);
0a7de745 14445 return KERN_SUCCESS;
91447636
A
14446 }
14447
14448 case VM_REGION_BASIC_INFO_64:
14449 {
0a7de745 14450 vm_region_basic_info_64_t basic;
91447636 14451
0a7de745
A
14452 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
14453 return KERN_INVALID_ARGUMENT;
14454 }
2d21ac55
A
14455
14456 basic = (vm_region_basic_info_64_t) info;
14457 *count = VM_REGION_BASIC_INFO_COUNT_64;
14458
14459 vm_map_lock_read(map);
14460
14461 start = *address;
14462 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14463 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14464 vm_map_unlock_read(map);
0a7de745 14465 return KERN_INVALID_ADDRESS;
2d21ac55
A
14466 }
14467 } else {
14468 entry = tmp_entry;
14469 }
91447636 14470
2d21ac55 14471 start = entry->vme_start;
91447636 14472
3e170ce0 14473 basic->offset = VME_OFFSET(entry);
2d21ac55
A
14474 basic->protection = entry->protection;
14475 basic->inheritance = entry->inheritance;
14476 basic->max_protection = entry->max_protection;
14477 basic->behavior = entry->behavior;
14478 basic->user_wired_count = entry->user_wired_count;
14479 basic->reserved = entry->is_sub_map;
14480 *address = start;
14481 *size = (entry->vme_end - start);
91447636 14482
0a7de745
A
14483 if (object_name) {
14484 *object_name = IP_NULL;
14485 }
2d21ac55
A
14486 if (entry->is_sub_map) {
14487 basic->shared = FALSE;
14488 } else {
14489 basic->shared = entry->is_shared;
91447636 14490 }
2d21ac55
A
14491
14492 vm_map_unlock_read(map);
0a7de745 14493 return KERN_SUCCESS;
1c79356b
A
14494 }
14495 case VM_REGION_EXTENDED_INFO:
0a7de745
A
14496 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
14497 return KERN_INVALID_ARGUMENT;
14498 }
f427ee49 14499 OS_FALLTHROUGH;
39236c6e 14500 case VM_REGION_EXTENDED_INFO__legacy:
0a7de745 14501 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
39236c6e 14502 return KERN_INVALID_ARGUMENT;
0a7de745 14503 }
39236c6e 14504
0a7de745
A
14505 {
14506 vm_region_extended_info_t extended;
14507 mach_msg_type_number_t original_count;
f427ee49 14508 int effective_page_size, effective_page_shift;
1c79356b 14509
0a7de745 14510 extended = (vm_region_extended_info_t) info;
1c79356b 14511
f427ee49
A
14512 effective_page_shift = vm_self_region_page_shift(map);
14513 effective_page_size = (1 << effective_page_shift);
14514
0a7de745 14515 vm_map_lock_read(map);
1c79356b 14516
0a7de745
A
14517 start = *address;
14518 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14519 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14520 vm_map_unlock_read(map);
14521 return KERN_INVALID_ADDRESS;
14522 }
14523 } else {
14524 entry = tmp_entry;
14525 }
14526 start = entry->vme_start;
1c79356b 14527
0a7de745
A
14528 extended->protection = entry->protection;
14529 extended->user_tag = VME_ALIAS(entry);
14530 extended->pages_resident = 0;
14531 extended->pages_swapped_out = 0;
14532 extended->pages_shared_now_private = 0;
14533 extended->pages_dirtied = 0;
14534 extended->external_pager = 0;
14535 extended->shadow_depth = 0;
14536
14537 original_count = *count;
14538 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14539 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14540 } else {
14541 extended->pages_reusable = 0;
14542 *count = VM_REGION_EXTENDED_INFO_COUNT;
14543 }
39236c6e 14544
0a7de745 14545 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
1c79356b 14546
0a7de745
A
14547 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14548 extended->share_mode = SM_PRIVATE;
14549 }
1c79356b 14550
0a7de745
A
14551 if (object_name) {
14552 *object_name = IP_NULL;
14553 }
14554 *address = start;
14555 *size = (entry->vme_end - start);
1c79356b 14556
0a7de745
A
14557 vm_map_unlock_read(map);
14558 return KERN_SUCCESS;
14559 }
1c79356b 14560 case VM_REGION_TOP_INFO:
5ba3f43e 14561 {
0a7de745 14562 vm_region_top_info_t top;
1c79356b 14563
0a7de745
A
14564 if (*count < VM_REGION_TOP_INFO_COUNT) {
14565 return KERN_INVALID_ARGUMENT;
14566 }
1c79356b 14567
2d21ac55
A
14568 top = (vm_region_top_info_t) info;
14569 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 14570
2d21ac55 14571 vm_map_lock_read(map);
1c79356b 14572
2d21ac55
A
14573 start = *address;
14574 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14575 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14576 vm_map_unlock_read(map);
0a7de745 14577 return KERN_INVALID_ADDRESS;
2d21ac55
A
14578 }
14579 } else {
14580 entry = tmp_entry;
2d21ac55
A
14581 }
14582 start = entry->vme_start;
1c79356b 14583
2d21ac55
A
14584 top->private_pages_resident = 0;
14585 top->shared_pages_resident = 0;
1c79356b 14586
2d21ac55 14587 vm_map_region_top_walk(entry, top);
1c79356b 14588
0a7de745 14589 if (object_name) {
2d21ac55 14590 *object_name = IP_NULL;
0a7de745 14591 }
2d21ac55
A
14592 *address = start;
14593 *size = (entry->vme_end - start);
1c79356b 14594
2d21ac55 14595 vm_map_unlock_read(map);
0a7de745 14596 return KERN_SUCCESS;
1c79356b
A
14597 }
14598 default:
0a7de745 14599 return KERN_INVALID_ARGUMENT;
1c79356b
A
14600 }
14601}
14602
0a7de745
A
14603#define OBJ_RESIDENT_COUNT(obj, entry_size) \
14604 MIN((entry_size), \
14605 ((obj)->all_reusable ? \
14606 (obj)->wired_page_count : \
b0d623f7 14607 (obj)->resident_page_count - (obj)->reusable_page_count))
2d21ac55 14608
0c530ab8 14609void
91447636 14610vm_map_region_top_walk(
0a7de745 14611 vm_map_entry_t entry,
91447636 14612 vm_region_top_info_t top)
1c79356b 14613{
3e170ce0 14614 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
2d21ac55
A
14615 top->share_mode = SM_EMPTY;
14616 top->ref_count = 0;
14617 top->obj_id = 0;
14618 return;
1c79356b 14619 }
2d21ac55 14620
91447636 14621 {
0a7de745
A
14622 struct vm_object *obj, *tmp_obj;
14623 int ref_count;
14624 uint32_t entry_size;
1c79356b 14625
b0d623f7 14626 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 14627
3e170ce0 14628 obj = VME_OBJECT(entry);
1c79356b 14629
2d21ac55
A
14630 vm_object_lock(obj);
14631
0a7de745 14632 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
2d21ac55 14633 ref_count--;
0a7de745 14634 }
2d21ac55 14635
b0d623f7 14636 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55 14637 if (obj->shadow) {
0a7de745 14638 if (ref_count == 1) {
b0d623f7 14639 top->private_pages_resident =
0a7de745
A
14640 OBJ_RESIDENT_COUNT(obj, entry_size);
14641 } else {
b0d623f7 14642 top->shared_pages_resident =
0a7de745
A
14643 OBJ_RESIDENT_COUNT(obj, entry_size);
14644 }
2d21ac55
A
14645 top->ref_count = ref_count;
14646 top->share_mode = SM_COW;
5ba3f43e 14647
2d21ac55
A
14648 while ((tmp_obj = obj->shadow)) {
14649 vm_object_lock(tmp_obj);
14650 vm_object_unlock(obj);
14651 obj = tmp_obj;
1c79356b 14652
0a7de745 14653 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
2d21ac55 14654 ref_count--;
0a7de745 14655 }
1c79356b 14656
b0d623f7
A
14657 assert(obj->reusable_page_count <= obj->resident_page_count);
14658 top->shared_pages_resident +=
0a7de745 14659 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
14660 top->ref_count += ref_count - 1;
14661 }
1c79356b 14662 } else {
6d2010ae
A
14663 if (entry->superpage_size) {
14664 top->share_mode = SM_LARGE_PAGE;
14665 top->shared_pages_resident = 0;
14666 top->private_pages_resident = entry_size;
14667 } else if (entry->needs_copy) {
2d21ac55 14668 top->share_mode = SM_COW;
b0d623f7 14669 top->shared_pages_resident =
0a7de745 14670 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
14671 } else {
14672 if (ref_count == 1 ||
cb323159 14673 (ref_count == 2 && obj->named)) {
2d21ac55 14674 top->share_mode = SM_PRIVATE;
0a7de745
A
14675 top->private_pages_resident =
14676 OBJ_RESIDENT_COUNT(obj,
14677 entry_size);
2d21ac55
A
14678 } else {
14679 top->share_mode = SM_SHARED;
b0d623f7 14680 top->shared_pages_resident =
0a7de745
A
14681 OBJ_RESIDENT_COUNT(obj,
14682 entry_size);
2d21ac55
A
14683 }
14684 }
14685 top->ref_count = ref_count;
1c79356b 14686 }
b0d623f7 14687 /* XXX K64: obj_id will be truncated */
39236c6e 14688 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
1c79356b 14689
2d21ac55 14690 vm_object_unlock(obj);
1c79356b 14691 }
91447636
A
14692}
14693
0c530ab8 14694void
91447636 14695vm_map_region_walk(
0a7de745
A
14696 vm_map_t map,
14697 vm_map_offset_t va,
14698 vm_map_entry_t entry,
14699 vm_object_offset_t offset,
14700 vm_object_size_t range,
14701 vm_region_extended_info_t extended,
14702 boolean_t look_for_pages,
39236c6e 14703 mach_msg_type_number_t count)
91447636 14704{
0a7de745 14705 struct vm_object *obj, *tmp_obj;
39037602
A
14706 vm_map_offset_t last_offset;
14707 int i;
14708 int ref_count;
0a7de745 14709 struct vm_object *shadow_object;
f427ee49 14710 unsigned short shadow_depth;
0a7de745 14711 boolean_t do_region_footprint;
f427ee49
A
14712 int effective_page_size, effective_page_shift;
14713 vm_map_offset_t effective_page_mask;
a39ff7e2
A
14714
14715 do_region_footprint = task_self_region_footprint();
91447636 14716
3e170ce0 14717 if ((VME_OBJECT(entry) == 0) ||
2d21ac55 14718 (entry->is_sub_map) ||
3e170ce0 14719 (VME_OBJECT(entry)->phys_contiguous &&
0a7de745 14720 !entry->superpage_size)) {
2d21ac55
A
14721 extended->share_mode = SM_EMPTY;
14722 extended->ref_count = 0;
14723 return;
1c79356b 14724 }
6d2010ae
A
14725
14726 if (entry->superpage_size) {
14727 extended->shadow_depth = 0;
14728 extended->share_mode = SM_LARGE_PAGE;
14729 extended->ref_count = 1;
14730 extended->external_pager = 0;
f427ee49
A
14731
14732 /* TODO4K: Superpage in 4k mode? */
6d2010ae
A
14733 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14734 extended->shadow_depth = 0;
14735 return;
14736 }
14737
f427ee49
A
14738 effective_page_shift = vm_self_region_page_shift(map);
14739 effective_page_size = (1 << effective_page_shift);
14740 effective_page_mask = effective_page_size - 1;
14741
14742 offset = vm_map_trunc_page(offset, effective_page_mask);
14743
39037602 14744 obj = VME_OBJECT(entry);
2d21ac55 14745
39037602 14746 vm_object_lock(obj);
2d21ac55 14747
0a7de745 14748 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
39037602 14749 ref_count--;
0a7de745 14750 }
2d21ac55 14751
39037602
A
14752 if (look_for_pages) {
14753 for (last_offset = offset + range;
0a7de745 14754 offset < last_offset;
f427ee49 14755 offset += effective_page_size, va += effective_page_size) {
a39ff7e2
A
14756 if (do_region_footprint) {
14757 int disp;
14758
14759 disp = 0;
d9a64523
A
14760 if (map->has_corpse_footprint) {
14761 /*
14762 * Query the page info data we saved
14763 * while forking the corpse.
14764 */
14765 vm_map_corpse_footprint_query_page_info(
14766 map,
14767 va,
14768 &disp);
14769 } else {
14770 /*
14771 * Query the pmap.
14772 */
f427ee49
A
14773 vm_map_footprint_query_page_info(
14774 map,
14775 entry,
14776 va,
14777 &disp);
d9a64523 14778 }
f427ee49
A
14779 if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
14780 extended->pages_resident++;
a39ff7e2 14781 }
f427ee49
A
14782 if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
14783 extended->pages_reusable++;
14784 }
14785 if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
39037602 14786 extended->pages_dirtied++;
2d21ac55 14787 }
f427ee49
A
14788 if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14789 extended->pages_swapped_out++;
14790 }
39037602 14791 continue;
2d21ac55 14792 }
a39ff7e2 14793
39037602 14794 vm_map_region_look_for_page(map, va, obj,
f427ee49 14795 vm_object_trunc_page(offset), ref_count,
0a7de745 14796 0, extended, count);
2d21ac55 14797 }
a39ff7e2
A
14798
14799 if (do_region_footprint) {
39037602
A
14800 goto collect_object_info;
14801 }
39037602 14802 } else {
0a7de745 14803collect_object_info:
39037602
A
14804 shadow_object = obj->shadow;
14805 shadow_depth = 0;
2d21ac55 14806
cb323159 14807 if (!(obj->internal)) {
39037602 14808 extended->external_pager = 1;
0a7de745 14809 }
39037602
A
14810
14811 if (shadow_object != VM_OBJECT_NULL) {
14812 vm_object_lock(shadow_object);
14813 for (;
0a7de745
A
14814 shadow_object != VM_OBJECT_NULL;
14815 shadow_depth++) {
14816 vm_object_t next_shadow;
39037602 14817
cb323159 14818 if (!(shadow_object->internal)) {
39037602 14819 extended->external_pager = 1;
0a7de745 14820 }
39037602
A
14821
14822 next_shadow = shadow_object->shadow;
14823 if (next_shadow) {
14824 vm_object_lock(next_shadow);
14825 }
14826 vm_object_unlock(shadow_object);
14827 shadow_object = next_shadow;
2d21ac55 14828 }
91447636 14829 }
39037602
A
14830 extended->shadow_depth = shadow_depth;
14831 }
1c79356b 14832
0a7de745 14833 if (extended->shadow_depth || entry->needs_copy) {
39037602 14834 extended->share_mode = SM_COW;
0a7de745
A
14835 } else {
14836 if (ref_count == 1) {
39037602 14837 extended->share_mode = SM_PRIVATE;
0a7de745
A
14838 } else {
14839 if (obj->true_share) {
39037602 14840 extended->share_mode = SM_TRUESHARED;
0a7de745 14841 } else {
39037602 14842 extended->share_mode = SM_SHARED;
0a7de745 14843 }
2d21ac55 14844 }
39037602
A
14845 }
14846 extended->ref_count = ref_count - extended->shadow_depth;
5ba3f43e 14847
39037602 14848 for (i = 0; i < extended->shadow_depth; i++) {
0a7de745 14849 if ((tmp_obj = obj->shadow) == 0) {
39037602 14850 break;
0a7de745 14851 }
39037602 14852 vm_object_lock(tmp_obj);
2d21ac55 14853 vm_object_unlock(obj);
1c79356b 14854
0a7de745 14855 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
39037602 14856 ref_count--;
0a7de745 14857 }
39037602
A
14858
14859 extended->ref_count += ref_count;
14860 obj = tmp_obj;
14861 }
14862 vm_object_unlock(obj);
91447636 14863
39037602 14864 if (extended->share_mode == SM_SHARED) {
0a7de745
A
14865 vm_map_entry_t cur;
14866 vm_map_entry_t last;
39037602 14867 int my_refs;
91447636 14868
39037602
A
14869 obj = VME_OBJECT(entry);
14870 last = vm_map_to_entry(map);
14871 my_refs = 0;
91447636 14872
0a7de745 14873 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
39037602 14874 ref_count--;
0a7de745
A
14875 }
14876 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
39037602 14877 my_refs += vm_map_region_count_obj_refs(cur, obj);
0a7de745 14878 }
39037602 14879
0a7de745 14880 if (my_refs == ref_count) {
39037602 14881 extended->share_mode = SM_PRIVATE_ALIASED;
0a7de745 14882 } else if (my_refs > 1) {
39037602 14883 extended->share_mode = SM_SHARED_ALIASED;
0a7de745 14884 }
91447636 14885 }
1c79356b
A
14886}
14887
1c79356b 14888
91447636
A
14889/* object is locked on entry and locked on return */
14890
14891
14892static void
14893vm_map_region_look_for_page(
0a7de745
A
14894 __unused vm_map_t map,
14895 __unused vm_map_offset_t va,
14896 vm_object_t object,
14897 vm_object_offset_t offset,
14898 int max_refcnt,
f427ee49 14899 unsigned short depth,
0a7de745 14900 vm_region_extended_info_t extended,
39236c6e 14901 mach_msg_type_number_t count)
1c79356b 14902{
0a7de745
A
14903 vm_page_t p;
14904 vm_object_t shadow;
14905 int ref_count;
14906 vm_object_t caller_object;
39037602 14907
91447636
A
14908 shadow = object->shadow;
14909 caller_object = object;
1c79356b 14910
5ba3f43e 14911
91447636 14912 while (TRUE) {
cb323159 14913 if (!(object->internal)) {
2d21ac55 14914 extended->external_pager = 1;
0a7de745 14915 }
1c79356b 14916
91447636 14917 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
0a7de745
A
14918 if (shadow && (max_refcnt == 1)) {
14919 extended->pages_shared_now_private++;
14920 }
1c79356b 14921
d9a64523 14922 if (!p->vmp_fictitious &&
0a7de745
A
14923 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
14924 extended->pages_dirtied++;
14925 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
d9a64523 14926 if (p->vmp_reusable || object->all_reusable) {
39236c6e
A
14927 extended->pages_reusable++;
14928 }
14929 }
1c79356b 14930
39236c6e 14931 extended->pages_resident++;
91447636 14932
0a7de745 14933 if (object != caller_object) {
2d21ac55 14934 vm_object_unlock(object);
0a7de745 14935 }
91447636
A
14936
14937 return;
1c79356b 14938 }
39236c6e
A
14939 if (object->internal &&
14940 object->alive &&
14941 !object->terminating &&
14942 object->pager_ready) {
39037602
A
14943 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14944 == VM_EXTERNAL_STATE_EXISTS) {
14945 /* the pager has that page */
14946 extended->pages_swapped_out++;
0a7de745 14947 if (object != caller_object) {
39037602 14948 vm_object_unlock(object);
0a7de745 14949 }
39037602 14950 return;
2d21ac55 14951 }
1c79356b 14952 }
2d21ac55 14953
91447636 14954 if (shadow) {
2d21ac55 14955 vm_object_lock(shadow);
1c79356b 14956
0a7de745
A
14957 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
14958 ref_count--;
14959 }
1c79356b 14960
0a7de745
A
14961 if (++depth > extended->shadow_depth) {
14962 extended->shadow_depth = depth;
14963 }
1c79356b 14964
0a7de745
A
14965 if (ref_count > max_refcnt) {
14966 max_refcnt = ref_count;
14967 }
5ba3f43e 14968
0a7de745 14969 if (object != caller_object) {
2d21ac55 14970 vm_object_unlock(object);
0a7de745 14971 }
91447636 14972
6d2010ae 14973 offset = offset + object->vo_shadow_offset;
91447636
A
14974 object = shadow;
14975 shadow = object->shadow;
14976 continue;
1c79356b 14977 }
0a7de745 14978 if (object != caller_object) {
2d21ac55 14979 vm_object_unlock(object);
0a7de745 14980 }
91447636
A
14981 break;
14982 }
14983}
1c79356b 14984
91447636
A
14985static int
14986vm_map_region_count_obj_refs(
0a7de745 14987 vm_map_entry_t entry,
91447636
A
14988 vm_object_t object)
14989{
0a7de745 14990 int ref_count;
39037602
A
14991 vm_object_t chk_obj;
14992 vm_object_t tmp_obj;
1c79356b 14993
0a7de745
A
14994 if (VME_OBJECT(entry) == 0) {
14995 return 0;
14996 }
1c79356b 14997
0a7de745
A
14998 if (entry->is_sub_map) {
14999 return 0;
15000 } else {
2d21ac55 15001 ref_count = 0;
1c79356b 15002
3e170ce0 15003 chk_obj = VME_OBJECT(entry);
2d21ac55 15004 vm_object_lock(chk_obj);
1c79356b 15005
2d21ac55 15006 while (chk_obj) {
0a7de745 15007 if (chk_obj == object) {
2d21ac55 15008 ref_count++;
0a7de745 15009 }
2d21ac55 15010 tmp_obj = chk_obj->shadow;
0a7de745 15011 if (tmp_obj) {
2d21ac55 15012 vm_object_lock(tmp_obj);
0a7de745 15013 }
2d21ac55 15014 vm_object_unlock(chk_obj);
1c79356b 15015
2d21ac55
A
15016 chk_obj = tmp_obj;
15017 }
1c79356b 15018 }
0a7de745 15019 return ref_count;
1c79356b
A
15020}
15021
15022
15023/*
91447636
A
15024 * Routine: vm_map_simplify
15025 *
15026 * Description:
15027 * Attempt to simplify the map representation in
15028 * the vicinity of the given starting address.
15029 * Note:
15030 * This routine is intended primarily to keep the
15031 * kernel maps more compact -- they generally don't
15032 * benefit from the "expand a map entry" technology
15033 * at allocation time because the adjacent entry
15034 * is often wired down.
1c79356b 15035 */
91447636
A
15036void
15037vm_map_simplify_entry(
0a7de745
A
15038 vm_map_t map,
15039 vm_map_entry_t this_entry)
1c79356b 15040{
0a7de745 15041 vm_map_entry_t prev_entry;
1c79356b 15042
91447636 15043 prev_entry = this_entry->vme_prev;
1c79356b 15044
91447636 15045 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 15046 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 15047
91447636 15048 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 15049
2d21ac55 15050 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
3e170ce0
A
15051 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
15052 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
0a7de745
A
15053 prev_entry->vme_start))
15054 == VME_OFFSET(this_entry)) &&
1c79356b 15055
fe8ab488
A
15056 (prev_entry->behavior == this_entry->behavior) &&
15057 (prev_entry->needs_copy == this_entry->needs_copy) &&
91447636
A
15058 (prev_entry->protection == this_entry->protection) &&
15059 (prev_entry->max_protection == this_entry->max_protection) &&
fe8ab488
A
15060 (prev_entry->inheritance == this_entry->inheritance) &&
15061 (prev_entry->use_pmap == this_entry->use_pmap) &&
3e170ce0 15062 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
2d21ac55 15063 (prev_entry->no_cache == this_entry->no_cache) &&
fe8ab488
A
15064 (prev_entry->permanent == this_entry->permanent) &&
15065 (prev_entry->map_aligned == this_entry->map_aligned) &&
15066 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
15067 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
d9a64523 15068 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
fe8ab488
A
15069 /* from_reserved_zone: OK if that field doesn't match */
15070 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
3e170ce0 15071 (prev_entry->vme_resilient_codesign ==
0a7de745 15072 this_entry->vme_resilient_codesign) &&
3e170ce0 15073 (prev_entry->vme_resilient_media ==
0a7de745 15074 this_entry->vme_resilient_media) &&
cb323159 15075 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
fe8ab488 15076
91447636
A
15077 (prev_entry->wired_count == this_entry->wired_count) &&
15078 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 15079
39037602 15080 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
91447636
A
15081 (prev_entry->in_transition == FALSE) &&
15082 (this_entry->in_transition == FALSE) &&
15083 (prev_entry->needs_wakeup == FALSE) &&
15084 (this_entry->needs_wakeup == FALSE) &&
f427ee49 15085 (prev_entry->is_shared == this_entry->is_shared) &&
fe8ab488
A
15086 (prev_entry->superpage_size == FALSE) &&
15087 (this_entry->superpage_size == FALSE)
0a7de745 15088 ) {
316670eb 15089 vm_map_store_entry_unlink(map, prev_entry);
e2d2fc5c 15090 assert(prev_entry->vme_start < this_entry->vme_end);
0a7de745 15091 if (prev_entry->map_aligned) {
39236c6e 15092 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
0a7de745
A
15093 VM_MAP_PAGE_MASK(map)));
15094 }
91447636 15095 this_entry->vme_start = prev_entry->vme_start;
3e170ce0
A
15096 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
15097
15098 if (map->holelistenabled) {
15099 vm_map_store_update_first_free(map, this_entry, TRUE);
15100 }
15101
2d21ac55 15102 if (prev_entry->is_sub_map) {
3e170ce0 15103 vm_map_deallocate(VME_SUBMAP(prev_entry));
2d21ac55 15104 } else {
3e170ce0 15105 vm_object_deallocate(VME_OBJECT(prev_entry));
2d21ac55 15106 }
91447636 15107 vm_map_entry_dispose(map, prev_entry);
0c530ab8 15108 SAVE_HINT_MAP_WRITE(map, this_entry);
1c79356b 15109 }
91447636 15110}
1c79356b 15111
91447636
A
15112void
15113vm_map_simplify(
0a7de745
A
15114 vm_map_t map,
15115 vm_map_offset_t start)
91447636 15116{
0a7de745 15117 vm_map_entry_t this_entry;
1c79356b 15118
91447636
A
15119 vm_map_lock(map);
15120 if (vm_map_lookup_entry(map, start, &this_entry)) {
15121 vm_map_simplify_entry(map, this_entry);
15122 vm_map_simplify_entry(map, this_entry->vme_next);
15123 }
91447636
A
15124 vm_map_unlock(map);
15125}
1c79356b 15126
91447636
A
15127static void
15128vm_map_simplify_range(
0a7de745
A
15129 vm_map_t map,
15130 vm_map_offset_t start,
15131 vm_map_offset_t end)
91447636 15132{
0a7de745 15133 vm_map_entry_t entry;
1c79356b 15134
91447636
A
15135 /*
15136 * The map should be locked (for "write") by the caller.
15137 */
1c79356b 15138
91447636
A
15139 if (start >= end) {
15140 /* invalid address range */
15141 return;
15142 }
1c79356b 15143
39236c6e 15144 start = vm_map_trunc_page(start,
0a7de745 15145 VM_MAP_PAGE_MASK(map));
39236c6e 15146 end = vm_map_round_page(end,
0a7de745 15147 VM_MAP_PAGE_MASK(map));
2d21ac55 15148
91447636
A
15149 if (!vm_map_lookup_entry(map, start, &entry)) {
15150 /* "start" is not mapped and "entry" ends before "start" */
15151 if (entry == vm_map_to_entry(map)) {
15152 /* start with first entry in the map */
15153 entry = vm_map_first_entry(map);
15154 } else {
15155 /* start with next entry */
15156 entry = entry->vme_next;
15157 }
15158 }
5ba3f43e 15159
91447636 15160 while (entry != vm_map_to_entry(map) &&
0a7de745 15161 entry->vme_start <= end) {
91447636
A
15162 /* try and coalesce "entry" with its previous entry */
15163 vm_map_simplify_entry(map, entry);
15164 entry = entry->vme_next;
15165 }
15166}
1c79356b 15167
1c79356b 15168
91447636
A
15169/*
15170 * Routine: vm_map_machine_attribute
15171 * Purpose:
15172 * Provide machine-specific attributes to mappings,
15173 * such as cachability etc. for machines that provide
15174 * them. NUMA architectures and machines with big/strange
15175 * caches will use this.
15176 * Note:
15177 * Responsibilities for locking and checking are handled here,
15178 * everything else in the pmap module. If any non-volatile
15179 * information must be kept, the pmap module should handle
15180 * it itself. [This assumes that attributes do not
15181 * need to be inherited, which seems ok to me]
15182 */
15183kern_return_t
15184vm_map_machine_attribute(
0a7de745
A
15185 vm_map_t map,
15186 vm_map_offset_t start,
15187 vm_map_offset_t end,
15188 vm_machine_attribute_t attribute,
15189 vm_machine_attribute_val_t* value) /* IN/OUT */
91447636 15190{
0a7de745 15191 kern_return_t ret;
91447636
A
15192 vm_map_size_t sync_size;
15193 vm_map_entry_t entry;
5ba3f43e 15194
0a7de745 15195 if (start < vm_map_min(map) || end > vm_map_max(map)) {
91447636 15196 return KERN_INVALID_ADDRESS;
0a7de745 15197 }
1c79356b 15198
91447636
A
15199 /* Figure how much memory we need to flush (in page increments) */
15200 sync_size = end - start;
1c79356b 15201
91447636 15202 vm_map_lock(map);
5ba3f43e
A
15203
15204 if (attribute != MATTR_CACHE) {
91447636
A
15205 /* If we don't have to find physical addresses, we */
15206 /* don't have to do an explicit traversal here. */
0a7de745
A
15207 ret = pmap_attribute(map->pmap, start, end - start,
15208 attribute, value);
91447636
A
15209 vm_map_unlock(map);
15210 return ret;
15211 }
1c79356b 15212
0a7de745 15213 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 15214
0a7de745 15215 while (sync_size) {
91447636 15216 if (vm_map_lookup_entry(map, start, &entry)) {
0a7de745
A
15217 vm_map_size_t sub_size;
15218 if ((entry->vme_end - start) > sync_size) {
91447636
A
15219 sub_size = sync_size;
15220 sync_size = 0;
15221 } else {
15222 sub_size = entry->vme_end - start;
2d21ac55 15223 sync_size -= sub_size;
91447636 15224 }
0a7de745 15225 if (entry->is_sub_map) {
91447636
A
15226 vm_map_offset_t sub_start;
15227 vm_map_offset_t sub_end;
1c79356b 15228
5ba3f43e 15229 sub_start = (start - entry->vme_start)
0a7de745 15230 + VME_OFFSET(entry);
91447636
A
15231 sub_end = sub_start + sub_size;
15232 vm_map_machine_attribute(
5ba3f43e 15233 VME_SUBMAP(entry),
91447636
A
15234 sub_start,
15235 sub_end,
15236 attribute, value);
15237 } else {
3e170ce0 15238 if (VME_OBJECT(entry)) {
0a7de745
A
15239 vm_page_t m;
15240 vm_object_t object;
15241 vm_object_t base_object;
15242 vm_object_t last_object;
15243 vm_object_offset_t offset;
15244 vm_object_offset_t base_offset;
15245 vm_map_size_t range;
91447636
A
15246 range = sub_size;
15247 offset = (start - entry->vme_start)
0a7de745 15248 + VME_OFFSET(entry);
f427ee49 15249 offset = vm_object_trunc_page(offset);
91447636 15250 base_offset = offset;
3e170ce0 15251 object = VME_OBJECT(entry);
91447636
A
15252 base_object = object;
15253 last_object = NULL;
1c79356b 15254
91447636 15255 vm_object_lock(object);
1c79356b 15256
91447636
A
15257 while (range) {
15258 m = vm_page_lookup(
15259 object, offset);
1c79356b 15260
d9a64523 15261 if (m && !m->vmp_fictitious) {
0a7de745
A
15262 ret =
15263 pmap_attribute_cache_sync(
15264 VM_PAGE_GET_PHYS_PAGE(m),
15265 PAGE_SIZE,
15266 attribute, value);
91447636 15267 } else if (object->shadow) {
0a7de745 15268 offset = offset + object->vo_shadow_offset;
91447636
A
15269 last_object = object;
15270 object = object->shadow;
15271 vm_object_lock(last_object->shadow);
15272 vm_object_unlock(last_object);
15273 continue;
15274 }
f427ee49
A
15275 if (range < PAGE_SIZE) {
15276 range = 0;
15277 } else {
15278 range -= PAGE_SIZE;
15279 }
1c79356b 15280
91447636 15281 if (base_object != object) {
0a7de745 15282 vm_object_unlock(object);
91447636
A
15283 vm_object_lock(base_object);
15284 object = base_object;
15285 }
15286 /* Bump to the next page */
15287 base_offset += PAGE_SIZE;
15288 offset = base_offset;
15289 }
15290 vm_object_unlock(object);
15291 }
15292 }
15293 start += sub_size;
15294 } else {
15295 vm_map_unlock(map);
15296 return KERN_FAILURE;
15297 }
1c79356b 15298 }
e5568f75 15299
91447636 15300 vm_map_unlock(map);
e5568f75 15301
91447636
A
15302 return ret;
15303}
e5568f75 15304
91447636
A
15305/*
15306 * vm_map_behavior_set:
15307 *
15308 * Sets the paging reference behavior of the specified address
15309 * range in the target map. Paging reference behavior affects
5ba3f43e 15310 * how pagein operations resulting from faults on the map will be
91447636
A
15311 * clustered.
15312 */
5ba3f43e 15313kern_return_t
91447636 15314vm_map_behavior_set(
0a7de745
A
15315 vm_map_t map,
15316 vm_map_offset_t start,
15317 vm_map_offset_t end,
15318 vm_behavior_t new_behavior)
91447636 15319{
0a7de745
A
15320 vm_map_entry_t entry;
15321 vm_map_entry_t temp_entry;
e5568f75 15322
6d2010ae
A
15323 if (start > end ||
15324 start < vm_map_min(map) ||
15325 end > vm_map_max(map)) {
15326 return KERN_NO_SPACE;
15327 }
15328
91447636 15329 switch (new_behavior) {
b0d623f7
A
15330 /*
15331 * This first block of behaviors all set a persistent state on the specified
15332 * memory range. All we have to do here is to record the desired behavior
15333 * in the vm_map_entry_t's.
15334 */
15335
91447636
A
15336 case VM_BEHAVIOR_DEFAULT:
15337 case VM_BEHAVIOR_RANDOM:
15338 case VM_BEHAVIOR_SEQUENTIAL:
15339 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
15340 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
15341 vm_map_lock(map);
5ba3f43e 15342
b0d623f7
A
15343 /*
15344 * The entire address range must be valid for the map.
0a7de745 15345 * Note that vm_map_range_check() does a
b0d623f7
A
15346 * vm_map_lookup_entry() internally and returns the
15347 * entry containing the start of the address range if
15348 * the entire range is valid.
15349 */
15350 if (vm_map_range_check(map, start, end, &temp_entry)) {
15351 entry = temp_entry;
15352 vm_map_clip_start(map, entry, start);
0a7de745 15353 } else {
b0d623f7 15354 vm_map_unlock(map);
0a7de745 15355 return KERN_INVALID_ADDRESS;
b0d623f7 15356 }
5ba3f43e 15357
b0d623f7
A
15358 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
15359 vm_map_clip_end(map, entry, end);
fe8ab488
A
15360 if (entry->is_sub_map) {
15361 assert(!entry->use_pmap);
15362 }
5ba3f43e 15363
0a7de745 15364 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
b0d623f7
A
15365 entry->zero_wired_pages = TRUE;
15366 } else {
15367 entry->behavior = new_behavior;
15368 }
15369 entry = entry->vme_next;
15370 }
5ba3f43e 15371
b0d623f7 15372 vm_map_unlock(map);
91447636 15373 break;
b0d623f7
A
15374
15375 /*
15376 * The rest of these are different from the above in that they cause
5ba3f43e 15377 * an immediate action to take place as opposed to setting a behavior that
b0d623f7
A
15378 * affects future actions.
15379 */
15380
91447636 15381 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
15382 return vm_map_willneed(map, start, end);
15383
91447636 15384 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
15385 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
15386
15387 case VM_BEHAVIOR_FREE:
15388 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
15389
15390 case VM_BEHAVIOR_REUSABLE:
15391 return vm_map_reusable_pages(map, start, end);
15392
15393 case VM_BEHAVIOR_REUSE:
15394 return vm_map_reuse_pages(map, start, end);
15395
15396 case VM_BEHAVIOR_CAN_REUSE:
15397 return vm_map_can_reuse(map, start, end);
15398
3e170ce0
A
15399#if MACH_ASSERT
15400 case VM_BEHAVIOR_PAGEOUT:
15401 return vm_map_pageout(map, start, end);
15402#endif /* MACH_ASSERT */
15403
1c79356b 15404 default:
0a7de745 15405 return KERN_INVALID_ARGUMENT;
1c79356b 15406 }
1c79356b 15407
0a7de745 15408 return KERN_SUCCESS;
b0d623f7
A
15409}
15410
15411
15412/*
15413 * Internals for madvise(MADV_WILLNEED) system call.
15414 *
cb323159
A
15415 * The implementation is to do:-
15416 * a) read-ahead if the mapping corresponds to a mapped regular file
15417 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
b0d623f7
A
15418 */
15419
15420
15421static kern_return_t
15422vm_map_willneed(
0a7de745
A
15423 vm_map_t map,
15424 vm_map_offset_t start,
15425 vm_map_offset_t end
15426 )
b0d623f7 15427{
0a7de745
A
15428 vm_map_entry_t entry;
15429 vm_object_t object;
15430 memory_object_t pager;
15431 struct vm_object_fault_info fault_info = {};
15432 kern_return_t kr;
15433 vm_object_size_t len;
15434 vm_object_offset_t offset;
15435
15436 fault_info.interruptible = THREAD_UNINT; /* ignored value */
b0d623f7 15437 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
0a7de745 15438 fault_info.stealth = TRUE;
b0d623f7
A
15439
15440 /*
15441 * The MADV_WILLNEED operation doesn't require any changes to the
15442 * vm_map_entry_t's, so the read lock is sufficient.
15443 */
15444
15445 vm_map_lock_read(map);
15446
15447 /*
15448 * The madvise semantics require that the address range be fully
15449 * allocated with no holes. Otherwise, we're required to return
15450 * an error.
15451 */
15452
0a7de745 15453 if (!vm_map_range_check(map, start, end, &entry)) {
6d2010ae
A
15454 vm_map_unlock_read(map);
15455 return KERN_INVALID_ADDRESS;
15456 }
b0d623f7 15457
6d2010ae
A
15458 /*
15459 * Examine each vm_map_entry_t in the range.
15460 */
0a7de745 15461 for (; entry != vm_map_to_entry(map) && start < end;) {
b0d623f7 15462 /*
6d2010ae
A
15463 * The first time through, the start address could be anywhere
15464 * within the vm_map_entry we found. So adjust the offset to
15465 * correspond. After that, the offset will always be zero to
15466 * correspond to the beginning of the current vm_map_entry.
b0d623f7 15467 */
3e170ce0 15468 offset = (start - entry->vme_start) + VME_OFFSET(entry);
b0d623f7 15469
6d2010ae
A
15470 /*
15471 * Set the length so we don't go beyond the end of the
15472 * map_entry or beyond the end of the range we were given.
15473 * This range could span also multiple map entries all of which
15474 * map different files, so make sure we only do the right amount
15475 * of I/O for each object. Note that it's possible for there
15476 * to be multiple map entries all referring to the same object
15477 * but with different page permissions, but it's not worth
15478 * trying to optimize that case.
15479 */
15480 len = MIN(entry->vme_end - start, end - start);
b0d623f7 15481
6d2010ae
A
15482 if ((vm_size_t) len != len) {
15483 /* 32-bit overflow */
15484 len = (vm_size_t) (0 - PAGE_SIZE);
15485 }
15486 fault_info.cluster_size = (vm_size_t) len;
5ba3f43e 15487 fault_info.lo_offset = offset;
6d2010ae 15488 fault_info.hi_offset = offset + len;
3e170ce0 15489 fault_info.user_tag = VME_ALIAS(entry);
fe8ab488
A
15490 fault_info.pmap_options = 0;
15491 if (entry->iokit_acct ||
15492 (!entry->is_sub_map && !entry->use_pmap)) {
15493 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15494 }
b0d623f7 15495
6d2010ae 15496 /*
cb323159
A
15497 * If the entry is a submap OR there's no read permission
15498 * to this mapping, then just skip it.
6d2010ae 15499 */
cb323159 15500 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
6d2010ae
A
15501 entry = entry->vme_next;
15502 start = entry->vme_start;
15503 continue;
15504 }
b0d623f7 15505
cb323159 15506 object = VME_OBJECT(entry);
b0d623f7 15507
cb323159
A
15508 if (object == NULL ||
15509 (object && object->internal)) {
15510 /*
15511 * Memory range backed by anonymous memory.
15512 */
15513 vm_size_t region_size = 0, effective_page_size = 0;
15514 vm_map_offset_t addr = 0, effective_page_mask = 0;
b0d623f7 15515
cb323159
A
15516 region_size = len;
15517 addr = start;
b0d623f7 15518
f427ee49 15519 effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
cb323159 15520 effective_page_size = effective_page_mask + 1;
b0d623f7 15521
cb323159 15522 vm_map_unlock_read(map);
b0d623f7 15523
cb323159
A
15524 while (region_size) {
15525 vm_pre_fault(
15526 vm_map_trunc_page(addr, effective_page_mask),
15527 VM_PROT_READ | VM_PROT_WRITE);
15528
15529 region_size -= effective_page_size;
15530 addr += effective_page_size;
15531 }
15532 } else {
15533 /*
15534 * Find the file object backing this map entry. If there is
15535 * none, then we simply ignore the "will need" advice for this
15536 * entry and go on to the next one.
15537 */
15538 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15539 entry = entry->vme_next;
15540 start = entry->vme_start;
15541 continue;
15542 }
15543
15544 vm_object_paging_begin(object);
15545 pager = object->pager;
15546 vm_object_unlock(object);
15547
15548 /*
15549 * The data_request() could take a long time, so let's
15550 * release the map lock to avoid blocking other threads.
15551 */
15552 vm_map_unlock_read(map);
15553
15554 /*
15555 * Get the data from the object asynchronously.
15556 *
15557 * Note that memory_object_data_request() places limits on the
15558 * amount of I/O it will do. Regardless of the len we
15559 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15560 * silently truncates the len to that size. This isn't
15561 * necessarily bad since madvise shouldn't really be used to
15562 * page in unlimited amounts of data. Other Unix variants
15563 * limit the willneed case as well. If this turns out to be an
15564 * issue for developers, then we can always adjust the policy
15565 * here and still be backwards compatible since this is all
15566 * just "advice".
15567 */
15568 kr = memory_object_data_request(
15569 pager,
f427ee49 15570 vm_object_trunc_page(offset) + object->paging_offset,
cb323159
A
15571 0, /* ignored */
15572 VM_PROT_READ,
15573 (memory_object_fault_info_t)&fault_info);
15574
15575 vm_object_lock(object);
15576 vm_object_paging_end(object);
15577 vm_object_unlock(object);
15578
15579 /*
15580 * If we couldn't do the I/O for some reason, just give up on
15581 * the madvise. We still return success to the user since
15582 * madvise isn't supposed to fail when the advice can't be
15583 * taken.
15584 */
15585
15586 if (kr != KERN_SUCCESS) {
15587 return KERN_SUCCESS;
15588 }
6d2010ae 15589 }
b0d623f7 15590
6d2010ae
A
15591 start += len;
15592 if (start >= end) {
15593 /* done */
15594 return KERN_SUCCESS;
15595 }
b0d623f7 15596
6d2010ae
A
15597 /* look up next entry */
15598 vm_map_lock_read(map);
0a7de745 15599 if (!vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 15600 /*
6d2010ae 15601 * There's a new hole in the address range.
b0d623f7 15602 */
6d2010ae
A
15603 vm_map_unlock_read(map);
15604 return KERN_INVALID_ADDRESS;
b0d623f7 15605 }
6d2010ae 15606 }
b0d623f7
A
15607
15608 vm_map_unlock_read(map);
6d2010ae 15609 return KERN_SUCCESS;
b0d623f7
A
15610}
15611
15612static boolean_t
15613vm_map_entry_is_reusable(
15614 vm_map_entry_t entry)
15615{
3e170ce0
A
15616 /* Only user map entries */
15617
b0d623f7
A
15618 vm_object_t object;
15619
2dced7af
A
15620 if (entry->is_sub_map) {
15621 return FALSE;
15622 }
15623
3e170ce0 15624 switch (VME_ALIAS(entry)) {
39236c6e
A
15625 case VM_MEMORY_MALLOC:
15626 case VM_MEMORY_MALLOC_SMALL:
15627 case VM_MEMORY_MALLOC_LARGE:
15628 case VM_MEMORY_REALLOC:
15629 case VM_MEMORY_MALLOC_TINY:
15630 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15631 case VM_MEMORY_MALLOC_LARGE_REUSED:
15632 /*
15633 * This is a malloc() memory region: check if it's still
15634 * in its original state and can be re-used for more
15635 * malloc() allocations.
15636 */
15637 break;
15638 default:
15639 /*
15640 * Not a malloc() memory region: let the caller decide if
15641 * it's re-usable.
15642 */
15643 return TRUE;
15644 }
15645
d9a64523 15646 if (/*entry->is_shared ||*/
0a7de745
A
15647 entry->is_sub_map ||
15648 entry->in_transition ||
15649 entry->protection != VM_PROT_DEFAULT ||
15650 entry->max_protection != VM_PROT_ALL ||
15651 entry->inheritance != VM_INHERIT_DEFAULT ||
15652 entry->no_cache ||
15653 entry->permanent ||
15654 entry->superpage_size != FALSE ||
15655 entry->zero_wired_pages ||
15656 entry->wired_count != 0 ||
15657 entry->user_wired_count != 0) {
b0d623f7 15658 return FALSE;
91447636 15659 }
b0d623f7 15660
3e170ce0 15661 object = VME_OBJECT(entry);
b0d623f7
A
15662 if (object == VM_OBJECT_NULL) {
15663 return TRUE;
15664 }
316670eb
A
15665 if (
15666#if 0
15667 /*
15668 * Let's proceed even if the VM object is potentially
15669 * shared.
15670 * We check for this later when processing the actual
15671 * VM pages, so the contents will be safe if shared.
5ba3f43e 15672 *
316670eb
A
15673 * But we can still mark this memory region as "reusable" to
15674 * acknowledge that the caller did let us know that the memory
15675 * could be re-used and should not be penalized for holding
15676 * on to it. This allows its "resident size" to not include
15677 * the reusable range.
15678 */
0a7de745 15679 object->ref_count == 1 &&
316670eb 15680#endif
0a7de745
A
15681 object->wired_page_count == 0 &&
15682 object->copy == VM_OBJECT_NULL &&
15683 object->shadow == VM_OBJECT_NULL &&
15684 object->internal &&
15685 object->purgable == VM_PURGABLE_DENY &&
0a7de745
A
15686 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15687 !object->code_signed) {
b0d623f7 15688 return TRUE;
1c79356b 15689 }
b0d623f7 15690 return FALSE;
b0d623f7 15691}
1c79356b 15692
b0d623f7
A
15693static kern_return_t
15694vm_map_reuse_pages(
0a7de745
A
15695 vm_map_t map,
15696 vm_map_offset_t start,
15697 vm_map_offset_t end)
b0d623f7 15698{
0a7de745
A
15699 vm_map_entry_t entry;
15700 vm_object_t object;
15701 vm_object_offset_t start_offset, end_offset;
b0d623f7
A
15702
15703 /*
15704 * The MADV_REUSE operation doesn't require any changes to the
15705 * vm_map_entry_t's, so the read lock is sufficient.
15706 */
0b4e3aa0 15707
f427ee49
A
15708 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15709 /*
15710 * XXX TODO4K
15711 * need to figure out what reusable means for a
15712 * portion of a native page.
15713 */
15714 return KERN_SUCCESS;
15715 }
15716
b0d623f7 15717 vm_map_lock_read(map);
0a7de745 15718 assert(map->pmap != kernel_pmap); /* protect alias access */
1c79356b 15719
b0d623f7
A
15720 /*
15721 * The madvise semantics require that the address range be fully
15722 * allocated with no holes. Otherwise, we're required to return
15723 * an error.
15724 */
15725
15726 if (!vm_map_range_check(map, start, end, &entry)) {
15727 vm_map_unlock_read(map);
15728 vm_page_stats_reusable.reuse_pages_failure++;
15729 return KERN_INVALID_ADDRESS;
1c79356b 15730 }
91447636 15731
b0d623f7
A
15732 /*
15733 * Examine each vm_map_entry_t in the range.
15734 */
15735 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745 15736 entry = entry->vme_next) {
b0d623f7
A
15737 /*
15738 * Sanity check on the VM map entry.
15739 */
0a7de745 15740 if (!vm_map_entry_is_reusable(entry)) {
b0d623f7
A
15741 vm_map_unlock_read(map);
15742 vm_page_stats_reusable.reuse_pages_failure++;
15743 return KERN_INVALID_ADDRESS;
15744 }
15745
15746 /*
15747 * The first time through, the start address could be anywhere
15748 * within the vm_map_entry we found. So adjust the offset to
15749 * correspond.
15750 */
15751 if (entry->vme_start < start) {
15752 start_offset = start - entry->vme_start;
15753 } else {
15754 start_offset = 0;
15755 }
15756 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
15757 start_offset += VME_OFFSET(entry);
15758 end_offset += VME_OFFSET(entry);
b0d623f7 15759
2dced7af 15760 assert(!entry->is_sub_map);
3e170ce0 15761 object = VME_OBJECT(entry);
b0d623f7
A
15762 if (object != VM_OBJECT_NULL) {
15763 vm_object_lock(object);
15764 vm_object_reuse_pages(object, start_offset, end_offset,
0a7de745 15765 TRUE);
b0d623f7
A
15766 vm_object_unlock(object);
15767 }
15768
3e170ce0 15769 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
b0d623f7
A
15770 /*
15771 * XXX
15772 * We do not hold the VM map exclusively here.
15773 * The "alias" field is not that critical, so it's
15774 * safe to update it here, as long as it is the only
15775 * one that can be modified while holding the VM map
15776 * "shared".
15777 */
3e170ce0 15778 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
b0d623f7
A
15779 }
15780 }
5ba3f43e 15781
b0d623f7
A
15782 vm_map_unlock_read(map);
15783 vm_page_stats_reusable.reuse_pages_success++;
15784 return KERN_SUCCESS;
1c79356b
A
15785}
15786
1c79356b 15787
b0d623f7
A
15788static kern_return_t
15789vm_map_reusable_pages(
0a7de745
A
15790 vm_map_t map,
15791 vm_map_offset_t start,
15792 vm_map_offset_t end)
b0d623f7 15793{
0a7de745
A
15794 vm_map_entry_t entry;
15795 vm_object_t object;
15796 vm_object_offset_t start_offset, end_offset;
15797 vm_map_offset_t pmap_offset;
b0d623f7 15798
f427ee49
A
15799 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15800 /*
15801 * XXX TODO4K
15802 * need to figure out what reusable means for a portion
15803 * of a native page.
15804 */
15805 return KERN_SUCCESS;
15806 }
15807
b0d623f7
A
15808 /*
15809 * The MADV_REUSABLE operation doesn't require any changes to the
15810 * vm_map_entry_t's, so the read lock is sufficient.
15811 */
15812
15813 vm_map_lock_read(map);
0a7de745 15814 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
15815
15816 /*
15817 * The madvise semantics require that the address range be fully
15818 * allocated with no holes. Otherwise, we're required to return
15819 * an error.
15820 */
15821
15822 if (!vm_map_range_check(map, start, end, &entry)) {
15823 vm_map_unlock_read(map);
15824 vm_page_stats_reusable.reusable_pages_failure++;
15825 return KERN_INVALID_ADDRESS;
15826 }
15827
15828 /*
15829 * Examine each vm_map_entry_t in the range.
15830 */
15831 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745 15832 entry = entry->vme_next) {
b0d623f7
A
15833 int kill_pages = 0;
15834
15835 /*
15836 * Sanity check on the VM map entry.
15837 */
0a7de745 15838 if (!vm_map_entry_is_reusable(entry)) {
b0d623f7
A
15839 vm_map_unlock_read(map);
15840 vm_page_stats_reusable.reusable_pages_failure++;
15841 return KERN_INVALID_ADDRESS;
15842 }
15843
0a7de745 15844 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
39037602
A
15845 /* not writable: can't discard contents */
15846 vm_map_unlock_read(map);
15847 vm_page_stats_reusable.reusable_nonwritable++;
15848 vm_page_stats_reusable.reusable_pages_failure++;
15849 return KERN_PROTECTION_FAILURE;
15850 }
15851
b0d623f7
A
15852 /*
15853 * The first time through, the start address could be anywhere
15854 * within the vm_map_entry we found. So adjust the offset to
15855 * correspond.
15856 */
15857 if (entry->vme_start < start) {
15858 start_offset = start - entry->vme_start;
3e170ce0 15859 pmap_offset = start;
b0d623f7
A
15860 } else {
15861 start_offset = 0;
3e170ce0 15862 pmap_offset = entry->vme_start;
b0d623f7
A
15863 }
15864 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
15865 start_offset += VME_OFFSET(entry);
15866 end_offset += VME_OFFSET(entry);
b0d623f7 15867
2dced7af 15868 assert(!entry->is_sub_map);
3e170ce0 15869 object = VME_OBJECT(entry);
0a7de745 15870 if (object == VM_OBJECT_NULL) {
b0d623f7 15871 continue;
0a7de745 15872 }
b0d623f7
A
15873
15874
15875 vm_object_lock(object);
39037602 15876 if (((object->ref_count == 1) ||
0a7de745
A
15877 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15878 object->copy == VM_OBJECT_NULL)) &&
39037602 15879 object->shadow == VM_OBJECT_NULL &&
fe8ab488
A
15880 /*
15881 * "iokit_acct" entries are billed for their virtual size
15882 * (rather than for their resident pages only), so they
15883 * wouldn't benefit from making pages reusable, and it
15884 * would be hard to keep track of pages that are both
39037602
A
15885 * "iokit_acct" and "reusable" in the pmap stats and
15886 * ledgers.
fe8ab488
A
15887 */
15888 !(entry->iokit_acct ||
0a7de745 15889 (!entry->is_sub_map && !entry->use_pmap))) {
39037602
A
15890 if (object->ref_count != 1) {
15891 vm_page_stats_reusable.reusable_shared++;
15892 }
b0d623f7 15893 kill_pages = 1;
39037602 15894 } else {
b0d623f7 15895 kill_pages = -1;
39037602 15896 }
b0d623f7
A
15897 if (kill_pages != -1) {
15898 vm_object_deactivate_pages(object,
0a7de745
A
15899 start_offset,
15900 end_offset - start_offset,
15901 kill_pages,
15902 TRUE /*reusable_pages*/,
15903 map->pmap,
15904 pmap_offset);
b0d623f7
A
15905 } else {
15906 vm_page_stats_reusable.reusable_pages_shared++;
15907 }
15908 vm_object_unlock(object);
15909
3e170ce0
A
15910 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15911 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
b0d623f7
A
15912 /*
15913 * XXX
15914 * We do not hold the VM map exclusively here.
15915 * The "alias" field is not that critical, so it's
15916 * safe to update it here, as long as it is the only
15917 * one that can be modified while holding the VM map
15918 * "shared".
15919 */
3e170ce0 15920 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
b0d623f7
A
15921 }
15922 }
5ba3f43e 15923
b0d623f7
A
15924 vm_map_unlock_read(map);
15925 vm_page_stats_reusable.reusable_pages_success++;
15926 return KERN_SUCCESS;
15927}
15928
15929
15930static kern_return_t
15931vm_map_can_reuse(
0a7de745
A
15932 vm_map_t map,
15933 vm_map_offset_t start,
15934 vm_map_offset_t end)
b0d623f7 15935{
0a7de745 15936 vm_map_entry_t entry;
b0d623f7
A
15937
15938 /*
15939 * The MADV_REUSABLE operation doesn't require any changes to the
15940 * vm_map_entry_t's, so the read lock is sufficient.
15941 */
15942
15943 vm_map_lock_read(map);
0a7de745 15944 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
15945
15946 /*
15947 * The madvise semantics require that the address range be fully
15948 * allocated with no holes. Otherwise, we're required to return
15949 * an error.
15950 */
15951
15952 if (!vm_map_range_check(map, start, end, &entry)) {
15953 vm_map_unlock_read(map);
15954 vm_page_stats_reusable.can_reuse_failure++;
15955 return KERN_INVALID_ADDRESS;
15956 }
15957
15958 /*
15959 * Examine each vm_map_entry_t in the range.
15960 */
15961 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
0a7de745 15962 entry = entry->vme_next) {
b0d623f7
A
15963 /*
15964 * Sanity check on the VM map entry.
15965 */
0a7de745 15966 if (!vm_map_entry_is_reusable(entry)) {
b0d623f7
A
15967 vm_map_unlock_read(map);
15968 vm_page_stats_reusable.can_reuse_failure++;
15969 return KERN_INVALID_ADDRESS;
15970 }
15971 }
5ba3f43e 15972
b0d623f7
A
15973 vm_map_unlock_read(map);
15974 vm_page_stats_reusable.can_reuse_success++;
15975 return KERN_SUCCESS;
15976}
15977
15978
3e170ce0
A
#if MACH_ASSERT
/*
 *	Routine:	vm_map_pageout
 *
 *	Description:	Implements the MADV_PAGEOUT operation on
 *			[start, end) of "map" (MACH_ASSERT kernels only):
 *			calls vm_object_pageout() on the internal VM object
 *			of each VM map entry in the range.
 *
 *	For an entry that maps a submap, the corresponding range of the
 *	submap is checked with vm_map_range_check() and only the object of
 *	the entry returned by that check is considered; it is skipped if
 *	that entry is itself a submap, has no object, or has a non-internal
 *	object.
 *
 *	Returns KERN_INVALID_ADDRESS if vm_map_range_check() fails on the
 *	map or on a submap (entries handled before that are not undone),
 *	KERN_SUCCESS otherwise.
 */
static kern_return_t
vm_map_pageout(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  cur;

	/*
	 * No vm_map_entry_t needs to be changed by the MADV_PAGEOUT
	 * operation, so taking the map lock for reading is enough.
	 */
	vm_map_lock_read(map);

	/*
	 * madvise semantics: the whole address range must be allocated,
	 * without any hole, or we have to report an error.
	 */
	if (!vm_map_range_check(map, start, end, &cur)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Walk the VM map entries that overlap the range.
	 */
	for (; cur != vm_map_to_entry(map) && cur->vme_start < end;
	    cur = cur->vme_next) {
		vm_object_t     target;

		if (!cur->is_sub_map) {
			/* regular mapping: page out its internal object */
			target = VME_OBJECT(cur);
			if (target != VM_OBJECT_NULL &&
			    target->internal) {
				vm_object_pageout(target);
			}
		} else {
			vm_map_t        nested_map;
			vm_map_offset_t nested_start;
			vm_map_offset_t nested_end;
			vm_map_entry_t  nested_entry;

			/* range of the submap that this entry maps */
			nested_map = VME_SUBMAP(cur);
			nested_start = VME_OFFSET(cur);
			nested_end = nested_start +
			    (cur->vme_end - cur->vme_start);

			vm_map_lock_read(nested_map);

			if (!vm_map_range_check(nested_map,
			    nested_start,
			    nested_end,
			    &nested_entry)) {
				/* hole in the submap: drop both locks */
				vm_map_unlock_read(nested_map);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			target = VME_OBJECT(nested_entry);
			if (!nested_entry->is_sub_map &&
			    target != VM_OBJECT_NULL &&
			    target->internal) {
				vm_object_pageout(target);
			}

			vm_map_unlock_read(nested_map);
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
16068
16069
1c79356b 16070/*
91447636
A
16071 * Routine: vm_map_entry_insert
16072 *
d9a64523 16073 * Description: This routine inserts a new vm_entry in a locked map.
1c79356b 16074 */
91447636
A
16075vm_map_entry_t
16076vm_map_entry_insert(
0a7de745
A
16077 vm_map_t map,
16078 vm_map_entry_t insp_entry,
16079 vm_map_offset_t start,
16080 vm_map_offset_t end,
16081 vm_object_t object,
16082 vm_object_offset_t offset,
a991bd8d 16083 vm_map_kernel_flags_t vmk_flags,
0a7de745
A
16084 boolean_t needs_copy,
16085 boolean_t is_shared,
16086 boolean_t in_transition,
16087 vm_prot_t cur_protection,
16088 vm_prot_t max_protection,
16089 vm_behavior_t behavior,
16090 vm_inherit_t inheritance,
f427ee49 16091 unsigned short wired_count,
0a7de745
A
16092 boolean_t no_cache,
16093 boolean_t permanent,
cb323159 16094 boolean_t no_copy_on_read,
0a7de745
A
16095 unsigned int superpage_size,
16096 boolean_t clear_map_aligned,
16097 boolean_t is_submap,
16098 boolean_t used_for_jit,
f427ee49
A
16099 int alias,
16100 boolean_t translated_allow_execute)
1c79356b 16101{
0a7de745 16102 vm_map_entry_t new_entry;
1c79356b 16103
91447636 16104 assert(insp_entry != (vm_map_entry_t)0);
d9a64523 16105 vm_map_lock_assert_exclusive(map);
1c79356b 16106
a39ff7e2 16107#if DEVELOPMENT || DEBUG
0a7de745 16108 vm_object_offset_t end_offset = 0;
a39ff7e2
A
16109 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
16110#endif /* DEVELOPMENT || DEBUG */
16111
7ddcb079 16112 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
1c79356b 16113
39236c6e
A
16114 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
16115 new_entry->map_aligned = TRUE;
16116 } else {
16117 new_entry->map_aligned = FALSE;
16118 }
16119 if (clear_map_aligned &&
0a7de745
A
16120 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
16121 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
39236c6e
A
16122 new_entry->map_aligned = FALSE;
16123 }
16124
91447636
A
16125 new_entry->vme_start = start;
16126 new_entry->vme_end = end;
39236c6e 16127 if (new_entry->map_aligned) {
fe8ab488 16128 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
0a7de745 16129 VM_MAP_PAGE_MASK(map)));
39236c6e 16130 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
0a7de745 16131 VM_MAP_PAGE_MASK(map)));
f427ee49
A
16132 } else {
16133 assert(page_aligned(new_entry->vme_start));
16134 assert(page_aligned(new_entry->vme_end));
39236c6e 16135 }
e2d2fc5c 16136 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 16137
3e170ce0
A
16138 VME_OBJECT_SET(new_entry, object);
16139 VME_OFFSET_SET(new_entry, offset);
91447636 16140 new_entry->is_shared = is_shared;
fe8ab488 16141 new_entry->is_sub_map = is_submap;
91447636
A
16142 new_entry->needs_copy = needs_copy;
16143 new_entry->in_transition = in_transition;
16144 new_entry->needs_wakeup = FALSE;
16145 new_entry->inheritance = inheritance;
16146 new_entry->protection = cur_protection;
16147 new_entry->max_protection = max_protection;
16148 new_entry->behavior = behavior;
16149 new_entry->wired_count = wired_count;
16150 new_entry->user_wired_count = 0;
fe8ab488
A
16151 if (is_submap) {
16152 /*
16153 * submap: "use_pmap" means "nested".
16154 * default: false.
16155 */
16156 new_entry->use_pmap = FALSE;
16157 } else {
16158 /*
16159 * object: "use_pmap" means "use pmap accounting" for footprint.
16160 * default: true.
16161 */
16162 new_entry->use_pmap = TRUE;
16163 }
5ba3f43e 16164 VME_ALIAS_SET(new_entry, alias);
b0d623f7 16165 new_entry->zero_wired_pages = FALSE;
2d21ac55 16166 new_entry->no_cache = no_cache;
b0d623f7 16167 new_entry->permanent = permanent;
0a7de745 16168 if (superpage_size) {
39236c6e 16169 new_entry->superpage_size = TRUE;
0a7de745 16170 } else {
39236c6e 16171 new_entry->superpage_size = FALSE;
0a7de745
A
16172 }
16173 if (used_for_jit) {
f427ee49
A
16174 if (!(map->jit_entry_exists) ||
16175 VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
5ba3f43e
A
16176 new_entry->used_for_jit = TRUE;
16177 map->jit_entry_exists = TRUE;
5ba3f43e
A
16178 }
16179 } else {
16180 new_entry->used_for_jit = FALSE;
16181 }
f427ee49
A
16182 if (translated_allow_execute) {
16183 new_entry->translated_allow_execute = TRUE;
16184 } else {
16185 new_entry->translated_allow_execute = FALSE;
16186 }
d9a64523 16187 new_entry->pmap_cs_associated = FALSE;
fe8ab488 16188 new_entry->iokit_acct = FALSE;
3e170ce0
A
16189 new_entry->vme_resilient_codesign = FALSE;
16190 new_entry->vme_resilient_media = FALSE;
39037602 16191 new_entry->vme_atomic = FALSE;
cb323159 16192 new_entry->vme_no_copy_on_read = no_copy_on_read;
1c79356b 16193
91447636
A
16194 /*
16195 * Insert the new entry into the list.
16196 */
1c79356b 16197
a991bd8d 16198 vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
91447636
A
16199 map->size += end - start;
16200
16201 /*
16202 * Update the free space hint and the lookup hint.
16203 */
16204
0c530ab8 16205 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 16206 return new_entry;
1c79356b
A
16207}
16208
16209/*
91447636
A
16210 * Routine: vm_map_remap_extract
16211 *
f427ee49 16212 * Description: This routine returns a vm_entry list from a map.
1c79356b 16213 */
91447636
A
16214static kern_return_t
16215vm_map_remap_extract(
0a7de745
A
16216 vm_map_t map,
16217 vm_map_offset_t addr,
16218 vm_map_size_t size,
16219 boolean_t copy,
16220 struct vm_map_header *map_header,
c3c9b80d
A
16221 vm_prot_t *cur_protection, /* IN/OUT */
16222 vm_prot_t *max_protection, /* IN/OUT */
91447636 16223 /* What, no behavior? */
0a7de745 16224 vm_inherit_t inheritance,
0a7de745 16225 vm_map_kernel_flags_t vmk_flags)
1c79356b 16226{
0a7de745
A
16227 kern_return_t result;
16228 vm_map_size_t mapped_size;
16229 vm_map_size_t tmp_size;
16230 vm_map_entry_t src_entry; /* result of last map lookup */
16231 vm_map_entry_t new_entry;
16232 vm_object_offset_t offset;
16233 vm_map_offset_t map_address;
16234 vm_map_offset_t src_start; /* start of entry to map */
16235 vm_map_offset_t src_end; /* end of region to be mapped */
16236 vm_object_t object;
16237 vm_map_version_t version;
16238 boolean_t src_needs_copy;
16239 boolean_t new_entry_needs_copy;
16240 vm_map_entry_t saved_src_entry;
16241 boolean_t src_entry_was_wired;
16242 vm_prot_t max_prot_for_prot_copy;
f427ee49
A
16243 vm_map_offset_t effective_page_mask;
16244 boolean_t pageable, same_map;
c3c9b80d
A
16245 boolean_t vm_remap_legacy;
16246 vm_prot_t required_cur_prot, required_max_prot;
f427ee49
A
16247
16248 pageable = vmk_flags.vmkf_copy_pageable;
16249 same_map = vmk_flags.vmkf_copy_same_map;
16250
16251 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
1c79356b 16252
91447636 16253 assert(map != VM_MAP_NULL);
39236c6e 16254 assert(size != 0);
f427ee49 16255 assert(size == vm_map_round_page(size, effective_page_mask));
91447636 16256 assert(inheritance == VM_INHERIT_NONE ||
0a7de745
A
16257 inheritance == VM_INHERIT_COPY ||
16258 inheritance == VM_INHERIT_SHARE);
c3c9b80d
A
16259 assert(!(*cur_protection & ~VM_PROT_ALL));
16260 assert(!(*max_protection & ~VM_PROT_ALL));
16261 assert((*cur_protection & *max_protection) == *cur_protection);
1c79356b 16262
91447636
A
16263 /*
16264 * Compute start and end of region.
16265 */
f427ee49
A
16266 src_start = vm_map_trunc_page(addr, effective_page_mask);
16267 src_end = vm_map_round_page(src_start + size, effective_page_mask);
1c79356b 16268
91447636
A
16269 /*
16270 * Initialize map_header.
16271 */
d9a64523
A
16272 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16273 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
91447636
A
16274 map_header->nentries = 0;
16275 map_header->entries_pageable = pageable;
f427ee49
A
16276// map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16277 map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
16278 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
1c79356b 16279
6d2010ae
A
16280 vm_map_store_init( map_header );
16281
d9a64523 16282 if (copy && vmk_flags.vmkf_remap_prot_copy) {
c3c9b80d
A
16283 /*
16284 * Special case for vm_map_protect(VM_PROT_COPY):
16285 * we want to set the new mappings' max protection to the
16286 * specified *max_protection...
16287 */
d9a64523 16288 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
c3c9b80d
A
16289 /* ... but we want to use the vm_remap() legacy mode */
16290 *max_protection = VM_PROT_NONE;
16291 *cur_protection = VM_PROT_NONE;
d9a64523
A
16292 } else {
16293 max_prot_for_prot_copy = VM_PROT_NONE;
16294 }
c3c9b80d
A
16295
16296 if (*cur_protection == VM_PROT_NONE &&
16297 *max_protection == VM_PROT_NONE) {
16298 /*
16299 * vm_remap() legacy mode:
16300 * Extract all memory regions in the specified range and
16301 * collect the strictest set of protections allowed on the
16302 * entire range, so the caller knows what they can do with
16303 * the remapped range.
16304 * We start with VM_PROT_ALL and we'll remove the protections
16305 * missing from each memory region.
16306 */
16307 vm_remap_legacy = TRUE;
16308 *cur_protection = VM_PROT_ALL;
16309 *max_protection = VM_PROT_ALL;
16310 required_cur_prot = VM_PROT_NONE;
16311 required_max_prot = VM_PROT_NONE;
16312 } else {
16313 /*
16314 * vm_remap_new() mode:
16315 * Extract all memory regions in the specified range and
16316 * ensure that they have at least the protections specified
16317 * by the caller via *cur_protection and *max_protection.
16318 * The resulting mapping should have these protections.
16319 */
16320 vm_remap_legacy = FALSE;
16321 if (copy) {
16322 required_cur_prot = VM_PROT_NONE;
16323 required_max_prot = VM_PROT_READ;
16324 } else {
16325 required_cur_prot = *cur_protection;
16326 required_max_prot = *max_protection;
16327 }
16328 }
1c79356b 16329
91447636
A
16330 map_address = 0;
16331 mapped_size = 0;
16332 result = KERN_SUCCESS;
1c79356b 16333
5ba3f43e 16334 /*
91447636
A
16335 * The specified source virtual space might correspond to
16336 * multiple map entries, need to loop on them.
16337 */
16338 vm_map_lock(map);
f427ee49
A
16339 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16340 /*
16341 * This address space uses sub-pages so the range might
16342 * not be re-mappable in an address space with larger
16343 * pages. Re-assemble any broken-up VM map entries to
16344 * improve our chances of making it work.
16345 */
16346 vm_map_simplify_range(map, src_start, src_end);
16347 }
91447636 16348 while (mapped_size != size) {
0a7de745 16349 vm_map_size_t entry_size;
1c79356b 16350
91447636
A
16351 /*
16352 * Find the beginning of the region.
5ba3f43e 16353 */
0a7de745 16354 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
91447636
A
16355 result = KERN_INVALID_ADDRESS;
16356 break;
16357 }
1c79356b 16358
91447636
A
16359 if (src_start < src_entry->vme_start ||
16360 (mapped_size && src_start != src_entry->vme_start)) {
16361 result = KERN_INVALID_ADDRESS;
16362 break;
16363 }
1c79356b 16364
91447636 16365 tmp_size = size - mapped_size;
0a7de745 16366 if (src_end > src_entry->vme_end) {
91447636 16367 tmp_size -= (src_end - src_entry->vme_end);
0a7de745 16368 }
1c79356b 16369
91447636 16370 entry_size = (vm_map_size_t)(src_entry->vme_end -
0a7de745 16371 src_entry->vme_start);
1c79356b 16372
f427ee49
A
16373 if (src_entry->is_sub_map &&
16374 vmk_flags.vmkf_copy_single_object) {
16375 vm_map_t submap;
16376 vm_map_offset_t submap_start;
16377 vm_map_size_t submap_size;
c3c9b80d 16378 boolean_t submap_needs_copy;
f427ee49
A
16379
16380 /*
c3c9b80d 16381 * No check for "required protection" on "src_entry"
f427ee49
A
16382 * because the protections that matter are the ones
16383 * on the submap's VM map entry, which will be checked
16384 * during the call to vm_map_remap_extract() below.
16385 */
16386 submap_size = src_entry->vme_end - src_start;
16387 if (submap_size > size) {
16388 submap_size = size;
16389 }
16390 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16391 submap = VME_SUBMAP(src_entry);
c3c9b80d
A
16392 if (copy) {
16393 /*
16394 * The caller wants a copy-on-write re-mapping,
16395 * so let's extract from the submap accordingly.
16396 */
16397 submap_needs_copy = TRUE;
16398 } else if (src_entry->needs_copy) {
16399 /*
16400 * The caller wants a shared re-mapping but the
16401 * submap is mapped with "needs_copy", so its
16402 * contents can't be shared as is. Extract the
16403 * contents of the submap as "copy-on-write".
16404 * The re-mapping won't be shared with the
16405 * original mapping but this is equivalent to
16406 * what happened with the original "remap from
16407 * submap" code.
16408 * The shared region is mapped "needs_copy", for
16409 * example.
16410 */
16411 submap_needs_copy = TRUE;
16412 } else {
16413 /*
16414 * The caller wants a shared re-mapping and
16415 * this mapping can be shared (no "needs_copy"),
16416 * so let's extract from the submap accordingly.
16417 * Kernel submaps are mapped without
16418 * "needs_copy", for example.
16419 */
16420 submap_needs_copy = FALSE;
16421 }
f427ee49
A
16422 vm_map_reference(submap);
16423 vm_map_unlock(map);
16424 src_entry = NULL;
c3c9b80d
A
16425 if (vm_remap_legacy) {
16426 *cur_protection = VM_PROT_NONE;
16427 *max_protection = VM_PROT_NONE;
16428 }
16429
16430 DTRACE_VM7(remap_submap_recurse,
16431 vm_map_t, map,
16432 vm_map_offset_t, addr,
16433 vm_map_size_t, size,
16434 boolean_t, copy,
16435 vm_map_offset_t, submap_start,
16436 vm_map_size_t, submap_size,
16437 boolean_t, submap_needs_copy);
16438
f427ee49
A
16439 result = vm_map_remap_extract(submap,
16440 submap_start,
16441 submap_size,
c3c9b80d 16442 submap_needs_copy,
f427ee49
A
16443 map_header,
16444 cur_protection,
16445 max_protection,
16446 inheritance,
16447 vmk_flags);
16448 vm_map_deallocate(submap);
16449 return result;
16450 }
16451
c3c9b80d
A
16452 if (src_entry->is_sub_map) {
16453 /* protections for submap mapping are irrelevant here */
16454 } else if (((src_entry->protection & required_cur_prot) !=
16455 required_cur_prot) ||
16456 ((src_entry->max_protection & required_max_prot) !=
16457 required_max_prot)) {
f427ee49
A
16458 if (vmk_flags.vmkf_copy_single_object &&
16459 mapped_size != 0) {
16460 /*
16461 * Single object extraction.
16462 * We can't extract more with the required
16463 * protection but we've extracted some, so
16464 * stop there and declare success.
16465 * The caller should check the size of
16466 * the copy entry we've extracted.
16467 */
16468 result = KERN_SUCCESS;
16469 } else {
16470 /*
16471 * VM range extraction.
16472 * Required protection is not available
16473 * for this part of the range: fail.
16474 */
16475 result = KERN_PROTECTION_FAILURE;
16476 }
16477 break;
16478 }
16479
c3c9b80d 16480 if (src_entry->is_sub_map) {
f427ee49
A
16481 vm_map_t submap;
16482 vm_map_offset_t submap_start;
16483 vm_map_size_t submap_size;
16484 vm_map_copy_t submap_copy;
16485 vm_prot_t submap_curprot, submap_maxprot;
c3c9b80d 16486 boolean_t submap_needs_copy;
f427ee49
A
16487
16488 /*
c3c9b80d 16489 * No check for "required protection" on "src_entry"
f427ee49
A
16490 * because the protections that matter are the ones
16491 * on the submap's VM map entry, which will be checked
16492 * during the call to vm_map_copy_extract() below.
16493 */
16494 object = VM_OBJECT_NULL;
16495 submap_copy = VM_MAP_COPY_NULL;
16496
16497 /* find equivalent range in the submap */
16498 submap = VME_SUBMAP(src_entry);
16499 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16500 submap_size = tmp_size;
c3c9b80d
A
16501 if (copy) {
16502 /*
16503 * The caller wants a copy-on-write re-mapping,
16504 * so let's extract from the submap accordingly.
16505 */
16506 submap_needs_copy = TRUE;
16507 } else if (src_entry->needs_copy) {
16508 /*
16509 * The caller wants a shared re-mapping but the
16510 * submap is mapped with "needs_copy", so its
16511 * contents can't be shared as is. Extract the
16512 * contents of the submap as "copy-on-write".
16513 * The re-mapping won't be shared with the
16514 * original mapping but this is equivalent to
16515 * what happened with the original "remap from
16516 * submap" code.
16517 * The shared region is mapped "needs_copy", for
16518 * example.
16519 */
16520 submap_needs_copy = TRUE;
16521 } else {
16522 /*
16523 * The caller wants a shared re-mapping and
16524 * this mapping can be shared (no "needs_copy"),
16525 * so let's extract from the submap accordingly.
16526 * Kernel submaps are mapped without
16527 * "needs_copy", for example.
16528 */
16529 submap_needs_copy = FALSE;
16530 }
f427ee49
A
16531 /* extra ref to keep submap alive */
16532 vm_map_reference(submap);
16533
c3c9b80d 16534 DTRACE_VM7(remap_submap_recurse,
f427ee49
A
16535 vm_map_t, map,
16536 vm_map_offset_t, addr,
16537 vm_map_size_t, size,
16538 boolean_t, copy,
16539 vm_map_offset_t, submap_start,
c3c9b80d
A
16540 vm_map_size_t, submap_size,
16541 boolean_t, submap_needs_copy);
f427ee49
A
16542
16543 /*
16544 * The map can be safely unlocked since we
16545 * already hold a reference on the submap.
16546 *
16547 * No timestamp since we don't care if the map
16548 * gets modified while we're down in the submap.
16549 * We'll resume the extraction at src_start + tmp_size
16550 * anyway.
16551 */
16552 vm_map_unlock(map);
16553 src_entry = NULL; /* not valid once map is unlocked */
16554
c3c9b80d
A
16555 if (vm_remap_legacy) {
16556 submap_curprot = VM_PROT_NONE;
16557 submap_maxprot = VM_PROT_NONE;
16558 if (max_prot_for_prot_copy) {
16559 submap_maxprot = max_prot_for_prot_copy;
16560 }
16561 } else {
16562 assert(!max_prot_for_prot_copy);
16563 submap_curprot = *cur_protection;
16564 submap_maxprot = *max_protection;
16565 }
f427ee49
A
16566 result = vm_map_copy_extract(submap,
16567 submap_start,
16568 submap_size,
c3c9b80d 16569 submap_needs_copy,
f427ee49
A
16570 &submap_copy,
16571 &submap_curprot,
16572 &submap_maxprot,
16573 inheritance,
16574 vmk_flags);
16575
16576 /* release extra ref on submap */
16577 vm_map_deallocate(submap);
16578 submap = VM_MAP_NULL;
16579
16580 if (result != KERN_SUCCESS) {
16581 vm_map_lock(map);
16582 break;
16583 }
16584
16585 /* transfer submap_copy entries to map_header */
16586 while (vm_map_copy_first_entry(submap_copy) !=
16587 vm_map_copy_to_entry(submap_copy)) {
16588 vm_map_entry_t copy_entry;
16589 vm_map_size_t copy_entry_size;
16590
16591 copy_entry = vm_map_copy_first_entry(submap_copy);
16592 assert(!copy_entry->is_sub_map);
c3c9b80d
A
16593 object = VME_OBJECT(copy_entry);
16594
16595 /*
16596 * Prevent kernel_object from being exposed to
16597 * user space.
16598 */
16599 if (__improbable(object == kernel_object)) {
16600 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16601 proc_selfpid(),
16602 (current_task()->bsd_info
16603 ? proc_name_address(current_task()->bsd_info)
16604 : "?"));
16605 DTRACE_VM(extract_kernel_only);
16606 result = KERN_INVALID_RIGHT;
16607 vm_map_copy_discard(submap_copy);
16608 submap_copy = VM_MAP_COPY_NULL;
16609 vm_map_lock(map);
16610 break;
16611 }
16612
f427ee49
A
16613 vm_map_copy_entry_unlink(submap_copy, copy_entry);
16614 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
16615 copy_entry->vme_start = map_address;
16616 copy_entry->vme_end = map_address + copy_entry_size;
16617 map_address += copy_entry_size;
16618 mapped_size += copy_entry_size;
16619 src_start += copy_entry_size;
16620 assert(src_start <= src_end);
16621 _vm_map_store_entry_link(map_header,
16622 map_header->links.prev,
16623 copy_entry);
16624 }
16625 /* done with submap_copy */
16626 vm_map_copy_discard(submap_copy);
16627
c3c9b80d
A
16628 if (vm_remap_legacy) {
16629 *cur_protection &= submap_curprot;
16630 *max_protection &= submap_maxprot;
16631 }
f427ee49
A
16632
16633 /* re-acquire the map lock and continue to next entry */
16634 vm_map_lock(map);
16635 continue;
91447636 16636 } else {
3e170ce0 16637 object = VME_OBJECT(src_entry);
c3c9b80d
A
16638
16639 /*
16640 * Prevent kernel_object from being exposed to
16641 * user space.
16642 */
16643 if (__improbable(object == kernel_object)) {
16644 printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
16645 proc_selfpid(),
16646 (current_task()->bsd_info
16647 ? proc_name_address(current_task()->bsd_info)
16648 : "?"));
16649 DTRACE_VM(extract_kernel_only);
16650 result = KERN_INVALID_RIGHT;
16651 break;
16652 }
16653
fe8ab488
A
16654 if (src_entry->iokit_acct) {
16655 /*
16656 * This entry uses "IOKit accounting".
16657 */
16658 } else if (object != VM_OBJECT_NULL &&
cb323159
A
16659 (object->purgable != VM_PURGABLE_DENY ||
16660 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
fe8ab488
A
16661 /*
16662 * Purgeable objects have their own accounting:
16663 * no pmap accounting for them.
16664 */
a39ff7e2 16665 assertf(!src_entry->use_pmap,
0a7de745
A
16666 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16667 map,
16668 src_entry,
16669 (uint64_t)src_entry->vme_start,
16670 (uint64_t)src_entry->vme_end,
16671 src_entry->protection,
16672 src_entry->max_protection,
16673 VME_ALIAS(src_entry));
fe8ab488
A
16674 } else {
16675 /*
16676 * Not IOKit or purgeable:
16677 * must be accounted by pmap stats.
16678 */
a39ff7e2 16679 assertf(src_entry->use_pmap,
0a7de745
A
16680 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16681 map,
16682 src_entry,
16683 (uint64_t)src_entry->vme_start,
16684 (uint64_t)src_entry->vme_end,
16685 src_entry->protection,
16686 src_entry->max_protection,
16687 VME_ALIAS(src_entry));
fe8ab488 16688 }
55e303ae 16689
91447636 16690 if (object == VM_OBJECT_NULL) {
f427ee49 16691 assert(!src_entry->needs_copy);
91447636 16692 object = vm_object_allocate(entry_size);
3e170ce0
A
16693 VME_OFFSET_SET(src_entry, 0);
16694 VME_OBJECT_SET(src_entry, object);
a39ff7e2 16695 assert(src_entry->use_pmap);
c3c9b80d 16696 assert(!map->mapped_in_other_pmaps);
f427ee49
A
16697 } else if (src_entry->wired_count ||
16698 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
91447636 16699 /*
f427ee49
A
16700 * A wired memory region should not have
16701 * any pending copy-on-write and needs to
16702 * keep pointing at the VM object that
16703 * contains the wired pages.
16704 * If we're sharing this memory (copy=false),
16705 * we'll share this VM object.
16706 * If we're copying this memory (copy=true),
16707 * we'll call vm_object_copy_slowly() below
16708 * and use the new VM object for the remapping.
16709 *
16710 * Or, we are already using an asymmetric
16711 * copy, and therefore we already have
16712 * the right object.
91447636
A
16713 */
16714 assert(!src_entry->needs_copy);
16715 } else if (src_entry->needs_copy || object->shadowed ||
0a7de745
A
16716 (object->internal && !object->true_share &&
16717 !src_entry->is_shared &&
16718 object->vo_size > entry_size)) {
3e170ce0 16719 VME_OBJECT_SHADOW(src_entry, entry_size);
a39ff7e2 16720 assert(src_entry->use_pmap);
1c79356b 16721
91447636
A
16722 if (!src_entry->needs_copy &&
16723 (src_entry->protection & VM_PROT_WRITE)) {
0a7de745 16724 vm_prot_t prot;
0c530ab8 16725
f427ee49 16726 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
5ba3f43e 16727
0a7de745 16728 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 16729
3e170ce0 16730 if (override_nx(map,
0a7de745
A
16731 VME_ALIAS(src_entry))
16732 && prot) {
16733 prot |= VM_PROT_EXECUTE;
16734 }
2d21ac55 16735
f427ee49 16736 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
5ba3f43e 16737
0a7de745 16738 if (map->mapped_in_other_pmaps) {
2d21ac55 16739 vm_object_pmap_protect(
3e170ce0
A
16740 VME_OBJECT(src_entry),
16741 VME_OFFSET(src_entry),
2d21ac55
A
16742 entry_size,
16743 PMAP_NULL,
f427ee49 16744 PAGE_SIZE,
0c530ab8 16745 src_entry->vme_start,
0c530ab8 16746 prot);
f427ee49
A
16747#if MACH_ASSERT
16748 } else if (__improbable(map->pmap == PMAP_NULL)) {
16749 extern boolean_t vm_tests_in_progress;
16750 assert(vm_tests_in_progress);
16751 /*
16752 * Some VM tests (in vm_tests.c)
16753 * sometimes want to use a VM
16754 * map without a pmap.
16755 * Otherwise, this should never
16756 * happen.
16757 */
16758#endif /* MACH_ASSERT */
2d21ac55
A
16759 } else {
16760 pmap_protect(vm_map_pmap(map),
0a7de745
A
16761 src_entry->vme_start,
16762 src_entry->vme_end,
16763 prot);
91447636
A
16764 }
16765 }
1c79356b 16766
3e170ce0 16767 object = VME_OBJECT(src_entry);
91447636
A
16768 src_entry->needs_copy = FALSE;
16769 }
1c79356b 16770
1c79356b 16771
91447636 16772 vm_object_lock(object);
2d21ac55 16773 vm_object_reference_locked(object); /* object ref. for new entry */
f427ee49 16774 assert(!src_entry->needs_copy);
5ba3f43e 16775 if (object->copy_strategy ==
2d21ac55 16776 MEMORY_OBJECT_COPY_SYMMETRIC) {
f427ee49
A
16777 /*
16778 * If we want to share this object (copy==0),
16779 * it needs to be COPY_DELAY.
16780 * If we want to copy this object (copy==1),
16781 * we can't just set "needs_copy" on our side
16782 * and expect the other side to do the same
16783 * (symmetrically), so we can't let the object
16784 * stay COPY_SYMMETRIC.
16785 * So we always switch from COPY_SYMMETRIC to
16786 * COPY_DELAY.
16787 */
5ba3f43e 16788 object->copy_strategy =
0a7de745 16789 MEMORY_OBJECT_COPY_DELAY;
c3c9b80d 16790 object->true_share = TRUE;
91447636
A
16791 }
16792 vm_object_unlock(object);
16793 }
1c79356b 16794
3e170ce0 16795 offset = (VME_OFFSET(src_entry) +
0a7de745 16796 (src_start - src_entry->vme_start));
1c79356b 16797
7ddcb079 16798 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
f427ee49 16799 vm_map_entry_copy(map, new_entry, src_entry);
fe8ab488
A
16800 if (new_entry->is_sub_map) {
16801 /* clr address space specifics */
16802 new_entry->use_pmap = FALSE;
a39ff7e2
A
16803 } else if (copy) {
16804 /*
16805 * We're dealing with a copy-on-write operation,
16806 * so the resulting mapping should not inherit the
16807 * original mapping's accounting settings.
16808 * "use_pmap" should be reset to its default (TRUE)
16809 * so that the new mapping gets accounted for in
16810 * the task's memory footprint.
16811 */
16812 new_entry->use_pmap = TRUE;
fe8ab488 16813 }
a39ff7e2
A
16814 /* "iokit_acct" was cleared in vm_map_entry_copy() */
16815 assert(!new_entry->iokit_acct);
1c79356b 16816
39236c6e
A
16817 new_entry->map_aligned = FALSE;
16818
91447636
A
16819 new_entry->vme_start = map_address;
16820 new_entry->vme_end = map_address + tmp_size;
e2d2fc5c 16821 assert(new_entry->vme_start < new_entry->vme_end);
5c9f4661
A
16822 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16823 /*
16824 * Remapping for vm_map_protect(VM_PROT_COPY)
16825 * to convert a read-only mapping into a
16826 * copy-on-write version of itself but
16827 * with write access:
0a7de745 16828 * keep the original inheritance and add
5c9f4661
A
16829 * VM_PROT_WRITE to the max protection.
16830 */
16831 new_entry->inheritance = src_entry->inheritance;
d9a64523 16832 new_entry->protection &= max_prot_for_prot_copy;
5c9f4661
A
16833 new_entry->max_protection |= VM_PROT_WRITE;
16834 } else {
16835 new_entry->inheritance = inheritance;
c3c9b80d
A
16836 if (!vm_remap_legacy) {
16837 new_entry->protection = *cur_protection;
16838 new_entry->max_protection = *max_protection;
16839 }
5c9f4661 16840 }
3e170ce0 16841 VME_OFFSET_SET(new_entry, offset);
0a7de745 16842
91447636
A
16843 /*
16844 * The new region has to be copied now if required.
16845 */
0a7de745 16846RestartCopy:
91447636 16847 if (!copy) {
cb323159
A
16848 if (src_entry->used_for_jit == TRUE) {
16849 if (same_map) {
f427ee49 16850 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
cb323159
A
16851 /*
16852 * Cannot allow an entry describing a JIT
16853 * region to be shared across address spaces.
16854 */
16855 result = KERN_INVALID_ARGUMENT;
16856 break;
cb323159 16857 }
316670eb 16858 }
cb323159 16859
91447636
A
16860 src_entry->is_shared = TRUE;
16861 new_entry->is_shared = TRUE;
0a7de745 16862 if (!(new_entry->is_sub_map)) {
91447636 16863 new_entry->needs_copy = FALSE;
0a7de745 16864 }
91447636
A
16865 } else if (src_entry->is_sub_map) {
16866 /* make this a COW sub_map if not already */
3e170ce0 16867 assert(new_entry->wired_count == 0);
91447636
A
16868 new_entry->needs_copy = TRUE;
16869 object = VM_OBJECT_NULL;
16870 } else if (src_entry->wired_count == 0 &&
f427ee49 16871 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
cb323159 16872 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
0a7de745
A
16873 VME_OFFSET(new_entry),
16874 (new_entry->vme_end -
16875 new_entry->vme_start),
16876 &src_needs_copy,
16877 &new_entry_needs_copy)) {
91447636
A
16878 new_entry->needs_copy = new_entry_needs_copy;
16879 new_entry->is_shared = FALSE;
a39ff7e2 16880 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
1c79356b 16881
91447636
A
16882 /*
16883 * Handle copy_on_write semantics.
16884 */
16885 if (src_needs_copy && !src_entry->needs_copy) {
0a7de745 16886 vm_prot_t prot;
0c530ab8 16887
f427ee49 16888 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
5ba3f43e 16889
0c530ab8 16890 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 16891
3e170ce0 16892 if (override_nx(map,
0a7de745
A
16893 VME_ALIAS(src_entry))
16894 && prot) {
16895 prot |= VM_PROT_EXECUTE;
16896 }
2d21ac55 16897
f427ee49 16898 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
5ba3f43e 16899
91447636 16900 vm_object_pmap_protect(object,
0a7de745
A
16901 offset,
16902 entry_size,
16903 ((src_entry->is_shared
16904 || map->mapped_in_other_pmaps) ?
16905 PMAP_NULL : map->pmap),
f427ee49 16906 VM_MAP_PAGE_SIZE(map),
0a7de745
A
16907 src_entry->vme_start,
16908 prot);
1c79356b 16909
3e170ce0 16910 assert(src_entry->wired_count == 0);
91447636
A
16911 src_entry->needs_copy = TRUE;
16912 }
16913 /*
16914 * Throw away the old object reference of the new entry.
16915 */
16916 vm_object_deallocate(object);
91447636
A
16917 } else {
16918 new_entry->is_shared = FALSE;
a39ff7e2
A
16919 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16920
16921 src_entry_was_wired = (src_entry->wired_count > 0);
16922 saved_src_entry = src_entry;
16923 src_entry = VM_MAP_ENTRY_NULL;
1c79356b 16924
91447636
A
16925 /*
16926 * The map can be safely unlocked since we
16927 * already hold a reference on the object.
16928 *
16929 * Record the timestamp of the map for later
16930 * verification, and unlock the map.
16931 */
16932 version.main_timestamp = map->timestamp;
0a7de745 16933 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 16934
91447636
A
16935 /*
16936 * Perform the copy.
16937 */
f427ee49
A
16938 if (src_entry_was_wired > 0 ||
16939 (debug4k_no_cow_copyin &&
16940 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
91447636
A
16941 vm_object_lock(object);
16942 result = vm_object_copy_slowly(
2d21ac55
A
16943 object,
16944 offset,
5ba3f43e
A
16945 (new_entry->vme_end -
16946 new_entry->vme_start),
2d21ac55 16947 THREAD_UNINT,
cb323159 16948 VME_OBJECT_PTR(new_entry));
1c79356b 16949
f427ee49 16950 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
91447636
A
16951 new_entry->needs_copy = FALSE;
16952 } else {
3e170ce0
A
16953 vm_object_offset_t new_offset;
16954
16955 new_offset = VME_OFFSET(new_entry);
91447636 16956 result = vm_object_copy_strategically(
2d21ac55
A
16957 object,
16958 offset,
5ba3f43e
A
16959 (new_entry->vme_end -
16960 new_entry->vme_start),
cb323159 16961 VME_OBJECT_PTR(new_entry),
3e170ce0 16962 &new_offset,
2d21ac55 16963 &new_entry_needs_copy);
3e170ce0
A
16964 if (new_offset != VME_OFFSET(new_entry)) {
16965 VME_OFFSET_SET(new_entry, new_offset);
16966 }
1c79356b 16967
91447636
A
16968 new_entry->needs_copy = new_entry_needs_copy;
16969 }
1c79356b 16970
91447636
A
16971 /*
16972 * Throw away the old object reference of the new entry.
16973 */
16974 vm_object_deallocate(object);
1c79356b 16975
91447636
A
16976 if (result != KERN_SUCCESS &&
16977 result != KERN_MEMORY_RESTART_COPY) {
16978 _vm_map_entry_dispose(map_header, new_entry);
39037602 16979 vm_map_lock(map);
91447636
A
16980 break;
16981 }
1c79356b 16982
91447636
A
16983 /*
16984 * Verify that the map has not substantially
16985 * changed while the copy was being made.
16986 */
1c79356b 16987
91447636
A
16988 vm_map_lock(map);
16989 if (version.main_timestamp + 1 != map->timestamp) {
16990 /*
16991 * Simple version comparison failed.
16992 *
16993 * Retry the lookup and verify that the
16994 * same object/offset are still present.
16995 */
a39ff7e2 16996 saved_src_entry = VM_MAP_ENTRY_NULL;
3e170ce0 16997 vm_object_deallocate(VME_OBJECT(new_entry));
91447636 16998 _vm_map_entry_dispose(map_header, new_entry);
0a7de745 16999 if (result == KERN_MEMORY_RESTART_COPY) {
91447636 17000 result = KERN_SUCCESS;
0a7de745 17001 }
91447636
A
17002 continue;
17003 }
a39ff7e2
A
17004 /* map hasn't changed: src_entry is still valid */
17005 src_entry = saved_src_entry;
17006 saved_src_entry = VM_MAP_ENTRY_NULL;
1c79356b 17007
91447636
A
17008 if (result == KERN_MEMORY_RESTART_COPY) {
17009 vm_object_reference(object);
17010 goto RestartCopy;
17011 }
17012 }
1c79356b 17013
6d2010ae 17014 _vm_map_store_entry_link(map_header,
0a7de745 17015 map_header->links.prev, new_entry);
1c79356b 17016
c3c9b80d
A
17017 /* protections for submap mapping are irrelevant here */
17018 if (vm_remap_legacy && !src_entry->is_sub_map) {
6d2010ae
A
17019 *cur_protection &= src_entry->protection;
17020 *max_protection &= src_entry->max_protection;
17021 }
f427ee49 17022
91447636
A
17023 map_address += tmp_size;
17024 mapped_size += tmp_size;
17025 src_start += tmp_size;
f427ee49
A
17026
17027 if (vmk_flags.vmkf_copy_single_object) {
17028 if (mapped_size != size) {
17029 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17030 if (src_entry->vme_next != vm_map_to_entry(map) &&
17031 VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
17032 /* XXX TODO4K */
17033 DEBUG4K_ERROR("could have extended copy to next entry...\n");
17034 }
17035 }
17036 break;
17037 }
91447636 17038 } /* end while */
1c79356b 17039
91447636
A
17040 vm_map_unlock(map);
17041 if (result != KERN_SUCCESS) {
17042 /*
17043 * Free all allocated elements.
17044 */
17045 for (src_entry = map_header->links.next;
0a7de745
A
17046 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17047 src_entry = new_entry) {
91447636 17048 new_entry = src_entry->vme_next;
6d2010ae 17049 _vm_map_store_entry_unlink(map_header, src_entry);
39236c6e 17050 if (src_entry->is_sub_map) {
3e170ce0 17051 vm_map_deallocate(VME_SUBMAP(src_entry));
39236c6e 17052 } else {
3e170ce0 17053 vm_object_deallocate(VME_OBJECT(src_entry));
39236c6e 17054 }
91447636
A
17055 _vm_map_entry_dispose(map_header, src_entry);
17056 }
17057 }
f427ee49
A
17058 return result;
17059}
17060
17061bool
17062vm_map_is_exotic(
17063 vm_map_t map)
17064{
17065 return VM_MAP_IS_EXOTIC(map);
17066}
17067
17068bool
17069vm_map_is_alien(
17070 vm_map_t map)
17071{
17072 return VM_MAP_IS_ALIEN(map);
17073}
17074
17075#if XNU_TARGET_OS_OSX
17076void
17077vm_map_mark_alien(
17078 vm_map_t map)
17079{
17080 vm_map_lock(map);
17081 map->is_alien = true;
17082 vm_map_unlock(map);
17083}
c3c9b80d
A
17084
17085void
17086vm_map_single_jit(
17087 vm_map_t map)
17088{
17089 vm_map_lock(map);
17090 map->single_jit = true;
17091 vm_map_unlock(map);
17092}
f427ee49
A
17093#endif /* XNU_TARGET_OS_OSX */
17094
17095void vm_map_copy_to_physcopy(vm_map_copy_t copy_map, vm_map_t target_map);
17096void
17097vm_map_copy_to_physcopy(
17098 vm_map_copy_t copy_map,
17099 vm_map_t target_map)
17100{
17101 vm_map_size_t size;
17102 vm_map_entry_t entry;
17103 vm_map_entry_t new_entry;
17104 vm_object_t new_object;
17105 unsigned int pmap_flags;
17106 pmap_t new_pmap;
17107 vm_map_t new_map;
17108 vm_map_address_t src_start, src_end, src_cur;
17109 vm_map_address_t dst_start, dst_end, dst_cur;
17110 kern_return_t kr;
17111 void *kbuf;
17112
17113 /*
17114 * Perform the equivalent of vm_allocate() and memcpy().
17115 * Replace the mappings in "copy_map" with the newly allocated mapping.
17116 */
17117 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17118
17119 assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));
17120
17121 /* allocate new VM object */
17122 size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
17123 new_object = vm_object_allocate(size);
17124 assert(new_object);
17125
17126 /* allocate new VM map entry */
17127 new_entry = vm_map_copy_entry_create(copy_map, FALSE);
17128 assert(new_entry);
17129
17130 /* finish initializing new VM map entry */
17131 new_entry->protection = VM_PROT_DEFAULT;
17132 new_entry->max_protection = VM_PROT_DEFAULT;
17133 new_entry->use_pmap = TRUE;
17134
17135 /* make new VM map entry point to new VM object */
17136 new_entry->vme_start = 0;
17137 new_entry->vme_end = size;
17138 VME_OBJECT_SET(new_entry, new_object);
17139 VME_OFFSET_SET(new_entry, 0);
17140
17141 /* create a new pmap to map "copy_map" */
17142 pmap_flags = 0;
17143 assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
17144#if PMAP_CREATE_FORCE_4K_PAGES
17145 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
17146#endif /* PMAP_CREATE_FORCE_4K_PAGES */
17147 pmap_flags |= PMAP_CREATE_64BIT;
17148 new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
17149 assert(new_pmap);
17150
17151 /* create a new pageable VM map to map "copy_map" */
17152 new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
17153 assert(new_map);
17154 vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
17155
17156 /* map "copy_map" in the new VM map */
17157 src_start = 0;
17158 kr = vm_map_copyout_internal(
17159 new_map,
17160 &src_start,
17161 copy_map,
17162 copy_map->size,
17163 FALSE, /* consume_on_success */
17164 VM_PROT_DEFAULT,
17165 VM_PROT_DEFAULT,
17166 VM_INHERIT_DEFAULT);
17167 assert(kr == KERN_SUCCESS);
17168 src_end = src_start + copy_map->size;
17169
17170 /* map "new_object" in the new VM map */
17171 vm_object_reference(new_object);
17172 dst_start = 0;
17173 kr = vm_map_enter(new_map,
17174 &dst_start,
17175 size,
17176 0, /* mask */
17177 VM_FLAGS_ANYWHERE,
17178 VM_MAP_KERNEL_FLAGS_NONE,
17179 VM_KERN_MEMORY_OSFMK,
17180 new_object,
17181 0, /* offset */
17182 FALSE, /* needs copy */
17183 VM_PROT_DEFAULT,
17184 VM_PROT_DEFAULT,
17185 VM_INHERIT_DEFAULT);
17186 assert(kr == KERN_SUCCESS);
17187 dst_end = dst_start + size;
17188
17189 /* get a kernel buffer */
17190 kbuf = kheap_alloc(KHEAP_TEMP, PAGE_SIZE, Z_WAITOK);
17191 assert(kbuf);
17192
17193 /* physically copy "copy_map" mappings to new VM object */
17194 for (src_cur = src_start, dst_cur = dst_start;
17195 src_cur < src_end;
17196 src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
17197 vm_size_t bytes;
17198
17199 bytes = PAGE_SIZE;
17200 if (src_cur + PAGE_SIZE > src_end) {
17201 /* partial copy for last page */
17202 bytes = src_end - src_cur;
17203 assert(bytes > 0 && bytes < PAGE_SIZE);
17204 /* rest of dst page should be zero-filled */
17205 }
17206 /* get bytes from src mapping */
17207 kr = copyinmap(new_map, src_cur, kbuf, bytes);
17208 if (kr != KERN_SUCCESS) {
17209 DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
17210 }
17211 /* put bytes in dst mapping */
17212 assert(dst_cur < dst_end);
17213 assert(dst_cur + bytes <= dst_end);
17214 kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
17215 if (kr != KERN_SUCCESS) {
17216 DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
17217 }
17218 }
17219
17220 /* free kernel buffer */
17221 kheap_free(KHEAP_TEMP, kbuf, PAGE_SIZE);
17222 kbuf = NULL;
17223
17224 /* destroy new map */
17225 vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
17226 new_map = VM_MAP_NULL;
17227
17228 /* dispose of the old map entries in "copy_map" */
17229 while (vm_map_copy_first_entry(copy_map) !=
17230 vm_map_copy_to_entry(copy_map)) {
17231 entry = vm_map_copy_first_entry(copy_map);
17232 vm_map_copy_entry_unlink(copy_map, entry);
17233 if (entry->is_sub_map) {
17234 vm_map_deallocate(VME_SUBMAP(entry));
17235 } else {
17236 vm_object_deallocate(VME_OBJECT(entry));
17237 }
17238 vm_map_copy_entry_dispose(copy_map, entry);
17239 }
17240
17241 /* change "copy_map"'s page_size to match "target_map" */
17242 copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
17243 copy_map->offset = 0;
17244 copy_map->size = size;
17245
17246 /* insert new map entry in "copy_map" */
17247 assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
17248 vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
17249
17250 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17251}
17252
17253void
17254vm_map_copy_adjust_get_target_copy_map(
17255 vm_map_copy_t copy_map,
17256 vm_map_copy_t *target_copy_map_p);
17257void
17258vm_map_copy_adjust_get_target_copy_map(
17259 vm_map_copy_t copy_map,
17260 vm_map_copy_t *target_copy_map_p)
17261{
17262 vm_map_copy_t target_copy_map;
17263 vm_map_entry_t entry, target_entry;
17264
17265 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17266 /* the caller already has a "target_copy_map": use it */
17267 return;
17268 }
17269
17270 /* the caller wants us to create a new copy of "copy_map" */
17271 target_copy_map = vm_map_copy_allocate();
17272 target_copy_map->type = copy_map->type;
17273 assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17274 target_copy_map->offset = copy_map->offset;
17275 target_copy_map->size = copy_map->size;
17276 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17277 vm_map_store_init(&target_copy_map->cpy_hdr);
17278 for (entry = vm_map_copy_first_entry(copy_map);
17279 entry != vm_map_copy_to_entry(copy_map);
17280 entry = entry->vme_next) {
17281 target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
17282 vm_map_entry_copy_full(target_entry, entry);
17283 if (target_entry->is_sub_map) {
17284 vm_map_reference(VME_SUBMAP(target_entry));
17285 } else {
17286 vm_object_reference(VME_OBJECT(target_entry));
17287 }
17288 vm_map_copy_entry_link(
17289 target_copy_map,
17290 vm_map_copy_last_entry(target_copy_map),
17291 target_entry);
17292 }
17293 entry = VM_MAP_ENTRY_NULL;
17294 *target_copy_map_p = target_copy_map;
17295}
17296
17297void
17298vm_map_copy_trim(
17299 vm_map_copy_t copy_map,
17300 int new_page_shift,
17301 vm_map_offset_t trim_start,
17302 vm_map_offset_t trim_end);
17303void
17304vm_map_copy_trim(
17305 vm_map_copy_t copy_map,
17306 int new_page_shift,
17307 vm_map_offset_t trim_start,
17308 vm_map_offset_t trim_end)
17309{
17310 int copy_page_shift;
17311 vm_map_entry_t entry, next_entry;
17312
17313 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17314 assert(copy_map->cpy_hdr.nentries > 0);
17315
17316 trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
17317 trim_end += vm_map_copy_first_entry(copy_map)->vme_start;
17318
17319 /* use the new page_shift to do the clipping */
17320 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17321 copy_map->cpy_hdr.page_shift = new_page_shift;
17322
17323 for (entry = vm_map_copy_first_entry(copy_map);
17324 entry != vm_map_copy_to_entry(copy_map);
17325 entry = next_entry) {
17326 next_entry = entry->vme_next;
17327 if (entry->vme_end <= trim_start) {
17328 /* entry fully before trim range: skip */
17329 continue;
17330 }
17331 if (entry->vme_start >= trim_end) {
17332 /* entry fully after trim range: done */
17333 break;
17334 }
17335 /* clip entry if needed */
17336 vm_map_copy_clip_start(copy_map, entry, trim_start);
17337 vm_map_copy_clip_end(copy_map, entry, trim_end);
17338 /* dispose of entry */
17339 copy_map->size -= entry->vme_end - entry->vme_start;
17340 vm_map_copy_entry_unlink(copy_map, entry);
17341 if (entry->is_sub_map) {
17342 vm_map_deallocate(VME_SUBMAP(entry));
17343 } else {
17344 vm_object_deallocate(VME_OBJECT(entry));
17345 }
17346 vm_map_copy_entry_dispose(copy_map, entry);
17347 entry = VM_MAP_ENTRY_NULL;
17348 }
17349
17350 /* restore copy_map's original page_shift */
17351 copy_map->cpy_hdr.page_shift = copy_page_shift;
17352}
17353
17354/*
17355 * Make any necessary adjustments to "copy_map" to allow it to be
17356 * mapped into "target_map".
17357 * If no changes were necessary, "target_copy_map" points to the
17358 * untouched "copy_map".
17359 * If changes are necessary, changes will be made to "target_copy_map".
17360 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17361 * copy the original "copy_map" to it before applying the changes.
17362 * The caller should discard "target_copy_map" if it's not the same as
17363 * the original "copy_map".
17364 */
17365/* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
17366kern_return_t
17367vm_map_copy_adjust_to_target(
17368 vm_map_copy_t src_copy_map,
17369 vm_map_offset_t offset,
17370 vm_map_size_t size,
17371 vm_map_t target_map,
17372 boolean_t copy,
17373 vm_map_copy_t *target_copy_map_p,
17374 vm_map_offset_t *overmap_start_p,
17375 vm_map_offset_t *overmap_end_p,
17376 vm_map_offset_t *trimmed_start_p)
17377{
17378 vm_map_copy_t copy_map, target_copy_map;
17379 vm_map_size_t target_size;
17380 vm_map_size_t src_copy_map_size;
17381 vm_map_size_t overmap_start, overmap_end;
17382 int misalignments;
17383 vm_map_entry_t entry, target_entry;
17384 vm_map_offset_t addr_adjustment;
17385 vm_map_offset_t new_start, new_end;
17386 int copy_page_mask, target_page_mask;
17387 int copy_page_shift, target_page_shift;
17388 vm_map_offset_t trimmed_end;
17389
17390 /*
17391 * Assert that the vm_map_copy is coming from the right
17392 * zone and hasn't been forged
17393 */
17394 vm_map_copy_require(src_copy_map);
17395 assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17396
17397 /*
17398 * Start working with "src_copy_map" but we'll switch
17399 * to "target_copy_map" as soon as we start making adjustments.
17400 */
17401 copy_map = src_copy_map;
17402 src_copy_map_size = src_copy_map->size;
17403
17404 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17405 copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
17406 target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
17407 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17408
17409 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
17410
17411 target_copy_map = *target_copy_map_p;
17412 if (target_copy_map != VM_MAP_COPY_NULL) {
17413 vm_map_copy_require(target_copy_map);
17414 }
17415
17416 if (offset + size > copy_map->size) {
17417 DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
17418 return KERN_INVALID_ARGUMENT;
17419 }
17420
17421 /* trim the end */
17422 trimmed_end = 0;
17423 new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
17424 if (new_end < copy_map->size) {
17425 trimmed_end = src_copy_map_size - new_end;
17426 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
17427 /* get "target_copy_map" if needed and adjust it */
17428 vm_map_copy_adjust_get_target_copy_map(copy_map,
17429 &target_copy_map);
17430 copy_map = target_copy_map;
17431 vm_map_copy_trim(target_copy_map, target_page_shift,
17432 new_end, copy_map->size);
17433 }
17434
17435 /* trim the start */
17436 new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
17437 if (new_start != 0) {
17438 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
17439 /* get "target_copy_map" if needed and adjust it */
17440 vm_map_copy_adjust_get_target_copy_map(copy_map,
17441 &target_copy_map);
17442 copy_map = target_copy_map;
17443 vm_map_copy_trim(target_copy_map, target_page_shift,
17444 0, new_start);
17445 }
17446 *trimmed_start_p = new_start;
17447
17448 /* target_size starts with what's left after trimming */
17449 target_size = copy_map->size;
17450 assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
17451 "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
17452 (uint64_t)target_size, (uint64_t)src_copy_map_size,
17453 (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
17454
17455 /* check for misalignments but don't adjust yet */
17456 misalignments = 0;
17457 overmap_start = 0;
17458 overmap_end = 0;
17459 if (copy_page_shift < target_page_shift) {
17460 /*
17461 * Remapping from 4K to 16K: check the VM object alignments
17462 * throughout the range.
17463 * If the start and end of the range are mis-aligned, we can
17464 * over-map to re-align, and adjust the "overmap" start/end
17465 * and "target_size" of the range accordingly.
17466 * If there is any mis-alignment within the range:
17467 * if "copy":
17468 * we can do immediate-copy instead of copy-on-write,
17469 * else:
17470 * no way to remap and share; fail.
17471 */
17472 for (entry = vm_map_copy_first_entry(copy_map);
17473 entry != vm_map_copy_to_entry(copy_map);
17474 entry = entry->vme_next) {
17475 vm_object_offset_t object_offset_start, object_offset_end;
17476
17477 object_offset_start = VME_OFFSET(entry);
17478 object_offset_end = object_offset_start;
17479 object_offset_end += entry->vme_end - entry->vme_start;
17480 if (object_offset_start & target_page_mask) {
17481 if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
17482 overmap_start++;
17483 } else {
17484 misalignments++;
17485 }
17486 }
17487 if (object_offset_end & target_page_mask) {
17488 if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
17489 overmap_end++;
17490 } else {
17491 misalignments++;
17492 }
17493 }
17494 }
17495 }
17496 entry = VM_MAP_ENTRY_NULL;
17497
17498 /* decide how to deal with misalignments */
17499 assert(overmap_start <= 1);
17500 assert(overmap_end <= 1);
17501 if (!overmap_start && !overmap_end && !misalignments) {
17502 /* copy_map is properly aligned for target_map ... */
17503 if (*trimmed_start_p) {
17504 /* ... but we trimmed it, so still need to adjust */
17505 } else {
17506 /* ... and we didn't trim anything: we're done */
17507 if (target_copy_map == VM_MAP_COPY_NULL) {
17508 target_copy_map = copy_map;
17509 }
17510 *target_copy_map_p = target_copy_map;
17511 *overmap_start_p = 0;
17512 *overmap_end_p = 0;
17513 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17514 return KERN_SUCCESS;
17515 }
17516 } else if (misalignments && !copy) {
17517 /* can't "share" if misaligned */
17518 DEBUG4K_ADJUST("unsupported sharing\n");
17519#if MACH_ASSERT
17520 if (debug4k_panic_on_misaligned_sharing) {
17521 panic("DEBUG4k %s:%d unsupported sharing\n", __FUNCTION__, __LINE__);
17522 }
17523#endif /* MACH_ASSERT */
17524 DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
17525 return KERN_NOT_SUPPORTED;
17526 } else {
17527 /* can't virtual-copy if misaligned (but can physical-copy) */
17528 DEBUG4K_ADJUST("mis-aligned copying\n");
17529 }
17530
17531 /* get a "target_copy_map" if needed and switch to it */
17532 vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
17533 copy_map = target_copy_map;
17534
17535 if (misalignments && copy) {
17536 vm_map_size_t target_copy_map_size;
17537
17538 /*
17539 * Can't do copy-on-write with misaligned mappings.
17540 * Replace the mappings with a physical copy of the original
17541 * mappings' contents.
17542 */
17543 target_copy_map_size = target_copy_map->size;
17544 vm_map_copy_to_physcopy(target_copy_map, target_map);
17545 *target_copy_map_p = target_copy_map;
17546 *overmap_start_p = 0;
17547 *overmap_end_p = target_copy_map->size - target_copy_map_size;
17548 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17549 return KERN_SUCCESS;
17550 }
17551
17552 /* apply the adjustments */
17553 misalignments = 0;
17554 overmap_start = 0;
17555 overmap_end = 0;
17556 /* remove copy_map->offset, so that everything starts at offset 0 */
17557 addr_adjustment = copy_map->offset;
17558 /* also remove whatever we trimmed from the start */
17559 addr_adjustment += *trimmed_start_p;
17560 for (target_entry = vm_map_copy_first_entry(target_copy_map);
17561 target_entry != vm_map_copy_to_entry(target_copy_map);
17562 target_entry = target_entry->vme_next) {
17563 vm_object_offset_t object_offset_start, object_offset_end;
17564
17565 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17566 object_offset_start = VME_OFFSET(target_entry);
17567 if (object_offset_start & target_page_mask) {
17568 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17569 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17570 /*
17571 * start of 1st entry is mis-aligned:
17572 * re-adjust by over-mapping.
17573 */
17574 overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
17575 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
17576 VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
17577 } else {
17578 misalignments++;
17579 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17580 assert(copy);
17581 }
17582 }
17583
17584 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17585 target_size += overmap_start;
17586 } else {
17587 target_entry->vme_start += overmap_start;
17588 }
17589 target_entry->vme_end += overmap_start;
17590
17591 object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
17592 if (object_offset_end & target_page_mask) {
17593 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17594 if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
17595 /*
17596 * end of last entry is mis-aligned: re-adjust by over-mapping.
17597 */
17598 overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
17599 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
17600 target_entry->vme_end += overmap_end;
17601 target_size += overmap_end;
17602 } else {
17603 misalignments++;
17604 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17605 assert(copy);
17606 }
17607 }
17608 target_entry->vme_start -= addr_adjustment;
17609 target_entry->vme_end -= addr_adjustment;
17610 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17611 }
17612
17613 target_copy_map->size = target_size;
17614 target_copy_map->offset += overmap_start;
17615 target_copy_map->offset -= addr_adjustment;
17616 target_copy_map->cpy_hdr.page_shift = target_page_shift;
17617
17618// assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17619// assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17620 assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
17621 assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
17622
17623 *target_copy_map_p = target_copy_map;
17624 *overmap_start_p = overmap_start;
17625 *overmap_end_p = overmap_end;
17626
17627 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17628 return KERN_SUCCESS;
17629}
17630
17631kern_return_t
17632vm_map_range_physical_size(
17633 vm_map_t map,
17634 vm_map_address_t start,
17635 mach_vm_size_t size,
17636 mach_vm_size_t * phys_size)
17637{
17638 kern_return_t kr;
17639 vm_map_copy_t copy_map, target_copy_map;
17640 vm_map_offset_t adjusted_start, adjusted_end;
17641 vm_map_size_t adjusted_size;
17642 vm_prot_t cur_prot, max_prot;
17643 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17644 vm_map_kernel_flags_t vmk_flags;
17645
17646 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
17647 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
17648 adjusted_size = adjusted_end - adjusted_start;
17649 *phys_size = adjusted_size;
17650 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
17651 return KERN_SUCCESS;
17652 }
17653 if (start == 0) {
17654 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
17655 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
17656 adjusted_size = adjusted_end - adjusted_start;
17657 *phys_size = adjusted_size;
17658 return KERN_SUCCESS;
17659 }
17660 if (adjusted_size == 0) {
17661 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
17662 *phys_size = 0;
17663 return KERN_SUCCESS;
17664 }
17665
17666 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
17667 vmk_flags.vmkf_copy_pageable = TRUE;
17668 vmk_flags.vmkf_copy_same_map = TRUE;
17669 assert(adjusted_size != 0);
c3c9b80d
A
17670 cur_prot = VM_PROT_NONE; /* legacy mode */
17671 max_prot = VM_PROT_NONE; /* legacy mode */
f427ee49 17672 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
f427ee49
A
17673 FALSE /* copy */,
17674 &copy_map,
17675 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
17676 vmk_flags);
17677 if (kr != KERN_SUCCESS) {
17678 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17679 //assert(0);
17680 *phys_size = 0;
17681 return kr;
17682 }
17683 assert(copy_map != VM_MAP_COPY_NULL);
17684 target_copy_map = copy_map;
17685 DEBUG4K_ADJUST("adjusting...\n");
17686 kr = vm_map_copy_adjust_to_target(
17687 copy_map,
17688 start - adjusted_start, /* offset */
17689 size, /* size */
17690 kernel_map,
17691 FALSE, /* copy */
17692 &target_copy_map,
17693 &overmap_start,
17694 &overmap_end,
17695 &trimmed_start);
17696 if (kr == KERN_SUCCESS) {
17697 if (target_copy_map->size != *phys_size) {
17698 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
17699 }
17700 *phys_size = target_copy_map->size;
17701 } else {
17702 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17703 //assert(0);
17704 *phys_size = 0;
17705 }
17706 vm_map_copy_discard(copy_map);
17707 copy_map = VM_MAP_COPY_NULL;
17708
17709 return kr;
17710}
17711
17712
17713kern_return_t
17714memory_entry_check_for_adjustment(
17715 vm_map_t src_map,
17716 ipc_port_t port,
17717 vm_map_offset_t *overmap_start,
17718 vm_map_offset_t *overmap_end)
17719{
17720 kern_return_t kr = KERN_SUCCESS;
17721 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
17722
17723 assert(port);
17724 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
17725
17726 vm_named_entry_t named_entry;
17727
c3c9b80d 17728 named_entry = (vm_named_entry_t) ipc_kobject_get(port);
f427ee49
A
17729 named_entry_lock(named_entry);
17730 copy_map = named_entry->backing.copy;
17731 target_copy_map = copy_map;
17732
17733 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
17734 vm_map_offset_t trimmed_start;
17735
17736 trimmed_start = 0;
17737 DEBUG4K_ADJUST("adjusting...\n");
17738 kr = vm_map_copy_adjust_to_target(
17739 copy_map,
17740 0, /* offset */
17741 copy_map->size, /* size */
17742 src_map,
17743 FALSE, /* copy */
17744 &target_copy_map,
17745 overmap_start,
17746 overmap_end,
17747 &trimmed_start);
17748 assert(trimmed_start == 0);
17749 }
17750 named_entry_unlock(named_entry);
17751
17752 return kr;
1c79356b
A
17753}
17754
f427ee49 17755
1c79356b 17756/*
91447636 17757 * Routine: vm_remap
1c79356b 17758 *
91447636
A
17759 * Map portion of a task's address space.
17760 * Mapped region must not overlap more than
17761 * one vm memory object. Protections and
17762 * inheritance attributes remain the same
17763 * as in the original task and are out parameters.
17764 * Source and Target task can be identical
17765 * Other attributes are identical as for vm_map()
1c79356b
A
17766 */
17767kern_return_t
91447636 17768vm_map_remap(
0a7de745
A
17769 vm_map_t target_map,
17770 vm_map_address_t *address,
17771 vm_map_size_t size,
17772 vm_map_offset_t mask,
17773 int flags,
17774 vm_map_kernel_flags_t vmk_flags,
17775 vm_tag_t tag,
17776 vm_map_t src_map,
17777 vm_map_offset_t memory_address,
17778 boolean_t copy,
c3c9b80d
A
17779 vm_prot_t *cur_protection, /* IN/OUT */
17780 vm_prot_t *max_protection, /* IN/OUT */
0a7de745 17781 vm_inherit_t inheritance)
1c79356b 17782{
0a7de745
A
17783 kern_return_t result;
17784 vm_map_entry_t entry;
17785 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
17786 vm_map_entry_t new_entry;
f427ee49 17787 vm_map_copy_t copy_map;
0a7de745 17788 vm_map_offset_t offset_in_mapping;
f427ee49
A
17789 vm_map_size_t target_size = 0;
17790 vm_map_size_t src_page_mask, target_page_mask;
17791 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17792 vm_map_offset_t initial_memory_address;
17793 vm_map_size_t initial_size;
0a7de745
A
17794
17795 if (target_map == VM_MAP_NULL) {
91447636 17796 return KERN_INVALID_ARGUMENT;
0a7de745 17797 }
1c79356b 17798
f427ee49
A
17799 initial_memory_address = memory_address;
17800 initial_size = size;
17801 src_page_mask = VM_MAP_PAGE_MASK(src_map);
17802 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17803
91447636 17804 switch (inheritance) {
2d21ac55
A
17805 case VM_INHERIT_NONE:
17806 case VM_INHERIT_COPY:
17807 case VM_INHERIT_SHARE:
0a7de745 17808 if (size != 0 && src_map != VM_MAP_NULL) {
91447636 17809 break;
0a7de745 17810 }
f427ee49 17811 OS_FALLTHROUGH;
2d21ac55 17812 default:
91447636
A
17813 return KERN_INVALID_ARGUMENT;
17814 }
1c79356b 17815
f427ee49
A
17816 if (src_page_mask != target_page_mask) {
17817 if (copy) {
17818 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17819 } else {
17820 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17821 }
17822 }
17823
5ba3f43e
A
17824 /*
17825 * If the user is requesting that we return the address of the
17826 * first byte of the data (rather than the base of the page),
17827 * then we use different rounding semantics: specifically,
39236c6e
A
17828 * we assume that (memory_address, size) describes a region
17829 * all of whose pages we must cover, rather than a base to be truncated
17830 * down and a size to be added to that base. So we figure out
17831 * the highest page that the requested region includes and make
17832 * sure that the size will cover it.
5ba3f43e 17833 *
0a7de745 17834 * The key example we're worried about it is of the form:
39236c6e 17835 *
0a7de745 17836 * memory_address = 0x1ff0, size = 0x20
5ba3f43e
A
17837 *
17838 * With the old semantics, we round down the memory_address to 0x1000
39236c6e
A
17839 * and round up the size to 0x1000, resulting in our covering *only*
17840 * page 0x1000. With the new semantics, we'd realize that the region covers
5ba3f43e 17841 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
39236c6e
A
17842 * 0x1000 and page 0x2000 in the region we remap.
17843 */
17844 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
f427ee49
A
17845 vm_map_offset_t range_start, range_end;
17846
17847 range_start = vm_map_trunc_page(memory_address, src_page_mask);
17848 range_end = vm_map_round_page(memory_address + size, src_page_mask);
17849 memory_address = range_start;
17850 size = range_end - range_start;
17851 offset_in_mapping = initial_memory_address - memory_address;
39236c6e 17852 } else {
f427ee49
A
17853 /*
17854 * IMPORTANT:
17855 * This legacy code path is broken: for the range mentioned
17856 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
17857 * two 4k pages, it yields [ memory_address = 0x1000,
17858 * size = 0x1000 ], which covers only the first 4k page.
17859 * BUT some code unfortunately depends on this bug, so we
17860 * can't fix it without breaking something.
17861 * New code should get automatically opted in the new
17862 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flags.
17863 */
17864 offset_in_mapping = 0;
17865 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
17866 size = vm_map_round_page(size, src_page_mask);
17867 initial_memory_address = memory_address;
17868 initial_size = size;
5ba3f43e 17869 }
f427ee49
A
17870
17871
5ba3f43e
A
17872 if (size == 0) {
17873 return KERN_INVALID_ARGUMENT;
17874 }
1c79356b 17875
cb323159
A
17876 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
17877 /* must be copy-on-write to be "media resilient" */
17878 if (!copy) {
17879 return KERN_INVALID_ARGUMENT;
17880 }
17881 }
17882
f427ee49
A
17883 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
17884 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
17885
17886 assert(size != 0);
17887 result = vm_map_copy_extract(src_map,
17888 memory_address,
17889 size,
f427ee49 17890 copy, &copy_map,
c3c9b80d
A
17891 cur_protection, /* IN/OUT */
17892 max_protection, /* IN/OUT */
0a7de745 17893 inheritance,
0a7de745 17894 vmk_flags);
91447636
A
17895 if (result != KERN_SUCCESS) {
17896 return result;
17897 }
f427ee49
A
17898 assert(copy_map != VM_MAP_COPY_NULL);
17899
17900 overmap_start = 0;
17901 overmap_end = 0;
17902 trimmed_start = 0;
17903 target_size = size;
17904 if (src_page_mask != target_page_mask) {
17905 vm_map_copy_t target_copy_map;
17906
17907 target_copy_map = copy_map; /* can modify "copy_map" itself */
17908 DEBUG4K_ADJUST("adjusting...\n");
17909 result = vm_map_copy_adjust_to_target(
17910 copy_map,
17911 offset_in_mapping, /* offset */
17912 initial_size,
17913 target_map,
17914 copy,
17915 &target_copy_map,
17916 &overmap_start,
17917 &overmap_end,
17918 &trimmed_start);
17919 if (result != KERN_SUCCESS) {
17920 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
17921 vm_map_copy_discard(copy_map);
17922 return result;
17923 }
17924 if (trimmed_start == 0) {
17925 /* nothing trimmed: no adjustment needed */
17926 } else if (trimmed_start >= offset_in_mapping) {
17927 /* trimmed more than offset_in_mapping: nothing left */
17928 assert(overmap_start == 0);
17929 assert(overmap_end == 0);
17930 offset_in_mapping = 0;
17931 } else {
17932 /* trimmed some of offset_in_mapping: adjust */
17933 assert(overmap_start == 0);
17934 assert(overmap_end == 0);
17935 offset_in_mapping -= trimmed_start;
17936 }
17937 offset_in_mapping += overmap_start;
17938 target_size = target_copy_map->size;
17939 }
1c79356b 17940
91447636
A
17941 /*
17942 * Allocate/check a range of free virtual address
17943 * space for the target
1c79356b 17944 */
f427ee49 17945 *address = vm_map_trunc_page(*address, target_page_mask);
91447636 17946 vm_map_lock(target_map);
f427ee49
A
17947 target_size = vm_map_round_page(target_size, target_page_mask);
17948 result = vm_map_remap_range_allocate(target_map, address,
17949 target_size,
0a7de745
A
17950 mask, flags, vmk_flags, tag,
17951 &insp_entry);
1c79356b 17952
f427ee49
A
17953 for (entry = vm_map_copy_first_entry(copy_map);
17954 entry != vm_map_copy_to_entry(copy_map);
0a7de745 17955 entry = new_entry) {
91447636 17956 new_entry = entry->vme_next;
f427ee49 17957 vm_map_copy_entry_unlink(copy_map, entry);
91447636 17958 if (result == KERN_SUCCESS) {
3e170ce0
A
17959 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17960 /* no codesigning -> read-only access */
3e170ce0
A
17961 entry->max_protection = VM_PROT_READ;
17962 entry->protection = VM_PROT_READ;
17963 entry->vme_resilient_codesign = TRUE;
17964 }
91447636
A
17965 entry->vme_start += *address;
17966 entry->vme_end += *address;
39236c6e 17967 assert(!entry->map_aligned);
cb323159
A
17968 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
17969 !entry->is_sub_map &&
17970 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
17971 VME_OBJECT(entry)->internal)) {
17972 entry->vme_resilient_media = TRUE;
17973 }
f427ee49
A
17974 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
17975 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
17976 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
d9a64523 17977 vm_map_store_entry_link(target_map, insp_entry, entry,
0a7de745 17978 vmk_flags);
91447636
A
17979 insp_entry = entry;
17980 } else {
17981 if (!entry->is_sub_map) {
3e170ce0 17982 vm_object_deallocate(VME_OBJECT(entry));
91447636 17983 } else {
3e170ce0 17984 vm_map_deallocate(VME_SUBMAP(entry));
2d21ac55 17985 }
f427ee49 17986 vm_map_copy_entry_dispose(copy_map, entry);
1c79356b 17987 }
91447636 17988 }
1c79356b 17989
3e170ce0
A
17990 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17991 *cur_protection = VM_PROT_READ;
17992 *max_protection = VM_PROT_READ;
17993 }
17994
0a7de745 17995 if (target_map->disable_vmentry_reuse == TRUE) {
39037602 17996 assert(!target_map->is_nested_map);
0a7de745 17997 if (target_map->highest_entry_end < insp_entry->vme_end) {
6d2010ae
A
17998 target_map->highest_entry_end = insp_entry->vme_end;
17999 }
18000 }
18001
91447636 18002 if (result == KERN_SUCCESS) {
f427ee49 18003 target_map->size += target_size;
0c530ab8 18004 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
5ba3f43e 18005
d9a64523
A
18006 }
18007 vm_map_unlock(target_map);
18008
0a7de745 18009 if (result == KERN_SUCCESS && target_map->wiring_required) {
5ba3f43e 18010 result = vm_map_wire_kernel(target_map, *address,
0a7de745
A
18011 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18012 TRUE);
18013 }
39236c6e 18014
5ba3f43e
A
18015 /*
18016 * If requested, return the address of the data pointed to by the
39236c6e
A
18017 * request, rather than the base of the resulting page.
18018 */
18019 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18020 *address += offset_in_mapping;
18021 }
18022
f427ee49
A
18023 if (src_page_mask != target_page_mask) {
18024 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18025 }
18026 vm_map_copy_discard(copy_map);
18027 copy_map = VM_MAP_COPY_NULL;
18028
91447636
A
18029 return result;
18030}
1c79356b 18031
91447636
A
18032/*
18033 * Routine: vm_map_remap_range_allocate
18034 *
18035 * Description:
18036 * Allocate a range in the specified virtual address map.
18037 * returns the address and the map entry just before the allocated
18038 * range
18039 *
18040 * Map must be locked.
18041 */
1c79356b 18042
91447636
A
18043static kern_return_t
18044vm_map_remap_range_allocate(
0a7de745
A
18045 vm_map_t map,
18046 vm_map_address_t *address, /* IN/OUT */
18047 vm_map_size_t size,
18048 vm_map_offset_t mask,
18049 int flags,
18050 vm_map_kernel_flags_t vmk_flags,
5ba3f43e 18051 __unused vm_tag_t tag,
0a7de745 18052 vm_map_entry_t *map_entry) /* OUT */
91447636 18053{
0a7de745
A
18054 vm_map_entry_t entry;
18055 vm_map_offset_t start;
18056 vm_map_offset_t end;
18057 vm_map_offset_t desired_empty_end;
18058 kern_return_t kr;
18059 vm_map_entry_t hole_entry;
1c79356b 18060
0a7de745 18061StartAgain:;
1c79356b 18062
2d21ac55 18063 start = *address;
1c79356b 18064
0a7de745
A
18065 if (flags & VM_FLAGS_ANYWHERE) {
18066 if (flags & VM_FLAGS_RANDOM_ADDR) {
39037602
A
18067 /*
18068 * Get a random start address.
18069 */
18070 kr = vm_map_random_address_for_size(map, address, size);
18071 if (kr != KERN_SUCCESS) {
0a7de745 18072 return kr;
39037602
A
18073 }
18074 start = *address;
18075 }
18076
2d21ac55
A
18077 /*
18078 * Calculate the first possible address.
18079 */
1c79356b 18080
0a7de745 18081 if (start < map->min_offset) {
2d21ac55 18082 start = map->min_offset;
0a7de745
A
18083 }
18084 if (start > map->max_offset) {
18085 return KERN_NO_SPACE;
18086 }
5ba3f43e 18087
2d21ac55
A
18088 /*
18089 * Look for the first possible address;
18090 * if there's already something at this
18091 * address, we have to start after it.
18092 */
1c79356b 18093
0a7de745 18094 if (map->disable_vmentry_reuse == TRUE) {
6d2010ae 18095 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 18096 } else {
3e170ce0 18097 if (map->holelistenabled) {
d9a64523 18098 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
3e170ce0
A
18099
18100 if (hole_entry == NULL) {
18101 /*
18102 * No more space in the map?
18103 */
0a7de745 18104 return KERN_NO_SPACE;
3e170ce0 18105 } else {
3e170ce0
A
18106 boolean_t found_hole = FALSE;
18107
18108 do {
18109 if (hole_entry->vme_start >= start) {
18110 start = hole_entry->vme_start;
18111 found_hole = TRUE;
18112 break;
18113 }
18114
18115 if (hole_entry->vme_end > start) {
18116 found_hole = TRUE;
18117 break;
18118 }
18119 hole_entry = hole_entry->vme_next;
d9a64523 18120 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
3e170ce0
A
18121
18122 if (found_hole == FALSE) {
0a7de745 18123 return KERN_NO_SPACE;
3e170ce0
A
18124 }
18125
18126 entry = hole_entry;
18127 }
6d2010ae 18128 } else {
3e170ce0
A
18129 assert(first_free_is_valid(map));
18130 if (start == map->min_offset) {
0a7de745 18131 if ((entry = map->first_free) != vm_map_to_entry(map)) {
3e170ce0 18132 start = entry->vme_end;
0a7de745 18133 }
3e170ce0 18134 } else {
0a7de745
A
18135 vm_map_entry_t tmp_entry;
18136 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
3e170ce0 18137 start = tmp_entry->vme_end;
0a7de745 18138 }
3e170ce0
A
18139 entry = tmp_entry;
18140 }
6d2010ae 18141 }
39236c6e 18142 start = vm_map_round_page(start,
0a7de745 18143 VM_MAP_PAGE_MASK(map));
2d21ac55 18144 }
5ba3f43e 18145
2d21ac55
A
18146 /*
18147 * In any case, the "entry" always precedes
18148 * the proposed new region throughout the
18149 * loop:
18150 */
1c79356b 18151
2d21ac55 18152 while (TRUE) {
0a7de745 18153 vm_map_entry_t next;
2d21ac55
A
18154
18155 /*
18156 * Find the end of the proposed new region.
18157 * Be sure we didn't go beyond the end, or
18158 * wrap around the address.
18159 */
18160
18161 end = ((start + mask) & ~mask);
39236c6e 18162 end = vm_map_round_page(end,
0a7de745
A
18163 VM_MAP_PAGE_MASK(map));
18164 if (end < start) {
18165 return KERN_NO_SPACE;
18166 }
2d21ac55
A
18167 start = end;
18168 end += size;
18169
d9a64523
A
18170 /* We want an entire page of empty space, but don't increase the allocation size. */
18171 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
18172
18173 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
2d21ac55
A
18174 if (map->wait_for_space) {
18175 if (size <= (map->max_offset -
0a7de745 18176 map->min_offset)) {
2d21ac55
A
18177 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
18178 vm_map_unlock(map);
18179 thread_block(THREAD_CONTINUE_NULL);
18180 vm_map_lock(map);
18181 goto StartAgain;
18182 }
18183 }
5ba3f43e 18184
0a7de745 18185 return KERN_NO_SPACE;
2d21ac55 18186 }
1c79356b 18187
2d21ac55 18188 next = entry->vme_next;
1c79356b 18189
3e170ce0 18190 if (map->holelistenabled) {
0a7de745 18191 if (entry->vme_end >= desired_empty_end) {
3e170ce0 18192 break;
0a7de745 18193 }
3e170ce0
A
18194 } else {
18195 /*
0a7de745 18196 * If there are no more entries, we must win.
3e170ce0
A
18197 *
18198 * OR
18199 *
18200 * If there is another entry, it must be
18201 * after the end of the potential new region.
18202 */
1c79356b 18203
0a7de745 18204 if (next == vm_map_to_entry(map)) {
3e170ce0 18205 break;
0a7de745 18206 }
3e170ce0 18207
0a7de745 18208 if (next->vme_start >= desired_empty_end) {
3e170ce0 18209 break;
0a7de745 18210 }
3e170ce0 18211 }
1c79356b 18212
2d21ac55
A
18213 /*
18214 * Didn't fit -- move to the next entry.
18215 */
1c79356b 18216
2d21ac55 18217 entry = next;
3e170ce0
A
18218
18219 if (map->holelistenabled) {
d9a64523 18220 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
3e170ce0
A
18221 /*
18222 * Wrapped around
18223 */
0a7de745 18224 return KERN_NO_SPACE;
3e170ce0
A
18225 }
18226 start = entry->vme_start;
18227 } else {
18228 start = entry->vme_end;
18229 }
18230 }
18231
18232 if (map->holelistenabled) {
3e170ce0
A
18233 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
18234 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
18235 }
2d21ac55 18236 }
3e170ce0 18237
2d21ac55
A
18238 *address = start;
18239 } else {
0a7de745 18240 vm_map_entry_t temp_entry;
5ba3f43e 18241
2d21ac55
A
18242 /*
18243 * Verify that:
18244 * the address doesn't itself violate
18245 * the mask requirement.
18246 */
1c79356b 18247
0a7de745
A
18248 if ((start & mask) != 0) {
18249 return KERN_NO_SPACE;
18250 }
1c79356b 18251
1c79356b 18252
2d21ac55
A
18253 /*
18254 * ... the address is within bounds
18255 */
1c79356b 18256
2d21ac55 18257 end = start + size;
1c79356b 18258
2d21ac55
A
18259 if ((start < map->min_offset) ||
18260 (end > map->max_offset) ||
18261 (start >= end)) {
0a7de745 18262 return KERN_INVALID_ADDRESS;
2d21ac55 18263 }
1c79356b 18264
060df5ea
A
18265 /*
18266 * If we're asked to overwrite whatever was mapped in that
18267 * range, first deallocate that range.
18268 */
18269 if (flags & VM_FLAGS_OVERWRITE) {
18270 vm_map_t zap_map;
d9a64523 18271 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
060df5ea
A
18272
18273 /*
18274 * We use a "zap_map" to avoid having to unlock
18275 * the "map" in vm_map_delete(), which would compromise
18276 * the atomicity of the "deallocate" and then "remap"
18277 * combination.
18278 */
18279 zap_map = vm_map_create(PMAP_NULL,
0a7de745
A
18280 start,
18281 end,
18282 map->hdr.entries_pageable);
060df5ea
A
18283 if (zap_map == VM_MAP_NULL) {
18284 return KERN_RESOURCE_SHORTAGE;
18285 }
39236c6e 18286 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 18287 vm_map_disable_hole_optimization(zap_map);
060df5ea 18288
d9a64523
A
18289 if (vmk_flags.vmkf_overwrite_immutable) {
18290 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
18291 }
060df5ea 18292 kr = vm_map_delete(map, start, end,
0a7de745
A
18293 remove_flags,
18294 zap_map);
060df5ea
A
18295 if (kr == KERN_SUCCESS) {
18296 vm_map_destroy(zap_map,
0a7de745 18297 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
060df5ea
A
18298 zap_map = VM_MAP_NULL;
18299 }
18300 }
18301
2d21ac55
A
18302 /*
18303 * ... the starting address isn't allocated
18304 */
91447636 18305
0a7de745
A
18306 if (vm_map_lookup_entry(map, start, &temp_entry)) {
18307 return KERN_NO_SPACE;
18308 }
91447636 18309
2d21ac55 18310 entry = temp_entry;
91447636 18311
2d21ac55
A
18312 /*
18313 * ... the next region doesn't overlap the
18314 * end point.
18315 */
1c79356b 18316
2d21ac55 18317 if ((entry->vme_next != vm_map_to_entry(map)) &&
0a7de745
A
18318 (entry->vme_next->vme_start < end)) {
18319 return KERN_NO_SPACE;
18320 }
2d21ac55
A
18321 }
18322 *map_entry = entry;
0a7de745 18323 return KERN_SUCCESS;
91447636 18324}
1c79356b 18325
91447636
A
18326/*
18327 * vm_map_switch:
18328 *
18329 * Set the address map for the current thread to the specified map
18330 */
1c79356b 18331
91447636
A
18332vm_map_t
18333vm_map_switch(
0a7de745 18334 vm_map_t map)
91447636 18335{
0a7de745
A
18336 int mycpu;
18337 thread_t thread = current_thread();
18338 vm_map_t oldmap = thread->map;
1c79356b 18339
91447636
A
18340 mp_disable_preemption();
18341 mycpu = cpu_number();
1c79356b 18342
91447636
A
18343 /*
18344 * Deactivate the current map and activate the requested map
18345 */
18346 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 18347
91447636 18348 mp_enable_preemption();
0a7de745 18349 return oldmap;
91447636 18350}
1c79356b 18351
1c79356b 18352
91447636
A
18353/*
18354 * Routine: vm_map_write_user
18355 *
18356 * Description:
18357 * Copy out data from a kernel space into space in the
18358 * destination map. The space must already exist in the
18359 * destination map.
18360 * NOTE: This routine should only be called by threads
18361 * which can block on a page fault. i.e. kernel mode user
18362 * threads.
18363 *
18364 */
18365kern_return_t
18366vm_map_write_user(
0a7de745
A
18367 vm_map_t map,
18368 void *src_p,
18369 vm_map_address_t dst_addr,
18370 vm_size_t size)
91447636 18371{
0a7de745 18372 kern_return_t kr = KERN_SUCCESS;
1c79356b 18373
0a7de745 18374 if (current_map() == map) {
91447636
A
18375 if (copyout(src_p, dst_addr, size)) {
18376 kr = KERN_INVALID_ADDRESS;
18377 }
18378 } else {
0a7de745 18379 vm_map_t oldmap;
1c79356b 18380
91447636
A
18381 /* take on the identity of the target map while doing */
18382 /* the transfer */
1c79356b 18383
91447636
A
18384 vm_map_reference(map);
18385 oldmap = vm_map_switch(map);
18386 if (copyout(src_p, dst_addr, size)) {
18387 kr = KERN_INVALID_ADDRESS;
1c79356b 18388 }
91447636
A
18389 vm_map_switch(oldmap);
18390 vm_map_deallocate(map);
1c79356b 18391 }
91447636 18392 return kr;
1c79356b
A
18393}
18394
18395/*
91447636
A
18396 * Routine: vm_map_read_user
18397 *
18398 * Description:
18399 * Copy in data from a user space source map into the
18400 * kernel map. The space must already exist in the
18401 * kernel map.
18402 * NOTE: This routine should only be called by threads
18403 * which can block on a page fault. i.e. kernel mode user
18404 * threads.
1c79356b 18405 *
1c79356b
A
18406 */
18407kern_return_t
91447636 18408vm_map_read_user(
0a7de745
A
18409 vm_map_t map,
18410 vm_map_address_t src_addr,
18411 void *dst_p,
18412 vm_size_t size)
1c79356b 18413{
0a7de745 18414 kern_return_t kr = KERN_SUCCESS;
1c79356b 18415
0a7de745 18416 if (current_map() == map) {
91447636
A
18417 if (copyin(src_addr, dst_p, size)) {
18418 kr = KERN_INVALID_ADDRESS;
18419 }
18420 } else {
0a7de745 18421 vm_map_t oldmap;
1c79356b 18422
91447636
A
18423 /* take on the identity of the target map while doing */
18424 /* the transfer */
18425
18426 vm_map_reference(map);
18427 oldmap = vm_map_switch(map);
18428 if (copyin(src_addr, dst_p, size)) {
18429 kr = KERN_INVALID_ADDRESS;
18430 }
18431 vm_map_switch(oldmap);
18432 vm_map_deallocate(map);
1c79356b 18433 }
91447636
A
18434 return kr;
18435}
18436
1c79356b 18437
91447636
A
18438/*
18439 * vm_map_check_protection:
18440 *
18441 * Assert that the target map allows the specified
18442 * privilege on the entire address region given.
18443 * The entire region must be allocated.
18444 */
2d21ac55
A
18445boolean_t
18446vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
0a7de745 18447 vm_map_offset_t end, vm_prot_t protection)
91447636 18448{
2d21ac55
A
18449 vm_map_entry_t entry;
18450 vm_map_entry_t tmp_entry;
1c79356b 18451
91447636 18452 vm_map_lock(map);
1c79356b 18453
0a7de745 18454 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
2d21ac55 18455 vm_map_unlock(map);
0a7de745 18456 return FALSE;
1c79356b
A
18457 }
18458
91447636
A
18459 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18460 vm_map_unlock(map);
0a7de745 18461 return FALSE;
91447636 18462 }
1c79356b 18463
91447636
A
18464 entry = tmp_entry;
18465
18466 while (start < end) {
18467 if (entry == vm_map_to_entry(map)) {
18468 vm_map_unlock(map);
0a7de745 18469 return FALSE;
1c79356b 18470 }
1c79356b 18471
91447636
A
18472 /*
18473 * No holes allowed!
18474 */
1c79356b 18475
91447636
A
18476 if (start < entry->vme_start) {
18477 vm_map_unlock(map);
0a7de745 18478 return FALSE;
91447636
A
18479 }
18480
18481 /*
18482 * Check protection associated with entry.
18483 */
18484
18485 if ((entry->protection & protection) != protection) {
18486 vm_map_unlock(map);
0a7de745 18487 return FALSE;
91447636
A
18488 }
18489
18490 /* go to next entry */
18491
18492 start = entry->vme_end;
18493 entry = entry->vme_next;
18494 }
18495 vm_map_unlock(map);
0a7de745 18496 return TRUE;
1c79356b
A
18497}
18498
1c79356b 18499kern_return_t
91447636 18500vm_map_purgable_control(
0a7de745
A
18501 vm_map_t map,
18502 vm_map_offset_t address,
18503 vm_purgable_t control,
18504 int *state)
1c79356b 18505{
0a7de745
A
18506 vm_map_entry_t entry;
18507 vm_object_t object;
18508 kern_return_t kr;
18509 boolean_t was_nonvolatile;
1c79356b 18510
1c79356b 18511 /*
91447636
A
18512 * Vet all the input parameters and current type and state of the
18513 * underlaying object. Return with an error if anything is amiss.
1c79356b 18514 */
0a7de745
A
18515 if (map == VM_MAP_NULL) {
18516 return KERN_INVALID_ARGUMENT;
18517 }
1c79356b 18518
91447636 18519 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7 18520 control != VM_PURGABLE_GET_STATE &&
5ba3f43e 18521 control != VM_PURGABLE_PURGE_ALL &&
0a7de745
A
18522 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
18523 return KERN_INVALID_ARGUMENT;
18524 }
1c79356b 18525
b0d623f7
A
18526 if (control == VM_PURGABLE_PURGE_ALL) {
18527 vm_purgeable_object_purge_all();
18528 return KERN_SUCCESS;
18529 }
18530
5ba3f43e 18531 if ((control == VM_PURGABLE_SET_STATE ||
0a7de745 18532 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
b0d623f7 18533 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
0a7de745
A
18534 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
18535 return KERN_INVALID_ARGUMENT;
18536 }
91447636 18537
b0d623f7 18538 vm_map_lock_read(map);
91447636
A
18539
18540 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
91447636
A
18541 /*
18542 * Must pass a valid non-submap address.
18543 */
b0d623f7 18544 vm_map_unlock_read(map);
0a7de745 18545 return KERN_INVALID_ADDRESS;
91447636
A
18546 }
18547
18548 if ((entry->protection & VM_PROT_WRITE) == 0) {
18549 /*
18550 * Can't apply purgable controls to something you can't write.
18551 */
b0d623f7 18552 vm_map_unlock_read(map);
0a7de745 18553 return KERN_PROTECTION_FAILURE;
91447636
A
18554 }
18555
3e170ce0 18556 object = VME_OBJECT(entry);
fe8ab488
A
18557 if (object == VM_OBJECT_NULL ||
18558 object->purgable == VM_PURGABLE_DENY) {
91447636 18559 /*
fe8ab488 18560 * Object must already be present and be purgeable.
91447636 18561 */
b0d623f7 18562 vm_map_unlock_read(map);
91447636
A
18563 return KERN_INVALID_ARGUMENT;
18564 }
5ba3f43e 18565
91447636
A
18566 vm_object_lock(object);
18567
39236c6e 18568#if 00
5ba3f43e 18569 if (VME_OFFSET(entry) != 0 ||
6d2010ae 18570 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
18571 /*
18572 * Can only apply purgable controls to the whole (existing)
18573 * object at once.
18574 */
b0d623f7 18575 vm_map_unlock_read(map);
91447636
A
18576 vm_object_unlock(object);
18577 return KERN_INVALID_ARGUMENT;
1c79356b 18578 }
39236c6e 18579#endif
fe8ab488
A
18580
18581 assert(!entry->is_sub_map);
18582 assert(!entry->use_pmap); /* purgeable has its own accounting */
18583
b0d623f7 18584 vm_map_unlock_read(map);
1c79356b 18585
fe8ab488
A
18586 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
18587
91447636 18588 kr = vm_object_purgable_control(object, control, state);
1c79356b 18589
fe8ab488
A
18590 if (was_nonvolatile &&
18591 object->purgable != VM_PURGABLE_NONVOLATILE &&
18592 map->pmap == kernel_pmap) {
18593#if DEBUG
18594 object->vo_purgeable_volatilizer = kernel_task;
18595#endif /* DEBUG */
18596 }
18597
91447636 18598 vm_object_unlock(object);
1c79356b 18599
91447636
A
18600 return kr;
18601}
1c79356b 18602
f427ee49
A
18603void
18604vm_map_footprint_query_page_info(
18605 vm_map_t map,
18606 vm_map_entry_t map_entry,
18607 vm_map_offset_t curr_s_offset,
18608 int *disposition_p)
18609{
18610 int pmap_disp;
18611 vm_object_t object;
18612 int disposition;
18613 int effective_page_size;
18614
18615 vm_map_lock_assert_held(map);
18616 assert(!map->has_corpse_footprint);
18617 assert(curr_s_offset >= map_entry->vme_start);
18618 assert(curr_s_offset < map_entry->vme_end);
18619
18620 object = VME_OBJECT(map_entry);
18621 if (object == VM_OBJECT_NULL) {
18622 *disposition_p = 0;
18623 return;
18624 }
18625
18626 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
18627
18628 pmap_disp = 0;
18629 if (object == VM_OBJECT_NULL) {
18630 /* nothing mapped here: no need to ask */
18631 *disposition_p = 0;
18632 return;
18633 } else if (map_entry->is_sub_map &&
18634 !map_entry->use_pmap) {
18635 /* nested pmap: no footprint */
18636 *disposition_p = 0;
18637 return;
18638 }
18639
18640 /*
18641 * Query the pmap.
18642 */
18643 pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
18644
18645 /*
18646 * Compute this page's disposition.
18647 */
18648 disposition = 0;
18649
18650 /* deal with "alternate accounting" first */
18651 if (!map_entry->is_sub_map &&
18652 object->vo_no_footprint) {
18653 /* does not count in footprint */
18654 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18655 } else if (!map_entry->is_sub_map &&
18656 (object->purgable == VM_PURGABLE_NONVOLATILE ||
18657 (object->purgable == VM_PURGABLE_DENY &&
18658 object->vo_ledger_tag)) &&
18659 VM_OBJECT_OWNER(object) != NULL &&
18660 VM_OBJECT_OWNER(object)->map == map) {
18661 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18662 if ((((curr_s_offset
18663 - map_entry->vme_start
18664 + VME_OFFSET(map_entry))
18665 / effective_page_size) <
18666 (object->resident_page_count +
18667 vm_compressor_pager_get_count(object->pager)))) {
18668 /*
18669 * Non-volatile purgeable object owned
18670 * by this task: report the first
18671 * "#resident + #compressed" pages as
18672 * "resident" (to show that they
18673 * contribute to the footprint) but not
18674 * "dirty" (to avoid double-counting
18675 * with the fake "non-volatile" region
18676 * we'll report at the end of the
18677 * address space to account for all
18678 * (mapped or not) non-volatile memory
18679 * owned by this task.
18680 */
18681 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18682 }
18683 } else if (!map_entry->is_sub_map &&
18684 (object->purgable == VM_PURGABLE_VOLATILE ||
18685 object->purgable == VM_PURGABLE_EMPTY) &&
18686 VM_OBJECT_OWNER(object) != NULL &&
18687 VM_OBJECT_OWNER(object)->map == map) {
18688 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18689 if ((((curr_s_offset
18690 - map_entry->vme_start
18691 + VME_OFFSET(map_entry))
18692 / effective_page_size) <
18693 object->wired_page_count)) {
18694 /*
18695 * Volatile|empty purgeable object owned
18696 * by this task: report the first
18697 * "#wired" pages as "resident" (to
18698 * show that they contribute to the
18699 * footprint) but not "dirty" (to avoid
18700 * double-counting with the fake
18701 * "non-volatile" region we'll report
18702 * at the end of the address space to
18703 * account for all (mapped or not)
18704 * non-volatile memory owned by this
18705 * task.
18706 */
18707 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18708 }
18709 } else if (!map_entry->is_sub_map &&
18710 map_entry->iokit_acct &&
18711 object->internal &&
18712 object->purgable == VM_PURGABLE_DENY) {
18713 /*
18714 * Non-purgeable IOKit memory: phys_footprint
18715 * includes the entire virtual mapping.
18716 */
18717 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18718 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18719 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18720 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
18721 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
18722 /* alternate accounting */
18723#if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18724 if (map->pmap->footprint_was_suspended) {
18725 /*
18726 * The assertion below can fail if dyld
18727 * suspended footprint accounting
18728 * while doing some adjustments to
18729 * this page; the mapping would say
18730 * "use pmap accounting" but the page
18731 * would be marked "alternate
18732 * accounting".
18733 */
18734 } else
18735#endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18736 {
18737 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18738 }
18739 disposition = 0;
18740 } else {
18741 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
18742 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18743 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18744 disposition |= VM_PAGE_QUERY_PAGE_REF;
18745 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
18746 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18747 } else {
18748 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
18749 }
18750 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
18751 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
18752 }
18753 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
18754 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18755 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18756 }
18757 }
18758
18759 *disposition_p = disposition;
18760}
18761
91447636 18762kern_return_t
b0d623f7 18763vm_map_page_query_internal(
0a7de745
A
18764 vm_map_t target_map,
18765 vm_map_offset_t offset,
18766 int *disposition,
18767 int *ref_count)
91447636 18768{
0a7de745
A
18769 kern_return_t kr;
18770 vm_page_info_basic_data_t info;
18771 mach_msg_type_number_t count;
b0d623f7
A
18772
18773 count = VM_PAGE_INFO_BASIC_COUNT;
18774 kr = vm_map_page_info(target_map,
0a7de745
A
18775 offset,
18776 VM_PAGE_INFO_BASIC,
18777 (vm_page_info_t) &info,
18778 &count);
b0d623f7
A
18779 if (kr == KERN_SUCCESS) {
18780 *disposition = info.disposition;
18781 *ref_count = info.ref_count;
18782 } else {
18783 *disposition = 0;
18784 *ref_count = 0;
18785 }
2d21ac55 18786
b0d623f7
A
18787 return kr;
18788}
5ba3f43e 18789
b0d623f7
A
18790kern_return_t
18791vm_map_page_info(
0a7de745
A
18792 vm_map_t map,
18793 vm_map_offset_t offset,
18794 vm_page_info_flavor_t flavor,
18795 vm_page_info_t info,
18796 mach_msg_type_number_t *count)
5ba3f43e 18797{
0a7de745 18798 return vm_map_page_range_info_internal(map,
f427ee49
A
18799 offset, /* start of range */
18800 (offset + 1), /* this will get rounded in the call to the page boundary */
18801 (int)-1, /* effective_page_shift: unspecified */
0a7de745
A
18802 flavor,
18803 info,
18804 count);
5ba3f43e
A
18805}
18806
18807kern_return_t
18808vm_map_page_range_info_internal(
0a7de745
A
18809 vm_map_t map,
18810 vm_map_offset_t start_offset,
18811 vm_map_offset_t end_offset,
f427ee49 18812 int effective_page_shift,
0a7de745
A
18813 vm_page_info_flavor_t flavor,
18814 vm_page_info_t info,
18815 mach_msg_type_number_t *count)
b0d623f7 18816{
0a7de745
A
18817 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
18818 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
18819 vm_page_t m = VM_PAGE_NULL;
18820 kern_return_t retval = KERN_SUCCESS;
18821 int disposition = 0;
18822 int ref_count = 0;
18823 int depth = 0, info_idx = 0;
18824 vm_page_info_basic_t basic_info = 0;
18825 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
18826 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
18827 boolean_t do_region_footprint;
cb323159 18828 ledger_amount_t ledger_resident, ledger_compressed;
f427ee49
A
18829 int effective_page_size;
18830 vm_map_offset_t effective_page_mask;
2d21ac55 18831
b0d623f7
A
18832 switch (flavor) {
18833 case VM_PAGE_INFO_BASIC:
18834 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
18835 /*
18836 * The "vm_page_info_basic_data" structure was not
18837 * properly padded, so allow the size to be off by
18838 * one to maintain backwards binary compatibility...
18839 */
0a7de745 18840 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
6d2010ae 18841 return KERN_INVALID_ARGUMENT;
0a7de745 18842 }
b0d623f7
A
18843 }
18844 break;
18845 default:
18846 return KERN_INVALID_ARGUMENT;
91447636 18847 }
2d21ac55 18848
f427ee49
A
18849 if (effective_page_shift == -1) {
18850 effective_page_shift = vm_self_region_page_shift_safely(map);
18851 if (effective_page_shift == -1) {
18852 return KERN_INVALID_ARGUMENT;
18853 }
18854 }
18855 effective_page_size = (1 << effective_page_shift);
18856 effective_page_mask = effective_page_size - 1;
18857
a39ff7e2 18858 do_region_footprint = task_self_region_footprint();
b0d623f7
A
18859 disposition = 0;
18860 ref_count = 0;
b0d623f7 18861 depth = 0;
5ba3f43e 18862 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
b0d623f7 18863 retval = KERN_SUCCESS;
5ba3f43e 18864
f427ee49
A
18865 offset_in_page = start_offset & effective_page_mask;
18866 start = vm_map_trunc_page(start_offset, effective_page_mask);
18867 end = vm_map_round_page(end_offset, effective_page_mask);
5ba3f43e 18868
0a7de745
A
18869 if (end < start) {
18870 return KERN_INVALID_ARGUMENT;
18871 }
18872
18873 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
b0d623f7
A
18874
18875 vm_map_lock_read(map);
18876
cb323159
A
18877 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
18878
5ba3f43e
A
18879 for (curr_s_offset = start; curr_s_offset < end;) {
18880 /*
18881 * New lookup needs reset of these variables.
18882 */
18883 curr_object = object = VM_OBJECT_NULL;
18884 offset_in_object = 0;
18885 ref_count = 0;
18886 depth = 0;
18887
a39ff7e2
A
18888 if (do_region_footprint &&
18889 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
a39ff7e2
A
18890 /*
18891 * Request for "footprint" info about a page beyond
18892 * the end of address space: this must be for
18893 * the fake region vm_map_region_recurse_64()
18894 * reported to account for non-volatile purgeable
18895 * memory owned by this task.
18896 */
18897 disposition = 0;
cb323159 18898
a39ff7e2 18899 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
cb323159 18900 (unsigned) ledger_compressed) {
a39ff7e2
A
18901 /*
18902 * We haven't reported all the "non-volatile
18903 * compressed" pages yet, so report this fake
18904 * page as "compressed".
18905 */
18906 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18907 } else {
18908 /*
18909 * We've reported all the non-volatile
18910 * compressed page but not all the non-volatile
18911 * pages , so report this fake page as
18912 * "resident dirty".
18913 */
18914 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18915 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18916 disposition |= VM_PAGE_QUERY_PAGE_REF;
18917 }
18918 switch (flavor) {
18919 case VM_PAGE_INFO_BASIC:
18920 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18921 basic_info->disposition = disposition;
18922 basic_info->ref_count = 1;
f427ee49 18923 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
a39ff7e2
A
18924 basic_info->offset = 0;
18925 basic_info->depth = 0;
18926
18927 info_idx++;
18928 break;
18929 }
f427ee49 18930 curr_s_offset += effective_page_size;
a39ff7e2
A
18931 continue;
18932 }
18933
5ba3f43e
A
18934 /*
18935 * First, find the map entry covering "curr_s_offset", going down
18936 * submaps if necessary.
18937 */
18938 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
18939 /* no entry -> no object -> no page */
18940
18941 if (curr_s_offset < vm_map_min(map)) {
18942 /*
18943 * Illegal address that falls below map min.
18944 */
18945 curr_e_offset = MIN(end, vm_map_min(map));
5ba3f43e
A
18946 } else if (curr_s_offset >= vm_map_max(map)) {
18947 /*
18948 * Illegal address that falls on/after map max.
18949 */
18950 curr_e_offset = end;
5ba3f43e
A
18951 } else if (map_entry == vm_map_to_entry(map)) {
18952 /*
18953 * Hit a hole.
18954 */
18955 if (map_entry->vme_next == vm_map_to_entry(map)) {
18956 /*
18957 * Empty map.
18958 */
18959 curr_e_offset = MIN(map->max_offset, end);
18960 } else {
18961 /*
0a7de745
A
18962 * Hole at start of the map.
18963 */
5ba3f43e
A
18964 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
18965 }
18966 } else {
18967 if (map_entry->vme_next == vm_map_to_entry(map)) {
18968 /*
18969 * Hole at the end of the map.
18970 */
18971 curr_e_offset = MIN(map->max_offset, end);
18972 } else {
18973 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
18974 }
18975 }
18976
18977 assert(curr_e_offset >= curr_s_offset);
18978
f427ee49 18979 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
5ba3f43e
A
18980
18981 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18982
18983 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
18984
18985 curr_s_offset = curr_e_offset;
18986
18987 info_idx += num_pages;
18988
18989 continue;
b0d623f7 18990 }
5ba3f43e 18991
b0d623f7 18992 /* compute offset from this map entry's start */
5ba3f43e
A
18993 offset_in_object = curr_s_offset - map_entry->vme_start;
18994
b0d623f7 18995 /* compute offset into this map entry's object (or submap) */
5ba3f43e 18996 offset_in_object += VME_OFFSET(map_entry);
b0d623f7
A
18997
18998 if (map_entry->is_sub_map) {
5ba3f43e
A
18999 vm_map_t sub_map = VM_MAP_NULL;
19000 vm_page_info_t submap_info = 0;
19001 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
19002
19003 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
19004
19005 submap_s_offset = offset_in_object;
19006 submap_e_offset = submap_s_offset + range_len;
2d21ac55 19007
3e170ce0 19008 sub_map = VME_SUBMAP(map_entry);
5ba3f43e
A
19009
19010 vm_map_reference(sub_map);
b0d623f7 19011 vm_map_unlock_read(map);
2d21ac55 19012
5ba3f43e
A
19013 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19014
f427ee49
A
19015 assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
19016 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
19017
5ba3f43e 19018 retval = vm_map_page_range_info_internal(sub_map,
0a7de745
A
19019 submap_s_offset,
19020 submap_e_offset,
f427ee49 19021 effective_page_shift,
0a7de745
A
19022 VM_PAGE_INFO_BASIC,
19023 (vm_page_info_t) submap_info,
19024 count);
5ba3f43e
A
19025
19026 assert(retval == KERN_SUCCESS);
19027
19028 vm_map_lock_read(map);
19029 vm_map_deallocate(sub_map);
19030
19031 /* Move the "info" index by the number of pages we inspected.*/
f427ee49 19032 info_idx += range_len >> effective_page_shift;
5ba3f43e
A
19033
19034 /* Move our current offset by the size of the range we inspected.*/
19035 curr_s_offset += range_len;
b0d623f7 19036
b0d623f7 19037 continue;
1c79356b 19038 }
b0d623f7 19039
5ba3f43e 19040 object = VME_OBJECT(map_entry);
f427ee49 19041
5ba3f43e 19042 if (object == VM_OBJECT_NULL) {
5ba3f43e
A
19043 /*
19044 * We don't have an object here and, hence,
19045 * no pages to inspect. We'll fill up the
19046 * info structure appropriately.
19047 */
19048
19049 curr_e_offset = MIN(map_entry->vme_end, end);
19050
f427ee49 19051 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
5ba3f43e
A
19052
19053 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19054
19055 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19056
19057 curr_s_offset = curr_e_offset;
19058
19059 info_idx += num_pages;
19060
19061 continue;
19062 }
19063
a39ff7e2 19064 if (do_region_footprint) {
a39ff7e2 19065 disposition = 0;
d9a64523
A
19066 if (map->has_corpse_footprint) {
19067 /*
19068 * Query the page info data we saved
19069 * while forking the corpse.
19070 */
19071 vm_map_corpse_footprint_query_page_info(
19072 map,
19073 curr_s_offset,
f427ee49 19074 &disposition);
d9a64523
A
19075 } else {
19076 /*
f427ee49
A
19077 * Query the live pmap for footprint info
19078 * about this page.
a39ff7e2 19079 */
f427ee49
A
19080 vm_map_footprint_query_page_info(
19081 map,
19082 map_entry,
19083 curr_s_offset,
19084 &disposition);
a39ff7e2
A
19085 }
19086 switch (flavor) {
19087 case VM_PAGE_INFO_BASIC:
19088 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19089 basic_info->disposition = disposition;
19090 basic_info->ref_count = 1;
f427ee49 19091 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
a39ff7e2
A
19092 basic_info->offset = 0;
19093 basic_info->depth = 0;
19094
19095 info_idx++;
19096 break;
19097 }
f427ee49 19098 curr_s_offset += effective_page_size;
a39ff7e2
A
19099 continue;
19100 }
19101
5ba3f43e
A
19102 vm_object_reference(object);
19103 /*
19104 * Shared mode -- so we can allow other readers
19105 * to grab the lock too.
19106 */
19107 vm_object_lock_shared(object);
19108
19109 curr_e_offset = MIN(map_entry->vme_end, end);
19110
b0d623f7 19111 vm_map_unlock_read(map);
b0d623f7 19112
5ba3f43e 19113 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
b0d623f7 19114
5ba3f43e 19115 curr_object = object;
2d21ac55 19116
5ba3f43e 19117 for (; curr_s_offset < curr_e_offset;) {
5ba3f43e
A
19118 if (object == curr_object) {
19119 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19120 } else {
19121 ref_count = curr_object->ref_count;
19122 }
19123
19124 curr_offset_in_object = offset_in_object;
19125
19126 for (;;) {
f427ee49 19127 m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
5ba3f43e
A
19128
19129 if (m != VM_PAGE_NULL) {
5ba3f43e 19130 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
39037602 19131 break;
5ba3f43e
A
19132 } else {
19133 if (curr_object->internal &&
19134 curr_object->alive &&
19135 !curr_object->terminating &&
19136 curr_object->pager_ready) {
f427ee49 19137 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
5ba3f43e
A
19138 == VM_EXTERNAL_STATE_EXISTS) {
19139 /* the pager has that page */
19140 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19141 break;
19142 }
19143 }
0a7de745 19144
5ba3f43e
A
19145 /*
19146 * Go down the VM object shadow chain until we find the page
19147 * we're looking for.
19148 */
19149
19150 if (curr_object->shadow != VM_OBJECT_NULL) {
19151 vm_object_t shadow = VM_OBJECT_NULL;
19152
19153 curr_offset_in_object += curr_object->vo_shadow_offset;
19154 shadow = curr_object->shadow;
19155
19156 vm_object_lock_shared(shadow);
19157 vm_object_unlock(curr_object);
19158
19159 curr_object = shadow;
19160 depth++;
19161 continue;
19162 } else {
5ba3f43e
A
19163 break;
19164 }
2d21ac55
A
19165 }
19166 }
b0d623f7 19167
5ba3f43e
A
19168 /* The ref_count is not strictly accurate, it measures the number */
19169 /* of entities holding a ref on the object, they may not be mapping */
19170 /* the object or may not be mapping the section holding the */
19171 /* target page but its still a ball park number and though an over- */
19172 /* count, it picks up the copy-on-write cases */
2d21ac55 19173
5ba3f43e
A
19174 /* We could also get a picture of page sharing from pmap_attributes */
19175 /* but this would under count as only faulted-in mappings would */
19176 /* show up. */
2d21ac55 19177
0a7de745 19178 if ((curr_object == object) && curr_object->shadow) {
5ba3f43e 19179 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
0a7de745 19180 }
5ba3f43e 19181
0a7de745 19182 if (!curr_object->internal) {
5ba3f43e 19183 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
0a7de745 19184 }
5ba3f43e
A
19185
19186 if (m != VM_PAGE_NULL) {
d9a64523 19187 if (m->vmp_fictitious) {
5ba3f43e 19188 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
5ba3f43e 19189 } else {
0a7de745 19190 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
5ba3f43e 19191 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
0a7de745 19192 }
5ba3f43e 19193
0a7de745 19194 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
5ba3f43e 19195 disposition |= VM_PAGE_QUERY_PAGE_REF;
0a7de745 19196 }
5ba3f43e 19197
0a7de745 19198 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
5ba3f43e 19199 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
0a7de745 19200 }
5ba3f43e 19201
f427ee49
A
19202 /*
19203 * XXX TODO4K:
19204 * when this routine deals with 4k
19205 * pages, check the appropriate CS bit
19206 * here.
19207 */
0a7de745 19208 if (m->vmp_cs_validated) {
5ba3f43e 19209 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
0a7de745
A
19210 }
19211 if (m->vmp_cs_tainted) {
5ba3f43e 19212 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
0a7de745
A
19213 }
19214 if (m->vmp_cs_nx) {
5ba3f43e 19215 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
0a7de745 19216 }
cb323159
A
19217 if (m->vmp_reusable || curr_object->all_reusable) {
19218 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19219 }
5ba3f43e 19220 }
91447636 19221 }
1c79356b 19222
5ba3f43e
A
19223 switch (flavor) {
19224 case VM_PAGE_INFO_BASIC:
19225 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19226 basic_info->disposition = disposition;
19227 basic_info->ref_count = ref_count;
19228 basic_info->object_id = (vm_object_id_t) (uintptr_t)
0a7de745 19229 VM_KERNEL_ADDRPERM(curr_object);
5ba3f43e 19230 basic_info->offset =
0a7de745 19231 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
5ba3f43e
A
19232 basic_info->depth = depth;
19233
19234 info_idx++;
19235 break;
19236 }
1c79356b 19237
5ba3f43e
A
19238 disposition = 0;
19239 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
b0d623f7 19240
5ba3f43e
A
19241 /*
19242 * Move to next offset in the range and in our object.
19243 */
f427ee49
A
19244 curr_s_offset += effective_page_size;
19245 offset_in_object += effective_page_size;
5ba3f43e 19246 curr_offset_in_object = offset_in_object;
2d21ac55 19247
5ba3f43e 19248 if (curr_object != object) {
5ba3f43e 19249 vm_object_unlock(curr_object);
1c79356b 19250
5ba3f43e 19251 curr_object = object;
1c79356b 19252
5ba3f43e
A
19253 vm_object_lock_shared(curr_object);
19254 } else {
5ba3f43e
A
19255 vm_object_lock_yield_shared(curr_object);
19256 }
19257 }
593a1d5f 19258
5ba3f43e
A
19259 vm_object_unlock(curr_object);
19260 vm_object_deallocate(curr_object);
b0d623f7 19261
5ba3f43e 19262 vm_map_lock_read(map);
b0d623f7 19263 }
0c530ab8 19264
5ba3f43e 19265 vm_map_unlock_read(map);
2d21ac55 19266 return retval;
91447636
A
19267}
19268
19269/*
19270 * vm_map_msync
19271 *
19272 * Synchronises the memory range specified with its backing store
19273 * image by either flushing or cleaning the contents to the appropriate
19274 * memory manager engaging in a memory object synchronize dialog with
19275 * the manager. The client doesn't return until the manager issues
19276 * m_o_s_completed message. MIG Magically converts user task parameter
19277 * to the task's address map.
19278 *
19279 * interpretation of sync_flags
19280 * VM_SYNC_INVALIDATE - discard pages, only return precious
19281 * pages to manager.
19282 *
19283 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19284 * - discard pages, write dirty or precious
19285 * pages back to memory manager.
19286 *
19287 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19288 * - write dirty or precious pages back to
19289 * the memory manager.
19290 *
19291 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19292 * is a hole in the region, and we would
19293 * have returned KERN_SUCCESS, return
19294 * KERN_INVALID_ADDRESS instead.
19295 *
19296 * NOTE
19297 * The memory object attributes have not yet been implemented, this
19298 * function will have to deal with the invalidate attribute
19299 *
19300 * RETURNS
19301 * KERN_INVALID_TASK Bad task parameter
19302 * KERN_INVALID_ARGUMENT both sync and async were specified.
19303 * KERN_SUCCESS The usual.
19304 * KERN_INVALID_ADDRESS There was a hole in the region.
19305 */
19306
19307kern_return_t
19308vm_map_msync(
0a7de745
A
19309 vm_map_t map,
19310 vm_map_address_t address,
19311 vm_map_size_t size,
19312 vm_sync_t sync_flags)
91447636 19313{
0a7de745
A
19314 vm_map_entry_t entry;
19315 vm_map_size_t amount_left;
19316 vm_object_offset_t offset;
f427ee49 19317 vm_object_offset_t start_offset, end_offset;
0a7de745
A
19318 boolean_t do_sync_req;
19319 boolean_t had_hole = FALSE;
19320 vm_map_offset_t pmap_offset;
5ba3f43e 19321
91447636 19322 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
0a7de745
A
19323 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19324 return KERN_INVALID_ARGUMENT;
19325 }
1c79356b 19326
f427ee49
A
19327 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19328 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19329 }
19330
1c79356b 19331 /*
91447636 19332 * align address and size on page boundaries
1c79356b 19333 */
39236c6e 19334 size = (vm_map_round_page(address + size,
0a7de745
A
19335 VM_MAP_PAGE_MASK(map)) -
19336 vm_map_trunc_page(address,
19337 VM_MAP_PAGE_MASK(map)));
39236c6e 19338 address = vm_map_trunc_page(address,
0a7de745 19339 VM_MAP_PAGE_MASK(map));
1c79356b 19340
0a7de745
A
19341 if (map == VM_MAP_NULL) {
19342 return KERN_INVALID_TASK;
19343 }
1c79356b 19344
0a7de745
A
19345 if (size == 0) {
19346 return KERN_SUCCESS;
19347 }
1c79356b 19348
91447636 19349 amount_left = size;
1c79356b 19350
91447636 19351 while (amount_left > 0) {
0a7de745
A
19352 vm_object_size_t flush_size;
19353 vm_object_t object;
1c79356b 19354
91447636
A
19355 vm_map_lock(map);
19356 if (!vm_map_lookup_entry(map,
0a7de745
A
19357 address,
19358 &entry)) {
19359 vm_map_size_t skip;
91447636
A
19360
19361 /*
19362 * hole in the address map.
19363 */
19364 had_hole = TRUE;
19365
39037602
A
19366 if (sync_flags & VM_SYNC_KILLPAGES) {
19367 /*
19368 * For VM_SYNC_KILLPAGES, there should be
19369 * no holes in the range, since we couldn't
19370 * prevent someone else from allocating in
19371 * that hole and we wouldn't want to "kill"
19372 * their pages.
19373 */
19374 vm_map_unlock(map);
19375 break;
19376 }
19377
91447636
A
19378 /*
19379 * Check for empty map.
19380 */
19381 if (entry == vm_map_to_entry(map) &&
19382 entry->vme_next == entry) {
19383 vm_map_unlock(map);
19384 break;
19385 }
19386 /*
19387 * Check that we don't wrap and that
19388 * we have at least one real map entry.
19389 */
19390 if ((map->hdr.nentries == 0) ||
19391 (entry->vme_next->vme_start < address)) {
19392 vm_map_unlock(map);
19393 break;
19394 }
19395 /*
19396 * Move up to the next entry if needed
19397 */
19398 skip = (entry->vme_next->vme_start - address);
0a7de745 19399 if (skip >= amount_left) {
91447636 19400 amount_left = 0;
0a7de745 19401 } else {
91447636 19402 amount_left -= skip;
0a7de745 19403 }
91447636
A
19404 address = entry->vme_next->vme_start;
19405 vm_map_unlock(map);
19406 continue;
19407 }
1c79356b 19408
91447636 19409 offset = address - entry->vme_start;
3e170ce0 19410 pmap_offset = address;
1c79356b 19411
91447636
A
19412 /*
19413 * do we have more to flush than is contained in this
19414 * entry ?
19415 */
19416 if (amount_left + entry->vme_start + offset > entry->vme_end) {
19417 flush_size = entry->vme_end -
0a7de745 19418 (entry->vme_start + offset);
91447636
A
19419 } else {
19420 flush_size = amount_left;
19421 }
19422 amount_left -= flush_size;
19423 address += flush_size;
1c79356b 19424
91447636 19425 if (entry->is_sub_map == TRUE) {
0a7de745
A
19426 vm_map_t local_map;
19427 vm_map_offset_t local_offset;
1c79356b 19428
3e170ce0
A
19429 local_map = VME_SUBMAP(entry);
19430 local_offset = VME_OFFSET(entry);
94ff46dc 19431 vm_map_reference(local_map);
91447636
A
19432 vm_map_unlock(map);
19433 if (vm_map_msync(
2d21ac55
A
19434 local_map,
19435 local_offset,
19436 flush_size,
19437 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
19438 had_hole = TRUE;
19439 }
94ff46dc 19440 vm_map_deallocate(local_map);
91447636
A
19441 continue;
19442 }
3e170ce0 19443 object = VME_OBJECT(entry);
1c79356b 19444
91447636
A
19445 /*
19446 * We can't sync this object if the object has not been
19447 * created yet
19448 */
19449 if (object == VM_OBJECT_NULL) {
19450 vm_map_unlock(map);
19451 continue;
19452 }
3e170ce0 19453 offset += VME_OFFSET(entry);
1c79356b 19454
0a7de745 19455 vm_object_lock(object);
1c79356b 19456
91447636 19457 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
0a7de745 19458 int kill_pages = 0;
b0d623f7 19459 boolean_t reusable_pages = FALSE;
91447636 19460
f427ee49
A
19461 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19462 /*
19463 * This is a destructive operation and so we
19464 * err on the side of limiting the range of
19465 * the operation.
19466 */
19467 start_offset = vm_object_round_page(offset);
19468 end_offset = vm_object_trunc_page(offset + flush_size);
19469
19470 if (end_offset <= start_offset) {
19471 vm_object_unlock(object);
19472 vm_map_unlock(map);
19473 continue;
19474 }
19475
19476 pmap_offset += start_offset - offset;;
19477 } else {
19478 start_offset = offset;
19479 end_offset = offset + flush_size;
19480 }
19481
91447636 19482 if (sync_flags & VM_SYNC_KILLPAGES) {
0a7de745
A
19483 if (((object->ref_count == 1) ||
19484 ((object->copy_strategy !=
19485 MEMORY_OBJECT_COPY_SYMMETRIC) &&
19486 (object->copy == VM_OBJECT_NULL))) &&
39037602
A
19487 (object->shadow == VM_OBJECT_NULL)) {
19488 if (object->ref_count != 1) {
19489 vm_page_stats_reusable.free_shared++;
19490 }
0a7de745 19491 kill_pages = 1;
39037602 19492 } else {
0a7de745 19493 kill_pages = -1;
39037602 19494 }
91447636 19495 }
0a7de745
A
19496 if (kill_pages != -1) {
19497 vm_object_deactivate_pages(
3e170ce0 19498 object,
f427ee49
A
19499 start_offset,
19500 (vm_object_size_t) (end_offset - start_offset),
3e170ce0
A
19501 kill_pages,
19502 reusable_pages,
19503 map->pmap,
19504 pmap_offset);
0a7de745 19505 }
91447636
A
19506 vm_object_unlock(object);
19507 vm_map_unlock(map);
19508 continue;
1c79356b 19509 }
91447636
A
19510 /*
19511 * We can't sync this object if there isn't a pager.
19512 * Don't bother to sync internal objects, since there can't
19513 * be any "permanent" storage for these objects anyway.
19514 */
19515 if ((object->pager == MEMORY_OBJECT_NULL) ||
19516 (object->internal) || (object->private)) {
19517 vm_object_unlock(object);
19518 vm_map_unlock(map);
19519 continue;
19520 }
19521 /*
19522 * keep reference on the object until syncing is done
19523 */
2d21ac55 19524 vm_object_reference_locked(object);
91447636 19525 vm_object_unlock(object);
1c79356b 19526
91447636 19527 vm_map_unlock(map);
1c79356b 19528
f427ee49
A
19529 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19530 start_offset = vm_object_trunc_page(offset);
19531 end_offset = vm_object_round_page(offset + flush_size);
19532 } else {
19533 start_offset = offset;
19534 end_offset = offset + flush_size;
19535 }
19536
91447636 19537 do_sync_req = vm_object_sync(object,
f427ee49
A
19538 start_offset,
19539 (end_offset - start_offset),
0a7de745
A
19540 sync_flags & VM_SYNC_INVALIDATE,
19541 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19542 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19543 sync_flags & VM_SYNC_SYNCHRONOUS);
2d21ac55 19544
5ba3f43e 19545 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
0a7de745 19546 /*
5ba3f43e
A
19547 * clear out the clustering and read-ahead hints
19548 */
0a7de745 19549 vm_object_lock(object);
2d21ac55 19550
5ba3f43e
A
19551 object->pages_created = 0;
19552 object->pages_used = 0;
19553 object->sequential = 0;
19554 object->last_alloc = 0;
2d21ac55 19555
2d21ac55 19556 vm_object_unlock(object);
2d21ac55 19557 }
5ba3f43e
A
19558 vm_object_deallocate(object);
19559 } /* while */
91447636
A
19560
19561 /* for proper msync() behaviour */
0a7de745
A
19562 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19563 return KERN_INVALID_ADDRESS;
19564 }
91447636 19565
0a7de745 19566 return KERN_SUCCESS;
91447636 19567}/* vm_msync */
1c79356b 19568
f427ee49
A
19569kern_return_t
19570vm_named_entry_from_vm_object(
19571 vm_named_entry_t named_entry,
19572 vm_object_t object,
19573 vm_object_offset_t offset,
19574 vm_object_size_t size,
19575 vm_prot_t prot)
19576{
19577 vm_map_copy_t copy;
19578 vm_map_entry_t copy_entry;
19579
19580 assert(!named_entry->is_sub_map);
19581 assert(!named_entry->is_copy);
19582 assert(!named_entry->is_object);
19583 assert(!named_entry->internal);
19584 assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
19585
19586 copy = vm_map_copy_allocate();
19587 copy->type = VM_MAP_COPY_ENTRY_LIST;
19588 copy->offset = offset;
19589 copy->size = size;
19590 copy->cpy_hdr.page_shift = PAGE_SHIFT;
19591 vm_map_store_init(&copy->cpy_hdr);
19592
19593 copy_entry = vm_map_copy_entry_create(copy, FALSE);
19594 copy_entry->protection = prot;
19595 copy_entry->max_protection = prot;
19596 copy_entry->use_pmap = TRUE;
19597 copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
19598 copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
19599 VME_OBJECT_SET(copy_entry, object);
19600 VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
19601 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);
19602
19603 named_entry->backing.copy = copy;
19604 named_entry->is_object = TRUE;
19605 if (object->internal) {
19606 named_entry->internal = TRUE;
19607 }
19608
19609 DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, object, offset, size, prot);
19610
19611 return KERN_SUCCESS;
19612}
19613
19614vm_object_t
19615vm_named_entry_to_vm_object(
19616 vm_named_entry_t named_entry)
19617{
19618 vm_map_copy_t copy;
19619 vm_map_entry_t copy_entry;
19620 vm_object_t object;
19621
19622 assert(!named_entry->is_sub_map);
19623 assert(!named_entry->is_copy);
19624 assert(named_entry->is_object);
19625 copy = named_entry->backing.copy;
19626 assert(copy != VM_MAP_COPY_NULL);
19627 assert(copy->cpy_hdr.nentries == 1);
19628 copy_entry = vm_map_copy_first_entry(copy);
19629 assert(!copy_entry->is_sub_map);
19630 object = VME_OBJECT(copy_entry);
19631
19632 DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);
19633
19634 return object;
19635}
19636
1c79356b 19637/*
91447636
A
19638 * Routine: convert_port_entry_to_map
19639 * Purpose:
19640 * Convert from a port specifying an entry or a task
19641 * to a map. Doesn't consume the port ref; produces a map ref,
19642 * which may be null. Unlike convert_port_to_map, the
19643 * port may be task or a named entry backed.
19644 * Conditions:
19645 * Nothing locked.
1c79356b 19646 */
1c79356b 19647
1c79356b 19648
91447636
A
19649vm_map_t
19650convert_port_entry_to_map(
0a7de745 19651 ipc_port_t port)
91447636
A
19652{
19653 vm_map_t map;
0a7de745
A
19654 vm_named_entry_t named_entry;
19655 uint32_t try_failed_count = 0;
1c79356b 19656
0a7de745
A
19657 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19658 while (TRUE) {
91447636 19659 ip_lock(port);
0a7de745
A
19660 if (ip_active(port) && (ip_kotype(port)
19661 == IKOT_NAMED_ENTRY)) {
91447636 19662 named_entry =
ea3f0419 19663 (vm_named_entry_t) ip_get_kobject(port);
b0d623f7 19664 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
0a7de745 19665 ip_unlock(port);
2d21ac55
A
19666
19667 try_failed_count++;
0a7de745
A
19668 mutex_pause(try_failed_count);
19669 continue;
19670 }
91447636 19671 named_entry->ref_count++;
b0d623f7 19672 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
19673 ip_unlock(port);
19674 if ((named_entry->is_sub_map) &&
5ba3f43e 19675 (named_entry->protection
0a7de745 19676 & VM_PROT_WRITE)) {
91447636 19677 map = named_entry->backing.map;
f427ee49
A
19678 if (map->pmap != PMAP_NULL) {
19679 if (map->pmap == kernel_pmap) {
19680 panic("userspace has access "
19681 "to a kernel map %p", map);
19682 }
19683 pmap_require(map->pmap);
19684 }
91447636
A
19685 } else {
19686 mach_destroy_memory_entry(port);
19687 return VM_MAP_NULL;
19688 }
c3c9b80d 19689 vm_map_reference(map);
91447636
A
19690 mach_destroy_memory_entry(port);
19691 break;
0a7de745 19692 } else {
91447636 19693 return VM_MAP_NULL;
0a7de745 19694 }
91447636 19695 }
0a7de745 19696 } else {
91447636 19697 map = convert_port_to_map(port);
0a7de745 19698 }
1c79356b 19699
91447636
A
19700 return map;
19701}
1c79356b 19702
91447636
A
19703/*
19704 * Routine: convert_port_entry_to_object
19705 * Purpose:
19706 * Convert from a port specifying a named entry to an
19707 * object. Doesn't consume the port ref; produces a map ref,
5ba3f43e 19708 * which may be null.
91447636
A
19709 * Conditions:
19710 * Nothing locked.
19711 */
1c79356b 19712
1c79356b 19713
91447636
A
19714vm_object_t
19715convert_port_entry_to_object(
0a7de745 19716 ipc_port_t port)
91447636 19717{
0a7de745
A
19718 vm_object_t object = VM_OBJECT_NULL;
19719 vm_named_entry_t named_entry;
19720 uint32_t try_failed_count = 0;
39236c6e
A
19721
19722 if (IP_VALID(port) &&
19723 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
0a7de745 19724try_again:
39236c6e
A
19725 ip_lock(port);
19726 if (ip_active(port) &&
19727 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
ea3f0419 19728 named_entry = (vm_named_entry_t) ip_get_kobject(port);
39236c6e 19729 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 19730 ip_unlock(port);
39236c6e
A
19731 try_failed_count++;
19732 mutex_pause(try_failed_count);
0a7de745 19733 goto try_again;
39236c6e
A
19734 }
19735 named_entry->ref_count++;
19736 lck_mtx_unlock(&(named_entry)->Lock);
19737 ip_unlock(port);
19738 if (!(named_entry->is_sub_map) &&
39236c6e 19739 !(named_entry->is_copy) &&
f427ee49 19740 (named_entry->is_object) &&
39236c6e 19741 (named_entry->protection & VM_PROT_WRITE)) {
f427ee49
A
19742 vm_map_copy_t copy;
19743 vm_map_entry_t copy_entry;
19744
19745 copy = named_entry->backing.copy;
19746 assert(copy->cpy_hdr.nentries == 1);
19747 copy_entry = vm_map_copy_first_entry(copy);
19748 assert(!copy_entry->is_sub_map);
19749 object = VME_OBJECT(copy_entry);
19750 assert(object != VM_OBJECT_NULL);
39236c6e 19751 vm_object_reference(object);
91447636 19752 }
39236c6e 19753 mach_destroy_memory_entry(port);
1c79356b 19754 }
1c79356b 19755 }
91447636
A
19756
19757 return object;
1c79356b 19758}
9bccf70c
A
19759
19760/*
91447636
A
19761 * Export routines to other components for the things we access locally through
19762 * macros.
9bccf70c 19763 */
91447636
A
19764#undef current_map
19765vm_map_t
19766current_map(void)
9bccf70c 19767{
0a7de745 19768 return current_map_fast();
9bccf70c
A
19769}
19770
19771/*
19772 * vm_map_reference:
19773 *
c3c9b80d 19774 * Takes a reference on the specified map.
9bccf70c 19775 */
9bccf70c
A
19776void
19777vm_map_reference(
0a7de745 19778 vm_map_t map)
9bccf70c 19779{
c3c9b80d
A
19780 if (__probable(map != VM_MAP_NULL)) {
19781 vm_map_require(map);
19782 os_ref_retain(&map->map_refcnt);
0a7de745 19783 }
9bccf70c
A
19784}
19785
19786/*
19787 * vm_map_deallocate:
19788 *
19789 * Removes a reference from the specified map,
19790 * destroying it if no references remain.
19791 * The map should not be locked.
19792 */
19793void
19794vm_map_deallocate(
0a7de745 19795 vm_map_t map)
9bccf70c 19796{
c3c9b80d
A
19797 if (__probable(map != VM_MAP_NULL)) {
19798 vm_map_require(map);
19799 if (os_ref_release(&map->map_refcnt) == 0) {
19800 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
19801 }
9bccf70c 19802 }
0c530ab8 19803}
91447636 19804
f427ee49
A
19805void
19806vm_map_inspect_deallocate(
19807 vm_map_inspect_t map)
19808{
19809 vm_map_deallocate((vm_map_t)map);
19810}
19811
19812void
19813vm_map_read_deallocate(
19814 vm_map_read_t map)
19815{
19816 vm_map_deallocate((vm_map_t)map);
19817}
19818
91447636 19819
0c530ab8
A
19820void
19821vm_map_disable_NX(vm_map_t map)
19822{
0a7de745
A
19823 if (map == NULL) {
19824 return;
19825 }
19826 if (map->pmap == NULL) {
19827 return;
19828 }
0c530ab8 19829
0a7de745 19830 pmap_disable_NX(map->pmap);
0c530ab8
A
19831}
19832
6d2010ae
A
19833void
19834vm_map_disallow_data_exec(vm_map_t map)
19835{
0a7de745
A
19836 if (map == NULL) {
19837 return;
19838 }
6d2010ae 19839
0a7de745 19840 map->map_disallow_data_exec = TRUE;
6d2010ae
A
19841}
19842
0c530ab8
A
19843/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19844 * more descriptive.
19845 */
19846void
19847vm_map_set_32bit(vm_map_t map)
19848{
5ba3f43e
A
19849#if defined(__arm__) || defined(__arm64__)
19850 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
19851#else
0c530ab8 19852 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
5ba3f43e 19853#endif
0c530ab8
A
19854}
19855
19856
19857void
19858vm_map_set_64bit(vm_map_t map)
19859{
5ba3f43e
A
19860#if defined(__arm__) || defined(__arm64__)
19861 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
19862#else
0c530ab8 19863 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
5ba3f43e 19864#endif
0c530ab8
A
19865}
19866
813fb2f6 19867/*
d9a64523 19868 * Expand the maximum size of an existing map to the maximum supported.
813fb2f6
A
19869 */
19870void
19871vm_map_set_jumbo(vm_map_t map)
19872{
f427ee49 19873#if defined (__arm64__) && !defined(CONFIG_ARROW)
d9a64523
A
19874 vm_map_set_max_addr(map, ~0);
19875#else /* arm64 */
19876 (void) map;
19877#endif
19878}
19879
cb323159
A
19880/*
19881 * This map has a JIT entitlement
19882 */
19883void
19884vm_map_set_jit_entitled(vm_map_t map)
19885{
19886#if defined (__arm64__)
19887 pmap_set_jit_entitled(map->pmap);
19888#else /* arm64 */
19889 (void) map;
19890#endif
19891}
19892
d9a64523
A
19893/*
19894 * Expand the maximum size of an existing map.
19895 */
19896void
19897vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
19898{
19899#if defined(__arm64__)
19900 vm_map_offset_t max_supported_offset = 0;
5ba3f43e 19901 vm_map_offset_t old_max_offset = map->max_offset;
d9a64523
A
19902 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
19903
19904 new_max_offset = trunc_page(new_max_offset);
19905
19906 /* The address space cannot be shrunk using this routine. */
19907 if (old_max_offset >= new_max_offset) {
19908 return;
19909 }
19910
19911 if (max_supported_offset < new_max_offset) {
19912 new_max_offset = max_supported_offset;
19913 }
19914
19915 map->max_offset = new_max_offset;
19916
19917 if (map->holes_list->prev->vme_end == old_max_offset) {
5ba3f43e
A
19918 /*
19919 * There is already a hole at the end of the map; simply make it bigger.
19920 */
19921 map->holes_list->prev->vme_end = map->max_offset;
19922 } else {
19923 /*
19924 * There is no hole at the end, so we need to create a new hole
19925 * for the new empty space we're creating.
19926 */
19927 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
19928 new_hole->start = old_max_offset;
19929 new_hole->end = map->max_offset;
19930 new_hole->prev = map->holes_list->prev;
19931 new_hole->next = (struct vm_map_entry *)map->holes_list;
19932 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
19933 map->holes_list->prev = (struct vm_map_entry *)new_hole;
19934 }
d9a64523
A
19935#else
19936 (void)map;
19937 (void)new_max_offset;
5ba3f43e 19938#endif
813fb2f6
A
19939}
19940
0c530ab8 19941vm_map_offset_t
3e170ce0 19942vm_compute_max_offset(boolean_t is64)
0c530ab8 19943{
5ba3f43e 19944#if defined(__arm__) || defined(__arm64__)
0a7de745 19945 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
5ba3f43e 19946#else
0a7de745 19947 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
5ba3f43e
A
19948#endif
19949}
19950
19951void
19952vm_map_get_max_aslr_slide_section(
0a7de745
A
19953 vm_map_t map __unused,
19954 int64_t *max_sections,
19955 int64_t *section_size)
5ba3f43e
A
19956{
19957#if defined(__arm64__)
19958 *max_sections = 3;
19959 *section_size = ARM_TT_TWIG_SIZE;
19960#else
19961 *max_sections = 1;
19962 *section_size = 0;
19963#endif
0c530ab8
A
19964}
19965
39236c6e 19966uint64_t
5ba3f43e 19967vm_map_get_max_aslr_slide_pages(vm_map_t map)
39236c6e 19968{
5ba3f43e
A
19969#if defined(__arm64__)
19970 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
19971 * limited embedded address space; this is also meant to minimize pmap
19972 * memory usage on 16KB page systems.
19973 */
0a7de745 19974 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
5ba3f43e 19975#else
0a7de745 19976 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
5ba3f43e
A
19977#endif
19978}
19979
19980uint64_t
19981vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
19982{
19983#if defined(__arm64__)
19984 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
19985 * of independent entropy on 16KB page systems.
19986 */
0a7de745 19987 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
5ba3f43e 19988#else
0a7de745 19989 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
5ba3f43e 19990#endif
39236c6e
A
19991}
19992
0a7de745 19993#ifndef __arm__
0c530ab8 19994boolean_t
2d21ac55 19995vm_map_is_64bit(
0a7de745 19996 vm_map_t map)
2d21ac55
A
19997{
19998 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
19999}
5ba3f43e 20000#endif
2d21ac55
A
20001
20002boolean_t
316670eb 20003vm_map_has_hard_pagezero(
0a7de745
A
20004 vm_map_t map,
20005 vm_map_offset_t pagezero_size)
0c530ab8
A
20006{
20007 /*
20008 * XXX FBDP
20009 * We should lock the VM map (for read) here but we can get away
20010 * with it for now because there can't really be any race condition:
20011 * the VM map's min_offset is changed only when the VM map is created
20012 * and when the zero page is established (when the binary gets loaded),
20013 * and this routine gets called only when the task terminates and the
20014 * VM map is being torn down, and when a new map is created via
20015 * load_machfile()/execve().
20016 */
0a7de745 20017 return map->min_offset >= pagezero_size;
0c530ab8
A
20018}
20019
316670eb
A
20020/*
20021 * Raise a VM map's maximun offset.
20022 */
20023kern_return_t
20024vm_map_raise_max_offset(
0a7de745
A
20025 vm_map_t map,
20026 vm_map_offset_t new_max_offset)
316670eb 20027{
0a7de745 20028 kern_return_t ret;
316670eb
A
20029
20030 vm_map_lock(map);
20031 ret = KERN_INVALID_ADDRESS;
20032
20033 if (new_max_offset >= map->max_offset) {
5ba3f43e 20034 if (!vm_map_is_64bit(map)) {
316670eb
A
20035 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20036 map->max_offset = new_max_offset;
20037 ret = KERN_SUCCESS;
20038 }
20039 } else {
20040 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20041 map->max_offset = new_max_offset;
20042 ret = KERN_SUCCESS;
20043 }
20044 }
20045 }
20046
20047 vm_map_unlock(map);
20048 return ret;
20049}
20050
20051
0c530ab8
A
20052/*
20053 * Raise a VM map's minimum offset.
20054 * To strictly enforce "page zero" reservation.
20055 */
20056kern_return_t
20057vm_map_raise_min_offset(
0a7de745
A
20058 vm_map_t map,
20059 vm_map_offset_t new_min_offset)
0c530ab8 20060{
0a7de745 20061 vm_map_entry_t first_entry;
0c530ab8 20062
39236c6e 20063 new_min_offset = vm_map_round_page(new_min_offset,
0a7de745 20064 VM_MAP_PAGE_MASK(map));
0c530ab8
A
20065
20066 vm_map_lock(map);
20067
20068 if (new_min_offset < map->min_offset) {
20069 /*
20070 * Can't move min_offset backwards, as that would expose
20071 * a part of the address space that was previously, and for
20072 * possibly good reasons, inaccessible.
20073 */
20074 vm_map_unlock(map);
20075 return KERN_INVALID_ADDRESS;
20076 }
3e170ce0
A
20077 if (new_min_offset >= map->max_offset) {
20078 /* can't go beyond the end of the address space */
20079 vm_map_unlock(map);
20080 return KERN_INVALID_ADDRESS;
20081 }
0c530ab8
A
20082
20083 first_entry = vm_map_first_entry(map);
20084 if (first_entry != vm_map_to_entry(map) &&
20085 first_entry->vme_start < new_min_offset) {
20086 /*
20087 * Some memory was already allocated below the new
20088 * minimun offset. It's too late to change it now...
20089 */
20090 vm_map_unlock(map);
20091 return KERN_NO_SPACE;
20092 }
20093
20094 map->min_offset = new_min_offset;
20095
3e170ce0
A
20096 assert(map->holes_list);
20097 map->holes_list->start = new_min_offset;
20098 assert(new_min_offset < map->holes_list->end);
20099
0c530ab8
A
20100 vm_map_unlock(map);
20101
20102 return KERN_SUCCESS;
20103}
2d21ac55
A
20104
20105/*
20106 * Set the limit on the maximum amount of user wired memory allowed for this map.
20107 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
20108 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
20109 * don't have to reach over to the BSD data structures.
20110 */
20111
20112void
0a7de745
A
20113vm_map_set_user_wire_limit(vm_map_t map,
20114 vm_size_t limit)
2d21ac55
A
20115{
20116 map->user_wire_limit = limit;
20117}
593a1d5f 20118
b0d623f7 20119
0a7de745
A
20120void
20121vm_map_switch_protect(vm_map_t map,
20122 boolean_t val)
593a1d5f
A
20123{
20124 vm_map_lock(map);
0a7de745 20125 map->switch_protect = val;
593a1d5f 20126 vm_map_unlock(map);
b0d623f7 20127}
b7266188 20128
f427ee49
A
20129extern int cs_process_enforcement_enable;
20130boolean_t
20131vm_map_cs_enforcement(
20132 vm_map_t map)
20133{
20134 if (cs_process_enforcement_enable) {
20135 return TRUE;
20136 }
20137 return map->cs_enforcement;
20138}
20139
a991bd8d
A
20140kern_return_t
20141vm_map_cs_wx_enable(
20142 vm_map_t map)
20143{
20144 return pmap_cs_allow_invalid(vm_map_pmap(map));
20145}
20146
f427ee49
A
20147void
20148vm_map_cs_enforcement_set(
20149 vm_map_t map,
20150 boolean_t val)
20151{
20152 vm_map_lock(map);
20153 map->cs_enforcement = val;
20154 pmap_set_vm_map_cs_enforced(map->pmap, val);
20155 vm_map_unlock(map);
20156}
20157
39236c6e
A
20158/*
20159 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20160 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20161 * bump both counters.
20162 */
20163void
20164vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
20165{
20166 pmap_t pmap = vm_map_pmap(map);
20167
fe8ab488 20168 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
5ba3f43e 20169 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
39236c6e
A
20170}
20171
20172void
20173vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
20174{
20175 pmap_t pmap = vm_map_pmap(map);
20176
fe8ab488 20177 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
5ba3f43e 20178 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
39236c6e
A
20179}
20180
b7266188
A
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
/*
 * vm_map_sign:
 *
 * Mark every page of [start, end) as code-signing validated.  The whole
 * range must be covered by a single non-submap map entry that already
 * has a VM object, and every page must be resident and in a normal
 * state.  Pages handled before a failure stay marked.
 *
 * Returns:
 *	KERN_INVALID_ARGUMENT	null map, range not covered by one entry,
 *				or entry without an object
 *	KERN_INVALID_ADDRESS	"start" is unmapped or maps a submap
 *	KERN_FAILURE		a page is missing, busy or in a special state
 *	KERN_SUCCESS		all pages were marked
 */
kern_return_t
vm_map_sign(vm_map_t map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
	vm_map_entry_t          entry;
	vm_page_t               m;
	vm_object_t             object;
	vm_object_offset_t      offset;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Translate "start" into an object offset while the map is still
	 * locked: "entry" must not be looked at once the map lock has been
	 * dropped below.
	 */
	offset = start - entry->vme_start + VME_OFFSET(entry);

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object, offset);
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->vmp_busy ||
		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		/* XXX TODO4K: deal with 4k subpages individually? */
		m->vmp_cs_validated = VMP_CS_ALL_TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->vmp_wpmapped = FALSE;
		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
6d2010ae 20276
0a7de745
A
20277kern_return_t
20278vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
5ba3f43e 20279{
0a7de745 20280 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
fe8ab488 20281 vm_map_entry_t next_entry;
0a7de745
A
20282 kern_return_t kr = KERN_SUCCESS;
20283 vm_map_t zap_map;
fe8ab488
A
20284
20285 vm_map_lock(map);
20286
20287 /*
20288 * We use a "zap_map" to avoid having to unlock
20289 * the "map" in vm_map_delete().
20290 */
20291 zap_map = vm_map_create(PMAP_NULL,
0a7de745
A
20292 map->min_offset,
20293 map->max_offset,
20294 map->hdr.entries_pageable);
fe8ab488
A
20295
20296 if (zap_map == VM_MAP_NULL) {
20297 return KERN_RESOURCE_SHORTAGE;
20298 }
20299
5ba3f43e 20300 vm_map_set_page_shift(zap_map,
0a7de745 20301 VM_MAP_PAGE_SHIFT(map));
3e170ce0 20302 vm_map_disable_hole_optimization(zap_map);
fe8ab488
A
20303
20304 for (entry = vm_map_first_entry(map);
0a7de745
A
20305 entry != vm_map_to_entry(map);
20306 entry = next_entry) {
fe8ab488 20307 next_entry = entry->vme_next;
5ba3f43e 20308
3e170ce0
A
20309 if (VME_OBJECT(entry) &&
20310 !entry->is_sub_map &&
20311 (VME_OBJECT(entry)->internal == TRUE) &&
20312 (VME_OBJECT(entry)->ref_count == 1)) {
3e170ce0
A
20313 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20314 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
fe8ab488 20315
5ba3f43e 20316 (void)vm_map_delete(map,
0a7de745
A
20317 entry->vme_start,
20318 entry->vme_end,
20319 VM_MAP_REMOVE_SAVE_ENTRIES,
20320 zap_map);
fe8ab488
A
20321 }
20322 }
20323
20324 vm_map_unlock(map);
20325
0a7de745 20326 /*
fe8ab488 20327 * Get rid of the "zap_maps" and all the map entries that
0a7de745
A
20328 * they may still contain.
20329 */
20330 if (zap_map != VM_MAP_NULL) {
20331 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
20332 zap_map = VM_MAP_NULL;
20333 }
fe8ab488
A
20334
20335 return kr;
20336}
20337
6d2010ae 20338
39037602
A
20339#if DEVELOPMENT || DEBUG
20340
20341int
20342vm_map_disconnect_page_mappings(
20343 vm_map_t map,
20344 boolean_t do_unnest)
6d2010ae
A
20345{
20346 vm_map_entry_t entry;
0a7de745 20347 int page_count = 0;
39037602
A
20348
20349 if (do_unnest == TRUE) {
20350#ifndef NO_NESTED_PMAP
20351 vm_map_lock(map);
20352
20353 for (entry = vm_map_first_entry(map);
0a7de745
A
20354 entry != vm_map_to_entry(map);
20355 entry = entry->vme_next) {
39037602
A
20356 if (entry->is_sub_map && entry->use_pmap) {
20357 /*
20358 * Make sure the range between the start of this entry and
20359 * the end of this entry is no longer nested, so that
20360 * we will only remove mappings from the pmap in use by this
20361 * this task
20362 */
20363 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
20364 }
20365 }
20366 vm_map_unlock(map);
20367#endif
20368 }
6d2010ae 20369 vm_map_lock_read(map);
39037602
A
20370
20371 page_count = map->pmap->stats.resident_count;
20372
6d2010ae 20373 for (entry = vm_map_first_entry(map);
0a7de745
A
20374 entry != vm_map_to_entry(map);
20375 entry = entry->vme_next) {
39037602 20376 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
0a7de745 20377 (VME_OBJECT(entry)->phys_contiguous))) {
6d2010ae
A
20378 continue;
20379 }
0a7de745 20380 if (entry->is_sub_map) {
39037602 20381 assert(!entry->use_pmap);
0a7de745 20382 }
6d2010ae 20383
39037602 20384 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
6d2010ae 20385 }
6d2010ae
A
20386 vm_map_unlock_read(map);
20387
39037602 20388 return page_count;
6d2010ae
A
20389}
20390
f427ee49
A
20391kern_return_t
20392vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
20393{
20394 vm_object_t object = NULL;
20395 vm_object_offset_t offset;
20396 vm_prot_t prot;
20397 boolean_t wired;
20398 vm_map_version_t version;
20399 vm_map_t real_map;
20400 int result = KERN_FAILURE;
20401
20402 vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
20403 vm_map_lock(map);
20404
20405 result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
20406 OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
20407 NULL, &real_map, NULL);
20408 if (object == NULL) {
20409 result = KERN_MEMORY_ERROR;
20410 } else if (object->pager) {
20411 result = vm_compressor_pager_inject_error(object->pager,
20412 offset);
20413 } else {
20414 result = KERN_MEMORY_PRESENT;
20415 }
20416
20417 if (object != NULL) {
20418 vm_object_unlock(object);
20419 }
20420
20421 if (real_map != map) {
20422 vm_map_unlock(real_map);
20423 }
20424 vm_map_unlock(map);
20425
20426 return result;
20427}
20428
39037602
A
20429#endif
20430
20431
20432#if CONFIG_FREEZE
20433
20434
f427ee49 20435extern struct freezer_context freezer_context_global;
3e170ce0
A
20436AbsoluteTime c_freezer_last_yield_ts = 0;
20437
d9a64523
A
20438extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20439extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20440
20441kern_return_t
20442vm_map_freeze(
cb323159 20443 task_t task,
0a7de745
A
20444 unsigned int *purgeable_count,
20445 unsigned int *wired_count,
20446 unsigned int *clean_count,
20447 unsigned int *dirty_count,
cb323159 20448 unsigned int dirty_budget,
0a7de745
A
20449 unsigned int *shared_count,
20450 int *freezer_error_code,
20451 boolean_t eval_only)
5ba3f43e 20452{
0a7de745
A
20453 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
20454 kern_return_t kr = KERN_SUCCESS;
20455 boolean_t evaluation_phase = TRUE;
20456 vm_object_t cur_shared_object = NULL;
20457 int cur_shared_obj_ref_cnt = 0;
20458 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
6d2010ae 20459
d9a64523 20460 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
6d2010ae 20461
6d2010ae
A
20462 /*
20463 * We need the exclusive lock here so that we can
20464 * block any page faults or lookups while we are
20465 * in the middle of freezing this vm map.
20466 */
cb323159
A
20467 vm_map_t map = task->map;
20468
6d2010ae
A
20469 vm_map_lock(map);
20470
39037602
A
20471 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
20472
20473 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
d9a64523
A
20474 if (vm_compressor_low_on_space()) {
20475 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20476 }
20477
20478 if (vm_swap_low_on_space()) {
20479 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20480 }
20481
39037602 20482 kr = KERN_NO_SPACE;
5ba3f43e 20483 goto done;
6d2010ae 20484 }
39037602 20485
d9a64523
A
20486 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
20487 /*
20488 * In-memory compressor backing the freezer. No disk.
20489 * So no need to do the evaluation phase.
20490 */
20491 evaluation_phase = FALSE;
20492
20493 if (eval_only == TRUE) {
20494 /*
20495 * We don't support 'eval_only' mode
20496 * in this non-swap config.
20497 */
20498 *freezer_error_code = FREEZER_ERROR_GENERIC;
20499 kr = KERN_INVALID_ARGUMENT;
20500 goto done;
20501 }
20502
f427ee49 20503 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
d9a64523
A
20504 clock_get_uptime(&c_freezer_last_yield_ts);
20505 }
20506again:
3e170ce0 20507
6d2010ae 20508 for (entry2 = vm_map_first_entry(map);
0a7de745
A
20509 entry2 != vm_map_to_entry(map);
20510 entry2 = entry2->vme_next) {
20511 vm_object_t src_object = VME_OBJECT(entry2);
6d2010ae 20512
39037602 20513 if (src_object &&
3e170ce0 20514 !entry2->is_sub_map &&
39037602 20515 !src_object->phys_contiguous) {
39236c6e 20516 /* If eligible, scan the entry, moving eligible pages over to our parent object */
6d2010ae 20517
39037602 20518 if (src_object->internal == TRUE) {
39037602 20519 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
cb323159
A
20520 /*
20521 * We skip purgeable objects during evaluation phase only.
20522 * If we decide to freeze this process, we'll explicitly
20523 * purge these objects before we go around again with
20524 * 'evaluation_phase' set to FALSE.
20525 */
20526
20527 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
20528 /*
20529 * We want to purge objects that may not belong to this task but are mapped
20530 * in this task alone. Since we already purged this task's purgeable memory
20531 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
20532 * on this task's purgeable objects. Hence the check for only volatile objects.
20533 */
20534 if (evaluation_phase == FALSE &&
20535 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
20536 (src_object->ref_count == 1)) {
20537 vm_object_lock(src_object);
20538 vm_object_purge(src_object, 0);
20539 vm_object_unlock(src_object);
20540 }
20541 continue;
20542 }
20543
39037602
A
20544 /*
20545 * Pages belonging to this object could be swapped to disk.
20546 * Make sure it's not a shared object because we could end
20547 * up just bringing it back in again.
d9a64523
A
20548 *
20549 * We try to optimize somewhat by checking for objects that are mapped
20550 * more than once within our own map. But we don't do full searches,
20551 * we just look at the entries following our current entry.
39037602 20552 */
cb323159 20553
39037602 20554 if (src_object->ref_count > 1) {
d9a64523
A
20555 if (src_object != cur_shared_object) {
20556 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20557 dirty_shared_count += obj_pages_snapshot;
20558
20559 cur_shared_object = src_object;
20560 cur_shared_obj_ref_cnt = 1;
20561 continue;
20562 } else {
20563 cur_shared_obj_ref_cnt++;
20564 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
20565 /*
20566 * Fall through to below and treat this object as private.
20567 * So deduct its pages from our shared total and add it to the
20568 * private total.
20569 */
20570
20571 dirty_shared_count -= obj_pages_snapshot;
20572 dirty_private_count += obj_pages_snapshot;
20573 } else {
20574 continue;
20575 }
20576 }
20577 }
20578
20579
20580 if (src_object->ref_count == 1) {
20581 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20582 }
20583
20584 if (evaluation_phase == TRUE) {
39037602 20585 continue;
3e170ce0 20586 }
3e170ce0 20587 }
d9a64523 20588
cb323159 20589 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
d9a64523
A
20590 *wired_count += src_object->wired_page_count;
20591
3e170ce0 20592 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
d9a64523
A
20593 if (vm_compressor_low_on_space()) {
20594 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20595 }
20596
20597 if (vm_swap_low_on_space()) {
20598 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20599 }
20600
3e170ce0 20601 kr = KERN_NO_SPACE;
5ba3f43e 20602 break;
39236c6e 20603 }
cb323159
A
20604 if (paged_out_count >= dirty_budget) {
20605 break;
20606 }
20607 dirty_budget -= paged_out_count;
6d2010ae
A
20608 }
20609 }
20610 }
d9a64523 20611
ea3f0419 20612 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
d9a64523 20613 if (evaluation_phase) {
d9a64523
A
20614 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
20615
20616 if (dirty_shared_count > shared_pages_threshold) {
20617 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
20618 kr = KERN_FAILURE;
20619 goto done;
20620 }
20621
20622 if (dirty_shared_count &&
0a7de745 20623 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
d9a64523
A
20624 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
20625 kr = KERN_FAILURE;
20626 goto done;
20627 }
20628
20629 evaluation_phase = FALSE;
20630 dirty_shared_count = dirty_private_count = 0;
0a7de745 20631
f427ee49 20632 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
d9a64523
A
20633 clock_get_uptime(&c_freezer_last_yield_ts);
20634
20635 if (eval_only) {
20636 kr = KERN_SUCCESS;
20637 goto done;
20638 }
20639
cb323159
A
20640 vm_purgeable_purge_task_owned(task);
20641
d9a64523 20642 goto again;
d9a64523 20643 } else {
d9a64523 20644 kr = KERN_SUCCESS;
d9a64523
A
20645 }
20646
6d2010ae
A
20647done:
20648 vm_map_unlock(map);
5ba3f43e 20649
d9a64523
A
20650 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
20651 vm_object_compressed_freezer_done();
3e170ce0 20652 }
6d2010ae
A
20653 return kr;
20654}
20655
6d2010ae 20656#endif
e2d2fc5c 20657
e2d2fc5c
A
20658/*
20659 * vm_map_entry_should_cow_for_true_share:
20660 *
20661 * Determines if the map entry should be clipped and setup for copy-on-write
20662 * to avoid applying "true_share" to a large VM object when only a subset is
20663 * targeted.
20664 *
20665 * For now, we target only the map entries created for the Objective C
20666 * Garbage Collector, which initially have the following properties:
20667 * - alias == VM_MEMORY_MALLOC
0a7de745
A
20668 * - wired_count == 0
20669 * - !needs_copy
e2d2fc5c 20670 * and a VM object with:
0a7de745
A
20671 * - internal
20672 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20673 * - !true_share
20674 * - vo_size == ANON_CHUNK_SIZE
3e170ce0
A
20675 *
20676 * Only non-kernel map entries.
e2d2fc5c
A
20677 */
20678boolean_t
20679vm_map_entry_should_cow_for_true_share(
0a7de745 20680 vm_map_entry_t entry)
e2d2fc5c 20681{
0a7de745 20682 vm_object_t object;
e2d2fc5c
A
20683
20684 if (entry->is_sub_map) {
20685 /* entry does not point at a VM object */
20686 return FALSE;
20687 }
20688
20689 if (entry->needs_copy) {
20690 /* already set for copy_on_write: done! */
20691 return FALSE;
20692 }
20693
3e170ce0
A
20694 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
20695 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
fe8ab488 20696 /* not a malloc heap or Obj-C Garbage Collector heap */
e2d2fc5c
A
20697 return FALSE;
20698 }
20699
20700 if (entry->wired_count) {
20701 /* wired: can't change the map entry... */
fe8ab488 20702 vm_counters.should_cow_but_wired++;
e2d2fc5c
A
20703 return FALSE;
20704 }
20705
3e170ce0 20706 object = VME_OBJECT(entry);
e2d2fc5c
A
20707
20708 if (object == VM_OBJECT_NULL) {
20709 /* no object yet... */
20710 return FALSE;
20711 }
20712
20713 if (!object->internal) {
20714 /* not an internal object */
20715 return FALSE;
20716 }
20717
20718 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
20719 /* not the default copy strategy */
20720 return FALSE;
20721 }
20722
20723 if (object->true_share) {
20724 /* already true_share: too late to avoid it */
20725 return FALSE;
20726 }
20727
3e170ce0 20728 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
fe8ab488
A
20729 object->vo_size != ANON_CHUNK_SIZE) {
20730 /* ... not an object created for the ObjC Garbage Collector */
20731 return FALSE;
20732 }
20733
3e170ce0 20734 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
fe8ab488
A
20735 object->vo_size != 2048 * 4096) {
20736 /* ... not a "MALLOC_SMALL" heap */
e2d2fc5c
A
20737 return FALSE;
20738 }
20739
20740 /*
20741 * All the criteria match: we have a large object being targeted for "true_share".
20742 * To limit the adverse side-effects linked with "true_share", tell the caller to
20743 * try and avoid setting up the entire object for "true_share" by clipping the
20744 * targeted range and setting it up for copy-on-write.
20745 */
20746 return TRUE;
20747}
39236c6e 20748
5ba3f43e 20749vm_map_offset_t
39236c6e 20750vm_map_round_page_mask(
0a7de745
A
20751 vm_map_offset_t offset,
20752 vm_map_offset_t mask)
39236c6e
A
20753{
20754 return VM_MAP_ROUND_PAGE(offset, mask);
20755}
20756
5ba3f43e 20757vm_map_offset_t
39236c6e 20758vm_map_trunc_page_mask(
0a7de745
A
20759 vm_map_offset_t offset,
20760 vm_map_offset_t mask)
39236c6e
A
20761{
20762 return VM_MAP_TRUNC_PAGE(offset, mask);
20763}
20764
3e170ce0
A
20765boolean_t
20766vm_map_page_aligned(
0a7de745
A
20767 vm_map_offset_t offset,
20768 vm_map_offset_t mask)
3e170ce0
A
20769{
20770 return ((offset) & mask) == 0;
20771}
20772
39236c6e
A
20773int
20774vm_map_page_shift(
20775 vm_map_t map)
20776{
20777 return VM_MAP_PAGE_SHIFT(map);
20778}
20779
20780int
20781vm_map_page_size(
20782 vm_map_t map)
20783{
20784 return VM_MAP_PAGE_SIZE(map);
20785}
20786
3e170ce0 20787vm_map_offset_t
39236c6e
A
20788vm_map_page_mask(
20789 vm_map_t map)
20790{
20791 return VM_MAP_PAGE_MASK(map);
20792}
20793
20794kern_return_t
20795vm_map_set_page_shift(
0a7de745
A
20796 vm_map_t map,
20797 int pageshift)
39236c6e
A
20798{
20799 if (map->hdr.nentries != 0) {
20800 /* too late to change page size */
20801 return KERN_FAILURE;
20802 }
20803
20804 map->hdr.page_shift = pageshift;
20805
20806 return KERN_SUCCESS;
20807}
20808
20809kern_return_t
20810vm_map_query_volatile(
0a7de745
A
20811 vm_map_t map,
20812 mach_vm_size_t *volatile_virtual_size_p,
20813 mach_vm_size_t *volatile_resident_size_p,
20814 mach_vm_size_t *volatile_compressed_size_p,
20815 mach_vm_size_t *volatile_pmap_size_p,
20816 mach_vm_size_t *volatile_compressed_pmap_size_p)
39236c6e 20817{
0a7de745
A
20818 mach_vm_size_t volatile_virtual_size;
20819 mach_vm_size_t volatile_resident_count;
20820 mach_vm_size_t volatile_compressed_count;
20821 mach_vm_size_t volatile_pmap_count;
20822 mach_vm_size_t volatile_compressed_pmap_count;
20823 mach_vm_size_t resident_count;
20824 vm_map_entry_t entry;
20825 vm_object_t object;
39236c6e
A
20826
20827 /* map should be locked by caller */
20828
20829 volatile_virtual_size = 0;
20830 volatile_resident_count = 0;
3e170ce0 20831 volatile_compressed_count = 0;
39236c6e 20832 volatile_pmap_count = 0;
3e170ce0 20833 volatile_compressed_pmap_count = 0;
39236c6e
A
20834
20835 for (entry = vm_map_first_entry(map);
0a7de745
A
20836 entry != vm_map_to_entry(map);
20837 entry = entry->vme_next) {
20838 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
4bd07ac2 20839
39236c6e
A
20840 if (entry->is_sub_map) {
20841 continue;
20842 }
0a7de745 20843 if (!(entry->protection & VM_PROT_WRITE)) {
39236c6e
A
20844 continue;
20845 }
3e170ce0 20846 object = VME_OBJECT(entry);
39236c6e
A
20847 if (object == VM_OBJECT_NULL) {
20848 continue;
20849 }
3e170ce0
A
20850 if (object->purgable != VM_PURGABLE_VOLATILE &&
20851 object->purgable != VM_PURGABLE_EMPTY) {
39236c6e
A
20852 continue;
20853 }
3e170ce0 20854 if (VME_OFFSET(entry)) {
39236c6e
A
20855 /*
20856 * If the map entry has been split and the object now
20857 * appears several times in the VM map, we don't want
20858 * to count the object's resident_page_count more than
20859 * once. We count it only for the first one, starting
20860 * at offset 0 and ignore the other VM map entries.
20861 */
20862 continue;
20863 }
20864 resident_count = object->resident_page_count;
3e170ce0 20865 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
39236c6e
A
20866 resident_count = 0;
20867 } else {
3e170ce0 20868 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
39236c6e
A
20869 }
20870
20871 volatile_virtual_size += entry->vme_end - entry->vme_start;
20872 volatile_resident_count += resident_count;
3e170ce0
A
20873 if (object->pager) {
20874 volatile_compressed_count +=
0a7de745 20875 vm_compressor_pager_get_count(object->pager);
3e170ce0 20876 }
4bd07ac2
A
20877 pmap_compressed_bytes = 0;
20878 pmap_resident_bytes =
0a7de745
A
20879 pmap_query_resident(map->pmap,
20880 entry->vme_start,
20881 entry->vme_end,
20882 &pmap_compressed_bytes);
4bd07ac2
A
20883 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
20884 volatile_compressed_pmap_count += (pmap_compressed_bytes
0a7de745 20885 / PAGE_SIZE);
39236c6e
A
20886 }
20887
20888 /* map is still locked on return */
20889
20890 *volatile_virtual_size_p = volatile_virtual_size;
20891 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
3e170ce0 20892 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
39236c6e 20893 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
3e170ce0 20894 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
39236c6e
A
20895
20896 return KERN_SUCCESS;
20897}
fe8ab488 20898
3e170ce0
A
20899void
20900vm_map_sizes(vm_map_t map,
0a7de745
A
20901 vm_map_size_t * psize,
20902 vm_map_size_t * pfree,
20903 vm_map_size_t * plargest_free)
3e170ce0 20904{
0a7de745
A
20905 vm_map_entry_t entry;
20906 vm_map_offset_t prev;
20907 vm_map_size_t free, total_free, largest_free;
20908 boolean_t end;
20909
20910 if (!map) {
20911 *psize = *pfree = *plargest_free = 0;
20912 return;
20913 }
20914 total_free = largest_free = 0;
20915
20916 vm_map_lock_read(map);
20917 if (psize) {
20918 *psize = map->max_offset - map->min_offset;
20919 }
20920
20921 prev = map->min_offset;
20922 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
20923 end = (entry == vm_map_to_entry(map));
20924
20925 if (end) {
20926 free = entry->vme_end - prev;
20927 } else {
20928 free = entry->vme_start - prev;
20929 }
20930
20931 total_free += free;
20932 if (free > largest_free) {
20933 largest_free = free;
20934 }
20935
20936 if (end) {
20937 break;
20938 }
20939 prev = entry->vme_end;
20940 }
20941 vm_map_unlock_read(map);
20942 if (pfree) {
20943 *pfree = total_free;
20944 }
20945 if (plargest_free) {
20946 *plargest_free = largest_free;
20947 }
3e170ce0
A
20948}
20949
fe8ab488
A
20950#if VM_SCAN_FOR_SHADOW_CHAIN
20951int vm_map_shadow_max(vm_map_t map);
0a7de745
A
20952int
20953vm_map_shadow_max(
fe8ab488
A
20954 vm_map_t map)
20955{
0a7de745
A
20956 int shadows, shadows_max;
20957 vm_map_entry_t entry;
20958 vm_object_t object, next_object;
fe8ab488 20959
0a7de745 20960 if (map == NULL) {
fe8ab488 20961 return 0;
0a7de745 20962 }
fe8ab488
A
20963
20964 shadows_max = 0;
20965
20966 vm_map_lock_read(map);
5ba3f43e 20967
fe8ab488 20968 for (entry = vm_map_first_entry(map);
0a7de745
A
20969 entry != vm_map_to_entry(map);
20970 entry = entry->vme_next) {
fe8ab488
A
20971 if (entry->is_sub_map) {
20972 continue;
20973 }
3e170ce0 20974 object = VME_OBJECT(entry);
fe8ab488
A
20975 if (object == NULL) {
20976 continue;
20977 }
20978 vm_object_lock_shared(object);
20979 for (shadows = 0;
0a7de745
A
20980 object->shadow != NULL;
20981 shadows++, object = next_object) {
fe8ab488
A
20982 next_object = object->shadow;
20983 vm_object_lock_shared(next_object);
20984 vm_object_unlock(object);
20985 }
20986 vm_object_unlock(object);
20987 if (shadows > shadows_max) {
20988 shadows_max = shadows;
20989 }
20990 }
20991
20992 vm_map_unlock_read(map);
20993
20994 return shadows_max;
20995}
20996#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
39037602 20997
0a7de745
A
20998void
20999vm_commit_pagezero_status(vm_map_t lmap)
21000{
39037602
A
21001 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
21002}
5ba3f43e 21003
f427ee49 21004#if XNU_TARGET_OS_OSX
5ba3f43e
A
21005void
21006vm_map_set_high_start(
0a7de745
A
21007 vm_map_t map,
21008 vm_map_offset_t high_start)
5ba3f43e
A
21009{
21010 map->vmmap_high_start = high_start;
21011}
f427ee49 21012#endif /* XNU_TARGET_OS_OSX */
d9a64523 21013
d9a64523
A
21014
21015/*
21016 * FORKED CORPSE FOOTPRINT
21017 *
21018 * A forked corpse gets a copy of the original VM map but its pmap is mostly
21019 * empty since it never ran and never got to fault in any pages.
21020 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21021 * a forked corpse would therefore return very little information.
21022 *
21023 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21024 * to vm_map_fork() to collect footprint information from the original VM map
21025 * and its pmap, and store it in the forked corpse's VM map. That information
21026 * is stored in place of the VM map's "hole list" since we'll never need to
21027 * look up holes in the corpse's map.
21028 *
21029 * The corpse's footprint info looks like this:
21030 *
21031 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21032 * as follows:
21033 * +---------------------------------------+
21034 * header-> | cf_size |
21035 * +-------------------+-------------------+
21036 * | cf_last_region | cf_last_zeroes |
21037 * +-------------------+-------------------+
21038 * region1-> | cfr_vaddr |
21039 * +-------------------+-------------------+
21040 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21041 * +---------------------------------------+
21042 * | d4 | d5 | ... |
21043 * +---------------------------------------+
21044 * | ... |
21045 * +-------------------+-------------------+
21046 * | dy | dz | na | na | cfr_vaddr... | <-region2
21047 * +-------------------+-------------------+
21048 * | cfr_vaddr (ctd) | cfr_num_pages |
21049 * +---------------------------------------+
21050 * | d0 | d1 ... |
21051 * +---------------------------------------+
21052 * ...
21053 * +---------------------------------------+
21054 * last region-> | cfr_vaddr |
21055 * +---------------------------------------+
21056 * + cfr_num_pages | d0 | d1 | d2 | d3 |
21057 * +---------------------------------------+
21058 * ...
21059 * +---------------------------------------+
21060 * | dx | dy | dz | na | na | na | na | na |
21061 * +---------------------------------------+
21062 *
21063 * where:
0a7de745
A
21064 * cf_size: total size of the buffer (rounded to page size)
21065 * cf_last_region: offset in the buffer of the last "region" sub-header
d9a64523
A
21066 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21067 * of last region
21068 * cfr_vaddr: virtual address of the start of the covered "region"
21069 * cfr_num_pages: number of pages in the covered "region"
21070 * d*: disposition of the page at that virtual address
21071 * Regions in the buffer are word-aligned.
21072 *
21073 * We estimate the size of the buffer based on the number of memory regions
21074 * and the virtual size of the address space. While copying each memory region
21075 * during vm_map_fork(), we also collect the footprint info for that region
21076 * and store it in the buffer, packing it as much as possible (coalescing
21077 * contiguous memory regions to avoid having too many region headers and
21078 * avoiding long streaks of "zero" page dispositions by splitting footprint
21079 * "regions"), so the number of regions in the footprint buffer might not match
21080 * the number of memory regions in the address space.
21081 *
21082 * We also have to copy the original task's "nonvolatile" ledgers since that's
21083 * part of the footprint and will need to be reported to any tool asking for
21084 * the footprint information of the forked corpse.
21085 */
21086
21087uint64_t vm_map_corpse_footprint_count = 0;
21088uint64_t vm_map_corpse_footprint_size_avg = 0;
21089uint64_t vm_map_corpse_footprint_size_max = 0;
21090uint64_t vm_map_corpse_footprint_full = 0;
21091uint64_t vm_map_corpse_footprint_no_buf = 0;
21092
f427ee49
A
21093struct vm_map_corpse_footprint_header {
21094 vm_size_t cf_size; /* allocated buffer size */
21095 uint32_t cf_last_region; /* offset of last region in buffer */
21096 union {
21097 uint32_t cfu_last_zeroes; /* during creation:
21098 * number of "zero" dispositions at
21099 * end of last region */
21100 uint32_t cfu_hint_region; /* during lookup:
21101 * offset of last looked up region */
21102#define cf_last_zeroes cfu.cfu_last_zeroes
21103#define cf_hint_region cfu.cfu_hint_region
21104 } cfu;
21105};
21106typedef uint8_t cf_disp_t;
21107struct vm_map_corpse_footprint_region {
21108 vm_map_offset_t cfr_vaddr; /* region start virtual address */
21109 uint32_t cfr_num_pages; /* number of pages in this "region" */
21110 cf_disp_t cfr_disposition[0]; /* disposition of each page */
21111} __attribute__((packed));
21112
21113static cf_disp_t
21114vm_page_disposition_to_cf_disp(
21115 int disposition)
21116{
21117 assert(sizeof(cf_disp_t) == 1);
21118 /* relocate bits that don't fit in a "uint8_t" */
21119 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21120 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21121 }
21122 /* cast gets rid of extra bits */
21123 return (cf_disp_t) disposition;
21124}
21125
21126static int
21127vm_page_cf_disp_to_disposition(
21128 cf_disp_t cf_disp)
21129{
21130 int disposition;
21131
21132 assert(sizeof(cf_disp_t) == 1);
21133 disposition = (int) cf_disp;
21134 /* move relocated bits back in place */
21135 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21136 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21137 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21138 }
21139 return disposition;
21140}
21141
d9a64523
A
21142/*
21143 * vm_map_corpse_footprint_new_region:
0a7de745 21144 * closes the current footprint "region" and creates a new one
d9a64523
A
21145 *
21146 * Returns NULL if there's not enough space in the buffer for a new region.
21147 */
21148static struct vm_map_corpse_footprint_region *
21149vm_map_corpse_footprint_new_region(
21150 struct vm_map_corpse_footprint_header *footprint_header)
21151{
0a7de745
A
21152 uintptr_t footprint_edge;
21153 uint32_t new_region_offset;
d9a64523
A
21154 struct vm_map_corpse_footprint_region *footprint_region;
21155 struct vm_map_corpse_footprint_region *new_footprint_region;
21156
21157 footprint_edge = ((uintptr_t)footprint_header +
0a7de745 21158 footprint_header->cf_size);
d9a64523 21159 footprint_region = ((struct vm_map_corpse_footprint_region *)
0a7de745
A
21160 ((char *)footprint_header +
21161 footprint_header->cf_last_region));
21162 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
21163 footprint_edge);
d9a64523
A
21164
21165 /* get rid of trailing zeroes in the last region */
21166 assert(footprint_region->cfr_num_pages >=
0a7de745 21167 footprint_header->cf_last_zeroes);
d9a64523 21168 footprint_region->cfr_num_pages -=
0a7de745 21169 footprint_header->cf_last_zeroes;
d9a64523
A
21170 footprint_header->cf_last_zeroes = 0;
21171
21172 /* reuse this region if it's now empty */
21173 if (footprint_region->cfr_num_pages == 0) {
21174 return footprint_region;
21175 }
21176
21177 /* compute offset of new region */
21178 new_region_offset = footprint_header->cf_last_region;
0a7de745 21179 new_region_offset += sizeof(*footprint_region);
f427ee49 21180 new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
0a7de745 21181 new_region_offset = roundup(new_region_offset, sizeof(int));
d9a64523
A
21182
21183 /* check if we're going over the edge */
21184 if (((uintptr_t)footprint_header +
0a7de745
A
21185 new_region_offset +
21186 sizeof(*footprint_region)) >=
d9a64523
A
21187 footprint_edge) {
21188 /* over the edge: no new region */
21189 return NULL;
21190 }
21191
21192 /* adjust offset of last region in header */
21193 footprint_header->cf_last_region = new_region_offset;
21194
21195 new_footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745
A
21196 ((char *)footprint_header +
21197 footprint_header->cf_last_region);
d9a64523
A
21198 new_footprint_region->cfr_vaddr = 0;
21199 new_footprint_region->cfr_num_pages = 0;
21200 /* caller needs to initialize new region */
21201
21202 return new_footprint_region;
21203}
21204
21205/*
21206 * vm_map_corpse_footprint_collect:
21207 * collects footprint information for "old_entry" in "old_map" and
21208 * stores it in "new_map"'s vmmap_corpse_footprint.
21209 */
21210kern_return_t
21211vm_map_corpse_footprint_collect(
0a7de745
A
21212 vm_map_t old_map,
21213 vm_map_entry_t old_entry,
21214 vm_map_t new_map)
d9a64523 21215{
0a7de745 21216 vm_map_offset_t va;
0a7de745 21217 kern_return_t kr;
d9a64523
A
21218 struct vm_map_corpse_footprint_header *footprint_header;
21219 struct vm_map_corpse_footprint_region *footprint_region;
21220 struct vm_map_corpse_footprint_region *new_footprint_region;
f427ee49 21221 cf_disp_t *next_disp_p;
0a7de745
A
21222 uintptr_t footprint_edge;
21223 uint32_t num_pages_tmp;
f427ee49
A
21224 int effective_page_size;
21225
21226 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));
d9a64523
A
21227
21228 va = old_entry->vme_start;
21229
21230 vm_map_lock_assert_exclusive(old_map);
21231 vm_map_lock_assert_exclusive(new_map);
21232
21233 assert(new_map->has_corpse_footprint);
21234 assert(!old_map->has_corpse_footprint);
21235 if (!new_map->has_corpse_footprint ||
21236 old_map->has_corpse_footprint) {
21237 /*
21238 * This can only transfer footprint info from a
21239 * map with a live pmap to a map with a corpse footprint.
21240 */
21241 return KERN_NOT_SUPPORTED;
21242 }
21243
21244 if (new_map->vmmap_corpse_footprint == NULL) {
0a7de745
A
21245 vm_offset_t buf;
21246 vm_size_t buf_size;
d9a64523
A
21247
21248 buf = 0;
0a7de745
A
21249 buf_size = (sizeof(*footprint_header) +
21250 (old_map->hdr.nentries
21251 *
21252 (sizeof(*footprint_region) +
21253 +3)) /* potential alignment for each region */
21254 +
f427ee49 21255 ((old_map->size / effective_page_size)
0a7de745 21256 *
f427ee49 21257 sizeof(cf_disp_t))); /* disposition for each page */
d9a64523
A
21258// printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
21259 buf_size = round_page(buf_size);
21260
21261 /* limit buffer to 1 page to validate overflow detection */
21262// buf_size = PAGE_SIZE;
21263
21264 /* limit size to a somewhat sane amount */
f427ee49 21265#if XNU_TARGET_OS_OSX
0a7de745 21266#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
f427ee49
A
21267#else /* XNU_TARGET_OS_OSX */
21268#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
21269#endif /* XNU_TARGET_OS_OSX */
d9a64523
A
21270 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
21271 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
21272 }
21273
21274 /*
21275 * Allocate the pageable buffer (with a trailing guard page).
21276 * It will be zero-filled on demand.
21277 */
21278 kr = kernel_memory_allocate(kernel_map,
0a7de745
A
21279 &buf,
21280 (buf_size
21281 + PAGE_SIZE), /* trailing guard page */
21282 0, /* mask */
21283 KMA_PAGEABLE | KMA_GUARD_LAST,
21284 VM_KERN_MEMORY_DIAG);
d9a64523
A
21285 if (kr != KERN_SUCCESS) {
21286 vm_map_corpse_footprint_no_buf++;
21287 return kr;
21288 }
21289
21290 /* initialize header and 1st region */
21291 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
21292 new_map->vmmap_corpse_footprint = footprint_header;
21293
21294 footprint_header->cf_size = buf_size;
21295 footprint_header->cf_last_region =
0a7de745 21296 sizeof(*footprint_header);
d9a64523
A
21297 footprint_header->cf_last_zeroes = 0;
21298
21299 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745
A
21300 ((char *)footprint_header +
21301 footprint_header->cf_last_region);
d9a64523
A
21302 footprint_region->cfr_vaddr = 0;
21303 footprint_region->cfr_num_pages = 0;
21304 } else {
21305 /* retrieve header and last region */
21306 footprint_header = (struct vm_map_corpse_footprint_header *)
0a7de745 21307 new_map->vmmap_corpse_footprint;
d9a64523 21308 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745
A
21309 ((char *)footprint_header +
21310 footprint_header->cf_last_region);
d9a64523
A
21311 }
21312 footprint_edge = ((uintptr_t)footprint_header +
0a7de745 21313 footprint_header->cf_size);
d9a64523
A
21314
21315 if ((footprint_region->cfr_vaddr +
0a7de745 21316 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
f427ee49 21317 effective_page_size))
d9a64523 21318 != old_entry->vme_start) {
f427ee49
A
21319 uint64_t num_pages_delta, num_pages_delta_size;
21320 uint32_t region_offset_delta_size;
d9a64523
A
21321
21322 /*
21323 * Not the next contiguous virtual address:
21324 * start a new region or store "zero" dispositions for
21325 * the missing pages?
21326 */
21327 /* size of gap in actual page dispositions */
f427ee49
A
21328 num_pages_delta = ((old_entry->vme_start -
21329 footprint_region->cfr_vaddr) / effective_page_size)
21330 - footprint_region->cfr_num_pages;
21331 num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
d9a64523 21332 /* size of gap as a new footprint region header */
f427ee49 21333 region_offset_delta_size =
0a7de745 21334 (sizeof(*footprint_region) +
f427ee49
A
21335 roundup(((footprint_region->cfr_num_pages -
21336 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
0a7de745 21337 sizeof(int)) -
f427ee49
A
21338 ((footprint_region->cfr_num_pages -
21339 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
d9a64523 21340// printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
f427ee49 21341 if (region_offset_delta_size < num_pages_delta_size ||
d9a64523 21342 os_add3_overflow(footprint_region->cfr_num_pages,
0a7de745
A
21343 (uint32_t) num_pages_delta,
21344 1,
21345 &num_pages_tmp)) {
d9a64523
A
21346 /*
21347 * Storing data for this gap would take more space
21348 * than inserting a new footprint region header:
21349 * let's start a new region and save space. If it's a
21350 * tie, let's avoid using a new region, since that
21351 * would require more region hops to find the right
21352 * range during lookups.
21353 *
21354 * If the current region's cfr_num_pages would overflow
21355 * if we added "zero" page dispositions for the gap,
21356 * no choice but to start a new region.
21357 */
21358// printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
21359 new_footprint_region =
0a7de745 21360 vm_map_corpse_footprint_new_region(footprint_header);
d9a64523
A
21361 /* check that we're not going over the edge */
21362 if (new_footprint_region == NULL) {
21363 goto over_the_edge;
21364 }
21365 footprint_region = new_footprint_region;
21366 /* initialize new region as empty */
21367 footprint_region->cfr_vaddr = old_entry->vme_start;
21368 footprint_region->cfr_num_pages = 0;
21369 } else {
21370 /*
21371 * Store "zero" page dispositions for the missing
21372 * pages.
21373 */
21374// printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
21375 for (; num_pages_delta > 0; num_pages_delta--) {
f427ee49
A
21376 next_disp_p = (cf_disp_t *)
21377 ((uintptr_t) footprint_region +
21378 sizeof(*footprint_region));
21379 next_disp_p += footprint_region->cfr_num_pages;
d9a64523
A
21380 /* check that we're not going over the edge */
21381 if ((uintptr_t)next_disp_p >= footprint_edge) {
21382 goto over_the_edge;
21383 }
21384 /* store "zero" disposition for this gap page */
21385 footprint_region->cfr_num_pages++;
f427ee49 21386 *next_disp_p = (cf_disp_t) 0;
d9a64523
A
21387 footprint_header->cf_last_zeroes++;
21388 }
21389 }
21390 }
21391
21392 for (va = old_entry->vme_start;
0a7de745 21393 va < old_entry->vme_end;
f427ee49
A
21394 va += effective_page_size) {
21395 int disposition;
21396 cf_disp_t cf_disp;
d9a64523 21397
f427ee49
A
21398 vm_map_footprint_query_page_info(old_map,
21399 old_entry,
21400 va,
21401 &disposition);
21402 cf_disp = vm_page_disposition_to_cf_disp(disposition);
d9a64523
A
21403
21404// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
21405
f427ee49 21406 if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
d9a64523
A
21407 /*
21408 * Ignore "zero" dispositions at start of
21409 * region: just move start of region.
21410 */
f427ee49 21411 footprint_region->cfr_vaddr += effective_page_size;
d9a64523
A
21412 continue;
21413 }
21414
21415 /* would region's cfr_num_pages overflow? */
21416 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
0a7de745 21417 &num_pages_tmp)) {
d9a64523
A
21418 /* overflow: create a new region */
21419 new_footprint_region =
0a7de745
A
21420 vm_map_corpse_footprint_new_region(
21421 footprint_header);
d9a64523
A
21422 if (new_footprint_region == NULL) {
21423 goto over_the_edge;
21424 }
21425 footprint_region = new_footprint_region;
21426 footprint_region->cfr_vaddr = va;
21427 footprint_region->cfr_num_pages = 0;
21428 }
21429
f427ee49
A
21430 next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
21431 sizeof(*footprint_region));
21432 next_disp_p += footprint_region->cfr_num_pages;
d9a64523
A
21433 /* check that we're not going over the edge */
21434 if ((uintptr_t)next_disp_p >= footprint_edge) {
21435 goto over_the_edge;
21436 }
21437 /* store this dispostion */
f427ee49 21438 *next_disp_p = cf_disp;
d9a64523
A
21439 footprint_region->cfr_num_pages++;
21440
f427ee49 21441 if (cf_disp != 0) {
d9a64523
A
21442 /* non-zero disp: break the current zero streak */
21443 footprint_header->cf_last_zeroes = 0;
21444 /* done */
21445 continue;
21446 }
21447
21448 /* zero disp: add to the current streak of zeroes */
21449 footprint_header->cf_last_zeroes++;
21450 if ((footprint_header->cf_last_zeroes +
f427ee49
A
21451 roundup(((footprint_region->cfr_num_pages -
21452 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
0a7de745
A
21453 (sizeof(int) - 1),
21454 sizeof(int))) <
21455 (sizeof(*footprint_header))) {
d9a64523
A
21456 /*
21457 * There are not enough trailing "zero" dispositions
21458 * (+ the extra padding we would need for the previous
21459 * region); creating a new region would not save space
21460 * at this point, so let's keep this "zero" disposition
21461 * in this region and reconsider later.
21462 */
21463 continue;
21464 }
21465 /*
21466 * Create a new region to avoid having too many consecutive
21467 * "zero" dispositions.
21468 */
21469 new_footprint_region =
0a7de745 21470 vm_map_corpse_footprint_new_region(footprint_header);
d9a64523
A
21471 if (new_footprint_region == NULL) {
21472 goto over_the_edge;
21473 }
21474 footprint_region = new_footprint_region;
21475 /* initialize the new region as empty ... */
21476 footprint_region->cfr_num_pages = 0;
21477 /* ... and skip this "zero" disp */
f427ee49 21478 footprint_region->cfr_vaddr = va + effective_page_size;
d9a64523
A
21479 }
21480
21481 return KERN_SUCCESS;
21482
21483over_the_edge:
21484// printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
21485 vm_map_corpse_footprint_full++;
21486 return KERN_RESOURCE_SHORTAGE;
21487}
21488
21489/*
21490 * vm_map_corpse_footprint_collect_done:
21491 * completes the footprint collection by getting rid of any remaining
21492 * trailing "zero" dispositions and trimming the unused part of the
21493 * kernel buffer
21494 */
21495void
21496vm_map_corpse_footprint_collect_done(
0a7de745 21497 vm_map_t new_map)
d9a64523
A
21498{
21499 struct vm_map_corpse_footprint_header *footprint_header;
21500 struct vm_map_corpse_footprint_region *footprint_region;
0a7de745
A
21501 vm_size_t buf_size, actual_size;
21502 kern_return_t kr;
d9a64523
A
21503
21504 assert(new_map->has_corpse_footprint);
21505 if (!new_map->has_corpse_footprint ||
21506 new_map->vmmap_corpse_footprint == NULL) {
21507 return;
21508 }
21509
21510 footprint_header = (struct vm_map_corpse_footprint_header *)
0a7de745 21511 new_map->vmmap_corpse_footprint;
d9a64523
A
21512 buf_size = footprint_header->cf_size;
21513
21514 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745
A
21515 ((char *)footprint_header +
21516 footprint_header->cf_last_region);
d9a64523
A
21517
21518 /* get rid of trailing zeroes in last region */
21519 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
21520 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
21521 footprint_header->cf_last_zeroes = 0;
21522
21523 actual_size = (vm_size_t)(footprint_header->cf_last_region +
0a7de745 21524 sizeof(*footprint_region) +
f427ee49 21525 (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));
d9a64523
A
21526
21527// printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
0a7de745
A
21528 vm_map_corpse_footprint_size_avg =
21529 (((vm_map_corpse_footprint_size_avg *
21530 vm_map_corpse_footprint_count) +
21531 actual_size) /
21532 (vm_map_corpse_footprint_count + 1));
d9a64523
A
21533 vm_map_corpse_footprint_count++;
21534 if (actual_size > vm_map_corpse_footprint_size_max) {
21535 vm_map_corpse_footprint_size_max = actual_size;
21536 }
21537
21538 actual_size = round_page(actual_size);
21539 if (buf_size > actual_size) {
21540 kr = vm_deallocate(kernel_map,
0a7de745
A
21541 ((vm_address_t)footprint_header +
21542 actual_size +
21543 PAGE_SIZE), /* trailing guard page */
21544 (buf_size - actual_size));
d9a64523 21545 assertf(kr == KERN_SUCCESS,
0a7de745
A
21546 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21547 footprint_header,
21548 (uint64_t) buf_size,
21549 (uint64_t) actual_size,
21550 kr);
d9a64523 21551 kr = vm_protect(kernel_map,
0a7de745
A
21552 ((vm_address_t)footprint_header +
21553 actual_size),
21554 PAGE_SIZE,
21555 FALSE, /* set_maximum */
21556 VM_PROT_NONE);
d9a64523 21557 assertf(kr == KERN_SUCCESS,
0a7de745
A
21558 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21559 footprint_header,
21560 (uint64_t) buf_size,
21561 (uint64_t) actual_size,
21562 kr);
d9a64523
A
21563 }
21564
21565 footprint_header->cf_size = actual_size;
21566}
21567
21568/*
21569 * vm_map_corpse_footprint_query_page_info:
21570 * retrieves the disposition of the page at virtual address "vaddr"
21571 * in the forked corpse's VM map
21572 *
f427ee49 21573 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
d9a64523
A
21574 */
21575kern_return_t
21576vm_map_corpse_footprint_query_page_info(
0a7de745
A
21577 vm_map_t map,
21578 vm_map_offset_t va,
f427ee49 21579 int *disposition_p)
d9a64523
A
21580{
21581 struct vm_map_corpse_footprint_header *footprint_header;
21582 struct vm_map_corpse_footprint_region *footprint_region;
0a7de745
A
21583 uint32_t footprint_region_offset;
21584 vm_map_offset_t region_start, region_end;
21585 int disp_idx;
21586 kern_return_t kr;
f427ee49
A
21587 int effective_page_size;
21588 cf_disp_t cf_disp;
d9a64523
A
21589
21590 if (!map->has_corpse_footprint) {
f427ee49 21591 *disposition_p = 0;
d9a64523
A
21592 kr = KERN_INVALID_ARGUMENT;
21593 goto done;
21594 }
21595
21596 footprint_header = map->vmmap_corpse_footprint;
21597 if (footprint_header == NULL) {
f427ee49
A
21598 *disposition_p = 0;
21599// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
d9a64523
A
21600 kr = KERN_INVALID_ARGUMENT;
21601 goto done;
21602 }
21603
21604 /* start looking at the hint ("cf_hint_region") */
21605 footprint_region_offset = footprint_header->cf_hint_region;
21606
f427ee49
A
21607 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
21608
d9a64523 21609lookup_again:
0a7de745 21610 if (footprint_region_offset < sizeof(*footprint_header)) {
d9a64523 21611 /* hint too low: start from 1st region */
0a7de745 21612 footprint_region_offset = sizeof(*footprint_header);
d9a64523
A
21613 }
21614 if (footprint_region_offset >= footprint_header->cf_last_region) {
21615 /* hint too high: re-start from 1st region */
0a7de745 21616 footprint_region_offset = sizeof(*footprint_header);
d9a64523
A
21617 }
21618 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745 21619 ((char *)footprint_header + footprint_region_offset);
d9a64523
A
21620 region_start = footprint_region->cfr_vaddr;
21621 region_end = (region_start +
0a7de745 21622 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
f427ee49 21623 effective_page_size));
d9a64523 21624 if (va < region_start &&
0a7de745 21625 footprint_region_offset != sizeof(*footprint_header)) {
d9a64523
A
21626 /* our range starts before the hint region */
21627
21628 /* reset the hint (in a racy way...) */
0a7de745 21629 footprint_header->cf_hint_region = sizeof(*footprint_header);
d9a64523 21630 /* lookup "va" again from 1st region */
0a7de745 21631 footprint_region_offset = sizeof(*footprint_header);
d9a64523
A
21632 goto lookup_again;
21633 }
21634
21635 while (va >= region_end) {
21636 if (footprint_region_offset >= footprint_header->cf_last_region) {
21637 break;
21638 }
21639 /* skip the region's header */
0a7de745 21640 footprint_region_offset += sizeof(*footprint_region);
d9a64523 21641 /* skip the region's page dispositions */
f427ee49 21642 footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
d9a64523
A
21643 /* align to next word boundary */
21644 footprint_region_offset =
0a7de745
A
21645 roundup(footprint_region_offset,
21646 sizeof(int));
d9a64523 21647 footprint_region = (struct vm_map_corpse_footprint_region *)
0a7de745 21648 ((char *)footprint_header + footprint_region_offset);
d9a64523
A
21649 region_start = footprint_region->cfr_vaddr;
21650 region_end = (region_start +
0a7de745 21651 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
f427ee49 21652 effective_page_size));
d9a64523
A
21653 }
21654 if (va < region_start || va >= region_end) {
21655 /* page not found */
f427ee49
A
21656 *disposition_p = 0;
21657// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
d9a64523
A
21658 kr = KERN_SUCCESS;
21659 goto done;
21660 }
21661
21662 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
21663 footprint_header->cf_hint_region = footprint_region_offset;
21664
21665 /* get page disposition for "va" in this region */
f427ee49
A
21666 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
21667 cf_disp = footprint_region->cfr_disposition[disp_idx];
21668 *disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
d9a64523
A
21669 kr = KERN_SUCCESS;
21670done:
f427ee49 21671// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
d9a64523
A
21672 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
21673 DTRACE_VM4(footprint_query_page_info,
0a7de745
A
21674 vm_map_t, map,
21675 vm_map_offset_t, va,
f427ee49 21676 int, *disposition_p,
0a7de745 21677 kern_return_t, kr);
d9a64523
A
21678
21679 return kr;
21680}
21681
f427ee49 21682void
d9a64523 21683vm_map_corpse_footprint_destroy(
0a7de745 21684 vm_map_t map)
d9a64523
A
21685{
21686 if (map->has_corpse_footprint &&
21687 map->vmmap_corpse_footprint != 0) {
21688 struct vm_map_corpse_footprint_header *footprint_header;
21689 vm_size_t buf_size;
21690 kern_return_t kr;
21691
21692 footprint_header = map->vmmap_corpse_footprint;
21693 buf_size = footprint_header->cf_size;
21694 kr = vm_deallocate(kernel_map,
0a7de745
A
21695 (vm_offset_t) map->vmmap_corpse_footprint,
21696 ((vm_size_t) buf_size
21697 + PAGE_SIZE)); /* trailing guard page */
d9a64523
A
21698 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
21699 map->vmmap_corpse_footprint = 0;
21700 map->has_corpse_footprint = FALSE;
21701 }
21702}
21703
21704/*
21705 * vm_map_copy_footprint_ledgers:
21706 * copies any ledger that's relevant to the memory footprint of "old_task"
21707 * into the forked corpse's task ("new_task")
21708 */
21709void
21710vm_map_copy_footprint_ledgers(
0a7de745
A
21711 task_t old_task,
21712 task_t new_task)
d9a64523
A
21713{
21714 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
21715 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
21716 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
0a7de745
A
21717 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
21718 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
21719 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
21720 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
21721 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
21722 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
cb323159
A
21723 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
21724 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
0a7de745
A
21725 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
21726 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
cb323159
A
21727 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
21728 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
21729 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
21730 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
21731 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
21732 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
0a7de745 21733 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
d9a64523
A
21734}
21735
21736/*
21737 * vm_map_copy_ledger:
21738 * copy a single ledger from "old_task" to "new_task"
21739 */
21740void
21741vm_map_copy_ledger(
0a7de745
A
21742 task_t old_task,
21743 task_t new_task,
21744 int ledger_entry)
d9a64523 21745{
0a7de745 21746 ledger_amount_t old_balance, new_balance, delta;
d9a64523
A
21747
21748 assert(new_task->map->has_corpse_footprint);
0a7de745 21749 if (!new_task->map->has_corpse_footprint) {
d9a64523 21750 return;
0a7de745 21751 }
d9a64523
A
21752
21753 /* turn off sanity checks for the ledger we're about to mess with */
21754 ledger_disable_panic_on_negative(new_task->ledger,
0a7de745 21755 ledger_entry);
d9a64523
A
21756
21757 /* adjust "new_task" to match "old_task" */
21758 ledger_get_balance(old_task->ledger,
0a7de745
A
21759 ledger_entry,
21760 &old_balance);
d9a64523 21761 ledger_get_balance(new_task->ledger,
0a7de745
A
21762 ledger_entry,
21763 &new_balance);
d9a64523
A
21764 if (new_balance == old_balance) {
21765 /* new == old: done */
21766 } else if (new_balance > old_balance) {
21767 /* new > old ==> new -= new - old */
21768 delta = new_balance - old_balance;
21769 ledger_debit(new_task->ledger,
0a7de745
A
21770 ledger_entry,
21771 delta);
d9a64523
A
21772 } else {
21773 /* new < old ==> new += old - new */
21774 delta = old_balance - new_balance;
21775 ledger_credit(new_task->ledger,
0a7de745
A
21776 ledger_entry,
21777 delta);
d9a64523
A
21778 }
21779}
cb323159
A
21780
21781#if MACH_ASSERT
21782
21783extern int pmap_ledgers_panic;
21784extern int pmap_ledgers_panic_leeway;
21785
21786#define LEDGER_DRIFT(__LEDGER) \
21787 int __LEDGER##_over; \
21788 ledger_amount_t __LEDGER##_over_total; \
21789 ledger_amount_t __LEDGER##_over_max; \
21790 int __LEDGER##_under; \
21791 ledger_amount_t __LEDGER##_under_total; \
21792 ledger_amount_t __LEDGER##_under_max
21793
21794struct {
21795 uint64_t num_pmaps_checked;
21796
21797 LEDGER_DRIFT(phys_footprint);
21798 LEDGER_DRIFT(internal);
21799 LEDGER_DRIFT(internal_compressed);
21800 LEDGER_DRIFT(iokit_mapped);
21801 LEDGER_DRIFT(alternate_accounting);
21802 LEDGER_DRIFT(alternate_accounting_compressed);
21803 LEDGER_DRIFT(page_table);
21804 LEDGER_DRIFT(purgeable_volatile);
21805 LEDGER_DRIFT(purgeable_nonvolatile);
21806 LEDGER_DRIFT(purgeable_volatile_compressed);
21807 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
21808 LEDGER_DRIFT(tagged_nofootprint);
21809 LEDGER_DRIFT(tagged_footprint);
21810 LEDGER_DRIFT(tagged_nofootprint_compressed);
21811 LEDGER_DRIFT(tagged_footprint_compressed);
21812 LEDGER_DRIFT(network_volatile);
21813 LEDGER_DRIFT(network_nonvolatile);
21814 LEDGER_DRIFT(network_volatile_compressed);
21815 LEDGER_DRIFT(network_nonvolatile_compressed);
21816 LEDGER_DRIFT(media_nofootprint);
21817 LEDGER_DRIFT(media_footprint);
21818 LEDGER_DRIFT(media_nofootprint_compressed);
21819 LEDGER_DRIFT(media_footprint_compressed);
21820 LEDGER_DRIFT(graphics_nofootprint);
21821 LEDGER_DRIFT(graphics_footprint);
21822 LEDGER_DRIFT(graphics_nofootprint_compressed);
21823 LEDGER_DRIFT(graphics_footprint_compressed);
21824 LEDGER_DRIFT(neural_nofootprint);
21825 LEDGER_DRIFT(neural_footprint);
21826 LEDGER_DRIFT(neural_nofootprint_compressed);
21827 LEDGER_DRIFT(neural_footprint_compressed);
21828} pmap_ledgers_drift;
21829
21830void
21831vm_map_pmap_check_ledgers(
21832 pmap_t pmap,
21833 ledger_t ledger,
21834 int pid,
21835 char *procname)
21836{
21837 ledger_amount_t bal;
21838 boolean_t do_panic;
21839
21840 do_panic = FALSE;
21841
21842 pmap_ledgers_drift.num_pmaps_checked++;
21843
21844#define LEDGER_CHECK_BALANCE(__LEDGER) \
21845MACRO_BEGIN \
21846 int panic_on_negative = TRUE; \
21847 ledger_get_balance(ledger, \
21848 task_ledgers.__LEDGER, \
21849 &bal); \
21850 ledger_get_panic_on_negative(ledger, \
21851 task_ledgers.__LEDGER, \
21852 &panic_on_negative); \
21853 if (bal != 0) { \
21854 if (panic_on_negative || \
21855 (pmap_ledgers_panic && \
21856 pmap_ledgers_panic_leeway > 0 && \
21857 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
21858 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
21859 do_panic = TRUE; \
21860 } \
21861 printf("LEDGER BALANCE proc %d (%s) " \
21862 "\"%s\" = %lld\n", \
21863 pid, procname, #__LEDGER, bal); \
21864 if (bal > 0) { \
21865 pmap_ledgers_drift.__LEDGER##_over++; \
21866 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
21867 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
21868 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
21869 } \
21870 } else if (bal < 0) { \
21871 pmap_ledgers_drift.__LEDGER##_under++; \
21872 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
21873 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
21874 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
21875 } \
21876 } \
21877 } \
21878MACRO_END
21879
21880 LEDGER_CHECK_BALANCE(phys_footprint);
21881 LEDGER_CHECK_BALANCE(internal);
21882 LEDGER_CHECK_BALANCE(internal_compressed);
21883 LEDGER_CHECK_BALANCE(iokit_mapped);
21884 LEDGER_CHECK_BALANCE(alternate_accounting);
21885 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
21886 LEDGER_CHECK_BALANCE(page_table);
21887 LEDGER_CHECK_BALANCE(purgeable_volatile);
21888 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
21889 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
21890 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
21891 LEDGER_CHECK_BALANCE(tagged_nofootprint);
21892 LEDGER_CHECK_BALANCE(tagged_footprint);
21893 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
21894 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
21895 LEDGER_CHECK_BALANCE(network_volatile);
21896 LEDGER_CHECK_BALANCE(network_nonvolatile);
21897 LEDGER_CHECK_BALANCE(network_volatile_compressed);
21898 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
21899 LEDGER_CHECK_BALANCE(media_nofootprint);
21900 LEDGER_CHECK_BALANCE(media_footprint);
21901 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
21902 LEDGER_CHECK_BALANCE(media_footprint_compressed);
21903 LEDGER_CHECK_BALANCE(graphics_nofootprint);
21904 LEDGER_CHECK_BALANCE(graphics_footprint);
21905 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
21906 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
21907 LEDGER_CHECK_BALANCE(neural_nofootprint);
21908 LEDGER_CHECK_BALANCE(neural_footprint);
21909 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
21910 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
21911
21912 if (do_panic) {
21913 if (pmap_ledgers_panic) {
21914 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
21915 pmap, pid, procname);
21916 } else {
21917 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
21918 pmap, pid, procname);
21919 }
21920 }
21921}
21922#endif /* MACH_ASSERT */