1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc_internal.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
114
115 #include <san/kasan.h>
116
117 #include <sys/codesign.h>
118 #include <sys/mman.h>
119
120 #include <libkern/section_keywords.h>
121 #if DEVELOPMENT || DEBUG
122 extern int proc_selfcsflags(void);
123 int panic_on_unsigned_execute = 0;
124 #endif /* DEVELOPMENT || DEBUG */
125
126 #if MACH_ASSERT
127 int debug4k_filter = 0;
128 char debug4k_proc_name[1024] = "";
129 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
130 int debug4k_panic_on_misaligned_sharing = 0;
131 const char *debug4k_category_name[] = {
132 "error", /* 0 */
133 "life", /* 1 */
134 "load", /* 2 */
135 "fault", /* 3 */
136 "copy", /* 4 */
137 "share", /* 5 */
138 "adjust", /* 6 */
139 "pmap", /* 7 */
140 "mementry", /* 8 */
141 "iokit", /* 9 */
142 "upl", /* 10 */
143 "exc", /* 11 */
144 "vfs" /* 12 */
145 };
146 #endif /* MACH_ASSERT */
147 int debug4k_no_cow_copyin = 0;
148
149
150 #if __arm64__
151 extern const int fourk_binary_compatibility_unsafe;
152 extern const int fourk_binary_compatibility_allow_wx;
153 #endif /* __arm64__ */
154 extern int proc_selfpid(void);
155 extern char *proc_name_address(void *p);
156
157 #if VM_MAP_DEBUG_APPLE_PROTECT
158 int vm_map_debug_apple_protect = 0;
159 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
160 #if VM_MAP_DEBUG_FOURK
161 int vm_map_debug_fourk = 0;
162 #endif /* VM_MAP_DEBUG_FOURK */
163
164 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
165 int vm_map_executable_immutable_verbose = 0;
166
167 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
168
169 extern u_int32_t random(void); /* from <libkern/libkern.h> */
170 /* Internal prototypes
171 */
172
173 static void vm_map_simplify_range(
174 vm_map_t map,
175 vm_map_offset_t start,
176 vm_map_offset_t end); /* forward */
177
178 static boolean_t vm_map_range_check(
179 vm_map_t map,
180 vm_map_offset_t start,
181 vm_map_offset_t end,
182 vm_map_entry_t *entry);
183
184 static vm_map_entry_t _vm_map_entry_create(
185 struct vm_map_header *map_header, boolean_t map_locked);
186
187 static void _vm_map_entry_dispose(
188 struct vm_map_header *map_header,
189 vm_map_entry_t entry);
190
191 static void vm_map_pmap_enter(
192 vm_map_t map,
193 vm_map_offset_t addr,
194 vm_map_offset_t end_addr,
195 vm_object_t object,
196 vm_object_offset_t offset,
197 vm_prot_t protection);
198
199 static void _vm_map_clip_end(
200 struct vm_map_header *map_header,
201 vm_map_entry_t entry,
202 vm_map_offset_t end);
203
204 static void _vm_map_clip_start(
205 struct vm_map_header *map_header,
206 vm_map_entry_t entry,
207 vm_map_offset_t start);
208
209 static void vm_map_entry_delete(
210 vm_map_t map,
211 vm_map_entry_t entry);
212
213 static kern_return_t vm_map_delete(
214 vm_map_t map,
215 vm_map_offset_t start,
216 vm_map_offset_t end,
217 int flags,
218 vm_map_t zap_map);
219
220 static void vm_map_copy_insert(
221 vm_map_t map,
222 vm_map_entry_t after_where,
223 vm_map_copy_t copy);
224
225 static kern_return_t vm_map_copy_overwrite_unaligned(
226 vm_map_t dst_map,
227 vm_map_entry_t entry,
228 vm_map_copy_t copy,
229 vm_map_address_t start,
230 boolean_t discard_on_success);
231
232 static kern_return_t vm_map_copy_overwrite_aligned(
233 vm_map_t dst_map,
234 vm_map_entry_t tmp_entry,
235 vm_map_copy_t copy,
236 vm_map_offset_t start,
237 pmap_t pmap);
238
239 static kern_return_t vm_map_copyin_kernel_buffer(
240 vm_map_t src_map,
241 vm_map_address_t src_addr,
242 vm_map_size_t len,
243 boolean_t src_destroy,
244 vm_map_copy_t *copy_result); /* OUT */
245
246 static kern_return_t vm_map_copyout_kernel_buffer(
247 vm_map_t map,
248 vm_map_address_t *addr, /* IN/OUT */
249 vm_map_copy_t copy,
250 vm_map_size_t copy_size,
251 boolean_t overwrite,
252 boolean_t consume_on_success);
253
254 static void vm_map_fork_share(
255 vm_map_t old_map,
256 vm_map_entry_t old_entry,
257 vm_map_t new_map);
258
259 static boolean_t vm_map_fork_copy(
260 vm_map_t old_map,
261 vm_map_entry_t *old_entry_p,
262 vm_map_t new_map,
263 int vm_map_copyin_flags);
264
265 static kern_return_t vm_map_wire_nested(
266 vm_map_t map,
267 vm_map_offset_t start,
268 vm_map_offset_t end,
269 vm_prot_t caller_prot,
270 vm_tag_t tag,
271 boolean_t user_wire,
272 pmap_t map_pmap,
273 vm_map_offset_t pmap_addr,
274 ppnum_t *physpage_p);
275
276 static kern_return_t vm_map_unwire_nested(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end,
280 boolean_t user_wire,
281 pmap_t map_pmap,
282 vm_map_offset_t pmap_addr);
283
284 static kern_return_t vm_map_overwrite_submap_recurse(
285 vm_map_t dst_map,
286 vm_map_offset_t dst_addr,
287 vm_map_size_t dst_size);
288
289 static kern_return_t vm_map_copy_overwrite_nested(
290 vm_map_t dst_map,
291 vm_map_offset_t dst_addr,
292 vm_map_copy_t copy,
293 boolean_t interruptible,
294 pmap_t pmap,
295 boolean_t discard_on_success);
296
297 static kern_return_t vm_map_remap_extract(
298 vm_map_t map,
299 vm_map_offset_t addr,
300 vm_map_size_t size,
301 vm_prot_t required_protection,
302 boolean_t copy,
303 struct vm_map_header *map_header,
304 vm_prot_t *cur_protection,
305 vm_prot_t *max_protection,
306 vm_inherit_t inheritance,
307 vm_map_kernel_flags_t vmk_flags);
308
309 static kern_return_t vm_map_remap_range_allocate(
310 vm_map_t map,
311 vm_map_address_t *address,
312 vm_map_size_t size,
313 vm_map_offset_t mask,
314 int flags,
315 vm_map_kernel_flags_t vmk_flags,
316 vm_tag_t tag,
317 vm_map_entry_t *map_entry);
318
319 static void vm_map_region_look_for_page(
320 vm_map_t map,
321 vm_map_offset_t va,
322 vm_object_t object,
323 vm_object_offset_t offset,
324 int max_refcnt,
325 unsigned short depth,
326 vm_region_extended_info_t extended,
327 mach_msg_type_number_t count);
328
329 static int vm_map_region_count_obj_refs(
330 vm_map_entry_t entry,
331 vm_object_t object);
332
333
334 static kern_return_t vm_map_willneed(
335 vm_map_t map,
336 vm_map_offset_t start,
337 vm_map_offset_t end);
338
339 static kern_return_t vm_map_reuse_pages(
340 vm_map_t map,
341 vm_map_offset_t start,
342 vm_map_offset_t end);
343
344 static kern_return_t vm_map_reusable_pages(
345 vm_map_t map,
346 vm_map_offset_t start,
347 vm_map_offset_t end);
348
349 static kern_return_t vm_map_can_reuse(
350 vm_map_t map,
351 vm_map_offset_t start,
352 vm_map_offset_t end);
353
354 #if MACH_ASSERT
355 static kern_return_t vm_map_pageout(
356 vm_map_t map,
357 vm_map_offset_t start,
358 vm_map_offset_t end);
359 #endif /* MACH_ASSERT */
360
361 kern_return_t vm_map_corpse_footprint_collect(
362 vm_map_t old_map,
363 vm_map_entry_t old_entry,
364 vm_map_t new_map);
365 void vm_map_corpse_footprint_collect_done(
366 vm_map_t new_map);
367 void vm_map_corpse_footprint_destroy(
368 vm_map_t map);
369 kern_return_t vm_map_corpse_footprint_query_page_info(
370 vm_map_t map,
371 vm_map_offset_t va,
372 int *disposition_p);
373 void vm_map_footprint_query_page_info(
374 vm_map_t map,
375 vm_map_entry_t map_entry,
376 vm_map_offset_t curr_s_offset,
377 int *disposition_p);
378
379 static const struct vm_map_entry vm_map_entry_template = {
380 .behavior = VM_BEHAVIOR_DEFAULT,
381 .inheritance = VM_INHERIT_DEFAULT,
382 };
383
384 pid_t find_largest_process_vm_map_entries(void);
385
386 /*
387 * Macros to copy a vm_map_entry. We must be careful to correctly
388 * manage the wired page count. vm_map_entry_copy() creates a new
389 * map entry to the same memory - the wired count in the new entry
390 * must be set to zero. vm_map_entry_copy_full() creates a new
391 * entry that is identical to the old entry. This preserves the
392 * wire count; it's used for map splitting and zone changing in
393 * vm_map_copyout.
394 */
395
396 static inline void
397 vm_map_entry_copy_pmap_cs_assoc(
398 vm_map_t map __unused,
399 vm_map_entry_t new __unused,
400 vm_map_entry_t old __unused)
401 {
402 #if PMAP_CS
403 /* when pmap_cs is enabled, we want to reset on copy */
404 new->pmap_cs_associated = FALSE;
405 #else /* PMAP_CS */
406 /* when pmap_cs is not enabled, assert as a sanity check */
407 assert(new->pmap_cs_associated == FALSE);
408 #endif /* PMAP_CS */
409 }
410
411 /*
412 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
413 * But for security reasons on some platforms, we don't want the
414 * new mapping to be "used for jit", so we reset the flag here.
415 */
416 static inline void
417 vm_map_entry_copy_code_signing(
418 vm_map_t map,
419 vm_map_entry_t new,
420 vm_map_entry_t old __unused)
421 {
422 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
423 assert(new->used_for_jit == old->used_for_jit);
424 } else {
425 new->used_for_jit = FALSE;
426 }
427 }
428
429 static inline void
430 vm_map_entry_copy(
431 vm_map_t map,
432 vm_map_entry_t new,
433 vm_map_entry_t old)
434 {
435 boolean_t _vmec_reserved = new->from_reserved_zone;
436 *new = *old;
437 new->is_shared = FALSE;
438 new->needs_wakeup = FALSE;
439 new->in_transition = FALSE;
440 new->wired_count = 0;
441 new->user_wired_count = 0;
442 new->permanent = FALSE;
443 vm_map_entry_copy_code_signing(map, new, old);
444 vm_map_entry_copy_pmap_cs_assoc(map, new, old);
445 new->from_reserved_zone = _vmec_reserved;
446 if (new->iokit_acct) {
447 assertf(!new->use_pmap, "old %p new %p\n", old, new);
448 new->iokit_acct = FALSE;
449 new->use_pmap = TRUE;
450 }
451 new->vme_resilient_codesign = FALSE;
452 new->vme_resilient_media = FALSE;
453 new->vme_atomic = FALSE;
454 new->vme_no_copy_on_read = FALSE;
455 }
456
457 static inline void
458 vm_map_entry_copy_full(
459 vm_map_entry_t new,
460 vm_map_entry_t old)
461 {
462 boolean_t _vmecf_reserved = new->from_reserved_zone;
463 *new = *old;
464 new->from_reserved_zone = _vmecf_reserved;
465 }
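/*
 * Illustrative sketch (not part of the original source): contrasts the two
 * copy helpers above.  The entries "src" and "dst" and the wrapper function
 * are hypothetical; only the wired-count behavior is being demonstrated.
 */
#if 0 /* example only, not compiled */
static void
vm_map_entry_copy_example(vm_map_t map, vm_map_entry_t dst, vm_map_entry_t src)
{
	vm_map_entry_copy(map, dst, src);	/* new mapping of the same memory */
	assert(dst->wired_count == 0);		/* wiring is never inherited */
	assert(dst->user_wired_count == 0);

	vm_map_entry_copy_full(dst, src);	/* identical clone of the entry */
	assert(dst->wired_count == src->wired_count);
}
#endif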
466
467 /*
468 * Normal lock_read_to_write() returns FALSE/0 on failure.
469 * These functions evaluate to zero on success and a non-zero value on failure.
470 */
471 __attribute__((always_inline))
472 int
473 vm_map_lock_read_to_write(vm_map_t map)
474 {
475 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
476 DTRACE_VM(vm_map_lock_upgrade);
477 return 0;
478 }
479 return 1;
480 }
481
482 __attribute__((always_inline))
483 boolean_t
484 vm_map_try_lock(vm_map_t map)
485 {
486 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
487 DTRACE_VM(vm_map_lock_w);
488 return TRUE;
489 }
490 return FALSE;
491 }
492
493 __attribute__((always_inline))
494 boolean_t
495 vm_map_try_lock_read(vm_map_t map)
496 {
497 if (lck_rw_try_lock_shared(&(map)->lock)) {
498 DTRACE_VM(vm_map_lock_r);
499 return TRUE;
500 }
501 return FALSE;
502 }
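/*
 * Illustrative sketch (not part of the original source): the usual calling
 * pattern for vm_map_lock_read_to_write().  The wrapper function is
 * hypothetical.  On an upgrade failure the shared lock has already been
 * dropped, so the caller must take the write lock explicitly and re-validate
 * anything it looked up under the read lock.
 */
#if 0 /* example only, not compiled */
static void
vm_map_lock_upgrade_example(vm_map_t map)
{
	vm_map_lock_read(map);
	/* ... inspect the map under the read lock ... */
	if (vm_map_lock_read_to_write(map)) {
		/* upgrade failed: the read lock was released */
		vm_map_lock(map);
		/* ... re-validate state before modifying the map ... */
	}
	/* write lock held here */
	vm_map_unlock(map);
}
#endif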
503
504 /*
505 * Routines to get the page size the caller should
506 * use while inspecting the target address space.
507 * Use the "_safely" variant if the caller is dealing with a user-provided
508 * array whose size depends on the page size, to avoid any overflow or
509 * underflow of a user-allocated buffer.
510 */
511 int
512 vm_self_region_page_shift_safely(
513 vm_map_t target_map)
514 {
515 int effective_page_shift = 0;
516
517 if (PAGE_SIZE == (4096)) {
518 /* x86_64 and 4k watches: always use 4k */
519 return PAGE_SHIFT;
520 }
521 /* did caller provide an explicit page size for this thread to use? */
522 effective_page_shift = thread_self_region_page_shift();
523 if (effective_page_shift) {
524 /* use the explicitly-provided page size */
525 return effective_page_shift;
526 }
527 /* no explicit page size: use the caller's page size... */
528 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
529 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
530 /* page size match: safe to use */
531 return effective_page_shift;
532 }
533 /* page size mismatch */
534 return -1;
535 }
536 int
537 vm_self_region_page_shift(
538 vm_map_t target_map)
539 {
540 int effective_page_shift;
541
542 effective_page_shift = vm_self_region_page_shift_safely(target_map);
543 if (effective_page_shift == -1) {
544 /* no safe value but OK to guess for caller */
545 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
546 VM_MAP_PAGE_SHIFT(target_map));
547 }
548 return effective_page_shift;
549 }
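/*
 * Illustrative sketch (not part of the original source): a caller sizing a
 * user-visible, per-page array would use the "_safely" variant above and
 * fail rather than guess when the page sizes don't match.  The wrapper
 * function and its parameters are hypothetical.
 */
#if 0 /* example only, not compiled */
static kern_return_t
region_page_count_example(
	vm_map_t	target_map,
	vm_map_size_t	region_size,
	uint64_t	*page_count)
{
	int shift = vm_self_region_page_shift_safely(target_map);

	if (shift == -1) {
		/* caller and target disagree on page size: bail out */
		return KERN_INVALID_ARGUMENT;
	}
	*page_count = region_size >> shift;	/* one array slot per page */
	return KERN_SUCCESS;
}
#endif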
550
551
552 /*
553 * Decide if we want to allow processes to execute from their data or stack areas.
554 * override_nx() returns true if we do. Data/stack execution can be enabled independently
555 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
556 * or allow_stack_exec to enable data execution for that type of data area for that particular
557 * ABI (or both by or'ing the flags together). These are initialized in the architecture
558 * specific pmap files since the default behavior varies according to architecture. The
559 * main reason it varies is the need to provide binary compatibility with old
560 * applications that were written before these restrictions came into being. In the old
561 * days, an app could execute anything it could read, but this has slowly been tightened
562 * up over time. The default behavior is:
563 *
564 * 32-bit PPC apps may execute from both stack and data areas
565 * 32-bit Intel apps may execute from data areas but not stack
566 * 64-bit PPC/Intel apps may not execute from either data or stack
567 *
568 * An application on any architecture may override these defaults by explicitly
569 * adding PROT_EXEC permission to the page in question with the mprotect(2)
570 * system call. This code here just determines what happens when an app tries to
571 * execute from a page that lacks execute permission.
572 *
573 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
574 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
575 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
576 * execution from data areas for a particular binary even if the arch normally permits it. As
577 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
578 * to support some complicated use cases, notably browsers with out-of-process plugins that
579 * are not all NX-safe.
580 */
581
582 extern int allow_data_exec, allow_stack_exec;
583
584 int
585 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
586 {
587 int current_abi;
588
589 if (map->pmap == kernel_pmap) {
590 return FALSE;
591 }
592
593 /*
594 * Determine if the app is running in 32 or 64 bit mode.
595 */
596
597 if (vm_map_is_64bit(map)) {
598 current_abi = VM_ABI_64;
599 } else {
600 current_abi = VM_ABI_32;
601 }
602
603 /*
604 * Determine if we should allow the execution based on whether it's a
605 * stack or data area and the current architecture.
606 */
607
608 if (user_tag == VM_MEMORY_STACK) {
609 return allow_stack_exec & current_abi;
610 }
611
612 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
613 }
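/*
 * Illustrative sketch (not part of the original source): how the ABI masks
 * used by override_nx() combine.  The policy value below is hypothetical;
 * only the bit test matters.
 */
#if 0 /* example only, not compiled */
static void
override_nx_mask_example(void)
{
	/* e.g. a policy that allows data execution for 32-bit processes only */
	int data_exec_policy = VM_ABI_32;

	assert((data_exec_policy & VM_ABI_32) != 0);	/* 32-bit app: allowed */
	assert((data_exec_policy & VM_ABI_64) == 0);	/* 64-bit app: denied */
}
#endif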
614
615
616 /*
617 * Virtual memory maps provide for the mapping, protection,
618 * and sharing of virtual memory objects. In addition,
619 * this module provides for an efficient virtual copy of
620 * memory from one map to another.
621 *
622 * Synchronization is required prior to most operations.
623 *
624 * Maps consist of an ordered doubly-linked list of simple
625 * entries; a single hint is used to speed up lookups.
626 *
627 * Sharing maps have been deleted from this version of Mach.
628 * All shared objects are now mapped directly into the respective
629 * maps. This requires a change in the copy on write strategy;
630 * the asymmetric (delayed) strategy is used for shared temporary
631 * objects instead of the symmetric (shadow) strategy. All maps
632 * are now "top level" maps (either task map, kernel map or submap
633 * of the kernel map).
634 *
635 * Since portions of maps are specified by start/end addresses,
636 * which may not align with existing map entries, all
637 * routines merely "clip" entries to these start/end values.
638 * [That is, an entry is split into two, bordering at a
639 * start or end value.] Note that these clippings may not
640 * always be necessary (as the two resulting entries are then
641 * not changed); however, the clipping is done for convenience.
642 * No attempt is currently made to "glue back together" two
643 * abutting entries.
644 *
645 * The symmetric (shadow) copy strategy implements virtual copy
646 * by copying VM object references from one map to
647 * another, and then marking both regions as copy-on-write.
648 * It is important to note that only one writeable reference
649 * to a VM object region exists in any map when this strategy
650 * is used -- this means that shadow object creation can be
651 * delayed until a write operation occurs. The asymmetric (delayed)
652 * strategy allows multiple maps to have writeable references to
653 * the same region of a vm object, and hence cannot delay creating
654 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
655 * Copying of permanent objects is completely different; see
656 * vm_object_copy_strategically() in vm_object.c.
657 */
658
659 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone; /* zone for vm_map structures */
660 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
661 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone; /* zone for vm_map_copy structures */
662
663 SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_zone; /* zone for vm_map_entry structures */
664 SECURITY_READ_ONLY_LATE(zone_t) vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
665
666 #define VM_MAP_ZONE_NAME "maps"
667 #define VM_MAP_ZFLAGS ( \
668 ZC_NOENCRYPT | \
669 ZC_NOGC | \
670 ZC_NOGZALLOC | \
671 ZC_ALLOW_FOREIGN)
672
673 #define VME_RESERVED_ZONE_NAME "Reserved VM map entries"
674 #define VM_MAP_RESERVED_ZFLAGS ( \
675 ZC_NOENCRYPT | \
676 ZC_ALLOW_FOREIGN | \
677 ZC_NOCALLOUT | \
678 ZC_NOGZALLOC | \
679 ZC_KASAN_NOQUARANTINE | \
680 ZC_NOGC)
681
682 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
683 #define VM_MAP_HOLES_ZFLAGS ( \
684 ZC_NOENCRYPT | \
685 ZC_NOGC | \
686 ZC_NOGZALLOC | \
687 ZC_ALLOW_FOREIGN)
688
689 /*
690 * Asserts that a vm_map_copy object is coming from the
691 * vm_map_copy_zone to ensure that it isn't a fake constructed
692 * anywhere else.
693 */
694 static inline void
695 vm_map_copy_require(struct vm_map_copy *copy)
696 {
697 zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
698 }
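/*
 * Illustrative sketch (not part of the original source): code that receives
 * a vm_map_copy_t from an untrusted path is expected to validate it along
 * these lines before trusting its contents.  The wrapper is hypothetical.
 */
#if 0 /* example only, not compiled */
static void
vm_map_copy_require_example(vm_map_copy_t copy)
{
	vm_map_copy_require(copy);	/* panics unless it came from vm_map_copy_zone */
	/* safe to inspect the copy object (type, size, ...) from here on */
}
#endif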
699
700 /*
701 * Placeholder object for submap operations. This object is dropped
702 * into the range by a call to vm_map_find, and removed when
703 * vm_map_submap creates the submap.
704 */
705
706 vm_object_t vm_submap_object;
707
708 static __startup_data vm_offset_t map_data;
709 static __startup_data vm_size_t map_data_size;
710 static __startup_data vm_offset_t kentry_data;
711 static __startup_data vm_size_t kentry_data_size;
712 static __startup_data vm_offset_t map_holes_data;
713 static __startup_data vm_size_t map_holes_data_size;
714
715 #if XNU_TARGET_OS_OSX
716 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
717 #else /* XNU_TARGET_OS_OSX */
718 #define NO_COALESCE_LIMIT 0
719 #endif /* XNU_TARGET_OS_OSX */
720
721 /* Skip acquiring locks if we're in the midst of a kernel core dump */
722 unsigned int not_in_kdp = 1;
723
724 unsigned int vm_map_set_cache_attr_count = 0;
725
726 kern_return_t
727 vm_map_set_cache_attr(
728 vm_map_t map,
729 vm_map_offset_t va)
730 {
731 vm_map_entry_t map_entry;
732 vm_object_t object;
733 kern_return_t kr = KERN_SUCCESS;
734
735 vm_map_lock_read(map);
736
737 if (!vm_map_lookup_entry(map, va, &map_entry) ||
738 map_entry->is_sub_map) {
739 /*
740 * that memory is not properly mapped
741 */
742 kr = KERN_INVALID_ARGUMENT;
743 goto done;
744 }
745 object = VME_OBJECT(map_entry);
746
747 if (object == VM_OBJECT_NULL) {
748 /*
749 * there should be a VM object here at this point
750 */
751 kr = KERN_INVALID_ARGUMENT;
752 goto done;
753 }
754 vm_object_lock(object);
755 object->set_cache_attr = TRUE;
756 vm_object_unlock(object);
757
758 vm_map_set_cache_attr_count++;
759 done:
760 vm_map_unlock_read(map);
761
762 return kr;
763 }
764
765
766 #if CONFIG_CODE_DECRYPTION
767 /*
768 * vm_map_apple_protected:
769 * This remaps the requested part of the object with an object backed by
770 * the decrypting pager.
771 * crypt_info contains entry points and session data for the crypt module.
772 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
773 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
774 */
775 kern_return_t
776 vm_map_apple_protected(
777 vm_map_t map,
778 vm_map_offset_t start,
779 vm_map_offset_t end,
780 vm_object_offset_t crypto_backing_offset,
781 struct pager_crypt_info *crypt_info,
782 uint32_t cryptid)
783 {
784 boolean_t map_locked;
785 kern_return_t kr;
786 vm_map_entry_t map_entry;
787 struct vm_map_entry tmp_entry;
788 memory_object_t unprotected_mem_obj;
789 vm_object_t protected_object;
790 vm_map_offset_t map_addr;
791 vm_map_offset_t start_aligned, end_aligned;
792 vm_object_offset_t crypto_start, crypto_end;
793 int vm_flags;
794 vm_map_kernel_flags_t vmk_flags;
795
796 vm_flags = 0;
797 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
798
799 map_locked = FALSE;
800 unprotected_mem_obj = MEMORY_OBJECT_NULL;
801
802 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
803 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
804 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
805 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
806
807 #if __arm64__
808 /*
809 * "start" and "end" might be 4K-aligned but not 16K-aligned,
810 * so we might have to loop and establish up to 3 mappings:
811 *
812 * + the first 16K-page, which might overlap with the previous
813 * 4K-aligned mapping,
814 * + the center,
815 * + the last 16K-page, which might overlap with the next
816 * 4K-aligned mapping.
817 * Each of these mappings might be backed by a vnode pager (if
818 * properly page-aligned) or a "fourk_pager", itself backed by a
819 * vnode pager (if 4K-aligned but not page-aligned).
820 */
821 #endif /* __arm64__ */
822
823 map_addr = start_aligned;
824 for (map_addr = start_aligned;
825 map_addr < end;
826 map_addr = tmp_entry.vme_end) {
827 vm_map_lock(map);
828 map_locked = TRUE;
829
830 /* lookup the protected VM object */
831 if (!vm_map_lookup_entry(map,
832 map_addr,
833 &map_entry) ||
834 map_entry->is_sub_map ||
835 VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
836 /* that memory is not properly mapped */
837 kr = KERN_INVALID_ARGUMENT;
838 goto done;
839 }
840
841 /* ensure mapped memory is mapped as executable, except
842 * for the model decryption flow */
843 if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
844 !(map_entry->protection & VM_PROT_EXECUTE)) {
845 kr = KERN_INVALID_ARGUMENT;
846 goto done;
847 }
848
849 /* get the protected object to be decrypted */
850 protected_object = VME_OBJECT(map_entry);
851 if (protected_object == VM_OBJECT_NULL) {
852 /* there should be a VM object here at this point */
853 kr = KERN_INVALID_ARGUMENT;
854 goto done;
855 }
856 /* ensure protected object stays alive while map is unlocked */
857 vm_object_reference(protected_object);
858
859 /* limit the map entry to the area we want to cover */
860 vm_map_clip_start(map, map_entry, start_aligned);
861 vm_map_clip_end(map, map_entry, end_aligned);
862
863 tmp_entry = *map_entry;
864 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
865 vm_map_unlock(map);
866 map_locked = FALSE;
867
868 /*
869 * This map entry might be only partially encrypted
870 * (if not fully "page-aligned").
871 */
872 crypto_start = 0;
873 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
874 if (tmp_entry.vme_start < start) {
875 if (tmp_entry.vme_start != start_aligned) {
876 kr = KERN_INVALID_ADDRESS;
877 }
878 crypto_start += (start - tmp_entry.vme_start);
879 }
880 if (tmp_entry.vme_end > end) {
881 if (tmp_entry.vme_end != end_aligned) {
882 kr = KERN_INVALID_ADDRESS;
883 }
884 crypto_end -= (tmp_entry.vme_end - end);
885 }
886
887 /*
888 * This "extra backing offset" is needed to get the decryption
889 * routine to use the right key. It adjusts for the possibly
890 * relative offset of an interposed "4K" pager...
891 */
892 if (crypto_backing_offset == (vm_object_offset_t) -1) {
893 crypto_backing_offset = VME_OFFSET(&tmp_entry);
894 }
895
896 /*
897 * Lookup (and create if necessary) the protected memory object
898 * matching that VM object.
899 * If successful, this also grabs a reference on the memory object,
900 * to guarantee that it doesn't go away before we get a chance to map
901 * it.
902 */
903 unprotected_mem_obj = apple_protect_pager_setup(
904 protected_object,
905 VME_OFFSET(&tmp_entry),
906 crypto_backing_offset,
907 crypt_info,
908 crypto_start,
909 crypto_end);
910
911 /* release extra ref on protected object */
912 vm_object_deallocate(protected_object);
913
914 if (unprotected_mem_obj == NULL) {
915 kr = KERN_FAILURE;
916 goto done;
917 }
918
919 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
920 /* can overwrite an immutable mapping */
921 vmk_flags.vmkf_overwrite_immutable = TRUE;
922 #if __arm64__
923 if (tmp_entry.used_for_jit &&
924 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
925 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
926 fourk_binary_compatibility_unsafe &&
927 fourk_binary_compatibility_allow_wx) {
928 printf("** FOURK_COMPAT [%d]: "
929 "allowing write+execute at 0x%llx\n",
930 proc_selfpid(), tmp_entry.vme_start);
931 vmk_flags.vmkf_map_jit = TRUE;
932 }
933 #endif /* __arm64__ */
934
935 /* map this memory object in place of the current one */
936 map_addr = tmp_entry.vme_start;
937 kr = vm_map_enter_mem_object(map,
938 &map_addr,
939 (tmp_entry.vme_end -
940 tmp_entry.vme_start),
941 (mach_vm_offset_t) 0,
942 vm_flags,
943 vmk_flags,
944 VM_KERN_MEMORY_NONE,
945 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
946 0,
947 TRUE,
948 tmp_entry.protection,
949 tmp_entry.max_protection,
950 tmp_entry.inheritance);
951 assertf(kr == KERN_SUCCESS,
952 "kr = 0x%x\n", kr);
953 assertf(map_addr == tmp_entry.vme_start,
954 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
955 (uint64_t)map_addr,
956 (uint64_t) tmp_entry.vme_start,
957 &tmp_entry);
958
959 #if VM_MAP_DEBUG_APPLE_PROTECT
960 if (vm_map_debug_apple_protect) {
961 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
962 " backing:[object:%p,offset:0x%llx,"
963 "crypto_backing_offset:0x%llx,"
964 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
965 map,
966 (uint64_t) map_addr,
967 (uint64_t) (map_addr + (tmp_entry.vme_end -
968 tmp_entry.vme_start)),
969 unprotected_mem_obj,
970 protected_object,
971 VME_OFFSET(&tmp_entry),
972 crypto_backing_offset,
973 crypto_start,
974 crypto_end);
975 }
976 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
977
978 /*
979 * Release the reference obtained by
980 * apple_protect_pager_setup().
981 * The mapping (if it succeeded) is now holding a reference on
982 * the memory object.
983 */
984 memory_object_deallocate(unprotected_mem_obj);
985 unprotected_mem_obj = MEMORY_OBJECT_NULL;
986
987 /* continue with next map entry */
988 crypto_backing_offset += (tmp_entry.vme_end -
989 tmp_entry.vme_start);
990 crypto_backing_offset -= crypto_start;
991 }
992 kr = KERN_SUCCESS;
993
994 done:
995 if (map_locked) {
996 vm_map_unlock(map);
997 }
998 return kr;
999 }
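/*
 * Illustrative sketch (not part of the original source): the shape of a call
 * to vm_map_apple_protected().  The wrapper and its arguments are
 * hypothetical; the crypt_info block comes from the crypt module and, per
 * the contract above, must stay valid until its crypt_end() is invoked.
 */
#if 0 /* example only, not compiled */
static kern_return_t
apple_protected_example(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	struct pager_crypt_info	*crypt_info,
	uint32_t		cryptid)
{
	/*
	 * Passing -1 as crypto_backing_offset asks vm_map_apple_protected()
	 * to fall back to the mapped entry's own VME_OFFSET (see above).
	 */
	return vm_map_apple_protected(map, start, end,
	    (vm_object_offset_t)-1, crypt_info, cryptid);
}
#endif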
1000 #endif /* CONFIG_CODE_DECRYPTION */
1001
1002
1003 LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
1004 LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
1005 LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);
1006
1007 #if XNU_TARGET_OS_OSX
1008 int malloc_no_cow = 0;
1009 #else /* XNU_TARGET_OS_OSX */
1010 int malloc_no_cow = 1;
1011 #endif /* XNU_TARGET_OS_OSX */
1012 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
1013 #if DEBUG
1014 int vm_check_map_sanity = 0;
1015 #endif
1016
1017 /*
1018 * vm_map_init:
1019 *
1020 * Initialize the vm_map module. Must be called before
1021 * any other vm_map routines.
1022 *
1023 * Map and entry structures are allocated from zones -- we must
1024 * initialize those zones.
1025 *
1026 * There are three zones of interest:
1027 *
1028 * vm_map_zone: used to allocate maps.
1029 * vm_map_entry_zone: used to allocate map entries.
1030 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1031 *
1032 * The kernel allocates map entries from a special zone that is initially
1033 * "crammed" with memory. It would be difficult (perhaps impossible) for
1034 * the kernel to allocate more memory to an entry zone when it became
1035 * empty since the very act of allocating memory implies the creation
1036 * of a new entry.
1037 */
1038 __startup_func
1039 void
1040 vm_map_init(void)
1041 {
1042 const char *mez_name = "VM map entries";
1043
1044
1045 #if MACH_ASSERT
1046 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1047 sizeof(debug4k_filter));
1048 #endif /* MACH_ASSERT */
1049
1050 vm_map_zone = zone_create(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1051 VM_MAP_ZFLAGS);
1052
1053 vm_map_entry_zone = zone_create(mez_name, sizeof(struct vm_map_entry),
1054 ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT);
1055
1056 /*
1057 * Don't quarantine because we always need elements available
1058 * Disallow GC on this zone... to aid the GC.
1059 */
1060 vm_map_entry_reserved_zone = zone_create_ext(VME_RESERVED_ZONE_NAME,
1061 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
1062 ZONE_ID_ANY, ^(zone_t z) {
1063 zone_set_noexpand(z, 64 * kentry_data_size);
1064 });
1065
1066 vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1067 ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1068
1069 vm_map_holes_zone = zone_create(VM_MAP_HOLES_ZONE_NAME,
1070 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS);
1071
1072 /*
1073 * Add the stolen memory to zones, adjust zone size and stolen counts.
1074 */
1075 zcram(vm_map_zone, map_data, map_data_size);
1076 zcram(vm_map_entry_reserved_zone, kentry_data, kentry_data_size);
1077 zcram(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1078
1079 /*
1080 * Since these are covered by zones, remove them from stolen page accounting.
1081 */
1082 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1083
1084 #if VM_MAP_DEBUG_APPLE_PROTECT
1085 PE_parse_boot_argn("vm_map_debug_apple_protect",
1086 &vm_map_debug_apple_protect,
1087 sizeof(vm_map_debug_apple_protect));
1088 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1089 #if VM_MAP_DEBUG_FOURK
1090 PE_parse_boot_argn("vm_map_debug_fourk",
1091 &vm_map_debug_fourk,
1092 sizeof(vm_map_debug_fourk));
1093 #endif /* VM_MAP_DEBUG_FOURK */
1094 PE_parse_boot_argn("vm_map_executable_immutable",
1095 &vm_map_executable_immutable,
1096 sizeof(vm_map_executable_immutable));
1097 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
1098 &vm_map_executable_immutable_verbose,
1099 sizeof(vm_map_executable_immutable_verbose));
1100
1101 PE_parse_boot_argn("malloc_no_cow",
1102 &malloc_no_cow,
1103 sizeof(malloc_no_cow));
1104 if (malloc_no_cow) {
1105 vm_memory_malloc_no_cow_mask = 0ULL;
1106 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1107 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1108 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1109 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1110 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1111 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1112 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1113 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1114 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1115 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1116 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1117 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1118 &vm_memory_malloc_no_cow_mask,
1119 sizeof(vm_memory_malloc_no_cow_mask));
1120 }
1121
1122 #if DEBUG
1123 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1124 if (vm_check_map_sanity) {
1125 kprintf("VM sanity checking enabled\n");
1126 } else {
1127 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1128 }
1129 #endif /* DEBUG */
1130
1131 #if DEVELOPMENT || DEBUG
1132 PE_parse_boot_argn("panic_on_unsigned_execute",
1133 &panic_on_unsigned_execute,
1134 sizeof(panic_on_unsigned_execute));
1135 #endif /* DEVELOPMENT || DEBUG */
1136 }
1137
1138 __startup_func
1139 static void
1140 vm_map_steal_memory(void)
1141 {
1142 uint16_t kentry_initial_pages;
1143
1144 map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
1145 sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);
1146
1147 /*
1148 * kentry_initial_pages corresponds to the number of kernel map entries
1149 * required during bootstrap until the asynchronous replenishment
1150 * scheme is activated and/or entries are available from the general
1151 * map entry pool.
1152 */
1153 #if defined(__LP64__)
1154 kentry_initial_pages = 10;
1155 #else
1156 kentry_initial_pages = 6;
1157 #endif
1158
1159 #if CONFIG_GZALLOC
1160 /* If using the guard allocator, reserve more memory for the kernel
1161 * reserved map entry pool.
1162 */
1163 if (gzalloc_enabled()) {
1164 kentry_initial_pages *= 1024;
1165 }
1166 #endif
1167
1168 kentry_data_size = zone_get_foreign_alloc_size(VME_RESERVED_ZONE_NAME,
1169 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
1170 kentry_initial_pages);
1171
1172 map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
1173 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1174 kentry_initial_pages);
1175
1176 /*
1177 * Steal a contiguous range of memory so that a simple range check
1178 * can validate foreign addresses being freed/crammed to these
1179 * zones
1180 */
1181 vm_size_t total_size;
1182 if (os_add3_overflow(map_data_size, kentry_data_size,
1183 map_holes_data_size, &total_size)) {
1184 panic("vm_map_steal_memory: overflow in amount of memory requested");
1185 }
1186 map_data = zone_foreign_mem_init(total_size);
1187 kentry_data = map_data + map_data_size;
1188 map_holes_data = kentry_data + kentry_data_size;
1189 }
1190 STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
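/*
 * Illustrative sketch (not part of the original source): because the three
 * startup allocations above are stolen as one contiguous block, validating
 * that an address belongs to this bootstrap memory reduces to a single
 * range check, roughly as below (hypothetical helper, not the zone code's
 * actual test).
 */
#if 0 /* example only, not compiled */
static boolean_t
vm_map_bootstrap_range_example(vm_offset_t addr)
{
	vm_offset_t base = map_data;
	vm_offset_t limit = map_data + map_data_size +
	    kentry_data_size + map_holes_data_size;

	return addr >= base && addr < limit;
}
#endif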
1191
1192 boolean_t vm_map_supports_hole_optimization = FALSE;
1193
1194 void
1195 vm_kernel_reserved_entry_init(void)
1196 {
1197 zone_prio_refill_configure(vm_map_entry_reserved_zone);
1198
1199 /*
1200 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1201 */
1202 zone_prio_refill_configure(vm_map_holes_zone);
1203 vm_map_supports_hole_optimization = TRUE;
1204 }
1205
1206 void
1207 vm_map_disable_hole_optimization(vm_map_t map)
1208 {
1209 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1210
1211 if (map->holelistenabled) {
1212 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1213
1214 while (hole_entry != NULL) {
1215 next_hole_entry = hole_entry->vme_next;
1216
1217 hole_entry->vme_next = NULL;
1218 hole_entry->vme_prev = NULL;
1219 zfree(vm_map_holes_zone, hole_entry);
1220
1221 if (next_hole_entry == head_entry) {
1222 hole_entry = NULL;
1223 } else {
1224 hole_entry = next_hole_entry;
1225 }
1226 }
1227
1228 map->holes_list = NULL;
1229 map->holelistenabled = FALSE;
1230
1231 map->first_free = vm_map_first_entry(map);
1232 SAVE_HINT_HOLE_WRITE(map, NULL);
1233 }
1234 }
1235
1236 boolean_t
1237 vm_kernel_map_is_kernel(vm_map_t map)
1238 {
1239 return map->pmap == kernel_pmap;
1240 }
1241
1242 /*
1243 * vm_map_create:
1244 *
1245 * Creates and returns a new empty VM map with
1246 * the given physical map structure, and having
1247 * the given lower and upper address bounds.
1248 */
1249
1250 vm_map_t
1251 vm_map_create(
1252 pmap_t pmap,
1253 vm_map_offset_t min,
1254 vm_map_offset_t max,
1255 boolean_t pageable)
1256 {
1257 int options;
1258
1259 options = 0;
1260 if (pageable) {
1261 options |= VM_MAP_CREATE_PAGEABLE;
1262 }
1263 return vm_map_create_options(pmap, min, max, options);
1264 }
1265
1266 vm_map_t
1267 vm_map_create_options(
1268 pmap_t pmap,
1269 vm_map_offset_t min,
1270 vm_map_offset_t max,
1271 int options)
1272 {
1273 vm_map_t result;
1274 struct vm_map_links *hole_entry = NULL;
1275
1276 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1277 /* unknown option */
1278 return VM_MAP_NULL;
1279 }
1280
1281 result = (vm_map_t) zalloc(vm_map_zone);
1282 if (result == VM_MAP_NULL) {
1283 panic("vm_map_create");
1284 }
1285
1286 vm_map_first_entry(result) = vm_map_to_entry(result);
1287 vm_map_last_entry(result) = vm_map_to_entry(result);
1288 result->hdr.nentries = 0;
1289 if (options & VM_MAP_CREATE_PAGEABLE) {
1290 result->hdr.entries_pageable = TRUE;
1291 } else {
1292 result->hdr.entries_pageable = FALSE;
1293 }
1294
1295 vm_map_store_init( &(result->hdr));
1296
1297 result->hdr.page_shift = PAGE_SHIFT;
1298
1299 result->size = 0;
1300 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1301 result->user_wire_size = 0;
1302 #if XNU_TARGET_OS_OSX
1303 result->vmmap_high_start = 0;
1304 #endif
1305 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1306 #if TASK_SWAPPER
1307 result->res_count = 1;
1308 result->sw_state = MAP_SW_IN;
1309 #endif /* TASK_SWAPPER */
1310 result->pmap = pmap;
1311 result->min_offset = min;
1312 result->max_offset = max;
1313 result->wiring_required = FALSE;
1314 result->no_zero_fill = FALSE;
1315 result->mapped_in_other_pmaps = FALSE;
1316 result->wait_for_space = FALSE;
1317 result->switch_protect = FALSE;
1318 result->disable_vmentry_reuse = FALSE;
1319 result->map_disallow_data_exec = FALSE;
1320 result->is_nested_map = FALSE;
1321 result->map_disallow_new_exec = FALSE;
1322 result->terminated = FALSE;
1323 result->cs_enforcement = FALSE;
1324 result->highest_entry_end = 0;
1325 result->first_free = vm_map_to_entry(result);
1326 result->hint = vm_map_to_entry(result);
1327 result->jit_entry_exists = FALSE;
1328 result->is_alien = FALSE;
1329 result->reserved_regions = FALSE;
1330
1331 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1332 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1333 result->has_corpse_footprint = TRUE;
1334 result->holelistenabled = FALSE;
1335 result->vmmap_corpse_footprint = NULL;
1336 } else {
1337 result->has_corpse_footprint = FALSE;
1338 if (vm_map_supports_hole_optimization) {
1339 hole_entry = zalloc(vm_map_holes_zone);
1340
1341 hole_entry->start = min;
1342 #if defined(__arm__) || defined(__arm64__)
1343 hole_entry->end = result->max_offset;
1344 #else
1345 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1346 #endif
1347 result->holes_list = result->hole_hint = hole_entry;
1348 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1349 result->holelistenabled = TRUE;
1350 } else {
1351 result->holelistenabled = FALSE;
1352 }
1353 }
1354
1355 vm_map_lock_init(result);
1356 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1357
1358 return result;
1359 }
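/*
 * Illustrative sketch (not part of the original source): a minimal caller of
 * vm_map_create().  The pmap is assumed to have been created elsewhere and
 * the address bounds are placeholders.
 */
#if 0 /* example only, not compiled */
static vm_map_t
vm_map_create_example(pmap_t new_pmap)
{
	/* pageable map covering the full user address range */
	return vm_map_create(new_pmap,
	    (vm_map_offset_t)MACH_VM_MIN_ADDRESS,
	    (vm_map_offset_t)MACH_VM_MAX_ADDRESS,
	    TRUE);
}
#endif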
1360
1361 vm_map_size_t
1362 vm_map_adjusted_size(vm_map_t map)
1363 {
1364 struct vm_reserved_region *regions = NULL;
1365 size_t num_regions = 0;
1366 mach_vm_size_t reserved_size = 0, map_size = 0;
1367
1368 if (map == NULL || (map->size == 0)) {
1369 return 0;
1370 }
1371
1372 map_size = map->size;
1373
1374 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1375 /*
1376 * No special reserved regions or not an exotic map or the task
1377 * is terminating and these special regions might have already
1378 * been deallocated.
1379 */
1380 return map_size;
1381 }
1382
1383 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
1384 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1385
1386 while (num_regions) {
1387 reserved_size += regions[--num_regions].vmrr_size;
1388 }
1389
1390 /*
1391 * There are a few places where the map is being switched out due to
1392 * 'termination' without that bit being set (e.g. exec and corpse purging).
1393 * In those cases, we could have the map's regions being deallocated on
1394 * a core while some accounting process is trying to get the map's size.
1395 * So this assert can't be enabled till all those places are uniform in
1396 * their use of the 'map->terminated' bit.
1397 *
1398 * assert(map_size >= reserved_size);
1399 */
1400
1401 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1402 }
1403
1404 /*
1405 * vm_map_entry_create: [ internal use only ]
1406 *
1407 * Allocates a VM map entry for insertion in the
1408 * given map (or map copy). No fields are filled.
1409 */
1410 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1411
1412 #define vm_map_copy_entry_create(copy, map_locked) \
1413 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1414 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1415
1416 static vm_map_entry_t
1417 _vm_map_entry_create(
1418 struct vm_map_header *map_header, boolean_t __unused map_locked)
1419 {
1420 zone_t zone;
1421 vm_map_entry_t entry;
1422
1423 zone = vm_map_entry_zone;
1424
1425 assert(map_header->entries_pageable ? !map_locked : TRUE);
1426
1427 if (map_header->entries_pageable) {
1428 entry = (vm_map_entry_t) zalloc(zone);
1429 } else {
1430 entry = (vm_map_entry_t) zalloc_noblock(zone);
1431
1432 if (entry == VM_MAP_ENTRY_NULL) {
1433 zone = vm_map_entry_reserved_zone;
1434 entry = (vm_map_entry_t) zalloc(zone);
1435 OSAddAtomic(1, &reserved_zalloc_count);
1436 } else {
1437 OSAddAtomic(1, &nonreserved_zalloc_count);
1438 }
1439 }
1440
1441 if (entry == VM_MAP_ENTRY_NULL) {
1442 panic("vm_map_entry_create");
1443 }
1444 *entry = vm_map_entry_template;
1445 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1446
1447 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1448 #if MAP_ENTRY_CREATION_DEBUG
1449 entry->vme_creation_maphdr = map_header;
1450 backtrace(&entry->vme_creation_bt[0],
1451 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1452 #endif
1453 return entry;
1454 }
1455
1456 /*
1457 * vm_map_entry_dispose: [ internal use only ]
1458 *
1459 * Inverse of vm_map_entry_create.
1460 *
1461 * The write map lock is held, so there is no need to
1462 * do anything special to ensure the correctness
1463 * of the stores.
1464 */
1465 #define vm_map_entry_dispose(map, entry) \
1466 _vm_map_entry_dispose(&(map)->hdr, (entry))
1467
1468 #define vm_map_copy_entry_dispose(copy, entry) \
1469 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1470
1471 static void
1472 _vm_map_entry_dispose(
1473 struct vm_map_header *map_header,
1474 vm_map_entry_t entry)
1475 {
1476 zone_t zone;
1477
1478 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1479 zone = vm_map_entry_zone;
1480 } else {
1481 zone = vm_map_entry_reserved_zone;
1482 }
1483
1484 if (!map_header->entries_pageable) {
1485 if (zone == vm_map_entry_zone) {
1486 OSAddAtomic(-1, &nonreserved_zalloc_count);
1487 } else {
1488 OSAddAtomic(-1, &reserved_zalloc_count);
1489 }
1490 }
1491
1492 zfree(zone, entry);
1493 }
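/*
 * Illustrative sketch (not part of the original source): entry allocation
 * and disposal always go through the macros above so the reserved-zone
 * accounting stays balanced.  The wrapper function is hypothetical.
 */
#if 0 /* example only, not compiled */
static void
vm_map_entry_lifecycle_example(vm_map_t map)
{
	vm_map_entry_t entry;

	/* allocate while unlocked: pageable headers may block in zalloc() */
	entry = vm_map_entry_create(map, FALSE /* map_locked */);

	/* ... initialize and link the entry under the map lock, or ... */

	vm_map_entry_dispose(map, entry);
}
#endif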
1494
1495 #if MACH_ASSERT
1496 static boolean_t first_free_check = FALSE;
1497 boolean_t
1498 first_free_is_valid(
1499 vm_map_t map)
1500 {
1501 if (!first_free_check) {
1502 return TRUE;
1503 }
1504
1505 return first_free_is_valid_store( map );
1506 }
1507 #endif /* MACH_ASSERT */
1508
1509
1510 #define vm_map_copy_entry_link(copy, after_where, entry) \
1511 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1512
1513 #define vm_map_copy_entry_unlink(copy, entry) \
1514 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1515
1516 #if MACH_ASSERT && TASK_SWAPPER
1517 /*
1518 * vm_map_res_reference:
1519 *
1520 * Adds another valid residence count to the given map.
1521 *
1522 * Map is locked so this function can be called from
1523 * vm_map_swapin.
1524 *
1525 */
1526 void
1527 vm_map_res_reference(vm_map_t map)
1528 {
1529 /* assert map is locked */
1530 assert(map->res_count >= 0);
1531 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1532 if (map->res_count == 0) {
1533 lck_mtx_unlock(&map->s_lock);
1534 vm_map_lock(map);
1535 vm_map_swapin(map);
1536 lck_mtx_lock(&map->s_lock);
1537 ++map->res_count;
1538 vm_map_unlock(map);
1539 } else {
1540 ++map->res_count;
1541 }
1542 }
1543
1544 /*
1545 * vm_map_reference_swap:
1546 *
1547 * Adds valid reference and residence counts to the given map.
1548 *
1549 * The map may not be in memory (i.e. zero residence count).
1550 *
1551 */
1552 void
1553 vm_map_reference_swap(vm_map_t map)
1554 {
1555 assert(map != VM_MAP_NULL);
1556 lck_mtx_lock(&map->s_lock);
1557 assert(map->res_count >= 0);
1558 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1559 os_ref_retain_locked(&map->map_refcnt);
1560 vm_map_res_reference(map);
1561 lck_mtx_unlock(&map->s_lock);
1562 }
1563
1564 /*
1565 * vm_map_res_deallocate:
1566 *
1567 * Decrement residence count on a map; possibly causing swapout.
1568 *
1569 * The map must be in memory (i.e. non-zero residence count).
1570 *
1571 * The map is locked, so this function is callable from vm_map_deallocate.
1572 *
1573 */
1574 void
1575 vm_map_res_deallocate(vm_map_t map)
1576 {
1577 assert(map->res_count > 0);
1578 if (--map->res_count == 0) {
1579 lck_mtx_unlock(&map->s_lock);
1580 vm_map_lock(map);
1581 vm_map_swapout(map);
1582 vm_map_unlock(map);
1583 lck_mtx_lock(&map->s_lock);
1584 }
1585 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1586 }
1587 #endif /* MACH_ASSERT && TASK_SWAPPER */
1588
1589 /*
1590 * vm_map_destroy:
1591 *
1592 * Actually destroy a map.
1593 */
1594 void
1595 vm_map_destroy(
1596 vm_map_t map,
1597 int flags)
1598 {
1599 vm_map_lock(map);
1600
1601 /* final cleanup: no need to unnest shared region */
1602 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1603 /* final cleanup: ok to remove immutable mappings */
1604 flags |= VM_MAP_REMOVE_IMMUTABLE;
1605 /* final cleanup: allow gaps in range */
1606 flags |= VM_MAP_REMOVE_GAPS_OK;
1607
1608 /* clean up regular map entries */
1609 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1610 flags, VM_MAP_NULL);
1611 /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1612 #if !defined(__arm__)
1613 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1614 flags, VM_MAP_NULL);
1615 #endif /* !__arm__ */
1616
1617 vm_map_disable_hole_optimization(map);
1618 vm_map_corpse_footprint_destroy(map);
1619
1620 vm_map_unlock(map);
1621
1622 assert(map->hdr.nentries == 0);
1623
1624 if (map->pmap) {
1625 pmap_destroy(map->pmap);
1626 }
1627
1628 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1629 /*
1630 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1631 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1632 * structure or kalloc'ed via lck_mtx_init.
1633 * An example is s_lock_ext within struct _vm_map.
1634 *
1635 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1636 * can add another tag to detect embedded vs alloc'ed indirect external
1637 * mutexes but that'll be additional checks in the lock path and require
1638 * updating dependencies for the old vs new tag.
1639 *
1640 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1641 * just when lock debugging is ON, we choose to forego explicitly destroying
1642 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1643 * count on vm_map_lck_grp, which has no serious side-effect.
1644 */
1645 } else {
1646 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1647 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1648 }
1649
1650 zfree(vm_map_zone, map);
1651 }
1652
1653 /*
1654 * Returns pid of the task with the largest number of VM map entries.
1655 * Used in the zone-map-exhaustion jetsam path.
1656 */
1657 pid_t
1658 find_largest_process_vm_map_entries(void)
1659 {
1660 pid_t victim_pid = -1;
1661 int max_vm_map_entries = 0;
1662 task_t task = TASK_NULL;
1663 queue_head_t *task_list = &tasks;
1664
1665 lck_mtx_lock(&tasks_threads_lock);
1666 queue_iterate(task_list, task, task_t, tasks) {
1667 if (task == kernel_task || !task->active) {
1668 continue;
1669 }
1670
1671 vm_map_t task_map = task->map;
1672 if (task_map != VM_MAP_NULL) {
1673 int task_vm_map_entries = task_map->hdr.nentries;
1674 if (task_vm_map_entries > max_vm_map_entries) {
1675 max_vm_map_entries = task_vm_map_entries;
1676 victim_pid = pid_from_task(task);
1677 }
1678 }
1679 }
1680 lck_mtx_unlock(&tasks_threads_lock);
1681
1682 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1683 return victim_pid;
1684 }
1685
1686 #if TASK_SWAPPER
1687 /*
1688 * vm_map_swapin/vm_map_swapout
1689 *
1690 * Swap a map in and out, either referencing or releasing its resources.
1691 * These functions are internal use only; however, they must be exported
1692 * because they may be called from macros, which are exported.
1693 *
1694 * In the case of swapout, there could be races on the residence count,
1695 * so if the residence count is up, we return, assuming that a
1696 * vm_map_deallocate() call in the near future will bring us back.
1697 *
1698 * Locking:
1699 * -- We use the map write lock for synchronization among races.
1700 * -- The map write lock, and not the simple s_lock, protects the
1701 * swap state of the map.
1702 * -- If a map entry is a share map, then we hold both locks, in
1703 * hierarchical order.
1704 *
1705 * Synchronization Notes:
1706 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1707 * will block on the map lock and proceed when swapout is through.
1708 * 2) A vm_map_reference() call at this time is illegal, and will
1709 * cause a panic. vm_map_reference() is only allowed on resident
1710 * maps, since it refuses to block.
1711 * 3) A vm_map_swapin() call during a swapin will block, and
1712 * proceed when the first swapin is done, turning into a nop.
1713 * This is the reason the res_count is not incremented until
1714 * after the swapin is complete.
1715 * 4) There is a timing hole after the checks of the res_count, before
1716 * the map lock is taken, during which a swapin may get the lock
1717 * before a swapout about to happen. If this happens, the swapin
1718 * will detect the state and increment the reference count, causing
1719 * the swapout to be a nop, thereby delaying it until a later
1720 * vm_map_deallocate. If the swapout gets the lock first, then
1721 * the swapin will simply block until the swapout is done, and
1722 * then proceed.
1723 *
1724 * Because vm_map_swapin() is potentially an expensive operation, it
1725 * should be used with caution.
1726 *
1727 * Invariants:
1728 * 1) A map with a residence count of zero is either swapped, or
1729 * being swapped.
1730 * 2) A map with a non-zero residence count is either resident,
1731 * or being swapped in.
1732 */
1733
1734 int vm_map_swap_enable = 1;
1735
1736 void
1737 vm_map_swapin(vm_map_t map)
1738 {
1739 vm_map_entry_t entry;
1740
1741 if (!vm_map_swap_enable) { /* debug */
1742 return;
1743 }
1744
1745 /*
1746 * Map is locked
1747 * First deal with various races.
1748 */
1749 if (map->sw_state == MAP_SW_IN) {
1750 /*
1751 * we raced with swapout and won. Returning will incr.
1752 * the res_count, turning the swapout into a nop.
1753 */
1754 return;
1755 }
1756
1757 /*
1758 * The residence count must be zero. If we raced with another
1759 * swapin, the state would have been IN; if we raced with a
1760 * swapout (after another competing swapin), we must have lost
1761 * the race to get here (see above comment), in which case
1762 * res_count is still 0.
1763 */
1764 assert(map->res_count == 0);
1765
1766 /*
1767 * There are no intermediate states of a map going out or
1768 * coming in, since the map is locked during the transition.
1769 */
1770 assert(map->sw_state == MAP_SW_OUT);
1771
1772 /*
1773 * We now operate upon each map entry. If the entry is a sub-
1774 * or share-map, we call vm_map_res_reference upon it.
1775 * If the entry is an object, we call vm_object_res_reference
1776 * (this may iterate through the shadow chain).
1777 * Note that we hold the map locked the entire time,
1778 * even if we get back here via a recursive call in
1779 * vm_map_res_reference.
1780 */
1781 entry = vm_map_first_entry(map);
1782
1783 while (entry != vm_map_to_entry(map)) {
1784 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1785 if (entry->is_sub_map) {
1786 vm_map_t lmap = VME_SUBMAP(entry);
1787 lck_mtx_lock(&lmap->s_lock);
1788 vm_map_res_reference(lmap);
1789 lck_mtx_unlock(&lmap->s_lock);
1790 } else {
1791 vm_object_t object = VME_OBJECT(entry);
1792 vm_object_lock(object);
1793 /*
1794 * This call may iterate through the
1795 * shadow chain.
1796 */
1797 vm_object_res_reference(object);
1798 vm_object_unlock(object);
1799 }
1800 }
1801 entry = entry->vme_next;
1802 }
1803 assert(map->sw_state == MAP_SW_OUT);
1804 map->sw_state = MAP_SW_IN;
1805 }
1806
1807 void
1808 vm_map_swapout(vm_map_t map)
1809 {
1810 vm_map_entry_t entry;
1811
1812 /*
1813 * Map is locked
1814 * First deal with various races.
1815 * If we raced with a swapin and lost, the residence count
1816 * will have been incremented to 1, and we simply return.
1817 */
1818 lck_mtx_lock(&map->s_lock);
1819 if (map->res_count != 0) {
1820 lck_mtx_unlock(&map->s_lock);
1821 return;
1822 }
1823 lck_mtx_unlock(&map->s_lock);
1824
1825 /*
1826 * There are no intermediate states of a map going out or
1827 * coming in, since the map is locked during the transition.
1828 */
1829 assert(map->sw_state == MAP_SW_IN);
1830
1831 if (!vm_map_swap_enable) {
1832 return;
1833 }
1834
1835 /*
1836 * We now operate upon each map entry. If the entry is a sub-
1837 * or share-map, we call vm_map_res_deallocate upon it.
1838 * If the entry is an object, we call vm_object_res_deallocate
1839 * (this may iterate through the shadow chain).
1840 * Note that we hold the map locked the entire time,
1841 * even if we get back here via a recursive call in
1842 * vm_map_res_deallocate.
1843 */
1844 entry = vm_map_first_entry(map);
1845
1846 while (entry != vm_map_to_entry(map)) {
1847 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1848 if (entry->is_sub_map) {
1849 vm_map_t lmap = VME_SUBMAP(entry);
1850 lck_mtx_lock(&lmap->s_lock);
1851 vm_map_res_deallocate(lmap);
1852 lck_mtx_unlock(&lmap->s_lock);
1853 } else {
1854 vm_object_t object = VME_OBJECT(entry);
1855 vm_object_lock(object);
1856 /*
1857 * This call may take a long time,
1858 * since it could actively push
1859 * out pages (if we implement it
1860 * that way).
1861 */
1862 vm_object_res_deallocate(object);
1863 vm_object_unlock(object);
1864 }
1865 }
1866 entry = entry->vme_next;
1867 }
1868 assert(map->sw_state == MAP_SW_IN);
1869 map->sw_state = MAP_SW_OUT;
1870 }
1871
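/*
 * Illustrative sketch (not part of the original source): the checks that
 * make a map a candidate for vm_map_swapout(), mirroring the tests the
 * routine itself performs.  Assumes the caller holds the map write lock,
 * which protects sw_state; res_count is sampled under the simple s_lock,
 * as above.
 */
static __unused boolean_t
vm_map_swapout_candidate(vm_map_t map)
{
        boolean_t res_zero;

        lck_mtx_lock(&map->s_lock);
        res_zero = (map->res_count == 0);
        lck_mtx_unlock(&map->s_lock);

        /* swappable only if no residence references and currently resident */
        return res_zero && (map->sw_state == MAP_SW_IN);
}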
1872 #endif /* TASK_SWAPPER */
1873
1874 /*
1875 * vm_map_lookup_entry: [ internal use only ]
1876 *
1877 * Calls into the vm map store layer to find the map
1878 * entry containing (or immediately preceding) the
1879 * specified address in the given map; the entry is returned
1880 * in the "entry" parameter. The boolean
1881 * result indicates whether the address is
1882 * actually contained in the map.
1883 */
1884 boolean_t
1885 vm_map_lookup_entry(
1886 vm_map_t map,
1887 vm_map_offset_t address,
1888 vm_map_entry_t *entry) /* OUT */
1889 {
1890 return vm_map_store_lookup_entry( map, address, entry );
1891 }
1892
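/*
 * Illustrative sketch (not part of the original source): typical use of
 * vm_map_lookup_entry() with the map lock held for read.  "some_address"
 * is a placeholder; the boolean result tells whether the address falls
 * inside the returned entry or only after it.
 */
static __unused boolean_t
vm_map_address_is_mapped(vm_map_t map, vm_map_offset_t some_address)
{
        vm_map_entry_t entry;
        boolean_t mapped;

        vm_map_lock_read(map);
        mapped = vm_map_lookup_entry(map, some_address, &entry);
        vm_map_unlock_read(map);
        return mapped;
}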
1893 /*
1894 * Routine: vm_map_find_space
1895 * Purpose:
1896 * Allocate a range in the specified virtual address map,
1897 * returning the entry allocated for that range.
1898 * Used by kmem_alloc, etc.
1899 *
1900 * The map must NOT be locked. It will be returned locked
1901 * on KERN_SUCCESS, unlocked on failure.
1902 *
1903 * If an entry is allocated, the object/offset fields
1904 * are initialized to zero.
1905 *
1906 * If the VM_MAP_FIND_LAST_FREE flag is set, allocate from the end of the
1907 * map. This is currently only used for allocating memory for zones backing
1908 * one of the kalloc heaps. (rdar://65832263)
1909 */
1910 kern_return_t
1911 vm_map_find_space(
1912 vm_map_t map,
1913 vm_map_offset_t *address, /* OUT */
1914 vm_map_size_t size,
1915 vm_map_offset_t mask,
1916 int flags,
1917 vm_map_kernel_flags_t vmk_flags,
1918 vm_tag_t tag,
1919 vm_map_entry_t *o_entry) /* OUT */
1920 {
1921 vm_map_entry_t entry, new_entry, hole_entry;
1922 vm_map_offset_t start;
1923 vm_map_offset_t end;
1924
1925 if (size == 0) {
1926 *address = 0;
1927 return KERN_INVALID_ARGUMENT;
1928 }
1929
1930 new_entry = vm_map_entry_create(map, FALSE);
1931 vm_map_lock(map);
1932
1933 if (flags & VM_MAP_FIND_LAST_FREE) {
1934 assert(!map->disable_vmentry_reuse);
1935 /* TODO: Make backward lookup generic and support guard pages */
1936 assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
1937 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
1938
1939 /* Allocate space from end of map */
1940 vm_map_store_find_last_free(map, &entry);
1941
1942 if (!entry) {
1943 goto noSpace;
1944 }
1945
1946 if (entry == vm_map_to_entry(map)) {
1947 end = map->max_offset;
1948 } else {
1949 end = entry->vme_start;
1950 }
1951
1952 while (TRUE) {
1953 vm_map_entry_t prev;
1954
1955 start = end - size;
1956
1957 if ((start < map->min_offset) || end < start) {
1958 goto noSpace;
1959 }
1960
1961 prev = entry->vme_prev;
1962 entry = prev;
1963
1964 if (prev == vm_map_to_entry(map)) {
1965 break;
1966 }
1967
1968 if (prev->vme_end <= start) {
1969 break;
1970 }
1971
1972 /*
1973 * Didn't fit -- move to the next entry.
1974 */
1975
1976 end = entry->vme_start;
1977 }
1978 } else {
1979 if (vmk_flags.vmkf_guard_after) {
1980 /* account for the back guard page in the size */
1981 size += VM_MAP_PAGE_SIZE(map);
1982 }
1983
1984 /*
1985 * Look for the first possible address; if there's already
1986 * something at this address, we have to start after it.
1987 */
1988
1989 if (map->disable_vmentry_reuse == TRUE) {
1990 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1991 } else {
1992 if (map->holelistenabled) {
1993 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1994
1995 if (hole_entry == NULL) {
1996 /*
1997 * No more space in the map?
1998 */
1999 goto noSpace;
2000 }
2001
2002 entry = hole_entry;
2003 start = entry->vme_start;
2004 } else {
2005 assert(first_free_is_valid(map));
2006 if ((entry = map->first_free) == vm_map_to_entry(map)) {
2007 start = map->min_offset;
2008 } else {
2009 start = entry->vme_end;
2010 }
2011 }
2012 }
2013
2014 /*
2015 * In any case, the "entry" always precedes
2016 * the proposed new region throughout the loop:
2017 */
2018
2019 while (TRUE) {
2020 vm_map_entry_t next;
2021
2022 /*
2023 * Find the end of the proposed new region.
2024 * Be sure we didn't go beyond the end, or
2025 * wrap around the address.
2026 */
2027
2028 if (vmk_flags.vmkf_guard_before) {
2029 /* reserve space for the front guard page */
2030 start += VM_MAP_PAGE_SIZE(map);
2031 }
2032 end = ((start + mask) & ~mask);
2033
2034 if (end < start) {
2035 goto noSpace;
2036 }
2037 start = end;
2038 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
2039 end += size;
2040 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
2041
2042 if ((end > map->max_offset) || (end < start)) {
2043 goto noSpace;
2044 }
2045
2046 next = entry->vme_next;
2047
2048 if (map->holelistenabled) {
2049 if (entry->vme_end >= end) {
2050 break;
2051 }
2052 } else {
2053 /*
2054 * If there are no more entries, we must win.
2055 *
2056 * OR
2057 *
2058 * If there is another entry, it must be
2059 * after the end of the potential new region.
2060 */
2061
2062 if (next == vm_map_to_entry(map)) {
2063 break;
2064 }
2065
2066 if (next->vme_start >= end) {
2067 break;
2068 }
2069 }
2070
2071 /*
2072 * Didn't fit -- move to the next entry.
2073 */
2074
2075 entry = next;
2076
2077 if (map->holelistenabled) {
2078 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2079 /*
2080 * Wrapped around
2081 */
2082 goto noSpace;
2083 }
2084 start = entry->vme_start;
2085 } else {
2086 start = entry->vme_end;
2087 }
2088 }
2089
2090 if (vmk_flags.vmkf_guard_before) {
2091 /* go back for the front guard page */
2092 start -= VM_MAP_PAGE_SIZE(map);
2093 }
2094 }
2095
2096 if (map->holelistenabled) {
2097 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2098 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2099 }
2100 }
2101
2102 /*
2103 * At this point,
2104 * "start" and "end" should define the endpoints of the
2105 * available new range, and
2106 * "entry" should refer to the region before the new
2107 * range, and
2108 *
2109 * the map should be locked.
2110 */
2111
2112 *address = start;
2113
2114 assert(start < end);
2115 new_entry->vme_start = start;
2116 new_entry->vme_end = end;
2117 assert(page_aligned(new_entry->vme_start));
2118 assert(page_aligned(new_entry->vme_end));
2119 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
2120 VM_MAP_PAGE_MASK(map)));
2121 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
2122 VM_MAP_PAGE_MASK(map)));
2123
2124 new_entry->is_shared = FALSE;
2125 new_entry->is_sub_map = FALSE;
2126 new_entry->use_pmap = TRUE;
2127 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
2128 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
2129
2130 new_entry->needs_copy = FALSE;
2131
2132 new_entry->inheritance = VM_INHERIT_DEFAULT;
2133 new_entry->protection = VM_PROT_DEFAULT;
2134 new_entry->max_protection = VM_PROT_ALL;
2135 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
2136 new_entry->wired_count = 0;
2137 new_entry->user_wired_count = 0;
2138
2139 new_entry->in_transition = FALSE;
2140 new_entry->needs_wakeup = FALSE;
2141 new_entry->no_cache = FALSE;
2142 new_entry->permanent = FALSE;
2143 new_entry->superpage_size = FALSE;
2144 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
2145 new_entry->map_aligned = TRUE;
2146 } else {
2147 new_entry->map_aligned = FALSE;
2148 }
2149
2150 new_entry->used_for_jit = FALSE;
2151 new_entry->pmap_cs_associated = FALSE;
2152 new_entry->zero_wired_pages = FALSE;
2153 new_entry->iokit_acct = FALSE;
2154 new_entry->vme_resilient_codesign = FALSE;
2155 new_entry->vme_resilient_media = FALSE;
2156 if (vmk_flags.vmkf_atomic_entry) {
2157 new_entry->vme_atomic = TRUE;
2158 } else {
2159 new_entry->vme_atomic = FALSE;
2160 }
2161
2162 VME_ALIAS_SET(new_entry, tag);
2163
2164 /*
2165 * Insert the new entry into the list
2166 */
2167
2168 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
2169
2170 map->size += size;
2171
2172 /*
2173 * Update the lookup hint
2174 */
2175 SAVE_HINT_MAP_WRITE(map, new_entry);
2176
2177 *o_entry = new_entry;
2178 return KERN_SUCCESS;
2179
2180 noSpace:
2181
2182 vm_map_entry_dispose(map, new_entry);
2183 vm_map_unlock(map);
2184 return KERN_NO_SPACE;
2185 }
2186
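/*
 * Illustrative sketch (not part of the original source): a minimal
 * allocation via vm_map_find_space().  On KERN_SUCCESS the map is
 * returned locked, so the caller finishes setting up the entry and then
 * unlocks.  "size" and "addr_out" are placeholders supplied by the caller.
 */
static __unused kern_return_t
vm_map_find_space_example(vm_map_t map, vm_map_size_t size,
    vm_map_offset_t *addr_out)
{
        vm_map_entry_t entry;
        kern_return_t kr;

        kr = vm_map_find_space(map, addr_out, size, (vm_map_offset_t)0,
            0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE, &entry);
        if (kr == KERN_SUCCESS) {
                /* object/offset of "entry" are zeroed; map is still locked */
                vm_map_unlock(map);
        }
        return kr;
}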
2187 int vm_map_pmap_enter_print = FALSE;
2188 int vm_map_pmap_enter_enable = FALSE;
2189
2190 /*
2191 * Routine: vm_map_pmap_enter [internal only]
2192 *
2193 * Description:
2194 * Force pages from the specified object to be entered into
2195 * the pmap at the specified address if they are present.
2196 * As soon as a page is not found in the object, the scan ends.
2197 *
2198 * Returns:
2199 * Nothing.
2200 *
2201 * In/out conditions:
2202 * The source map should not be locked on entry.
2203 */
2204 __unused static void
2205 vm_map_pmap_enter(
2206 vm_map_t map,
2207 vm_map_offset_t addr,
2208 vm_map_offset_t end_addr,
2209 vm_object_t object,
2210 vm_object_offset_t offset,
2211 vm_prot_t protection)
2212 {
2213 int type_of_fault;
2214 kern_return_t kr;
2215 struct vm_object_fault_info fault_info = {};
2216
2217 if (map->pmap == 0) {
2218 return;
2219 }
2220
2221 assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);
2222
2223 while (addr < end_addr) {
2224 vm_page_t m;
2225
2226
2227 /*
2228 * TODO:
2229 * From vm_map_enter(), we come into this function without the map
2230 * lock held or the object lock held.
2231 * We haven't taken a reference on the object either.
2232 * We should do a proper lookup on the map to make sure
2233 * that things are sane before we go locking objects that
2234 * could have been deallocated from under us.
2235 */
2236
2237 vm_object_lock(object);
2238
2239 m = vm_page_lookup(object, offset);
2240
2241 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2242 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2243 vm_object_unlock(object);
2244 return;
2245 }
2246
2247 if (vm_map_pmap_enter_print) {
2248 printf("vm_map_pmap_enter:");
2249 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2250 map, (unsigned long long)addr, object, (unsigned long long)offset);
2251 }
2252 type_of_fault = DBG_CACHE_HIT_FAULT;
2253 kr = vm_fault_enter(m, map->pmap,
2254 addr,
2255 PAGE_SIZE, 0,
2256 protection, protection,
2257 VM_PAGE_WIRED(m),
2258 FALSE, /* change_wiring */
2259 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2260 &fault_info,
2261 NULL, /* need_retry */
2262 &type_of_fault);
2263
2264 vm_object_unlock(object);
2265
2266 offset += PAGE_SIZE_64;
2267 addr += PAGE_SIZE;
2268 }
2269 }
2270
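/*
 * Illustrative sketch (not part of the original source): how a caller
 * could prefault an already-mapped, resident range with
 * vm_map_pmap_enter().  The routine is currently unused ("__unused"
 * above), so this only shows its intended calling pattern.
 */
static __unused void
vm_map_pmap_enter_example(vm_map_t map, vm_map_offset_t start,
    vm_map_size_t size, vm_object_t object, vm_object_offset_t offset)
{
        /* enter any resident pages read-only; stops at the first absent page */
        vm_map_pmap_enter(map, start, start + size, object, offset,
            VM_PROT_READ);
}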
2271 boolean_t vm_map_pmap_is_empty(
2272 vm_map_t map,
2273 vm_map_offset_t start,
2274 vm_map_offset_t end);
2275 boolean_t
2276 vm_map_pmap_is_empty(
2277 vm_map_t map,
2278 vm_map_offset_t start,
2279 vm_map_offset_t end)
2280 {
2281 #ifdef MACHINE_PMAP_IS_EMPTY
2282 return pmap_is_empty(map->pmap, start, end);
2283 #else /* MACHINE_PMAP_IS_EMPTY */
2284 vm_map_offset_t offset;
2285 ppnum_t phys_page;
2286
2287 if (map->pmap == NULL) {
2288 return TRUE;
2289 }
2290
2291 for (offset = start;
2292 offset < end;
2293 offset += PAGE_SIZE) {
2294 phys_page = pmap_find_phys(map->pmap, offset);
2295 if (phys_page) {
2296 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2297 "page %d at 0x%llx\n",
2298 map, (long long)start, (long long)end,
2299 phys_page, (long long)offset);
2300 return FALSE;
2301 }
2302 }
2303 return TRUE;
2304 #endif /* MACHINE_PMAP_IS_EMPTY */
2305 }
2306
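/*
 * Illustrative sketch (not part of the original source): the DEBUG-style
 * sanity check used later in vm_map_enter(), asserting that no physical
 * pages are already entered in the pmap for a range that is about to be
 * mapped.
 */
static __unused void
vm_map_assert_range_unmapped(vm_map_t map, vm_map_offset_t start,
    vm_map_offset_t end)
{
        assert(vm_map_pmap_is_empty(map, start, end));
}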
2307 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2308 kern_return_t
2309 vm_map_random_address_for_size(
2310 vm_map_t map,
2311 vm_map_offset_t *address,
2312 vm_map_size_t size)
2313 {
2314 kern_return_t kr = KERN_SUCCESS;
2315 int tries = 0;
2316 vm_map_offset_t random_addr = 0;
2317 vm_map_offset_t hole_end;
2318
2319 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2320 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2321 vm_map_size_t vm_hole_size = 0;
2322 vm_map_size_t addr_space_size;
2323
2324 addr_space_size = vm_map_max(map) - vm_map_min(map);
2325
2326 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
2327
2328 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2329 random_addr = ((vm_map_offset_t)random()) << VM_MAP_PAGE_SHIFT(map);
2330 random_addr = vm_map_trunc_page(
2331 vm_map_min(map) + (random_addr % addr_space_size),
2332 VM_MAP_PAGE_MASK(map));
2333
2334 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2335 if (prev_entry == vm_map_to_entry(map)) {
2336 next_entry = vm_map_first_entry(map);
2337 } else {
2338 next_entry = prev_entry->vme_next;
2339 }
2340 if (next_entry == vm_map_to_entry(map)) {
2341 hole_end = vm_map_max(map);
2342 } else {
2343 hole_end = next_entry->vme_start;
2344 }
2345 vm_hole_size = hole_end - random_addr;
2346 if (vm_hole_size >= size) {
2347 *address = random_addr;
2348 break;
2349 }
2350 }
2351 tries++;
2352 }
2353
2354 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2355 kr = KERN_NO_SPACE;
2356 }
2357 return kr;
2358 }
2359
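/*
 * Illustrative sketch (not part of the original source): picking a
 * randomized start address for an allocation, as vm_map_enter() does for
 * VM_FLAGS_RANDOM_ADDR.  Assumes the caller already holds the map lock,
 * since the routine walks the map with vm_map_lookup_entry().
 */
static __unused kern_return_t
vm_map_pick_random_start(vm_map_t map, vm_map_size_t size,
    vm_map_offset_t *start_out)
{
        /* size must be aligned to the map's page size */
        assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
        return vm_map_random_address_for_size(map, start_out, size);
}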
2360 static boolean_t
2361 vm_memory_malloc_no_cow(
2362 int alias)
2363 {
2364 uint64_t alias_mask;
2365
2366 if (alias > 63) {
2367 return FALSE;
2368 }
2369
2370 alias_mask = 1ULL << alias;
2371 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2372 return TRUE;
2373 }
2374 return FALSE;
2375 }
2376
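/*
 * Illustrative sketch (not part of the original source): how the
 * vm_memory_malloc_no_cow_mask bitmap is consulted.  Each bit position
 * corresponds to a VM_MEMORY_* alias (0..63); for example, testing the
 * VM_MEMORY_MALLOC alias:
 */
static __unused boolean_t
vm_memory_malloc_no_cow_example(void)
{
        /* TRUE only if bit VM_MEMORY_MALLOC is set in vm_memory_malloc_no_cow_mask */
        return vm_memory_malloc_no_cow(VM_MEMORY_MALLOC);
}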
2377 /*
2378 * Routine: vm_map_enter
2379 *
2380 * Description:
2381 * Allocate a range in the specified virtual address map.
2382 * The resulting range will refer to memory defined by
2383 * the given memory object and offset into that object.
2384 *
2385 * Arguments are as defined in the vm_map call.
2386 */
2387 static unsigned int vm_map_enter_restore_successes = 0;
2388 static unsigned int vm_map_enter_restore_failures = 0;
2389 kern_return_t
2390 vm_map_enter(
2391 vm_map_t map,
2392 vm_map_offset_t *address, /* IN/OUT */
2393 vm_map_size_t size,
2394 vm_map_offset_t mask,
2395 int flags,
2396 vm_map_kernel_flags_t vmk_flags,
2397 vm_tag_t alias,
2398 vm_object_t object,
2399 vm_object_offset_t offset,
2400 boolean_t needs_copy,
2401 vm_prot_t cur_protection,
2402 vm_prot_t max_protection,
2403 vm_inherit_t inheritance)
2404 {
2405 vm_map_entry_t entry, new_entry;
2406 vm_map_offset_t start, tmp_start, tmp_offset;
2407 vm_map_offset_t end, tmp_end;
2408 vm_map_offset_t tmp2_start, tmp2_end;
2409 vm_map_offset_t desired_empty_end;
2410 vm_map_offset_t step;
2411 kern_return_t result = KERN_SUCCESS;
2412 vm_map_t zap_old_map = VM_MAP_NULL;
2413 vm_map_t zap_new_map = VM_MAP_NULL;
2414 boolean_t map_locked = FALSE;
2415 boolean_t pmap_empty = TRUE;
2416 boolean_t new_mapping_established = FALSE;
2417 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2418 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2419 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2420 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2421 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2422 boolean_t is_submap = vmk_flags.vmkf_submap;
2423 boolean_t permanent = vmk_flags.vmkf_permanent;
2424 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2425 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2426 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2427 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
2428 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2429 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2430 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2431 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2432 vm_tag_t user_alias;
2433 vm_map_offset_t effective_min_offset, effective_max_offset;
2434 kern_return_t kr;
2435 boolean_t clear_map_aligned = FALSE;
2436 vm_map_entry_t hole_entry;
2437 vm_map_size_t chunk_size = 0;
2438
2439 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2440
2441 if (flags & VM_FLAGS_4GB_CHUNK) {
2442 #if defined(__LP64__)
2443 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2444 #else /* __LP64__ */
2445 chunk_size = ANON_CHUNK_SIZE;
2446 #endif /* __LP64__ */
2447 } else {
2448 chunk_size = ANON_CHUNK_SIZE;
2449 }
2450
2451 if (superpage_size) {
2452 switch (superpage_size) {
2453 /*
2454 * Note that the current implementation only supports
2455 * a single size for superpages, SUPERPAGE_SIZE, per
2456 * architecture. As soon as more sizes are supposed
2457 * to be supported, SUPERPAGE_SIZE has to be replaced
2458 * with a lookup of the size depending on superpage_size.
2459 */
2460 #ifdef __x86_64__
2461 case SUPERPAGE_SIZE_ANY:
2462 /* handle it like 2 MB and round up to page size */
2463 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2464 OS_FALLTHROUGH;
2465 case SUPERPAGE_SIZE_2MB:
2466 break;
2467 #endif
2468 default:
2469 return KERN_INVALID_ARGUMENT;
2470 }
2471 mask = SUPERPAGE_SIZE - 1;
2472 if (size & (SUPERPAGE_SIZE - 1)) {
2473 return KERN_INVALID_ARGUMENT;
2474 }
2475 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2476 }
2477
2478
2479 if ((cur_protection & VM_PROT_WRITE) &&
2480 (cur_protection & VM_PROT_EXECUTE) &&
2481 #if XNU_TARGET_OS_OSX
2482 map->pmap != kernel_pmap &&
2483 (cs_process_global_enforcement() ||
2484 (vmk_flags.vmkf_cs_enforcement_override
2485 ? vmk_flags.vmkf_cs_enforcement
2486 : (vm_map_cs_enforcement(map)
2487 #if __arm64__
2488 || !VM_MAP_IS_EXOTIC(map)
2489 #endif /* __arm64__ */
2490 ))) &&
2491 #endif /* XNU_TARGET_OS_OSX */
2492 #if PMAP_CS
2493 !pmap_cs_exempt(map->pmap) &&
2494 #endif
2495 (VM_MAP_POLICY_WX_FAIL(map) ||
2496 VM_MAP_POLICY_WX_STRIP_X(map)) &&
2497 !entry_for_jit) {
2498 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2499
2500 DTRACE_VM3(cs_wx,
2501 uint64_t, 0,
2502 uint64_t, 0,
2503 vm_prot_t, cur_protection);
2504 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2505 proc_selfpid(),
2506 (current_task()->bsd_info
2507 ? proc_name_address(current_task()->bsd_info)
2508 : "?"),
2509 __FUNCTION__,
2510 (vm_protect_wx_fail ? "failing" : "turning off execute"));
2511 cur_protection &= ~VM_PROT_EXECUTE;
2512 if (vm_protect_wx_fail) {
2513 return KERN_PROTECTION_FAILURE;
2514 }
2515 }
2516
2517 /*
2518 * If the task has requested executable lockdown,
2519 * deny any new executable mapping.
2520 */
2521 if (map->map_disallow_new_exec == TRUE) {
2522 if (cur_protection & VM_PROT_EXECUTE) {
2523 return KERN_PROTECTION_FAILURE;
2524 }
2525 }
2526
2527 if (resilient_codesign) {
2528 assert(!is_submap);
2529 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
2530 if ((cur_protection | max_protection) & reject_prot) {
2531 return KERN_PROTECTION_FAILURE;
2532 }
2533 }
2534
2535 if (resilient_media) {
2536 assert(!is_submap);
2537 // assert(!needs_copy);
2538 if (object != VM_OBJECT_NULL &&
2539 !object->internal) {
2540 /*
2541 * This mapping is directly backed by an external
2542 * memory manager (e.g. a vnode pager for a file):
2543 * we would not have any safe place to inject
2544 * a zero-filled page if an actual page is not
2545 * available, without possibly impacting the actual
2546 * contents of the mapped object (e.g. the file),
2547 * so we can't provide any media resiliency here.
2548 */
2549 return KERN_INVALID_ARGUMENT;
2550 }
2551 }
2552
2553 if (is_submap) {
2554 if (purgable) {
2555 /* submaps can not be purgeable */
2556 return KERN_INVALID_ARGUMENT;
2557 }
2558 if (object == VM_OBJECT_NULL) {
2559 /* submaps can not be created lazily */
2560 return KERN_INVALID_ARGUMENT;
2561 }
2562 }
2563 if (vmk_flags.vmkf_already) {
2564 /*
2565 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2566 * is already present. For it to be meaningful, the requested
2567 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2568 * we shouldn't try and remove what was mapped there first
2569 * (!VM_FLAGS_OVERWRITE).
2570 */
2571 if ((flags & VM_FLAGS_ANYWHERE) ||
2572 (flags & VM_FLAGS_OVERWRITE)) {
2573 return KERN_INVALID_ARGUMENT;
2574 }
2575 }
2576
2577 effective_min_offset = map->min_offset;
2578
2579 if (vmk_flags.vmkf_beyond_max) {
2580 /*
2581 * Allow an insertion beyond the map's max offset.
2582 */
2583 #if !defined(__arm__)
2584 if (vm_map_is_64bit(map)) {
2585 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2586 } else
2587 #endif /* __arm__ */
2588 effective_max_offset = 0x00000000FFFFF000ULL;
2589 } else {
2590 #if XNU_TARGET_OS_OSX
2591 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2592 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2593 } else {
2594 effective_max_offset = map->max_offset;
2595 }
2596 #else /* XNU_TARGET_OS_OSX */
2597 effective_max_offset = map->max_offset;
2598 #endif /* XNU_TARGET_OS_OSX */
2599 }
2600
2601 if (size == 0 ||
2602 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2603 *address = 0;
2604 return KERN_INVALID_ARGUMENT;
2605 }
2606
2607 if (map->pmap == kernel_pmap) {
2608 user_alias = VM_KERN_MEMORY_NONE;
2609 } else {
2610 user_alias = alias;
2611 }
2612
2613 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2614 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2615 }
2616
2617 #define RETURN(value) { result = value; goto BailOut; }
2618
2619 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2620 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2621 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2622 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2623 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2624 }
2625
2626 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2627 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2628 /*
2629 * In most cases, the caller rounds the size up to the
2630 * map's page size.
2631 * If we get a size that is explicitly not map-aligned here,
2632 * we'll have to respect the caller's wish and mark the
2633 * mapping as "not map-aligned" to avoid tripping the
2634 * map alignment checks later.
2635 */
2636 clear_map_aligned = TRUE;
2637 }
2638 if (!anywhere &&
2639 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2640 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2641 /*
2642 * We've been asked to map at a fixed address and that
2643 * address is not aligned to the map's specific alignment.
2644 * The caller should know what it's doing (i.e. most likely
2645 * mapping some fragmented copy map, transferring memory from
2646 * a VM map with a different alignment), so clear map_aligned
2647 * for this new VM map entry and proceed.
2648 */
2649 clear_map_aligned = TRUE;
2650 }
2651
2652 /*
2653 * Only zero-fill objects are allowed to be purgable.
2654 * LP64todo - limit purgable objects to 32-bits for now
2655 */
2656 if (purgable &&
2657 (offset != 0 ||
2658 (object != VM_OBJECT_NULL &&
2659 (object->vo_size != size ||
2660 object->purgable == VM_PURGABLE_DENY))
2661 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2662 return KERN_INVALID_ARGUMENT;
2663 }
2664
2665 if (!anywhere && overwrite) {
2666 /*
2667 * Create a temporary VM map to hold the old mappings in the
2668 * affected area while we create the new one.
2669 * This avoids releasing the VM map lock in
2670 * vm_map_entry_delete() and allows atomicity
2671 * when we want to replace some mappings with a new one.
2672 * It also allows us to restore the old VM mappings if the
2673 * new mapping fails.
2674 */
2675 zap_old_map = vm_map_create(PMAP_NULL,
2676 *address,
2677 *address + size,
2678 map->hdr.entries_pageable);
2679 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2680 vm_map_disable_hole_optimization(zap_old_map);
2681 }
2682
2683 StartAgain:;
2684
2685 start = *address;
2686
2687 if (anywhere) {
2688 vm_map_lock(map);
2689 map_locked = TRUE;
2690
2691 if (entry_for_jit) {
2692 if (map->jit_entry_exists &&
2693 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
2694 result = KERN_INVALID_ARGUMENT;
2695 goto BailOut;
2696 }
2697 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2698 random_address = TRUE;
2699 }
2700 }
2701
2702 if (random_address) {
2703 /*
2704 * Get a random start address.
2705 */
2706 result = vm_map_random_address_for_size(map, address, size);
2707 if (result != KERN_SUCCESS) {
2708 goto BailOut;
2709 }
2710 start = *address;
2711 }
2712 #if XNU_TARGET_OS_OSX
2713 else if ((start == 0 || start == vm_map_min(map)) &&
2714 !map->disable_vmentry_reuse &&
2715 map->vmmap_high_start != 0) {
2716 start = map->vmmap_high_start;
2717 }
2718 #endif /* XNU_TARGET_OS_OSX */
2719
2720
2721 /*
2722 * Calculate the first possible address.
2723 */
2724
2725 if (start < effective_min_offset) {
2726 start = effective_min_offset;
2727 }
2728 if (start > effective_max_offset) {
2729 RETURN(KERN_NO_SPACE);
2730 }
2731
2732 /*
2733 * Look for the first possible address;
2734 * if there's already something at this
2735 * address, we have to start after it.
2736 */
2737
2738 if (map->disable_vmentry_reuse == TRUE) {
2739 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2740 } else {
2741 if (map->holelistenabled) {
2742 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2743
2744 if (hole_entry == NULL) {
2745 /*
2746 * No more space in the map?
2747 */
2748 result = KERN_NO_SPACE;
2749 goto BailOut;
2750 } else {
2751 boolean_t found_hole = FALSE;
2752
2753 do {
2754 if (hole_entry->vme_start >= start) {
2755 start = hole_entry->vme_start;
2756 found_hole = TRUE;
2757 break;
2758 }
2759
2760 if (hole_entry->vme_end > start) {
2761 found_hole = TRUE;
2762 break;
2763 }
2764 hole_entry = hole_entry->vme_next;
2765 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2766
2767 if (found_hole == FALSE) {
2768 result = KERN_NO_SPACE;
2769 goto BailOut;
2770 }
2771
2772 entry = hole_entry;
2773
2774 if (start == 0) {
2775 start += PAGE_SIZE_64;
2776 }
2777 }
2778 } else {
2779 assert(first_free_is_valid(map));
2780
2781 entry = map->first_free;
2782
2783 if (entry == vm_map_to_entry(map)) {
2784 entry = NULL;
2785 } else {
2786 if (entry->vme_next == vm_map_to_entry(map)) {
2787 /*
2788 * Hole at the end of the map.
2789 */
2790 entry = NULL;
2791 } else {
2792 if (start < (entry->vme_next)->vme_start) {
2793 start = entry->vme_end;
2794 start = vm_map_round_page(start,
2795 VM_MAP_PAGE_MASK(map));
2796 } else {
2797 /*
2798 * Need to do a lookup.
2799 */
2800 entry = NULL;
2801 }
2802 }
2803 }
2804
2805 if (entry == NULL) {
2806 vm_map_entry_t tmp_entry;
2807 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2808 assert(!entry_for_jit);
2809 start = tmp_entry->vme_end;
2810 start = vm_map_round_page(start,
2811 VM_MAP_PAGE_MASK(map));
2812 }
2813 entry = tmp_entry;
2814 }
2815 }
2816 }
2817
2818 /*
2819 * In any case, the "entry" always precedes
2820 * the proposed new region throughout the
2821 * loop:
2822 */
2823
2824 while (TRUE) {
2825 vm_map_entry_t next;
2826
2827 /*
2828 * Find the end of the proposed new region.
2829 * Be sure we didn't go beyond the end, or
2830 * wrap around the address.
2831 */
2832
2833 end = ((start + mask) & ~mask);
2834 end = vm_map_round_page(end,
2835 VM_MAP_PAGE_MASK(map));
2836 if (end < start) {
2837 RETURN(KERN_NO_SPACE);
2838 }
2839 start = end;
2840 assert(VM_MAP_PAGE_ALIGNED(start,
2841 VM_MAP_PAGE_MASK(map)));
2842 end += size;
2843
2844 /* We want an entire page of empty space, but don't increase the allocation size. */
2845 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2846
2847 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2848 if (map->wait_for_space) {
2849 assert(!keep_map_locked);
2850 if (size <= (effective_max_offset -
2851 effective_min_offset)) {
2852 assert_wait((event_t)map,
2853 THREAD_ABORTSAFE);
2854 vm_map_unlock(map);
2855 map_locked = FALSE;
2856 thread_block(THREAD_CONTINUE_NULL);
2857 goto StartAgain;
2858 }
2859 }
2860 RETURN(KERN_NO_SPACE);
2861 }
2862
2863 next = entry->vme_next;
2864
2865 if (map->holelistenabled) {
2866 if (entry->vme_end >= desired_empty_end) {
2867 break;
2868 }
2869 } else {
2870 /*
2871 * If there are no more entries, we must win.
2872 *
2873 * OR
2874 *
2875 * If there is another entry, it must be
2876 * after the end of the potential new region.
2877 */
2878
2879 if (next == vm_map_to_entry(map)) {
2880 break;
2881 }
2882
2883 if (next->vme_start >= desired_empty_end) {
2884 break;
2885 }
2886 }
2887
2888 /*
2889 * Didn't fit -- move to the next entry.
2890 */
2891
2892 entry = next;
2893
2894 if (map->holelistenabled) {
2895 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2896 /*
2897 * Wrapped around
2898 */
2899 result = KERN_NO_SPACE;
2900 goto BailOut;
2901 }
2902 start = entry->vme_start;
2903 } else {
2904 start = entry->vme_end;
2905 }
2906
2907 start = vm_map_round_page(start,
2908 VM_MAP_PAGE_MASK(map));
2909 }
2910
2911 if (map->holelistenabled) {
2912 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2913 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2914 }
2915 }
2916
2917 *address = start;
2918 assert(VM_MAP_PAGE_ALIGNED(*address,
2919 VM_MAP_PAGE_MASK(map)));
2920 } else {
2921 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2922 !overwrite &&
2923 user_alias == VM_MEMORY_REALLOC) {
2924 /*
2925 * Force realloc() to switch to a new allocation,
2926 * to prevent 4k-fragmented virtual ranges.
2927 */
2928 // DEBUG4K_ERROR("no realloc in place");
2929 return KERN_NO_SPACE;
2930 }
2931
2932 /*
2933 * Verify that:
2934 * the address doesn't itself violate
2935 * the mask requirement.
2936 */
2937
2938 vm_map_lock(map);
2939 map_locked = TRUE;
2940 if ((start & mask) != 0) {
2941 RETURN(KERN_NO_SPACE);
2942 }
2943
2944 /*
2945 * ... the address is within bounds
2946 */
2947
2948 end = start + size;
2949
2950 if ((start < effective_min_offset) ||
2951 (end > effective_max_offset) ||
2952 (start >= end)) {
2953 RETURN(KERN_INVALID_ADDRESS);
2954 }
2955
2956 if (overwrite && zap_old_map != VM_MAP_NULL) {
2957 int remove_flags;
2958 /*
2959 * Fixed mapping and "overwrite" flag: attempt to
2960 * remove all existing mappings in the specified
2961 * address range, saving them in our "zap_old_map".
2962 */
2963 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2964 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2965 if (vmk_flags.vmkf_overwrite_immutable) {
2966 /* we can overwrite immutable mappings */
2967 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2968 }
2969 (void) vm_map_delete(map, start, end,
2970 remove_flags,
2971 zap_old_map);
2972 }
2973
2974 /*
2975 * ... the starting address isn't allocated
2976 */
2977
2978 if (vm_map_lookup_entry(map, start, &entry)) {
2979 if (!(vmk_flags.vmkf_already)) {
2980 RETURN(KERN_NO_SPACE);
2981 }
2982 /*
2983 * Check if what's already there is what we want.
2984 */
2985 tmp_start = start;
2986 tmp_offset = offset;
2987 if (entry->vme_start < start) {
2988 tmp_start -= start - entry->vme_start;
2989 tmp_offset -= start - entry->vme_start;
2990 }
2991 for (; entry->vme_start < end;
2992 entry = entry->vme_next) {
2993 /*
2994 * Check if the mapping's attributes
2995 * match the existing map entry.
2996 */
2997 if (entry == vm_map_to_entry(map) ||
2998 entry->vme_start != tmp_start ||
2999 entry->is_sub_map != is_submap ||
3000 VME_OFFSET(entry) != tmp_offset ||
3001 entry->needs_copy != needs_copy ||
3002 entry->protection != cur_protection ||
3003 entry->max_protection != max_protection ||
3004 entry->inheritance != inheritance ||
3005 entry->iokit_acct != iokit_acct ||
3006 VME_ALIAS(entry) != alias) {
3007 /* not the same mapping ! */
3008 RETURN(KERN_NO_SPACE);
3009 }
3010 /*
3011 * Check if the same object is being mapped.
3012 */
3013 if (is_submap) {
3014 if (VME_SUBMAP(entry) !=
3015 (vm_map_t) object) {
3016 /* not the same submap */
3017 RETURN(KERN_NO_SPACE);
3018 }
3019 } else {
3020 if (VME_OBJECT(entry) != object) {
3021 /* not the same VM object... */
3022 vm_object_t obj2;
3023
3024 obj2 = VME_OBJECT(entry);
3025 if ((obj2 == VM_OBJECT_NULL ||
3026 obj2->internal) &&
3027 (object == VM_OBJECT_NULL ||
3028 object->internal)) {
3029 /*
3030 * ... but both are
3031 * anonymous memory,
3032 * so equivalent.
3033 */
3034 } else {
3035 RETURN(KERN_NO_SPACE);
3036 }
3037 }
3038 }
3039
3040 tmp_offset += entry->vme_end - entry->vme_start;
3041 tmp_start += entry->vme_end - entry->vme_start;
3042 if (entry->vme_end >= end) {
3043 /* reached the end of our mapping */
3044 break;
3045 }
3046 }
3047 /* it all matches: let's use what's already there ! */
3048 RETURN(KERN_MEMORY_PRESENT);
3049 }
3050
3051 /*
3052 * ... the next region doesn't overlap the
3053 * end point.
3054 */
3055
3056 if ((entry->vme_next != vm_map_to_entry(map)) &&
3057 (entry->vme_next->vme_start < end)) {
3058 RETURN(KERN_NO_SPACE);
3059 }
3060 }
3061
3062 /*
3063 * At this point,
3064 * "start" and "end" should define the endpoints of the
3065 * available new range, and
3066 * "entry" should refer to the region before the new
3067 * range, and
3068 *
3069 * the map should be locked.
3070 */
3071
3072 /*
3073 * See whether we can avoid creating a new entry (and object) by
3074 * extending one of our neighbors. [So far, we only attempt to
3075 * extend from below.] Note that we can never extend/join
3076 * purgable objects because they need to remain distinct
3077 * entities in order to implement their "volatile object"
3078 * semantics.
3079 */
3080
3081 if (purgable ||
3082 entry_for_jit ||
3083 vm_memory_malloc_no_cow(user_alias)) {
3084 if (object == VM_OBJECT_NULL) {
3085 object = vm_object_allocate(size);
3086 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3087 object->true_share = FALSE;
3088 if (purgable) {
3089 task_t owner;
3090 object->purgable = VM_PURGABLE_NONVOLATILE;
3091 if (map->pmap == kernel_pmap) {
3092 /*
3093 * Purgeable mappings made in a kernel
3094 * map are "owned" by the kernel itself
3095 * rather than the current user task
3096 * because they're likely to be used by
3097 * more than this user task (see
3098 * execargs_purgeable_allocate(), for
3099 * example).
3100 */
3101 owner = kernel_task;
3102 } else {
3103 owner = current_task();
3104 }
3105 assert(object->vo_owner == NULL);
3106 assert(object->resident_page_count == 0);
3107 assert(object->wired_page_count == 0);
3108 vm_object_lock(object);
3109 vm_purgeable_nonvolatile_enqueue(object, owner);
3110 vm_object_unlock(object);
3111 }
3112 offset = (vm_object_offset_t)0;
3113 }
3114 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
3115 /* no coalescing if address space uses sub-pages */
3116 } else if ((is_submap == FALSE) &&
3117 (object == VM_OBJECT_NULL) &&
3118 (entry != vm_map_to_entry(map)) &&
3119 (entry->vme_end == start) &&
3120 (!entry->is_shared) &&
3121 (!entry->is_sub_map) &&
3122 (!entry->in_transition) &&
3123 (!entry->needs_wakeup) &&
3124 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
3125 (entry->protection == cur_protection) &&
3126 (entry->max_protection == max_protection) &&
3127 (entry->inheritance == inheritance) &&
3128 ((user_alias == VM_MEMORY_REALLOC) ||
3129 (VME_ALIAS(entry) == alias)) &&
3130 (entry->no_cache == no_cache) &&
3131 (entry->permanent == permanent) &&
3132 /* no coalescing for immutable executable mappings */
3133 !((entry->protection & VM_PROT_EXECUTE) &&
3134 entry->permanent) &&
3135 (!entry->superpage_size && !superpage_size) &&
3136 /*
3137 * No coalescing if not map-aligned, to avoid propagating
3138 * that condition any further than needed:
3139 */
3140 (!entry->map_aligned || !clear_map_aligned) &&
3141 (!entry->zero_wired_pages) &&
3142 (!entry->used_for_jit && !entry_for_jit) &&
3143 (!entry->pmap_cs_associated) &&
3144 (entry->iokit_acct == iokit_acct) &&
3145 (!entry->vme_resilient_codesign) &&
3146 (!entry->vme_resilient_media) &&
3147 (!entry->vme_atomic) &&
3148 (entry->vme_no_copy_on_read == no_copy_on_read) &&
3149
3150 ((entry->vme_end - entry->vme_start) + size <=
3151 (user_alias == VM_MEMORY_REALLOC ?
3152 ANON_CHUNK_SIZE :
3153 NO_COALESCE_LIMIT)) &&
3154
3155 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3156 if (vm_object_coalesce(VME_OBJECT(entry),
3157 VM_OBJECT_NULL,
3158 VME_OFFSET(entry),
3159 (vm_object_offset_t) 0,
3160 (vm_map_size_t)(entry->vme_end - entry->vme_start),
3161 (vm_map_size_t)(end - entry->vme_end))) {
3162 /*
3163 * Coalesced the two objects - can extend
3164 * the previous map entry to include the
3165 * new range.
3166 */
3167 map->size += (end - entry->vme_end);
3168 assert(entry->vme_start < end);
3169 assert(VM_MAP_PAGE_ALIGNED(end,
3170 VM_MAP_PAGE_MASK(map)));
3171 if (__improbable(vm_debug_events)) {
3172 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
3173 }
3174 entry->vme_end = end;
3175 if (map->holelistenabled) {
3176 vm_map_store_update_first_free(map, entry, TRUE);
3177 } else {
3178 vm_map_store_update_first_free(map, map->first_free, TRUE);
3179 }
3180 new_mapping_established = TRUE;
3181 RETURN(KERN_SUCCESS);
3182 }
3183 }
3184
3185 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
3186 new_entry = NULL;
3187
3188 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
3189 tmp2_end = tmp2_start + step;
3190 /*
3191 * Create a new entry
3192 *
3193 * XXX FBDP
3194 * The reserved "page zero" in each process's address space can
3195 * be arbitrarily large. Splitting it into separate objects and
3196 * therefore different VM map entries serves no purpose and just
3197 * slows down operations on the VM map, so let's not split the
3198 * allocation into chunks if the max protection is NONE. That
3199 * memory should never be accessible, so it will never get to the
3200 * default pager.
3201 */
3202 tmp_start = tmp2_start;
3203 if (object == VM_OBJECT_NULL &&
3204 size > chunk_size &&
3205 max_protection != VM_PROT_NONE &&
3206 superpage_size == 0) {
3207 tmp_end = tmp_start + chunk_size;
3208 } else {
3209 tmp_end = tmp2_end;
3210 }
3211 do {
3212 new_entry = vm_map_entry_insert(map,
3213 entry, tmp_start, tmp_end,
3214 object, offset, needs_copy,
3215 FALSE, FALSE,
3216 cur_protection, max_protection,
3217 VM_BEHAVIOR_DEFAULT,
3218 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3219 VM_INHERIT_NONE : inheritance),
3220 0,
3221 no_cache,
3222 permanent,
3223 no_copy_on_read,
3224 superpage_size,
3225 clear_map_aligned,
3226 is_submap,
3227 entry_for_jit,
3228 alias,
3229 translated_allow_execute);
3230
3231 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3232
3233 if (resilient_codesign) {
3234 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
3235 if (!((cur_protection | max_protection) & reject_prot)) {
3236 new_entry->vme_resilient_codesign = TRUE;
3237 }
3238 }
3239
3240 if (resilient_media &&
3241 (object == VM_OBJECT_NULL ||
3242 object->internal)) {
3243 new_entry->vme_resilient_media = TRUE;
3244 }
3245
3246 assert(!new_entry->iokit_acct);
3247 if (!is_submap &&
3248 object != VM_OBJECT_NULL &&
3249 (object->purgable != VM_PURGABLE_DENY ||
3250 object->vo_ledger_tag)) {
3251 assert(new_entry->use_pmap);
3252 assert(!new_entry->iokit_acct);
3253 /*
3254 * Turn off pmap accounting since
3255 * purgeable (or tagged) objects have their
3256 * own ledgers.
3257 */
3258 new_entry->use_pmap = FALSE;
3259 } else if (!is_submap &&
3260 iokit_acct &&
3261 object != VM_OBJECT_NULL &&
3262 object->internal) {
3263 /* alternate accounting */
3264 assert(!new_entry->iokit_acct);
3265 assert(new_entry->use_pmap);
3266 new_entry->iokit_acct = TRUE;
3267 new_entry->use_pmap = FALSE;
3268 DTRACE_VM4(
3269 vm_map_iokit_mapped_region,
3270 vm_map_t, map,
3271 vm_map_offset_t, new_entry->vme_start,
3272 vm_map_offset_t, new_entry->vme_end,
3273 int, VME_ALIAS(new_entry));
3274 vm_map_iokit_mapped_region(
3275 map,
3276 (new_entry->vme_end -
3277 new_entry->vme_start));
3278 } else if (!is_submap) {
3279 assert(!new_entry->iokit_acct);
3280 assert(new_entry->use_pmap);
3281 }
3282
3283 if (is_submap) {
3284 vm_map_t submap;
3285 boolean_t submap_is_64bit;
3286 boolean_t use_pmap;
3287
3288 assert(new_entry->is_sub_map);
3289 assert(!new_entry->use_pmap);
3290 assert(!new_entry->iokit_acct);
3291 submap = (vm_map_t) object;
3292 submap_is_64bit = vm_map_is_64bit(submap);
3293 use_pmap = vmk_flags.vmkf_nested_pmap;
3294 #ifndef NO_NESTED_PMAP
3295 if (use_pmap && submap->pmap == NULL) {
3296 ledger_t ledger = map->pmap->ledger;
3297 /* we need a sub pmap to nest... */
3298 submap->pmap = pmap_create_options(ledger, 0,
3299 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3300 if (submap->pmap == NULL) {
3301 /* let's proceed without nesting... */
3302 }
3303 #if defined(__arm__) || defined(__arm64__)
3304 else {
3305 pmap_set_nested(submap->pmap);
3306 }
3307 #endif
3308 }
3309 if (use_pmap && submap->pmap != NULL) {
3310 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3311 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3312 kr = KERN_FAILURE;
3313 } else {
3314 kr = pmap_nest(map->pmap,
3315 submap->pmap,
3316 tmp_start,
3317 tmp_end - tmp_start);
3318 }
3319 if (kr != KERN_SUCCESS) {
3320 printf("vm_map_enter: "
3321 "pmap_nest(0x%llx,0x%llx) "
3322 "error 0x%x\n",
3323 (long long)tmp_start,
3324 (long long)tmp_end,
3325 kr);
3326 } else {
3327 /* we're now nested ! */
3328 new_entry->use_pmap = TRUE;
3329 pmap_empty = FALSE;
3330 }
3331 }
3332 #endif /* NO_NESTED_PMAP */
3333 }
3334 entry = new_entry;
3335
3336 if (superpage_size) {
3337 vm_page_t pages, m;
3338 vm_object_t sp_object;
3339 vm_object_offset_t sp_offset;
3340
3341 VME_OFFSET_SET(entry, 0);
3342
3343 /* allocate one superpage */
3344 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3345 if (kr != KERN_SUCCESS) {
3346 /* deallocate whole range... */
3347 new_mapping_established = TRUE;
3348 /* ... but only up to "tmp_end" */
3349 size -= end - tmp_end;
3350 RETURN(kr);
3351 }
3352
3353 /* create one vm_object per superpage */
3354 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3355 sp_object->phys_contiguous = TRUE;
3356 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3357 VME_OBJECT_SET(entry, sp_object);
3358 assert(entry->use_pmap);
3359
3360 /* enter the base pages into the object */
3361 vm_object_lock(sp_object);
3362 for (sp_offset = 0;
3363 sp_offset < SUPERPAGE_SIZE;
3364 sp_offset += PAGE_SIZE) {
3365 m = pages;
3366 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3367 pages = NEXT_PAGE(m);
3368 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3369 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3370 }
3371 vm_object_unlock(sp_object);
3372 }
3373 } while (tmp_end != tmp2_end &&
3374 (tmp_start = tmp_end) &&
3375 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3376 tmp_end + chunk_size : tmp2_end));
3377 }
3378
3379 new_mapping_established = TRUE;
3380
3381 BailOut:
3382 assert(map_locked == TRUE);
3383
3384 if (result == KERN_SUCCESS) {
3385 vm_prot_t pager_prot;
3386 memory_object_t pager;
3387
3388 #if DEBUG
3389 if (pmap_empty &&
3390 !(vmk_flags.vmkf_no_pmap_check)) {
3391 assert(vm_map_pmap_is_empty(map,
3392 *address,
3393 *address + size));
3394 }
3395 #endif /* DEBUG */
3396
3397 /*
3398 * For "named" VM objects, let the pager know that the
3399 * memory object is being mapped. Some pagers need to keep
3400 * track of this, to know when they can reclaim the memory
3401 * object, for example.
3402 * VM calls memory_object_map() for each mapping (specifying
3403 * the protection of each mapping) and calls
3404 * memory_object_last_unmap() when all the mappings are gone.
3405 */
3406 pager_prot = max_protection;
3407 if (needs_copy) {
3408 /*
3409 * Copy-On-Write mapping: won't modify
3410 * the memory object.
3411 */
3412 pager_prot &= ~VM_PROT_WRITE;
3413 }
3414 if (!is_submap &&
3415 object != VM_OBJECT_NULL &&
3416 object->named &&
3417 object->pager != MEMORY_OBJECT_NULL) {
3418 vm_object_lock(object);
3419 pager = object->pager;
3420 if (object->named &&
3421 pager != MEMORY_OBJECT_NULL) {
3422 assert(object->pager_ready);
3423 vm_object_mapping_wait(object, THREAD_UNINT);
3424 vm_object_mapping_begin(object);
3425 vm_object_unlock(object);
3426
3427 kr = memory_object_map(pager, pager_prot);
3428 assert(kr == KERN_SUCCESS);
3429
3430 vm_object_lock(object);
3431 vm_object_mapping_end(object);
3432 }
3433 vm_object_unlock(object);
3434 }
3435 }
3436
3437 assert(map_locked == TRUE);
3438
3439 if (!keep_map_locked) {
3440 vm_map_unlock(map);
3441 map_locked = FALSE;
3442 }
3443
3444 /*
3445 * We can't hold the map lock if we enter this block.
3446 */
3447
3448 if (result == KERN_SUCCESS) {
3449 /* Wire down the new entry if the user
3450 * requested all new map entries be wired.
3451 */
3452 if ((map->wiring_required) || (superpage_size)) {
3453 assert(!keep_map_locked);
3454 pmap_empty = FALSE; /* pmap won't be empty */
3455 kr = vm_map_wire_kernel(map, start, end,
3456 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3457 TRUE);
3458 result = kr;
3459 }
3460
3461 }
3462
3463 if (result != KERN_SUCCESS) {
3464 if (new_mapping_established) {
3465 /*
3466 * We have to get rid of the new mappings since we
3467 * won't make them available to the user.
3468 * Try to do that atomically, to minimize the risk
3469 * that someone else creates new mappings in that range.
3470 */
3471 zap_new_map = vm_map_create(PMAP_NULL,
3472 *address,
3473 *address + size,
3474 map->hdr.entries_pageable);
3475 vm_map_set_page_shift(zap_new_map,
3476 VM_MAP_PAGE_SHIFT(map));
3477 vm_map_disable_hole_optimization(zap_new_map);
3478
3479 if (!map_locked) {
3480 vm_map_lock(map);
3481 map_locked = TRUE;
3482 }
3483 (void) vm_map_delete(map, *address, *address + size,
3484 (VM_MAP_REMOVE_SAVE_ENTRIES |
3485 VM_MAP_REMOVE_NO_MAP_ALIGN),
3486 zap_new_map);
3487 }
3488 if (zap_old_map != VM_MAP_NULL &&
3489 zap_old_map->hdr.nentries != 0) {
3490 vm_map_entry_t entry1, entry2;
3491
3492 /*
3493 * The new mapping failed. Attempt to restore
3494 * the old mappings, saved in the "zap_old_map".
3495 */
3496 if (!map_locked) {
3497 vm_map_lock(map);
3498 map_locked = TRUE;
3499 }
3500
3501 /* first check if the coast is still clear */
3502 start = vm_map_first_entry(zap_old_map)->vme_start;
3503 end = vm_map_last_entry(zap_old_map)->vme_end;
3504 if (vm_map_lookup_entry(map, start, &entry1) ||
3505 vm_map_lookup_entry(map, end, &entry2) ||
3506 entry1 != entry2) {
3507 /*
3508 * Part of that range has already been
3509 * re-mapped: we can't restore the old
3510 * mappings...
3511 */
3512 vm_map_enter_restore_failures++;
3513 } else {
3514 /*
3515 * Transfer the saved map entries from
3516 * "zap_old_map" to the original "map",
3517 * inserting them all after "entry1".
3518 */
3519 for (entry2 = vm_map_first_entry(zap_old_map);
3520 entry2 != vm_map_to_entry(zap_old_map);
3521 entry2 = vm_map_first_entry(zap_old_map)) {
3522 vm_map_size_t entry_size;
3523
3524 entry_size = (entry2->vme_end -
3525 entry2->vme_start);
3526 vm_map_store_entry_unlink(zap_old_map,
3527 entry2);
3528 zap_old_map->size -= entry_size;
3529 vm_map_store_entry_link(map, entry1, entry2,
3530 VM_MAP_KERNEL_FLAGS_NONE);
3531 map->size += entry_size;
3532 entry1 = entry2;
3533 }
3534 if (map->wiring_required) {
3535 /*
3536 * XXX TODO: we should rewire the
3537 * old pages here...
3538 */
3539 }
3540 vm_map_enter_restore_successes++;
3541 }
3542 }
3543 }
3544
3545 /*
3546 * The caller is responsible for releasing the lock if it requested to
3547 * keep the map locked.
3548 */
3549 if (map_locked && !keep_map_locked) {
3550 vm_map_unlock(map);
3551 }
3552
3553 /*
3554 * Get rid of the "zap_maps" and all the map entries that
3555 * they may still contain.
3556 */
3557 if (zap_old_map != VM_MAP_NULL) {
3558 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3559 zap_old_map = VM_MAP_NULL;
3560 }
3561 if (zap_new_map != VM_MAP_NULL) {
3562 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3563 zap_new_map = VM_MAP_NULL;
3564 }
3565
3566 return result;
3567
3568 #undef RETURN
3569 }
3570
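/*
 * Illustrative sketch (not part of the original source): a minimal
 * anonymous, zero-fill allocation through vm_map_enter() with
 * VM_FLAGS_ANYWHERE.  All names are placeholders; real callers normally
 * go through the vm_map() / mach_vm_map() wrappers instead.
 */
static __unused kern_return_t
vm_map_enter_anywhere_example(vm_map_t map, vm_map_size_t size,
    vm_map_offset_t *addr_out)
{
        *addr_out = 0;
        return vm_map_enter(map, addr_out, size, (vm_map_offset_t)0,
            VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
            VM_OBJECT_NULL, (vm_object_offset_t)0, FALSE,
            VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
}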
3571 #if __arm64__
3572 extern const struct memory_object_pager_ops fourk_pager_ops;
3573 kern_return_t
3574 vm_map_enter_fourk(
3575 vm_map_t map,
3576 vm_map_offset_t *address, /* IN/OUT */
3577 vm_map_size_t size,
3578 vm_map_offset_t mask,
3579 int flags,
3580 vm_map_kernel_flags_t vmk_flags,
3581 vm_tag_t alias,
3582 vm_object_t object,
3583 vm_object_offset_t offset,
3584 boolean_t needs_copy,
3585 vm_prot_t cur_protection,
3586 vm_prot_t max_protection,
3587 vm_inherit_t inheritance)
3588 {
3589 vm_map_entry_t entry, new_entry;
3590 vm_map_offset_t start, fourk_start;
3591 vm_map_offset_t end, fourk_end;
3592 vm_map_size_t fourk_size;
3593 kern_return_t result = KERN_SUCCESS;
3594 vm_map_t zap_old_map = VM_MAP_NULL;
3595 vm_map_t zap_new_map = VM_MAP_NULL;
3596 boolean_t map_locked = FALSE;
3597 boolean_t pmap_empty = TRUE;
3598 boolean_t new_mapping_established = FALSE;
3599 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3600 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3601 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3602 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3603 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3604 boolean_t is_submap = vmk_flags.vmkf_submap;
3605 boolean_t permanent = vmk_flags.vmkf_permanent;
3606 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3607 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3608 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3609 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
3610 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3611 vm_map_offset_t effective_min_offset, effective_max_offset;
3612 kern_return_t kr;
3613 boolean_t clear_map_aligned = FALSE;
3614 memory_object_t fourk_mem_obj;
3615 vm_object_t fourk_object;
3616 vm_map_offset_t fourk_pager_offset;
3617 int fourk_pager_index_start, fourk_pager_index_num;
3618 int cur_idx;
3619 boolean_t fourk_copy;
3620 vm_object_t copy_object;
3621 vm_object_offset_t copy_offset;
3622
3623 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3624 panic("%s:%d\n", __FUNCTION__, __LINE__);
3625 }
3626 fourk_mem_obj = MEMORY_OBJECT_NULL;
3627 fourk_object = VM_OBJECT_NULL;
3628
3629 if (superpage_size) {
3630 return KERN_NOT_SUPPORTED;
3631 }
3632
3633 if ((cur_protection & VM_PROT_WRITE) &&
3634 (cur_protection & VM_PROT_EXECUTE) &&
3635 #if XNU_TARGET_OS_OSX
3636 map->pmap != kernel_pmap &&
3637 (vm_map_cs_enforcement(map)
3638 #if __arm64__
3639 || !VM_MAP_IS_EXOTIC(map)
3640 #endif /* __arm64__ */
3641 ) &&
3642 #endif /* XNU_TARGET_OS_OSX */
3643 #if PMAP_CS
3644 !pmap_cs_exempt(map->pmap) &&
3645 #endif
3646 !entry_for_jit) {
3647 DTRACE_VM3(cs_wx,
3648 uint64_t, 0,
3649 uint64_t, 0,
3650 vm_prot_t, cur_protection);
3651 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3652 "turning off execute\n",
3653 proc_selfpid(),
3654 (current_task()->bsd_info
3655 ? proc_name_address(current_task()->bsd_info)
3656 : "?"),
3657 __FUNCTION__);
3658 cur_protection &= ~VM_PROT_EXECUTE;
3659 }
3660
3661 /*
3662 * If the task has requested executable lockdown,
3663 * deny any new executable mapping.
3664 */
3665 if (map->map_disallow_new_exec == TRUE) {
3666 if (cur_protection & VM_PROT_EXECUTE) {
3667 return KERN_PROTECTION_FAILURE;
3668 }
3669 }
3670
3671 if (is_submap) {
3672 return KERN_NOT_SUPPORTED;
3673 }
3674 if (vmk_flags.vmkf_already) {
3675 return KERN_NOT_SUPPORTED;
3676 }
3677 if (purgable || entry_for_jit) {
3678 return KERN_NOT_SUPPORTED;
3679 }
3680
3681 effective_min_offset = map->min_offset;
3682
3683 if (vmk_flags.vmkf_beyond_max) {
3684 return KERN_NOT_SUPPORTED;
3685 } else {
3686 effective_max_offset = map->max_offset;
3687 }
3688
3689 if (size == 0 ||
3690 (offset & FOURK_PAGE_MASK) != 0) {
3691 *address = 0;
3692 return KERN_INVALID_ARGUMENT;
3693 }
3694
3695 #define RETURN(value) { result = value; goto BailOut; }
3696
3697 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3698 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3699
3700 if (!anywhere && overwrite) {
3701 return KERN_NOT_SUPPORTED;
3702 }
3703 if (!anywhere && overwrite) {
3704 /*
3705 * Create a temporary VM map to hold the old mappings in the
3706 * affected area while we create the new one.
3707 * This avoids releasing the VM map lock in
3708 * vm_map_entry_delete() and allows atomicity
3709 * when we want to replace some mappings with a new one.
3710 * It also allows us to restore the old VM mappings if the
3711 * new mapping fails.
3712 */
3713 zap_old_map = vm_map_create(PMAP_NULL,
3714 *address,
3715 *address + size,
3716 map->hdr.entries_pageable);
3717 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3718 vm_map_disable_hole_optimization(zap_old_map);
3719 }
3720
3721 fourk_start = *address;
3722 fourk_size = size;
3723 fourk_end = fourk_start + fourk_size;
3724
3725 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3726 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3727 size = end - start;
3728
3729 if (anywhere) {
3730 return KERN_NOT_SUPPORTED;
3731 } else {
3732 /*
3733 * Verify that:
3734 * the address doesn't itself violate
3735 * the mask requirement.
3736 */
3737
3738 vm_map_lock(map);
3739 map_locked = TRUE;
3740 if ((start & mask) != 0) {
3741 RETURN(KERN_NO_SPACE);
3742 }
3743
3744 /*
3745 * ... the address is within bounds
3746 */
3747
3748 end = start + size;
3749
3750 if ((start < effective_min_offset) ||
3751 (end > effective_max_offset) ||
3752 (start >= end)) {
3753 RETURN(KERN_INVALID_ADDRESS);
3754 }
3755
3756 if (overwrite && zap_old_map != VM_MAP_NULL) {
3757 /*
3758 * Fixed mapping and "overwrite" flag: attempt to
3759 * remove all existing mappings in the specified
3760 * address range, saving them in our "zap_old_map".
3761 */
3762 (void) vm_map_delete(map, start, end,
3763 (VM_MAP_REMOVE_SAVE_ENTRIES |
3764 VM_MAP_REMOVE_NO_MAP_ALIGN),
3765 zap_old_map);
3766 }
3767
3768 /*
3769 * ... the starting address isn't allocated
3770 */
3771 if (vm_map_lookup_entry(map, start, &entry)) {
3772 vm_object_t cur_object, shadow_object;
3773
3774 /*
3775 * We might already have some 4K mappings
3776 * in a 16K page here.
3777 */
3778
3779 if (entry->vme_end - entry->vme_start
3780 != SIXTEENK_PAGE_SIZE) {
3781 RETURN(KERN_NO_SPACE);
3782 }
3783 if (entry->is_sub_map) {
3784 RETURN(KERN_NO_SPACE);
3785 }
3786 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3787 RETURN(KERN_NO_SPACE);
3788 }
3789
3790 /* go all the way down the shadow chain */
3791 cur_object = VME_OBJECT(entry);
3792 vm_object_lock(cur_object);
3793 while (cur_object->shadow != VM_OBJECT_NULL) {
3794 shadow_object = cur_object->shadow;
3795 vm_object_lock(shadow_object);
3796 vm_object_unlock(cur_object);
3797 cur_object = shadow_object;
3798 shadow_object = VM_OBJECT_NULL;
3799 }
3800 if (cur_object->internal ||
3801 cur_object->pager == NULL) {
3802 vm_object_unlock(cur_object);
3803 RETURN(KERN_NO_SPACE);
3804 }
3805 if (cur_object->pager->mo_pager_ops
3806 != &fourk_pager_ops) {
3807 vm_object_unlock(cur_object);
3808 RETURN(KERN_NO_SPACE);
3809 }
3810 fourk_object = cur_object;
3811 fourk_mem_obj = fourk_object->pager;
3812
3813 /* keep the "4K" object alive */
3814 vm_object_reference_locked(fourk_object);
3815 memory_object_reference(fourk_mem_obj);
3816 vm_object_unlock(fourk_object);
3817
3818 /* merge permissions */
3819 entry->protection |= cur_protection;
3820 entry->max_protection |= max_protection;
3821 if ((entry->protection & (VM_PROT_WRITE |
3822 VM_PROT_EXECUTE)) ==
3823 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3824 fourk_binary_compatibility_unsafe &&
3825 fourk_binary_compatibility_allow_wx) {
3826 /* write+execute: need to be "jit" */
3827 entry->used_for_jit = TRUE;
3828 }
3829 goto map_in_fourk_pager;
3830 }
3831
3832 /*
3833 * ... the next region doesn't overlap the
3834 * end point.
3835 */
3836
3837 if ((entry->vme_next != vm_map_to_entry(map)) &&
3838 (entry->vme_next->vme_start < end)) {
3839 RETURN(KERN_NO_SPACE);
3840 }
3841 }
3842
3843 /*
3844 * At this point,
3845 * "start" and "end" should define the endpoints of the
3846 * available new range, and
3847 * "entry" should refer to the region before the new
3848 * range, and
3849 *
3850 * the map should be locked.
3851 */
3852
3853 /* create a new "4K" pager */
3854 fourk_mem_obj = fourk_pager_create();
3855 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3856 assert(fourk_object);
3857
3858 /* keep the "4K" object alive */
3859 vm_object_reference(fourk_object);
3860
3861 /* create a "copy" object, to map the "4K" object copy-on-write */
3862 fourk_copy = TRUE;
3863 result = vm_object_copy_strategically(fourk_object,
3864 0,
3865 end - start,
3866 &copy_object,
3867 &copy_offset,
3868 &fourk_copy);
3869 assert(result == KERN_SUCCESS);
3870 assert(copy_object != VM_OBJECT_NULL);
3871 assert(copy_offset == 0);
3872
3873 /* map the "4K" pager's copy object */
3874 new_entry =
3875 vm_map_entry_insert(map, entry,
3876 vm_map_trunc_page(start,
3877 VM_MAP_PAGE_MASK(map)),
3878 vm_map_round_page(end,
3879 VM_MAP_PAGE_MASK(map)),
3880 copy_object,
3881 0, /* offset */
3882 FALSE, /* needs_copy */
3883 FALSE,
3884 FALSE,
3885 cur_protection, max_protection,
3886 VM_BEHAVIOR_DEFAULT,
3887 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3888 VM_INHERIT_NONE : inheritance),
3889 0,
3890 no_cache,
3891 permanent,
3892 no_copy_on_read,
3893 superpage_size,
3894 clear_map_aligned,
3895 is_submap,
3896 FALSE, /* jit */
3897 alias,
3898 translated_allow_execute);
3899 entry = new_entry;
3900
3901 #if VM_MAP_DEBUG_FOURK
3902 if (vm_map_debug_fourk) {
3903 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3904 map,
3905 (uint64_t) entry->vme_start,
3906 (uint64_t) entry->vme_end,
3907 fourk_mem_obj);
3908 }
3909 #endif /* VM_MAP_DEBUG_FOURK */
3910
3911 new_mapping_established = TRUE;
3912
3913 map_in_fourk_pager:
3914 /* "map" the original "object" where it belongs in the "4K" pager */
3915 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3916 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3917 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3918 fourk_pager_index_num = 4;
3919 } else {
3920 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3921 }
3922 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3923 fourk_pager_index_num = 4 - fourk_pager_index_start;
3924 }
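/*
 * The 16K page backing this mapping holds four 4K slots;
 * "fourk_pager_index_start"/"fourk_pager_index_num" select which of
 * those slots get (re)populated below, clamped to the pager's 4 slots.
 */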
3925 for (cur_idx = 0;
3926 cur_idx < fourk_pager_index_num;
3927 cur_idx++) {
3928 vm_object_t old_object;
3929 vm_object_offset_t old_offset;
3930
3931 kr = fourk_pager_populate(fourk_mem_obj,
3932 TRUE, /* overwrite */
3933 fourk_pager_index_start + cur_idx,
3934 object,
3935 (object
3936 ? (offset +
3937 (cur_idx * FOURK_PAGE_SIZE))
3938 : 0),
3939 &old_object,
3940 &old_offset);
3941 #if VM_MAP_DEBUG_FOURK
3942 if (vm_map_debug_fourk) {
3943 if (old_object == (vm_object_t) -1 &&
3944 old_offset == (vm_object_offset_t) -1) {
3945 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3946 "pager [%p:0x%llx] "
3947 "populate[%d] "
3948 "[object:%p,offset:0x%llx]\n",
3949 map,
3950 (uint64_t) entry->vme_start,
3951 (uint64_t) entry->vme_end,
3952 fourk_mem_obj,
3953 VME_OFFSET(entry),
3954 fourk_pager_index_start + cur_idx,
3955 object,
3956 (object
3957 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3958 : 0));
3959 } else {
3960 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3961 "pager [%p:0x%llx] "
3962 "populate[%d] [object:%p,offset:0x%llx] "
3963 "old [%p:0x%llx]\n",
3964 map,
3965 (uint64_t) entry->vme_start,
3966 (uint64_t) entry->vme_end,
3967 fourk_mem_obj,
3968 VME_OFFSET(entry),
3969 fourk_pager_index_start + cur_idx,
3970 object,
3971 (object
3972 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3973 : 0),
3974 old_object,
3975 old_offset);
3976 }
3977 }
3978 #endif /* VM_MAP_DEBUG_FOURK */
3979
3980 assert(kr == KERN_SUCCESS);
3981 if (object != old_object &&
3982 object != VM_OBJECT_NULL &&
3983 object != (vm_object_t) -1) {
3984 vm_object_reference(object);
3985 }
3986 if (object != old_object &&
3987 old_object != VM_OBJECT_NULL &&
3988 old_object != (vm_object_t) -1) {
3989 vm_object_deallocate(old_object);
3990 }
3991 }
3992
3993 BailOut:
3994 assert(map_locked == TRUE);
3995
3996 if (result == KERN_SUCCESS) {
3997 vm_prot_t pager_prot;
3998 memory_object_t pager;
3999
4000 #if DEBUG
4001 if (pmap_empty &&
4002 !(vmk_flags.vmkf_no_pmap_check)) {
4003 assert(vm_map_pmap_is_empty(map,
4004 *address,
4005 *address + size));
4006 }
4007 #endif /* DEBUG */
4008
4009 /*
4010 * For "named" VM objects, let the pager know that the
4011 * memory object is being mapped. Some pagers need to keep
4012 * track of this, to know when they can reclaim the memory
4013 * object, for example.
4014 * VM calls memory_object_map() for each mapping (specifying
4015 * the protection of each mapping) and calls
4016 * memory_object_last_unmap() when all the mappings are gone.
4017 */
4018 pager_prot = max_protection;
4019 if (needs_copy) {
4020 /*
4021 * Copy-On-Write mapping: won't modify
4022 * the memory object.
4023 */
4024 pager_prot &= ~VM_PROT_WRITE;
4025 }
4026 if (!is_submap &&
4027 object != VM_OBJECT_NULL &&
4028 object->named &&
4029 object->pager != MEMORY_OBJECT_NULL) {
4030 vm_object_lock(object);
4031 pager = object->pager;
4032 if (object->named &&
4033 pager != MEMORY_OBJECT_NULL) {
4034 assert(object->pager_ready);
4035 vm_object_mapping_wait(object, THREAD_UNINT);
4036 vm_object_mapping_begin(object);
4037 vm_object_unlock(object);
4038
4039 kr = memory_object_map(pager, pager_prot);
4040 assert(kr == KERN_SUCCESS);
4041
4042 vm_object_lock(object);
4043 vm_object_mapping_end(object);
4044 }
4045 vm_object_unlock(object);
4046 }
4047 if (!is_submap &&
4048 fourk_object != VM_OBJECT_NULL &&
4049 fourk_object->named &&
4050 fourk_object->pager != MEMORY_OBJECT_NULL) {
4051 vm_object_lock(fourk_object);
4052 pager = fourk_object->pager;
4053 if (fourk_object->named &&
4054 pager != MEMORY_OBJECT_NULL) {
4055 assert(fourk_object->pager_ready);
4056 vm_object_mapping_wait(fourk_object,
4057 THREAD_UNINT);
4058 vm_object_mapping_begin(fourk_object);
4059 vm_object_unlock(fourk_object);
4060
4061 kr = memory_object_map(pager, VM_PROT_READ);
4062 assert(kr == KERN_SUCCESS);
4063
4064 vm_object_lock(fourk_object);
4065 vm_object_mapping_end(fourk_object);
4066 }
4067 vm_object_unlock(fourk_object);
4068 }
4069 }
4070
4071 if (fourk_object != VM_OBJECT_NULL) {
4072 vm_object_deallocate(fourk_object);
4073 fourk_object = VM_OBJECT_NULL;
4074 memory_object_deallocate(fourk_mem_obj);
4075 fourk_mem_obj = MEMORY_OBJECT_NULL;
4076 }
4077
4078 assert(map_locked == TRUE);
4079
4080 if (!keep_map_locked) {
4081 vm_map_unlock(map);
4082 map_locked = FALSE;
4083 }
4084
4085 /*
4086 * We can't hold the map lock if we enter this block.
4087 */
4088
4089 if (result == KERN_SUCCESS) {
4090 /* Wire down the new entry if the user
4091 * requested all new map entries be wired.
4092 */
4093 if ((map->wiring_required) || (superpage_size)) {
4094 assert(!keep_map_locked);
4095 pmap_empty = FALSE; /* pmap won't be empty */
4096 kr = vm_map_wire_kernel(map, start, end,
4097 new_entry->protection, VM_KERN_MEMORY_MLOCK,
4098 TRUE);
4099 result = kr;
4100 }
4101
4102 }
4103
4104 if (result != KERN_SUCCESS) {
4105 if (new_mapping_established) {
4106 /*
4107 * We have to get rid of the new mappings since we
4108 * won't make them available to the user.
4109 * Try to do that atomically, to minimize the risk
4110 * that someone else creates new mappings in that range.
4111 */
4112 zap_new_map = vm_map_create(PMAP_NULL,
4113 *address,
4114 *address + size,
4115 map->hdr.entries_pageable);
4116 vm_map_set_page_shift(zap_new_map,
4117 VM_MAP_PAGE_SHIFT(map));
4118 vm_map_disable_hole_optimization(zap_new_map);
4119
4120 if (!map_locked) {
4121 vm_map_lock(map);
4122 map_locked = TRUE;
4123 }
4124 (void) vm_map_delete(map, *address, *address + size,
4125 (VM_MAP_REMOVE_SAVE_ENTRIES |
4126 VM_MAP_REMOVE_NO_MAP_ALIGN),
4127 zap_new_map);
4128 }
4129 if (zap_old_map != VM_MAP_NULL &&
4130 zap_old_map->hdr.nentries != 0) {
4131 vm_map_entry_t entry1, entry2;
4132
4133 /*
4134 * The new mapping failed. Attempt to restore
4135 * the old mappings, saved in the "zap_old_map".
4136 */
4137 if (!map_locked) {
4138 vm_map_lock(map);
4139 map_locked = TRUE;
4140 }
4141
4142 /* first check if the coast is still clear */
4143 start = vm_map_first_entry(zap_old_map)->vme_start;
4144 end = vm_map_last_entry(zap_old_map)->vme_end;
4145 if (vm_map_lookup_entry(map, start, &entry1) ||
4146 vm_map_lookup_entry(map, end, &entry2) ||
4147 entry1 != entry2) {
4148 /*
4149 * Part of that range has already been
4150 * re-mapped: we can't restore the old
4151 * mappings...
4152 */
4153 vm_map_enter_restore_failures++;
4154 } else {
4155 /*
4156 * Transfer the saved map entries from
4157 * "zap_old_map" to the original "map",
4158 * inserting them all after "entry1".
4159 */
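/*
 * Each pass unlinks the head entry of "zap_old_map" and relinks
 * it into "map" right after the previously restored entry, so the
 * original ordering is preserved and "zap_old_map" drains to empty.
 */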
4160 for (entry2 = vm_map_first_entry(zap_old_map);
4161 entry2 != vm_map_to_entry(zap_old_map);
4162 entry2 = vm_map_first_entry(zap_old_map)) {
4163 vm_map_size_t entry_size;
4164
4165 entry_size = (entry2->vme_end -
4166 entry2->vme_start);
4167 vm_map_store_entry_unlink(zap_old_map,
4168 entry2);
4169 zap_old_map->size -= entry_size;
4170 vm_map_store_entry_link(map, entry1, entry2,
4171 VM_MAP_KERNEL_FLAGS_NONE);
4172 map->size += entry_size;
4173 entry1 = entry2;
4174 }
4175 if (map->wiring_required) {
4176 /*
4177 * XXX TODO: we should rewire the
4178 * old pages here...
4179 */
4180 }
4181 vm_map_enter_restore_successes++;
4182 }
4183 }
4184 }
4185
4186 /*
4187 * The caller is responsible for releasing the lock if it requested to
4188 * keep the map locked.
4189 */
4190 if (map_locked && !keep_map_locked) {
4191 vm_map_unlock(map);
4192 }
4193
4194 /*
4195 * Get rid of the "zap_maps" and all the map entries that
4196 * they may still contain.
4197 */
4198 if (zap_old_map != VM_MAP_NULL) {
4199 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4200 zap_old_map = VM_MAP_NULL;
4201 }
4202 if (zap_new_map != VM_MAP_NULL) {
4203 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4204 zap_new_map = VM_MAP_NULL;
4205 }
4206
4207 return result;
4208
4209 #undef RETURN
4210 }
4211 #endif /* __arm64__ */
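/*
 * Illustrative sketch (not part of the original source): kernel code
 * would typically reach vm_map_enter_fourk() indirectly, by mapping a
 * memory entry with vmk_flags.vmkf_fourk set and a fixed address
 * (the 4K path above rejects VM_FLAGS_ANYWHERE, overwrite, purgable
 * and JIT requests). "addr", "size" and "mem_entry_port" are
 * hypothetical locals here:
 *
 *	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
 *	vmk_flags.vmkf_fourk = TRUE;
 *	kr = vm_map_enter_mem_object(target_map, &addr, size, 0,
 *	    VM_FLAGS_FIXED, vmk_flags, VM_KERN_MEMORY_NONE,
 *	    mem_entry_port, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
 *	    VM_INHERIT_DEFAULT);
 */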
4212
4213 /*
4214 * Counters for the prefault optimization.
4215 */
4216 int64_t vm_prefault_nb_pages = 0;
4217 int64_t vm_prefault_nb_bailout = 0;
4218
4219 static kern_return_t
4220 vm_map_enter_mem_object_helper(
4221 vm_map_t target_map,
4222 vm_map_offset_t *address,
4223 vm_map_size_t initial_size,
4224 vm_map_offset_t mask,
4225 int flags,
4226 vm_map_kernel_flags_t vmk_flags,
4227 vm_tag_t tag,
4228 ipc_port_t port,
4229 vm_object_offset_t offset,
4230 boolean_t copy,
4231 vm_prot_t cur_protection,
4232 vm_prot_t max_protection,
4233 vm_inherit_t inheritance,
4234 upl_page_list_ptr_t page_list,
4235 unsigned int page_list_count)
4236 {
4237 vm_map_address_t map_addr;
4238 vm_map_size_t map_size;
4239 vm_object_t object;
4240 vm_object_size_t size;
4241 kern_return_t result;
4242 boolean_t mask_cur_protection, mask_max_protection;
4243 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
4244 vm_map_offset_t offset_in_mapping = 0;
4245 #if __arm64__
4246 boolean_t fourk = vmk_flags.vmkf_fourk;
4247 #endif /* __arm64__ */
4248
4249 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4250 /* XXX TODO4K prefaulting depends on page size... */
4251 try_prefault = FALSE;
4252 }
4253
4254 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4255
4256 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4257 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4258 cur_protection &= ~VM_PROT_IS_MASK;
4259 max_protection &= ~VM_PROT_IS_MASK;
4260
4261 /*
4262 * Check arguments for validity
4263 */
4264 if ((target_map == VM_MAP_NULL) ||
4265 (cur_protection & ~VM_PROT_ALL) ||
4266 (max_protection & ~VM_PROT_ALL) ||
4267 (inheritance > VM_INHERIT_LAST_VALID) ||
4268 (try_prefault && (copy || !page_list)) ||
4269 initial_size == 0) {
4270 return KERN_INVALID_ARGUMENT;
4271 }
4272
4273 #if __arm64__
4274 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4275 /* no "fourk" if map is using a sub-page page size */
4276 fourk = FALSE;
4277 }
4278 if (fourk) {
4279 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4280 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4281 } else
4282 #endif /* __arm64__ */
4283 {
4284 map_addr = vm_map_trunc_page(*address,
4285 VM_MAP_PAGE_MASK(target_map));
4286 map_size = vm_map_round_page(initial_size,
4287 VM_MAP_PAGE_MASK(target_map));
4288 }
4289 size = vm_object_round_page(initial_size);
4290
4291 /*
4292 * Find the vm object (if any) corresponding to this port.
4293 */
4294 if (!IP_VALID(port)) {
4295 object = VM_OBJECT_NULL;
4296 offset = 0;
4297 copy = FALSE;
4298 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4299 vm_named_entry_t named_entry;
4300 vm_object_offset_t data_offset;
4301
4302 named_entry = (vm_named_entry_t) ip_get_kobject(port);
4303
4304 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4305 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4306 data_offset = named_entry->data_offset;
4307 offset += named_entry->data_offset;
4308 } else {
4309 data_offset = 0;
4310 }
4311
4312 /* a few checks to make sure the user is obeying the rules */
4313 if (size == 0) {
4314 if (offset >= named_entry->size) {
4315 return KERN_INVALID_RIGHT;
4316 }
4317 size = named_entry->size - offset;
4318 }
4319 if (mask_max_protection) {
4320 max_protection &= named_entry->protection;
4321 }
4322 if (mask_cur_protection) {
4323 cur_protection &= named_entry->protection;
4324 }
4325 if ((named_entry->protection & max_protection) !=
4326 max_protection) {
4327 return KERN_INVALID_RIGHT;
4328 }
4329 if ((named_entry->protection & cur_protection) !=
4330 cur_protection) {
4331 return KERN_INVALID_RIGHT;
4332 }
4333 if (offset + size < offset) {
4334 /* overflow */
4335 return KERN_INVALID_ARGUMENT;
4336 }
4337 if (named_entry->size < (offset + initial_size)) {
4338 return KERN_INVALID_ARGUMENT;
4339 }
4340
4341 if (named_entry->is_copy) {
4342 /* for a vm_map_copy, we can only map it whole */
4343 if ((size != named_entry->size) &&
4344 (vm_map_round_page(size,
4345 VM_MAP_PAGE_MASK(target_map)) ==
4346 named_entry->size)) {
4347 /* XXX FBDP use the rounded size... */
4348 size = vm_map_round_page(
4349 size,
4350 VM_MAP_PAGE_MASK(target_map));
4351 }
4352 }
4353
4354 /* the caller's "offset" parameter is the offset from the start */
4355 /* of the named entry; add the named entry's own offset in the object */
4356 offset = offset + named_entry->offset;
4357
4358 if (!VM_MAP_PAGE_ALIGNED(size,
4359 VM_MAP_PAGE_MASK(target_map))) {
4360 /*
4361 * Let's not map more than requested;
4362 * vm_map_enter() will handle this "not map-aligned"
4363 * case.
4364 */
4365 map_size = size;
4366 }
4367
4368 named_entry_lock(named_entry);
4369 if (named_entry->is_sub_map) {
4370 vm_map_t submap;
4371
4372 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4373 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4374 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4375 }
4376
4377 submap = named_entry->backing.map;
4378 vm_map_reference(submap);
4379 named_entry_unlock(named_entry);
4380
4381 vmk_flags.vmkf_submap = TRUE;
4382
4383 result = vm_map_enter(target_map,
4384 &map_addr,
4385 map_size,
4386 mask,
4387 flags,
4388 vmk_flags,
4389 tag,
4390 (vm_object_t)(uintptr_t) submap,
4391 offset,
4392 copy,
4393 cur_protection,
4394 max_protection,
4395 inheritance);
4396 if (result != KERN_SUCCESS) {
4397 vm_map_deallocate(submap);
4398 } else {
4399 /*
4400 * No need to lock "submap" just to check its
4401 * "mapped" flag: that flag is never reset
4402 * once it's been set and if we race, we'll
4403 * just end up setting it twice, which is OK.
4404 */
4405 if (submap->mapped_in_other_pmaps == FALSE &&
4406 vm_map_pmap(submap) != PMAP_NULL &&
4407 vm_map_pmap(submap) !=
4408 vm_map_pmap(target_map)) {
4409 /*
4410 * This submap is being mapped in a map
4411 * that uses a different pmap.
4412 * Set its "mapped_in_other_pmaps" flag
4413 * to indicate that we now need to
4414 * remove mappings from all pmaps rather
4415 * than just the submap's pmap.
4416 */
4417 vm_map_lock(submap);
4418 submap->mapped_in_other_pmaps = TRUE;
4419 vm_map_unlock(submap);
4420 }
4421 *address = map_addr;
4422 }
4423 return result;
4424 } else if (named_entry->is_copy) {
4425 kern_return_t kr;
4426 vm_map_copy_t copy_map;
4427 vm_map_entry_t copy_entry;
4428 vm_map_offset_t copy_addr;
4429 vm_map_copy_t target_copy_map;
4430 vm_map_offset_t overmap_start, overmap_end;
4431 vm_map_offset_t trimmed_start;
4432 vm_map_size_t target_size;
4433
4434 if (flags & ~(VM_FLAGS_FIXED |
4435 VM_FLAGS_ANYWHERE |
4436 VM_FLAGS_OVERWRITE |
4437 VM_FLAGS_RETURN_4K_DATA_ADDR |
4438 VM_FLAGS_RETURN_DATA_ADDR |
4439 VM_FLAGS_ALIAS_MASK)) {
4440 named_entry_unlock(named_entry);
4441 return KERN_INVALID_ARGUMENT;
4442 }
4443
4444 copy_map = named_entry->backing.copy;
4445 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4446 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4447 /* unsupported type; should not happen */
4448 printf("vm_map_enter_mem_object: "
4449 "memory_entry->backing.copy "
4450 "unsupported type 0x%x\n",
4451 copy_map->type);
4452 named_entry_unlock(named_entry);
4453 return KERN_INVALID_ARGUMENT;
4454 }
4455
4456 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4457 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4458 }
4459
4460 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4461 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4462 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4463 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4464 offset_in_mapping &= ~((signed)(0xFFF));
4465 }
4466 }
4467
4468 target_copy_map = VM_MAP_COPY_NULL;
4469 target_size = copy_map->size;
4470 overmap_start = 0;
4471 overmap_end = 0;
4472 trimmed_start = 0;
4473 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4474 DEBUG4K_ADJUST("adjusting...\n");
4475 kr = vm_map_copy_adjust_to_target(
4476 copy_map,
4477 offset /* includes data_offset */,
4478 initial_size,
4479 target_map,
4480 copy,
4481 &target_copy_map,
4482 &overmap_start,
4483 &overmap_end,
4484 &trimmed_start);
4485 if (kr != KERN_SUCCESS) {
4486 named_entry_unlock(named_entry);
4487 return kr;
4488 }
4489 target_size = target_copy_map->size;
4490 if (trimmed_start >= data_offset) {
4491 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4492 } else {
4493 data_offset -= trimmed_start;
4494 }
4495 } else {
4496 target_copy_map = copy_map;
4497 }
4498
4499 /* reserve a contiguous range */
4500 kr = vm_map_enter(target_map,
4501 &map_addr,
4502 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4503 mask,
4504 flags & (VM_FLAGS_ANYWHERE |
4505 VM_FLAGS_OVERWRITE |
4506 VM_FLAGS_RETURN_4K_DATA_ADDR |
4507 VM_FLAGS_RETURN_DATA_ADDR),
4508 vmk_flags,
4509 tag,
4510 VM_OBJECT_NULL,
4511 0,
4512 FALSE, /* copy */
4513 cur_protection,
4514 max_protection,
4515 inheritance);
4516 if (kr != KERN_SUCCESS) {
4517 DEBUG4K_ERROR("kr 0x%x\n", kr);
4518 if (target_copy_map != copy_map) {
4519 vm_map_copy_discard(target_copy_map);
4520 target_copy_map = VM_MAP_COPY_NULL;
4521 }
4522 named_entry_unlock(named_entry);
4523 return kr;
4524 }
4525
4526 copy_addr = map_addr;
4527
4528 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4529 copy_entry != vm_map_copy_to_entry(target_copy_map);
4530 copy_entry = copy_entry->vme_next) {
4531 int remap_flags;
4532 vm_map_kernel_flags_t vmk_remap_flags;
4533 vm_map_t copy_submap;
4534 vm_object_t copy_object;
4535 vm_map_size_t copy_size;
4536 vm_object_offset_t copy_offset;
4537 int copy_vm_alias;
4538
4539 remap_flags = 0;
4540 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4541
4542 copy_object = VME_OBJECT(copy_entry);
4543 copy_offset = VME_OFFSET(copy_entry);
4544 copy_size = (copy_entry->vme_end -
4545 copy_entry->vme_start);
4546 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4547 if (copy_vm_alias == 0) {
4548 /*
4549 * Caller does not want a specific
4550 * alias for this new mapping: use
4551 * the alias of the original mapping.
4552 */
4553 copy_vm_alias = VME_ALIAS(copy_entry);
4554 }
4555
4556 /* sanity check */
4557 if ((copy_addr + copy_size) >
4558 (map_addr +
4559 overmap_start + overmap_end +
4560 named_entry->size /* XXX full size */)) {
4561 /* over-mapping too much !? */
4562 kr = KERN_INVALID_ARGUMENT;
4563 DEBUG4K_ERROR("kr 0x%x\n", kr);
4564 /* abort */
4565 break;
4566 }
4567
4568 /* take a reference on the object */
4569 if (copy_entry->is_sub_map) {
4570 vmk_remap_flags.vmkf_submap = TRUE;
4571 copy_submap = VME_SUBMAP(copy_entry);
4572 vm_map_lock(copy_submap);
4573 vm_map_reference(copy_submap);
4574 vm_map_unlock(copy_submap);
4575 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4576 } else if (!copy &&
4577 copy_object != VM_OBJECT_NULL &&
4578 (copy_entry->needs_copy ||
4579 copy_object->shadowed ||
4580 (!copy_object->true_share &&
4581 !copy_entry->is_shared &&
4582 copy_object->vo_size > copy_size))) {
4583 /*
4584 * We need to resolve our side of this
4585 * "symmetric" copy-on-write now; we
4586 * need a new object to map and share,
4587 * instead of the current one which
4588 * might still be shared with the
4589 * original mapping.
4590 *
4591 * Note: A "vm_map_copy_t" does not
4592 * have a lock but we're protected by
4593 * the named entry's lock here.
4594 */
4595 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4596 VME_OBJECT_SHADOW(copy_entry, copy_size);
4597 if (!copy_entry->needs_copy &&
4598 copy_entry->protection & VM_PROT_WRITE) {
4599 vm_prot_t prot;
4600
4601 prot = copy_entry->protection & ~VM_PROT_WRITE;
4602 vm_object_pmap_protect(copy_object,
4603 copy_offset,
4604 copy_size,
4605 PMAP_NULL,
4606 PAGE_SIZE,
4607 0,
4608 prot);
4609 }
4610
4611 copy_entry->needs_copy = FALSE;
4612 copy_entry->is_shared = TRUE;
4613 copy_object = VME_OBJECT(copy_entry);
4614 copy_offset = VME_OFFSET(copy_entry);
4615 vm_object_lock(copy_object);
4616 vm_object_reference_locked(copy_object);
4617 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4618 /* we're about to make a shared mapping of this object */
4619 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4620 copy_object->true_share = TRUE;
4621 }
4622 vm_object_unlock(copy_object);
4623 } else {
4624 /*
4625 * We already have the right object
4626 * to map.
4627 */
4628 copy_object = VME_OBJECT(copy_entry);
4629 vm_object_reference(copy_object);
4630 }
4631
4632 /* over-map the object into destination */
4633 remap_flags |= flags;
4634 remap_flags |= VM_FLAGS_FIXED;
4635 remap_flags |= VM_FLAGS_OVERWRITE;
4636 remap_flags &= ~VM_FLAGS_ANYWHERE;
4637 if (!copy && !copy_entry->is_sub_map) {
4638 /*
4639 * copy-on-write should have been
4640 * resolved at this point, or we would
4641 * end up sharing instead of copying.
4642 */
4643 assert(!copy_entry->needs_copy);
4644 }
4645 #if XNU_TARGET_OS_OSX
4646 if (copy_entry->used_for_jit) {
4647 vmk_remap_flags.vmkf_map_jit = TRUE;
4648 }
4649 #endif /* XNU_TARGET_OS_OSX */
4650
4651 assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4652 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4653 kr = vm_map_enter(target_map,
4654 &copy_addr,
4655 copy_size,
4656 (vm_map_offset_t) 0,
4657 remap_flags,
4658 vmk_remap_flags,
4659 (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4660 copy_object,
4661 copy_offset,
4662 ((copy_object == NULL) ? FALSE : copy),
4663 cur_protection,
4664 max_protection,
4665 inheritance);
4666 if (kr != KERN_SUCCESS) {
4667 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4668 if (copy_entry->is_sub_map) {
4669 vm_map_deallocate(copy_submap);
4670 } else {
4671 vm_object_deallocate(copy_object);
4672 }
4673 /* abort */
4674 break;
4675 }
4676
4677 /* next mapping */
4678 copy_addr += copy_size;
4679 }
4680
4681 if (kr == KERN_SUCCESS) {
4682 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4683 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4684 *address = map_addr + offset_in_mapping;
4685 } else {
4686 *address = map_addr;
4687 }
4688 if (overmap_start) {
4689 *address += overmap_start;
4690 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4691 }
4692 }
4693 named_entry_unlock(named_entry);
4694 if (target_copy_map != copy_map) {
4695 vm_map_copy_discard(target_copy_map);
4696 target_copy_map = VM_MAP_COPY_NULL;
4697 }
4698
4699 if (kr != KERN_SUCCESS) {
4700 if (!(flags & VM_FLAGS_OVERWRITE)) {
4701 /* deallocate the contiguous range */
4702 (void) vm_deallocate(target_map,
4703 map_addr,
4704 map_size);
4705 }
4706 }
4707
4708 return kr;
4709 }
4710
4711 if (named_entry->is_object) {
4712 unsigned int access;
4713 vm_prot_t protections;
4714 unsigned int wimg_mode;
4715
4716 /* we are mapping a VM object */
4717
4718 protections = named_entry->protection & VM_PROT_ALL;
4719 access = GET_MAP_MEM(named_entry->protection);
4720
4721 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4722 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4723 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4724 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4725 offset_in_mapping &= ~((signed)(0xFFF));
4726 }
4727 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4728 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4729 }
4730
4731 object = vm_named_entry_to_vm_object(named_entry);
4732 assert(object != VM_OBJECT_NULL);
4733 vm_object_lock(object);
4734 named_entry_unlock(named_entry);
4735
4736 vm_object_reference_locked(object);
4737
4738 wimg_mode = object->wimg_bits;
4739 vm_prot_to_wimg(access, &wimg_mode);
4740 if (object->wimg_bits != wimg_mode) {
4741 vm_object_change_wimg_mode(object, wimg_mode);
4742 }
4743
4744 vm_object_unlock(object);
4745 } else {
4746 panic("invalid VM named entry %p", named_entry);
4747 }
4748 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4749 /*
4750 * JMM - This is temporary until we unify named entries
4751 * and raw memory objects.
4752 *
4753 * Detected fake ip_kotype for a memory object. In
4754 * this case, the port isn't really a port at all, but
4755 * instead is just a raw memory object.
4756 */
4757 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4758 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4759 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4760 }
4761
4762 object = memory_object_to_vm_object((memory_object_t)port);
4763 if (object == VM_OBJECT_NULL) {
4764 return KERN_INVALID_OBJECT;
4765 }
4766 vm_object_reference(object);
4767
4768 /* wait for object (if any) to be ready */
4769 if (object != VM_OBJECT_NULL) {
4770 if (object == kernel_object) {
4771 printf("Warning: Attempt to map kernel object"
4772 " by a non-private kernel entity\n");
4773 return KERN_INVALID_OBJECT;
4774 }
4775 if (!object->pager_ready) {
4776 vm_object_lock(object);
4777
4778 while (!object->pager_ready) {
4779 vm_object_wait(object,
4780 VM_OBJECT_EVENT_PAGER_READY,
4781 THREAD_UNINT);
4782 vm_object_lock(object);
4783 }
4784 vm_object_unlock(object);
4785 }
4786 }
4787 } else {
4788 return KERN_INVALID_OBJECT;
4789 }
4790
4791 if (object != VM_OBJECT_NULL &&
4792 object->named &&
4793 object->pager != MEMORY_OBJECT_NULL &&
4794 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4795 memory_object_t pager;
4796 vm_prot_t pager_prot;
4797 kern_return_t kr;
4798
4799 /*
4800 * For "named" VM objects, let the pager know that the
4801 * memory object is being mapped. Some pagers need to keep
4802 * track of this, to know when they can reclaim the memory
4803 * object, for example.
4804 * VM calls memory_object_map() for each mapping (specifying
4805 * the protection of each mapping) and calls
4806 * memory_object_last_unmap() when all the mappings are gone.
4807 */
4808 pager_prot = max_protection;
4809 if (copy) {
4810 /*
4811 * Copy-On-Write mapping: won't modify the
4812 * memory object.
4813 */
4814 pager_prot &= ~VM_PROT_WRITE;
4815 }
4816 vm_object_lock(object);
4817 pager = object->pager;
4818 if (object->named &&
4819 pager != MEMORY_OBJECT_NULL &&
4820 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4821 assert(object->pager_ready);
4822 vm_object_mapping_wait(object, THREAD_UNINT);
4823 vm_object_mapping_begin(object);
4824 vm_object_unlock(object);
4825
4826 kr = memory_object_map(pager, pager_prot);
4827 assert(kr == KERN_SUCCESS);
4828
4829 vm_object_lock(object);
4830 vm_object_mapping_end(object);
4831 }
4832 vm_object_unlock(object);
4833 }
4834
4835 /*
4836 * Perform the copy if requested
4837 */
4838
4839 if (copy) {
4840 vm_object_t new_object;
4841 vm_object_offset_t new_offset;
4842
4843 result = vm_object_copy_strategically(object, offset,
4844 map_size,
4845 &new_object, &new_offset,
4846 &copy);
4847
4848
4849 if (result == KERN_MEMORY_RESTART_COPY) {
4850 boolean_t success;
4851 boolean_t src_needs_copy;
4852
4853 /*
4854 * XXX
4855 * We currently ignore src_needs_copy.
4856 * This really is the issue of how to make
4857 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4858 * non-kernel users to use. Solution forthcoming.
4859 * In the meantime, since we don't allow non-kernel
4860 * memory managers to specify symmetric copy,
4861 * we won't run into problems here.
4862 */
4863 new_object = object;
4864 new_offset = offset;
4865 success = vm_object_copy_quickly(&new_object,
4866 new_offset,
4867 map_size,
4868 &src_needs_copy,
4869 &copy);
4870 assert(success);
4871 result = KERN_SUCCESS;
4872 }
4873 /*
4874 * Throw away the reference to the
4875 * original object, as it won't be mapped.
4876 */
4877
4878 vm_object_deallocate(object);
4879
4880 if (result != KERN_SUCCESS) {
4881 return result;
4882 }
4883
4884 object = new_object;
4885 offset = new_offset;
4886 }
4887
4888 /*
4889 * If non-kernel users want to try to prefault pages, the mapping and prefault
4890 * need to be atomic.
4891 */
4892 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4893 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
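/*
 * With vmkf_keep_map_locked set, vm_map_enter()/vm_map_enter_fourk()
 * return with the map still locked on success, so the prefault loop
 * below runs before any other thread can touch the new mapping; the
 * lock is dropped at the end of that loop.
 */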
4894
4895 #if __arm64__
4896 if (fourk) {
4897 /* map this object in a "4K" pager */
4898 result = vm_map_enter_fourk(target_map,
4899 &map_addr,
4900 map_size,
4901 (vm_map_offset_t) mask,
4902 flags,
4903 vmk_flags,
4904 tag,
4905 object,
4906 offset,
4907 copy,
4908 cur_protection,
4909 max_protection,
4910 inheritance);
4911 } else
4912 #endif /* __arm64__ */
4913 {
4914 result = vm_map_enter(target_map,
4915 &map_addr, map_size,
4916 (vm_map_offset_t)mask,
4917 flags,
4918 vmk_flags,
4919 tag,
4920 object, offset,
4921 copy,
4922 cur_protection, max_protection,
4923 inheritance);
4924 }
4925 if (result != KERN_SUCCESS) {
4926 vm_object_deallocate(object);
4927 }
4928
4929 /*
4930 * Try to prefault, and do not forget to release the vm map lock.
4931 */
4932 if (result == KERN_SUCCESS && try_prefault) {
4933 mach_vm_address_t va = map_addr;
4934 kern_return_t kr = KERN_SUCCESS;
4935 unsigned int i = 0;
4936 int pmap_options;
4937
4938 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4939 if (object->internal) {
4940 pmap_options |= PMAP_OPTIONS_INTERNAL;
4941 }
4942
4943 for (i = 0; i < page_list_count; ++i) {
4944 if (!UPL_VALID_PAGE(page_list, i)) {
4945 if (kernel_prefault) {
4946 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4947 result = KERN_MEMORY_ERROR;
4948 break;
4949 }
4950 } else {
4951 /*
4952 * If this call fails, we should stop trying
4953 * to optimize: other calls are likely going
4954 * to fail too.
4955 *
4956 * We do not report an error for such a
4957 * failure, though; this is an optimization,
4958 * not something critical.
4959 */
4960 kr = pmap_enter_options(target_map->pmap,
4961 va, UPL_PHYS_PAGE(page_list, i),
4962 cur_protection, VM_PROT_NONE,
4963 0, TRUE, pmap_options, NULL);
4964 if (kr != KERN_SUCCESS) {
4965 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4966 if (kernel_prefault) {
4967 result = kr;
4968 }
4969 break;
4970 }
4971 OSIncrementAtomic64(&vm_prefault_nb_pages);
4972 }
4973
4974 /* Next virtual address */
4975 va += PAGE_SIZE;
4976 }
4977 if (vmk_flags.vmkf_keep_map_locked) {
4978 vm_map_unlock(target_map);
4979 }
4980 }
4981
4982 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4983 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4984 *address = map_addr + offset_in_mapping;
4985 } else {
4986 *address = map_addr;
4987 }
4988 return result;
4989 }
4990
4991 kern_return_t
4992 vm_map_enter_mem_object(
4993 vm_map_t target_map,
4994 vm_map_offset_t *address,
4995 vm_map_size_t initial_size,
4996 vm_map_offset_t mask,
4997 int flags,
4998 vm_map_kernel_flags_t vmk_flags,
4999 vm_tag_t tag,
5000 ipc_port_t port,
5001 vm_object_offset_t offset,
5002 boolean_t copy,
5003 vm_prot_t cur_protection,
5004 vm_prot_t max_protection,
5005 vm_inherit_t inheritance)
5006 {
5007 kern_return_t ret;
5008
5009 ret = vm_map_enter_mem_object_helper(target_map,
5010 address,
5011 initial_size,
5012 mask,
5013 flags,
5014 vmk_flags,
5015 tag,
5016 port,
5017 offset,
5018 copy,
5019 cur_protection,
5020 max_protection,
5021 inheritance,
5022 NULL,
5023 0);
5024
5025 #if KASAN
5026 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
5027 kasan_notify_address(*address, initial_size);
5028 }
5029 #endif
5030
5031 return ret;
5032 }
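/*
 * Illustrative sketch (not part of the original source), assuming
 * "port" is a memory-entry port, e.g. from mach_make_memory_entry_64():
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter_mem_object(target_map, &addr, size, 0,
 *	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE, port, 0, FALSE,
 *	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 */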
5033
5034 kern_return_t
5035 vm_map_enter_mem_object_prefault(
5036 vm_map_t target_map,
5037 vm_map_offset_t *address,
5038 vm_map_size_t initial_size,
5039 vm_map_offset_t mask,
5040 int flags,
5041 vm_map_kernel_flags_t vmk_flags,
5042 vm_tag_t tag,
5043 ipc_port_t port,
5044 vm_object_offset_t offset,
5045 vm_prot_t cur_protection,
5046 vm_prot_t max_protection,
5047 upl_page_list_ptr_t page_list,
5048 unsigned int page_list_count)
5049 {
5050 kern_return_t ret;
5051
5052 ret = vm_map_enter_mem_object_helper(target_map,
5053 address,
5054 initial_size,
5055 mask,
5056 flags,
5057 vmk_flags,
5058 tag,
5059 port,
5060 offset,
5061 FALSE,
5062 cur_protection,
5063 max_protection,
5064 VM_INHERIT_DEFAULT,
5065 page_list,
5066 page_list_count);
5067
5068 #if KASAN
5069 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
5070 kasan_notify_address(*address, initial_size);
5071 }
5072 #endif
5073
5074 return ret;
5075 }
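/*
 * Note: this variant always maps without copy semantics and with
 * VM_INHERIT_DEFAULT (see the FALSE/VM_INHERIT_DEFAULT arguments
 * above); "page_list"/"page_list_count" come from UPL page info and
 * drive the pmap_enter_options() prefault loop in the helper.
 */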
5076
5077
5078 kern_return_t
5079 vm_map_enter_mem_object_control(
5080 vm_map_t target_map,
5081 vm_map_offset_t *address,
5082 vm_map_size_t initial_size,
5083 vm_map_offset_t mask,
5084 int flags,
5085 vm_map_kernel_flags_t vmk_flags,
5086 vm_tag_t tag,
5087 memory_object_control_t control,
5088 vm_object_offset_t offset,
5089 boolean_t copy,
5090 vm_prot_t cur_protection,
5091 vm_prot_t max_protection,
5092 vm_inherit_t inheritance)
5093 {
5094 vm_map_address_t map_addr;
5095 vm_map_size_t map_size;
5096 vm_object_t object;
5097 vm_object_size_t size;
5098 kern_return_t result;
5099 memory_object_t pager;
5100 vm_prot_t pager_prot;
5101 kern_return_t kr;
5102 #if __arm64__
5103 boolean_t fourk = vmk_flags.vmkf_fourk;
5104 #endif /* __arm64__ */
5105
5106 /*
5107 * Check arguments for validity
5108 */
5109 if ((target_map == VM_MAP_NULL) ||
5110 (cur_protection & ~VM_PROT_ALL) ||
5111 (max_protection & ~VM_PROT_ALL) ||
5112 (inheritance > VM_INHERIT_LAST_VALID) ||
5113 initial_size == 0) {
5114 return KERN_INVALID_ARGUMENT;
5115 }
5116
5117 #if __arm64__
5118 if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
5119 fourk = FALSE;
5120 }
5121
5122 if (fourk) {
5123 map_addr = vm_map_trunc_page(*address,
5124 FOURK_PAGE_MASK);
5125 map_size = vm_map_round_page(initial_size,
5126 FOURK_PAGE_MASK);
5127 } else
5128 #endif /* __arm64__ */
5129 {
5130 map_addr = vm_map_trunc_page(*address,
5131 VM_MAP_PAGE_MASK(target_map));
5132 map_size = vm_map_round_page(initial_size,
5133 VM_MAP_PAGE_MASK(target_map));
5134 }
5135 size = vm_object_round_page(initial_size);
5136
5137 object = memory_object_control_to_vm_object(control);
5138
5139 if (object == VM_OBJECT_NULL) {
5140 return KERN_INVALID_OBJECT;
5141 }
5142
5143 if (object == kernel_object) {
5144 printf("Warning: Attempt to map kernel object"
5145 " by a non-private kernel entity\n");
5146 return KERN_INVALID_OBJECT;
5147 }
5148
5149 vm_object_lock(object);
5150 object->ref_count++;
5151 vm_object_res_reference(object);
5152
5153 /*
5154 * For "named" VM objects, let the pager know that the
5155 * memory object is being mapped. Some pagers need to keep
5156 * track of this, to know when they can reclaim the memory
5157 * object, for example.
5158 * VM calls memory_object_map() for each mapping (specifying
5159 * the protection of each mapping) and calls
5160 * memory_object_last_unmap() when all the mappings are gone.
5161 */
5162 pager_prot = max_protection;
5163 if (copy) {
5164 pager_prot &= ~VM_PROT_WRITE;
5165 }
5166 pager = object->pager;
5167 if (object->named &&
5168 pager != MEMORY_OBJECT_NULL &&
5169 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
5170 assert(object->pager_ready);
5171 vm_object_mapping_wait(object, THREAD_UNINT);
5172 vm_object_mapping_begin(object);
5173 vm_object_unlock(object);
5174
5175 kr = memory_object_map(pager, pager_prot);
5176 assert(kr == KERN_SUCCESS);
5177
5178 vm_object_lock(object);
5179 vm_object_mapping_end(object);
5180 }
5181 vm_object_unlock(object);
5182
5183 /*
5184 * Perform the copy if requested
5185 */
5186
5187 if (copy) {
5188 vm_object_t new_object;
5189 vm_object_offset_t new_offset;
5190
5191 result = vm_object_copy_strategically(object, offset, size,
5192 &new_object, &new_offset,
5193 &copy);
5194
5195
5196 if (result == KERN_MEMORY_RESTART_COPY) {
5197 boolean_t success;
5198 boolean_t src_needs_copy;
5199
5200 /*
5201 * XXX
5202 * We currently ignore src_needs_copy.
5203 * This really is the issue of how to make
5204 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5205 * non-kernel users to use. Solution forthcoming.
5206 * In the meantime, since we don't allow non-kernel
5207 * memory managers to specify symmetric copy,
5208 * we won't run into problems here.
5209 */
5210 new_object = object;
5211 new_offset = offset;
5212 success = vm_object_copy_quickly(&new_object,
5213 new_offset, size,
5214 &src_needs_copy,
5215 &copy);
5216 assert(success);
5217 result = KERN_SUCCESS;
5218 }
5219 /*
5220 * Throw away the reference to the
5221 * original object, as it won't be mapped.
5222 */
5223
5224 vm_object_deallocate(object);
5225
5226 if (result != KERN_SUCCESS) {
5227 return result;
5228 }
5229
5230 object = new_object;
5231 offset = new_offset;
5232 }
5233
5234 #if __arm64__
5235 if (fourk) {
5236 result = vm_map_enter_fourk(target_map,
5237 &map_addr,
5238 map_size,
5239 (vm_map_offset_t)mask,
5240 flags,
5241 vmk_flags,
5242 tag,
5243 object, offset,
5244 copy,
5245 cur_protection, max_protection,
5246 inheritance);
5247 } else
5248 #endif /* __arm64__ */
5249 {
5250 result = vm_map_enter(target_map,
5251 &map_addr, map_size,
5252 (vm_map_offset_t)mask,
5253 flags,
5254 vmk_flags,
5255 tag,
5256 object, offset,
5257 copy,
5258 cur_protection, max_protection,
5259 inheritance);
5260 }
5261 if (result != KERN_SUCCESS) {
5262 vm_object_deallocate(object);
5263 }
5264 *address = map_addr;
5265
5266 return result;
5267 }
5268
5269
5270 #if VM_CPM
5271
5272 #ifdef MACH_ASSERT
5273 extern pmap_paddr_t avail_start, avail_end;
5274 #endif
5275
5276 /*
5277 * Allocate memory in the specified map, with the caveat that
5278 * the memory is physically contiguous. This call may fail
5279 * if the system can't find sufficient contiguous memory.
5280 * This call may cause or lead to heart-stopping amounts of
5281 * paging activity.
5282 *
5283 * Memory obtained from this call should be freed in the
5284 * normal way, viz., via vm_deallocate.
5285 */
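/*
 * Illustrative sketch (not part of the original source), on a kernel
 * built with VM_CPM:
 *
 *	vm_map_offset_t addr = 0;
 *	kr = vm_map_enter_cpm(kernel_map, &addr, 4 * PAGE_SIZE,
 *	    VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		... use the physically contiguous range ...
 *		(void) vm_deallocate(kernel_map, addr, 4 * PAGE_SIZE);
 *	}
 */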
5286 kern_return_t
5287 vm_map_enter_cpm(
5288 vm_map_t map,
5289 vm_map_offset_t *addr,
5290 vm_map_size_t size,
5291 int flags)
5292 {
5293 vm_object_t cpm_obj;
5294 pmap_t pmap;
5295 vm_page_t m, pages;
5296 kern_return_t kr;
5297 vm_map_offset_t va, start, end, offset;
5298 #if MACH_ASSERT
5299 vm_map_offset_t prev_addr = 0;
5300 #endif /* MACH_ASSERT */
5301
5302 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5303 vm_tag_t tag;
5304
5305 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5306 /* XXX TODO4K do we need to support this? */
5307 *addr = 0;
5308 return KERN_NOT_SUPPORTED;
5309 }
5310
5311 VM_GET_FLAGS_ALIAS(flags, tag);
5312
5313 if (size == 0) {
5314 *addr = 0;
5315 return KERN_SUCCESS;
5316 }
5317 if (anywhere) {
5318 *addr = vm_map_min(map);
5319 } else {
5320 *addr = vm_map_trunc_page(*addr,
5321 VM_MAP_PAGE_MASK(map));
5322 }
5323 size = vm_map_round_page(size,
5324 VM_MAP_PAGE_MASK(map));
5325
5326 /*
5327 * LP64todo - cpm_allocate should probably allow
5328 * allocations of >4GB, but not with the current
5329 * algorithm, so just cast down the size for now.
5330 */
5331 if (size > VM_MAX_ADDRESS) {
5332 return KERN_RESOURCE_SHORTAGE;
5333 }
5334 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5335 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5336 return kr;
5337 }
5338
5339 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5340 assert(cpm_obj != VM_OBJECT_NULL);
5341 assert(cpm_obj->internal);
5342 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5343 assert(cpm_obj->can_persist == FALSE);
5344 assert(cpm_obj->pager_created == FALSE);
5345 assert(cpm_obj->pageout == FALSE);
5346 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5347
5348 /*
5349 * Insert pages into object.
5350 */
5351
5352 vm_object_lock(cpm_obj);
5353 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5354 m = pages;
5355 pages = NEXT_PAGE(m);
5356 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5357
5358 assert(!m->vmp_gobbled);
5359 assert(!m->vmp_wanted);
5360 assert(!m->vmp_pageout);
5361 assert(!m->vmp_tabled);
5362 assert(VM_PAGE_WIRED(m));
5363 assert(m->vmp_busy);
5364 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5365
5366 m->vmp_busy = FALSE;
5367 vm_page_insert(m, cpm_obj, offset);
5368 }
5369 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5370 vm_object_unlock(cpm_obj);
5371
5372 /*
5373 * Hang onto a reference on the object in case a
5374 * multi-threaded application for some reason decides
5375 * to deallocate the portion of the address space into
5376 * which we will insert this object.
5377 *
5378 * Unfortunately, we must insert the object now before
5379 * we can talk to the pmap module about which addresses
5380 * must be wired down. Hence, the race with a multi-
5381 * threaded app.
5382 */
5383 vm_object_reference(cpm_obj);
5384
5385 /*
5386 * Insert object into map.
5387 */
5388
5389 kr = vm_map_enter(
5390 map,
5391 addr,
5392 size,
5393 (vm_map_offset_t)0,
5394 flags,
5395 VM_MAP_KERNEL_FLAGS_NONE,
5396 cpm_obj,
5397 (vm_object_offset_t)0,
5398 FALSE,
5399 VM_PROT_ALL,
5400 VM_PROT_ALL,
5401 VM_INHERIT_DEFAULT);
5402
5403 if (kr != KERN_SUCCESS) {
5404 /*
5405 * A CPM object doesn't have can_persist set,
5406 * so all we have to do is deallocate it to
5407 * free up these pages.
5408 */
5409 assert(cpm_obj->pager_created == FALSE);
5410 assert(cpm_obj->can_persist == FALSE);
5411 assert(cpm_obj->pageout == FALSE);
5412 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5413 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5414 vm_object_deallocate(cpm_obj); /* kill creation ref */
5415 }
5416
5417 /*
5418 * Inform the physical mapping system that the
5419 * range of addresses may not fault, so that
5420 * page tables and such can be locked down as well.
5421 */
5422 start = *addr;
5423 end = start + size;
5424 pmap = vm_map_pmap(map);
5425 pmap_pageable(pmap, start, end, FALSE);
5426
5427 /*
5428 * Enter each page into the pmap, to avoid faults.
5429 * Note that this loop could be coded more efficiently,
5430 * if the need arose, rather than looking up each page
5431 * again.
5432 */
5433 for (offset = 0, va = start; offset < size;
5434 va += PAGE_SIZE, offset += PAGE_SIZE) {
5435 int type_of_fault;
5436
5437 vm_object_lock(cpm_obj);
5438 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5439 assert(m != VM_PAGE_NULL);
5440
5441 vm_page_zero_fill(m);
5442
5443 type_of_fault = DBG_ZERO_FILL_FAULT;
5444
5445 vm_fault_enter(m, pmap, va,
5446 PAGE_SIZE, 0,
5447 VM_PROT_ALL, VM_PROT_WRITE,
5448 VM_PAGE_WIRED(m),
5449 FALSE, /* change_wiring */
5450 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5451 FALSE, /* no_cache */
5452 FALSE, /* cs_bypass */
5453 0, /* user_tag */
5454 0, /* pmap_options */
5455 NULL, /* need_retry */
5456 &type_of_fault);
5457
5458 vm_object_unlock(cpm_obj);
5459 }
5460
5461 #if MACH_ASSERT
5462 /*
5463 * Verify ordering in address space.
5464 */
5465 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5466 vm_object_lock(cpm_obj);
5467 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5468 vm_object_unlock(cpm_obj);
5469 if (m == VM_PAGE_NULL) {
5470 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5471 cpm_obj, (uint64_t)offset);
5472 }
5473 assert(m->vmp_tabled);
5474 assert(!m->vmp_busy);
5475 assert(!m->vmp_wanted);
5476 assert(!m->vmp_fictitious);
5477 assert(!m->vmp_private);
5478 assert(!m->vmp_absent);
5479 assert(!m->vmp_error);
5480 assert(!m->vmp_cleaning);
5481 assert(!m->vmp_laundry);
5482 assert(!m->vmp_precious);
5483 assert(!m->vmp_clustered);
5484 if (offset != 0) {
5485 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5486 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5487 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5488 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5489 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5490 panic("vm_allocate_cpm: pages not contig!");
5491 }
5492 }
5493 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5494 }
5495 #endif /* MACH_ASSERT */
5496
5497 vm_object_deallocate(cpm_obj); /* kill extra ref */
5498
5499 return kr;
5500 }
5501
5502
5503 #else /* VM_CPM */
5504
5505 /*
5506 * Interface is defined in all cases, but unless the kernel
5507 * is built explicitly for this option, the interface does
5508 * nothing.
5509 */
5510
5511 kern_return_t
5512 vm_map_enter_cpm(
5513 __unused vm_map_t map,
5514 __unused vm_map_offset_t *addr,
5515 __unused vm_map_size_t size,
5516 __unused int flags)
5517 {
5518 return KERN_FAILURE;
5519 }
5520 #endif /* VM_CPM */
5521
5522 /* Not used without nested pmaps */
5523 #ifndef NO_NESTED_PMAP
5524 /*
5525 * Clip and unnest a portion of a nested submap mapping.
5526 */
5527
5528
5529 static void
5530 vm_map_clip_unnest(
5531 vm_map_t map,
5532 vm_map_entry_t entry,
5533 vm_map_offset_t start_unnest,
5534 vm_map_offset_t end_unnest)
5535 {
5536 vm_map_offset_t old_start_unnest = start_unnest;
5537 vm_map_offset_t old_end_unnest = end_unnest;
5538
5539 assert(entry->is_sub_map);
5540 assert(VME_SUBMAP(entry) != NULL);
5541 assert(entry->use_pmap);
5542
5543 /*
5544 * Query the platform for the optimal unnest range.
5545 * DRK: There's some duplication of effort here, since
5546 * callers may have adjusted the range to some extent. This
5547 * routine was introduced to support 1GiB subtree nesting
5548 * for x86 platforms, which can also nest on 2MiB boundaries
5549 * depending on size/alignment.
5550 */
5551 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5552 assert(VME_SUBMAP(entry)->is_nested_map);
5553 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5554 log_unnest_badness(map,
5555 old_start_unnest,
5556 old_end_unnest,
5557 VME_SUBMAP(entry)->is_nested_map,
5558 (entry->vme_start +
5559 VME_SUBMAP(entry)->lowest_unnestable_start -
5560 VME_OFFSET(entry)));
5561 }
5562
5563 if (entry->vme_start > start_unnest ||
5564 entry->vme_end < end_unnest) {
5565 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5566 "bad nested entry: start=0x%llx end=0x%llx\n",
5567 (long long)start_unnest, (long long)end_unnest,
5568 (long long)entry->vme_start, (long long)entry->vme_end);
5569 }
5570
5571 if (start_unnest > entry->vme_start) {
5572 _vm_map_clip_start(&map->hdr,
5573 entry,
5574 start_unnest);
5575 if (map->holelistenabled) {
5576 vm_map_store_update_first_free(map, NULL, FALSE);
5577 } else {
5578 vm_map_store_update_first_free(map, map->first_free, FALSE);
5579 }
5580 }
5581 if (entry->vme_end > end_unnest) {
5582 _vm_map_clip_end(&map->hdr,
5583 entry,
5584 end_unnest);
5585 if (map->holelistenabled) {
5586 vm_map_store_update_first_free(map, NULL, FALSE);
5587 } else {
5588 vm_map_store_update_first_free(map, map->first_free, FALSE);
5589 }
5590 }
5591
5592 pmap_unnest(map->pmap,
5593 entry->vme_start,
5594 entry->vme_end - entry->vme_start);
5595 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5596 /* clean up parent map/maps */
5597 vm_map_submap_pmap_clean(
5598 map, entry->vme_start,
5599 entry->vme_end,
5600 VME_SUBMAP(entry),
5601 VME_OFFSET(entry));
5602 }
5603 entry->use_pmap = FALSE;
5604 if ((map->pmap != kernel_pmap) &&
5605 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5606 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5607 }
5608 }
5609 #endif /* NO_NESTED_PMAP */
5610
5611 /*
5612 * vm_map_clip_start: [ internal use only ]
5613 *
5614 * Asserts that the given entry begins at or after
5615 * the specified address; if necessary,
5616 * it splits the entry into two.
5617 */
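/*
 * Example (illustrative): clipping an entry covering [0x1000, 0x5000)
 * at startaddr 0x3000 inserts a new entry for [0x1000, 0x3000) before
 * it and leaves this entry as [0x3000, 0x5000), with its object offset
 * advanced by 0x2000; the backing object or submap gains a reference
 * for the new entry.
 */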
5618 void
5619 vm_map_clip_start(
5620 vm_map_t map,
5621 vm_map_entry_t entry,
5622 vm_map_offset_t startaddr)
5623 {
5624 #ifndef NO_NESTED_PMAP
5625 if (entry->is_sub_map &&
5626 entry->use_pmap &&
5627 startaddr >= entry->vme_start) {
5628 vm_map_offset_t start_unnest, end_unnest;
5629
5630 /*
5631 * Make sure "startaddr" is no longer in a nested range
5632 * before we clip. Unnest only the minimum range the platform
5633 * can handle.
5634 * vm_map_clip_unnest may perform additional adjustments to
5635 * the unnest range.
5636 */
5637 start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
5638 end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
5639 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5640 }
5641 #endif /* NO_NESTED_PMAP */
5642 if (startaddr > entry->vme_start) {
5643 if (VME_OBJECT(entry) &&
5644 !entry->is_sub_map &&
5645 VME_OBJECT(entry)->phys_contiguous) {
5646 pmap_remove(map->pmap,
5647 (addr64_t)(entry->vme_start),
5648 (addr64_t)(entry->vme_end));
5649 }
5650 if (entry->vme_atomic) {
5651 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5652 }
5653
5654 DTRACE_VM5(
5655 vm_map_clip_start,
5656 vm_map_t, map,
5657 vm_map_offset_t, entry->vme_start,
5658 vm_map_offset_t, entry->vme_end,
5659 vm_map_offset_t, startaddr,
5660 int, VME_ALIAS(entry));
5661
5662 _vm_map_clip_start(&map->hdr, entry, startaddr);
5663 if (map->holelistenabled) {
5664 vm_map_store_update_first_free(map, NULL, FALSE);
5665 } else {
5666 vm_map_store_update_first_free(map, map->first_free, FALSE);
5667 }
5668 }
5669 }
5670
5671
5672 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5673 MACRO_BEGIN \
5674 if ((startaddr) > (entry)->vme_start) \
5675 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5676 MACRO_END
5677
5678 /*
5679 * This routine is called only when it is known that
5680 * the entry must be split.
5681 */
5682 static void
5683 _vm_map_clip_start(
5684 struct vm_map_header *map_header,
5685 vm_map_entry_t entry,
5686 vm_map_offset_t start)
5687 {
5688 vm_map_entry_t new_entry;
5689
5690 /*
5691 * Split off the front portion --
5692 * note that we must insert the new
5693 * entry BEFORE this one, so that
5694 * this entry has the specified starting
5695 * address.
5696 */
5697
5698 if (entry->map_aligned) {
5699 assert(VM_MAP_PAGE_ALIGNED(start,
5700 VM_MAP_HDR_PAGE_MASK(map_header)));
5701 }
5702
5703 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5704 vm_map_entry_copy_full(new_entry, entry);
5705
5706 new_entry->vme_end = start;
5707 assert(new_entry->vme_start < new_entry->vme_end);
5708 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5709 assert(start < entry->vme_end);
5710 entry->vme_start = start;
5711
5712 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5713
5714 if (entry->is_sub_map) {
5715 vm_map_reference(VME_SUBMAP(new_entry));
5716 } else {
5717 vm_object_reference(VME_OBJECT(new_entry));
5718 }
5719 }
5720
5721
5722 /*
5723 * vm_map_clip_end: [ internal use only ]
5724 *
5725 * Asserts that the given entry ends at or before
5726 * the specified address; if necessary,
5727 * it splits the entry into two.
5728 */
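/*
 * Example (illustrative): clipping an entry covering [0x1000, 0x5000)
 * at endaddr 0x3000 leaves this entry as [0x1000, 0x3000) and inserts
 * a new entry for [0x3000, 0x5000) after it, with the new entry's
 * object offset advanced by 0x2000.
 */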
5729 void
5730 vm_map_clip_end(
5731 vm_map_t map,
5732 vm_map_entry_t entry,
5733 vm_map_offset_t endaddr)
5734 {
5735 if (endaddr > entry->vme_end) {
5736 /*
5737 * Within the scope of this clipping, limit "endaddr" to
5738 * the end of this map entry...
5739 */
5740 endaddr = entry->vme_end;
5741 }
5742 #ifndef NO_NESTED_PMAP
5743 if (entry->is_sub_map && entry->use_pmap) {
5744 vm_map_offset_t start_unnest, end_unnest;
5745
5746 /*
5747 * Make sure the range between the start of this entry and
5748 * the new "endaddr" is no longer nested before we clip.
5749 * Unnest only the minimum range the platform can handle.
5750 * vm_map_clip_unnest may perform additional adjustments to
5751 * the unnest range.
5752 */
5753 start_unnest = entry->vme_start;
5754 end_unnest =
5755 (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
5756 ~(pmap_shared_region_size_min(map->pmap) - 1);
5757 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5758 }
5759 #endif /* NO_NESTED_PMAP */
5760 if (endaddr < entry->vme_end) {
5761 if (VME_OBJECT(entry) &&
5762 !entry->is_sub_map &&
5763 VME_OBJECT(entry)->phys_contiguous) {
5764 pmap_remove(map->pmap,
5765 (addr64_t)(entry->vme_start),
5766 (addr64_t)(entry->vme_end));
5767 }
5768 if (entry->vme_atomic) {
5769 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5770 }
5771 DTRACE_VM5(
5772 vm_map_clip_end,
5773 vm_map_t, map,
5774 vm_map_offset_t, entry->vme_start,
5775 vm_map_offset_t, entry->vme_end,
5776 vm_map_offset_t, endaddr,
5777 int, VME_ALIAS(entry));
5778
5779 _vm_map_clip_end(&map->hdr, entry, endaddr);
5780 if (map->holelistenabled) {
5781 vm_map_store_update_first_free(map, NULL, FALSE);
5782 } else {
5783 vm_map_store_update_first_free(map, map->first_free, FALSE);
5784 }
5785 }
5786 }
5787
5788
5789 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5790 MACRO_BEGIN \
5791 if ((endaddr) < (entry)->vme_end) \
5792 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5793 MACRO_END
5794
5795 /*
5796 * This routine is called only when it is known that
5797 * the entry must be split.
5798 */
5799 static void
5800 _vm_map_clip_end(
5801 struct vm_map_header *map_header,
5802 vm_map_entry_t entry,
5803 vm_map_offset_t end)
5804 {
5805 vm_map_entry_t new_entry;
5806
5807 /*
5808 * Create a new entry and insert it
5809 * AFTER the specified entry
5810 */
5811
5812 if (entry->map_aligned) {
5813 assert(VM_MAP_PAGE_ALIGNED(end,
5814 VM_MAP_HDR_PAGE_MASK(map_header)));
5815 }
5816
5817 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5818 vm_map_entry_copy_full(new_entry, entry);
5819
5820 assert(entry->vme_start < end);
5821 new_entry->vme_start = entry->vme_end = end;
5822 VME_OFFSET_SET(new_entry,
5823 VME_OFFSET(new_entry) + (end - entry->vme_start));
5824 assert(new_entry->vme_start < new_entry->vme_end);
5825
5826 _vm_map_store_entry_link(map_header, entry, new_entry);
5827
5828 if (entry->is_sub_map) {
5829 vm_map_reference(VME_SUBMAP(new_entry));
5830 } else {
5831 vm_object_reference(VME_OBJECT(new_entry));
5832 }
5833 }
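
/*
 * Illustrative worked example (added for clarity, not part of the original
 * source; addresses are hypothetical): if an entry covers [0x1000, 0x5000)
 * with VME_OFFSET == 0 and _vm_map_clip_end() is called with end == 0x3000,
 * the existing entry shrinks to [0x1000, 0x3000) and a new entry covering
 * [0x3000, 0x5000) with VME_OFFSET == 0x2000 is linked in AFTER it, taking
 * an extra reference on the shared object or submap.
 */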
5834
5835
5836 /*
5837 * VM_MAP_RANGE_CHECK: [ internal use only ]
5838 *
5839 * Asserts that the starting and ending region
5840 * addresses fall within the valid range of the map.
5841 */
5842 #define VM_MAP_RANGE_CHECK(map, start, end) \
5843 MACRO_BEGIN \
5844 if (start < vm_map_min(map)) \
5845 start = vm_map_min(map); \
5846 if (end > vm_map_max(map)) \
5847 end = vm_map_max(map); \
5848 if (start > end) \
5849 start = end; \
5850 MACRO_END
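
/*
 * Worked example (added for clarity, values hypothetical): with
 * vm_map_min(map) == 0x100000 and vm_map_max(map) == 0x7fff00000000,
 * VM_MAP_RANGE_CHECK clamps start == 0x1000 up to 0x100000 and leaves a
 * valid end untouched; if clamping ever inverts the range, start is snapped
 * to end, leaving an empty but well-formed range.
 */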
5851
5852 /*
5853 * vm_map_range_check: [ internal use only ]
5854 *
5855 * Check that the region defined by the specified start and
5856 * end addresses is wholly contained within a single map
5857 * entry or set of adjacent map entries of the specified map,
5858 * i.e. the specified region contains no unmapped space.
5859 * If any or all of the region is unmapped, FALSE is returned.
5860 * Otherwise, TRUE is returned and if the output argument 'entry'
5861 * is not NULL it points to the map entry containing the start
5862 * of the region.
5863 *
5864 * The map is locked for reading on entry and is left locked.
5865 */
5866 static boolean_t
5867 vm_map_range_check(
5868 vm_map_t map,
5869 vm_map_offset_t start,
5870 vm_map_offset_t end,
5871 vm_map_entry_t *entry)
5872 {
5873 vm_map_entry_t cur;
5874 vm_map_offset_t prev;
5875
5876 /*
5877 * Basic sanity checks first
5878 */
5879 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5880 return FALSE;
5881 }
5882
5883 /*
5884 * Check first if the region starts within a valid
5885 * mapping for the map.
5886 */
5887 if (!vm_map_lookup_entry(map, start, &cur)) {
5888 return FALSE;
5889 }
5890
5891 /*
5892 * Optimize for the case that the region is contained
5893 * in a single map entry.
5894 */
5895 if (entry != (vm_map_entry_t *) NULL) {
5896 *entry = cur;
5897 }
5898 if (end <= cur->vme_end) {
5899 return TRUE;
5900 }
5901
5902 /*
5903 * If the region is not wholly contained within a
5904 * single entry, walk the entries looking for holes.
5905 */
5906 prev = cur->vme_end;
5907 cur = cur->vme_next;
5908 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5909 if (end <= cur->vme_end) {
5910 return TRUE;
5911 }
5912 prev = cur->vme_end;
5913 cur = cur->vme_next;
5914 }
5915 return FALSE;
5916 }
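
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper is hypothetical): callers take the map lock for reading and, on
 * TRUE, may use the returned entry as the start of the contiguous run that
 * covers [start, end).
 */
#if 0 /* illustrative only, not compiled */
static boolean_t
example_range_is_mapped(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;
	boolean_t       contiguous;

	vm_map_lock_read(map);
	contiguous = vm_map_range_check(map, start, end, &entry);
	vm_map_unlock_read(map);
	return contiguous;
}
#endif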
5917
5918 /*
5919 * vm_map_submap: [ kernel use only ]
5920 *
5921 * Mark the given range as handled by a subordinate map.
5922 *
5923 * This range must have been created with vm_map_find using
5924 * the vm_submap_object, and no other operations may have been
5925 * performed on this range prior to calling vm_map_submap.
5926 *
5927 * Only a limited number of operations can be performed
5928 * within this range after calling vm_map_submap:
5929 * vm_fault
5930 * [Don't try vm_map_copyin!]
5931 *
5932 * To remove a submapping, one must first remove the
5933 * range from the superior map, and then destroy the
5934 * submap (if desired). [Better yet, don't try it.]
5935 */
5936 kern_return_t
5937 vm_map_submap(
5938 vm_map_t map,
5939 vm_map_offset_t start,
5940 vm_map_offset_t end,
5941 vm_map_t submap,
5942 vm_map_offset_t offset,
5943 #ifdef NO_NESTED_PMAP
5944 __unused
5945 #endif /* NO_NESTED_PMAP */
5946 boolean_t use_pmap)
5947 {
5948 vm_map_entry_t entry;
5949 kern_return_t result = KERN_INVALID_ARGUMENT;
5950 vm_object_t object;
5951
5952 vm_map_lock(map);
5953
5954 if (!vm_map_lookup_entry(map, start, &entry)) {
5955 entry = entry->vme_next;
5956 }
5957
5958 if (entry == vm_map_to_entry(map) ||
5959 entry->is_sub_map) {
5960 vm_map_unlock(map);
5961 return KERN_INVALID_ARGUMENT;
5962 }
5963
5964 vm_map_clip_start(map, entry, start);
5965 vm_map_clip_end(map, entry, end);
5966
5967 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5968 (!entry->is_sub_map) &&
5969 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5970 (object->resident_page_count == 0) &&
5971 (object->copy == VM_OBJECT_NULL) &&
5972 (object->shadow == VM_OBJECT_NULL) &&
5973 (!object->pager_created)) {
5974 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5975 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5976 vm_object_deallocate(object);
5977 entry->is_sub_map = TRUE;
5978 entry->use_pmap = FALSE;
5979 VME_SUBMAP_SET(entry, submap);
5980 vm_map_reference(submap);
5981 if (submap->mapped_in_other_pmaps == FALSE &&
5982 vm_map_pmap(submap) != PMAP_NULL &&
5983 vm_map_pmap(submap) != vm_map_pmap(map)) {
5984 /*
5985 * This submap is being mapped in a map
5986 * that uses a different pmap.
5987 * Set its "mapped_in_other_pmaps" flag
5988 * to indicate that we now need to
5989 * remove mappings from all pmaps rather
5990 * than just the submap's pmap.
5991 */
5992 submap->mapped_in_other_pmaps = TRUE;
5993 }
5994
5995 #ifndef NO_NESTED_PMAP
5996 if (use_pmap) {
5997 /* nest if platform code will allow */
5998 if (submap->pmap == NULL) {
5999 ledger_t ledger = map->pmap->ledger;
6000 submap->pmap = pmap_create_options(ledger,
6001 (vm_map_size_t) 0, 0);
6002 if (submap->pmap == PMAP_NULL) {
6003 vm_map_unlock(map);
6004 return KERN_NO_SPACE;
6005 }
6006 #if defined(__arm__) || defined(__arm64__)
6007 pmap_set_nested(submap->pmap);
6008 #endif
6009 }
6010 result = pmap_nest(map->pmap,
6011 (VME_SUBMAP(entry))->pmap,
6012 (addr64_t)start,
6013 (uint64_t)(end - start));
6014 if (result) {
6015 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
6016 }
6017 entry->use_pmap = TRUE;
6018 }
6019 #else /* NO_NESTED_PMAP */
6020 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
6021 #endif /* NO_NESTED_PMAP */
6022 result = KERN_SUCCESS;
6023 }
6024 vm_map_unlock(map);
6025
6026 return result;
6027 }
6028
6029 /*
6030 * vm_map_protect:
6031 *
6032 * Sets the protection of the specified address
6033 * region in the target map. If "set_max" is
6034 * specified, the maximum protection is to be set;
6035 * otherwise, only the current protection is affected.
6036 */
6037 kern_return_t
6038 vm_map_protect(
6039 vm_map_t map,
6040 vm_map_offset_t start,
6041 vm_map_offset_t end,
6042 vm_prot_t new_prot,
6043 boolean_t set_max)
6044 {
6045 vm_map_entry_t current;
6046 vm_map_offset_t prev;
6047 vm_map_entry_t entry;
6048 vm_prot_t new_max;
6049 int pmap_options = 0;
6050 kern_return_t kr;
6051
6052 if (new_prot & VM_PROT_COPY) {
6053 vm_map_offset_t new_start;
6054 vm_prot_t cur_prot, max_prot;
6055 vm_map_kernel_flags_t kflags;
6056
6057 /* LP64todo - see below */
6058 if (start >= map->max_offset) {
6059 return KERN_INVALID_ADDRESS;
6060 }
6061
6062 if ((new_prot & VM_PROT_EXECUTE) &&
6063 map->pmap != kernel_pmap &&
6064 (vm_map_cs_enforcement(map)
6065 #if XNU_TARGET_OS_OSX && __arm64__
6066 || !VM_MAP_IS_EXOTIC(map)
6067 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
6068 ) &&
6069 VM_MAP_POLICY_WX_FAIL(map)) {
6070 DTRACE_VM3(cs_wx,
6071 uint64_t, (uint64_t) start,
6072 uint64_t, (uint64_t) end,
6073 vm_prot_t, new_prot);
6074 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6075 proc_selfpid(),
6076 (current_task()->bsd_info
6077 ? proc_name_address(current_task()->bsd_info)
6078 : "?"),
6079 __FUNCTION__);
6080 return KERN_PROTECTION_FAILURE;
6081 }
6082
6083 /*
6084 * Let vm_map_remap_extract() know that it will need to:
6085 * + make a copy of the mapping
6086 * + add VM_PROT_WRITE to the max protections
6087 * + remove any protections that are no longer allowed from the
6088 * max protections (to avoid any WRITE/EXECUTE conflict, for
6089 * example).
6090 * Note that "max_prot" is an IN/OUT parameter only for this
6091 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
6092 * only.
6093 */
6094 max_prot = new_prot & VM_PROT_ALL;
6095 kflags = VM_MAP_KERNEL_FLAGS_NONE;
6096 kflags.vmkf_remap_prot_copy = TRUE;
6097 kflags.vmkf_overwrite_immutable = TRUE;
6098 new_start = start;
6099 kr = vm_map_remap(map,
6100 &new_start,
6101 end - start,
6102 0, /* mask */
6103 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
6104 kflags,
6105 0,
6106 map,
6107 start,
6108 TRUE, /* copy-on-write remapping! */
6109 &cur_prot,
6110 &max_prot,
6111 VM_INHERIT_DEFAULT);
6112 if (kr != KERN_SUCCESS) {
6113 return kr;
6114 }
6115 new_prot &= ~VM_PROT_COPY;
6116 }
6117
6118 vm_map_lock(map);
6119
6120 /* LP64todo - remove this check when vm_map_commpage64()
6121 * no longer has to stuff in a map_entry for the commpage
6122 * above the map's max_offset.
6123 */
6124 if (start >= map->max_offset) {
6125 vm_map_unlock(map);
6126 return KERN_INVALID_ADDRESS;
6127 }
6128
6129 while (1) {
6130 /*
6131 * Lookup the entry. If it doesn't start in a valid
6132 * entry, return an error.
6133 */
6134 if (!vm_map_lookup_entry(map, start, &entry)) {
6135 vm_map_unlock(map);
6136 return KERN_INVALID_ADDRESS;
6137 }
6138
6139 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
6140 start = SUPERPAGE_ROUND_DOWN(start);
6141 continue;
6142 }
6143 break;
6144 }
6145 if (entry->superpage_size) {
6146 end = SUPERPAGE_ROUND_UP(end);
6147 }
6148
6149 /*
6150 * Make a first pass to check for protection and address
6151 * violations.
6152 */
6153
6154 current = entry;
6155 prev = current->vme_start;
6156 while ((current != vm_map_to_entry(map)) &&
6157 (current->vme_start < end)) {
6158 /*
6159 * If there is a hole, return an error.
6160 */
6161 if (current->vme_start != prev) {
6162 vm_map_unlock(map);
6163 return KERN_INVALID_ADDRESS;
6164 }
6165
6166 new_max = current->max_protection;
6167 #if PMAP_CS
6168 if (set_max && (new_prot & VM_PROT_EXECUTE) && pmap_cs_exempt(map->pmap)) {
6169 new_max |= VM_PROT_EXECUTE;
6170 }
6171 #endif
6172 if ((new_prot & new_max) != new_prot) {
6173 vm_map_unlock(map);
6174 return KERN_PROTECTION_FAILURE;
6175 }
6176
6177 if ((new_prot & VM_PROT_WRITE) &&
6178 (new_prot & VM_PROT_EXECUTE) &&
6179 #if XNU_TARGET_OS_OSX
6180 map->pmap != kernel_pmap &&
6181 (vm_map_cs_enforcement(map)
6182 #if __arm64__
6183 || !VM_MAP_IS_EXOTIC(map)
6184 #endif /* __arm64__ */
6185 ) &&
6186 #endif /* XNU_TARGET_OS_OSX */
6187 #if PMAP_CS
6188 !pmap_cs_exempt(map->pmap) &&
6189 #endif
6190 !(current->used_for_jit)) {
6191 DTRACE_VM3(cs_wx,
6192 uint64_t, (uint64_t) current->vme_start,
6193 uint64_t, (uint64_t) current->vme_end,
6194 vm_prot_t, new_prot);
6195 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6196 proc_selfpid(),
6197 (current_task()->bsd_info
6198 ? proc_name_address(current_task()->bsd_info)
6199 : "?"),
6200 __FUNCTION__);
6201 new_prot &= ~VM_PROT_EXECUTE;
6202 if (VM_MAP_POLICY_WX_FAIL(map)) {
6203 vm_map_unlock(map);
6204 return KERN_PROTECTION_FAILURE;
6205 }
6206 }
6207
6208 /*
6209 * If the task has requested executable lockdown, deny any
6210 * request that would:
6211 * - add executable protections, or
6212 * - add write protections to an existing executable mapping.
6213 */
6214 if (map->map_disallow_new_exec == TRUE) {
6215 if ((new_prot & VM_PROT_EXECUTE) ||
6216 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
6217 vm_map_unlock(map);
6218 return KERN_PROTECTION_FAILURE;
6219 }
6220 }
6221
6222 prev = current->vme_end;
6223 current = current->vme_next;
6224 }
6225
6226 #if __arm64__
6227 if (end > prev &&
6228 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
6229 vm_map_entry_t prev_entry;
6230
6231 prev_entry = current->vme_prev;
6232 if (prev_entry != vm_map_to_entry(map) &&
6233 !prev_entry->map_aligned &&
6234 (vm_map_round_page(prev_entry->vme_end,
6235 VM_MAP_PAGE_MASK(map))
6236 == end)) {
6237 /*
6238 * The last entry in our range is not "map-aligned"
6239 * but it would have reached all the way to "end"
6240 * if it had been map-aligned, so this is not really
6241 * a hole in the range and we can proceed.
6242 */
6243 prev = end;
6244 }
6245 }
6246 #endif /* __arm64__ */
6247
6248 if (end > prev) {
6249 vm_map_unlock(map);
6250 return KERN_INVALID_ADDRESS;
6251 }
6252
6253 /*
6254 * Go back and fix up protections.
6255 * Clip to start here if the range starts within
6256 * the entry.
6257 */
6258
6259 current = entry;
6260 if (current != vm_map_to_entry(map)) {
6261 /* clip and unnest if necessary */
6262 vm_map_clip_start(map, current, start);
6263 }
6264
6265 while ((current != vm_map_to_entry(map)) &&
6266 (current->vme_start < end)) {
6267 vm_prot_t old_prot;
6268
6269 vm_map_clip_end(map, current, end);
6270
6271 if (current->is_sub_map) {
6272 /* clipping did unnest if needed */
6273 assert(!current->use_pmap);
6274 }
6275
6276 old_prot = current->protection;
6277
6278 if (set_max) {
6279 current->max_protection = new_prot;
6280 current->protection = new_prot & old_prot;
6281 } else {
6282 current->protection = new_prot;
6283 }
6284
6285 /*
6286 * Update physical map if necessary.
6287 * If the request is to turn off write protection,
6288 * we won't do it for real (in pmap). This is because
6289 * it would cause copy-on-write to fail. We've already
6290 * set the new protection in the map, so if a
6291 * write-protect fault occurs, it will be fixed up
6292 * properly, COW or not.
6293 */
6294 if (current->protection != old_prot) {
6295 /* Look one level in: we support nested pmaps */
6296 /* from mapped submaps which are direct entries */
6297 /* in our map */
6298
6299 vm_prot_t prot;
6300
6301 prot = current->protection;
6302 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
6303 prot &= ~VM_PROT_WRITE;
6304 } else {
6305 assert(!VME_OBJECT(current)->code_signed);
6306 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6307 }
6308
6309 if (override_nx(map, VME_ALIAS(current)) && prot) {
6310 prot |= VM_PROT_EXECUTE;
6311 }
6312
6313 #if DEVELOPMENT || DEBUG
6314 if (!(old_prot & VM_PROT_EXECUTE) &&
6315 (prot & VM_PROT_EXECUTE) &&
6316 panic_on_unsigned_execute &&
6317 (proc_selfcsflags() & CS_KILL)) {
6318 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
6319 }
6320 #endif /* DEVELOPMENT || DEBUG */
6321
6322 if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
6323 if (current->wired_count) {
6324 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
6325 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6326 }
6327
6328 /* If the pmap layer cares about this
6329 * protection type, force a fault for
6330 * each page so that vm_fault will
6331 * repopulate the page with the full
6332 * set of protections.
6333 */
6334 /*
6335 * TODO: We don't seem to need this,
6336 * but this is due to an internal
6337 * implementation detail of
6338 * pmap_protect. Do we want to rely
6339 * on this?
6340 */
6341 prot = VM_PROT_NONE;
6342 }
6343
6344 if (current->is_sub_map && current->use_pmap) {
6345 pmap_protect(VME_SUBMAP(current)->pmap,
6346 current->vme_start,
6347 current->vme_end,
6348 prot);
6349 } else {
6350 if (prot & VM_PROT_WRITE) {
6351 if (VME_OBJECT(current) == compressor_object) {
6352 /*
6353 * For write requests on the
6354 * compressor, we will ask the
6355 * pmap layer to prevent us from
6356 * taking a write fault when we
6357 * attempt to access the mapping
6358 * next.
6359 */
6360 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6361 }
6362 }
6363
6364 pmap_protect_options(map->pmap,
6365 current->vme_start,
6366 current->vme_end,
6367 prot,
6368 pmap_options,
6369 NULL);
6370 }
6371 }
6372 current = current->vme_next;
6373 }
6374
6375 current = entry;
6376 while ((current != vm_map_to_entry(map)) &&
6377 (current->vme_start <= end)) {
6378 vm_map_simplify_entry(map, current);
6379 current = current->vme_next;
6380 }
6381
6382 vm_map_unlock(map);
6383 return KERN_SUCCESS;
6384 }
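
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper is hypothetical): drop a range to read-only for the current
 * protection first, then clamp the maximum protection as well so it cannot
 * be raised again.
 */
#if 0 /* illustrative only, not compiled */
static kern_return_t
example_make_readonly(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	kern_return_t kr;

	/* set_max == FALSE: only the current protection changes */
	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* set_max == TRUE: max_protection becomes VM_PROT_READ too */
	return vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
}
#endif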
6385
6386 /*
6387 * vm_map_inherit:
6388 *
6389 * Sets the inheritance of the specified address
6390 * range in the target map. Inheritance
6391 * affects how the map will be shared with
6392 * child maps at the time of vm_map_fork.
6393 */
6394 kern_return_t
6395 vm_map_inherit(
6396 vm_map_t map,
6397 vm_map_offset_t start,
6398 vm_map_offset_t end,
6399 vm_inherit_t new_inheritance)
6400 {
6401 vm_map_entry_t entry;
6402 vm_map_entry_t temp_entry;
6403
6404 vm_map_lock(map);
6405
6406 VM_MAP_RANGE_CHECK(map, start, end);
6407
6408 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6409 entry = temp_entry;
6410 } else {
6411 temp_entry = temp_entry->vme_next;
6412 entry = temp_entry;
6413 }
6414
6415 /* first check entire range for submaps which can't support the */
6416 /* given inheritance. */
6417 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6418 if (entry->is_sub_map) {
6419 if (new_inheritance == VM_INHERIT_COPY) {
6420 vm_map_unlock(map);
6421 return KERN_INVALID_ARGUMENT;
6422 }
6423 }
6424
6425 entry = entry->vme_next;
6426 }
6427
6428 entry = temp_entry;
6429 if (entry != vm_map_to_entry(map)) {
6430 /* clip and unnest if necessary */
6431 vm_map_clip_start(map, entry, start);
6432 }
6433
6434 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6435 vm_map_clip_end(map, entry, end);
6436 if (entry->is_sub_map) {
6437 /* clip did unnest if needed */
6438 assert(!entry->use_pmap);
6439 }
6440
6441 entry->inheritance = new_inheritance;
6442
6443 entry = entry->vme_next;
6444 }
6445
6446 vm_map_unlock(map);
6447 return KERN_SUCCESS;
6448 }
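
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper is hypothetical): mark a range so it is not passed on to child
 * tasks at vm_map_fork() time.  As checked above, VM_INHERIT_COPY would be
 * rejected for any part of the range backed by a submap.
 */
#if 0 /* illustrative only, not compiled */
static kern_return_t
example_exclude_from_fork(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_NONE);
}
#endif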
6449
6450 /*
6451 * Update the accounting for the amount of wired memory in this map. If the user has
6452 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6453 */
6454
6455 static kern_return_t
6456 add_wire_counts(
6457 vm_map_t map,
6458 vm_map_entry_t entry,
6459 boolean_t user_wire)
6460 {
6461 vm_map_size_t size;
6462
6463 if (user_wire) {
6464 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6465
6466 /*
6467 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6468 * this map entry.
6469 */
6470
6471 if (entry->user_wired_count == 0) {
6472 size = entry->vme_end - entry->vme_start;
6473
6474 /*
6475 * Since this is the first time the user is wiring this map entry, check to see if we're
6476 * exceeding the user wire limits. There is a per-map limit, which is the smaller of the
6477 * process's rlimit and the global vm_per_task_user_wire_limit. There is also
6478 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6479 * limit, then we fail.
6480 */
6481
6482 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6483 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6484 if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6485 os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
6486 } else {
6487 os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
6488 }
6489 return KERN_RESOURCE_SHORTAGE;
6490 }
6491
6492 /*
6493 * The first time the user wires an entry, we also increment the wired_count and add this to
6494 * the total that has been wired in the map.
6495 */
6496
6497 if (entry->wired_count >= MAX_WIRE_COUNT) {
6498 return KERN_FAILURE;
6499 }
6500
6501 entry->wired_count++;
6502 map->user_wire_size += size;
6503 }
6504
6505 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6506 return KERN_FAILURE;
6507 }
6508
6509 entry->user_wired_count++;
6510 } else {
6511 /*
6512 * The kernel's wiring the memory. Just bump the count and continue.
6513 */
6514
6515 if (entry->wired_count >= MAX_WIRE_COUNT) {
6516 panic("vm_map_wire: too many wirings");
6517 }
6518
6519 entry->wired_count++;
6520 }
6521
6522 return KERN_SUCCESS;
6523 }
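
/*
 * Worked example of the per-map user limit check above (added for clarity,
 * values hypothetical): for a 16 KB entry being user-wired for the first
 * time, with map->user_wire_size == 48 KB and an effective limit of
 * MIN(map->user_wire_limit, vm_per_task_user_wire_limit) == 56 KB, the call
 * fails with KERN_RESOURCE_SHORTAGE because 16 KB + 48 KB > 56 KB, and
 * vm_add_wire_count_over_user_limit is bumped.  The global check against
 * vm_global_user_wire_limit works the same way on the system-wide total of
 * wired pages.
 */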
6524
6525 /*
6526 * Update the memory wiring accounting now that the given map entry is being unwired.
6527 */
6528
6529 static void
6530 subtract_wire_counts(
6531 vm_map_t map,
6532 vm_map_entry_t entry,
6533 boolean_t user_wire)
6534 {
6535 if (user_wire) {
6536 /*
6537 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6538 */
6539
6540 if (entry->user_wired_count == 1) {
6541 /*
6542 * We're removing the last user wire reference. Decrement the wired_count and the total
6543 * user wired memory for this map.
6544 */
6545
6546 assert(entry->wired_count >= 1);
6547 entry->wired_count--;
6548 map->user_wire_size -= entry->vme_end - entry->vme_start;
6549 }
6550
6551 assert(entry->user_wired_count >= 1);
6552 entry->user_wired_count--;
6553 } else {
6554 /*
6555 * The kernel is unwiring the memory. Just update the count.
6556 */
6557
6558 assert(entry->wired_count >= 1);
6559 entry->wired_count--;
6560 }
6561 }
6562
6563 int cs_executable_wire = 0;
6564
6565 /*
6566 * vm_map_wire:
6567 *
6568 * Sets the pageability of the specified address range in the
6569 * target map as wired. Regions specified as not pageable require
6570 * locked-down physical memory and physical page maps. The
6571 * access_type variable indicates types of accesses that must not
6572 * generate page faults. This is checked against protection of
6573 * memory being locked-down.
6574 *
6575 * The map must not be locked, but a reference must remain to the
6576 * map throughout the call.
6577 */
6578 static kern_return_t
6579 vm_map_wire_nested(
6580 vm_map_t map,
6581 vm_map_offset_t start,
6582 vm_map_offset_t end,
6583 vm_prot_t caller_prot,
6584 vm_tag_t tag,
6585 boolean_t user_wire,
6586 pmap_t map_pmap,
6587 vm_map_offset_t pmap_addr,
6588 ppnum_t *physpage_p)
6589 {
6590 vm_map_entry_t entry;
6591 vm_prot_t access_type;
6592 struct vm_map_entry *first_entry, tmp_entry;
6593 vm_map_t real_map;
6594 vm_map_offset_t s, e;
6595 kern_return_t rc;
6596 boolean_t need_wakeup;
6597 boolean_t main_map = FALSE;
6598 wait_interrupt_t interruptible_state;
6599 thread_t cur_thread;
6600 unsigned int last_timestamp;
6601 vm_map_size_t size;
6602 boolean_t wire_and_extract;
6603 vm_prot_t extra_prots;
6604
6605 extra_prots = VM_PROT_COPY;
6606 extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6607 #if XNU_TARGET_OS_OSX
6608 if (map->pmap == kernel_pmap ||
6609 !vm_map_cs_enforcement(map)) {
6610 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6611 }
6612 #endif /* XNU_TARGET_OS_OSX */
6613 #if PMAP_CS
6614 if (pmap_cs_exempt(map->pmap)) {
6615 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6616 }
6617 #endif /* PMAP_CS */
6618
6619 access_type = (caller_prot & VM_PROT_ALL);
6620
6621 wire_and_extract = FALSE;
6622 if (physpage_p != NULL) {
6623 /*
6624 * The caller wants the physical page number of the
6625 * wired page. We return only one physical page number
6626 * so this works for only one page at a time.
6627 */
6628 if ((end - start) != PAGE_SIZE) {
6629 return KERN_INVALID_ARGUMENT;
6630 }
6631 wire_and_extract = TRUE;
6632 *physpage_p = 0;
6633 }
6634
6635 vm_map_lock(map);
6636 if (map_pmap == NULL) {
6637 main_map = TRUE;
6638 }
6639 last_timestamp = map->timestamp;
6640
6641 VM_MAP_RANGE_CHECK(map, start, end);
6642 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6643 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6644
6645 if (start == end) {
6646 /* We wired what the caller asked for, zero pages */
6647 vm_map_unlock(map);
6648 return KERN_SUCCESS;
6649 }
6650
6651 need_wakeup = FALSE;
6652 cur_thread = current_thread();
6653
6654 s = start;
6655 rc = KERN_SUCCESS;
6656
6657 if (vm_map_lookup_entry(map, s, &first_entry)) {
6658 entry = first_entry;
6659 /*
6660 * vm_map_clip_start will be done later.
6661 * We don't want to unnest any nested submaps here !
6662 */
6663 } else {
6664 /* Start address is not in map */
6665 rc = KERN_INVALID_ADDRESS;
6666 goto done;
6667 }
6668
6669 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6670 /*
6671 * At this point, we have wired from "start" to "s".
6672 * We still need to wire from "s" to "end".
6673 *
6674 * "entry" hasn't been clipped, so it could start before "s"
6675 * and/or end after "end".
6676 */
6677
6678 /* "e" is how far we want to wire in this entry */
6679 e = entry->vme_end;
6680 if (e > end) {
6681 e = end;
6682 }
6683
6684 /*
6685 * If another thread is wiring/unwiring this entry then
6686 * block after informing the other thread to wake us up.
6687 */
6688 if (entry->in_transition) {
6689 wait_result_t wait_result;
6690
6691 /*
6692 * We have not clipped the entry. Make sure that
6693 * the start address is in range so that the lookup
6694 * below will succeed.
6695 * "s" is the current starting point: we've already
6696 * wired from "start" to "s" and we still have
6697 * to wire from "s" to "end".
6698 */
6699
6700 entry->needs_wakeup = TRUE;
6701
6702 /*
6703 * wake up anybody waiting on entries that we have
6704 * already wired.
6705 */
6706 if (need_wakeup) {
6707 vm_map_entry_wakeup(map);
6708 need_wakeup = FALSE;
6709 }
6710 /*
6711 * User wiring is interruptible
6712 */
6713 wait_result = vm_map_entry_wait(map,
6714 (user_wire) ? THREAD_ABORTSAFE :
6715 THREAD_UNINT);
6716 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6717 /*
6718 * undo the wirings we have done so far
6719 * We do not clear the needs_wakeup flag,
6720 * because we cannot tell if we were the
6721 * only one waiting.
6722 */
6723 rc = KERN_FAILURE;
6724 goto done;
6725 }
6726
6727 /*
6728 * Cannot avoid a lookup here. reset timestamp.
6729 */
6730 last_timestamp = map->timestamp;
6731
6732 /*
6733 * The entry could have been clipped, so look it up again.
6734 * The worst that can happen is that it no longer exists.
6735 */
6736 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6737 /*
6738 * User: undo everything up to the previous
6739 * entry. Let vm_map_unwire worry about
6740 * checking the validity of the range.
6741 */
6742 rc = KERN_FAILURE;
6743 goto done;
6744 }
6745 entry = first_entry;
6746 continue;
6747 }
6748
6749 if (entry->is_sub_map) {
6750 vm_map_offset_t sub_start;
6751 vm_map_offset_t sub_end;
6752 vm_map_offset_t local_start;
6753 vm_map_offset_t local_end;
6754 pmap_t pmap;
6755
6756 if (wire_and_extract) {
6757 /*
6758 * Wiring would result in copy-on-write
6759 * which would not be compatible with
6760 * the sharing we have with the original
6761 * provider of this memory.
6762 */
6763 rc = KERN_INVALID_ARGUMENT;
6764 goto done;
6765 }
6766
6767 vm_map_clip_start(map, entry, s);
6768 vm_map_clip_end(map, entry, end);
6769
6770 sub_start = VME_OFFSET(entry);
6771 sub_end = entry->vme_end;
6772 sub_end += VME_OFFSET(entry) - entry->vme_start;
6773
6774 local_end = entry->vme_end;
6775 if (map_pmap == NULL) {
6776 vm_object_t object;
6777 vm_object_offset_t offset;
6778 vm_prot_t prot;
6779 boolean_t wired;
6780 vm_map_entry_t local_entry;
6781 vm_map_version_t version;
6782 vm_map_t lookup_map;
6783
6784 if (entry->use_pmap) {
6785 pmap = VME_SUBMAP(entry)->pmap;
6786 /* ppc implementation requires that */
6787 /* submaps' pmap address ranges line */
6788 /* up with parent map */
6789 #ifdef notdef
6790 pmap_addr = sub_start;
6791 #endif
6792 pmap_addr = s;
6793 } else {
6794 pmap = map->pmap;
6795 pmap_addr = s;
6796 }
6797
6798 if (entry->wired_count) {
6799 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6800 goto done;
6801 }
6802
6803 /*
6804 * The map was not unlocked:
6805 * no need to goto re-lookup.
6806 * Just go directly to next entry.
6807 */
6808 entry = entry->vme_next;
6809 s = entry->vme_start;
6810 continue;
6811 }
6812
6813 /* call vm_map_lookup_locked to */
6814 /* cause any needs copy to be */
6815 /* evaluated */
6816 local_start = entry->vme_start;
6817 lookup_map = map;
6818 vm_map_lock_write_to_read(map);
6819 rc = vm_map_lookup_locked(
6820 &lookup_map, local_start,
6821 (access_type | extra_prots),
6822 OBJECT_LOCK_EXCLUSIVE,
6823 &version, &object,
6824 &offset, &prot, &wired,
6825 NULL,
6826 &real_map, NULL);
6827 if (rc != KERN_SUCCESS) {
6828 vm_map_unlock_read(lookup_map);
6829 assert(map_pmap == NULL);
6830 vm_map_unwire(map, start,
6831 s, user_wire);
6832 return rc;
6833 }
6834 vm_object_unlock(object);
6835 if (real_map != lookup_map) {
6836 vm_map_unlock(real_map);
6837 }
6838 vm_map_unlock_read(lookup_map);
6839 vm_map_lock(map);
6840
6841 /* we unlocked, so must re-lookup */
6842 if (!vm_map_lookup_entry(map,
6843 local_start,
6844 &local_entry)) {
6845 rc = KERN_FAILURE;
6846 goto done;
6847 }
6848
6849 /*
6850 * entry could have been "simplified",
6851 * so re-clip
6852 */
6853 entry = local_entry;
6854 assert(s == local_start);
6855 vm_map_clip_start(map, entry, s);
6856 vm_map_clip_end(map, entry, end);
6857 /* re-compute "e" */
6858 e = entry->vme_end;
6859 if (e > end) {
6860 e = end;
6861 }
6862
6863 /* did we have a change of type? */
6864 if (!entry->is_sub_map) {
6865 last_timestamp = map->timestamp;
6866 continue;
6867 }
6868 } else {
6869 local_start = entry->vme_start;
6870 pmap = map_pmap;
6871 }
6872
6873 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6874 goto done;
6875 }
6876
6877 entry->in_transition = TRUE;
6878
6879 vm_map_unlock(map);
6880 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6881 sub_start, sub_end,
6882 caller_prot, tag,
6883 user_wire, pmap, pmap_addr,
6884 NULL);
6885 vm_map_lock(map);
6886
6887 /*
6888 * Find the entry again. It could have been clipped
6889 * after we unlocked the map.
6890 */
6891 if (!vm_map_lookup_entry(map, local_start,
6892 &first_entry)) {
6893 panic("vm_map_wire: re-lookup failed");
6894 }
6895 entry = first_entry;
6896
6897 assert(local_start == s);
6898 /* re-compute "e" */
6899 e = entry->vme_end;
6900 if (e > end) {
6901 e = end;
6902 }
6903
6904 last_timestamp = map->timestamp;
6905 while ((entry != vm_map_to_entry(map)) &&
6906 (entry->vme_start < e)) {
6907 assert(entry->in_transition);
6908 entry->in_transition = FALSE;
6909 if (entry->needs_wakeup) {
6910 entry->needs_wakeup = FALSE;
6911 need_wakeup = TRUE;
6912 }
6913 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6914 subtract_wire_counts(map, entry, user_wire);
6915 }
6916 entry = entry->vme_next;
6917 }
6918 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6919 goto done;
6920 }
6921
6922 /* no need to relookup again */
6923 s = entry->vme_start;
6924 continue;
6925 }
6926
6927 /*
6928 * If this entry is already wired then increment
6929 * the appropriate wire reference count.
6930 */
6931 if (entry->wired_count) {
6932 if ((entry->protection & access_type) != access_type) {
6933 /* found a protection problem */
6934
6935 /*
6936 * XXX FBDP
6937 * We should always return an error
6938 * in this case but since we didn't
6939 * enforce it before, let's do
6940 * it only for the new "wire_and_extract"
6941 * code path for now...
6942 */
6943 if (wire_and_extract) {
6944 rc = KERN_PROTECTION_FAILURE;
6945 goto done;
6946 }
6947 }
6948
6949 /*
6950 * entry is already wired down, get our reference
6951 * after clipping to our range.
6952 */
6953 vm_map_clip_start(map, entry, s);
6954 vm_map_clip_end(map, entry, end);
6955
6956 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6957 goto done;
6958 }
6959
6960 if (wire_and_extract) {
6961 vm_object_t object;
6962 vm_object_offset_t offset;
6963 vm_page_t m;
6964
6965 /*
6966 * We don't have to "wire" the page again
6967 * but we still have to "extract" its
6968 * physical page number, after some sanity
6969 * checks.
6970 */
6971 assert((entry->vme_end - entry->vme_start)
6972 == PAGE_SIZE);
6973 assert(!entry->needs_copy);
6974 assert(!entry->is_sub_map);
6975 assert(VME_OBJECT(entry));
6976 if (((entry->vme_end - entry->vme_start)
6977 != PAGE_SIZE) ||
6978 entry->needs_copy ||
6979 entry->is_sub_map ||
6980 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6981 rc = KERN_INVALID_ARGUMENT;
6982 goto done;
6983 }
6984
6985 object = VME_OBJECT(entry);
6986 offset = VME_OFFSET(entry);
6987 /* need exclusive lock to update m->dirty */
6988 if (entry->protection & VM_PROT_WRITE) {
6989 vm_object_lock(object);
6990 } else {
6991 vm_object_lock_shared(object);
6992 }
6993 m = vm_page_lookup(object, offset);
6994 assert(m != VM_PAGE_NULL);
6995 assert(VM_PAGE_WIRED(m));
6996 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6997 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6998 if (entry->protection & VM_PROT_WRITE) {
6999 vm_object_lock_assert_exclusive(
7000 object);
7001 m->vmp_dirty = TRUE;
7002 }
7003 } else {
7004 /* not already wired !? */
7005 *physpage_p = 0;
7006 }
7007 vm_object_unlock(object);
7008 }
7009
7010 /* map was not unlocked: no need to relookup */
7011 entry = entry->vme_next;
7012 s = entry->vme_start;
7013 continue;
7014 }
7015
7016 /*
7017 * Unwired entry or wire request transmitted via submap
7018 */
7019
7020 /*
7021 * Wiring would copy the pages to the shadow object.
7022 * The shadow object would not be code-signed so
7023 * attempting to execute code from these copied pages
7024 * would trigger a code-signing violation.
7025 */
7026
7027 if ((entry->protection & VM_PROT_EXECUTE)
7028 #if XNU_TARGET_OS_OSX
7029 &&
7030 map->pmap != kernel_pmap &&
7031 (vm_map_cs_enforcement(map)
7032 #if __arm64__
7033 || !VM_MAP_IS_EXOTIC(map)
7034 #endif /* __arm64__ */
7035 )
7036 #endif /* XNU_TARGET_OS_OSX */
7037 #if PMAP_CS
7038 &&
7039 !pmap_cs_exempt(map->pmap)
7040 #endif
7041 ) {
7042 #if MACH_ASSERT
7043 printf("pid %d[%s] wiring executable range from "
7044 "0x%llx to 0x%llx: rejected to preserve "
7045 "code-signing\n",
7046 proc_selfpid(),
7047 (current_task()->bsd_info
7048 ? proc_name_address(current_task()->bsd_info)
7049 : "?"),
7050 (uint64_t) entry->vme_start,
7051 (uint64_t) entry->vme_end);
7052 #endif /* MACH_ASSERT */
7053 DTRACE_VM2(cs_executable_wire,
7054 uint64_t, (uint64_t)entry->vme_start,
7055 uint64_t, (uint64_t)entry->vme_end);
7056 cs_executable_wire++;
7057 rc = KERN_PROTECTION_FAILURE;
7058 goto done;
7059 }
7060
7061 /*
7062 * Perform actions of vm_map_lookup that need the write
7063 * lock on the map: create a shadow object for a
7064 * copy-on-write region, or an object for a zero-fill
7065 * region.
7066 */
7067 size = entry->vme_end - entry->vme_start;
7068 /*
7069 * If wiring a copy-on-write page, we need to copy it now
7070 * even if we're only (currently) requesting read access.
7071 * This is aggressive, but once it's wired we can't move it.
7072 */
7073 if (entry->needs_copy) {
7074 if (wire_and_extract) {
7075 /*
7076 * We're supposed to share with the original
7077 * provider so should not be "needs_copy"
7078 */
7079 rc = KERN_INVALID_ARGUMENT;
7080 goto done;
7081 }
7082
7083 VME_OBJECT_SHADOW(entry, size);
7084 entry->needs_copy = FALSE;
7085 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
7086 if (wire_and_extract) {
7087 /*
7088 * We're supposed to share with the original
7089 * provider so should already have an object.
7090 */
7091 rc = KERN_INVALID_ARGUMENT;
7092 goto done;
7093 }
7094 VME_OBJECT_SET(entry, vm_object_allocate(size));
7095 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
7096 assert(entry->use_pmap);
7097 }
7098
7099 vm_map_clip_start(map, entry, s);
7100 vm_map_clip_end(map, entry, end);
7101
7102 /* re-compute "e" */
7103 e = entry->vme_end;
7104 if (e > end) {
7105 e = end;
7106 }
7107
7108 /*
7109 * Check for holes and protection mismatch.
7110 * Holes: Next entry should be contiguous unless this
7111 * is the end of the region.
7112 * Protection: Access requested must be allowed, unless
7113 * wiring is by protection class
7114 */
7115 if ((entry->vme_end < end) &&
7116 ((entry->vme_next == vm_map_to_entry(map)) ||
7117 (entry->vme_next->vme_start > entry->vme_end))) {
7118 /* found a hole */
7119 rc = KERN_INVALID_ADDRESS;
7120 goto done;
7121 }
7122 if ((entry->protection & access_type) != access_type) {
7123 /* found a protection problem */
7124 rc = KERN_PROTECTION_FAILURE;
7125 goto done;
7126 }
7127
7128 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
7129
7130 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
7131 goto done;
7132 }
7133
7134 entry->in_transition = TRUE;
7135
7136 /*
7137 * This entry might get split once we unlock the map.
7138 * In vm_fault_wire(), we need the current range as
7139 * defined by this entry. In order for this to work
7140 * along with a simultaneous clip operation, we make a
7141 * temporary copy of this entry and use that for the
7142 * wiring. Note that the underlying objects do not
7143 * change during a clip.
7144 */
7145 tmp_entry = *entry;
7146
7147 /*
7148 * The in_transition state guarantees that the entry
7149 * (or entries for this range, if a split occurred) will be
7150 * there when the map lock is acquired for the second time.
7151 */
7152 vm_map_unlock(map);
7153
7154 if (!user_wire && cur_thread != THREAD_NULL) {
7155 interruptible_state = thread_interrupt_level(THREAD_UNINT);
7156 } else {
7157 interruptible_state = THREAD_UNINT;
7158 }
7159
7160 if (map_pmap) {
7161 rc = vm_fault_wire(map,
7162 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
7163 physpage_p);
7164 } else {
7165 rc = vm_fault_wire(map,
7166 &tmp_entry, caller_prot, tag, map->pmap,
7167 tmp_entry.vme_start,
7168 physpage_p);
7169 }
7170
7171 if (!user_wire && cur_thread != THREAD_NULL) {
7172 thread_interrupt_level(interruptible_state);
7173 }
7174
7175 vm_map_lock(map);
7176
7177 if (last_timestamp + 1 != map->timestamp) {
7178 /*
7179 * Find the entry again. It could have been clipped
7180 * after we unlocked the map.
7181 */
7182 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7183 &first_entry)) {
7184 panic("vm_map_wire: re-lookup failed");
7185 }
7186
7187 entry = first_entry;
7188 }
7189
7190 last_timestamp = map->timestamp;
7191
7192 while ((entry != vm_map_to_entry(map)) &&
7193 (entry->vme_start < tmp_entry.vme_end)) {
7194 assert(entry->in_transition);
7195 entry->in_transition = FALSE;
7196 if (entry->needs_wakeup) {
7197 entry->needs_wakeup = FALSE;
7198 need_wakeup = TRUE;
7199 }
7200 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7201 subtract_wire_counts(map, entry, user_wire);
7202 }
7203 entry = entry->vme_next;
7204 }
7205
7206 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7207 goto done;
7208 }
7209
7210 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
7211 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
7212 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
7213 /* found a "new" hole */
7214 s = tmp_entry.vme_end;
7215 rc = KERN_INVALID_ADDRESS;
7216 goto done;
7217 }
7218
7219 s = entry->vme_start;
7220 } /* end while loop through map entries */
7221
7222 done:
7223 if (rc == KERN_SUCCESS) {
7224 /* repair any damage we may have made to the VM map */
7225 vm_map_simplify_range(map, start, end);
7226 }
7227
7228 vm_map_unlock(map);
7229
7230 /*
7231 * wake up anybody waiting on entries we wired.
7232 */
7233 if (need_wakeup) {
7234 vm_map_entry_wakeup(map);
7235 }
7236
7237 if (rc != KERN_SUCCESS) {
7238 /* undo what has been wired so far */
7239 vm_map_unwire_nested(map, start, s, user_wire,
7240 map_pmap, pmap_addr);
7241 if (physpage_p) {
7242 *physpage_p = 0;
7243 }
7244 }
7245
7246 return rc;
7247 }
7248
7249 kern_return_t
7250 vm_map_wire_external(
7251 vm_map_t map,
7252 vm_map_offset_t start,
7253 vm_map_offset_t end,
7254 vm_prot_t caller_prot,
7255 boolean_t user_wire)
7256 {
7257 kern_return_t kret;
7258
7259 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7260 user_wire, (pmap_t)NULL, 0, NULL);
7261 return kret;
7262 }
7263
7264 kern_return_t
7265 vm_map_wire_kernel(
7266 vm_map_t map,
7267 vm_map_offset_t start,
7268 vm_map_offset_t end,
7269 vm_prot_t caller_prot,
7270 vm_tag_t tag,
7271 boolean_t user_wire)
7272 {
7273 kern_return_t kret;
7274
7275 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7276 user_wire, (pmap_t)NULL, 0, NULL);
7277 return kret;
7278 }
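
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper and the VM_KERN_MEMORY_OSFMK tag choice are illustrative
 * assumptions): wire a page-aligned range for kernel use under an
 * accounting tag, then release the wiring with vm_map_unwire().  Kernel
 * wirings pass user_wire == FALSE, so the user wire limits enforced in
 * add_wire_counts() do not apply.
 */
#if 0 /* illustrative only, not compiled */
static kern_return_t
example_wire_range(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	kern_return_t kr;

	kr = vm_map_wire_kernel(map, start, end,
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_KERN_MEMORY_OSFMK,
	    FALSE);             /* kernel wiring, not a user wiring */
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* ... access the wired range ... */
	return vm_map_unwire(map, start, end, FALSE);
}
#endif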
7279
7280 kern_return_t
7281 vm_map_wire_and_extract_external(
7282 vm_map_t map,
7283 vm_map_offset_t start,
7284 vm_prot_t caller_prot,
7285 boolean_t user_wire,
7286 ppnum_t *physpage_p)
7287 {
7288 kern_return_t kret;
7289
7290 kret = vm_map_wire_nested(map,
7291 start,
7292 start + VM_MAP_PAGE_SIZE(map),
7293 caller_prot,
7294 vm_tag_bt(),
7295 user_wire,
7296 (pmap_t)NULL,
7297 0,
7298 physpage_p);
7299 if (kret != KERN_SUCCESS &&
7300 physpage_p != NULL) {
7301 *physpage_p = 0;
7302 }
7303 return kret;
7304 }
7305
7306 kern_return_t
7307 vm_map_wire_and_extract_kernel(
7308 vm_map_t map,
7309 vm_map_offset_t start,
7310 vm_prot_t caller_prot,
7311 vm_tag_t tag,
7312 boolean_t user_wire,
7313 ppnum_t *physpage_p)
7314 {
7315 kern_return_t kret;
7316
7317 kret = vm_map_wire_nested(map,
7318 start,
7319 start + VM_MAP_PAGE_SIZE(map),
7320 caller_prot,
7321 tag,
7322 user_wire,
7323 (pmap_t)NULL,
7324 0,
7325 physpage_p);
7326 if (kret != KERN_SUCCESS &&
7327 physpage_p != NULL) {
7328 *physpage_p = 0;
7329 }
7330 return kret;
7331 }
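
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper and tag choice are illustrative assumptions): wire exactly one
 * VM-map page and retrieve its physical page number.  As enforced in
 * vm_map_wire_nested(), the wire-and-extract path operates on a single page
 * at a time.
 */
#if 0 /* illustrative only, not compiled */
static kern_return_t
example_wire_one_page(
	vm_map_t        map,
	vm_map_offset_t addr,
	ppnum_t         *ppnum_out)
{
	vm_map_offset_t page_start;

	page_start = vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map));
	return vm_map_wire_and_extract_kernel(map, page_start,
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_KERN_MEMORY_OSFMK,
	    FALSE,              /* kernel wiring, not a user wiring */
	    ppnum_out);
}
#endif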
7332
7333 /*
7334 * vm_map_unwire:
7335 *
7336 * Sets the pageability of the specified address range in the target map
7337 * as pageable. Regions specified must have been wired previously.
7338 *
7339 * The map must not be locked, but a reference must remain to the map
7340 * throughout the call.
7341 *
7342 * Kernel will panic on failures. User unwire ignores holes and
7343 * unwired and in-transition entries to avoid losing memory by leaving
7344 * it unwired.
7345 */
7346 static kern_return_t
7347 vm_map_unwire_nested(
7348 vm_map_t map,
7349 vm_map_offset_t start,
7350 vm_map_offset_t end,
7351 boolean_t user_wire,
7352 pmap_t map_pmap,
7353 vm_map_offset_t pmap_addr)
7354 {
7355 vm_map_entry_t entry;
7356 struct vm_map_entry *first_entry, tmp_entry;
7357 boolean_t need_wakeup;
7358 boolean_t main_map = FALSE;
7359 unsigned int last_timestamp;
7360
7361 vm_map_lock(map);
7362 if (map_pmap == NULL) {
7363 main_map = TRUE;
7364 }
7365 last_timestamp = map->timestamp;
7366
7367 VM_MAP_RANGE_CHECK(map, start, end);
7368 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7369 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7370
7371 if (start == end) {
7372 /* We unwired what the caller asked for: zero pages */
7373 vm_map_unlock(map);
7374 return KERN_SUCCESS;
7375 }
7376
7377 if (vm_map_lookup_entry(map, start, &first_entry)) {
7378 entry = first_entry;
7379 /*
7380 * vm_map_clip_start will be done later.
7381 * We don't want to unnest any nested sub maps here !
7382 */
7383 } else {
7384 if (!user_wire) {
7385 panic("vm_map_unwire: start not found");
7386 }
7387 /* Start address is not in map. */
7388 vm_map_unlock(map);
7389 return KERN_INVALID_ADDRESS;
7390 }
7391
7392 if (entry->superpage_size) {
7393 /* superpages are always wired */
7394 vm_map_unlock(map);
7395 return KERN_INVALID_ADDRESS;
7396 }
7397
7398 need_wakeup = FALSE;
7399 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7400 if (entry->in_transition) {
7401 /*
7402 * 1)
7403 * Another thread is wiring down this entry. Note
7404 * that if it were not for the other thread, we would
7405 * be unwiring an unwired entry. This is not
7406 * permitted. If we wait, we will be unwiring memory
7407 * we did not wire.
7408 *
7409 * 2)
7410 * Another thread is unwiring this entry. We did not
7411 * have a reference to it, because if we did, this
7412 * entry will not be getting unwired now.
7413 */
7414 if (!user_wire) {
7415 /*
7416 * XXX FBDP
7417 * This could happen: there could be some
7418 * overlapping vslock/vsunlock operations
7419 * going on.
7420 * We should probably just wait and retry,
7421 * but then we have to be careful that this
7422 * entry could get "simplified" after
7423 * "in_transition" gets unset and before
7424 * we re-lookup the entry, so we would
7425 * have to re-clip the entry to avoid
7426 * re-unwiring what we have already unwired...
7427 * See vm_map_wire_nested().
7428 *
7429 * Or we could just ignore "in_transition"
7430 * here and proceed to decrement the wired
7431 * count(s) on this entry. That should be fine
7432 * as long as "wired_count" doesn't drop all
7433 * the way to 0 (and we should panic if THAT
7434 * happens).
7435 */
7436 panic("vm_map_unwire: in_transition entry");
7437 }
7438
7439 entry = entry->vme_next;
7440 continue;
7441 }
7442
7443 if (entry->is_sub_map) {
7444 vm_map_offset_t sub_start;
7445 vm_map_offset_t sub_end;
7446 vm_map_offset_t local_end;
7447 pmap_t pmap;
7448
7449 vm_map_clip_start(map, entry, start);
7450 vm_map_clip_end(map, entry, end);
7451
7452 sub_start = VME_OFFSET(entry);
7453 sub_end = entry->vme_end - entry->vme_start;
7454 sub_end += VME_OFFSET(entry);
7455 local_end = entry->vme_end;
7456 if (map_pmap == NULL) {
7457 if (entry->use_pmap) {
7458 pmap = VME_SUBMAP(entry)->pmap;
7459 pmap_addr = sub_start;
7460 } else {
7461 pmap = map->pmap;
7462 pmap_addr = start;
7463 }
7464 if (entry->wired_count == 0 ||
7465 (user_wire && entry->user_wired_count == 0)) {
7466 if (!user_wire) {
7467 panic("vm_map_unwire: entry is unwired");
7468 }
7469 entry = entry->vme_next;
7470 continue;
7471 }
7472
7473 /*
7474 * Check for holes
7475 * Holes: Next entry should be contiguous unless
7476 * this is the end of the region.
7477 */
7478 if (((entry->vme_end < end) &&
7479 ((entry->vme_next == vm_map_to_entry(map)) ||
7480 (entry->vme_next->vme_start
7481 > entry->vme_end)))) {
7482 if (!user_wire) {
7483 panic("vm_map_unwire: non-contiguous region");
7484 }
7485 /*
7486 * entry = entry->vme_next;
7487 * continue;
7488 */
7489 }
7490
7491 subtract_wire_counts(map, entry, user_wire);
7492
7493 if (entry->wired_count != 0) {
7494 entry = entry->vme_next;
7495 continue;
7496 }
7497
7498 entry->in_transition = TRUE;
7499 tmp_entry = *entry;/* see comment in vm_map_wire() */
7500
7501 /*
7502 * We can unlock the map now. The in_transition state
7503 * guarantees existence of the entry.
7504 */
7505 vm_map_unlock(map);
7506 vm_map_unwire_nested(VME_SUBMAP(entry),
7507 sub_start, sub_end, user_wire, pmap, pmap_addr);
7508 vm_map_lock(map);
7509
7510 if (last_timestamp + 1 != map->timestamp) {
7511 /*
7512 * Find the entry again. It could have been
7513 * clipped or deleted after we unlocked the map.
7514 */
7515 if (!vm_map_lookup_entry(map,
7516 tmp_entry.vme_start,
7517 &first_entry)) {
7518 if (!user_wire) {
7519 panic("vm_map_unwire: re-lookup failed");
7520 }
7521 entry = first_entry->vme_next;
7522 } else {
7523 entry = first_entry;
7524 }
7525 }
7526 last_timestamp = map->timestamp;
7527
7528 /*
7529 * clear transition bit for all constituent entries
7530 * that were in the original entry (saved in
7531 * tmp_entry). Also check for waiters.
7532 */
7533 while ((entry != vm_map_to_entry(map)) &&
7534 (entry->vme_start < tmp_entry.vme_end)) {
7535 assert(entry->in_transition);
7536 entry->in_transition = FALSE;
7537 if (entry->needs_wakeup) {
7538 entry->needs_wakeup = FALSE;
7539 need_wakeup = TRUE;
7540 }
7541 entry = entry->vme_next;
7542 }
7543 continue;
7544 } else {
7545 vm_map_unlock(map);
7546 vm_map_unwire_nested(VME_SUBMAP(entry),
7547 sub_start, sub_end, user_wire, map_pmap,
7548 pmap_addr);
7549 vm_map_lock(map);
7550
7551 if (last_timestamp + 1 != map->timestamp) {
7552 /*
7553 * Find the entry again. It could have been
7554 * clipped or deleted after we unlocked the map.
7555 */
7556 if (!vm_map_lookup_entry(map,
7557 tmp_entry.vme_start,
7558 &first_entry)) {
7559 if (!user_wire) {
7560 panic("vm_map_unwire: re-lookup failed");
7561 }
7562 entry = first_entry->vme_next;
7563 } else {
7564 entry = first_entry;
7565 }
7566 }
7567 last_timestamp = map->timestamp;
7568 }
7569 }
7570
7571
7572 if ((entry->wired_count == 0) ||
7573 (user_wire && entry->user_wired_count == 0)) {
7574 if (!user_wire) {
7575 panic("vm_map_unwire: entry is unwired");
7576 }
7577
7578 entry = entry->vme_next;
7579 continue;
7580 }
7581
7582 assert(entry->wired_count > 0 &&
7583 (!user_wire || entry->user_wired_count > 0));
7584
7585 vm_map_clip_start(map, entry, start);
7586 vm_map_clip_end(map, entry, end);
7587
7588 /*
7589 * Check for holes
7590 * Holes: Next entry should be contiguous unless
7591 * this is the end of the region.
7592 */
7593 if (((entry->vme_end < end) &&
7594 ((entry->vme_next == vm_map_to_entry(map)) ||
7595 (entry->vme_next->vme_start > entry->vme_end)))) {
7596 if (!user_wire) {
7597 panic("vm_map_unwire: non-contiguous region");
7598 }
7599 entry = entry->vme_next;
7600 continue;
7601 }
7602
7603 subtract_wire_counts(map, entry, user_wire);
7604
7605 if (entry->wired_count != 0) {
7606 entry = entry->vme_next;
7607 continue;
7608 }
7609
7610 if (entry->zero_wired_pages) {
7611 entry->zero_wired_pages = FALSE;
7612 }
7613
7614 entry->in_transition = TRUE;
7615 tmp_entry = *entry; /* see comment in vm_map_wire() */
7616
7617 /*
7618 * We can unlock the map now. The in_transition state
7619 * guarantees existence of the entry.
7620 */
7621 vm_map_unlock(map);
7622 if (map_pmap) {
7623 vm_fault_unwire(map,
7624 &tmp_entry, FALSE, map_pmap, pmap_addr);
7625 } else {
7626 vm_fault_unwire(map,
7627 &tmp_entry, FALSE, map->pmap,
7628 tmp_entry.vme_start);
7629 }
7630 vm_map_lock(map);
7631
7632 if (last_timestamp + 1 != map->timestamp) {
7633 /*
7634 * Find the entry again. It could have been clipped
7635 * or deleted after we unlocked the map.
7636 */
7637 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7638 &first_entry)) {
7639 if (!user_wire) {
7640 panic("vm_map_unwire: re-lookup failed");
7641 }
7642 entry = first_entry->vme_next;
7643 } else {
7644 entry = first_entry;
7645 }
7646 }
7647 last_timestamp = map->timestamp;
7648
7649 /*
7650 * clear transition bit for all constituent entries that
7651 * were in the original entry (saved in tmp_entry). Also
7652 * check for waiters.
7653 */
7654 while ((entry != vm_map_to_entry(map)) &&
7655 (entry->vme_start < tmp_entry.vme_end)) {
7656 assert(entry->in_transition);
7657 entry->in_transition = FALSE;
7658 if (entry->needs_wakeup) {
7659 entry->needs_wakeup = FALSE;
7660 need_wakeup = TRUE;
7661 }
7662 entry = entry->vme_next;
7663 }
7664 }
7665
7666 /*
7667 * We might have fragmented the address space when we wired this
7668 * range of addresses. Attempt to re-coalesce these VM map entries
7669 * with their neighbors now that they're no longer wired.
7670 * Under some circumstances, address space fragmentation can
7671 * prevent VM object shadow chain collapsing, which can cause
7672 * swap space leaks.
7673 */
7674 vm_map_simplify_range(map, start, end);
7675
7676 vm_map_unlock(map);
7677 /*
7678 * wake up anybody waiting on entries that we have unwired.
7679 */
7680 if (need_wakeup) {
7681 vm_map_entry_wakeup(map);
7682 }
7683 return KERN_SUCCESS;
7684 }
7685
7686 kern_return_t
7687 vm_map_unwire(
7688 vm_map_t map,
7689 vm_map_offset_t start,
7690 vm_map_offset_t end,
7691 boolean_t user_wire)
7692 {
7693 return vm_map_unwire_nested(map, start, end,
7694 user_wire, (pmap_t)NULL, 0);
7695 }
7696
7697
7698 /*
7699 * vm_map_entry_delete: [ internal use only ]
7700 *
7701 * Deallocate the given entry from the target map.
7702 */
7703 static void
7704 vm_map_entry_delete(
7705 vm_map_t map,
7706 vm_map_entry_t entry)
7707 {
7708 vm_map_offset_t s, e;
7709 vm_object_t object;
7710 vm_map_t submap;
7711
7712 s = entry->vme_start;
7713 e = entry->vme_end;
7714 assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7715 assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7716 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7717 assert(page_aligned(s));
7718 assert(page_aligned(e));
7719 }
7720 if (entry->map_aligned == TRUE) {
7721 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7722 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7723 }
7724 assert(entry->wired_count == 0);
7725 assert(entry->user_wired_count == 0);
7726 assert(!entry->permanent);
7727
7728 if (entry->is_sub_map) {
7729 object = NULL;
7730 submap = VME_SUBMAP(entry);
7731 } else {
7732 submap = NULL;
7733 object = VME_OBJECT(entry);
7734 }
7735
7736 vm_map_store_entry_unlink(map, entry);
7737 map->size -= e - s;
7738
7739 vm_map_entry_dispose(map, entry);
7740
7741 vm_map_unlock(map);
7742 /*
7743 * Deallocate the object only after removing all
7744 * pmap entries pointing to its pages.
7745 */
7746 if (submap) {
7747 vm_map_deallocate(submap);
7748 } else {
7749 vm_object_deallocate(object);
7750 }
7751 }
7752
7753 void
7754 vm_map_submap_pmap_clean(
7755 vm_map_t map,
7756 vm_map_offset_t start,
7757 vm_map_offset_t end,
7758 vm_map_t sub_map,
7759 vm_map_offset_t offset)
7760 {
7761 vm_map_offset_t submap_start;
7762 vm_map_offset_t submap_end;
7763 vm_map_size_t remove_size;
7764 vm_map_entry_t entry;
7765
7766 submap_end = offset + (end - start);
7767 submap_start = offset;
7768
7769 vm_map_lock_read(sub_map);
7770 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7771 remove_size = (entry->vme_end - entry->vme_start);
7772 if (offset > entry->vme_start) {
7773 remove_size -= offset - entry->vme_start;
7774 }
7775
7776
7777 if (submap_end < entry->vme_end) {
7778 remove_size -=
7779 entry->vme_end - submap_end;
7780 }
7781 if (entry->is_sub_map) {
7782 vm_map_submap_pmap_clean(
7783 sub_map,
7784 start,
7785 start + remove_size,
7786 VME_SUBMAP(entry),
7787 VME_OFFSET(entry));
7788 } else {
7789 if (map->mapped_in_other_pmaps &&
7790 os_ref_get_count(&map->map_refcnt) != 0 &&
7791 VME_OBJECT(entry) != NULL) {
7792 vm_object_pmap_protect_options(
7793 VME_OBJECT(entry),
7794 (VME_OFFSET(entry) +
7795 offset -
7796 entry->vme_start),
7797 remove_size,
7798 PMAP_NULL,
7799 PAGE_SIZE,
7800 entry->vme_start,
7801 VM_PROT_NONE,
7802 PMAP_OPTIONS_REMOVE);
7803 } else {
7804 pmap_remove(map->pmap,
7805 (addr64_t)start,
7806 (addr64_t)(start + remove_size));
7807 }
7808 }
7809 }
7810
7811 entry = entry->vme_next;
7812
7813 while ((entry != vm_map_to_entry(sub_map))
7814 && (entry->vme_start < submap_end)) {
7815 remove_size = (entry->vme_end - entry->vme_start);
7816 if (submap_end < entry->vme_end) {
7817 remove_size -= entry->vme_end - submap_end;
7818 }
7819 if (entry->is_sub_map) {
7820 vm_map_submap_pmap_clean(
7821 sub_map,
7822 (start + entry->vme_start) - offset,
7823 ((start + entry->vme_start) - offset) + remove_size,
7824 VME_SUBMAP(entry),
7825 VME_OFFSET(entry));
7826 } else {
7827 if (map->mapped_in_other_pmaps &&
7828 os_ref_get_count(&map->map_refcnt) != 0 &&
7829 VME_OBJECT(entry) != NULL) {
7830 vm_object_pmap_protect_options(
7831 VME_OBJECT(entry),
7832 VME_OFFSET(entry),
7833 remove_size,
7834 PMAP_NULL,
7835 PAGE_SIZE,
7836 entry->vme_start,
7837 VM_PROT_NONE,
7838 PMAP_OPTIONS_REMOVE);
7839 } else {
7840 pmap_remove(map->pmap,
7841 (addr64_t)((start + entry->vme_start)
7842 - offset),
7843 (addr64_t)(((start + entry->vme_start)
7844 - offset) + remove_size));
7845 }
7846 }
7847 entry = entry->vme_next;
7848 }
7849 vm_map_unlock_read(sub_map);
7850 return;
7851 }
7852
7853 /*
7854 * virt_memory_guard_ast:
7855 *
7856 * Handle the AST callout for a virtual memory guard:
7857 * raise an EXC_GUARD exception and terminate the task
7858 * if configured to do so.
7859 */
7860 void
7861 virt_memory_guard_ast(
7862 thread_t thread,
7863 mach_exception_data_type_t code,
7864 mach_exception_data_type_t subcode)
7865 {
7866 task_t task = thread->task;
7867 assert(task != kernel_task);
7868 assert(task == current_task());
7869 uint32_t behavior;
7870
7871 behavior = task->task_exc_guard;
7872
7873 /* Is delivery enabled */
7874 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7875 return;
7876 }
7877
7878 /* If only once, make sure we're that once */
7879 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7880 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7881
7882 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7883 break;
7884 }
7885 behavior = task->task_exc_guard;
7886 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7887 return;
7888 }
7889 }
7890
7891 /* Raise exception via corpse fork or synchronously */
7892 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7893 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7894 task_violated_guard(code, subcode, NULL);
7895 } else {
7896 task_exception_notify(EXC_GUARD, code, subcode);
7897 }
7898
7899 /* Terminate the task if desired */
7900 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7901 task_bsdtask_kill(current_task());
7902 }
7903 }
7904
7905 /*
7906 * vm_map_guard_exception:
7907 *
7908 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7909 *
7910 * Right now, we do this when we find nothing mapped, or a
7911 * gap in the mapping when a user address space deallocation
7912 * was requested. We report the address of the first gap found.
7913 */
7914 static void
7915 vm_map_guard_exception(
7916 vm_map_offset_t gap_start,
7917 unsigned reason)
7918 {
7919 mach_exception_code_t code = 0;
7920 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7921 unsigned int target = 0; /* should we pass in pid associated with map? */
7922 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7923 boolean_t fatal = FALSE;
7924
7925 task_t task = current_task();
7926
7927 /* Can't deliver exceptions to kernel task */
7928 if (task == kernel_task) {
7929 return;
7930 }
7931
7932 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7933 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7934 EXC_GUARD_ENCODE_TARGET(code, target);
7935
7936 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7937 fatal = TRUE;
7938 }
7939 thread_guard_violation(current_thread(), code, subcode, fatal);
7940 }
7941
7942 /*
7943 * vm_map_delete: [ internal use only ]
7944 *
7945 * Deallocates the given address range from the target map.
7946 * Removes all user wirings. Unwires one kernel wiring if
7947 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7948 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7949 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7950 *
7951 * This routine is called with map locked and leaves map locked.
7952 */
7953 static kern_return_t
7954 vm_map_delete(
7955 vm_map_t map,
7956 vm_map_offset_t start,
7957 vm_map_offset_t end,
7958 int flags,
7959 vm_map_t zap_map)
7960 {
7961 vm_map_entry_t entry, next;
7962 struct vm_map_entry *first_entry, tmp_entry;
7963 vm_map_offset_t s;
7964 vm_object_t object;
7965 boolean_t need_wakeup;
7966 unsigned int last_timestamp = ~0; /* unlikely value */
7967 int interruptible;
7968 vm_map_offset_t gap_start;
7969 __unused vm_map_offset_t save_start = start;
7970 __unused vm_map_offset_t save_end = end;
7971 const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
7972 const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
7973
7974 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
7975 gap_start = FIND_GAP;
7976 } else {
7977 gap_start = GAPS_OK;
7978 }
7979
7980 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7981 THREAD_ABORTSAFE : THREAD_UNINT;
7982
7983 /*
7984 * All our DMA I/O operations in IOKit are currently done by
7985 * wiring through the map entries of the task requesting the I/O.
7986 * Because of this, we must always wait for kernel wirings
7987 * to go away on the entries before deleting them.
7988 *
7989 * Any caller who wants to actually remove a kernel wiring
7990 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7991 * properly remove one wiring instead of blasting through
7992 * them all.
7993 */
7994 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7995
7996 while (1) {
7997 /*
7998 * Find the start of the region, and clip it
7999 */
8000 if (vm_map_lookup_entry(map, start, &first_entry)) {
8001 entry = first_entry;
8002 if (map == kalloc_map &&
8003 (entry->vme_start != start ||
8004 entry->vme_end != end)) {
8005 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8006 "mismatched entry %p [0x%llx:0x%llx]\n",
8007 map,
8008 (uint64_t)start,
8009 (uint64_t)end,
8010 entry,
8011 (uint64_t)entry->vme_start,
8012 (uint64_t)entry->vme_end);
8013 }
8014
8015 /*
8016 * If in a superpage, extend the range to include the start of the mapping.
8017 */
8018 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
8019 start = SUPERPAGE_ROUND_DOWN(start);
8020 continue;
8021 }
8022
8023 if (start == entry->vme_start) {
8024 /*
8025 * No need to clip. We don't want to cause
8026 * any unnecessary unnesting in this case...
8027 */
8028 } else {
8029 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8030 entry->map_aligned &&
8031 !VM_MAP_PAGE_ALIGNED(
8032 start,
8033 VM_MAP_PAGE_MASK(map))) {
8034 /*
8035 * The entry will no longer be
8036 * map-aligned after clipping
8037 * and the caller said it's OK.
8038 */
8039 entry->map_aligned = FALSE;
8040 }
8041 if (map == kalloc_map) {
8042 panic("vm_map_delete(%p,0x%llx,0x%llx):"
8043 " clipping %p at 0x%llx\n",
8044 map,
8045 (uint64_t)start,
8046 (uint64_t)end,
8047 entry,
8048 (uint64_t)start);
8049 }
8050 vm_map_clip_start(map, entry, start);
8051 }
8052
8053 /*
8054 * Fix the lookup hint now, rather than each
8055 * time through the loop.
8056 */
8057 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8058 } else {
8059 if (map->pmap == kernel_pmap &&
8060 os_ref_get_count(&map->map_refcnt) != 0) {
8061 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8062 "no map entry at 0x%llx\n",
8063 map,
8064 (uint64_t)start,
8065 (uint64_t)end,
8066 (uint64_t)start);
8067 }
8068 entry = first_entry->vme_next;
8069 if (gap_start == FIND_GAP) {
8070 gap_start = start;
8071 }
8072 }
8073 break;
8074 }
8075 if (entry->superpage_size) {
8076 end = SUPERPAGE_ROUND_UP(end);
8077 }
8078
8079 need_wakeup = FALSE;
8080 /*
8081 * Step through all entries in this region
8082 */
8083 s = entry->vme_start;
8084 while ((entry != vm_map_to_entry(map)) && (s < end)) {
8085 /*
8086 * At this point, we have deleted all the memory entries
8087 * between "start" and "s". We still need to delete
8088 * all memory entries between "s" and "end".
8089 * While we were blocked and the map was unlocked, some
8090 * new memory entries could have been re-allocated between
8091 * "start" and "s" and we don't want to mess with those.
8092 * Some of those entries could even have been re-assembled
8093 * with an entry after "s" (in vm_map_simplify_entry()), so
8094 * we may have to vm_map_clip_start() again.
8095 */
8096
8097 if (entry->vme_start >= s) {
8098 /*
8099 * This entry starts on or after "s"
8100 * so no need to clip its start.
8101 */
8102 } else {
8103 /*
8104 * This entry has been re-assembled by a
8105 * vm_map_simplify_entry(). We need to
8106 * re-clip its start.
8107 */
8108 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8109 entry->map_aligned &&
8110 !VM_MAP_PAGE_ALIGNED(s,
8111 VM_MAP_PAGE_MASK(map))) {
8112 /*
8113 * The entry will no longer be map-aligned
8114 * after clipping and the caller said it's OK.
8115 */
8116 entry->map_aligned = FALSE;
8117 }
8118 if (map == kalloc_map) {
8119 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8120 "clipping %p at 0x%llx\n",
8121 map,
8122 (uint64_t)start,
8123 (uint64_t)end,
8124 entry,
8125 (uint64_t)s);
8126 }
8127 vm_map_clip_start(map, entry, s);
8128 }
8129 if (entry->vme_end <= end) {
8130 /*
8131 * This entry is going away completely, so no need
8132 * to clip and possibly cause an unnecessary unnesting.
8133 */
8134 } else {
8135 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8136 entry->map_aligned &&
8137 !VM_MAP_PAGE_ALIGNED(end,
8138 VM_MAP_PAGE_MASK(map))) {
8139 /*
8140 * The entry will no longer be map-aligned
8141 * after clipping and the caller said it's OK.
8142 */
8143 entry->map_aligned = FALSE;
8144 }
8145 if (map == kalloc_map) {
8146 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8147 "clipping %p at 0x%llx\n",
8148 map,
8149 (uint64_t)start,
8150 (uint64_t)end,
8151 entry,
8152 (uint64_t)end);
8153 }
8154 vm_map_clip_end(map, entry, end);
8155 }
8156
8157 if (entry->permanent) {
8158 if (map->pmap == kernel_pmap) {
8159 panic("%s(%p,0x%llx,0x%llx): "
8160 "attempt to remove permanent "
8161 "VM map entry "
8162 "%p [0x%llx:0x%llx]\n",
8163 __FUNCTION__,
8164 map,
8165 (uint64_t) start,
8166 (uint64_t) end,
8167 entry,
8168 (uint64_t) entry->vme_start,
8169 (uint64_t) entry->vme_end);
8170 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8171 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
8172 entry->permanent = FALSE;
8173 #if PMAP_CS
8174 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
8175 entry->permanent = FALSE;
8176
8177 printf("%d[%s] %s(0x%llx,0x%llx): "
8178 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
8179 "prot 0x%x/0x%x\n",
8180 proc_selfpid(),
8181 (current_task()->bsd_info
8182 ? proc_name_address(current_task()->bsd_info)
8183 : "?"),
8184 __FUNCTION__,
8185 (uint64_t) start,
8186 (uint64_t) end,
8187 (uint64_t)entry->vme_start,
8188 (uint64_t)entry->vme_end,
8189 entry->protection,
8190 entry->max_protection);
8191 #endif
8192 } else {
8193 if (vm_map_executable_immutable_verbose) {
8194 printf("%d[%s] %s(0x%llx,0x%llx): "
8195 "permanent entry [0x%llx:0x%llx] "
8196 "prot 0x%x/0x%x\n",
8197 proc_selfpid(),
8198 (current_task()->bsd_info
8199 ? proc_name_address(current_task()->bsd_info)
8200 : "?"),
8201 __FUNCTION__,
8202 (uint64_t) start,
8203 (uint64_t) end,
8204 (uint64_t)entry->vme_start,
8205 (uint64_t)entry->vme_end,
8206 entry->protection,
8207 entry->max_protection);
8208 }
8209 /*
8210 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
8211 */
8212 DTRACE_VM5(vm_map_delete_permanent,
8213 vm_map_offset_t, entry->vme_start,
8214 vm_map_offset_t, entry->vme_end,
8215 vm_prot_t, entry->protection,
8216 vm_prot_t, entry->max_protection,
8217 int, VME_ALIAS(entry));
8218 }
8219 }
8220
8221
8222 if (entry->in_transition) {
8223 wait_result_t wait_result;
8224
8225 /*
8226 * Another thread is wiring/unwiring this entry.
8227 * Let the other thread know we are waiting.
8228 */
8229 assert(s == entry->vme_start);
8230 entry->needs_wakeup = TRUE;
8231
8232 /*
8233 * wake up anybody waiting on entries that we have
8234 * already unwired/deleted.
8235 */
8236 if (need_wakeup) {
8237 vm_map_entry_wakeup(map);
8238 need_wakeup = FALSE;
8239 }
8240
8241 wait_result = vm_map_entry_wait(map, interruptible);
8242
8243 if (interruptible &&
8244 wait_result == THREAD_INTERRUPTED) {
8245 /*
8246 * We do not clear the needs_wakeup flag,
8247 * since we cannot tell if we were the only one.
8248 */
8249 return KERN_ABORTED;
8250 }
8251
8252 /*
8253 * The entry could have been clipped or it
8254 * may not exist anymore. Look it up again.
8255 */
8256 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8257 /*
8258 * User: use the next entry
8259 */
8260 if (gap_start == FIND_GAP) {
8261 gap_start = s;
8262 }
8263 entry = first_entry->vme_next;
8264 s = entry->vme_start;
8265 } else {
8266 entry = first_entry;
8267 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8268 }
8269 last_timestamp = map->timestamp;
8270 continue;
8271 } /* end in_transition */
8272
8273 if (entry->wired_count) {
8274 boolean_t user_wire;
8275
8276 user_wire = entry->user_wired_count > 0;
8277
8278 /*
8279 * Remove a kernel wiring if requested
8280 */
8281 if (flags & VM_MAP_REMOVE_KUNWIRE) {
8282 entry->wired_count--;
8283 }
8284
8285 /*
8286 * Remove all user wirings for proper accounting
8287 */
8288 if (entry->user_wired_count > 0) {
8289 while (entry->user_wired_count) {
8290 subtract_wire_counts(map, entry, user_wire);
8291 }
8292 }
8293
8294 if (entry->wired_count != 0) {
8295 assert(map != kernel_map);
8296 /*
8297 * Cannot continue. Typical case is when
8298 * a user thread has physical I/O pending
8299 * on this page. Either wait for the
8300 * kernel wiring to go away or return an
8301 * error.
8302 */
8303 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
8304 wait_result_t wait_result;
8305
8306 assert(s == entry->vme_start);
8307 entry->needs_wakeup = TRUE;
8308 wait_result = vm_map_entry_wait(map,
8309 interruptible);
8310
8311 if (interruptible &&
8312 wait_result == THREAD_INTERRUPTED) {
8313 /*
8314 * We do not clear the
8315 * needs_wakeup flag, since we
8316 * cannot tell if we were the
8317 * only one.
8318 */
8319 return KERN_ABORTED;
8320 }
8321
8322 /*
8323 * The entry could have been clipped or
8324 * it may not exist anymore. Look it
8325 * up again.
8326 */
8327 if (!vm_map_lookup_entry(map, s,
8328 &first_entry)) {
8329 assert(map != kernel_map);
8330 /*
8331 * User: use the next entry
8332 */
8333 if (gap_start == FIND_GAP) {
8334 gap_start = s;
8335 }
8336 entry = first_entry->vme_next;
8337 s = entry->vme_start;
8338 } else {
8339 entry = first_entry;
8340 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8341 }
8342 last_timestamp = map->timestamp;
8343 continue;
8344 } else {
8345 return KERN_FAILURE;
8346 }
8347 }
8348
8349 entry->in_transition = TRUE;
8350 /*
8351 * copy current entry. see comment in vm_map_wire()
8352 */
8353 tmp_entry = *entry;
8354 assert(s == entry->vme_start);
8355
8356 /*
8357 * We can unlock the map now. The in_transition
8358 * state guarantees existence of the entry.
8359 */
8360 vm_map_unlock(map);
8361
8362 if (tmp_entry.is_sub_map) {
8363 vm_map_t sub_map;
8364 vm_map_offset_t sub_start, sub_end;
8365 pmap_t pmap;
8366 vm_map_offset_t pmap_addr;
8367
8368
8369 sub_map = VME_SUBMAP(&tmp_entry);
8370 sub_start = VME_OFFSET(&tmp_entry);
8371 sub_end = sub_start + (tmp_entry.vme_end -
8372 tmp_entry.vme_start);
8373 if (tmp_entry.use_pmap) {
8374 pmap = sub_map->pmap;
8375 pmap_addr = tmp_entry.vme_start;
8376 } else {
8377 pmap = map->pmap;
8378 pmap_addr = tmp_entry.vme_start;
8379 }
8380 (void) vm_map_unwire_nested(sub_map,
8381 sub_start, sub_end,
8382 user_wire,
8383 pmap, pmap_addr);
8384 } else {
8385 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8386 pmap_protect_options(
8387 map->pmap,
8388 tmp_entry.vme_start,
8389 tmp_entry.vme_end,
8390 VM_PROT_NONE,
8391 PMAP_OPTIONS_REMOVE,
8392 NULL);
8393 }
8394 vm_fault_unwire(map, &tmp_entry,
8395 VME_OBJECT(&tmp_entry) == kernel_object,
8396 map->pmap, tmp_entry.vme_start);
8397 }
8398
8399 vm_map_lock(map);
8400
8401 if (last_timestamp + 1 != map->timestamp) {
8402 /*
8403 * Find the entry again. It could have
8404 * been clipped after we unlocked the map.
8405 */
8406 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8407 assert((map != kernel_map) &&
8408 (!entry->is_sub_map));
8409 if (gap_start == FIND_GAP) {
8410 gap_start = s;
8411 }
8412 first_entry = first_entry->vme_next;
8413 s = first_entry->vme_start;
8414 } else {
8415 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8416 }
8417 } else {
8418 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8419 first_entry = entry;
8420 }
8421
8422 last_timestamp = map->timestamp;
8423
8424 entry = first_entry;
8425 while ((entry != vm_map_to_entry(map)) &&
8426 (entry->vme_start < tmp_entry.vme_end)) {
8427 assert(entry->in_transition);
8428 entry->in_transition = FALSE;
8429 if (entry->needs_wakeup) {
8430 entry->needs_wakeup = FALSE;
8431 need_wakeup = TRUE;
8432 }
8433 entry = entry->vme_next;
8434 }
8435 /*
8436 * We have unwired the entry(s). Go back and
8437 * delete them.
8438 */
8439 entry = first_entry;
8440 continue;
8441 }
8442
8443 /* entry is unwired */
8444 assert(entry->wired_count == 0);
8445 assert(entry->user_wired_count == 0);
8446
8447 assert(s == entry->vme_start);
8448
8449 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8450 /*
8451 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8452 * vm_map_delete(), some map entries might have been
8453 * transferred to a "zap_map", which doesn't have a
8454 * pmap. The original pmap has already been flushed
8455 * in the vm_map_delete() call targeting the original
8456 * map, but when we get to destroying the "zap_map",
8457 * we don't have any pmap to flush, so let's just skip
8458 * all this.
8459 */
8460 } else if (entry->is_sub_map) {
8461 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8462 "map %p (%d) entry %p submap %p (%d)\n",
8463 map, VM_MAP_PAGE_SHIFT(map), entry,
8464 VME_SUBMAP(entry),
8465 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8466 if (entry->use_pmap) {
8467 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) == VM_MAP_PAGE_SHIFT(map),
8468 "map %p (%d) entry %p submap %p (%d)\n",
8469 map, VM_MAP_PAGE_SHIFT(map), entry,
8470 VME_SUBMAP(entry),
8471 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8472 #ifndef NO_NESTED_PMAP
8473 int pmap_flags;
8474
8475 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8476 /*
8477 * This is the final cleanup of the
8478 * address space being terminated.
8479 * No new mappings are expected and
8480 * we don't really need to unnest the
8481 * shared region (and lose the "global"
8482 * pmap mappings, if applicable).
8483 *
8484 * Tell the pmap layer that we're
8485 * "clean" wrt nesting.
8486 */
8487 pmap_flags = PMAP_UNNEST_CLEAN;
8488 } else {
8489 /*
8490 * We're unmapping part of the nested
8491 * shared region, so we can't keep the
8492 * nested pmap.
8493 */
8494 pmap_flags = 0;
8495 }
8496 pmap_unnest_options(
8497 map->pmap,
8498 (addr64_t)entry->vme_start,
8499 entry->vme_end - entry->vme_start,
8500 pmap_flags);
8501 #endif /* NO_NESTED_PMAP */
8502 if (map->mapped_in_other_pmaps &&
8503 os_ref_get_count(&map->map_refcnt) != 0) {
8504 /* clean up parent map/maps */
8505 vm_map_submap_pmap_clean(
8506 map, entry->vme_start,
8507 entry->vme_end,
8508 VME_SUBMAP(entry),
8509 VME_OFFSET(entry));
8510 }
8511 } else {
8512 vm_map_submap_pmap_clean(
8513 map, entry->vme_start, entry->vme_end,
8514 VME_SUBMAP(entry),
8515 VME_OFFSET(entry));
8516 }
8517 } else if (VME_OBJECT(entry) != kernel_object &&
8518 VME_OBJECT(entry) != compressor_object) {
8519 object = VME_OBJECT(entry);
8520 if (map->mapped_in_other_pmaps &&
8521 os_ref_get_count(&map->map_refcnt) != 0) {
8522 vm_object_pmap_protect_options(
8523 object, VME_OFFSET(entry),
8524 entry->vme_end - entry->vme_start,
8525 PMAP_NULL,
8526 PAGE_SIZE,
8527 entry->vme_start,
8528 VM_PROT_NONE,
8529 PMAP_OPTIONS_REMOVE);
8530 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8531 (map->pmap == kernel_pmap)) {
8532 /* Remove translations associated
8533 * with this range unless the entry
8534 * does not have an object, or
8535 * it's the kernel map or a descendant
8536 * since the platform could potentially
8537 * create "backdoor" mappings invisible
8538 * to the VM. It is expected that
8539 * objectless, non-kernel ranges
8540 * do not have such VM invisible
8541 * translations.
8542 */
8543 pmap_remove_options(map->pmap,
8544 (addr64_t)entry->vme_start,
8545 (addr64_t)entry->vme_end,
8546 PMAP_OPTIONS_REMOVE);
8547 }
8548 }
8549
8550 if (entry->iokit_acct) {
8551 /* alternate accounting */
8552 DTRACE_VM4(vm_map_iokit_unmapped_region,
8553 vm_map_t, map,
8554 vm_map_offset_t, entry->vme_start,
8555 vm_map_offset_t, entry->vme_end,
8556 int, VME_ALIAS(entry));
8557 vm_map_iokit_unmapped_region(map,
8558 (entry->vme_end -
8559 entry->vme_start));
8560 entry->iokit_acct = FALSE;
8561 entry->use_pmap = FALSE;
8562 }
8563
8564 /*
8565 * All pmap mappings for this map entry must have been
8566 * cleared by now.
8567 */
8568 #if DEBUG
8569 assert(vm_map_pmap_is_empty(map,
8570 entry->vme_start,
8571 entry->vme_end));
8572 #endif /* DEBUG */
8573
8574 next = entry->vme_next;
8575
8576 if (map->pmap == kernel_pmap &&
8577 os_ref_get_count(&map->map_refcnt) != 0 &&
8578 entry->vme_end < end &&
8579 (next == vm_map_to_entry(map) ||
8580 next->vme_start != entry->vme_end)) {
8581 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8582 "hole after %p at 0x%llx\n",
8583 map,
8584 (uint64_t)start,
8585 (uint64_t)end,
8586 entry,
8587 (uint64_t)entry->vme_end);
8588 }
8589
8590 /*
8591 * If the desired range didn't end with "entry", then there is a gap if
8592 * we wrapped around to the start of the map or if "entry" and "next"
8593 * aren't contiguous.
8594 *
8595 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8596 * For example, on devices with h/w 4K pages, map entry sizes are now all 16K.
8597 */
8598 if (gap_start == FIND_GAP &&
8599 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8600 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8601 gap_start = entry->vme_end;
8602 }
8603 s = next->vme_start;
8604 last_timestamp = map->timestamp;
8605
8606 if (entry->permanent) {
8607 /*
8608 * A permanent entry cannot be removed, so leave it
8609 * in place but remove all access permissions.
8610 */
8611 entry->protection = VM_PROT_NONE;
8612 entry->max_protection = VM_PROT_NONE;
8613 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8614 zap_map != VM_MAP_NULL) {
8615 vm_map_size_t entry_size;
8616 /*
8617 * The caller wants to save the affected VM map entries
8618 * into the "zap_map". The caller will take care of
8619 * these entries.
8620 */
8621 /* unlink the entry from "map" ... */
8622 vm_map_store_entry_unlink(map, entry);
8623 /* ... and add it to the end of the "zap_map" */
8624 vm_map_store_entry_link(zap_map,
8625 vm_map_last_entry(zap_map),
8626 entry,
8627 VM_MAP_KERNEL_FLAGS_NONE);
8628 entry_size = entry->vme_end - entry->vme_start;
8629 map->size -= entry_size;
8630 zap_map->size += entry_size;
8631 /* we didn't unlock the map, so no timestamp increase */
8632 last_timestamp--;
8633 } else {
8634 vm_map_entry_delete(map, entry);
8635 /* vm_map_entry_delete unlocks the map */
8636 vm_map_lock(map);
8637 }
8638
8639 entry = next;
8640
8641 if (entry == vm_map_to_entry(map)) {
8642 break;
8643 }
8644 if (last_timestamp + 1 != map->timestamp) {
8645 /*
8646 * We are responsible for deleting everything
8647 * from the given space. If someone has interfered,
8648 * we pick up where we left off. Back fills should
8649 * be all right for anyone, except map_delete, and
8650 * we have to assume that the task has been fully
8651 * disabled before we get here
8652 */
8653 if (!vm_map_lookup_entry(map, s, &entry)) {
8654 entry = entry->vme_next;
8655
8656 /*
8657 * Nothing found for s. If we weren't already done, then there is a gap.
8658 */
8659 if (gap_start == FIND_GAP && s < end) {
8660 gap_start = s;
8661 }
8662 s = entry->vme_start;
8663 } else {
8664 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8665 }
8666 /*
8667 * Others can not only allocate behind us, entries can
8668 * also coalesce while we don't hold the map lock.
8669 */
8670 if (entry == vm_map_to_entry(map)) {
8671 break;
8672 }
8673 }
8674 last_timestamp = map->timestamp;
8675 }
8676
8677 if (map->wait_for_space) {
8678 thread_wakeup((event_t) map);
8679 }
8680 /*
8681 * wake up anybody waiting on entries that we have already deleted.
8682 */
8683 if (need_wakeup) {
8684 vm_map_entry_wakeup(map);
8685 }
8686
8687 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8688 DTRACE_VM3(kern_vm_deallocate_gap,
8689 vm_map_offset_t, gap_start,
8690 vm_map_offset_t, save_start,
8691 vm_map_offset_t, save_end);
8692 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8693 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8694 }
8695 }
8696
8697 return KERN_SUCCESS;
8698 }
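
/*
 * A minimal sketch of the VM_MAP_REMOVE_SAVE_ENTRIES / "zap map" path
 * described in the comments inside vm_map_delete() above. The range is
 * hypothetical, and vm_map_create()/vm_map_destroy() are assumed to have
 * the signatures declared in vm_map.h.
 */
#if 0	/* illustrative sketch only -- not compiled */
static void
vm_map_delete_zap_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_t	zap_map;

	/* An empty map that will receive the unlinked entries. */
	zap_map = vm_map_create(PMAP_NULL, start, end,
	    map->hdr.entries_pageable);

	vm_map_lock(map);
	/* Entries are moved to "zap_map" instead of being freed here. */
	(void) vm_map_delete(map, start, end,
	    VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN,
	    zap_map);
	vm_map_unlock(map);

	/*
	 * The original map's pmap was already flushed above, so skip
	 * the pmap cleanup while tearing down the zap map (see the
	 * VM_MAP_REMOVE_NO_PMAP_CLEANUP comment in vm_map_delete()).
	 */
	vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
}
#endif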
8699
8700
8701 /*
8702 * vm_map_terminate:
8703 *
8704 * Clean out a task's map.
8705 */
8706 kern_return_t
8707 vm_map_terminate(
8708 vm_map_t map)
8709 {
8710 vm_map_lock(map);
8711 map->terminated = TRUE;
8712 vm_map_unlock(map);
8713
8714 return vm_map_remove(map,
8715 map->min_offset,
8716 map->max_offset,
8717 /*
8718 * Final cleanup:
8719 * + no unnesting
8720 * + remove immutable mappings
8721 * + allow gaps in range
8722 */
8723 (VM_MAP_REMOVE_NO_UNNESTING |
8724 VM_MAP_REMOVE_IMMUTABLE |
8725 VM_MAP_REMOVE_GAPS_OK));
8726 }
8727
8728 /*
8729 * vm_map_remove:
8730 *
8731 * Remove the given address range from the target map.
8732 * This is the exported form of vm_map_delete.
8733 */
8734 kern_return_t
8735 vm_map_remove(
8736 vm_map_t map,
8737 vm_map_offset_t start,
8738 vm_map_offset_t end,
8739 boolean_t flags)
8740 {
8741 kern_return_t result;
8742
8743 vm_map_lock(map);
8744 VM_MAP_RANGE_CHECK(map, start, end);
8745 /*
8746 * For the zone maps, the kernel controls the allocation/freeing of memory.
8747 * Any free to the zone maps should be within the bounds of the map and
8748 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8749 * free to the zone maps into a no-op, there is a problem and we should
8750 * panic.
8751 */
8752 if ((start == end) && zone_maps_owned(start, 1)) {
8753 panic("Nothing being freed to a zone map. start = end = %p\n", (void *)start);
8754 }
8755 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8756 vm_map_unlock(map);
8757
8758 return result;
8759 }
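
/*
 * A minimal caller-side sketch of the removal flags described above
 * vm_map_delete(). The range, and the assumption that this caller owns
 * exactly one kernel wiring on it, are hypothetical.
 */
#if 0	/* illustrative sketch only -- not compiled */
static kern_return_t
vm_map_remove_kwired_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/*
	 * VM_MAP_REMOVE_KUNWIRE drops the one kernel wiring we took on
	 * this range; VM_MAP_REMOVE_INTERRUPTIBLE lets the wait for any
	 * remaining kernel wirings be aborted, in which case KERN_ABORTED
	 * is returned. All user wirings are removed unconditionally.
	 */
	return vm_map_remove(map, start, end,
	    VM_MAP_REMOVE_KUNWIRE | VM_MAP_REMOVE_INTERRUPTIBLE);
}
#endif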
8760
8761 /*
8762 * vm_map_remove_locked:
8763 *
8764 * Remove the given address range from the target locked map.
8765 * This is the exported form of vm_map_delete.
8766 */
8767 kern_return_t
8768 vm_map_remove_locked(
8769 vm_map_t map,
8770 vm_map_offset_t start,
8771 vm_map_offset_t end,
8772 boolean_t flags)
8773 {
8774 kern_return_t result;
8775
8776 VM_MAP_RANGE_CHECK(map, start, end);
8777 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8778 return result;
8779 }
8780
8781
8782 /*
8783 * Routine: vm_map_copy_allocate
8784 *
8785 * Description:
8786 * Allocates and initializes a map copy object.
8787 */
8788 static vm_map_copy_t
8789 vm_map_copy_allocate(void)
8790 {
8791 vm_map_copy_t new_copy;
8792
8793 new_copy = zalloc(vm_map_copy_zone);
8794 bzero(new_copy, sizeof(*new_copy));
8795 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8796 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8797 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8798 return new_copy;
8799 }
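
/*
 * A minimal sketch of how callers in this file pair vm_map_copy_allocate()
 * with the header fields they fill in themselves (see the entry-list copies
 * built later in vm_map_copy_overwrite_nested() and vm_map_copy_overwrite()).
 * The offset and page-shift values are hypothetical.
 */
#if 0	/* illustrative sketch only -- not compiled */
static vm_map_copy_t
vm_map_copy_entry_list_example(
	vm_map_offset_t	offset,
	int		page_shift)
{
	vm_map_copy_t	copy;

	copy = vm_map_copy_allocate();
	/* vm_map_copy_allocate() only zeroes the object and links the */
	/* empty entry list, so type, offset and page shift are up to us. */
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->offset = offset;
	copy->cpy_hdr.page_shift = page_shift;
	return copy;
}
#endif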
8800
8801 /*
8802 * Routine: vm_map_copy_discard
8803 *
8804 * Description:
8805 * Dispose of a map copy object (returned by
8806 * vm_map_copyin).
8807 */
8808 void
8809 vm_map_copy_discard(
8810 vm_map_copy_t copy)
8811 {
8812 if (copy == VM_MAP_COPY_NULL) {
8813 return;
8814 }
8815
8816 switch (copy->type) {
8817 case VM_MAP_COPY_ENTRY_LIST:
8818 while (vm_map_copy_first_entry(copy) !=
8819 vm_map_copy_to_entry(copy)) {
8820 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8821
8822 vm_map_copy_entry_unlink(copy, entry);
8823 if (entry->is_sub_map) {
8824 vm_map_deallocate(VME_SUBMAP(entry));
8825 } else {
8826 vm_object_deallocate(VME_OBJECT(entry));
8827 }
8828 vm_map_copy_entry_dispose(copy, entry);
8829 }
8830 break;
8831 case VM_MAP_COPY_OBJECT:
8832 vm_object_deallocate(copy->cpy_object);
8833 break;
8834 case VM_MAP_COPY_KERNEL_BUFFER:
8835
8836 /*
8837 * The vm_map_copy_t and possibly the data buffer were
8838 * allocated by a single call to kheap_alloc(), i.e. the
8839 * vm_map_copy_t was not allocated out of the zone.
8840 */
8841 if (copy->size > msg_ool_size_small || copy->offset) {
8842 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8843 (long long)copy->size, (long long)copy->offset);
8844 }
8845 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy->size);
8846 }
8847 zfree(vm_map_copy_zone, copy);
8848 }
8849
8850 /*
8851 * Routine: vm_map_copy_copy
8852 *
8853 * Description:
8854 * Move the information in a map copy object to
8855 * a new map copy object, leaving the old one
8856 * empty.
8857 *
8858 * This is used by kernel routines that need
8859 * to look at out-of-line data (in copyin form)
8860 * before deciding whether to return SUCCESS.
8861 * If the routine returns FAILURE, the original
8862 * copy object will be deallocated; therefore,
8863 * these routines must make a copy of the copy
8864 * object and leave the original empty so that
8865 * deallocation will not fail.
8866 */
8867 vm_map_copy_t
8868 vm_map_copy_copy(
8869 vm_map_copy_t copy)
8870 {
8871 vm_map_copy_t new_copy;
8872
8873 if (copy == VM_MAP_COPY_NULL) {
8874 return VM_MAP_COPY_NULL;
8875 }
8876
8877 /*
8878 * Allocate a new copy object, and copy the information
8879 * from the old one into it.
8880 */
8881
8882 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8883 memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8884 #if __has_feature(ptrauth_calls)
8885 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8886 new_copy->cpy_kdata = copy->cpy_kdata;
8887 }
8888 #endif
8889
8890 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8891 /*
8892 * The links in the entry chain must be
8893 * changed to point to the new copy object.
8894 */
8895 vm_map_copy_first_entry(copy)->vme_prev
8896 = vm_map_copy_to_entry(new_copy);
8897 vm_map_copy_last_entry(copy)->vme_next
8898 = vm_map_copy_to_entry(new_copy);
8899 }
8900
8901 /*
8902 * Change the old copy object into one that contains
8903 * nothing to be deallocated.
8904 */
8905 copy->type = VM_MAP_COPY_OBJECT;
8906 copy->cpy_object = VM_OBJECT_NULL;
8907
8908 /*
8909 * Return the new object.
8910 */
8911 return new_copy;
8912 }
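
/*
 * A minimal sketch of the usage pattern described in the comment above
 * vm_map_copy_copy(): duplicate the copy object before inspecting the
 * out-of-line data, so that a failure return leaves the original empty and
 * safe to deallocate. inspect_ool_data() is a hypothetical stand-in for
 * whatever validation the kernel routine performs.
 */
#if 0	/* illustrative sketch only -- not compiled */
static kern_return_t
vm_map_copy_inspect_example(
	vm_map_copy_t	copy)
{
	vm_map_copy_t	dup;
	kern_return_t	kr;

	/* Move the contents to "dup"; "copy" is left empty. */
	dup = vm_map_copy_copy(copy);

	kr = inspect_ool_data(dup);	/* hypothetical validation step */
	if (kr != KERN_SUCCESS) {
		/*
		 * We own "dup" now; discard it on our failure path.
		 * Our caller can still deallocate the (empty) original.
		 */
		vm_map_copy_discard(dup);
		return kr;
	}
	/* On success, continue using "dup" in place of "copy". */
	return kr;
}
#endif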
8913
8914 static kern_return_t
8915 vm_map_overwrite_submap_recurse(
8916 vm_map_t dst_map,
8917 vm_map_offset_t dst_addr,
8918 vm_map_size_t dst_size)
8919 {
8920 vm_map_offset_t dst_end;
8921 vm_map_entry_t tmp_entry;
8922 vm_map_entry_t entry;
8923 kern_return_t result;
8924 boolean_t encountered_sub_map = FALSE;
8925
8926
8927
8928 /*
8929 * Verify that the destination is all writeable
8930 * initially. We have to trunc the destination
8931 * address and round the copy size or we'll end up
8932 * splitting entries in strange ways.
8933 */
8934
8935 dst_end = vm_map_round_page(dst_addr + dst_size,
8936 VM_MAP_PAGE_MASK(dst_map));
8937 vm_map_lock(dst_map);
8938
8939 start_pass_1:
8940 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8941 vm_map_unlock(dst_map);
8942 return KERN_INVALID_ADDRESS;
8943 }
8944
8945 vm_map_clip_start(dst_map,
8946 tmp_entry,
8947 vm_map_trunc_page(dst_addr,
8948 VM_MAP_PAGE_MASK(dst_map)));
8949 if (tmp_entry->is_sub_map) {
8950 /* clipping did unnest if needed */
8951 assert(!tmp_entry->use_pmap);
8952 }
8953
8954 for (entry = tmp_entry;;) {
8955 vm_map_entry_t next;
8956
8957 next = entry->vme_next;
8958 while (entry->is_sub_map) {
8959 vm_map_offset_t sub_start;
8960 vm_map_offset_t sub_end;
8961 vm_map_offset_t local_end;
8962
8963 if (entry->in_transition) {
8964 /*
8965 * Say that we are waiting, and wait for entry.
8966 */
8967 entry->needs_wakeup = TRUE;
8968 vm_map_entry_wait(dst_map, THREAD_UNINT);
8969
8970 goto start_pass_1;
8971 }
8972
8973 encountered_sub_map = TRUE;
8974 sub_start = VME_OFFSET(entry);
8975
8976 if (entry->vme_end < dst_end) {
8977 sub_end = entry->vme_end;
8978 } else {
8979 sub_end = dst_end;
8980 }
8981 sub_end -= entry->vme_start;
8982 sub_end += VME_OFFSET(entry);
8983 local_end = entry->vme_end;
8984 vm_map_unlock(dst_map);
8985
8986 result = vm_map_overwrite_submap_recurse(
8987 VME_SUBMAP(entry),
8988 sub_start,
8989 sub_end - sub_start);
8990
8991 if (result != KERN_SUCCESS) {
8992 return result;
8993 }
8994 if (dst_end <= entry->vme_end) {
8995 return KERN_SUCCESS;
8996 }
8997 vm_map_lock(dst_map);
8998 if (!vm_map_lookup_entry(dst_map, local_end,
8999 &tmp_entry)) {
9000 vm_map_unlock(dst_map);
9001 return KERN_INVALID_ADDRESS;
9002 }
9003 entry = tmp_entry;
9004 next = entry->vme_next;
9005 }
9006
9007 if (!(entry->protection & VM_PROT_WRITE)) {
9008 vm_map_unlock(dst_map);
9009 return KERN_PROTECTION_FAILURE;
9010 }
9011
9012 /*
9013 * If the entry is in transition, we must wait
9014 * for it to exit that state. Anything could happen
9015 * when we unlock the map, so start over.
9016 */
9017 if (entry->in_transition) {
9018 /*
9019 * Say that we are waiting, and wait for entry.
9020 */
9021 entry->needs_wakeup = TRUE;
9022 vm_map_entry_wait(dst_map, THREAD_UNINT);
9023
9024 goto start_pass_1;
9025 }
9026
9027 /*
9028 * our range is contained completely within this map entry
9029 */
9030 if (dst_end <= entry->vme_end) {
9031 vm_map_unlock(dst_map);
9032 return KERN_SUCCESS;
9033 }
9034 /*
9035 * check that range specified is contiguous region
9036 */
9037 if ((next == vm_map_to_entry(dst_map)) ||
9038 (next->vme_start != entry->vme_end)) {
9039 vm_map_unlock(dst_map);
9040 return KERN_INVALID_ADDRESS;
9041 }
9042
9043 /*
9044 * Check for permanent objects in the destination.
9045 */
9046 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9047 ((!VME_OBJECT(entry)->internal) ||
9048 (VME_OBJECT(entry)->true_share))) {
9049 if (encountered_sub_map) {
9050 vm_map_unlock(dst_map);
9051 return KERN_FAILURE;
9052 }
9053 }
9054
9055
9056 entry = next;
9057 }/* for */
9058 vm_map_unlock(dst_map);
9059 return KERN_SUCCESS;
9060 }
9061
9062 /*
9063 * Routine: vm_map_copy_overwrite
9064 *
9065 * Description:
9066 * Copy the memory described by the map copy
9067 * object (copy; returned by vm_map_copyin) onto
9068 * the specified destination region (dst_map, dst_addr).
9069 * The destination must be writeable.
9070 *
9071 * Unlike vm_map_copyout, this routine actually
9072 * writes over previously-mapped memory. If the
9073 * previous mapping was to a permanent (user-supplied)
9074 * memory object, it is preserved.
9075 *
9076 * The attributes (protection and inheritance) of the
9077 * destination region are preserved.
9078 *
9079 * If successful, consumes the copy object.
9080 * Otherwise, the caller is responsible for it.
9081 *
9082 * Implementation notes:
9083 * To overwrite aligned temporary virtual memory, it is
9084 * sufficient to remove the previous mapping and insert
9085 * the new copy. This replacement is done either on
9086 * the whole region (if no permanent virtual memory
9087 * objects are embedded in the destination region) or
9088 * in individual map entries.
9089 *
9090 * To overwrite permanent virtual memory, it is necessary
9091 * to copy each page, as the external memory management
9092 * interface currently does not provide any optimizations.
9093 *
9094 * Unaligned memory also has to be copied. It is possible
9095 * to use 'vm_trickery' to copy the aligned data. This is
9096 * not done but not hard to implement.
9097 *
9098 * Once a page of permanent memory has been overwritten,
9099 * it is impossible to interrupt this function; otherwise,
9100 * the call would be neither atomic nor location-independent.
9101 * The kernel-state portion of a user thread must be
9102 * interruptible.
9103 *
9104 * It may be expensive to forward all requests that might
9105 * overwrite permanent memory (vm_write, vm_copy) to
9106 * uninterruptible kernel threads. This routine may be
9107 * called by interruptible threads; however, success is
9108 * not guaranteed -- if the request cannot be performed
9109 * atomically and interruptibly, an error indication is
9110 * returned.
9111 */
9112
9113 static kern_return_t
9114 vm_map_copy_overwrite_nested(
9115 vm_map_t dst_map,
9116 vm_map_address_t dst_addr,
9117 vm_map_copy_t copy,
9118 boolean_t interruptible,
9119 pmap_t pmap,
9120 boolean_t discard_on_success)
9121 {
9122 vm_map_offset_t dst_end;
9123 vm_map_entry_t tmp_entry;
9124 vm_map_entry_t entry;
9125 kern_return_t kr;
9126 boolean_t aligned = TRUE;
9127 boolean_t contains_permanent_objects = FALSE;
9128 boolean_t encountered_sub_map = FALSE;
9129 vm_map_offset_t base_addr;
9130 vm_map_size_t copy_size;
9131 vm_map_size_t total_size;
9132 int copy_page_shift;
9133
9134
9135 /*
9136 * Check for null copy object.
9137 */
9138
9139 if (copy == VM_MAP_COPY_NULL) {
9140 return KERN_SUCCESS;
9141 }
9142
9143 /*
9144 * Assert that the vm_map_copy is coming from the right
9145 * zone and hasn't been forged
9146 */
9147 vm_map_copy_require(copy);
9148
9149 /*
9150 * Check for special kernel buffer allocated
9151 * by new_ipc_kmsg_copyin.
9152 */
9153
9154 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
9155 return vm_map_copyout_kernel_buffer(
9156 dst_map, &dst_addr,
9157 copy, copy->size, TRUE, discard_on_success);
9158 }
9159
9160 /*
9161 * Only works for entry lists at the moment. Will
9162 * support page lists later.
9163 */
9164
9165 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9166
9167 if (copy->size == 0) {
9168 if (discard_on_success) {
9169 vm_map_copy_discard(copy);
9170 }
9171 return KERN_SUCCESS;
9172 }
9173
9174 copy_page_shift = copy->cpy_hdr.page_shift;
9175
9176 /*
9177 * Verify that the destination is all writeable
9178 * initially. We have to trunc the destination
9179 * address and round the copy size or we'll end up
9180 * splitting entries in strange ways.
9181 */
9182
9183 if (!VM_MAP_PAGE_ALIGNED(copy->size,
9184 VM_MAP_PAGE_MASK(dst_map)) ||
9185 !VM_MAP_PAGE_ALIGNED(copy->offset,
9186 VM_MAP_PAGE_MASK(dst_map)) ||
9187 !VM_MAP_PAGE_ALIGNED(dst_addr,
9188 VM_MAP_PAGE_MASK(dst_map)) ||
9189 copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
9190 aligned = FALSE;
9191 dst_end = vm_map_round_page(dst_addr + copy->size,
9192 VM_MAP_PAGE_MASK(dst_map));
9193 } else {
9194 dst_end = dst_addr + copy->size;
9195 }
9196
9197 vm_map_lock(dst_map);
9198
9199 /* LP64todo - remove this check when vm_map_commpage64()
9200 * no longer has to stuff in a map_entry for the commpage
9201 * above the map's max_offset.
9202 */
9203 if (dst_addr >= dst_map->max_offset) {
9204 vm_map_unlock(dst_map);
9205 return KERN_INVALID_ADDRESS;
9206 }
9207
9208 start_pass_1:
9209 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9210 vm_map_unlock(dst_map);
9211 return KERN_INVALID_ADDRESS;
9212 }
9213 vm_map_clip_start(dst_map,
9214 tmp_entry,
9215 vm_map_trunc_page(dst_addr,
9216 VM_MAP_PAGE_MASK(dst_map)));
9217 for (entry = tmp_entry;;) {
9218 vm_map_entry_t next = entry->vme_next;
9219
9220 while (entry->is_sub_map) {
9221 vm_map_offset_t sub_start;
9222 vm_map_offset_t sub_end;
9223 vm_map_offset_t local_end;
9224
9225 if (entry->in_transition) {
9226 /*
9227 * Say that we are waiting, and wait for entry.
9228 */
9229 entry->needs_wakeup = TRUE;
9230 vm_map_entry_wait(dst_map, THREAD_UNINT);
9231
9232 goto start_pass_1;
9233 }
9234
9235 local_end = entry->vme_end;
9236 if (!(entry->needs_copy)) {
9237 /* if needs_copy we are a COW submap */
9238 /* in such a case we just replace so */
9239 /* there is no need for the follow- */
9240 /* ing check. */
9241 encountered_sub_map = TRUE;
9242 sub_start = VME_OFFSET(entry);
9243
9244 if (entry->vme_end < dst_end) {
9245 sub_end = entry->vme_end;
9246 } else {
9247 sub_end = dst_end;
9248 }
9249 sub_end -= entry->vme_start;
9250 sub_end += VME_OFFSET(entry);
9251 vm_map_unlock(dst_map);
9252
9253 kr = vm_map_overwrite_submap_recurse(
9254 VME_SUBMAP(entry),
9255 sub_start,
9256 sub_end - sub_start);
9257 if (kr != KERN_SUCCESS) {
9258 return kr;
9259 }
9260 vm_map_lock(dst_map);
9261 }
9262
9263 if (dst_end <= entry->vme_end) {
9264 goto start_overwrite;
9265 }
9266 if (!vm_map_lookup_entry(dst_map, local_end,
9267 &entry)) {
9268 vm_map_unlock(dst_map);
9269 return KERN_INVALID_ADDRESS;
9270 }
9271 next = entry->vme_next;
9272 }
9273
9274 if (!(entry->protection & VM_PROT_WRITE)) {
9275 vm_map_unlock(dst_map);
9276 return KERN_PROTECTION_FAILURE;
9277 }
9278
9279 /*
9280 * If the entry is in transition, we must wait
9281 * for it to exit that state. Anything could happen
9282 * when we unlock the map, so start over.
9283 */
9284 if (entry->in_transition) {
9285 /*
9286 * Say that we are waiting, and wait for entry.
9287 */
9288 entry->needs_wakeup = TRUE;
9289 vm_map_entry_wait(dst_map, THREAD_UNINT);
9290
9291 goto start_pass_1;
9292 }
9293
9294 /*
9295 * our range is contained completely within this map entry
9296 */
9297 if (dst_end <= entry->vme_end) {
9298 break;
9299 }
9300 /*
9301 * check that range specified is contiguous region
9302 */
9303 if ((next == vm_map_to_entry(dst_map)) ||
9304 (next->vme_start != entry->vme_end)) {
9305 vm_map_unlock(dst_map);
9306 return KERN_INVALID_ADDRESS;
9307 }
9308
9309
9310 /*
9311 * Check for permanent objects in the destination.
9312 */
9313 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9314 ((!VME_OBJECT(entry)->internal) ||
9315 (VME_OBJECT(entry)->true_share))) {
9316 contains_permanent_objects = TRUE;
9317 }
9318
9319 entry = next;
9320 }/* for */
9321
9322 start_overwrite:
9323 /*
9324 * If there are permanent objects in the destination, then
9325 * the copy cannot be interrupted.
9326 */
9327
9328 if (interruptible && contains_permanent_objects) {
9329 vm_map_unlock(dst_map);
9330 return KERN_FAILURE; /* XXX */
9331 }
9332
9333 /*
9334 *
9335 * Make a second pass, overwriting the data
9336 * At the beginning of each loop iteration,
9337 * the next entry to be overwritten is "tmp_entry"
9338 * (initially, the value returned from the lookup above),
9339 * and the starting address expected in that entry
9340 * is "start".
9341 */
9342
9343 total_size = copy->size;
9344 if (encountered_sub_map) {
9345 copy_size = 0;
9346 /* re-calculate tmp_entry since we've had the map */
9347 /* unlocked */
9348 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9349 vm_map_unlock(dst_map);
9350 return KERN_INVALID_ADDRESS;
9351 }
9352 } else {
9353 copy_size = copy->size;
9354 }
9355
9356 base_addr = dst_addr;
9357 while (TRUE) {
9358 /* deconstruct the copy object and do in parts */
9359 /* only in sub_map, interruptible case */
9360 vm_map_entry_t copy_entry;
9361 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
9362 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
9363 int nentries;
9364 int remaining_entries = 0;
9365 vm_map_offset_t new_offset = 0;
9366
9367 for (entry = tmp_entry; copy_size == 0;) {
9368 vm_map_entry_t next;
9369
9370 next = entry->vme_next;
9371
9372 /* tmp_entry and base address are moved along */
9373 /* each time we encounter a sub-map. Otherwise */
9374 /* entry can outpace tmp_entry, and the copy_size */
9375 /* may reflect the distance between them. */
9376 /* If the current entry is found to be in transition, */
9377 /* we will start over at the beginning or the last */
9378 /* encounter of a submap, as dictated by base_addr, */
9379 /* and we will zero copy_size accordingly. */
9380 if (entry->in_transition) {
9381 /*
9382 * Say that we are waiting, and wait for entry.
9383 */
9384 entry->needs_wakeup = TRUE;
9385 vm_map_entry_wait(dst_map, THREAD_UNINT);
9386
9387 if (!vm_map_lookup_entry(dst_map, base_addr,
9388 &tmp_entry)) {
9389 vm_map_unlock(dst_map);
9390 return KERN_INVALID_ADDRESS;
9391 }
9392 copy_size = 0;
9393 entry = tmp_entry;
9394 continue;
9395 }
9396 if (entry->is_sub_map) {
9397 vm_map_offset_t sub_start;
9398 vm_map_offset_t sub_end;
9399 vm_map_offset_t local_end;
9400
9401 if (entry->needs_copy) {
9402 /* if this is a COW submap */
9403 /* just back the range with an */
9404 /* anonymous entry */
9405 if (entry->vme_end < dst_end) {
9406 sub_end = entry->vme_end;
9407 } else {
9408 sub_end = dst_end;
9409 }
9410 if (entry->vme_start < base_addr) {
9411 sub_start = base_addr;
9412 } else {
9413 sub_start = entry->vme_start;
9414 }
9415 vm_map_clip_end(
9416 dst_map, entry, sub_end);
9417 vm_map_clip_start(
9418 dst_map, entry, sub_start);
9419 assert(!entry->use_pmap);
9420 assert(!entry->iokit_acct);
9421 entry->use_pmap = TRUE;
9422 entry->is_sub_map = FALSE;
9423 vm_map_deallocate(
9424 VME_SUBMAP(entry));
9425 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9426 VME_OFFSET_SET(entry, 0);
9427 entry->is_shared = FALSE;
9428 entry->needs_copy = FALSE;
9429 entry->protection = VM_PROT_DEFAULT;
9430 entry->max_protection = VM_PROT_ALL;
9431 entry->wired_count = 0;
9432 entry->user_wired_count = 0;
9433 if (entry->inheritance
9434 == VM_INHERIT_SHARE) {
9435 entry->inheritance = VM_INHERIT_COPY;
9436 }
9437 continue;
9438 }
9439 /* first take care of any non-sub_map */
9440 /* entries to send */
9441 if (base_addr < entry->vme_start) {
9442 /* stuff to send */
9443 copy_size =
9444 entry->vme_start - base_addr;
9445 break;
9446 }
9447 sub_start = VME_OFFSET(entry);
9448
9449 if (entry->vme_end < dst_end) {
9450 sub_end = entry->vme_end;
9451 } else {
9452 sub_end = dst_end;
9453 }
9454 sub_end -= entry->vme_start;
9455 sub_end += VME_OFFSET(entry);
9456 local_end = entry->vme_end;
9457 vm_map_unlock(dst_map);
9458 copy_size = sub_end - sub_start;
9459
9460 /* adjust the copy object */
9461 if (total_size > copy_size) {
9462 vm_map_size_t local_size = 0;
9463 vm_map_size_t entry_size;
9464
9465 nentries = 1;
9466 new_offset = copy->offset;
9467 copy_entry = vm_map_copy_first_entry(copy);
9468 while (copy_entry !=
9469 vm_map_copy_to_entry(copy)) {
9470 entry_size = copy_entry->vme_end -
9471 copy_entry->vme_start;
9472 if ((local_size < copy_size) &&
9473 ((local_size + entry_size)
9474 >= copy_size)) {
9475 vm_map_copy_clip_end(copy,
9476 copy_entry,
9477 copy_entry->vme_start +
9478 (copy_size - local_size));
9479 entry_size = copy_entry->vme_end -
9480 copy_entry->vme_start;
9481 local_size += entry_size;
9482 new_offset += entry_size;
9483 }
9484 if (local_size >= copy_size) {
9485 next_copy = copy_entry->vme_next;
9486 copy_entry->vme_next =
9487 vm_map_copy_to_entry(copy);
9488 previous_prev =
9489 copy->cpy_hdr.links.prev;
9490 copy->cpy_hdr.links.prev = copy_entry;
9491 copy->size = copy_size;
9492 remaining_entries =
9493 copy->cpy_hdr.nentries;
9494 remaining_entries -= nentries;
9495 copy->cpy_hdr.nentries = nentries;
9496 break;
9497 } else {
9498 local_size += entry_size;
9499 new_offset += entry_size;
9500 nentries++;
9501 }
9502 copy_entry = copy_entry->vme_next;
9503 }
9504 }
9505
9506 if ((entry->use_pmap) && (pmap == NULL)) {
9507 kr = vm_map_copy_overwrite_nested(
9508 VME_SUBMAP(entry),
9509 sub_start,
9510 copy,
9511 interruptible,
9512 VME_SUBMAP(entry)->pmap,
9513 TRUE);
9514 } else if (pmap != NULL) {
9515 kr = vm_map_copy_overwrite_nested(
9516 VME_SUBMAP(entry),
9517 sub_start,
9518 copy,
9519 interruptible, pmap,
9520 TRUE);
9521 } else {
9522 kr = vm_map_copy_overwrite_nested(
9523 VME_SUBMAP(entry),
9524 sub_start,
9525 copy,
9526 interruptible,
9527 dst_map->pmap,
9528 TRUE);
9529 }
9530 if (kr != KERN_SUCCESS) {
9531 if (next_copy != NULL) {
9532 copy->cpy_hdr.nentries +=
9533 remaining_entries;
9534 copy->cpy_hdr.links.prev->vme_next =
9535 next_copy;
9536 copy->cpy_hdr.links.prev
9537 = previous_prev;
9538 copy->size = total_size;
9539 }
9540 return kr;
9541 }
9542 if (dst_end <= local_end) {
9543 return KERN_SUCCESS;
9544 }
9545 /* otherwise copy no longer exists, it was */
9546 /* destroyed after successful copy_overwrite */
9547 copy = vm_map_copy_allocate();
9548 copy->type = VM_MAP_COPY_ENTRY_LIST;
9549 copy->offset = new_offset;
9550 copy->cpy_hdr.page_shift = copy_page_shift;
9551
9552 /*
9553 * XXX FBDP
9554 * this does not seem to deal with
9555 * the VM map store (R&B tree)
9556 */
9557
9558 total_size -= copy_size;
9559 copy_size = 0;
9560 /* put back remainder of copy in container */
9561 if (next_copy != NULL) {
9562 copy->cpy_hdr.nentries = remaining_entries;
9563 copy->cpy_hdr.links.next = next_copy;
9564 copy->cpy_hdr.links.prev = previous_prev;
9565 copy->size = total_size;
9566 next_copy->vme_prev =
9567 vm_map_copy_to_entry(copy);
9568 next_copy = NULL;
9569 }
9570 base_addr = local_end;
9571 vm_map_lock(dst_map);
9572 if (!vm_map_lookup_entry(dst_map,
9573 local_end, &tmp_entry)) {
9574 vm_map_unlock(dst_map);
9575 return KERN_INVALID_ADDRESS;
9576 }
9577 entry = tmp_entry;
9578 continue;
9579 }
9580 if (dst_end <= entry->vme_end) {
9581 copy_size = dst_end - base_addr;
9582 break;
9583 }
9584
9585 if ((next == vm_map_to_entry(dst_map)) ||
9586 (next->vme_start != entry->vme_end)) {
9587 vm_map_unlock(dst_map);
9588 return KERN_INVALID_ADDRESS;
9589 }
9590
9591 entry = next;
9592 }/* for */
9593
9594 next_copy = NULL;
9595 nentries = 1;
9596
9597 /* adjust the copy object */
9598 if (total_size > copy_size) {
9599 vm_map_size_t local_size = 0;
9600 vm_map_size_t entry_size;
9601
9602 new_offset = copy->offset;
9603 copy_entry = vm_map_copy_first_entry(copy);
9604 while (copy_entry != vm_map_copy_to_entry(copy)) {
9605 entry_size = copy_entry->vme_end -
9606 copy_entry->vme_start;
9607 if ((local_size < copy_size) &&
9608 ((local_size + entry_size)
9609 >= copy_size)) {
9610 vm_map_copy_clip_end(copy, copy_entry,
9611 copy_entry->vme_start +
9612 (copy_size - local_size));
9613 entry_size = copy_entry->vme_end -
9614 copy_entry->vme_start;
9615 local_size += entry_size;
9616 new_offset += entry_size;
9617 }
9618 if (local_size >= copy_size) {
9619 next_copy = copy_entry->vme_next;
9620 copy_entry->vme_next =
9621 vm_map_copy_to_entry(copy);
9622 previous_prev =
9623 copy->cpy_hdr.links.prev;
9624 copy->cpy_hdr.links.prev = copy_entry;
9625 copy->size = copy_size;
9626 remaining_entries =
9627 copy->cpy_hdr.nentries;
9628 remaining_entries -= nentries;
9629 copy->cpy_hdr.nentries = nentries;
9630 break;
9631 } else {
9632 local_size += entry_size;
9633 new_offset += entry_size;
9634 nentries++;
9635 }
9636 copy_entry = copy_entry->vme_next;
9637 }
9638 }
9639
9640 if (aligned) {
9641 pmap_t local_pmap;
9642
9643 if (pmap) {
9644 local_pmap = pmap;
9645 } else {
9646 local_pmap = dst_map->pmap;
9647 }
9648
9649 if ((kr = vm_map_copy_overwrite_aligned(
9650 dst_map, tmp_entry, copy,
9651 base_addr, local_pmap)) != KERN_SUCCESS) {
9652 if (next_copy != NULL) {
9653 copy->cpy_hdr.nentries +=
9654 remaining_entries;
9655 copy->cpy_hdr.links.prev->vme_next =
9656 next_copy;
9657 copy->cpy_hdr.links.prev =
9658 previous_prev;
9659 copy->size += copy_size;
9660 }
9661 return kr;
9662 }
9663 vm_map_unlock(dst_map);
9664 } else {
9665 /*
9666 * Performance gain:
9667 *
9668 * If the copy and dst address are misaligned but at the same
9669 * offset within the page, we can copy_not_aligned the
9670 * misaligned parts and copy aligned the rest. If they are
9671 * aligned but len is unaligned, we simply need to copy
9672 * the end bit unaligned. We'll need to split the misaligned
9673 * bits of the region in this case!
9674 */
9675 /* ALWAYS UNLOCKS THE dst_map MAP */
9676 kr = vm_map_copy_overwrite_unaligned(
9677 dst_map,
9678 tmp_entry,
9679 copy,
9680 base_addr,
9681 discard_on_success);
9682 if (kr != KERN_SUCCESS) {
9683 if (next_copy != NULL) {
9684 copy->cpy_hdr.nentries +=
9685 remaining_entries;
9686 copy->cpy_hdr.links.prev->vme_next =
9687 next_copy;
9688 copy->cpy_hdr.links.prev =
9689 previous_prev;
9690 copy->size += copy_size;
9691 }
9692 return kr;
9693 }
9694 }
9695 total_size -= copy_size;
9696 if (total_size == 0) {
9697 break;
9698 }
9699 base_addr += copy_size;
9700 copy_size = 0;
9701 copy->offset = new_offset;
9702 if (next_copy != NULL) {
9703 copy->cpy_hdr.nentries = remaining_entries;
9704 copy->cpy_hdr.links.next = next_copy;
9705 copy->cpy_hdr.links.prev = previous_prev;
9706 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9707 copy->size = total_size;
9708 }
9709 vm_map_lock(dst_map);
9710 while (TRUE) {
9711 if (!vm_map_lookup_entry(dst_map,
9712 base_addr, &tmp_entry)) {
9713 vm_map_unlock(dst_map);
9714 return KERN_INVALID_ADDRESS;
9715 }
9716 if (tmp_entry->in_transition) {
9717 entry->needs_wakeup = TRUE;
9718 vm_map_entry_wait(dst_map, THREAD_UNINT);
9719 } else {
9720 break;
9721 }
9722 }
9723 vm_map_clip_start(dst_map,
9724 tmp_entry,
9725 vm_map_trunc_page(base_addr,
9726 VM_MAP_PAGE_MASK(dst_map)));
9727
9728 entry = tmp_entry;
9729 } /* while */
9730
9731 /*
9732 * Throw away the vm_map_copy object
9733 */
9734 if (discard_on_success) {
9735 vm_map_copy_discard(copy);
9736 }
9737
9738 return KERN_SUCCESS;
9739 }/* vm_map_copy_overwrite */
9740
9741 kern_return_t
9742 vm_map_copy_overwrite(
9743 vm_map_t dst_map,
9744 vm_map_offset_t dst_addr,
9745 vm_map_copy_t copy,
9746 vm_map_size_t copy_size,
9747 boolean_t interruptible)
9748 {
9749 vm_map_size_t head_size, tail_size;
9750 vm_map_copy_t head_copy, tail_copy;
9751 vm_map_offset_t head_addr, tail_addr;
9752 vm_map_entry_t entry;
9753 kern_return_t kr;
9754 vm_map_offset_t effective_page_mask, effective_page_size;
9755 int copy_page_shift;
9756
9757 head_size = 0;
9758 tail_size = 0;
9759 head_copy = NULL;
9760 tail_copy = NULL;
9761 head_addr = 0;
9762 tail_addr = 0;
9763
9764 if (interruptible ||
9765 copy == VM_MAP_COPY_NULL ||
9766 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9767 /*
9768 * We can't split the "copy" map if we're interruptible
9769 * or if we don't have a "copy" map...
9770 */
9771 blunt_copy:
9772 return vm_map_copy_overwrite_nested(dst_map,
9773 dst_addr,
9774 copy,
9775 interruptible,
9776 (pmap_t) NULL,
9777 TRUE);
9778 }
9779
9780 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9781 if (copy_page_shift < PAGE_SHIFT ||
9782 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9783 goto blunt_copy;
9784 }
9785
9786 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9787 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9788 } else {
9789 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9790 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9791 effective_page_mask);
9792 }
9793 effective_page_size = effective_page_mask + 1;
9794
9795 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9796 /*
9797 * Too small to bother with optimizing...
9798 */
9799 goto blunt_copy;
9800 }
9801
9802 if ((dst_addr & effective_page_mask) !=
9803 (copy->offset & effective_page_mask)) {
9804 /*
9805 * Incompatible mis-alignment of source and destination...
9806 */
9807 goto blunt_copy;
9808 }
9809
9810 /*
9811 * Proper alignment or identical mis-alignment at the beginning.
9812 * Let's try and do a small unaligned copy first (if needed)
9813 * and then an aligned copy for the rest.
9814 */
9815 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9816 head_addr = dst_addr;
9817 head_size = (effective_page_size -
9818 (copy->offset & effective_page_mask));
9819 head_size = MIN(head_size, copy_size);
9820 }
9821 if (!vm_map_page_aligned(copy->offset + copy_size,
9822 effective_page_mask)) {
9823 /*
9824 * Mis-alignment at the end.
9825 * Do an aligned copy up to the last page and
9826 * then an unaligned copy for the remaining bytes.
9827 */
9828 tail_size = ((copy->offset + copy_size) &
9829 effective_page_mask);
9830 tail_size = MIN(tail_size, copy_size);
9831 tail_addr = dst_addr + copy_size - tail_size;
9832 assert(tail_addr >= head_addr + head_size);
9833 }
9834 assert(head_size + tail_size <= copy_size);
9835
9836 if (head_size + tail_size == copy_size) {
9837 /*
9838 * It's all unaligned, no optimization possible...
9839 */
9840 goto blunt_copy;
9841 }
9842
9843 /*
9844 * Can't optimize if there are any submaps in the
9845 * destination due to the way we free the "copy" map
9846 * progressively in vm_map_copy_overwrite_nested()
9847 * in that case.
9848 */
9849 vm_map_lock_read(dst_map);
9850 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9851 vm_map_unlock_read(dst_map);
9852 goto blunt_copy;
9853 }
9854 for (;
9855 (entry != vm_map_copy_to_entry(copy) &&
9856 entry->vme_start < dst_addr + copy_size);
9857 entry = entry->vme_next) {
9858 if (entry->is_sub_map) {
9859 vm_map_unlock_read(dst_map);
9860 goto blunt_copy;
9861 }
9862 }
9863 vm_map_unlock_read(dst_map);
9864
9865 if (head_size) {
9866 /*
9867 * Unaligned copy of the first "head_size" bytes, to reach
9868 * a page boundary.
9869 */
9870
9871 /*
9872 * Extract "head_copy" out of "copy".
9873 */
9874 head_copy = vm_map_copy_allocate();
9875 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9876 head_copy->cpy_hdr.entries_pageable =
9877 copy->cpy_hdr.entries_pageable;
9878 vm_map_store_init(&head_copy->cpy_hdr);
9879 head_copy->cpy_hdr.page_shift = copy_page_shift;
9880
9881 entry = vm_map_copy_first_entry(copy);
9882 if (entry->vme_end < copy->offset + head_size) {
9883 head_size = entry->vme_end - copy->offset;
9884 }
9885
9886 head_copy->offset = copy->offset;
9887 head_copy->size = head_size;
9888 copy->offset += head_size;
9889 copy->size -= head_size;
9890 copy_size -= head_size;
9891 assert(copy_size > 0);
9892
9893 vm_map_copy_clip_end(copy, entry, copy->offset);
9894 vm_map_copy_entry_unlink(copy, entry);
9895 vm_map_copy_entry_link(head_copy,
9896 vm_map_copy_to_entry(head_copy),
9897 entry);
9898
9899 /*
9900 * Do the unaligned copy.
9901 */
9902 kr = vm_map_copy_overwrite_nested(dst_map,
9903 head_addr,
9904 head_copy,
9905 interruptible,
9906 (pmap_t) NULL,
9907 FALSE);
9908 if (kr != KERN_SUCCESS) {
9909 goto done;
9910 }
9911 }
9912
9913 if (tail_size) {
9914 /*
9915 * Extract "tail_copy" out of "copy".
9916 */
9917 tail_copy = vm_map_copy_allocate();
9918 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9919 tail_copy->cpy_hdr.entries_pageable =
9920 copy->cpy_hdr.entries_pageable;
9921 vm_map_store_init(&tail_copy->cpy_hdr);
9922 tail_copy->cpy_hdr.page_shift = copy_page_shift;
9923
9924 tail_copy->offset = copy->offset + copy_size - tail_size;
9925 tail_copy->size = tail_size;
9926
9927 copy->size -= tail_size;
9928 copy_size -= tail_size;
9929 assert(copy_size > 0);
9930
9931 entry = vm_map_copy_last_entry(copy);
9932 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9933 entry = vm_map_copy_last_entry(copy);
9934 vm_map_copy_entry_unlink(copy, entry);
9935 vm_map_copy_entry_link(tail_copy,
9936 vm_map_copy_last_entry(tail_copy),
9937 entry);
9938 }
9939
9940 /*
9941 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9942 * we want to avoid TOCTOU issues w.r.t copy->size but
9943 * we don't need to change vm_map_copy_overwrite_nested()
9944 * and all other vm_map_copy_overwrite variants.
9945 *
9946 * So we assign the original copy_size that was passed into
9947 * this routine back to copy.
9948 *
9949 * This use of the local 'copy_size' passed into this routine is
9950 * to try to protect against TOCTOU attacks where the kernel
9951 * has been exploited. We don't expect this to be an issue
9952 * during normal system operation.
9953 */
9954 assertf(copy->size == copy_size,
9955 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
9956 copy->size = copy_size;
9957
9958 /*
9959 * Copy most (or possibly all) of the data.
9960 */
9961 kr = vm_map_copy_overwrite_nested(dst_map,
9962 dst_addr + head_size,
9963 copy,
9964 interruptible,
9965 (pmap_t) NULL,
9966 FALSE);
9967 if (kr != KERN_SUCCESS) {
9968 goto done;
9969 }
9970
9971 if (tail_size) {
9972 kr = vm_map_copy_overwrite_nested(dst_map,
9973 tail_addr,
9974 tail_copy,
9975 interruptible,
9976 (pmap_t) NULL,
9977 FALSE);
9978 }
9979
9980 done:
9981 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9982 if (kr == KERN_SUCCESS) {
9983 /*
9984 * Discard all the copy maps.
9985 */
9986 if (head_copy) {
9987 vm_map_copy_discard(head_copy);
9988 head_copy = NULL;
9989 }
9990 vm_map_copy_discard(copy);
9991 if (tail_copy) {
9992 vm_map_copy_discard(tail_copy);
9993 tail_copy = NULL;
9994 }
9995 } else {
9996 /*
9997 * Re-assemble the original copy map.
9998 */
9999 if (head_copy) {
10000 entry = vm_map_copy_first_entry(head_copy);
10001 vm_map_copy_entry_unlink(head_copy, entry);
10002 vm_map_copy_entry_link(copy,
10003 vm_map_copy_to_entry(copy),
10004 entry);
10005 copy->offset -= head_size;
10006 copy->size += head_size;
10007 vm_map_copy_discard(head_copy);
10008 head_copy = NULL;
10009 }
10010 if (tail_copy) {
10011 entry = vm_map_copy_last_entry(tail_copy);
10012 vm_map_copy_entry_unlink(tail_copy, entry);
10013 vm_map_copy_entry_link(copy,
10014 vm_map_copy_last_entry(copy),
10015 entry);
10016 copy->size += tail_size;
10017 vm_map_copy_discard(tail_copy);
10018 tail_copy = NULL;
10019 }
10020 }
10021 return kr;
10022 }
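/*
 * Illustrative sketch (not part of the original source): the head/tail
 * split performed above reduces to simple mask arithmetic.  Assuming
 * "effective_page_mask" is the destination map's effective page mask,
 * the tail is just the sub-page remainder at the end of the copy:
 *
 *	tail_size = (copy->offset + copy_size) & effective_page_mask;
 *	tail_size = MIN(tail_size, copy_size);
 *	tail_addr = dst_addr + copy_size - tail_size;
 *
 * The head is handled symmetrically (the bytes needed to reach the first
 * page boundary, clamped to copy_size); whatever is left in the middle is
 * page-aligned and goes through the optimized aligned path.
 */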
10023
10024
10025 /*
10026 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
10027 *
10028 * Description:
10029 * Physically copy unaligned data
10030 *
10031 * Implementation:
10032 * Unaligned parts of pages have to be physically copied. We use
10033 * a modified form of vm_fault_copy (which understands non-aligned
10034 * page offsets and sizes) to do the copy. We attempt to copy as
10035 * much memory in one go as possible; however, vm_fault_copy copies
10036 * within one memory object, so we have to find the smallest of "amount left",
10037 * "source object data size" and "target object data size". With
10038 * unaligned data we don't need to split regions, therefore the source
10039 * (copy) object should be one map entry; the target range may be split
10040 * over multiple map entries, however. In any event we are pessimistic
10041 * about these assumptions (see the sketch below).
10042 *
10043 * Assumptions:
10044 * dst_map is locked on entry and is returned locked on success,
10045 * unlocked on error.
10046 */
10047
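/*
 * Illustrative sketch (not part of the original source): each pass of the
 * loop below copies the largest chunk that fits in both the current
 * destination entry and the current source copy entry, without exceeding
 * what is left to copy:
 *
 *	dst_size  = entry->vme_end - start;
 *	src_size  = copy_entry->vme_end - (copy_entry->vme_start + src_offset);
 *	copy_size = MIN(MIN(dst_size, src_size), amount_left);
 *
 * which is exactly the clamping done at the top of the while loop before
 * calling vm_fault_copy().
 */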
10048 static kern_return_t
10049 vm_map_copy_overwrite_unaligned(
10050 vm_map_t dst_map,
10051 vm_map_entry_t entry,
10052 vm_map_copy_t copy,
10053 vm_map_offset_t start,
10054 boolean_t discard_on_success)
10055 {
10056 vm_map_entry_t copy_entry;
10057 vm_map_entry_t copy_entry_next;
10058 vm_map_version_t version;
10059 vm_object_t dst_object;
10060 vm_object_offset_t dst_offset;
10061 vm_object_offset_t src_offset;
10062 vm_object_offset_t entry_offset;
10063 vm_map_offset_t entry_end;
10064 vm_map_size_t src_size,
10065 dst_size,
10066 copy_size,
10067 amount_left;
10068 kern_return_t kr = KERN_SUCCESS;
10069
10070
10071 copy_entry = vm_map_copy_first_entry(copy);
10072
10073 vm_map_lock_write_to_read(dst_map);
10074
10075 src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
10076 amount_left = copy->size;
10077 /*
10078 * The copy is unaligned, so we never clipped this entry; we need the offset
10079 * into the vm_object, not just into the data.
10080 */
10081 while (amount_left > 0) {
10082 if (entry == vm_map_to_entry(dst_map)) {
10083 vm_map_unlock_read(dst_map);
10084 return KERN_INVALID_ADDRESS;
10085 }
10086
10087 /* "start" must be within the current map entry */
10088 assert((start >= entry->vme_start) && (start < entry->vme_end));
10089
10090 dst_offset = start - entry->vme_start;
10091
10092 dst_size = entry->vme_end - start;
10093
10094 src_size = copy_entry->vme_end -
10095 (copy_entry->vme_start + src_offset);
10096
10097 if (dst_size < src_size) {
10098 /*
10099 * we can only copy dst_size bytes before
10100 * we have to get the next destination entry
10101 */
10102 copy_size = dst_size;
10103 } else {
10104 /*
10105 * we can only copy src_size bytes before
10106 * we have to get the next source copy entry
10107 */
10108 copy_size = src_size;
10109 }
10110
10111 if (copy_size > amount_left) {
10112 copy_size = amount_left;
10113 }
10114 /*
10115 * Entry needs copy: create a shadow object for the
10116 * copy-on-write region.
10117 */
10118 if (entry->needs_copy &&
10119 ((entry->protection & VM_PROT_WRITE) != 0)) {
10120 if (vm_map_lock_read_to_write(dst_map)) {
10121 vm_map_lock_read(dst_map);
10122 goto RetryLookup;
10123 }
10124 VME_OBJECT_SHADOW(entry,
10125 (vm_map_size_t)(entry->vme_end
10126 - entry->vme_start));
10127 entry->needs_copy = FALSE;
10128 vm_map_lock_write_to_read(dst_map);
10129 }
10130 dst_object = VME_OBJECT(entry);
10131 /*
10132 * Unlike with the virtual (aligned) copy, we're going
10133 * to fault on this memory, so we need a target object.
10134 */
10135 if (dst_object == VM_OBJECT_NULL) {
10136 if (vm_map_lock_read_to_write(dst_map)) {
10137 vm_map_lock_read(dst_map);
10138 goto RetryLookup;
10139 }
10140 dst_object = vm_object_allocate((vm_map_size_t)
10141 entry->vme_end - entry->vme_start);
10142 VME_OBJECT_SET(entry, dst_object);
10143 VME_OFFSET_SET(entry, 0);
10144 assert(entry->use_pmap);
10145 vm_map_lock_write_to_read(dst_map);
10146 }
10147 /*
10148 * Take an object reference and unlock map. The "entry" may
10149 * disappear or change when the map is unlocked.
10150 */
10151 vm_object_reference(dst_object);
10152 version.main_timestamp = dst_map->timestamp;
10153 entry_offset = VME_OFFSET(entry);
10154 entry_end = entry->vme_end;
10155 vm_map_unlock_read(dst_map);
10156 /*
10157 * Copy as much as possible in one pass
10158 */
10159 kr = vm_fault_copy(
10160 VME_OBJECT(copy_entry),
10161 VME_OFFSET(copy_entry) + src_offset,
10162 &copy_size,
10163 dst_object,
10164 entry_offset + dst_offset,
10165 dst_map,
10166 &version,
10167 THREAD_UNINT );
10168
10169 start += copy_size;
10170 src_offset += copy_size;
10171 amount_left -= copy_size;
10172 /*
10173 * Release the object reference
10174 */
10175 vm_object_deallocate(dst_object);
10176 /*
10177 * If a hard error occurred, return it now
10178 */
10179 if (kr != KERN_SUCCESS) {
10180 return kr;
10181 }
10182
10183 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
10184 || amount_left == 0) {
10185 /*
10186 * all done with this copy entry, dispose.
10187 */
10188 copy_entry_next = copy_entry->vme_next;
10189
10190 if (discard_on_success) {
10191 vm_map_copy_entry_unlink(copy, copy_entry);
10192 assert(!copy_entry->is_sub_map);
10193 vm_object_deallocate(VME_OBJECT(copy_entry));
10194 vm_map_copy_entry_dispose(copy, copy_entry);
10195 }
10196
10197 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
10198 amount_left) {
10199 /*
10200 * not finished copying but ran out of source
10201 */
10202 return KERN_INVALID_ADDRESS;
10203 }
10204
10205 copy_entry = copy_entry_next;
10206
10207 src_offset = 0;
10208 }
10209
10210 if (amount_left == 0) {
10211 return KERN_SUCCESS;
10212 }
10213
10214 vm_map_lock_read(dst_map);
10215 if (version.main_timestamp == dst_map->timestamp) {
10216 if (start == entry_end) {
10217 /*
10218 * destination region is split. Use the version
10219 * information to avoid a lookup in the normal
10220 * case.
10221 */
10222 entry = entry->vme_next;
10223 /*
10224 * should be contiguous. Fail if we encounter
10225 * a hole in the destination.
10226 */
10227 if (start != entry->vme_start) {
10228 vm_map_unlock_read(dst_map);
10229 return KERN_INVALID_ADDRESS;
10230 }
10231 }
10232 } else {
10233 /*
10234 * Map version check failed.
10235 * we must look up the entry because somebody
10236 * might have changed the map behind our backs.
10237 */
10238 RetryLookup:
10239 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
10240 vm_map_unlock_read(dst_map);
10241 return KERN_INVALID_ADDRESS;
10242 }
10243 }
10244 }/* while */
10245
10246 return KERN_SUCCESS;
10247 }/* vm_map_copy_overwrite_unaligned */
10248
10249 /*
10250 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10251 *
10252 * Description:
10253 * Does all the vm_trickery possible for whole pages.
10254 *
10255 * Implementation:
10256 *
10257 * If there are no permanent objects in the destination,
10258 * and the source and destination map entry zones match,
10259 * and the destination map entry is not shared,
10260 * then the map entries can be deleted and replaced
10261 * with those from the copy. The following code is the
10262 * basic idea of what to do, but there are lots of annoying
10263 * little details about getting protection and inheritance
10264 * right. Should add protection, inheritance, and sharing checks
10265 * to the above pass and make sure that no wiring is involved.
10266 */
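/*
 * Illustrative sketch (not part of the original source): the "temporary
 * unshared memory" fast path below boils down to this eligibility test on
 * the destination entry and its backing object:
 *
 *	object = VME_OBJECT(entry);
 *	eligible = (!entry->is_shared &&
 *	            (object == VM_OBJECT_NULL ||
 *	             (object->internal && !object->true_share))) ||
 *	           entry->needs_copy;
 *
 * Eligible entries simply take over the copy entry's object and offset;
 * anything else falls back to a physical copy via vm_fault_copy().
 */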
10267
10268 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
10269 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
10270 int vm_map_copy_overwrite_aligned_src_large = 0;
10271
10272 static kern_return_t
10273 vm_map_copy_overwrite_aligned(
10274 vm_map_t dst_map,
10275 vm_map_entry_t tmp_entry,
10276 vm_map_copy_t copy,
10277 vm_map_offset_t start,
10278 __unused pmap_t pmap)
10279 {
10280 vm_object_t object;
10281 vm_map_entry_t copy_entry;
10282 vm_map_size_t copy_size;
10283 vm_map_size_t size;
10284 vm_map_entry_t entry;
10285
10286 while ((copy_entry = vm_map_copy_first_entry(copy))
10287 != vm_map_copy_to_entry(copy)) {
10288 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
10289
10290 entry = tmp_entry;
10291 if (entry->is_sub_map) {
10292 /* unnested when clipped earlier */
10293 assert(!entry->use_pmap);
10294 }
10295 if (entry == vm_map_to_entry(dst_map)) {
10296 vm_map_unlock(dst_map);
10297 return KERN_INVALID_ADDRESS;
10298 }
10299 size = (entry->vme_end - entry->vme_start);
10300 /*
10301 * Make sure that no holes popped up in the
10302 * address map, and that the protection is
10303 * still valid, in case the map was unlocked
10304 * earlier.
10305 */
10306
10307 if ((entry->vme_start != start) || ((entry->is_sub_map)
10308 && !entry->needs_copy)) {
10309 vm_map_unlock(dst_map);
10310 return KERN_INVALID_ADDRESS;
10311 }
10312 assert(entry != vm_map_to_entry(dst_map));
10313
10314 /*
10315 * Check protection again
10316 */
10317
10318 if (!(entry->protection & VM_PROT_WRITE)) {
10319 vm_map_unlock(dst_map);
10320 return KERN_PROTECTION_FAILURE;
10321 }
10322
10323 /*
10324 * Adjust to source size first
10325 */
10326
10327 if (copy_size < size) {
10328 if (entry->map_aligned &&
10329 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
10330 VM_MAP_PAGE_MASK(dst_map))) {
10331 /* no longer map-aligned */
10332 entry->map_aligned = FALSE;
10333 }
10334 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10335 size = copy_size;
10336 }
10337
10338 /*
10339 * Adjust to destination size
10340 */
10341
10342 if (size < copy_size) {
10343 vm_map_copy_clip_end(copy, copy_entry,
10344 copy_entry->vme_start + size);
10345 copy_size = size;
10346 }
10347
10348 assert((entry->vme_end - entry->vme_start) == size);
10349 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10350 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10351
10352 /*
10353 * If the destination contains temporary unshared memory,
10354 * we can perform the copy by throwing it away and
10355 * installing the source data.
10356 */
10357
10358 object = VME_OBJECT(entry);
10359 if ((!entry->is_shared &&
10360 ((object == VM_OBJECT_NULL) ||
10361 (object->internal && !object->true_share))) ||
10362 entry->needs_copy) {
10363 vm_object_t old_object = VME_OBJECT(entry);
10364 vm_object_offset_t old_offset = VME_OFFSET(entry);
10365 vm_object_offset_t offset;
10366
10367 /*
10368 * Ensure that the source and destination aren't
10369 * identical
10370 */
10371 if (old_object == VME_OBJECT(copy_entry) &&
10372 old_offset == VME_OFFSET(copy_entry)) {
10373 vm_map_copy_entry_unlink(copy, copy_entry);
10374 vm_map_copy_entry_dispose(copy, copy_entry);
10375
10376 if (old_object != VM_OBJECT_NULL) {
10377 vm_object_deallocate(old_object);
10378 }
10379
10380 start = tmp_entry->vme_end;
10381 tmp_entry = tmp_entry->vme_next;
10382 continue;
10383 }
10384
10385 #if XNU_TARGET_OS_OSX
10386 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10387 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10388 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10389 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
10390 copy_size <= __TRADEOFF1_COPY_SIZE) {
10391 /*
10392 * Virtual vs. Physical copy tradeoff #1.
10393 *
10394 * Copying only a few pages out of a large
10395 * object: do a physical copy instead of
10396 * a virtual copy, to avoid possibly keeping
10397 * the entire large object alive because of
10398 * those few copy-on-write pages.
10399 */
10400 vm_map_copy_overwrite_aligned_src_large++;
10401 goto slow_copy;
10402 }
10403 #endif /* XNU_TARGET_OS_OSX */
10404
10405 if ((dst_map->pmap != kernel_pmap) &&
10406 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
10407 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
10408 vm_object_t new_object, new_shadow;
10409
10410 /*
10411 * We're about to map something over a mapping
10412 * established by malloc()...
10413 */
10414 new_object = VME_OBJECT(copy_entry);
10415 if (new_object != VM_OBJECT_NULL) {
10416 vm_object_lock_shared(new_object);
10417 }
10418 while (new_object != VM_OBJECT_NULL &&
10419 #if XNU_TARGET_OS_OSX
10420 !new_object->true_share &&
10421 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10422 #endif /* XNU_TARGET_OS_OSX */
10423 new_object->internal) {
10424 new_shadow = new_object->shadow;
10425 if (new_shadow == VM_OBJECT_NULL) {
10426 break;
10427 }
10428 vm_object_lock_shared(new_shadow);
10429 vm_object_unlock(new_object);
10430 new_object = new_shadow;
10431 }
10432 if (new_object != VM_OBJECT_NULL) {
10433 if (!new_object->internal) {
10434 /*
10435 * The new mapping is backed
10436 * by an external object. We
10437 * don't want malloc'ed memory
10438 * to be replaced with such a
10439 * non-anonymous mapping, so
10440 * let's go off the optimized
10441 * path...
10442 */
10443 vm_map_copy_overwrite_aligned_src_not_internal++;
10444 vm_object_unlock(new_object);
10445 goto slow_copy;
10446 }
10447 #if XNU_TARGET_OS_OSX
10448 if (new_object->true_share ||
10449 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10450 /*
10451 * Same if there's a "true_share"
10452 * object in the shadow chain, or
10453 * an object with a non-default
10454 * (SYMMETRIC) copy strategy.
10455 */
10456 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10457 vm_object_unlock(new_object);
10458 goto slow_copy;
10459 }
10460 #endif /* XNU_TARGET_OS_OSX */
10461 vm_object_unlock(new_object);
10462 }
10463 /*
10464 * The new mapping is still backed by
10465 * anonymous (internal) memory, so it's
10466 * OK to substitute it for the original
10467 * malloc() mapping.
10468 */
10469 }
10470
10471 if (old_object != VM_OBJECT_NULL) {
10472 if (entry->is_sub_map) {
10473 if (entry->use_pmap) {
10474 #ifndef NO_NESTED_PMAP
10475 pmap_unnest(dst_map->pmap,
10476 (addr64_t)entry->vme_start,
10477 entry->vme_end - entry->vme_start);
10478 #endif /* NO_NESTED_PMAP */
10479 if (dst_map->mapped_in_other_pmaps) {
10480 /* clean up parent */
10481 /* map/maps */
10482 vm_map_submap_pmap_clean(
10483 dst_map, entry->vme_start,
10484 entry->vme_end,
10485 VME_SUBMAP(entry),
10486 VME_OFFSET(entry));
10487 }
10488 } else {
10489 vm_map_submap_pmap_clean(
10490 dst_map, entry->vme_start,
10491 entry->vme_end,
10492 VME_SUBMAP(entry),
10493 VME_OFFSET(entry));
10494 }
10495 vm_map_deallocate(VME_SUBMAP(entry));
10496 } else {
10497 if (dst_map->mapped_in_other_pmaps) {
10498 vm_object_pmap_protect_options(
10499 VME_OBJECT(entry),
10500 VME_OFFSET(entry),
10501 entry->vme_end
10502 - entry->vme_start,
10503 PMAP_NULL,
10504 PAGE_SIZE,
10505 entry->vme_start,
10506 VM_PROT_NONE,
10507 PMAP_OPTIONS_REMOVE);
10508 } else {
10509 pmap_remove_options(
10510 dst_map->pmap,
10511 (addr64_t)(entry->vme_start),
10512 (addr64_t)(entry->vme_end),
10513 PMAP_OPTIONS_REMOVE);
10514 }
10515 vm_object_deallocate(old_object);
10516 }
10517 }
10518
10519 if (entry->iokit_acct) {
10520 /* keep using iokit accounting */
10521 entry->use_pmap = FALSE;
10522 } else {
10523 /* use pmap accounting */
10524 entry->use_pmap = TRUE;
10525 }
10526 entry->is_sub_map = FALSE;
10527 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10528 object = VME_OBJECT(entry);
10529 entry->needs_copy = copy_entry->needs_copy;
10530 entry->wired_count = 0;
10531 entry->user_wired_count = 0;
10532 offset = VME_OFFSET(copy_entry);
10533 VME_OFFSET_SET(entry, offset);
10534
10535 vm_map_copy_entry_unlink(copy, copy_entry);
10536 vm_map_copy_entry_dispose(copy, copy_entry);
10537
10538 /*
10539 * We could try to push pages into the pmap at this point, BUT
10540 * this optimization only saved on average 2 us per page if ALL
10541 * the pages in the source were currently mapped
10542 * and ALL the pages in the dest were touched; if fewer
10543 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
10544 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10545 */
10546
10547 /*
10548 * Set up for the next iteration. The map
10549 * has not been unlocked, so the next
10550 * address should be at the end of this
10551 * entry, and the next map entry should be
10552 * the one following it.
10553 */
10554
10555 start = tmp_entry->vme_end;
10556 tmp_entry = tmp_entry->vme_next;
10557 } else {
10558 vm_map_version_t version;
10559 vm_object_t dst_object;
10560 vm_object_offset_t dst_offset;
10561 kern_return_t r;
10562
10563 slow_copy:
10564 if (entry->needs_copy) {
10565 VME_OBJECT_SHADOW(entry,
10566 (entry->vme_end -
10567 entry->vme_start));
10568 entry->needs_copy = FALSE;
10569 }
10570
10571 dst_object = VME_OBJECT(entry);
10572 dst_offset = VME_OFFSET(entry);
10573
10574 /*
10575 * Take an object reference, and record
10576 * the map version information so that the
10577 * map can be safely unlocked.
10578 */
10579
10580 if (dst_object == VM_OBJECT_NULL) {
10581 /*
10582 * We would usually have just taken the
10583 * optimized path above if the destination
10584 * object has not been allocated yet. But we
10585 * now disable that optimization if the copy
10586 * entry's object is not backed by anonymous
10587 * memory to avoid replacing malloc'ed
10588 * (i.e. re-usable) anonymous memory with a
10589 * not-so-anonymous mapping.
10590 * So we have to handle this case here and
10591 * allocate a new VM object for this map entry.
10592 */
10593 dst_object = vm_object_allocate(
10594 entry->vme_end - entry->vme_start);
10595 dst_offset = 0;
10596 VME_OBJECT_SET(entry, dst_object);
10597 VME_OFFSET_SET(entry, dst_offset);
10598 assert(entry->use_pmap);
10599 }
10600
10601 vm_object_reference(dst_object);
10602
10603 /* account for unlock bumping up timestamp */
10604 version.main_timestamp = dst_map->timestamp + 1;
10605
10606 vm_map_unlock(dst_map);
10607
10608 /*
10609 * Copy as much as possible in one pass
10610 */
10611
10612 copy_size = size;
10613 r = vm_fault_copy(
10614 VME_OBJECT(copy_entry),
10615 VME_OFFSET(copy_entry),
10616 &copy_size,
10617 dst_object,
10618 dst_offset,
10619 dst_map,
10620 &version,
10621 THREAD_UNINT );
10622
10623 /*
10624 * Release the object reference
10625 */
10626
10627 vm_object_deallocate(dst_object);
10628
10629 /*
10630 * If a hard error occurred, return it now
10631 */
10632
10633 if (r != KERN_SUCCESS) {
10634 return r;
10635 }
10636
10637 if (copy_size != 0) {
10638 /*
10639 * Dispose of the copied region
10640 */
10641
10642 vm_map_copy_clip_end(copy, copy_entry,
10643 copy_entry->vme_start + copy_size);
10644 vm_map_copy_entry_unlink(copy, copy_entry);
10645 vm_object_deallocate(VME_OBJECT(copy_entry));
10646 vm_map_copy_entry_dispose(copy, copy_entry);
10647 }
10648
10649 /*
10650 * Pick up in the destination map where we left off.
10651 *
10652 * Use the version information to avoid a lookup
10653 * in the normal case.
10654 */
10655
10656 start += copy_size;
10657 vm_map_lock(dst_map);
10658 if (version.main_timestamp == dst_map->timestamp &&
10659 copy_size != 0) {
10660 /* We can safely use saved tmp_entry value */
10661
10662 if (tmp_entry->map_aligned &&
10663 !VM_MAP_PAGE_ALIGNED(
10664 start,
10665 VM_MAP_PAGE_MASK(dst_map))) {
10666 /* no longer map-aligned */
10667 tmp_entry->map_aligned = FALSE;
10668 }
10669 vm_map_clip_end(dst_map, tmp_entry, start);
10670 tmp_entry = tmp_entry->vme_next;
10671 } else {
10672 /* Must do lookup of tmp_entry */
10673
10674 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10675 vm_map_unlock(dst_map);
10676 return KERN_INVALID_ADDRESS;
10677 }
10678 if (tmp_entry->map_aligned &&
10679 !VM_MAP_PAGE_ALIGNED(
10680 start,
10681 VM_MAP_PAGE_MASK(dst_map))) {
10682 /* no longer map-aligned */
10683 tmp_entry->map_aligned = FALSE;
10684 }
10685 vm_map_clip_start(dst_map, tmp_entry, start);
10686 }
10687 }
10688 }/* while */
10689
10690 return KERN_SUCCESS;
10691 }/* vm_map_copy_overwrite_aligned */
10692
10693 /*
10694 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10695 *
10696 * Description:
10697 * Copy in data to a kernel buffer from space in the
10698 * source map. The original space may be optionally
10699 * deallocated.
10700 *
10701 * If successful, returns a new copy object.
10702 */
10703 static kern_return_t
10704 vm_map_copyin_kernel_buffer(
10705 vm_map_t src_map,
10706 vm_map_offset_t src_addr,
10707 vm_map_size_t len,
10708 boolean_t src_destroy,
10709 vm_map_copy_t *copy_result)
10710 {
10711 kern_return_t kr;
10712 vm_map_copy_t copy;
10713
10714 if (len > msg_ool_size_small) {
10715 return KERN_INVALID_ARGUMENT;
10716 }
10717
10718 copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
10719 if (copy == VM_MAP_COPY_NULL) {
10720 return KERN_RESOURCE_SHORTAGE;
10721 }
10722 copy->cpy_kdata = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
10723 if (copy->cpy_kdata == NULL) {
10724 zfree(vm_map_copy_zone, copy);
10725 return KERN_RESOURCE_SHORTAGE;
10726 }
10727
10728 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10729 copy->size = len;
10730 copy->offset = 0;
10731
10732 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10733 if (kr != KERN_SUCCESS) {
10734 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, len);
10735 zfree(vm_map_copy_zone, copy);
10736 return kr;
10737 }
10738 if (src_destroy) {
10739 (void) vm_map_remove(
10740 src_map,
10741 vm_map_trunc_page(src_addr,
10742 VM_MAP_PAGE_MASK(src_map)),
10743 vm_map_round_page(src_addr + len,
10744 VM_MAP_PAGE_MASK(src_map)),
10745 (VM_MAP_REMOVE_INTERRUPTIBLE |
10746 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10747 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10748 }
10749 *copy_result = copy;
10750 return KERN_SUCCESS;
10751 }
10752
10753 /*
10754 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10755 *
10756 * Description:
10757 * Copy out data from a kernel buffer into space in the
10758 * destination map. The space may optionally be dynamically
10759 * allocated.
10760 *
10761 * If successful, consumes the copy object.
10762 * Otherwise, the caller is responsible for it.
10763 */
10764 static int vm_map_copyout_kernel_buffer_failures = 0;
10765 static kern_return_t
10766 vm_map_copyout_kernel_buffer(
10767 vm_map_t map,
10768 vm_map_address_t *addr, /* IN/OUT */
10769 vm_map_copy_t copy,
10770 vm_map_size_t copy_size,
10771 boolean_t overwrite,
10772 boolean_t consume_on_success)
10773 {
10774 kern_return_t kr = KERN_SUCCESS;
10775 thread_t thread = current_thread();
10776
10777 assert(copy->size == copy_size);
10778
10779 /*
10780 * check for corrupted vm_map_copy structure
10781 */
10782 if (copy_size > msg_ool_size_small || copy->offset) {
10783 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10784 (long long)copy->size, (long long)copy->offset);
10785 }
10786
10787 if (!overwrite) {
10788 /*
10789 * Allocate space in the target map for the data
10790 */
10791 *addr = 0;
10792 kr = vm_map_enter(map,
10793 addr,
10794 vm_map_round_page(copy_size,
10795 VM_MAP_PAGE_MASK(map)),
10796 (vm_map_offset_t) 0,
10797 VM_FLAGS_ANYWHERE,
10798 VM_MAP_KERNEL_FLAGS_NONE,
10799 VM_KERN_MEMORY_NONE,
10800 VM_OBJECT_NULL,
10801 (vm_object_offset_t) 0,
10802 FALSE,
10803 VM_PROT_DEFAULT,
10804 VM_PROT_ALL,
10805 VM_INHERIT_DEFAULT);
10806 if (kr != KERN_SUCCESS) {
10807 return kr;
10808 }
10809 #if KASAN
10810 if (map->pmap == kernel_pmap) {
10811 kasan_notify_address(*addr, copy->size);
10812 }
10813 #endif
10814 }
10815
10816 /*
10817 * Copyout the data from the kernel buffer to the target map.
10818 */
10819 if (thread->map == map) {
10820 /*
10821 * If the target map is the current map, just do
10822 * the copy.
10823 */
10824 assert((vm_size_t)copy_size == copy_size);
10825 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10826 kr = KERN_INVALID_ADDRESS;
10827 }
10828 } else {
10829 vm_map_t oldmap;
10830
10831 /*
10832 * If the target map is another map, assume the
10833 * target's address space identity for the duration
10834 * of the copy.
10835 */
10836 vm_map_reference(map);
10837 oldmap = vm_map_switch(map);
10838
10839 assert((vm_size_t)copy_size == copy_size);
10840 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10841 vm_map_copyout_kernel_buffer_failures++;
10842 kr = KERN_INVALID_ADDRESS;
10843 }
10844
10845 (void) vm_map_switch(oldmap);
10846 vm_map_deallocate(map);
10847 }
10848
10849 if (kr != KERN_SUCCESS) {
10850 /* the copy failed, clean up */
10851 if (!overwrite) {
10852 /*
10853 * Deallocate the space we allocated in the target map.
10854 */
10855 (void) vm_map_remove(
10856 map,
10857 vm_map_trunc_page(*addr,
10858 VM_MAP_PAGE_MASK(map)),
10859 vm_map_round_page((*addr +
10860 vm_map_round_page(copy_size,
10861 VM_MAP_PAGE_MASK(map))),
10862 VM_MAP_PAGE_MASK(map)),
10863 VM_MAP_REMOVE_NO_FLAGS);
10864 *addr = 0;
10865 }
10866 } else {
10867 /* copy was successful, discard the copy structure */
10868 if (consume_on_success) {
10869 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy_size);
10870 zfree(vm_map_copy_zone, copy);
10871 }
10872 }
10873
10874 return kr;
10875 }
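/*
 * Illustrative sketch (not part of the original source): when the target
 * map is not the current thread's map, the routine above temporarily
 * assumes the target's address space identity around the copyout:
 *
 *	vm_map_reference(map);
 *	oldmap = vm_map_switch(map);
 *	...copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)...
 *	(void) vm_map_switch(oldmap);
 *	vm_map_deallocate(map);
 *
 * The extra reference keeps "map" alive for the duration of the switch.
 */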
10876
10877 /*
10878 * Routine: vm_map_copy_insert [internal use only]
10879 *
10880 * Description:
10881 * Link a copy chain ("copy") into a map at the
10882 * specified location (after "where").
10883 * Side effects:
10884 * The copy chain is destroyed.
10885 */
10886 static void
10887 vm_map_copy_insert(
10888 vm_map_t map,
10889 vm_map_entry_t after_where,
10890 vm_map_copy_t copy)
10891 {
10892 vm_map_entry_t entry;
10893
10894 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10895 entry = vm_map_copy_first_entry(copy);
10896 vm_map_copy_entry_unlink(copy, entry);
10897 vm_map_store_entry_link(map, after_where, entry,
10898 VM_MAP_KERNEL_FLAGS_NONE);
10899 after_where = entry;
10900 }
10901 zfree(vm_map_copy_zone, copy);
10902 }
10903
10904 void
10905 vm_map_copy_remap(
10906 vm_map_t map,
10907 vm_map_entry_t where,
10908 vm_map_copy_t copy,
10909 vm_map_offset_t adjustment,
10910 vm_prot_t cur_prot,
10911 vm_prot_t max_prot,
10912 vm_inherit_t inheritance)
10913 {
10914 vm_map_entry_t copy_entry, new_entry;
10915
10916 for (copy_entry = vm_map_copy_first_entry(copy);
10917 copy_entry != vm_map_copy_to_entry(copy);
10918 copy_entry = copy_entry->vme_next) {
10919 /* get a new VM map entry for the map */
10920 new_entry = vm_map_entry_create(map,
10921 !map->hdr.entries_pageable);
10922 /* copy the "copy entry" to the new entry */
10923 vm_map_entry_copy(map, new_entry, copy_entry);
10924 /* adjust "start" and "end" */
10925 new_entry->vme_start += adjustment;
10926 new_entry->vme_end += adjustment;
10927 /* clear some attributes */
10928 new_entry->inheritance = inheritance;
10929 new_entry->protection = cur_prot;
10930 new_entry->max_protection = max_prot;
10931 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10932 /* take an extra reference on the entry's "object" */
10933 if (new_entry->is_sub_map) {
10934 assert(!new_entry->use_pmap); /* not nested */
10935 vm_map_lock(VME_SUBMAP(new_entry));
10936 vm_map_reference(VME_SUBMAP(new_entry));
10937 vm_map_unlock(VME_SUBMAP(new_entry));
10938 } else {
10939 vm_object_reference(VME_OBJECT(new_entry));
10940 }
10941 /* insert the new entry in the map */
10942 vm_map_store_entry_link(map, where, new_entry,
10943 VM_MAP_KERNEL_FLAGS_NONE);
10944 /* continue inserting the "copy entries" after the new entry */
10945 where = new_entry;
10946 }
10947 }
10948
10949
10950 /*
10951 * Returns true if *size matches (or is in the range of) copy->size.
10952 * Upon returning true, the *size field is updated with the actual size of the
10953 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10954 */
10955 boolean_t
10956 vm_map_copy_validate_size(
10957 vm_map_t dst_map,
10958 vm_map_copy_t copy,
10959 vm_map_size_t *size)
10960 {
10961 if (copy == VM_MAP_COPY_NULL) {
10962 return FALSE;
10963 }
10964 vm_map_size_t copy_sz = copy->size;
10965 vm_map_size_t sz = *size;
10966 switch (copy->type) {
10967 case VM_MAP_COPY_OBJECT:
10968 case VM_MAP_COPY_KERNEL_BUFFER:
10969 if (sz == copy_sz) {
10970 return TRUE;
10971 }
10972 break;
10973 case VM_MAP_COPY_ENTRY_LIST:
10974 /*
10975 * potential page-size rounding prevents us from exactly
10976 * validating this flavor of vm_map_copy, but we can at least
10977 * assert that it's within a range.
10978 */
10979 if (copy_sz >= sz &&
10980 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10981 *size = copy_sz;
10982 return TRUE;
10983 }
10984 break;
10985 default:
10986 break;
10987 }
10988 return FALSE;
10989 }
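/*
 * Illustrative usage sketch (not part of the original source), assuming a
 * caller holding "copy" and a caller-supplied "size" (both hypothetical):
 *
 *	vm_map_size_t size = caller_supplied_size;
 *
 *	if (!vm_map_copy_validate_size(dst_map, copy, &size)) {
 *		return KERN_FAILURE;
 *	}
 *	kr = vm_map_copyout_size(dst_map, &dst_addr, copy, size);
 *
 * On success, "size" holds the exact copy size, which satisfies the size
 * check performed by vm_map_copyout_size()/vm_map_copyout_internal().
 */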
10990
10991 /*
10992 * Routine: vm_map_copyout_size
10993 *
10994 * Description:
10995 * Copy out a copy chain ("copy") into newly-allocated
10996 * space in the destination map. Uses a prevalidated
10997 * size for the copy object (vm_map_copy_validate_size).
10998 *
10999 * If successful, consumes the copy object.
11000 * Otherwise, the caller is responsible for it.
11001 */
11002 kern_return_t
11003 vm_map_copyout_size(
11004 vm_map_t dst_map,
11005 vm_map_address_t *dst_addr, /* OUT */
11006 vm_map_copy_t copy,
11007 vm_map_size_t copy_size)
11008 {
11009 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
11010 TRUE, /* consume_on_success */
11011 VM_PROT_DEFAULT,
11012 VM_PROT_ALL,
11013 VM_INHERIT_DEFAULT);
11014 }
11015
11016 /*
11017 * Routine: vm_map_copyout
11018 *
11019 * Description:
11020 * Copy out a copy chain ("copy") into newly-allocated
11021 * space in the destination map.
11022 *
11023 * If successful, consumes the copy object.
11024 * Otherwise, the caller is responsible for it.
11025 */
11026 kern_return_t
11027 vm_map_copyout(
11028 vm_map_t dst_map,
11029 vm_map_address_t *dst_addr, /* OUT */
11030 vm_map_copy_t copy)
11031 {
11032 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
11033 TRUE, /* consume_on_success */
11034 VM_PROT_DEFAULT,
11035 VM_PROT_ALL,
11036 VM_INHERIT_DEFAULT);
11037 }
11038
11039 kern_return_t
11040 vm_map_copyout_internal(
11041 vm_map_t dst_map,
11042 vm_map_address_t *dst_addr, /* OUT */
11043 vm_map_copy_t copy,
11044 vm_map_size_t copy_size,
11045 boolean_t consume_on_success,
11046 vm_prot_t cur_protection,
11047 vm_prot_t max_protection,
11048 vm_inherit_t inheritance)
11049 {
11050 vm_map_size_t size;
11051 vm_map_size_t adjustment;
11052 vm_map_offset_t start;
11053 vm_object_offset_t vm_copy_start;
11054 vm_map_entry_t last;
11055 vm_map_entry_t entry;
11056 vm_map_entry_t hole_entry;
11057 vm_map_copy_t original_copy;
11058
11059 /*
11060 * Check for null copy object.
11061 */
11062
11063 if (copy == VM_MAP_COPY_NULL) {
11064 *dst_addr = 0;
11065 return KERN_SUCCESS;
11066 }
11067
11068 /*
11069 * Assert that the vm_map_copy is coming from the right
11070 * zone and hasn't been forged
11071 */
11072 vm_map_copy_require(copy);
11073
11074 if (copy->size != copy_size) {
11075 *dst_addr = 0;
11076 return KERN_FAILURE;
11077 }
11078
11079 /*
11080 * Check for special copy object, created
11081 * by vm_map_copyin_object.
11082 */
11083
11084 if (copy->type == VM_MAP_COPY_OBJECT) {
11085 vm_object_t object = copy->cpy_object;
11086 kern_return_t kr;
11087 vm_object_offset_t offset;
11088
11089 offset = vm_object_trunc_page(copy->offset);
11090 size = vm_map_round_page((copy_size +
11091 (vm_map_size_t)(copy->offset -
11092 offset)),
11093 VM_MAP_PAGE_MASK(dst_map));
11094 *dst_addr = 0;
11095 kr = vm_map_enter(dst_map, dst_addr, size,
11096 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
11097 VM_MAP_KERNEL_FLAGS_NONE,
11098 VM_KERN_MEMORY_NONE,
11099 object, offset, FALSE,
11100 VM_PROT_DEFAULT, VM_PROT_ALL,
11101 VM_INHERIT_DEFAULT);
11102 if (kr != KERN_SUCCESS) {
11103 return kr;
11104 }
11105 /* Account for non-page-aligned copy object */
11106 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
11107 if (consume_on_success) {
11108 zfree(vm_map_copy_zone, copy);
11109 }
11110 return KERN_SUCCESS;
11111 }
11112
11113 /*
11114 * Check for special kernel buffer allocated
11115 * by new_ipc_kmsg_copyin.
11116 */
11117
11118 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
11119 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
11120 copy, copy_size, FALSE,
11121 consume_on_success);
11122 }
11123
11124 original_copy = copy;
11125 if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
11126 kern_return_t kr;
11127 vm_map_copy_t target_copy;
11128 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
11129
11130 target_copy = VM_MAP_COPY_NULL;
11131 DEBUG4K_ADJUST("adjusting...\n");
11132 kr = vm_map_copy_adjust_to_target(
11133 copy,
11134 0, /* offset */
11135 copy->size, /* size */
11136 dst_map,
11137 TRUE, /* copy */
11138 &target_copy,
11139 &overmap_start,
11140 &overmap_end,
11141 &trimmed_start);
11142 if (kr != KERN_SUCCESS) {
11143 DEBUG4K_COPY("adjust failed 0x%x\n", kr);
11144 return kr;
11145 }
11146 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
11147 if (target_copy != copy) {
11148 copy = target_copy;
11149 }
11150 copy_size = copy->size;
11151 }
11152
11153 /*
11154 * Find space for the data
11155 */
11156
11157 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
11158 VM_MAP_COPY_PAGE_MASK(copy));
11159 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
11160 VM_MAP_COPY_PAGE_MASK(copy))
11161 - vm_copy_start;
11162
11163
11164 StartAgain:;
11165
11166 vm_map_lock(dst_map);
11167 if (dst_map->disable_vmentry_reuse == TRUE) {
11168 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
11169 last = entry;
11170 } else {
11171 if (dst_map->holelistenabled) {
11172 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
11173
11174 if (hole_entry == NULL) {
11175 /*
11176 * No more space in the map?
11177 */
11178 vm_map_unlock(dst_map);
11179 return KERN_NO_SPACE;
11180 }
11181
11182 last = hole_entry;
11183 start = last->vme_start;
11184 } else {
11185 assert(first_free_is_valid(dst_map));
11186 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
11187 vm_map_min(dst_map) : last->vme_end;
11188 }
11189 start = vm_map_round_page(start,
11190 VM_MAP_PAGE_MASK(dst_map));
11191 }
11192
11193 while (TRUE) {
11194 vm_map_entry_t next = last->vme_next;
11195 vm_map_offset_t end = start + size;
11196
11197 if ((end > dst_map->max_offset) || (end < start)) {
11198 if (dst_map->wait_for_space) {
11199 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
11200 assert_wait((event_t) dst_map,
11201 THREAD_INTERRUPTIBLE);
11202 vm_map_unlock(dst_map);
11203 thread_block(THREAD_CONTINUE_NULL);
11204 goto StartAgain;
11205 }
11206 }
11207 vm_map_unlock(dst_map);
11208 return KERN_NO_SPACE;
11209 }
11210
11211 if (dst_map->holelistenabled) {
11212 if (last->vme_end >= end) {
11213 break;
11214 }
11215 } else {
11216 /*
11217 * If there are no more entries, we must win.
11218 *
11219 * OR
11220 *
11221 * If there is another entry, it must be
11222 * after the end of the potential new region.
11223 */
11224
11225 if (next == vm_map_to_entry(dst_map)) {
11226 break;
11227 }
11228
11229 if (next->vme_start >= end) {
11230 break;
11231 }
11232 }
11233
11234 last = next;
11235
11236 if (dst_map->holelistenabled) {
11237 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
11238 /*
11239 * Wrapped around
11240 */
11241 vm_map_unlock(dst_map);
11242 return KERN_NO_SPACE;
11243 }
11244 start = last->vme_start;
11245 } else {
11246 start = last->vme_end;
11247 }
11248 start = vm_map_round_page(start,
11249 VM_MAP_PAGE_MASK(dst_map));
11250 }
11251
11252 if (dst_map->holelistenabled) {
11253 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
11254 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
11255 }
11256 }
11257
11258
11259 adjustment = start - vm_copy_start;
11260 if (!consume_on_success) {
11261 /*
11262 * We're not allowed to consume "copy", so we'll have to
11263 * copy its map entries into the destination map below.
11264 * No need to re-allocate map entries from the correct
11265 * (pageable or not) zone, since we'll get new map entries
11266 * during the transfer.
11267 * We'll also adjust the map entries's "start" and "end"
11268 * during the transfer, to keep "copy"'s entries consistent
11269 * with its "offset".
11270 */
11271 goto after_adjustments;
11272 }
11273
11274 /*
11275 * Since we're going to just drop the map
11276 * entries from the copy into the destination
11277 * map, they must come from the same pool.
11278 */
11279
11280 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
11281 /*
11282 * Mismatches occur when dealing with the default
11283 * pager.
11284 */
11285 zone_t old_zone;
11286 vm_map_entry_t next, new;
11287
11288 /*
11289 * Find the zone that the copies were allocated from
11290 */
11291
11292 entry = vm_map_copy_first_entry(copy);
11293
11294 /*
11295 * Reinitialize the copy so that vm_map_copy_entry_link
11296 * will work.
11297 */
11298 vm_map_store_copy_reset(copy, entry);
11299 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
11300
11301 /*
11302 * Copy each entry.
11303 */
11304 while (entry != vm_map_copy_to_entry(copy)) {
11305 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11306 vm_map_entry_copy_full(new, entry);
11307 new->vme_no_copy_on_read = FALSE;
11308 assert(!new->iokit_acct);
11309 if (new->is_sub_map) {
11310 /* clr address space specifics */
11311 new->use_pmap = FALSE;
11312 }
11313 vm_map_copy_entry_link(copy,
11314 vm_map_copy_last_entry(copy),
11315 new);
11316 next = entry->vme_next;
11317 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
11318 zfree(old_zone, entry);
11319 entry = next;
11320 }
11321 }
11322
11323 /*
11324 * Adjust the addresses in the copy chain, and
11325 * reset the region attributes.
11326 */
11327
11328 for (entry = vm_map_copy_first_entry(copy);
11329 entry != vm_map_copy_to_entry(copy);
11330 entry = entry->vme_next) {
11331 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
11332 /*
11333 * We're injecting this copy entry into a map that
11334 * has the standard page alignment, so clear
11335 * "map_aligned" (which might have been inherited
11336 * from the original map entry).
11337 */
11338 entry->map_aligned = FALSE;
11339 }
11340
11341 entry->vme_start += adjustment;
11342 entry->vme_end += adjustment;
11343
11344 if (entry->map_aligned) {
11345 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
11346 VM_MAP_PAGE_MASK(dst_map)));
11347 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
11348 VM_MAP_PAGE_MASK(dst_map)));
11349 }
11350
11351 entry->inheritance = VM_INHERIT_DEFAULT;
11352 entry->protection = VM_PROT_DEFAULT;
11353 entry->max_protection = VM_PROT_ALL;
11354 entry->behavior = VM_BEHAVIOR_DEFAULT;
11355
11356 /*
11357 * If the entry is now wired,
11358 * map the pages into the destination map.
11359 */
11360 if (entry->wired_count != 0) {
11361 vm_map_offset_t va;
11362 vm_object_offset_t offset;
11363 vm_object_t object;
11364 vm_prot_t prot;
11365 int type_of_fault;
11366
11367 /* TODO4K would need to use actual page size */
11368 assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);
11369
11370 object = VME_OBJECT(entry);
11371 offset = VME_OFFSET(entry);
11372 va = entry->vme_start;
11373
11374 pmap_pageable(dst_map->pmap,
11375 entry->vme_start,
11376 entry->vme_end,
11377 TRUE);
11378
11379 while (va < entry->vme_end) {
11380 vm_page_t m;
11381 struct vm_object_fault_info fault_info = {};
11382
11383 /*
11384 * Look up the page in the object.
11385 * Assert that the page will be found in the
11386 * top object:
11387 * either
11388 * the object was newly created by
11389 * vm_object_copy_slowly, and has
11390 * copies of all of the pages from
11391 * the source object
11392 * or
11393 * the object was moved from the old
11394 * map entry; because the old map
11395 * entry was wired, all of the pages
11396 * were in the top-level object.
11397 * (XXX not true if we wire pages for
11398 * reading)
11399 */
11400 vm_object_lock(object);
11401
11402 m = vm_page_lookup(object, offset);
11403 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
11404 m->vmp_absent) {
11405 panic("vm_map_copyout: wiring %p", m);
11406 }
11407
11408 prot = entry->protection;
11409
11410 if (override_nx(dst_map, VME_ALIAS(entry)) &&
11411 prot) {
11412 prot |= VM_PROT_EXECUTE;
11413 }
11414
11415 type_of_fault = DBG_CACHE_HIT_FAULT;
11416
11417 fault_info.user_tag = VME_ALIAS(entry);
11418 fault_info.pmap_options = 0;
11419 if (entry->iokit_acct ||
11420 (!entry->is_sub_map && !entry->use_pmap)) {
11421 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11422 }
11423
11424 vm_fault_enter(m,
11425 dst_map->pmap,
11426 va,
11427 PAGE_SIZE, 0,
11428 prot,
11429 prot,
11430 VM_PAGE_WIRED(m),
11431 FALSE, /* change_wiring */
11432 VM_KERN_MEMORY_NONE, /* tag - not wiring */
11433 &fault_info,
11434 NULL, /* need_retry */
11435 &type_of_fault);
11436
11437 vm_object_unlock(object);
11438
11439 offset += PAGE_SIZE_64;
11440 va += PAGE_SIZE;
11441 }
11442 }
11443 }
11444
11445 after_adjustments:
11446
11447 /*
11448 * Correct the page alignment for the result
11449 */
11450
11451 *dst_addr = start + (copy->offset - vm_copy_start);
11452
11453 #if KASAN
11454 kasan_notify_address(*dst_addr, size);
11455 #endif
11456
11457 /*
11458 * Update the hints and the map size
11459 */
11460
11461 if (consume_on_success) {
11462 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
11463 } else {
11464 SAVE_HINT_MAP_WRITE(dst_map, last);
11465 }
11466
11467 dst_map->size += size;
11468
11469 /*
11470 * Link in the copy
11471 */
11472
11473 if (consume_on_success) {
11474 vm_map_copy_insert(dst_map, last, copy);
11475 if (copy != original_copy) {
11476 vm_map_copy_discard(original_copy);
11477 original_copy = VM_MAP_COPY_NULL;
11478 }
11479 } else {
11480 vm_map_copy_remap(dst_map, last, copy, adjustment,
11481 cur_protection, max_protection,
11482 inheritance);
11483 if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
11484 vm_map_copy_discard(copy);
11485 copy = original_copy;
11486 }
11487 }
11488
11489
11490 vm_map_unlock(dst_map);
11491
11492 /*
11493 * XXX If wiring_required, call vm_map_pageable
11494 */
11495
11496 return KERN_SUCCESS;
11497 }
11498
11499 /*
11500 * Routine: vm_map_copyin
11501 *
11502 * Description:
11503 * see vm_map_copyin_common. Exported via Unsupported.exports.
11504 *
11505 */
11506
11507 #undef vm_map_copyin
11508
11509 kern_return_t
11510 vm_map_copyin(
11511 vm_map_t src_map,
11512 vm_map_address_t src_addr,
11513 vm_map_size_t len,
11514 boolean_t src_destroy,
11515 vm_map_copy_t *copy_result) /* OUT */
11516 {
11517 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11518 FALSE, copy_result, FALSE);
11519 }
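/*
 * Illustrative usage sketch (not part of the original source): a typical
 * in-kernel caller pairs vm_map_copyin() with vm_map_copyout() and must
 * discard the copy object itself if the copyout fails (the copy is only
 * consumed on success); "src_map", "src_addr", "len" and "dst_map" are
 * hypothetical here:
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS) {
 *		return kr;
 *	}
 *	kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);
 *	}
 */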
11520
11521 /*
11522 * Routine: vm_map_copyin_common
11523 *
11524 * Description:
11525 * Copy the specified region (src_addr, len) from the
11526 * source address space (src_map), possibly removing
11527 * the region from the source address space (src_destroy).
11528 *
11529 * Returns:
11530 * A vm_map_copy_t object (copy_result), suitable for
11531 * insertion into another address space (using vm_map_copyout),
11532 * copying over another address space region (using
11533 * vm_map_copy_overwrite). If the copy is unused, it
11534 * should be destroyed (using vm_map_copy_discard).
11535 *
11536 * In/out conditions:
11537 * The source map should not be locked on entry.
11538 */
11539
11540 typedef struct submap_map {
11541 vm_map_t parent_map;
11542 vm_map_offset_t base_start;
11543 vm_map_offset_t base_end;
11544 vm_map_size_t base_len;
11545 struct submap_map *next;
11546 } submap_map_t;
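/*
 * Note (illustrative, not part of the original source): submap_map_t
 * records form a LIFO stack that vm_map_copyin_internal() maintains while
 * descending through nested submaps; each descent pushes the parent map
 * and the base address range it covered:
 *
 *	ptr->next = parent_maps;
 *	parent_maps = ptr;
 *
 * The stack is torn down (dropping the extra map references) on the error
 * paths via the RETURN() cleanup macro defined further below, and as the
 * copy climbs back out of each submap.
 */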
11547
11548 kern_return_t
11549 vm_map_copyin_common(
11550 vm_map_t src_map,
11551 vm_map_address_t src_addr,
11552 vm_map_size_t len,
11553 boolean_t src_destroy,
11554 __unused boolean_t src_volatile,
11555 vm_map_copy_t *copy_result, /* OUT */
11556 boolean_t use_maxprot)
11557 {
11558 int flags;
11559
11560 flags = 0;
11561 if (src_destroy) {
11562 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11563 }
11564 if (use_maxprot) {
11565 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11566 }
11567 return vm_map_copyin_internal(src_map,
11568 src_addr,
11569 len,
11570 flags,
11571 copy_result);
11572 }
11573 kern_return_t
11574 vm_map_copyin_internal(
11575 vm_map_t src_map,
11576 vm_map_address_t src_addr,
11577 vm_map_size_t len,
11578 int flags,
11579 vm_map_copy_t *copy_result) /* OUT */
11580 {
11581 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11582 * in multi-level lookup, this
11583 * entry contains the actual
11584 * vm_object/offset.
11585 */
11586 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11587
11588 vm_map_offset_t src_start; /* Start of current entry --
11589 * where copy is taking place now
11590 */
11591 vm_map_offset_t src_end; /* End of entire region to be
11592 * copied */
11593 vm_map_offset_t src_base;
11594 vm_map_t base_map = src_map;
11595 boolean_t map_share = FALSE;
11596 submap_map_t *parent_maps = NULL;
11597
11598 vm_map_copy_t copy; /* Resulting copy */
11599 vm_map_address_t copy_addr;
11600 vm_map_size_t copy_size;
11601 boolean_t src_destroy;
11602 boolean_t use_maxprot;
11603 boolean_t preserve_purgeable;
11604 boolean_t entry_was_shared;
11605 vm_map_entry_t saved_src_entry;
11606
11607 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11608 return KERN_INVALID_ARGUMENT;
11609 }
11610
11611 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11612 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11613 preserve_purgeable =
11614 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11615
11616 /*
11617 * Check for copies of zero bytes.
11618 */
11619
11620 if (len == 0) {
11621 *copy_result = VM_MAP_COPY_NULL;
11622 return KERN_SUCCESS;
11623 }
11624
11625 /*
11626 * Check that the end address doesn't overflow
11627 */
11628 src_end = src_addr + len;
11629 if (src_end < src_addr) {
11630 return KERN_INVALID_ADDRESS;
11631 }
11632
11633 /*
11634 * Compute (page aligned) start and end of region
11635 */
11636 src_start = vm_map_trunc_page(src_addr,
11637 VM_MAP_PAGE_MASK(src_map));
11638 src_end = vm_map_round_page(src_end,
11639 VM_MAP_PAGE_MASK(src_map));
11640
11641 /*
11642 * If the copy is sufficiently small, use a kernel buffer instead
11643 * of making a virtual copy. The theory being that the cost of
11644 * setting up VM (and taking C-O-W faults) dominates the copy costs
11645 * for small regions.
11646 */
11647 if ((len < msg_ool_size_small) &&
11648 !use_maxprot &&
11649 !preserve_purgeable &&
11650 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11651 /*
11652 * Since the "msg_ool_size_small" threshold was increased and
11653 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11654 * address space limits, we revert to doing a virtual copy if the
11655 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11656 * of the commpage would now fail when it used to work.
11657 */
11658 (src_start >= vm_map_min(src_map) &&
11659 src_start < vm_map_max(src_map) &&
11660 src_end >= vm_map_min(src_map) &&
11661 src_end < vm_map_max(src_map))) {
11662 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11663 src_destroy, copy_result);
11664 }
11665
11666 /*
11667 * Allocate a header element for the list.
11668 *
11669 * Use the start and end in the header to
11670 * remember the endpoints prior to rounding.
11671 */
11672
11673 copy = vm_map_copy_allocate();
11674 copy->type = VM_MAP_COPY_ENTRY_LIST;
11675 copy->cpy_hdr.entries_pageable = TRUE;
11676 copy->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(src_map);
11677
11678 vm_map_store_init( &(copy->cpy_hdr));
11679
11680 copy->offset = src_addr;
11681 copy->size = len;
11682
11683 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11684
11685 #define RETURN(x) \
11686 MACRO_BEGIN \
11687 vm_map_unlock(src_map); \
11688 if(src_map != base_map) \
11689 vm_map_deallocate(src_map); \
11690 if (new_entry != VM_MAP_ENTRY_NULL) \
11691 vm_map_copy_entry_dispose(copy,new_entry); \
11692 vm_map_copy_discard(copy); \
11693 { \
11694 submap_map_t *_ptr; \
11695 \
11696 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11697 parent_maps=parent_maps->next; \
11698 if (_ptr->parent_map != base_map) \
11699 vm_map_deallocate(_ptr->parent_map); \
11700 kfree(_ptr, sizeof(submap_map_t)); \
11701 } \
11702 } \
11703 MACRO_RETURN(x); \
11704 MACRO_END
11705
11706 /*
11707 * Find the beginning of the region.
11708 */
11709
11710 vm_map_lock(src_map);
11711
11712 /*
11713 * Lookup the original "src_addr" rather than the truncated
11714 * "src_start", in case "src_start" falls in a non-map-aligned
11715 * map entry *before* the map entry that contains "src_addr"...
11716 */
11717 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11718 RETURN(KERN_INVALID_ADDRESS);
11719 }
11720 if (!tmp_entry->is_sub_map) {
11721 /*
11722 * ... but clip to the map-rounded "src_start" rather than
11723 * "src_addr" to preserve map-alignment. We'll adjust the
11724 * first copy entry at the end, if needed.
11725 */
11726 vm_map_clip_start(src_map, tmp_entry, src_start);
11727 }
11728 if (src_start < tmp_entry->vme_start) {
11729 /*
11730 * Move "src_start" up to the start of the
11731 * first map entry to copy.
11732 */
11733 src_start = tmp_entry->vme_start;
11734 }
11735 /* set for later submap fix-up */
11736 copy_addr = src_start;
11737
11738 /*
11739 * Go through entries until we get to the end.
11740 */
11741
11742 while (TRUE) {
11743 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11744 vm_map_size_t src_size; /* Size of source
11745 * map entry (in both
11746 * maps)
11747 */
11748
11749 vm_object_t src_object; /* Object to copy */
11750 vm_object_offset_t src_offset;
11751
11752 boolean_t src_needs_copy; /* Should source map
11753 * be made read-only
11754 * for copy-on-write?
11755 */
11756
11757 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11758
11759 boolean_t was_wired; /* Was source wired? */
11760 vm_map_version_t version; /* Version before locks
11761 * dropped to make copy
11762 */
11763 kern_return_t result; /* Return value from
11764 * copy_strategically.
11765 */
11766 while (tmp_entry->is_sub_map) {
11767 vm_map_size_t submap_len;
11768 submap_map_t *ptr;
11769
11770 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11771 ptr->next = parent_maps;
11772 parent_maps = ptr;
11773 ptr->parent_map = src_map;
11774 ptr->base_start = src_start;
11775 ptr->base_end = src_end;
11776 submap_len = tmp_entry->vme_end - src_start;
11777 if (submap_len > (src_end - src_start)) {
11778 submap_len = src_end - src_start;
11779 }
11780 ptr->base_len = submap_len;
11781
11782 src_start -= tmp_entry->vme_start;
11783 src_start += VME_OFFSET(tmp_entry);
11784 src_end = src_start + submap_len;
11785 src_map = VME_SUBMAP(tmp_entry);
11786 vm_map_lock(src_map);
11787 /* keep an outstanding reference for all maps in */
11788 /* the parent tree except the base map */
11789 vm_map_reference(src_map);
11790 vm_map_unlock(ptr->parent_map);
11791 if (!vm_map_lookup_entry(
11792 src_map, src_start, &tmp_entry)) {
11793 RETURN(KERN_INVALID_ADDRESS);
11794 }
11795 map_share = TRUE;
11796 if (!tmp_entry->is_sub_map) {
11797 vm_map_clip_start(src_map, tmp_entry, src_start);
11798 }
11799 src_entry = tmp_entry;
11800 }
11801 /* we are now in the lowest level submap... */
11802
11803 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11804 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11805 /* This is not supported for now. In the */
11806 /* future we will need to detect the */
11807 /* phys_contiguous condition and upgrade */
11808 /* copy_slowly to do a physical copy from */
11809 /* the device-memory-based object. We can */
11810 /* piggy-back off of the was_wired boolean */
11811 /* to set up the proper handling. */
11812 RETURN(KERN_PROTECTION_FAILURE);
11813 }
11814 /*
11815 * Create a new address map entry to hold the result.
11816 * Fill in the fields from the appropriate source entries.
11817 * We must unlock the source map to do this if we need
11818 * to allocate a map entry.
11819 */
11820 if (new_entry == VM_MAP_ENTRY_NULL) {
11821 version.main_timestamp = src_map->timestamp;
11822 vm_map_unlock(src_map);
11823
11824 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11825
11826 vm_map_lock(src_map);
11827 if ((version.main_timestamp + 1) != src_map->timestamp) {
11828 if (!vm_map_lookup_entry(src_map, src_start,
11829 &tmp_entry)) {
11830 RETURN(KERN_INVALID_ADDRESS);
11831 }
11832 if (!tmp_entry->is_sub_map) {
11833 vm_map_clip_start(src_map, tmp_entry, src_start);
11834 }
11835 continue; /* restart w/ new tmp_entry */
11836 }
11837 }
11838
11839 /*
11840 * Verify that the region can be read.
11841 */
11842 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11843 !use_maxprot) ||
11844 (src_entry->max_protection & VM_PROT_READ) == 0) {
11845 RETURN(KERN_PROTECTION_FAILURE);
11846 }
11847
11848 /*
11849 * Clip against the endpoints of the entire region.
11850 */
11851
11852 vm_map_clip_end(src_map, src_entry, src_end);
11853
11854 src_size = src_entry->vme_end - src_start;
11855 src_object = VME_OBJECT(src_entry);
11856 src_offset = VME_OFFSET(src_entry);
11857 was_wired = (src_entry->wired_count != 0);
11858
11859 vm_map_entry_copy(src_map, new_entry, src_entry);
11860 if (new_entry->is_sub_map) {
11861 /* clr address space specifics */
11862 new_entry->use_pmap = FALSE;
11863 } else {
11864 /*
11865 * We're dealing with a copy-on-write operation,
11866 * so the resulting mapping should not inherit the
11867 * original mapping's accounting settings.
11868 * "iokit_acct" should have been cleared in
11869 * vm_map_entry_copy().
11870 * "use_pmap" should be reset to its default (TRUE)
11871 * so that the new mapping gets accounted for in
11872 * the task's memory footprint.
11873 */
11874 assert(!new_entry->iokit_acct);
11875 new_entry->use_pmap = TRUE;
11876 }
11877
11878 /*
11879 * Attempt non-blocking copy-on-write optimizations.
11880 */
11881
11882 /*
11883 * If we are destroying the source, and the object
11884 * is internal, we could move the object reference
11885 * from the source to the copy. The copy is
11886 * copy-on-write only if the source is.
11887 * We make another reference to the object, because
11888 * destroying the source entry will deallocate it.
11889 *
11890 * This memory transfer has to be atomic (to prevent
11891 * the VM object from being shared or copied while
11892 * it's being moved here), so we could only do this
11893 * if we won't have to unlock the VM map until the
11894 * original mapping has been fully removed.
11895 */
11896
11897 RestartCopy:
11898 if ((src_object == VM_OBJECT_NULL ||
11899 (!was_wired && !map_share && !tmp_entry->is_shared
11900 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11901 vm_object_copy_quickly(
11902 VME_OBJECT_PTR(new_entry),
11903 src_offset,
11904 src_size,
11905 &src_needs_copy,
11906 &new_entry_needs_copy)) {
11907 new_entry->needs_copy = new_entry_needs_copy;
11908
11909 /*
11910 * Handle copy-on-write obligations
11911 */
11912
11913 if (src_needs_copy && !tmp_entry->needs_copy) {
11914 vm_prot_t prot;
11915
11916 prot = src_entry->protection & ~VM_PROT_WRITE;
11917
11918 if (override_nx(src_map, VME_ALIAS(src_entry))
11919 && prot) {
11920 prot |= VM_PROT_EXECUTE;
11921 }
11922
11923 vm_object_pmap_protect(
11924 src_object,
11925 src_offset,
11926 src_size,
11927 (src_entry->is_shared ?
11928 PMAP_NULL
11929 : src_map->pmap),
11930 VM_MAP_PAGE_SIZE(src_map),
11931 src_entry->vme_start,
11932 prot);
11933
11934 assert(tmp_entry->wired_count == 0);
11935 tmp_entry->needs_copy = TRUE;
11936 }
11937
11938 /*
11939 * The map has never been unlocked, so it's safe
11940 * to move to the next entry rather than doing
11941 * another lookup.
11942 */
11943
11944 goto CopySuccessful;
11945 }
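/*
 * Editor's note: an illustrative sketch, not part of the build. It restates
 * the eligibility test for the symmetric "quick" copy above as a standalone
 * predicate; "can_copy_quickly" is a hypothetical name introduced only for
 * this illustration -- the real code tests the condition inline.
 */
#if 0
static boolean_t
can_copy_quickly(
	vm_object_t     src_object,
	boolean_t       was_wired,
	boolean_t       map_share,
	vm_map_entry_t  tmp_entry,
	vm_map_t        src_map)
{
	/* a NULL source object can always take the quick path */
	if (src_object == VM_OBJECT_NULL) {
		return TRUE;
	}
	/* wired or shared mappings must use one of the slower paths */
	if (was_wired || map_share || tmp_entry->is_shared) {
		return FALSE;
	}
	/* the debug4k small-page case also forces the slow path */
	if (debug4k_no_cow_copyin &&
	    VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
		return FALSE;
	}
	return TRUE;
}
#endif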
11946
11947 entry_was_shared = tmp_entry->is_shared;
11948
11949 /*
11950 * Take an object reference, so that we may
11951 * release the map lock(s).
11952 */
11953
11954 assert(src_object != VM_OBJECT_NULL);
11955 vm_object_reference(src_object);
11956
11957 /*
11958 * Record the timestamp for later verification.
11959 * Unlock the map.
11960 */
11961
11962 version.main_timestamp = src_map->timestamp;
11963 vm_map_unlock(src_map); /* Increments timestamp once! */
11964 saved_src_entry = src_entry;
11965 tmp_entry = VM_MAP_ENTRY_NULL;
11966 src_entry = VM_MAP_ENTRY_NULL;
11967
11968 /*
11969 * Perform the copy
11970 */
11971
11972 if (was_wired ||
11973 (debug4k_no_cow_copyin &&
11974 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
11975 CopySlowly:
11976 vm_object_lock(src_object);
11977 result = vm_object_copy_slowly(
11978 src_object,
11979 src_offset,
11980 src_size,
11981 THREAD_UNINT,
11982 VME_OBJECT_PTR(new_entry));
11983 VME_OFFSET_SET(new_entry,
11984 src_offset - vm_object_trunc_page(src_offset));
11985 new_entry->needs_copy = FALSE;
11986 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11987 (entry_was_shared || map_share)) {
11988 vm_object_t new_object;
11989
11990 vm_object_lock_shared(src_object);
11991 new_object = vm_object_copy_delayed(
11992 src_object,
11993 src_offset,
11994 src_size,
11995 TRUE);
11996 if (new_object == VM_OBJECT_NULL) {
11997 goto CopySlowly;
11998 }
11999
12000 VME_OBJECT_SET(new_entry, new_object);
12001 assert(new_entry->wired_count == 0);
12002 new_entry->needs_copy = TRUE;
12003 assert(!new_entry->iokit_acct);
12004 assert(new_object->purgable == VM_PURGABLE_DENY);
12005 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
12006 result = KERN_SUCCESS;
12007 } else {
12008 vm_object_offset_t new_offset;
12009 new_offset = VME_OFFSET(new_entry);
12010 result = vm_object_copy_strategically(src_object,
12011 src_offset,
12012 src_size,
12013 VME_OBJECT_PTR(new_entry),
12014 &new_offset,
12015 &new_entry_needs_copy);
12016 if (new_offset != VME_OFFSET(new_entry)) {
12017 VME_OFFSET_SET(new_entry, new_offset);
12018 }
12019
12020 new_entry->needs_copy = new_entry_needs_copy;
12021 }
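/*
 * Editor's note: an illustrative sketch, not part of the build. It condenses
 * the copy-strategy decision made by the if/else chain above; the three arms
 * correspond to vm_object_copy_slowly(), vm_object_copy_delayed() and
 * vm_object_copy_strategically() respectively.
 */
#if 0
	if (was_wired ||
	    (debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
		/* wired (or forced-slow) source: copy the pages right now */
		/* vm_object_copy_slowly(...) */
	} else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
	    (entry_was_shared || map_share)) {
		/* shared symmetric object: take a delayed (copy-on-write) copy, */
		/* falling back to the slow path if no copy object is returned */
		/* vm_object_copy_delayed(...) */
	} else {
		/* otherwise defer to the object's own copy strategy */
		/* vm_object_copy_strategically(...) */
	}
#endif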
12022
12023 if (result == KERN_SUCCESS &&
12024 ((preserve_purgeable &&
12025 src_object->purgable != VM_PURGABLE_DENY) ||
12026 new_entry->used_for_jit)) {
12027 /*
12028 * Purgeable objects should be COPY_NONE, true share;
12029 * this should be propagated to the copy.
12030 *
12031 * Also force mappings the pmap specially protects to
12032 * be COPY_NONE; trying to COW these mappings would
12033 * change the effective protections, which could have
12034 * side effects if the pmap layer relies on the
12035 * specified protections.
12036 */
12037
12038 vm_object_t new_object;
12039
12040 new_object = VME_OBJECT(new_entry);
12041 assert(new_object != src_object);
12042 vm_object_lock(new_object);
12043 assert(new_object->ref_count == 1);
12044 assert(new_object->shadow == VM_OBJECT_NULL);
12045 assert(new_object->copy == VM_OBJECT_NULL);
12046 assert(new_object->vo_owner == NULL);
12047
12048 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
12049
12050 if (preserve_purgeable &&
12051 src_object->purgable != VM_PURGABLE_DENY) {
12052 new_object->true_share = TRUE;
12053
12054 /* start as non-volatile with no owner... */
12055 new_object->purgable = VM_PURGABLE_NONVOLATILE;
12056 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
12057 /* ... and move to src_object's purgeable state */
12058 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
12059 int state;
12060 state = src_object->purgable;
12061 vm_object_purgable_control(
12062 new_object,
12063 VM_PURGABLE_SET_STATE_FROM_KERNEL,
12064 &state);
12065 }
12066 /* no pmap accounting for purgeable objects */
12067 new_entry->use_pmap = FALSE;
12068 }
12069
12070 vm_object_unlock(new_object);
12071 new_object = VM_OBJECT_NULL;
12072 }
12073
12074 if (result != KERN_SUCCESS &&
12075 result != KERN_MEMORY_RESTART_COPY) {
12076 vm_map_lock(src_map);
12077 RETURN(result);
12078 }
12079
12080 /*
12081 * Throw away the extra reference
12082 */
12083
12084 vm_object_deallocate(src_object);
12085
12086 /*
12087 * Verify that the map has not substantially
12088 * changed while the copy was being made.
12089 */
12090
12091 vm_map_lock(src_map);
12092
12093 if ((version.main_timestamp + 1) == src_map->timestamp) {
12094 /* src_map hasn't changed: src_entry is still valid */
12095 src_entry = saved_src_entry;
12096 goto VerificationSuccessful;
12097 }
12098
12099 /*
12100 * Simple version comparison failed.
12101 *
12102 * Retry the lookup and verify that the
12103 * same object/offset are still present.
12104 *
12105 * [Note: a memory manager that colludes with
12106 * the calling task can detect that we have
12107 * cheated. While the map was unlocked, the
12108 * mapping could have been changed and restored.]
12109 */
12110
12111 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
12112 if (result != KERN_MEMORY_RESTART_COPY) {
12113 vm_object_deallocate(VME_OBJECT(new_entry));
12114 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
12115 /* reset accounting state */
12116 new_entry->iokit_acct = FALSE;
12117 new_entry->use_pmap = TRUE;
12118 }
12119 RETURN(KERN_INVALID_ADDRESS);
12120 }
12121
12122 src_entry = tmp_entry;
12123 vm_map_clip_start(src_map, src_entry, src_start);
12124
12125 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12126 !use_maxprot) ||
12127 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12128 goto VerificationFailed;
12129 }
12130
12131 if (src_entry->vme_end < new_entry->vme_end) {
12132 /*
12133 * This entry might have been shortened
12134 * (vm_map_clip_end) or been replaced with
12135 * an entry that ends closer to "src_start"
12136 * than before.
12137 * Adjust "new_entry" accordingly; copying
12138 * less memory would be correct but we also
12139 * redo the copy (see below) if the new entry
12140 * no longer points at the same object/offset.
12141 */
12142 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12143 VM_MAP_COPY_PAGE_MASK(copy)));
12144 new_entry->vme_end = src_entry->vme_end;
12145 src_size = new_entry->vme_end - src_start;
12146 } else if (src_entry->vme_end > new_entry->vme_end) {
12147 /*
12148 * This entry might have been extended
12149 * (vm_map_entry_simplify() or coalesce)
12150 * or been replaced with an entry that ends farther
12151 * from "src_start" than before.
12152 *
12153 * We've called vm_object_copy_*() only on
12154 * the previous <start:end> range, so we can't
12155 * just extend new_entry. We have to re-do
12156 * the copy based on the new entry as if it was
12157 * pointing at a different object/offset (see
12158 * "Verification failed" below).
12159 */
12160 }
12161
12162 if ((VME_OBJECT(src_entry) != src_object) ||
12163 (VME_OFFSET(src_entry) != src_offset) ||
12164 (src_entry->vme_end > new_entry->vme_end)) {
12165 /*
12166 * Verification failed.
12167 *
12168 * Start over with this top-level entry.
12169 */
12170
12171 VerificationFailed: ;
12172
12173 vm_object_deallocate(VME_OBJECT(new_entry));
12174 tmp_entry = src_entry;
12175 continue;
12176 }
12177
12178 /*
12179 * Verification succeeded.
12180 */
12181
12182 VerificationSuccessful:;
12183
12184 if (result == KERN_MEMORY_RESTART_COPY) {
12185 goto RestartCopy;
12186 }
12187
12188 /*
12189 * Copy succeeded.
12190 */
12191
12192 CopySuccessful: ;
12193
12194 /*
12195 * Link in the new copy entry.
12196 */
12197
12198 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12199 new_entry);
12200
12201 /*
12202 * Determine whether the entire region
12203 * has been copied.
12204 */
12205 src_base = src_start;
12206 src_start = new_entry->vme_end;
12207 new_entry = VM_MAP_ENTRY_NULL;
12208 while ((src_start >= src_end) && (src_end != 0)) {
12209 submap_map_t *ptr;
12210
12211 if (src_map == base_map) {
12212 /* back to the top */
12213 break;
12214 }
12215
12216 ptr = parent_maps;
12217 assert(ptr != NULL);
12218 parent_maps = parent_maps->next;
12219
12220 /* fix up the damage we did in that submap */
12221 vm_map_simplify_range(src_map,
12222 src_base,
12223 src_end);
12224
12225 vm_map_unlock(src_map);
12226 vm_map_deallocate(src_map);
12227 vm_map_lock(ptr->parent_map);
12228 src_map = ptr->parent_map;
12229 src_base = ptr->base_start;
12230 src_start = ptr->base_start + ptr->base_len;
12231 src_end = ptr->base_end;
12232 if (!vm_map_lookup_entry(src_map,
12233 src_start,
12234 &tmp_entry) &&
12235 (src_end > src_start)) {
12236 RETURN(KERN_INVALID_ADDRESS);
12237 }
12238 kfree(ptr, sizeof(submap_map_t));
12239 if (parent_maps == NULL) {
12240 map_share = FALSE;
12241 }
12242 src_entry = tmp_entry->vme_prev;
12243 }
12244
12245 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12246 (src_start >= src_addr + len) &&
12247 (src_addr + len != 0)) {
12248 /*
12249 * Stop copying now, even though we haven't reached
12250 * "src_end". We'll adjust the end of the last copy
12251 * entry at the end, if needed.
12252 *
12253 * If src_map's alignment is different from the
12254 * system's page-alignment, there could be
12255 * extra non-map-aligned map entries between
12256 * the original (non-rounded) "src_addr + len"
12257 * and the rounded "src_end".
12258 * We do not want to copy those map entries since
12259 * they're not part of the copied range.
12260 */
12261 break;
12262 }
12263
12264 if ((src_start >= src_end) && (src_end != 0)) {
12265 break;
12266 }
12267
12268 /*
12269 * Verify that there are no gaps in the region
12270 */
12271
12272 tmp_entry = src_entry->vme_next;
12273 if ((tmp_entry->vme_start != src_start) ||
12274 (tmp_entry == vm_map_to_entry(src_map))) {
12275 RETURN(KERN_INVALID_ADDRESS);
12276 }
12277 }
12278
12279 /*
12280 * If the source should be destroyed, do it now, since the
12281 * copy was successful.
12282 */
12283 if (src_destroy) {
12284 (void) vm_map_delete(
12285 src_map,
12286 vm_map_trunc_page(src_addr,
12287 VM_MAP_PAGE_MASK(src_map)),
12288 src_end,
12289 ((src_map == kernel_map) ?
12290 VM_MAP_REMOVE_KUNWIRE :
12291 VM_MAP_REMOVE_NO_FLAGS),
12292 VM_MAP_NULL);
12293 } else {
12294 /* fix up the damage we did in the base map */
12295 vm_map_simplify_range(
12296 src_map,
12297 vm_map_trunc_page(src_addr,
12298 VM_MAP_PAGE_MASK(src_map)),
12299 vm_map_round_page(src_end,
12300 VM_MAP_PAGE_MASK(src_map)));
12301 }
12302
12303 vm_map_unlock(src_map);
12304 tmp_entry = VM_MAP_ENTRY_NULL;
12305
12306 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12307 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12308 vm_map_offset_t original_start, original_offset, original_end;
12309
12310 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12311
12312 /* adjust alignment of first copy_entry's "vme_start" */
12313 tmp_entry = vm_map_copy_first_entry(copy);
12314 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12315 vm_map_offset_t adjustment;
12316
12317 original_start = tmp_entry->vme_start;
12318 original_offset = VME_OFFSET(tmp_entry);
12319
12320 /* map-align the start of the first copy entry... */
12321 adjustment = (tmp_entry->vme_start -
12322 vm_map_trunc_page(
12323 tmp_entry->vme_start,
12324 VM_MAP_PAGE_MASK(src_map)));
12325 tmp_entry->vme_start -= adjustment;
12326 VME_OFFSET_SET(tmp_entry,
12327 VME_OFFSET(tmp_entry) - adjustment);
12328 copy_addr -= adjustment;
12329 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12330 /* ... adjust for mis-aligned start of copy range */
12331 adjustment =
12332 (vm_map_trunc_page(copy->offset,
12333 PAGE_MASK) -
12334 vm_map_trunc_page(copy->offset,
12335 VM_MAP_PAGE_MASK(src_map)));
12336 if (adjustment) {
12337 assert(page_aligned(adjustment));
12338 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12339 tmp_entry->vme_start += adjustment;
12340 VME_OFFSET_SET(tmp_entry,
12341 (VME_OFFSET(tmp_entry) +
12342 adjustment));
12343 copy_addr += adjustment;
12344 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12345 }
12346
12347 /*
12348 * Assert that the adjustments haven't exposed
12349 * more than was originally copied...
12350 */
12351 assert(tmp_entry->vme_start >= original_start);
12352 assert(VME_OFFSET(tmp_entry) >= original_offset);
12353 /*
12354 * ... and that it did not adjust outside of
12355 * a single 16K page.
12356 */
12357 assert(vm_map_trunc_page(tmp_entry->vme_start,
12358 VM_MAP_PAGE_MASK(src_map)) ==
12359 vm_map_trunc_page(original_start,
12360 VM_MAP_PAGE_MASK(src_map)));
12361 }
12362
12363 /* adjust alignment of last copy_entry's "vme_end" */
12364 tmp_entry = vm_map_copy_last_entry(copy);
12365 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12366 vm_map_offset_t adjustment;
12367
12368 original_end = tmp_entry->vme_end;
12369
12370 /* map-align the end of the last copy entry... */
12371 tmp_entry->vme_end =
12372 vm_map_round_page(tmp_entry->vme_end,
12373 VM_MAP_PAGE_MASK(src_map));
12374 /* ... adjust for mis-aligned end of copy range */
12375 adjustment =
12376 (vm_map_round_page((copy->offset +
12377 copy->size),
12378 VM_MAP_PAGE_MASK(src_map)) -
12379 vm_map_round_page((copy->offset +
12380 copy->size),
12381 PAGE_MASK));
12382 if (adjustment) {
12383 assert(page_aligned(adjustment));
12384 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12385 tmp_entry->vme_end -= adjustment;
12386 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12387 }
12388
12389 /*
12390 * Assert that the adjustments haven't exposed
12391 * more than was originally copied...
12392 */
12393 assert(tmp_entry->vme_end <= original_end);
12394 /*
12395 * ... and that it did not adjust outside of
12396 * a single 16K page.
12397 */
12398 assert(vm_map_round_page(tmp_entry->vme_end,
12399 VM_MAP_PAGE_MASK(src_map)) ==
12400 vm_map_round_page(original_end,
12401 VM_MAP_PAGE_MASK(src_map)));
12402 }
12403 }
12404
12405 /* Fix-up start and end points in copy. This is necessary */
12406 /* when the various entries in the copy object were picked */
12407 /* up from different sub-maps */
12408
12409 tmp_entry = vm_map_copy_first_entry(copy);
12410 copy_size = 0; /* compute actual size */
12411 while (tmp_entry != vm_map_copy_to_entry(copy)) {
12412 assert(VM_MAP_PAGE_ALIGNED(
12413 copy_addr + (tmp_entry->vme_end -
12414 tmp_entry->vme_start),
12415 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12416 assert(VM_MAP_PAGE_ALIGNED(
12417 copy_addr,
12418 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12419
12420 /*
12421 * The copy_entries will be injected directly into the
12422 * destination map and might not be "map aligned" there...
12423 */
12424 tmp_entry->map_aligned = FALSE;
12425
12426 tmp_entry->vme_end = copy_addr +
12427 (tmp_entry->vme_end - tmp_entry->vme_start);
12428 tmp_entry->vme_start = copy_addr;
12429 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12430 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12431 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12432 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12433 }
12434
12435 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12436 copy_size < copy->size) {
12437 /*
12438 * The actual size of the VM map copy is smaller than what
12439 * was requested by the caller. This must be because some
12440 * PAGE_SIZE-sized pages are missing at the end of the last
12441 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12442 * The caller might not have been aware of those missing
12443 * pages and might not want to be aware of it, which is
12444 * fine as long as they don't try to access (and crash on)
12445 * those missing pages.
12446 * Let's adjust the size of the "copy", to avoid failing
12447 * in vm_map_copyout() or vm_map_copy_overwrite().
12448 */
12449 assert(vm_map_round_page(copy_size,
12450 VM_MAP_PAGE_MASK(src_map)) ==
12451 vm_map_round_page(copy->size,
12452 VM_MAP_PAGE_MASK(src_map)));
12453 copy->size = copy_size;
12454 }
12455
12456 *copy_result = copy;
12457 return KERN_SUCCESS;
12458
12459 #undef RETURN
12460 }
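/*
 * Editor's note: an illustrative sketch, not part of the build. It shows a
 * minimal, hypothetical caller of the copy-in routine above
 * (vm_map_copyin_internal(), as called from vm_map_fork_copy() below) and the
 * ownership rule it implements: on success the caller owns "copy" and must
 * either insert it into a destination map or discard it. "some_map",
 * "some_addr" and "some_len" are placeholder names.
 */
#if 0
	vm_map_copy_t   copy;
	kern_return_t   kr;

	kr = vm_map_copyin_internal(some_map, some_addr, some_len,
	    VM_MAP_COPYIN_USE_MAXPROT, &copy);
	if (kr == KERN_SUCCESS) {
		/* ... use "copy", e.g. vm_map_copy_insert() into a target map ... */
		vm_map_copy_discard(copy);      /* if the copy ends up unused */
	}
#endif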
12461
12462 kern_return_t
12463 vm_map_copy_extract(
12464 vm_map_t src_map,
12465 vm_map_address_t src_addr,
12466 vm_map_size_t len,
12467 vm_prot_t required_prot,
12468 boolean_t do_copy,
12469 vm_map_copy_t *copy_result, /* OUT */
12470 vm_prot_t *cur_prot, /* OUT */
12471 vm_prot_t *max_prot, /* OUT */
12472 vm_inherit_t inheritance,
12473 vm_map_kernel_flags_t vmk_flags)
12474 {
12475 vm_map_copy_t copy;
12476 kern_return_t kr;
12477
12478 /*
12479 * Check for copies of zero bytes.
12480 */
12481
12482 if (len == 0) {
12483 *copy_result = VM_MAP_COPY_NULL;
12484 return KERN_SUCCESS;
12485 }
12486
12487 /*
12488 * Check that the end address doesn't overflow
12489 */
12490 if (src_addr + len < src_addr) {
12491 return KERN_INVALID_ADDRESS;
12492 }
12493
12494 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12495 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12496 }
12497
12498 /*
12499 * Allocate a header element for the list.
12500 *
12501 * Use the start and end in the header to
12502 * remember the endpoints prior to rounding.
12503 */
12504
12505 copy = vm_map_copy_allocate();
12506 copy->type = VM_MAP_COPY_ENTRY_LIST;
12507 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12508
12509 vm_map_store_init(&copy->cpy_hdr);
12510
12511 copy->offset = 0;
12512 copy->size = len;
12513
12514 kr = vm_map_remap_extract(src_map,
12515 src_addr,
12516 len,
12517 required_prot,
12518 do_copy, /* copy */
12519 &copy->cpy_hdr,
12520 cur_prot,
12521 max_prot,
12522 inheritance,
12523 vmk_flags);
12524 if (kr != KERN_SUCCESS) {
12525 vm_map_copy_discard(copy);
12526 return kr;
12527 }
12528 assert((*cur_prot & required_prot) == required_prot);
12529 assert((*max_prot & required_prot) == required_prot);
12530
12531 *copy_result = copy;
12532 return KERN_SUCCESS;
12533 }
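/*
 * Editor's note: an illustrative sketch, not part of the build. It shows a
 * hypothetical call to vm_map_copy_extract() using the signature above;
 * "src_map", "addr" and "size" are placeholder names, and the asserts above
 * guarantee the OUT protections contain at least "required_prot".
 */
#if 0
	vm_map_copy_t   copy;
	vm_prot_t       cur_prot, max_prot;
	kern_return_t   kr;

	kr = vm_map_copy_extract(src_map,
	    addr,
	    size,
	    VM_PROT_READ,               /* required_prot */
	    FALSE,                      /* do_copy: share rather than copy */
	    &copy,
	    &cur_prot,
	    &max_prot,
	    VM_INHERIT_SHARE,
	    VM_MAP_KERNEL_FLAGS_NONE);
	if (kr == KERN_SUCCESS) {
		/* "copy" is an entry-list copy object; discard it when done */
		vm_map_copy_discard(copy);
	}
#endif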
12534
12535 /*
12536 * vm_map_copyin_object:
12537 *
12538 * Create a copy object from an object.
12539 * Our caller donates an object reference.
12540 */
12541
12542 kern_return_t
12543 vm_map_copyin_object(
12544 vm_object_t object,
12545 vm_object_offset_t offset, /* offset of region in object */
12546 vm_object_size_t size, /* size of region in object */
12547 vm_map_copy_t *copy_result) /* OUT */
12548 {
12549 vm_map_copy_t copy; /* Resulting copy */
12550
12551 /*
12552 * We drop the object into a special copy object
12553 * that contains the object directly.
12554 */
12555
12556 copy = vm_map_copy_allocate();
12557 copy->type = VM_MAP_COPY_OBJECT;
12558 copy->cpy_object = object;
12559 copy->offset = offset;
12560 copy->size = size;
12561
12562 *copy_result = copy;
12563 return KERN_SUCCESS;
12564 }
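/*
 * Editor's note: an illustrative sketch, not part of the build.
 * vm_map_copyin_object() consumes the object reference donated by the
 * caller, so a hypothetical caller looks like this ("obj" and "size" are
 * placeholder names for an object the caller holds a reference on and the
 * size of the region of interest).
 */
#if 0
	vm_map_copy_t   copy;

	vm_object_reference(obj);               /* this reference is donated below */
	(void) vm_map_copyin_object(obj, 0, size, &copy);
	/* "copy" now owns the reference; copy->type is VM_MAP_COPY_OBJECT */
#endif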
12565
12566 static void
12567 vm_map_fork_share(
12568 vm_map_t old_map,
12569 vm_map_entry_t old_entry,
12570 vm_map_t new_map)
12571 {
12572 vm_object_t object;
12573 vm_map_entry_t new_entry;
12574
12575 /*
12576 * New sharing code. The new map entry
12577 * references the original object. Internal
12578 * objects use an asynchronous copy algorithm for
12579 * future copies. First make sure we have
12580 * the right object. If we need a shadow,
12581 * or someone else already has one, then
12582 * make a new shadow and share it.
12583 */
12584
12585 object = VME_OBJECT(old_entry);
12586 if (old_entry->is_sub_map) {
12587 assert(old_entry->wired_count == 0);
12588 #ifndef NO_NESTED_PMAP
12589 if (old_entry->use_pmap) {
12590 kern_return_t result;
12591
12592 result = pmap_nest(new_map->pmap,
12593 (VME_SUBMAP(old_entry))->pmap,
12594 (addr64_t)old_entry->vme_start,
12595 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12596 if (result) {
12597 panic("vm_map_fork_share: pmap_nest failed!");
12598 }
12599 }
12600 #endif /* NO_NESTED_PMAP */
12601 } else if (object == VM_OBJECT_NULL) {
12602 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12603 old_entry->vme_start));
12604 VME_OFFSET_SET(old_entry, 0);
12605 VME_OBJECT_SET(old_entry, object);
12606 old_entry->use_pmap = TRUE;
12607 // assert(!old_entry->needs_copy);
12608 } else if (object->copy_strategy !=
12609 MEMORY_OBJECT_COPY_SYMMETRIC) {
12610 /*
12611 * We are already using an asymmetric
12612 * copy, and therefore we already have
12613 * the right object.
12614 */
12615
12616 assert(!old_entry->needs_copy);
12617 } else if (old_entry->needs_copy || /* case 1 */
12618 object->shadowed || /* case 2 */
12619 (!object->true_share && /* case 3 */
12620 !old_entry->is_shared &&
12621 (object->vo_size >
12622 (vm_map_size_t)(old_entry->vme_end -
12623 old_entry->vme_start)))) {
12624 /*
12625 * We need to create a shadow.
12626 * There are three cases here.
12627 * In the first case, we need to
12628 * complete a deferred symmetrical
12629 * copy that we participated in.
12630 * In the second and third cases,
12631 * we need to create the shadow so
12632 * that changes that we make to the
12633 * object do not interfere with
12634 * any symmetrical copies which
12635 * have occurred (case 2) or which
12636 * might occur (case 3).
12637 *
12638 * The first case is when we had
12639 * deferred shadow object creation
12640 * via the entry->needs_copy mechanism.
12641 * This mechanism only works when
12642 * only one entry points to the source
12643 * object, and we are about to create
12644 * a second entry pointing to the
12645 * same object. The problem is that
12646 * there is no way of mapping from
12647 * an object to the entries pointing
12648 * to it. (Deferred shadow creation
12649 * works with one entry because it occurs
12650 * at fault time, and we walk from the
12651 * entry to the object when handling
12652 * the fault.)
12653 *
12654 * The second case is when the object
12655 * to be shared has already been copied
12656 * with a symmetric copy, but we point
12657 * directly to the object without
12658 * needs_copy set in our entry. (This
12659 * can happen because different ranges
12660 * of an object can be pointed to by
12661 * different entries. In particular,
12662 * a single entry pointing to an object
12663 * can be split by a call to vm_inherit,
12664 * which, combined with task_create, can
12665 * result in the different entries
12666 * having different needs_copy values.)
12667 * The shadowed flag in the object allows
12668 * us to detect this case. The problem
12669 * with this case is that if this object
12670 * has or will have shadows, then we
12671 * must not perform an asymmetric copy
12672 * of this object, since such a copy
12673 * allows the object to be changed, which
12674 * will break the previous symmetrical
12675 * copies (which rely upon the object
12676 * not changing). In a sense, the shadowed
12677 * flag says "don't change this object".
12678 * We fix this by creating a shadow
12679 * object for this object, and sharing
12680 * that. This works because we are free
12681 * to change the shadow object (and thus
12682 * to use an asymmetric copy strategy);
12683 * this is also semantically correct,
12684 * since this object is temporary, and
12685 * therefore a copy of the object is
12686 * as good as the object itself. (This
12687 * is not true for permanent objects,
12688 * since the pager needs to see changes,
12689 * which won't happen if the changes
12690 * are made to a copy.)
12691 *
12692 * The third case is when the object
12693 * to be shared has parts sticking
12694 * outside of the entry we're working
12695 * with, and thus may in the future
12696 * be subject to a symmetrical copy.
12697 * (This is a preemptive version of
12698 * case 2.)
12699 */
12700 VME_OBJECT_SHADOW(old_entry,
12701 (vm_map_size_t) (old_entry->vme_end -
12702 old_entry->vme_start));
12703
12704 /*
12705 * If we're making a shadow for other than
12706 * copy on write reasons, then we have
12707 * to remove write permission.
12708 */
12709
12710 if (!old_entry->needs_copy &&
12711 (old_entry->protection & VM_PROT_WRITE)) {
12712 vm_prot_t prot;
12713
12714 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
12715
12716 prot = old_entry->protection & ~VM_PROT_WRITE;
12717
12718 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
12719
12720 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12721 prot |= VM_PROT_EXECUTE;
12722 }
12723
12724
12725 if (old_map->mapped_in_other_pmaps) {
12726 vm_object_pmap_protect(
12727 VME_OBJECT(old_entry),
12728 VME_OFFSET(old_entry),
12729 (old_entry->vme_end -
12730 old_entry->vme_start),
12731 PMAP_NULL,
12732 PAGE_SIZE,
12733 old_entry->vme_start,
12734 prot);
12735 } else {
12736 pmap_protect(old_map->pmap,
12737 old_entry->vme_start,
12738 old_entry->vme_end,
12739 prot);
12740 }
12741 }
12742
12743 old_entry->needs_copy = FALSE;
12744 object = VME_OBJECT(old_entry);
12745 }
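/*
 * Editor's note: an illustrative sketch, not part of the build. It restates
 * the three "needs a shadow" cases discussed in the long comment above as a
 * standalone predicate; "needs_new_shadow" is a hypothetical name used only
 * for this illustration.
 */
#if 0
static boolean_t
needs_new_shadow(
	vm_map_entry_t  old_entry,
	vm_object_t     object)
{
	if (old_entry->needs_copy) {
		return TRUE;    /* case 1: complete a deferred symmetric copy */
	}
	if (object->shadowed) {
		return TRUE;    /* case 2: object already symmetrically copied */
	}
	if (!object->true_share &&
	    !old_entry->is_shared &&
	    (object->vo_size >
	    (vm_map_size_t)(old_entry->vme_end - old_entry->vme_start))) {
		return TRUE;    /* case 3: object extends beyond this entry */
	}
	return FALSE;
}
#endif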
12746
12747
12748 /*
12749 * If the object was using a symmetric copy strategy,
12750 * change its copy strategy to the default
12751 * asymmetric copy strategy, which is copy_delay
12752 * in the non-norma case and copy_call in the
12753 * norma case. Bump the reference count for the
12754 * new entry.
12755 */
12756
12757 if (old_entry->is_sub_map) {
12758 vm_map_lock(VME_SUBMAP(old_entry));
12759 vm_map_reference(VME_SUBMAP(old_entry));
12760 vm_map_unlock(VME_SUBMAP(old_entry));
12761 } else {
12762 vm_object_lock(object);
12763 vm_object_reference_locked(object);
12764 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12765 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12766 }
12767 vm_object_unlock(object);
12768 }
12769
12770 /*
12771 * Clone the entry, using object ref from above.
12772 * Mark both entries as shared.
12773 */
12774
12775 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12776 * map or descendants */
12777 vm_map_entry_copy(old_map, new_entry, old_entry);
12778 old_entry->is_shared = TRUE;
12779 new_entry->is_shared = TRUE;
12780
12781 /*
12782 * We're dealing with a shared mapping, so the resulting mapping
12783 * should inherit some of the original mapping's accounting settings.
12784 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12785 * "use_pmap" should stay the same as before (if it hasn't been reset
12786 * to TRUE when we cleared "iokit_acct").
12787 */
12788 assert(!new_entry->iokit_acct);
12789
12790 /*
12791 * If the old entry's inheritance is VM_INHERIT_NONE,
12792 * the new entry is for a corpse fork: remove the
12793 * write permission from the new entry.
12794 */
12795 if (old_entry->inheritance == VM_INHERIT_NONE) {
12796 new_entry->protection &= ~VM_PROT_WRITE;
12797 new_entry->max_protection &= ~VM_PROT_WRITE;
12798 }
12799
12800 /*
12801 * Insert the entry into the new map -- we
12802 * know we're inserting at the end of the new
12803 * map.
12804 */
12805
12806 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12807 VM_MAP_KERNEL_FLAGS_NONE);
12808
12809 /*
12810 * Update the physical map
12811 */
12812
12813 if (old_entry->is_sub_map) {
12814 /* Bill Angell pmap support goes here */
12815 } else {
12816 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12817 old_entry->vme_end - old_entry->vme_start,
12818 old_entry->vme_start);
12819 }
12820 }
12821
12822 static boolean_t
12823 vm_map_fork_copy(
12824 vm_map_t old_map,
12825 vm_map_entry_t *old_entry_p,
12826 vm_map_t new_map,
12827 int vm_map_copyin_flags)
12828 {
12829 vm_map_entry_t old_entry = *old_entry_p;
12830 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12831 vm_map_offset_t start = old_entry->vme_start;
12832 vm_map_copy_t copy;
12833 vm_map_entry_t last = vm_map_last_entry(new_map);
12834
12835 vm_map_unlock(old_map);
12836 /*
12837 * Use maxprot version of copyin because we
12838 * care about whether this memory can ever
12839 * be accessed, not just whether it's accessible
12840 * right now.
12841 */
12842 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12843 if (vm_map_copyin_internal(old_map, start, entry_size,
12844 vm_map_copyin_flags, &copy)
12845 != KERN_SUCCESS) {
12846 /*
12847 * The map might have changed while it
12848 * was unlocked, check it again. Skip
12849 * any blank space or permanently
12850 * unreadable region.
12851 */
12852 vm_map_lock(old_map);
12853 if (!vm_map_lookup_entry(old_map, start, &last) ||
12854 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12855 last = last->vme_next;
12856 }
12857 *old_entry_p = last;
12858
12859 /*
12860 * XXX For some error returns, want to
12861 * XXX skip to the next element. Note
12862 * that INVALID_ADDRESS and
12863 * PROTECTION_FAILURE are handled above.
12864 */
12865
12866 return FALSE;
12867 }
12868
12869 /*
12870 * Assert that the vm_map_copy is coming from the right
12871 * zone and hasn't been forged
12872 */
12873 vm_map_copy_require(copy);
12874
12875 /*
12876 * Insert the copy into the new map
12877 */
12878 vm_map_copy_insert(new_map, last, copy);
12879
12880 /*
12881 * Pick up the traversal at the end of
12882 * the copied region.
12883 */
12884
12885 vm_map_lock(old_map);
12886 start += entry_size;
12887 if (!vm_map_lookup_entry(old_map, start, &last)) {
12888 last = last->vme_next;
12889 } else {
12890 if (last->vme_start == start) {
12891 /*
12892 * No need to clip here and we don't
12893 * want to cause any unnecessary
12894 * unnesting...
12895 */
12896 } else {
12897 vm_map_clip_start(old_map, last, start);
12898 }
12899 }
12900 *old_entry_p = last;
12901
12902 return TRUE;
12903 }
12904
12905 /*
12906 * vm_map_fork:
12907 *
12908 * Create and return a new map based on the old
12909 * map, according to the inheritance values on the
12910 * regions in that map and the options.
12911 *
12912 * The source map must not be locked.
12913 */
12914 vm_map_t
12915 vm_map_fork(
12916 ledger_t ledger,
12917 vm_map_t old_map,
12918 int options)
12919 {
12920 pmap_t new_pmap;
12921 vm_map_t new_map;
12922 vm_map_entry_t old_entry;
12923 vm_map_size_t new_size = 0, entry_size;
12924 vm_map_entry_t new_entry;
12925 boolean_t src_needs_copy;
12926 boolean_t new_entry_needs_copy;
12927 boolean_t pmap_is64bit;
12928 int vm_map_copyin_flags;
12929 vm_inherit_t old_entry_inheritance;
12930 int map_create_options;
12931 kern_return_t footprint_collect_kr;
12932
12933 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12934 VM_MAP_FORK_PRESERVE_PURGEABLE |
12935 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12936 /* unsupported option */
12937 return VM_MAP_NULL;
12938 }
12939
12940 pmap_is64bit =
12941 #if defined(__i386__) || defined(__x86_64__)
12942 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12943 #elif defined(__arm64__)
12944 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12945 #elif defined(__arm__)
12946 FALSE;
12947 #else
12948 #error Unknown architecture.
12949 #endif
12950
12951 unsigned int pmap_flags = 0;
12952 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12953 #if defined(HAS_APPLE_PAC)
12954 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12955 #endif
12956 #if PMAP_CREATE_FORCE_4K_PAGES
12957 if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
12958 PAGE_SIZE != FOURK_PAGE_SIZE) {
12959 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
12960 }
12961 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12962 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12963
12964 vm_map_reference_swap(old_map);
12965 vm_map_lock(old_map);
12966
12967 map_create_options = 0;
12968 if (old_map->hdr.entries_pageable) {
12969 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12970 }
12971 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12972 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12973 footprint_collect_kr = KERN_SUCCESS;
12974 }
12975 new_map = vm_map_create_options(new_pmap,
12976 old_map->min_offset,
12977 old_map->max_offset,
12978 map_create_options);
12979 /* inherit cs_enforcement */
12980 vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
12981 vm_map_lock(new_map);
12982 vm_commit_pagezero_status(new_map);
12983 /* inherit the parent map's page size */
12984 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12985 for (
12986 old_entry = vm_map_first_entry(old_map);
12987 old_entry != vm_map_to_entry(old_map);
12988 ) {
12989 entry_size = old_entry->vme_end - old_entry->vme_start;
12990
12991 old_entry_inheritance = old_entry->inheritance;
12992 /*
12993 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12994 * share VM_INHERIT_NONE entries that are not backed by a
12995 * device pager.
12996 */
12997 if (old_entry_inheritance == VM_INHERIT_NONE &&
12998 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12999 (old_entry->protection & VM_PROT_READ) &&
13000 !(!old_entry->is_sub_map &&
13001 VME_OBJECT(old_entry) != NULL &&
13002 VME_OBJECT(old_entry)->pager != NULL &&
13003 is_device_pager_ops(
13004 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
13005 old_entry_inheritance = VM_INHERIT_SHARE;
13006 }
13007
13008 if (old_entry_inheritance != VM_INHERIT_NONE &&
13009 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
13010 footprint_collect_kr == KERN_SUCCESS) {
13011 /*
13012 * The corpse won't have old_map->pmap to query
13013 * footprint information, so collect that data now
13014 * and store it in new_map->vmmap_corpse_footprint
13015 * for later autopsy.
13016 */
13017 footprint_collect_kr =
13018 vm_map_corpse_footprint_collect(old_map,
13019 old_entry,
13020 new_map);
13021 }
13022
13023 switch (old_entry_inheritance) {
13024 case VM_INHERIT_NONE:
13025 break;
13026
13027 case VM_INHERIT_SHARE:
13028 vm_map_fork_share(old_map, old_entry, new_map);
13029 new_size += entry_size;
13030 break;
13031
13032 case VM_INHERIT_COPY:
13033
13034 /*
13035 * Inline the copy_quickly case;
13036 * upon failure, fall back on a call
13037 * to vm_map_fork_copy.
13038 */
13039
13040 if (old_entry->is_sub_map) {
13041 break;
13042 }
13043 if ((old_entry->wired_count != 0) ||
13044 ((VME_OBJECT(old_entry) != NULL) &&
13045 (VME_OBJECT(old_entry)->true_share))) {
13046 goto slow_vm_map_fork_copy;
13047 }
13048
13049 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
13050 vm_map_entry_copy(old_map, new_entry, old_entry);
13051
13052 if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
13053 new_map->jit_entry_exists = TRUE;
13054 }
13055
13056 if (new_entry->is_sub_map) {
13057 /* clear address space specifics */
13058 new_entry->use_pmap = FALSE;
13059 } else {
13060 /*
13061 * We're dealing with a copy-on-write operation,
13062 * so the resulting mapping should not inherit
13063 * the original mapping's accounting settings.
13064 * "iokit_acct" should have been cleared in
13065 * vm_map_entry_copy().
13066 * "use_pmap" should be reset to its default
13067 * (TRUE) so that the new mapping gets
13068 * accounted for in the task's memory footprint.
13069 */
13070 assert(!new_entry->iokit_acct);
13071 new_entry->use_pmap = TRUE;
13072 }
13073
13074 if (!vm_object_copy_quickly(
13075 VME_OBJECT_PTR(new_entry),
13076 VME_OFFSET(old_entry),
13077 (old_entry->vme_end -
13078 old_entry->vme_start),
13079 &src_needs_copy,
13080 &new_entry_needs_copy)) {
13081 vm_map_entry_dispose(new_map, new_entry);
13082 goto slow_vm_map_fork_copy;
13083 }
13084
13085 /*
13086 * Handle copy-on-write obligations
13087 */
13088
13089 if (src_needs_copy && !old_entry->needs_copy) {
13090 vm_prot_t prot;
13091
13092 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
13093
13094 prot = old_entry->protection & ~VM_PROT_WRITE;
13095
13096 if (override_nx(old_map, VME_ALIAS(old_entry))
13097 && prot) {
13098 prot |= VM_PROT_EXECUTE;
13099 }
13100
13101 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
13102
13103 vm_object_pmap_protect(
13104 VME_OBJECT(old_entry),
13105 VME_OFFSET(old_entry),
13106 (old_entry->vme_end -
13107 old_entry->vme_start),
13108 ((old_entry->is_shared
13109 || old_map->mapped_in_other_pmaps)
13110 ? PMAP_NULL :
13111 old_map->pmap),
13112 VM_MAP_PAGE_SIZE(old_map),
13113 old_entry->vme_start,
13114 prot);
13115
13116 assert(old_entry->wired_count == 0);
13117 old_entry->needs_copy = TRUE;
13118 }
13119 new_entry->needs_copy = new_entry_needs_copy;
13120
13121 /*
13122 * Insert the entry at the end
13123 * of the map.
13124 */
13125
13126 vm_map_store_entry_link(new_map,
13127 vm_map_last_entry(new_map),
13128 new_entry,
13129 VM_MAP_KERNEL_FLAGS_NONE);
13130 new_size += entry_size;
13131 break;
13132
13133 slow_vm_map_fork_copy:
13134 vm_map_copyin_flags = 0;
13135 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
13136 vm_map_copyin_flags |=
13137 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
13138 }
13139 if (vm_map_fork_copy(old_map,
13140 &old_entry,
13141 new_map,
13142 vm_map_copyin_flags)) {
13143 new_size += entry_size;
13144 }
13145 continue;
13146 }
13147 old_entry = old_entry->vme_next;
13148 }
13149
13150 #if defined(__arm64__)
13151 pmap_insert_sharedpage(new_map->pmap);
13152 #endif /* __arm64__ */
13153
13154 new_map->size = new_size;
13155
13156 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
13157 vm_map_corpse_footprint_collect_done(new_map);
13158 }
13159
13160 /* Propagate JIT entitlement for the pmap layer. */
13161 if (pmap_get_jit_entitled(old_map->pmap)) {
13162 /* Tell the pmap that it supports JIT. */
13163 pmap_set_jit_entitled(new_map->pmap);
13164 }
13165
13166 vm_map_unlock(new_map);
13167 vm_map_unlock(old_map);
13168 vm_map_deallocate(old_map);
13169
13170 return new_map;
13171 }
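/*
 * Editor's note: an illustrative sketch, not part of the build. It shows a
 * hypothetical corpse-fork style call to vm_map_fork(), combining the option
 * bits handled above; "task_ledger" and "parent_map" are placeholder names.
 */
#if 0
	vm_map_t new_map;

	new_map = vm_map_fork(task_ledger,
	    parent_map,
	    VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
	    VM_MAP_FORK_PRESERVE_PURGEABLE |
	    VM_MAP_FORK_CORPSE_FOOTPRINT);
	if (new_map == VM_MAP_NULL) {
		/* an unsupported option bit was passed */
	}
#endif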
13172
13173 /*
13174 * vm_map_exec:
13175 *
13176 * Setup the "new_map" with the proper execution environment according
13177 * to the type of executable (platform, 64bit, chroot environment).
13178 * Map the comm page and shared region, etc...
13179 */
13180 kern_return_t
13181 vm_map_exec(
13182 vm_map_t new_map,
13183 task_t task,
13184 boolean_t is64bit,
13185 void *fsroot,
13186 cpu_type_t cpu,
13187 cpu_subtype_t cpu_subtype,
13188 boolean_t reslide)
13189 {
13190 SHARED_REGION_TRACE_DEBUG(
13191 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13192 (void *)VM_KERNEL_ADDRPERM(current_task()),
13193 (void *)VM_KERNEL_ADDRPERM(new_map),
13194 (void *)VM_KERNEL_ADDRPERM(task),
13195 (void *)VM_KERNEL_ADDRPERM(fsroot),
13196 cpu,
13197 cpu_subtype));
13198 (void) vm_commpage_enter(new_map, task, is64bit);
13199
13200 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide);
13201
13202 SHARED_REGION_TRACE_DEBUG(
13203 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13204 (void *)VM_KERNEL_ADDRPERM(current_task()),
13205 (void *)VM_KERNEL_ADDRPERM(new_map),
13206 (void *)VM_KERNEL_ADDRPERM(task),
13207 (void *)VM_KERNEL_ADDRPERM(fsroot),
13208 cpu,
13209 cpu_subtype));
13210
13211 /*
13212 * Some devices have region(s) of memory that shouldn't get allocated by
13213 * user processes. The following code creates dummy vm_map_entry_t's for each
13214 * of the regions that need to be reserved to prevent any allocations in
13215 * those regions.
13216 */
13217 kern_return_t kr = KERN_FAILURE;
13218 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
13219 vmk_flags.vmkf_permanent = TRUE;
13220 vmk_flags.vmkf_beyond_max = TRUE;
13221
13222 struct vm_reserved_region *regions = NULL;
13223 size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
13224 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13225
13226 for (size_t i = 0; i < num_regions; ++i) {
13227 kr = vm_map_enter(
13228 new_map,
13229 &regions[i].vmrr_addr,
13230 regions[i].vmrr_size,
13231 (vm_map_offset_t)0,
13232 VM_FLAGS_FIXED,
13233 vmk_flags,
13234 VM_KERN_MEMORY_NONE,
13235 VM_OBJECT_NULL,
13236 (vm_object_offset_t)0,
13237 FALSE,
13238 VM_PROT_NONE,
13239 VM_PROT_NONE,
13240 VM_INHERIT_NONE);
13241
13242 if (kr != KERN_SUCCESS) {
13243 panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13244 }
13245 }
13246
13247 new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13248
13249 return KERN_SUCCESS;
13250 }
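/*
 * Editor's note: an illustrative sketch, not part of the build. It shows a
 * hypothetical call to vm_map_exec() with the signature above; "task",
 * "fsroot", "cpu" and "cpu_subtype" are placeholder names, and "fsroot" is
 * the filesystem root for the chroot environment mentioned in the comment
 * above.
 */
#if 0
	kern_return_t kr;

	kr = vm_map_exec(new_map,
	    task,
	    TRUE,               /* is64bit */
	    fsroot,
	    cpu,
	    cpu_subtype,
	    FALSE);             /* reslide */
#endif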
13251
13252 /*
13253 * vm_map_lookup_locked:
13254 *
13255 * Finds the VM object, offset, and
13256 * protection for a given virtual address in the
13257 * specified map, assuming a page fault of the
13258 * type specified.
13259 *
13260 * Returns the (object, offset, protection) for
13261 * this address, whether it is wired down, and whether
13262 * this map has the only reference to the data in question.
13263 * In order to later verify this lookup, a "version"
13264 * is returned.
13265 * If contended != NULL, *contended will be set to
13266 * true iff the thread had to spin or block to acquire
13267 * an exclusive lock.
13268 *
13269 * The map MUST be locked by the caller and WILL be
13270 * locked on exit. In order to guarantee the
13271 * existence of the returned object, it is returned
13272 * locked.
13273 *
13274 * If a lookup is requested with "write protection"
13275 * specified, the map may be changed to perform virtual
13276 * copying operations, although the data referenced will
13277 * remain the same.
13278 */
13279 kern_return_t
13280 vm_map_lookup_locked(
13281 vm_map_t *var_map, /* IN/OUT */
13282 vm_map_offset_t vaddr,
13283 vm_prot_t fault_type,
13284 int object_lock_type,
13285 vm_map_version_t *out_version, /* OUT */
13286 vm_object_t *object, /* OUT */
13287 vm_object_offset_t *offset, /* OUT */
13288 vm_prot_t *out_prot, /* OUT */
13289 boolean_t *wired, /* OUT */
13290 vm_object_fault_info_t fault_info, /* OUT */
13291 vm_map_t *real_map, /* OUT */
13292 bool *contended) /* OUT */
13293 {
13294 vm_map_entry_t entry;
13295 vm_map_t map = *var_map;
13296 vm_map_t old_map = *var_map;
13297 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
13298 vm_map_offset_t cow_parent_vaddr = 0;
13299 vm_map_offset_t old_start = 0;
13300 vm_map_offset_t old_end = 0;
13301 vm_prot_t prot;
13302 boolean_t mask_protections;
13303 boolean_t force_copy;
13304 boolean_t no_force_copy_if_executable;
13305 vm_prot_t original_fault_type;
13306 vm_map_size_t fault_page_mask;
13307
13308 /*
13309 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
13310 * as a mask against the mapping's actual protections, not as an
13311 * absolute value.
13312 */
13313 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13314 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13315 no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13316 fault_type &= VM_PROT_ALL;
13317 original_fault_type = fault_type;
13318 if (contended) {
13319 *contended = false;
13320 }
13321
13322 *real_map = map;
13323
13324 fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13325 vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13326
13327 RetryLookup:
13328 fault_type = original_fault_type;
13329
13330 /*
13331 * If the map has an interesting hint, try it before calling
13332 * full blown lookup routine.
13333 */
13334 entry = map->hint;
13335
13336 if ((entry == vm_map_to_entry(map)) ||
13337 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13338 vm_map_entry_t tmp_entry;
13339
13340 /*
13341 * Entry was either not a valid hint, or the vaddr
13342 * was not contained in the entry, so do a full lookup.
13343 */
13344 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13345 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13346 vm_map_unlock(cow_sub_map_parent);
13347 }
13348 if ((*real_map != map)
13349 && (*real_map != cow_sub_map_parent)) {
13350 vm_map_unlock(*real_map);
13351 }
13352 return KERN_INVALID_ADDRESS;
13353 }
13354
13355 entry = tmp_entry;
13356 }
13357 if (map == old_map) {
13358 old_start = entry->vme_start;
13359 old_end = entry->vme_end;
13360 }
13361
13362 /*
13363 * Handle submaps. Drop lock on upper map, submap is
13364 * returned locked.
13365 */
13366
13367 submap_recurse:
13368 if (entry->is_sub_map) {
13369 vm_map_offset_t local_vaddr;
13370 vm_map_offset_t end_delta;
13371 vm_map_offset_t start_delta;
13372 vm_map_entry_t submap_entry, saved_submap_entry;
13373 vm_object_offset_t submap_entry_offset;
13374 vm_object_size_t submap_entry_size;
13375 vm_prot_t subentry_protection;
13376 vm_prot_t subentry_max_protection;
13377 boolean_t subentry_no_copy_on_read;
13378 boolean_t mapped_needs_copy = FALSE;
13379 vm_map_version_t version;
13380
13381 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13382 "map %p (%d) entry %p submap %p (%d)\n",
13383 map, VM_MAP_PAGE_SHIFT(map), entry,
13384 VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13385
13386 local_vaddr = vaddr;
13387
13388 if ((entry->use_pmap &&
13389 !((fault_type & VM_PROT_WRITE) ||
13390 force_copy))) {
13391 /* if real_map equals map we unlock below */
13392 if ((*real_map != map) &&
13393 (*real_map != cow_sub_map_parent)) {
13394 vm_map_unlock(*real_map);
13395 }
13396 *real_map = VME_SUBMAP(entry);
13397 }
13398
13399 if (entry->needs_copy &&
13400 ((fault_type & VM_PROT_WRITE) ||
13401 force_copy)) {
13402 if (!mapped_needs_copy) {
13403 if (vm_map_lock_read_to_write(map)) {
13404 vm_map_lock_read(map);
13405 *real_map = map;
13406 goto RetryLookup;
13407 }
13408 vm_map_lock_read(VME_SUBMAP(entry));
13409 *var_map = VME_SUBMAP(entry);
13410 cow_sub_map_parent = map;
13411 /* reset base to map before cow object */
13412 /* this is the map which will accept */
13413 /* the new cow object */
13414 old_start = entry->vme_start;
13415 old_end = entry->vme_end;
13416 cow_parent_vaddr = vaddr;
13417 mapped_needs_copy = TRUE;
13418 } else {
13419 vm_map_lock_read(VME_SUBMAP(entry));
13420 *var_map = VME_SUBMAP(entry);
13421 if ((cow_sub_map_parent != map) &&
13422 (*real_map != map)) {
13423 vm_map_unlock(map);
13424 }
13425 }
13426 } else {
13427 vm_map_lock_read(VME_SUBMAP(entry));
13428 *var_map = VME_SUBMAP(entry);
13429 /* leave map locked if it is a target */
13430 /* cow sub_map above; otherwise, just */
13431 /* follow the maps down to the object. */
13432 /* Here we unlock knowing we are not */
13433 /* revisiting the map. */
13434 if ((*real_map != map) && (map != cow_sub_map_parent)) {
13435 vm_map_unlock_read(map);
13436 }
13437 }
13438
13439 map = *var_map;
13440
13441 /* calculate the offset in the submap for vaddr */
13442 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13443 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13444 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13445 (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13446
13447 RetrySubMap:
13448 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13449 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13450 vm_map_unlock(cow_sub_map_parent);
13451 }
13452 if ((*real_map != map)
13453 && (*real_map != cow_sub_map_parent)) {
13454 vm_map_unlock(*real_map);
13455 }
13456 *real_map = map;
13457 return KERN_INVALID_ADDRESS;
13458 }
13459
13460 /* find the attenuated shadow of the underlying object */
13461 /* on our target map */
13462
13463 /* In English: the submap object may extend beyond the */
13464 /* region mapped by the entry, or may only fill a portion */
13465 /* of it. For our purposes, we only care if the object */
13466 /* doesn't fill the entry. In this case the area which will */
13467 /* ultimately be clipped in the top map will only need */
13468 /* to be as big as the portion of the underlying entry */
13469 /* which is mapped */
13470 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13471 submap_entry->vme_start - VME_OFFSET(entry) : 0;
13472
13473 end_delta =
13474 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13475 submap_entry->vme_end ?
13476 0 : (VME_OFFSET(entry) +
13477 (old_end - old_start))
13478 - submap_entry->vme_end;
13479
13480 old_start += start_delta;
13481 old_end -= end_delta;
13482
13483 if (submap_entry->is_sub_map) {
13484 entry = submap_entry;
13485 vaddr = local_vaddr;
13486 goto submap_recurse;
13487 }
13488
13489 if (((fault_type & VM_PROT_WRITE) ||
13490 force_copy)
13491 && cow_sub_map_parent) {
13492 vm_object_t sub_object, copy_object;
13493 vm_object_offset_t copy_offset;
13494 vm_map_offset_t local_start;
13495 vm_map_offset_t local_end;
13496 boolean_t copied_slowly = FALSE;
13497 vm_object_offset_t copied_slowly_phys_offset = 0;
13498 kern_return_t kr = KERN_SUCCESS;
13499
13500 if (vm_map_lock_read_to_write(map)) {
13501 vm_map_lock_read(map);
13502 old_start -= start_delta;
13503 old_end += end_delta;
13504 goto RetrySubMap;
13505 }
13506
13507
13508 sub_object = VME_OBJECT(submap_entry);
13509 if (sub_object == VM_OBJECT_NULL) {
13510 sub_object =
13511 vm_object_allocate(
13512 (vm_map_size_t)
13513 (submap_entry->vme_end -
13514 submap_entry->vme_start));
13515 VME_OBJECT_SET(submap_entry, sub_object);
13516 VME_OFFSET_SET(submap_entry, 0);
13517 assert(!submap_entry->is_sub_map);
13518 assert(submap_entry->use_pmap);
13519 }
13520 local_start = local_vaddr -
13521 (cow_parent_vaddr - old_start);
13522 local_end = local_vaddr +
13523 (old_end - cow_parent_vaddr);
13524 vm_map_clip_start(map, submap_entry, local_start);
13525 vm_map_clip_end(map, submap_entry, local_end);
13526 if (submap_entry->is_sub_map) {
13527 /* unnesting was done when clipping */
13528 assert(!submap_entry->use_pmap);
13529 }
13530
13531 /* This is the COW case; let's connect */
13532 /* an entry in our space to the underlying */
13533 /* object in the submap, bypassing the */
13534 /* submap. */
13535
13536 if (submap_entry->wired_count != 0 ||
13537 (sub_object->copy_strategy !=
13538 MEMORY_OBJECT_COPY_SYMMETRIC)) {
13539 if ((submap_entry->protection & VM_PROT_EXECUTE) &&
13540 no_force_copy_if_executable) {
13541 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13542 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13543 vm_map_unlock(cow_sub_map_parent);
13544 }
13545 if ((*real_map != map)
13546 && (*real_map != cow_sub_map_parent)) {
13547 vm_map_unlock(*real_map);
13548 }
13549 *real_map = map;
13550 vm_map_lock_write_to_read(map);
13551 kr = KERN_PROTECTION_FAILURE;
13552 DTRACE_VM4(submap_no_copy_executable,
13553 vm_map_t, map,
13554 vm_object_offset_t, submap_entry_offset,
13555 vm_object_size_t, submap_entry_size,
13556 int, kr);
13557 return kr;
13558 }
13559
13560 vm_object_reference(sub_object);
13561
13562 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13563 "submap_entry %p offset 0x%llx\n",
13564 submap_entry, VME_OFFSET(submap_entry));
13565 submap_entry_offset = VME_OFFSET(submap_entry);
13566 submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13567
13568 DTRACE_VM6(submap_copy_slowly,
13569 vm_map_t, cow_sub_map_parent,
13570 vm_map_offset_t, vaddr,
13571 vm_map_t, map,
13572 vm_object_size_t, submap_entry_size,
13573 int, submap_entry->wired_count,
13574 int, sub_object->copy_strategy);
13575
13576 saved_submap_entry = submap_entry;
13577 version.main_timestamp = map->timestamp;
13578 vm_map_unlock(map); /* Increments timestamp by 1 */
13579 submap_entry = VM_MAP_ENTRY_NULL;
13580
13581 vm_object_lock(sub_object);
13582 kr = vm_object_copy_slowly(sub_object,
13583 submap_entry_offset,
13584 submap_entry_size,
13585 FALSE,
13586 &copy_object);
13587 copied_slowly = TRUE;
13588 /* 4k: account for extra offset in physical page */
13589 copied_slowly_phys_offset = submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13590 vm_object_deallocate(sub_object);
13591
13592 vm_map_lock(map);
13593
13594 if (kr != KERN_SUCCESS &&
13595 kr != KERN_MEMORY_RESTART_COPY) {
13596 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13597 vm_map_unlock(cow_sub_map_parent);
13598 }
13599 if ((*real_map != map)
13600 && (*real_map != cow_sub_map_parent)) {
13601 vm_map_unlock(*real_map);
13602 }
13603 *real_map = map;
13604 vm_object_deallocate(copy_object);
13605 copy_object = VM_OBJECT_NULL;
13606 vm_map_lock_write_to_read(map);
13607 DTRACE_VM4(submap_copy_slowly,
13608 vm_object_t, sub_object,
13609 vm_object_offset_t, submap_entry_offset,
13610 vm_object_size_t, submap_entry_size,
13611 int, kr);
13612 return kr;
13613 }
13614
13615 if ((kr == KERN_SUCCESS) &&
13616 (version.main_timestamp + 1) == map->timestamp) {
13617 submap_entry = saved_submap_entry;
13618 } else {
13619 saved_submap_entry = NULL;
13620 old_start -= start_delta;
13621 old_end += end_delta;
13622 vm_object_deallocate(copy_object);
13623 copy_object = VM_OBJECT_NULL;
13624 vm_map_lock_write_to_read(map);
13625 goto RetrySubMap;
13626 }
13627 } else {
13628 /* set up shadow object */
13629 copy_object = sub_object;
13630 vm_object_lock(sub_object);
13631 vm_object_reference_locked(sub_object);
13632 sub_object->shadowed = TRUE;
13633 vm_object_unlock(sub_object);
13634
13635 assert(submap_entry->wired_count == 0);
13636 submap_entry->needs_copy = TRUE;
13637
13638 prot = submap_entry->protection;
13639 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13640 prot = prot & ~VM_PROT_WRITE;
13641 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13642
13643 if (override_nx(old_map,
13644 VME_ALIAS(submap_entry))
13645 && prot) {
13646 prot |= VM_PROT_EXECUTE;
13647 }
13648
13649 vm_object_pmap_protect(
13650 sub_object,
13651 VME_OFFSET(submap_entry),
13652 submap_entry->vme_end -
13653 submap_entry->vme_start,
13654 (submap_entry->is_shared
13655 || map->mapped_in_other_pmaps) ?
13656 PMAP_NULL : map->pmap,
13657 VM_MAP_PAGE_SIZE(map),
13658 submap_entry->vme_start,
13659 prot);
13660 }
13661
13662 /*
13663 * Adjust the fault offset to the submap entry.
13664 */
13665 copy_offset = (local_vaddr -
13666 submap_entry->vme_start +
13667 VME_OFFSET(submap_entry));
13668
13669 /* This works differently from the */
13670 /* normal submap case. We go back */
13671 /* to the parent of the cow map and */
13672 /* clip out the target portion of */
13673 /* the sub_map, substituting the */
13674 /* new copy object. */
13675
13676 subentry_protection = submap_entry->protection;
13677 subentry_max_protection = submap_entry->max_protection;
13678 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13679 vm_map_unlock(map);
13680 submap_entry = NULL; /* not valid after map unlock */
13681
13682 local_start = old_start;
13683 local_end = old_end;
13684 map = cow_sub_map_parent;
13685 *var_map = cow_sub_map_parent;
13686 vaddr = cow_parent_vaddr;
13687 cow_sub_map_parent = NULL;
13688
13689 if (!vm_map_lookup_entry(map,
13690 vaddr, &entry)) {
13691 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13692 vm_map_unlock(cow_sub_map_parent);
13693 }
13694 if ((*real_map != map)
13695 && (*real_map != cow_sub_map_parent)) {
13696 vm_map_unlock(*real_map);
13697 }
13698 *real_map = map;
13699 vm_object_deallocate(
13700 copy_object);
13701 copy_object = VM_OBJECT_NULL;
13702 vm_map_lock_write_to_read(map);
13703 DTRACE_VM4(submap_lookup_post_unlock,
13704 uint64_t, (uint64_t)entry->vme_start,
13705 uint64_t, (uint64_t)entry->vme_end,
13706 vm_map_offset_t, vaddr,
13707 int, copied_slowly);
13708 return KERN_INVALID_ADDRESS;
13709 }
13710
13711 /*
13712 * Clip out the portion of space mapped by the sub map
13713 * which corresponds to the underlying object.
13714 */
13715
13716 /*
13717 * Clip (and unnest) the smallest nested chunk
13718 * possible around the faulting address...
13719 */
13720 local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
13721 local_end = local_start + pmap_shared_region_size_min(map->pmap);
13722 /*
13723 * ... but don't go beyond the "old_start" to "old_end"
13724 * range, to avoid spanning over another VM region
13725 * with a possibly different VM object and/or offset.
13726 */
13727 if (local_start < old_start) {
13728 local_start = old_start;
13729 }
13730 if (local_end > old_end) {
13731 local_end = old_end;
13732 }
13733 /*
13734 * Adjust copy_offset to the start of the range.
13735 */
13736 copy_offset -= (vaddr - local_start);
13737
13738 vm_map_clip_start(map, entry, local_start);
13739 vm_map_clip_end(map, entry, local_end);
13740 if (entry->is_sub_map) {
13741 /* unnesting was done when clipping */
13742 assert(!entry->use_pmap);
13743 }
13744
13745 /* substitute copy object for */
13746 /* shared map entry */
13747 vm_map_deallocate(VME_SUBMAP(entry));
13748 assert(!entry->iokit_acct);
13749 entry->is_sub_map = FALSE;
13750 entry->use_pmap = TRUE;
13751 VME_OBJECT_SET(entry, copy_object);
13752
13753 /* propagate the submap entry's protections */
13754 if (entry->protection != VM_PROT_READ) {
13755 /*
13756 * Someone has already altered the top entry's
13757 * protections via vm_protect(VM_PROT_COPY).
13758 * Respect these new values and ignore the
13759 * submap entry's protections.
13760 */
13761 } else {
13762 /*
13763 * Regular copy-on-write: propagate the submap
13764 * entry's protections to the top map entry.
13765 */
13766 entry->protection |= subentry_protection;
13767 }
13768 entry->max_protection |= subentry_max_protection;
13769 /* propagate no_copy_on_read */
13770 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13771
13772 if ((entry->protection & VM_PROT_WRITE) &&
13773 (entry->protection & VM_PROT_EXECUTE) &&
13774 #if XNU_TARGET_OS_OSX
13775 map->pmap != kernel_pmap &&
13776 (vm_map_cs_enforcement(map)
13777 #if __arm64__
13778 || !VM_MAP_IS_EXOTIC(map)
13779 #endif /* __arm64__ */
13780 ) &&
13781 #endif /* XNU_TARGET_OS_OSX */
13782 #if PMAP_CS
13783 !pmap_cs_exempt(map->pmap) &&
13784 #endif
13785 !(entry->used_for_jit) &&
13786 VM_MAP_POLICY_WX_STRIP_X(map)) {
13787 DTRACE_VM3(cs_wx,
13788 uint64_t, (uint64_t)entry->vme_start,
13789 uint64_t, (uint64_t)entry->vme_end,
13790 vm_prot_t, entry->protection);
13791 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13792 proc_selfpid(),
13793 (current_task()->bsd_info
13794 ? proc_name_address(current_task()->bsd_info)
13795 : "?"),
13796 __FUNCTION__);
13797 entry->protection &= ~VM_PROT_EXECUTE;
13798 }
13799
13800 if (copied_slowly) {
13801 VME_OFFSET_SET(entry, local_start - old_start + copied_slowly_phys_offset);
13802 entry->needs_copy = FALSE;
13803 entry->is_shared = FALSE;
13804 } else {
13805 VME_OFFSET_SET(entry, copy_offset);
13806 assert(entry->wired_count == 0);
13807 entry->needs_copy = TRUE;
13808 if (entry->inheritance == VM_INHERIT_SHARE) {
13809 entry->inheritance = VM_INHERIT_COPY;
13810 }
13811 if (map != old_map) {
13812 entry->is_shared = TRUE;
13813 }
13814 }
13815 if (entry->inheritance == VM_INHERIT_SHARE) {
13816 entry->inheritance = VM_INHERIT_COPY;
13817 }
13818
13819 vm_map_lock_write_to_read(map);
13820 } else {
13821 if ((cow_sub_map_parent)
13822 && (cow_sub_map_parent != *real_map)
13823 && (cow_sub_map_parent != map)) {
13824 vm_map_unlock(cow_sub_map_parent);
13825 }
13826 entry = submap_entry;
13827 vaddr = local_vaddr;
13828 }
13829 }
13830
13831 /*
13832 * Check whether this task is allowed to have
13833 * this page.
13834 */
13835
13836 prot = entry->protection;
13837
13838 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13839 /*
13840 * HACK -- if not a stack, then allow execution
13841 */
13842 prot |= VM_PROT_EXECUTE;
13843 }
13844
13845 if (mask_protections) {
13846 fault_type &= prot;
13847 if (fault_type == VM_PROT_NONE) {
13848 goto protection_failure;
13849 }
13850 }
13851 if (((fault_type & prot) != fault_type)
13852 #if __arm64__
13853 /* prefetch abort in execute-only page */
13854 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13855 #endif
13856 ) {
13857 protection_failure:
13858 if (*real_map != map) {
13859 vm_map_unlock(*real_map);
13860 }
13861 *real_map = map;
13862
13863 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13864 log_stack_execution_failure((addr64_t)vaddr, prot);
13865 }
13866
13867 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13868 return KERN_PROTECTION_FAILURE;
13869 }
13870
13871 /*
13872 * If this page is not pageable, we have to get
13873 * it for all possible accesses.
13874 */
13875
13876 *wired = (entry->wired_count != 0);
13877 if (*wired) {
13878 fault_type = prot;
13879 }
13880
13881 /*
13882 * If the entry was copy-on-write, we either shadow it now (write fault) or demote the allowed permissions (read fault).
13883 */
13884
13885 if (entry->needs_copy) {
13886 /*
13887 * If we want to write the page, we may as well
13888 * handle that now since we've got the map locked.
13889 *
13890 * If we don't need to write the page, we just
13891 * demote the permissions allowed.
13892 */
13893
13894 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13895 /*
13896 * Make a new object, and place it in the
13897 * object chain. Note that no new references
13898 * have appeared -- one just moved from the
13899 * map to the new object.
13900 */
13901
13902 if (vm_map_lock_read_to_write(map)) {
13903 vm_map_lock_read(map);
13904 goto RetryLookup;
13905 }
13906
13907 if (VME_OBJECT(entry)->shadowed == FALSE) {
13908 vm_object_lock(VME_OBJECT(entry));
13909 VME_OBJECT(entry)->shadowed = TRUE;
13910 vm_object_unlock(VME_OBJECT(entry));
13911 }
13912 VME_OBJECT_SHADOW(entry,
13913 (vm_map_size_t) (entry->vme_end -
13914 entry->vme_start));
13915 entry->needs_copy = FALSE;
13916
13917 vm_map_lock_write_to_read(map);
13918 }
13919 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13920 /*
13921 * We're attempting to read a copy-on-write
13922 * page -- don't allow writes.
13923 */
13924
13925 prot &= (~VM_PROT_WRITE);
13926 }
13927 }
13928
13929 /*
13930 * Create an object if necessary.
13931 */
13932 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13933 if (vm_map_lock_read_to_write(map)) {
13934 vm_map_lock_read(map);
13935 goto RetryLookup;
13936 }
13937
13938 VME_OBJECT_SET(entry,
13939 vm_object_allocate(
13940 (vm_map_size_t)(entry->vme_end -
13941 entry->vme_start)));
13942 VME_OFFSET_SET(entry, 0);
13943 assert(entry->use_pmap);
13944 vm_map_lock_write_to_read(map);
13945 }
13946
13947 /*
13948 * Return the object/offset from this entry. If the entry
13949 * was copy-on-write or empty, it has been fixed up. Also
13950 * return the protection.
13951 */
13952
13953 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13954 *object = VME_OBJECT(entry);
13955 *out_prot = prot;
13956 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
13957
13958 if (fault_info) {
13959 fault_info->interruptible = THREAD_UNINT; /* for now... */
13960 /* ... the caller will change "interruptible" if needed */
13961 fault_info->cluster_size = 0;
13962 fault_info->user_tag = VME_ALIAS(entry);
13963 fault_info->pmap_options = 0;
13964 if (entry->iokit_acct ||
13965 (!entry->is_sub_map && !entry->use_pmap)) {
13966 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13967 }
13968 fault_info->behavior = entry->behavior;
13969 fault_info->lo_offset = VME_OFFSET(entry);
13970 fault_info->hi_offset =
13971 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13972 fault_info->no_cache = entry->no_cache;
13973 fault_info->stealth = FALSE;
13974 fault_info->io_sync = FALSE;
13975 if (entry->used_for_jit ||
13976 #if PMAP_CS
13977 pmap_cs_exempt(map->pmap) ||
13978 #endif
13979 entry->vme_resilient_codesign) {
13980 fault_info->cs_bypass = TRUE;
13981 } else {
13982 fault_info->cs_bypass = FALSE;
13983 }
13984 fault_info->pmap_cs_associated = FALSE;
13985 #if CONFIG_PMAP_CS
13986 if (entry->pmap_cs_associated) {
13987 /*
13988 * The pmap layer will validate this page
13989 * before allowing it to be executed from.
13990 */
13991 fault_info->pmap_cs_associated = TRUE;
13992 }
13993 #endif /* CONFIG_PMAP_CS */
13994 fault_info->mark_zf_absent = FALSE;
13995 fault_info->batch_pmap_op = FALSE;
13996 fault_info->resilient_media = entry->vme_resilient_media;
13997 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13998 if (entry->translated_allow_execute) {
13999 fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
14000 }
14001 }
14002
14003 /*
14004 * Lock the object to prevent it from disappearing
14005 */
14006 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
14007 if (contended == NULL) {
14008 vm_object_lock(*object);
14009 } else {
14010 *contended = vm_object_lock_check_contended(*object);
14011 }
14012 } else {
14013 vm_object_lock_shared(*object);
14014 }
14015
14016 /*
14017 * Save the version number
14018 */
14019
14020 out_version->main_timestamp = map->timestamp;
14021
14022 return KERN_SUCCESS;
14023 }
14024
14025
14026 /*
14027 * vm_map_verify:
14028 *
14029 * Verifies that the map in question has not changed
14030 * since the given version. The map has to be locked
14031 * ("shared" mode is fine) before calling this function
14032 * and it will be returned locked too.
14033 */
14034 boolean_t
14035 vm_map_verify(
14036 vm_map_t map,
14037 vm_map_version_t *version) /* REF */
14038 {
14039 boolean_t result;
14040
14041 vm_map_lock_assert_held(map);
14042 result = (map->timestamp == version->main_timestamp);
14043
14044 return result;
14045 }
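/*
 * Illustrative sketch (not part of the original source) of the intended
 * lookup/verify pattern: a caller saves the version filled in by a map
 * lookup, drops the map lock to do blocking work, then re-takes the lock
 * and calls vm_map_verify() to decide whether its cached lookup results
 * are still valid or whether the lookup must be redone.
 *
 *	vm_map_version_t version;	// filled in with map->timestamp at lookup time
 *
 *	vm_map_unlock_read(map);
 *	// ... blocking work: page-in, copying, etc. ...
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed underneath us: redo the lookup
 *	}
 */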
14046
14047 /*
14048 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14049 * Goes away after regular vm_region_recurse function migrates to
14050 * 64 bits
14051 * vm_region_recurse: A form of vm_region which follows the
14052 * submaps in a target map
14053 *
14054 */
14055
14056 kern_return_t
14057 vm_map_region_recurse_64(
14058 vm_map_t map,
14059 vm_map_offset_t *address, /* IN/OUT */
14060 vm_map_size_t *size, /* OUT */
14061 natural_t *nesting_depth, /* IN/OUT */
14062 vm_region_submap_info_64_t submap_info, /* IN/OUT */
14063 mach_msg_type_number_t *count) /* IN/OUT */
14064 {
14065 mach_msg_type_number_t original_count;
14066 vm_region_extended_info_data_t extended;
14067 vm_map_entry_t tmp_entry;
14068 vm_map_offset_t user_address;
14069 unsigned int user_max_depth;
14070
14071 /*
14072 * "curr_entry" is the VM map entry preceding or including the
14073 * address we're looking for.
14074 * "curr_map" is the map or sub-map containing "curr_entry".
14075 * "curr_address" is the equivalent of the top map's "user_address"
14076 * in the current map.
14077 * "curr_offset" is the cumulated offset of "curr_map" in the
14078 * target task's address space.
14079 * "curr_depth" is the depth of "curr_map" in the chain of
14080 * sub-maps.
14081 *
14082 * "curr_max_below" and "curr_max_above" limit the range (around
14083 * "curr_address") we should take into account in the current (sub)map.
14084 * They limit the range to what's visible through the map entries
14085 * we've traversed from the top map to the current map.
14086 *
14087 */
14088 vm_map_entry_t curr_entry;
14089 vm_map_address_t curr_address;
14090 vm_map_offset_t curr_offset;
14091 vm_map_t curr_map;
14092 unsigned int curr_depth;
14093 vm_map_offset_t curr_max_below, curr_max_above;
14094 vm_map_offset_t curr_skip;
14095
14096 /*
14097 * "next_" is the same as "curr_" but for the VM region immediately
14098 * after the address we're looking for. We need to keep track of this
14099 * too because we want to return info about that region if the
14100 * address we're looking for is not mapped.
14101 */
14102 vm_map_entry_t next_entry;
14103 vm_map_offset_t next_offset;
14104 vm_map_offset_t next_address;
14105 vm_map_t next_map;
14106 unsigned int next_depth;
14107 vm_map_offset_t next_max_below, next_max_above;
14108 vm_map_offset_t next_skip;
14109
14110 boolean_t look_for_pages;
14111 vm_region_submap_short_info_64_t short_info;
14112 boolean_t do_region_footprint;
14113 int effective_page_size, effective_page_shift;
14114
14115 if (map == VM_MAP_NULL) {
14116 /* no address space to work on */
14117 return KERN_INVALID_ARGUMENT;
14118 }
14119
14120 effective_page_shift = vm_self_region_page_shift(map);
14121 effective_page_size = (1 << effective_page_shift);
14122
14123 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14124 /*
14125 * "info" structure is not big enough and
14126 * would overflow
14127 */
14128 return KERN_INVALID_ARGUMENT;
14129 }
14130
14131 do_region_footprint = task_self_region_footprint();
14132 original_count = *count;
14133
14134 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14135 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14136 look_for_pages = FALSE;
14137 short_info = (vm_region_submap_short_info_64_t) submap_info;
14138 submap_info = NULL;
14139 } else {
14140 look_for_pages = TRUE;
14141 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14142 short_info = NULL;
14143
14144 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14145 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14146 }
14147 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14148 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14149 }
14150 }
14151
14152 user_address = *address;
14153 user_max_depth = *nesting_depth;
14154
14155 if (not_in_kdp) {
14156 vm_map_lock_read(map);
14157 }
14158
14159 recurse_again:
14160 curr_entry = NULL;
14161 curr_map = map;
14162 curr_address = user_address;
14163 curr_offset = 0;
14164 curr_skip = 0;
14165 curr_depth = 0;
14166 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14167 curr_max_below = curr_address;
14168
14169 next_entry = NULL;
14170 next_map = NULL;
14171 next_address = 0;
14172 next_offset = 0;
14173 next_skip = 0;
14174 next_depth = 0;
14175 next_max_above = (vm_map_offset_t) -1;
14176 next_max_below = (vm_map_offset_t) -1;
14177
14178 for (;;) {
14179 if (vm_map_lookup_entry(curr_map,
14180 curr_address,
14181 &tmp_entry)) {
14182 /* tmp_entry contains the address we're looking for */
14183 curr_entry = tmp_entry;
14184 } else {
14185 vm_map_offset_t skip;
14186 /*
14187 * The address is not mapped. "tmp_entry" is the
14188 * map entry preceding the address. We want the next
14189 * one, if it exists.
14190 */
14191 curr_entry = tmp_entry->vme_next;
14192
14193 if (curr_entry == vm_map_to_entry(curr_map) ||
14194 (curr_entry->vme_start >=
14195 curr_address + curr_max_above)) {
14196 /* no next entry at this level: stop looking */
14197 if (not_in_kdp) {
14198 vm_map_unlock_read(curr_map);
14199 }
14200 curr_entry = NULL;
14201 curr_map = NULL;
14202 curr_skip = 0;
14203 curr_offset = 0;
14204 curr_depth = 0;
14205 curr_max_above = 0;
14206 curr_max_below = 0;
14207 break;
14208 }
14209
14210 /* adjust current address and offset */
14211 skip = curr_entry->vme_start - curr_address;
14212 curr_address = curr_entry->vme_start;
14213 curr_skip += skip;
14214 curr_offset += skip;
14215 curr_max_above -= skip;
14216 curr_max_below = 0;
14217 }
14218
14219 /*
14220 * Is the next entry at this level closer to the address (or
14221 * deeper in the submap chain) than the one we had
14222 * so far?
14223 */
14224 tmp_entry = curr_entry->vme_next;
14225 if (tmp_entry == vm_map_to_entry(curr_map)) {
14226 /* no next entry at this level */
14227 } else if (tmp_entry->vme_start >=
14228 curr_address + curr_max_above) {
14229 /*
14230 * tmp_entry is beyond the scope of what we mapped of
14231 * this submap in the upper level: ignore it.
14232 */
14233 } else if ((next_entry == NULL) ||
14234 (tmp_entry->vme_start + curr_offset <=
14235 next_entry->vme_start + next_offset)) {
14236 /*
14237 * We didn't have a "next_entry" or this one is
14238 * closer to the address we're looking for:
14239 * use this "tmp_entry" as the new "next_entry".
14240 */
14241 if (next_entry != NULL) {
14242 /* unlock the last "next_map" */
14243 if (next_map != curr_map && not_in_kdp) {
14244 vm_map_unlock_read(next_map);
14245 }
14246 }
14247 next_entry = tmp_entry;
14248 next_map = curr_map;
14249 next_depth = curr_depth;
14250 next_address = next_entry->vme_start;
14251 next_skip = curr_skip;
14252 next_skip += (next_address - curr_address);
14253 next_offset = curr_offset;
14254 next_offset += (next_address - curr_address);
14255 next_max_above = MIN(next_max_above, curr_max_above);
14256 next_max_above = MIN(next_max_above,
14257 next_entry->vme_end - next_address);
14258 next_max_below = MIN(next_max_below, curr_max_below);
14259 next_max_below = MIN(next_max_below,
14260 next_address - next_entry->vme_start);
14261 }
14262
14263 /*
14264 * "curr_max_{above,below}" allow us to keep track of the
14265 * portion of the submap that is actually mapped at this level:
14266 * the rest of that submap is irrelevant to us, since it's not
14267 * mapped here.
14268 * The relevant portion of the map starts at
14269 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14270 */
14271 curr_max_above = MIN(curr_max_above,
14272 curr_entry->vme_end - curr_address);
14273 curr_max_below = MIN(curr_max_below,
14274 curr_address - curr_entry->vme_start);
14275
14276 if (!curr_entry->is_sub_map ||
14277 curr_depth >= user_max_depth) {
14278 /*
14279 * We hit a leaf map or we reached the maximum depth
14280 * we could, so stop looking. Keep the current map
14281 * locked.
14282 */
14283 break;
14284 }
14285
14286 /*
14287 * Get down to the next submap level.
14288 */
14289
14290 /*
14291 * Lock the next level and unlock the current level,
14292 * unless we need to keep it locked to access the "next_entry"
14293 * later.
14294 */
14295 if (not_in_kdp) {
14296 vm_map_lock_read(VME_SUBMAP(curr_entry));
14297 }
14298 if (curr_map == next_map) {
14299 /* keep "next_map" locked in case we need it */
14300 } else {
14301 /* release this map */
14302 if (not_in_kdp) {
14303 vm_map_unlock_read(curr_map);
14304 }
14305 }
14306
14307 /*
14308 * Adjust the offset. "curr_entry" maps the submap
14309 * at relative address "curr_entry->vme_start" in the
14310 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14311 * bytes of the submap.
14312 * "curr_offset" always represents the offset of a virtual
14313 * address in the curr_map relative to the absolute address
14314 * space (i.e. the top-level VM map).
14315 */
14316 curr_offset +=
14317 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14318 curr_address = user_address + curr_offset;
14319 /* switch to the submap */
14320 curr_map = VME_SUBMAP(curr_entry);
14321 curr_depth++;
14322 curr_entry = NULL;
14323 }
14324
14325 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14326 // so probably should be a real 32b ID vs. ptr.
14327 // Current users just check for equality
14328
14329 if (curr_entry == NULL) {
14330 /* no VM region contains the address... */
14331
14332 if (do_region_footprint && /* we want footprint numbers */
14333 next_entry == NULL && /* & there are no more regions */
14334 /* & we haven't already provided our fake region: */
14335 user_address <= vm_map_last_entry(map)->vme_end) {
14336 ledger_amount_t ledger_resident, ledger_compressed;
14337
14338 /*
14339 * Add a fake memory region to account for
14340 * purgeable and/or ledger-tagged memory that
14341 * counts towards this task's memory footprint,
14342 * i.e. the resident/compressed pages of non-volatile
14343 * objects owned by that task.
14344 */
14345 task_ledgers_footprint(map->pmap->ledger,
14346 &ledger_resident,
14347 &ledger_compressed);
14348 if (ledger_resident + ledger_compressed == 0) {
14349 /* no purgeable memory usage to report */
14350 return KERN_INVALID_ADDRESS;
14351 }
14352 /* fake region to show nonvolatile footprint */
14353 if (look_for_pages) {
14354 submap_info->protection = VM_PROT_DEFAULT;
14355 submap_info->max_protection = VM_PROT_DEFAULT;
14356 submap_info->inheritance = VM_INHERIT_DEFAULT;
14357 submap_info->offset = 0;
14358 submap_info->user_tag = -1;
14359 submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14360 submap_info->pages_shared_now_private = 0;
14361 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14362 submap_info->pages_dirtied = submap_info->pages_resident;
14363 submap_info->ref_count = 1;
14364 submap_info->shadow_depth = 0;
14365 submap_info->external_pager = 0;
14366 submap_info->share_mode = SM_PRIVATE;
14367 submap_info->is_submap = 0;
14368 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14369 submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14370 submap_info->user_wired_count = 0;
14371 submap_info->pages_reusable = 0;
14372 } else {
14373 short_info->user_tag = -1;
14374 short_info->offset = 0;
14375 short_info->protection = VM_PROT_DEFAULT;
14376 short_info->inheritance = VM_INHERIT_DEFAULT;
14377 short_info->max_protection = VM_PROT_DEFAULT;
14378 short_info->behavior = VM_BEHAVIOR_DEFAULT;
14379 short_info->user_wired_count = 0;
14380 short_info->is_submap = 0;
14381 short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14382 short_info->external_pager = 0;
14383 short_info->shadow_depth = 0;
14384 short_info->share_mode = SM_PRIVATE;
14385 short_info->ref_count = 1;
14386 }
14387 *nesting_depth = 0;
14388 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14389 // *address = user_address;
14390 *address = vm_map_last_entry(map)->vme_end;
14391 return KERN_SUCCESS;
14392 }
14393
14394 if (next_entry == NULL) {
14395 /* ... and no VM region follows it either */
14396 return KERN_INVALID_ADDRESS;
14397 }
14398 /* ... gather info about the next VM region */
14399 curr_entry = next_entry;
14400 curr_map = next_map; /* still locked ... */
14401 curr_address = next_address;
14402 curr_skip = next_skip;
14403 curr_offset = next_offset;
14404 curr_depth = next_depth;
14405 curr_max_above = next_max_above;
14406 curr_max_below = next_max_below;
14407 } else {
14408 /* we won't need "next_entry" after all */
14409 if (next_entry != NULL) {
14410 /* release "next_map" */
14411 if (next_map != curr_map && not_in_kdp) {
14412 vm_map_unlock_read(next_map);
14413 }
14414 }
14415 }
14416 next_entry = NULL;
14417 next_map = NULL;
14418 next_offset = 0;
14419 next_skip = 0;
14420 next_depth = 0;
14421 next_max_below = -1;
14422 next_max_above = -1;
14423
14424 if (curr_entry->is_sub_map &&
14425 curr_depth < user_max_depth) {
14426 /*
14427 * We're not as deep as we could be: we must have
14428 * gone back up after not finding anything mapped
14429 * below the original top-level map entry's range.
14430 * Let's move "curr_address" forward and recurse again.
14431 */
14432 user_address = curr_address;
14433 goto recurse_again;
14434 }
14435
14436 *nesting_depth = curr_depth;
14437 *size = curr_max_above + curr_max_below;
14438 *address = user_address + curr_skip - curr_max_below;
14439
14440 if (look_for_pages) {
14441 submap_info->user_tag = VME_ALIAS(curr_entry);
14442 submap_info->offset = VME_OFFSET(curr_entry);
14443 submap_info->protection = curr_entry->protection;
14444 submap_info->inheritance = curr_entry->inheritance;
14445 submap_info->max_protection = curr_entry->max_protection;
14446 submap_info->behavior = curr_entry->behavior;
14447 submap_info->user_wired_count = curr_entry->user_wired_count;
14448 submap_info->is_submap = curr_entry->is_sub_map;
14449 submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14450 } else {
14451 short_info->user_tag = VME_ALIAS(curr_entry);
14452 short_info->offset = VME_OFFSET(curr_entry);
14453 short_info->protection = curr_entry->protection;
14454 short_info->inheritance = curr_entry->inheritance;
14455 short_info->max_protection = curr_entry->max_protection;
14456 short_info->behavior = curr_entry->behavior;
14457 short_info->user_wired_count = curr_entry->user_wired_count;
14458 short_info->is_submap = curr_entry->is_sub_map;
14459 short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14460 }
14461
14462 extended.pages_resident = 0;
14463 extended.pages_swapped_out = 0;
14464 extended.pages_shared_now_private = 0;
14465 extended.pages_dirtied = 0;
14466 extended.pages_reusable = 0;
14467 extended.external_pager = 0;
14468 extended.shadow_depth = 0;
14469 extended.share_mode = SM_EMPTY;
14470 extended.ref_count = 0;
14471
14472 if (not_in_kdp) {
14473 if (!curr_entry->is_sub_map) {
14474 vm_map_offset_t range_start, range_end;
14475 range_start = MAX((curr_address - curr_max_below),
14476 curr_entry->vme_start);
14477 range_end = MIN((curr_address + curr_max_above),
14478 curr_entry->vme_end);
14479 vm_map_region_walk(curr_map,
14480 range_start,
14481 curr_entry,
14482 (VME_OFFSET(curr_entry) +
14483 (range_start -
14484 curr_entry->vme_start)),
14485 range_end - range_start,
14486 &extended,
14487 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14488 if (extended.external_pager &&
14489 extended.ref_count == 2 &&
14490 extended.share_mode == SM_SHARED) {
14491 extended.share_mode = SM_PRIVATE;
14492 }
14493 } else {
14494 if (curr_entry->use_pmap) {
14495 extended.share_mode = SM_TRUESHARED;
14496 } else {
14497 extended.share_mode = SM_PRIVATE;
14498 }
14499 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
14500 }
14501 }
14502
14503 if (look_for_pages) {
14504 submap_info->pages_resident = extended.pages_resident;
14505 submap_info->pages_swapped_out = extended.pages_swapped_out;
14506 submap_info->pages_shared_now_private =
14507 extended.pages_shared_now_private;
14508 submap_info->pages_dirtied = extended.pages_dirtied;
14509 submap_info->external_pager = extended.external_pager;
14510 submap_info->shadow_depth = extended.shadow_depth;
14511 submap_info->share_mode = extended.share_mode;
14512 submap_info->ref_count = extended.ref_count;
14513
14514 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14515 submap_info->pages_reusable = extended.pages_reusable;
14516 }
14517 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14518 submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
14519 }
14520 } else {
14521 short_info->external_pager = extended.external_pager;
14522 short_info->shadow_depth = extended.shadow_depth;
14523 short_info->share_mode = extended.share_mode;
14524 short_info->ref_count = extended.ref_count;
14525 }
14526
14527 if (not_in_kdp) {
14528 vm_map_unlock_read(curr_map);
14529 }
14530
14531 return KERN_SUCCESS;
14532 }
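/*
 * User-space usage sketch (illustrative only, not compiled here): the MIG
 * routine mach_vm_region_recurse() lands in vm_map_region_recurse_64().
 * A typical caller walks a task's address space like this:
 *
 *	mach_vm_address_t addr = 0;
 *	natural_t depth = 0;
 *	for (;;) {
 *		mach_vm_size_t size = 0;
 *		vm_region_submap_info_data_64_t info;
 *		mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *		kern_return_t kr = mach_vm_region_recurse(mach_task_self(),
 *		    &addr, &size, &depth,
 *		    (vm_region_recurse_info_t)&info, &count);
 *		if (kr != KERN_SUCCESS) {
 *			break;		// KERN_INVALID_ADDRESS past the last region
 *		}
 *		if (info.is_submap) {
 *			depth++;	// descend into the submap at the same address
 *			continue;
 *		}
 *		// ... consume "addr", "size", "info" ...
 *		addr += size;
 *	}
 */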
14533
14534 /*
14535 * vm_region:
14536 *
14537 * User call to obtain information about a region in
14538 * a task's address map. Currently, only one flavor is
14539 * supported.
14540 *
14541 * XXX The reserved and behavior fields cannot be filled
14542 * in until the vm merge from the IK is completed, and
14543 * vm_reserve is implemented.
14544 */
14545
14546 kern_return_t
14547 vm_map_region(
14548 vm_map_t map,
14549 vm_map_offset_t *address, /* IN/OUT */
14550 vm_map_size_t *size, /* OUT */
14551 vm_region_flavor_t flavor, /* IN */
14552 vm_region_info_t info, /* OUT */
14553 mach_msg_type_number_t *count, /* IN/OUT */
14554 mach_port_t *object_name) /* OUT */
14555 {
14556 vm_map_entry_t tmp_entry;
14557 vm_map_entry_t entry;
14558 vm_map_offset_t start;
14559
14560 if (map == VM_MAP_NULL) {
14561 return KERN_INVALID_ARGUMENT;
14562 }
14563
14564 switch (flavor) {
14565 case VM_REGION_BASIC_INFO:
14566 /* legacy for old 32-bit objects info */
14567 {
14568 vm_region_basic_info_t basic;
14569
14570 if (*count < VM_REGION_BASIC_INFO_COUNT) {
14571 return KERN_INVALID_ARGUMENT;
14572 }
14573
14574 basic = (vm_region_basic_info_t) info;
14575 *count = VM_REGION_BASIC_INFO_COUNT;
14576
14577 vm_map_lock_read(map);
14578
14579 start = *address;
14580 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14581 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14582 vm_map_unlock_read(map);
14583 return KERN_INVALID_ADDRESS;
14584 }
14585 } else {
14586 entry = tmp_entry;
14587 }
14588
14589 start = entry->vme_start;
14590
14591 basic->offset = (uint32_t)VME_OFFSET(entry);
14592 basic->protection = entry->protection;
14593 basic->inheritance = entry->inheritance;
14594 basic->max_protection = entry->max_protection;
14595 basic->behavior = entry->behavior;
14596 basic->user_wired_count = entry->user_wired_count;
14597 basic->reserved = entry->is_sub_map;
14598 *address = start;
14599 *size = (entry->vme_end - start);
14600
14601 if (object_name) {
14602 *object_name = IP_NULL;
14603 }
14604 if (entry->is_sub_map) {
14605 basic->shared = FALSE;
14606 } else {
14607 basic->shared = entry->is_shared;
14608 }
14609
14610 vm_map_unlock_read(map);
14611 return KERN_SUCCESS;
14612 }
14613
14614 case VM_REGION_BASIC_INFO_64:
14615 {
14616 vm_region_basic_info_64_t basic;
14617
14618 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
14619 return KERN_INVALID_ARGUMENT;
14620 }
14621
14622 basic = (vm_region_basic_info_64_t) info;
14623 *count = VM_REGION_BASIC_INFO_COUNT_64;
14624
14625 vm_map_lock_read(map);
14626
14627 start = *address;
14628 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14629 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14630 vm_map_unlock_read(map);
14631 return KERN_INVALID_ADDRESS;
14632 }
14633 } else {
14634 entry = tmp_entry;
14635 }
14636
14637 start = entry->vme_start;
14638
14639 basic->offset = VME_OFFSET(entry);
14640 basic->protection = entry->protection;
14641 basic->inheritance = entry->inheritance;
14642 basic->max_protection = entry->max_protection;
14643 basic->behavior = entry->behavior;
14644 basic->user_wired_count = entry->user_wired_count;
14645 basic->reserved = entry->is_sub_map;
14646 *address = start;
14647 *size = (entry->vme_end - start);
14648
14649 if (object_name) {
14650 *object_name = IP_NULL;
14651 }
14652 if (entry->is_sub_map) {
14653 basic->shared = FALSE;
14654 } else {
14655 basic->shared = entry->is_shared;
14656 }
14657
14658 vm_map_unlock_read(map);
14659 return KERN_SUCCESS;
14660 }
14661 case VM_REGION_EXTENDED_INFO:
14662 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
14663 return KERN_INVALID_ARGUMENT;
14664 }
14665 OS_FALLTHROUGH;
14666 case VM_REGION_EXTENDED_INFO__legacy:
14667 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
14668 return KERN_INVALID_ARGUMENT;
14669 }
14670
14671 {
14672 vm_region_extended_info_t extended;
14673 mach_msg_type_number_t original_count;
14674 int effective_page_size, effective_page_shift;
14675
14676 extended = (vm_region_extended_info_t) info;
14677
14678 effective_page_shift = vm_self_region_page_shift(map);
14679 effective_page_size = (1 << effective_page_shift);
14680
14681 vm_map_lock_read(map);
14682
14683 start = *address;
14684 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14685 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14686 vm_map_unlock_read(map);
14687 return KERN_INVALID_ADDRESS;
14688 }
14689 } else {
14690 entry = tmp_entry;
14691 }
14692 start = entry->vme_start;
14693
14694 extended->protection = entry->protection;
14695 extended->user_tag = VME_ALIAS(entry);
14696 extended->pages_resident = 0;
14697 extended->pages_swapped_out = 0;
14698 extended->pages_shared_now_private = 0;
14699 extended->pages_dirtied = 0;
14700 extended->external_pager = 0;
14701 extended->shadow_depth = 0;
14702
14703 original_count = *count;
14704 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14705 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14706 } else {
14707 extended->pages_reusable = 0;
14708 *count = VM_REGION_EXTENDED_INFO_COUNT;
14709 }
14710
14711 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
14712
14713 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14714 extended->share_mode = SM_PRIVATE;
14715 }
14716
14717 if (object_name) {
14718 *object_name = IP_NULL;
14719 }
14720 *address = start;
14721 *size = (entry->vme_end - start);
14722
14723 vm_map_unlock_read(map);
14724 return KERN_SUCCESS;
14725 }
14726 case VM_REGION_TOP_INFO:
14727 {
14728 vm_region_top_info_t top;
14729
14730 if (*count < VM_REGION_TOP_INFO_COUNT) {
14731 return KERN_INVALID_ARGUMENT;
14732 }
14733
14734 top = (vm_region_top_info_t) info;
14735 *count = VM_REGION_TOP_INFO_COUNT;
14736
14737 vm_map_lock_read(map);
14738
14739 start = *address;
14740 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14741 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14742 vm_map_unlock_read(map);
14743 return KERN_INVALID_ADDRESS;
14744 }
14745 } else {
14746 entry = tmp_entry;
14747 }
14748 start = entry->vme_start;
14749
14750 top->private_pages_resident = 0;
14751 top->shared_pages_resident = 0;
14752
14753 vm_map_region_top_walk(entry, top);
14754
14755 if (object_name) {
14756 *object_name = IP_NULL;
14757 }
14758 *address = start;
14759 *size = (entry->vme_end - start);
14760
14761 vm_map_unlock_read(map);
14762 return KERN_SUCCESS;
14763 }
14764 default:
14765 return KERN_INVALID_ARGUMENT;
14766 }
14767 }
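/*
 * User-space usage sketch (illustrative only, not compiled here): the MIG
 * routine mach_vm_region() reaches vm_map_region() above. For example,
 * querying the region containing an arbitrary address:
 *
 *	mach_vm_address_t addr = (mach_vm_address_t)(uintptr_t)some_pointer;	// hypothetical input
 *	mach_vm_size_t size = 0;
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t object_name = MACH_PORT_NULL;
 *
 *	kern_return_t kr = mach_vm_region(mach_task_self(), &addr, &size,
 *	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info,
 *	    &count, &object_name);
 *	// on success, [addr, addr + size) contains the queried address and
 *	// info.protection / info.max_protection describe the mapping
 */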
14768
14769 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14770 MIN((entry_size), \
14771 ((obj)->all_reusable ? \
14772 (obj)->wired_page_count : \
14773 (obj)->resident_page_count - (obj)->reusable_page_count))
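/*
 * Worked example (illustrative numbers): for an entry spanning 8 pages whose
 * object has resident_page_count == 10 and reusable_page_count == 4 (with
 * all_reusable == FALSE), the macro reports MIN(8, 10 - 4) == 6 resident
 * pages; if the object were all_reusable, only wired_page_count would count.
 */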
14774
14775 void
14776 vm_map_region_top_walk(
14777 vm_map_entry_t entry,
14778 vm_region_top_info_t top)
14779 {
14780 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14781 top->share_mode = SM_EMPTY;
14782 top->ref_count = 0;
14783 top->obj_id = 0;
14784 return;
14785 }
14786
14787 {
14788 struct vm_object *obj, *tmp_obj;
14789 int ref_count;
14790 uint32_t entry_size;
14791
14792 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14793
14794 obj = VME_OBJECT(entry);
14795
14796 vm_object_lock(obj);
14797
14798 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14799 ref_count--;
14800 }
14801
14802 assert(obj->reusable_page_count <= obj->resident_page_count);
14803 if (obj->shadow) {
14804 if (ref_count == 1) {
14805 top->private_pages_resident =
14806 OBJ_RESIDENT_COUNT(obj, entry_size);
14807 } else {
14808 top->shared_pages_resident =
14809 OBJ_RESIDENT_COUNT(obj, entry_size);
14810 }
14811 top->ref_count = ref_count;
14812 top->share_mode = SM_COW;
14813
14814 while ((tmp_obj = obj->shadow)) {
14815 vm_object_lock(tmp_obj);
14816 vm_object_unlock(obj);
14817 obj = tmp_obj;
14818
14819 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14820 ref_count--;
14821 }
14822
14823 assert(obj->reusable_page_count <= obj->resident_page_count);
14824 top->shared_pages_resident +=
14825 OBJ_RESIDENT_COUNT(obj, entry_size);
14826 top->ref_count += ref_count - 1;
14827 }
14828 } else {
14829 if (entry->superpage_size) {
14830 top->share_mode = SM_LARGE_PAGE;
14831 top->shared_pages_resident = 0;
14832 top->private_pages_resident = entry_size;
14833 } else if (entry->needs_copy) {
14834 top->share_mode = SM_COW;
14835 top->shared_pages_resident =
14836 OBJ_RESIDENT_COUNT(obj, entry_size);
14837 } else {
14838 if (ref_count == 1 ||
14839 (ref_count == 2 && obj->named)) {
14840 top->share_mode = SM_PRIVATE;
14841 top->private_pages_resident =
14842 OBJ_RESIDENT_COUNT(obj,
14843 entry_size);
14844 } else {
14845 top->share_mode = SM_SHARED;
14846 top->shared_pages_resident =
14847 OBJ_RESIDENT_COUNT(obj,
14848 entry_size);
14849 }
14850 }
14851 top->ref_count = ref_count;
14852 }
14853 /* XXX K64: obj_id will be truncated */
14854 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14855
14856 vm_object_unlock(obj);
14857 }
14858 }
14859
14860 void
14861 vm_map_region_walk(
14862 vm_map_t map,
14863 vm_map_offset_t va,
14864 vm_map_entry_t entry,
14865 vm_object_offset_t offset,
14866 vm_object_size_t range,
14867 vm_region_extended_info_t extended,
14868 boolean_t look_for_pages,
14869 mach_msg_type_number_t count)
14870 {
14871 struct vm_object *obj, *tmp_obj;
14872 vm_map_offset_t last_offset;
14873 int i;
14874 int ref_count;
14875 struct vm_object *shadow_object;
14876 unsigned short shadow_depth;
14877 boolean_t do_region_footprint;
14878 int effective_page_size, effective_page_shift;
14879 vm_map_offset_t effective_page_mask;
14880
14881 do_region_footprint = task_self_region_footprint();
14882
14883 if ((VME_OBJECT(entry) == 0) ||
14884 (entry->is_sub_map) ||
14885 (VME_OBJECT(entry)->phys_contiguous &&
14886 !entry->superpage_size)) {
14887 extended->share_mode = SM_EMPTY;
14888 extended->ref_count = 0;
14889 return;
14890 }
14891
14892 if (entry->superpage_size) {
14893 extended->shadow_depth = 0;
14894 extended->share_mode = SM_LARGE_PAGE;
14895 extended->ref_count = 1;
14896 extended->external_pager = 0;
14897
14898 /* TODO4K: Superpage in 4k mode? */
14899 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14900 extended->shadow_depth = 0;
14901 return;
14902 }
14903
14904 effective_page_shift = vm_self_region_page_shift(map);
14905 effective_page_size = (1 << effective_page_shift);
14906 effective_page_mask = effective_page_size - 1;
14907
14908 offset = vm_map_trunc_page(offset, effective_page_mask);
14909
14910 obj = VME_OBJECT(entry);
14911
14912 vm_object_lock(obj);
14913
14914 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14915 ref_count--;
14916 }
14917
14918 if (look_for_pages) {
14919 for (last_offset = offset + range;
14920 offset < last_offset;
14921 offset += effective_page_size, va += effective_page_size) {
14922 if (do_region_footprint) {
14923 int disp;
14924
14925 disp = 0;
14926 if (map->has_corpse_footprint) {
14927 /*
14928 * Query the page info data we saved
14929 * while forking the corpse.
14930 */
14931 vm_map_corpse_footprint_query_page_info(
14932 map,
14933 va,
14934 &disp);
14935 } else {
14936 /*
14937 * Query the pmap.
14938 */
14939 vm_map_footprint_query_page_info(
14940 map,
14941 entry,
14942 va,
14943 &disp);
14944 }
14945 if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
14946 extended->pages_resident++;
14947 }
14948 if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
14949 extended->pages_reusable++;
14950 }
14951 if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
14952 extended->pages_dirtied++;
14953 }
14954 if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14955 extended->pages_swapped_out++;
14956 }
14957 continue;
14958 }
14959
14960 vm_map_region_look_for_page(map, va, obj,
14961 vm_object_trunc_page(offset), ref_count,
14962 0, extended, count);
14963 }
14964
14965 if (do_region_footprint) {
14966 goto collect_object_info;
14967 }
14968 } else {
14969 collect_object_info:
14970 shadow_object = obj->shadow;
14971 shadow_depth = 0;
14972
14973 if (!(obj->internal)) {
14974 extended->external_pager = 1;
14975 }
14976
14977 if (shadow_object != VM_OBJECT_NULL) {
14978 vm_object_lock(shadow_object);
14979 for (;
14980 shadow_object != VM_OBJECT_NULL;
14981 shadow_depth++) {
14982 vm_object_t next_shadow;
14983
14984 if (!(shadow_object->internal)) {
14985 extended->external_pager = 1;
14986 }
14987
14988 next_shadow = shadow_object->shadow;
14989 if (next_shadow) {
14990 vm_object_lock(next_shadow);
14991 }
14992 vm_object_unlock(shadow_object);
14993 shadow_object = next_shadow;
14994 }
14995 }
14996 extended->shadow_depth = shadow_depth;
14997 }
14998
14999 if (extended->shadow_depth || entry->needs_copy) {
15000 extended->share_mode = SM_COW;
15001 } else {
15002 if (ref_count == 1) {
15003 extended->share_mode = SM_PRIVATE;
15004 } else {
15005 if (obj->true_share) {
15006 extended->share_mode = SM_TRUESHARED;
15007 } else {
15008 extended->share_mode = SM_SHARED;
15009 }
15010 }
15011 }
15012 extended->ref_count = ref_count - extended->shadow_depth;
15013
15014 for (i = 0; i < extended->shadow_depth; i++) {
15015 if ((tmp_obj = obj->shadow) == 0) {
15016 break;
15017 }
15018 vm_object_lock(tmp_obj);
15019 vm_object_unlock(obj);
15020
15021 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
15022 ref_count--;
15023 }
15024
15025 extended->ref_count += ref_count;
15026 obj = tmp_obj;
15027 }
15028 vm_object_unlock(obj);
15029
15030 if (extended->share_mode == SM_SHARED) {
15031 vm_map_entry_t cur;
15032 vm_map_entry_t last;
15033 int my_refs;
15034
15035 obj = VME_OBJECT(entry);
15036 last = vm_map_to_entry(map);
15037 my_refs = 0;
15038
15039 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
15040 ref_count--;
15041 }
15042 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
15043 my_refs += vm_map_region_count_obj_refs(cur, obj);
15044 }
15045
15046 if (my_refs == ref_count) {
15047 extended->share_mode = SM_PRIVATE_ALIASED;
15048 } else if (my_refs > 1) {
15049 extended->share_mode = SM_SHARED_ALIASED;
15050 }
15051 }
15052 }
15053
15054
15055 /* object is locked on entry and locked on return */
15056
15057
15058 static void
15059 vm_map_region_look_for_page(
15060 __unused vm_map_t map,
15061 __unused vm_map_offset_t va,
15062 vm_object_t object,
15063 vm_object_offset_t offset,
15064 int max_refcnt,
15065 unsigned short depth,
15066 vm_region_extended_info_t extended,
15067 mach_msg_type_number_t count)
15068 {
15069 vm_page_t p;
15070 vm_object_t shadow;
15071 int ref_count;
15072 vm_object_t caller_object;
15073
15074 shadow = object->shadow;
15075 caller_object = object;
15076
15077
15078 while (TRUE) {
15079 if (!(object->internal)) {
15080 extended->external_pager = 1;
15081 }
15082
15083 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
15084 if (shadow && (max_refcnt == 1)) {
15085 extended->pages_shared_now_private++;
15086 }
15087
15088 if (!p->vmp_fictitious &&
15089 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
15090 extended->pages_dirtied++;
15091 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
15092 if (p->vmp_reusable || object->all_reusable) {
15093 extended->pages_reusable++;
15094 }
15095 }
15096
15097 extended->pages_resident++;
15098
15099 if (object != caller_object) {
15100 vm_object_unlock(object);
15101 }
15102
15103 return;
15104 }
15105 if (object->internal &&
15106 object->alive &&
15107 !object->terminating &&
15108 object->pager_ready) {
15109 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
15110 == VM_EXTERNAL_STATE_EXISTS) {
15111 /* the pager has that page */
15112 extended->pages_swapped_out++;
15113 if (object != caller_object) {
15114 vm_object_unlock(object);
15115 }
15116 return;
15117 }
15118 }
15119
15120 if (shadow) {
15121 vm_object_lock(shadow);
15122
15123 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
15124 ref_count--;
15125 }
15126
15127 if (++depth > extended->shadow_depth) {
15128 extended->shadow_depth = depth;
15129 }
15130
15131 if (ref_count > max_refcnt) {
15132 max_refcnt = ref_count;
15133 }
15134
15135 if (object != caller_object) {
15136 vm_object_unlock(object);
15137 }
15138
15139 offset = offset + object->vo_shadow_offset;
15140 object = shadow;
15141 shadow = object->shadow;
15142 continue;
15143 }
15144 if (object != caller_object) {
15145 vm_object_unlock(object);
15146 }
15147 break;
15148 }
15149 }
15150
15151 static int
15152 vm_map_region_count_obj_refs(
15153 vm_map_entry_t entry,
15154 vm_object_t object)
15155 {
15156 int ref_count;
15157 vm_object_t chk_obj;
15158 vm_object_t tmp_obj;
15159
15160 if (VME_OBJECT(entry) == 0) {
15161 return 0;
15162 }
15163
15164 if (entry->is_sub_map) {
15165 return 0;
15166 } else {
15167 ref_count = 0;
15168
15169 chk_obj = VME_OBJECT(entry);
15170 vm_object_lock(chk_obj);
15171
15172 while (chk_obj) {
15173 if (chk_obj == object) {
15174 ref_count++;
15175 }
15176 tmp_obj = chk_obj->shadow;
15177 if (tmp_obj) {
15178 vm_object_lock(tmp_obj);
15179 }
15180 vm_object_unlock(chk_obj);
15181
15182 chk_obj = tmp_obj;
15183 }
15184 }
15185 return ref_count;
15186 }
15187
15188
15189 /*
15190 * Routine: vm_map_simplify
15191 *
15192 * Description:
15193 * Attempt to simplify the map representation in
15194 * the vicinity of the given starting address.
15195 * Note:
15196 * This routine is intended primarily to keep the
15197 * kernel maps more compact -- they generally don't
15198 * benefit from the "expand a map entry" technology
15199 * at allocation time because the adjacent entry
15200 * is often wired down.
15201 */
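/*
 * Illustrative example (hypothetical addresses): two adjacent entries
 *
 *	[0x1000, 0x2000)  object O, offset 0x0
 *	[0x2000, 0x3000)  object O, offset 0x1000
 *
 * with identical protections, inheritance, wiring, aliases, etc. can be
 * coalesced by vm_map_simplify_entry() into a single entry
 *
 *	[0x1000, 0x3000)  object O, offset 0x0
 *
 * which is exactly what the long conjunction of checks below verifies.
 */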
15202 void
15203 vm_map_simplify_entry(
15204 vm_map_t map,
15205 vm_map_entry_t this_entry)
15206 {
15207 vm_map_entry_t prev_entry;
15208
15209 counter(c_vm_map_simplify_entry_called++);
15210
15211 prev_entry = this_entry->vme_prev;
15212
15213 if ((this_entry != vm_map_to_entry(map)) &&
15214 (prev_entry != vm_map_to_entry(map)) &&
15215
15216 (prev_entry->vme_end == this_entry->vme_start) &&
15217
15218 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
15219 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
15220 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
15221 prev_entry->vme_start))
15222 == VME_OFFSET(this_entry)) &&
15223
15224 (prev_entry->behavior == this_entry->behavior) &&
15225 (prev_entry->needs_copy == this_entry->needs_copy) &&
15226 (prev_entry->protection == this_entry->protection) &&
15227 (prev_entry->max_protection == this_entry->max_protection) &&
15228 (prev_entry->inheritance == this_entry->inheritance) &&
15229 (prev_entry->use_pmap == this_entry->use_pmap) &&
15230 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
15231 (prev_entry->no_cache == this_entry->no_cache) &&
15232 (prev_entry->permanent == this_entry->permanent) &&
15233 (prev_entry->map_aligned == this_entry->map_aligned) &&
15234 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
15235 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
15236 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
15237 /* from_reserved_zone: OK if that field doesn't match */
15238 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
15239 (prev_entry->vme_resilient_codesign ==
15240 this_entry->vme_resilient_codesign) &&
15241 (prev_entry->vme_resilient_media ==
15242 this_entry->vme_resilient_media) &&
15243 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
15244
15245 (prev_entry->wired_count == this_entry->wired_count) &&
15246 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
15247
15248 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
15249 (prev_entry->in_transition == FALSE) &&
15250 (this_entry->in_transition == FALSE) &&
15251 (prev_entry->needs_wakeup == FALSE) &&
15252 (this_entry->needs_wakeup == FALSE) &&
15253 (prev_entry->is_shared == this_entry->is_shared) &&
15254 (prev_entry->superpage_size == FALSE) &&
15255 (this_entry->superpage_size == FALSE)
15256 ) {
15257 vm_map_store_entry_unlink(map, prev_entry);
15258 assert(prev_entry->vme_start < this_entry->vme_end);
15259 if (prev_entry->map_aligned) {
15260 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
15261 VM_MAP_PAGE_MASK(map)));
15262 }
15263 this_entry->vme_start = prev_entry->vme_start;
15264 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
15265
15266 if (map->holelistenabled) {
15267 vm_map_store_update_first_free(map, this_entry, TRUE);
15268 }
15269
15270 if (prev_entry->is_sub_map) {
15271 vm_map_deallocate(VME_SUBMAP(prev_entry));
15272 } else {
15273 vm_object_deallocate(VME_OBJECT(prev_entry));
15274 }
15275 vm_map_entry_dispose(map, prev_entry);
15276 SAVE_HINT_MAP_WRITE(map, this_entry);
15277 counter(c_vm_map_simplified++);
15278 }
15279 }
15280
15281 void
15282 vm_map_simplify(
15283 vm_map_t map,
15284 vm_map_offset_t start)
15285 {
15286 vm_map_entry_t this_entry;
15287
15288 vm_map_lock(map);
15289 if (vm_map_lookup_entry(map, start, &this_entry)) {
15290 vm_map_simplify_entry(map, this_entry);
15291 vm_map_simplify_entry(map, this_entry->vme_next);
15292 }
15293 counter(c_vm_map_simplify_called++);
15294 vm_map_unlock(map);
15295 }
15296
15297 static void
15298 vm_map_simplify_range(
15299 vm_map_t map,
15300 vm_map_offset_t start,
15301 vm_map_offset_t end)
15302 {
15303 vm_map_entry_t entry;
15304
15305 /*
15306 * The map should be locked (for "write") by the caller.
15307 */
15308
15309 if (start >= end) {
15310 /* invalid address range */
15311 return;
15312 }
15313
15314 start = vm_map_trunc_page(start,
15315 VM_MAP_PAGE_MASK(map));
15316 end = vm_map_round_page(end,
15317 VM_MAP_PAGE_MASK(map));
15318
15319 if (!vm_map_lookup_entry(map, start, &entry)) {
15320 /* "start" is not mapped and "entry" ends before "start" */
15321 if (entry == vm_map_to_entry(map)) {
15322 /* start with first entry in the map */
15323 entry = vm_map_first_entry(map);
15324 } else {
15325 /* start with next entry */
15326 entry = entry->vme_next;
15327 }
15328 }
15329
15330 while (entry != vm_map_to_entry(map) &&
15331 entry->vme_start <= end) {
15332 /* try and coalesce "entry" with its previous entry */
15333 vm_map_simplify_entry(map, entry);
15334 entry = entry->vme_next;
15335 }
15336 }
15337
15338
15339 /*
15340 * Routine: vm_map_machine_attribute
15341 * Purpose:
15342 * Provide machine-specific attributes to mappings,
15343 * such as cacheability etc. for machines that provide
15344 * them. NUMA architectures and machines with big/strange
15345 * caches will use this.
15346 * Note:
15347 * Responsibilities for locking and checking are handled here,
15348 * everything else in the pmap module. If any non-volatile
15349 * information must be kept, the pmap module should handle
15350 * it itself. [This assumes that attributes do not
15351 * need to be inherited, which seems ok to me]
15352 */
15353 kern_return_t
15354 vm_map_machine_attribute(
15355 vm_map_t map,
15356 vm_map_offset_t start,
15357 vm_map_offset_t end,
15358 vm_machine_attribute_t attribute,
15359 vm_machine_attribute_val_t* value) /* IN/OUT */
15360 {
15361 kern_return_t ret;
15362 vm_map_size_t sync_size;
15363 vm_map_entry_t entry;
15364
15365 if (start < vm_map_min(map) || end > vm_map_max(map)) {
15366 return KERN_INVALID_ADDRESS;
15367 }
15368
15369 /* Figure how much memory we need to flush (in page increments) */
15370 sync_size = end - start;
15371
15372 vm_map_lock(map);
15373
15374 if (attribute != MATTR_CACHE) {
15375 /* If we don't have to find physical addresses, we */
15376 /* don't have to do an explicit traversal here. */
15377 ret = pmap_attribute(map->pmap, start, end - start,
15378 attribute, value);
15379 vm_map_unlock(map);
15380 return ret;
15381 }
15382
15383 ret = KERN_SUCCESS; /* Assume it all worked */
15384
15385 while (sync_size) {
15386 if (vm_map_lookup_entry(map, start, &entry)) {
15387 vm_map_size_t sub_size;
15388 if ((entry->vme_end - start) > sync_size) {
15389 sub_size = sync_size;
15390 sync_size = 0;
15391 } else {
15392 sub_size = entry->vme_end - start;
15393 sync_size -= sub_size;
15394 }
15395 if (entry->is_sub_map) {
15396 vm_map_offset_t sub_start;
15397 vm_map_offset_t sub_end;
15398
15399 sub_start = (start - entry->vme_start)
15400 + VME_OFFSET(entry);
15401 sub_end = sub_start + sub_size;
15402 vm_map_machine_attribute(
15403 VME_SUBMAP(entry),
15404 sub_start,
15405 sub_end,
15406 attribute, value);
15407 } else {
15408 if (VME_OBJECT(entry)) {
15409 vm_page_t m;
15410 vm_object_t object;
15411 vm_object_t base_object;
15412 vm_object_t last_object;
15413 vm_object_offset_t offset;
15414 vm_object_offset_t base_offset;
15415 vm_map_size_t range;
15416 range = sub_size;
15417 offset = (start - entry->vme_start)
15418 + VME_OFFSET(entry);
15419 offset = vm_object_trunc_page(offset);
15420 base_offset = offset;
15421 object = VME_OBJECT(entry);
15422 base_object = object;
15423 last_object = NULL;
15424
15425 vm_object_lock(object);
15426
15427 while (range) {
15428 m = vm_page_lookup(
15429 object, offset);
15430
15431 if (m && !m->vmp_fictitious) {
15432 ret =
15433 pmap_attribute_cache_sync(
15434 VM_PAGE_GET_PHYS_PAGE(m),
15435 PAGE_SIZE,
15436 attribute, value);
15437 } else if (object->shadow) {
15438 offset = offset + object->vo_shadow_offset;
15439 last_object = object;
15440 object = object->shadow;
15441 vm_object_lock(last_object->shadow);
15442 vm_object_unlock(last_object);
15443 continue;
15444 }
15445 if (range < PAGE_SIZE) {
15446 range = 0;
15447 } else {
15448 range -= PAGE_SIZE;
15449 }
15450
15451 if (base_object != object) {
15452 vm_object_unlock(object);
15453 vm_object_lock(base_object);
15454 object = base_object;
15455 }
15456 /* Bump to the next page */
15457 base_offset += PAGE_SIZE;
15458 offset = base_offset;
15459 }
15460 vm_object_unlock(object);
15461 }
15462 }
15463 start += sub_size;
15464 } else {
15465 vm_map_unlock(map);
15466 return KERN_FAILURE;
15467 }
15468 }
15469
15470 vm_map_unlock(map);
15471
15472 return ret;
15473 }
15474
15475 /*
15476 * vm_map_behavior_set:
15477 *
15478 * Sets the paging reference behavior of the specified address
15479 * range in the target map. Paging reference behavior affects
15480 * how pagein operations resulting from faults on the map will be
15481 * clustered.
15482 */
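/*
 * User-space sketch (assumption: the BSD madvise(2) layer translates its
 * advice values to these VM_BEHAVIOR_* constants roughly as shown):
 *
 *	madvise(addr, len, MADV_SEQUENTIAL);	// -> VM_BEHAVIOR_SEQUENTIAL
 *	madvise(addr, len, MADV_WILLNEED);	// -> VM_BEHAVIOR_WILLNEED
 *	madvise(addr, len, MADV_FREE);		// -> VM_BEHAVIOR_FREE
 */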
15483 kern_return_t
15484 vm_map_behavior_set(
15485 vm_map_t map,
15486 vm_map_offset_t start,
15487 vm_map_offset_t end,
15488 vm_behavior_t new_behavior)
15489 {
15490 vm_map_entry_t entry;
15491 vm_map_entry_t temp_entry;
15492
15493 if (start > end ||
15494 start < vm_map_min(map) ||
15495 end > vm_map_max(map)) {
15496 return KERN_NO_SPACE;
15497 }
15498
15499 switch (new_behavior) {
15500 /*
15501 * The behaviors in this first block all set a persistent state on the
15502 * specified memory range. All we have to do here is record the desired
15503 * behavior in the vm_map_entry_t's.
15504 */
15505
15506 case VM_BEHAVIOR_DEFAULT:
15507 case VM_BEHAVIOR_RANDOM:
15508 case VM_BEHAVIOR_SEQUENTIAL:
15509 case VM_BEHAVIOR_RSEQNTL:
15510 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
15511 vm_map_lock(map);
15512
15513 /*
15514 * The entire address range must be valid for the map.
15515 * Note that vm_map_range_check() does a
15516 * vm_map_lookup_entry() internally and returns the
15517 * entry containing the start of the address range if
15518 * the entire range is valid.
15519 */
15520 if (vm_map_range_check(map, start, end, &temp_entry)) {
15521 entry = temp_entry;
15522 vm_map_clip_start(map, entry, start);
15523 } else {
15524 vm_map_unlock(map);
15525 return KERN_INVALID_ADDRESS;
15526 }
15527
15528 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
15529 vm_map_clip_end(map, entry, end);
15530 if (entry->is_sub_map) {
15531 assert(!entry->use_pmap);
15532 }
15533
15534 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
15535 entry->zero_wired_pages = TRUE;
15536 } else {
15537 entry->behavior = new_behavior;
15538 }
15539 entry = entry->vme_next;
15540 }
15541
15542 vm_map_unlock(map);
15543 break;
15544
15545 /*
15546 * The rest of these are different from the above in that they cause
15547 * an immediate action to take place as opposed to setting a behavior that
15548 * affects future actions.
15549 */
15550
15551 case VM_BEHAVIOR_WILLNEED:
15552 return vm_map_willneed(map, start, end);
15553
15554 case VM_BEHAVIOR_DONTNEED:
15555 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
15556
15557 case VM_BEHAVIOR_FREE:
15558 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
15559
15560 case VM_BEHAVIOR_REUSABLE:
15561 return vm_map_reusable_pages(map, start, end);
15562
15563 case VM_BEHAVIOR_REUSE:
15564 return vm_map_reuse_pages(map, start, end);
15565
15566 case VM_BEHAVIOR_CAN_REUSE:
15567 return vm_map_can_reuse(map, start, end);
15568
15569 #if MACH_ASSERT
15570 case VM_BEHAVIOR_PAGEOUT:
15571 return vm_map_pageout(map, start, end);
15572 #endif /* MACH_ASSERT */
15573
15574 default:
15575 return KERN_INVALID_ARGUMENT;
15576 }
15577
15578 return KERN_SUCCESS;
15579 }
15580
15581
15582 /*
15583 * Internals for madvise(MADV_WILLNEED) system call.
15584 *
15585 * The implementation does the following:
15586 * a) read-ahead if the mapping corresponds to a mapped regular file
15587 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
15588 */
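/*
 * Minimal user-space sketch of what reaches this path (illustrative only):
 *
 *	#include <sys/mman.h>
 *
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *	if (p != MAP_FAILED) {
 *		madvise(p, len, MADV_WILLNEED);	// file case: triggers the read-ahead path below
 *	}
 */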
15589
15590
15591 static kern_return_t
15592 vm_map_willneed(
15593 vm_map_t map,
15594 vm_map_offset_t start,
15595 vm_map_offset_t end
15596 )
15597 {
15598 vm_map_entry_t entry;
15599 vm_object_t object;
15600 memory_object_t pager;
15601 struct vm_object_fault_info fault_info = {};
15602 kern_return_t kr;
15603 vm_object_size_t len;
15604 vm_object_offset_t offset;
15605
15606 fault_info.interruptible = THREAD_UNINT; /* ignored value */
15607 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
15608 fault_info.stealth = TRUE;
15609
15610 /*
15611 * The MADV_WILLNEED operation doesn't require any changes to the
15612 * vm_map_entry_t's, so the read lock is sufficient.
15613 */
15614
15615 vm_map_lock_read(map);
15616
15617 /*
15618 * The madvise semantics require that the address range be fully
15619 * allocated with no holes. Otherwise, we're required to return
15620 * an error.
15621 */
15622
15623 if (!vm_map_range_check(map, start, end, &entry)) {
15624 vm_map_unlock_read(map);
15625 return KERN_INVALID_ADDRESS;
15626 }
15627
15628 /*
15629 * Examine each vm_map_entry_t in the range.
15630 */
15631 for (; entry != vm_map_to_entry(map) && start < end;) {
15632 /*
15633 * The first time through, the start address could be anywhere
15634 * within the vm_map_entry we found. So adjust the offset to
15635 * correspond. After that, the offset will always be zero to
15636 * correspond to the beginning of the current vm_map_entry.
15637 */
15638 offset = (start - entry->vme_start) + VME_OFFSET(entry);
15639
15640 /*
15641 * Set the length so we don't go beyond the end of the
15642 * map_entry or beyond the end of the range we were given.
15643 * This range could also span multiple map entries, all of which
15644 * map different files, so make sure we only do the right amount
15645 * of I/O for each object. Note that it's possible for there
15646 * to be multiple map entries all referring to the same object
15647 * but with different page permissions, but it's not worth
15648 * trying to optimize that case.
15649 */
15650 len = MIN(entry->vme_end - start, end - start);
15651
15652 if ((vm_size_t) len != len) {
15653 /* 32-bit overflow */
15654 len = (vm_size_t) (0 - PAGE_SIZE);
15655 }
15656 fault_info.cluster_size = (vm_size_t) len;
15657 fault_info.lo_offset = offset;
15658 fault_info.hi_offset = offset + len;
15659 fault_info.user_tag = VME_ALIAS(entry);
15660 fault_info.pmap_options = 0;
15661 if (entry->iokit_acct ||
15662 (!entry->is_sub_map && !entry->use_pmap)) {
15663 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15664 }
15665
15666 /*
15667 * If the entry is a submap OR there's no read permission
15668 * to this mapping, then just skip it.
15669 */
15670 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15671 entry = entry->vme_next;
15672 start = entry->vme_start;
15673 continue;
15674 }
15675
15676 object = VME_OBJECT(entry);
15677
15678 if (object == NULL ||
15679 (object && object->internal)) {
15680 /*
15681 * Memory range backed by anonymous memory.
15682 */
15683 vm_size_t region_size = 0, effective_page_size = 0;
15684 vm_map_offset_t addr = 0, effective_page_mask = 0;
15685
15686 region_size = len;
15687 addr = start;
15688
15689 effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
15690 effective_page_size = effective_page_mask + 1;
15691
15692 vm_map_unlock_read(map);
15693
15694 while (region_size) {
15695 vm_pre_fault(
15696 vm_map_trunc_page(addr, effective_page_mask),
15697 VM_PROT_READ | VM_PROT_WRITE);
15698
15699 region_size -= effective_page_size;
15700 addr += effective_page_size;
15701 }
15702 } else {
15703 /*
15704 * Find the file object backing this map entry. If there is
15705 * none, then we simply ignore the "will need" advice for this
15706 * entry and go on to the next one.
15707 */
15708 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15709 entry = entry->vme_next;
15710 start = entry->vme_start;
15711 continue;
15712 }
15713
15714 vm_object_paging_begin(object);
15715 pager = object->pager;
15716 vm_object_unlock(object);
15717
15718 /*
15719 * The data_request() could take a long time, so let's
15720 * release the map lock to avoid blocking other threads.
15721 */
15722 vm_map_unlock_read(map);
15723
15724 /*
15725 * Get the data from the object asynchronously.
15726 *
15727 * Note that memory_object_data_request() places limits on the
15728 * amount of I/O it will do. Regardless of the len we
15729 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15730 * silently truncates the len to that size. This isn't
15731 * necessarily bad since madvise shouldn't really be used to
15732 * page in unlimited amounts of data. Other Unix variants
15733 * limit the willneed case as well. If this turns out to be an
15734 * issue for developers, then we can always adjust the policy
15735 * here and still be backwards compatible since this is all
15736 * just "advice".
15737 */
15738 kr = memory_object_data_request(
15739 pager,
15740 vm_object_trunc_page(offset) + object->paging_offset,
15741 0, /* ignored */
15742 VM_PROT_READ,
15743 (memory_object_fault_info_t)&fault_info);
15744
15745 vm_object_lock(object);
15746 vm_object_paging_end(object);
15747 vm_object_unlock(object);
15748
15749 /*
15750 * If we couldn't do the I/O for some reason, just give up on
15751 * the madvise. We still return success to the user since
15752 * madvise isn't supposed to fail when the advice can't be
15753 * taken.
15754 */
15755
15756 if (kr != KERN_SUCCESS) {
15757 return KERN_SUCCESS;
15758 }
15759 }
15760
15761 start += len;
15762 if (start >= end) {
15763 /* done */
15764 return KERN_SUCCESS;
15765 }
15766
15767 /* look up next entry */
15768 vm_map_lock_read(map);
15769 if (!vm_map_lookup_entry(map, start, &entry)) {
15770 /*
15771 * There's a new hole in the address range.
15772 */
15773 vm_map_unlock_read(map);
15774 return KERN_INVALID_ADDRESS;
15775 }
15776 }
15777
15778 vm_map_unlock_read(map);
15779 return KERN_SUCCESS;
15780 }
15781
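/*
 * User-space sketch (separate program) of the file-backed case handled
 * above: map a regular file read-only and ask for read-ahead. The helper
 * name and the use of the whole file are illustrative choices.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

int
willneed_whole_file(const char *path)
{
	struct stat st;
	int fd, rc;
	void *p;

	fd = open(path, O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0) {
		if (fd >= 0) {
			close(fd);
		}
		return -1;
	}
	p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd); /* the mapping keeps the file referenced */
	if (p == MAP_FAILED) {
		return -1;
	}
	/*
	 * Asks for asynchronous read-ahead of the mapped range; as noted
	 * above, the kernel may truncate each request to
	 * MAX_UPL_TRANSFER_BYTES, and I/O failures are not reported.
	 */
	rc = madvise(p, (size_t)st.st_size, MADV_WILLNEED);
	munmap(p, (size_t)st.st_size);
	return rc;
}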
15782 static boolean_t
15783 vm_map_entry_is_reusable(
15784 vm_map_entry_t entry)
15785 {
15786 /* Only user map entries */
15787
15788 vm_object_t object;
15789
15790 if (entry->is_sub_map) {
15791 return FALSE;
15792 }
15793
15794 switch (VME_ALIAS(entry)) {
15795 case VM_MEMORY_MALLOC:
15796 case VM_MEMORY_MALLOC_SMALL:
15797 case VM_MEMORY_MALLOC_LARGE:
15798 case VM_MEMORY_REALLOC:
15799 case VM_MEMORY_MALLOC_TINY:
15800 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15801 case VM_MEMORY_MALLOC_LARGE_REUSED:
15802 /*
15803 * This is a malloc() memory region: check if it's still
15804 * in its original state and can be re-used for more
15805 * malloc() allocations.
15806 */
15807 break;
15808 default:
15809 /*
15810 * Not a malloc() memory region: let the caller decide if
15811 * it's re-usable.
15812 */
15813 return TRUE;
15814 }
15815
15816 if (/*entry->is_shared ||*/
15817 entry->is_sub_map ||
15818 entry->in_transition ||
15819 entry->protection != VM_PROT_DEFAULT ||
15820 entry->max_protection != VM_PROT_ALL ||
15821 entry->inheritance != VM_INHERIT_DEFAULT ||
15822 entry->no_cache ||
15823 entry->permanent ||
15824 entry->superpage_size != FALSE ||
15825 entry->zero_wired_pages ||
15826 entry->wired_count != 0 ||
15827 entry->user_wired_count != 0) {
15828 return FALSE;
15829 }
15830
15831 object = VME_OBJECT(entry);
15832 if (object == VM_OBJECT_NULL) {
15833 return TRUE;
15834 }
15835 if (
15836 #if 0
15837 /*
15838 * Let's proceed even if the VM object is potentially
15839 * shared.
15840 * We check for this later when processing the actual
15841 * VM pages, so the contents will be safe if shared.
15842 *
15843 * But we can still mark this memory region as "reusable" to
15844 * acknowledge that the caller did let us know that the memory
15845 * could be re-used and should not be penalized for holding
15846 * on to it. This allows its "resident size" to not include
15847 * the reusable range.
15848 */
15849 object->ref_count == 1 &&
15850 #endif
15851 object->wired_page_count == 0 &&
15852 object->copy == VM_OBJECT_NULL &&
15853 object->shadow == VM_OBJECT_NULL &&
15854 object->internal &&
15855 object->purgable == VM_PURGABLE_DENY &&
15856 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15857 !object->true_share &&
15858 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15859 !object->code_signed) {
15860 return TRUE;
15861 }
15862 return FALSE;
15863 }
15864
15865 static kern_return_t
15866 vm_map_reuse_pages(
15867 vm_map_t map,
15868 vm_map_offset_t start,
15869 vm_map_offset_t end)
15870 {
15871 vm_map_entry_t entry;
15872 vm_object_t object;
15873 vm_object_offset_t start_offset, end_offset;
15874
15875 /*
15876 * The MADV_REUSE operation doesn't require any changes to the
15877 * vm_map_entry_t's, so the read lock is sufficient.
15878 */
15879
15880 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15881 /*
15882 * XXX TODO4K
15883 * need to figure out what reusable means for a
15884 * portion of a native page.
15885 */
15886 return KERN_SUCCESS;
15887 }
15888
15889 vm_map_lock_read(map);
15890 assert(map->pmap != kernel_pmap); /* protect alias access */
15891
15892 /*
15893 * The madvise semantics require that the address range be fully
15894 * allocated with no holes. Otherwise, we're required to return
15895 * an error.
15896 */
15897
15898 if (!vm_map_range_check(map, start, end, &entry)) {
15899 vm_map_unlock_read(map);
15900 vm_page_stats_reusable.reuse_pages_failure++;
15901 return KERN_INVALID_ADDRESS;
15902 }
15903
15904 /*
15905 * Examine each vm_map_entry_t in the range.
15906 */
15907 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15908 entry = entry->vme_next) {
15909 /*
15910 * Sanity check on the VM map entry.
15911 */
15912 if (!vm_map_entry_is_reusable(entry)) {
15913 vm_map_unlock_read(map);
15914 vm_page_stats_reusable.reuse_pages_failure++;
15915 return KERN_INVALID_ADDRESS;
15916 }
15917
15918 /*
15919 * The first time through, the start address could be anywhere
15920 * within the vm_map_entry we found. So adjust the offset to
15921 * correspond.
15922 */
15923 if (entry->vme_start < start) {
15924 start_offset = start - entry->vme_start;
15925 } else {
15926 start_offset = 0;
15927 }
15928 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15929 start_offset += VME_OFFSET(entry);
15930 end_offset += VME_OFFSET(entry);
15931
15932 assert(!entry->is_sub_map);
15933 object = VME_OBJECT(entry);
15934 if (object != VM_OBJECT_NULL) {
15935 vm_object_lock(object);
15936 vm_object_reuse_pages(object, start_offset, end_offset,
15937 TRUE);
15938 vm_object_unlock(object);
15939 }
15940
15941 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15942 /*
15943 * XXX
15944 * We do not hold the VM map exclusively here.
15945 * The "alias" field is not that critical, so it's
15946 * safe to update it here, as long as it is the only
15947 * one that can be modified while holding the VM map
15948 * "shared".
15949 */
15950 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15951 }
15952 }
15953
15954 vm_map_unlock_read(map);
15955 vm_page_stats_reusable.reuse_pages_success++;
15956 return KERN_SUCCESS;
15957 }
15958
15959
15960 static kern_return_t
15961 vm_map_reusable_pages(
15962 vm_map_t map,
15963 vm_map_offset_t start,
15964 vm_map_offset_t end)
15965 {
15966 vm_map_entry_t entry;
15967 vm_object_t object;
15968 vm_object_offset_t start_offset, end_offset;
15969 vm_map_offset_t pmap_offset;
15970
15971 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15972 /*
15973 * XXX TODO4K
15974 * need to figure out what reusable means for a portion
15975 * of a native page.
15976 */
15977 return KERN_SUCCESS;
15978 }
15979
15980 /*
15981 * The MADV_REUSABLE operation doesn't require any changes to the
15982 * vm_map_entry_t's, so the read lock is sufficient.
15983 */
15984
15985 vm_map_lock_read(map);
15986 assert(map->pmap != kernel_pmap); /* protect alias access */
15987
15988 /*
15989 * The madvise semantics require that the address range be fully
15990 * allocated with no holes. Otherwise, we're required to return
15991 * an error.
15992 */
15993
15994 if (!vm_map_range_check(map, start, end, &entry)) {
15995 vm_map_unlock_read(map);
15996 vm_page_stats_reusable.reusable_pages_failure++;
15997 return KERN_INVALID_ADDRESS;
15998 }
15999
16000 /*
16001 * Examine each vm_map_entry_t in the range.
16002 */
16003 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16004 entry = entry->vme_next) {
16005 int kill_pages = 0;
16006
16007 /*
16008 * Sanity check on the VM map entry.
16009 */
16010 if (!vm_map_entry_is_reusable(entry)) {
16011 vm_map_unlock_read(map);
16012 vm_page_stats_reusable.reusable_pages_failure++;
16013 return KERN_INVALID_ADDRESS;
16014 }
16015
16016 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
16017 /* not writable: can't discard contents */
16018 vm_map_unlock_read(map);
16019 vm_page_stats_reusable.reusable_nonwritable++;
16020 vm_page_stats_reusable.reusable_pages_failure++;
16021 return KERN_PROTECTION_FAILURE;
16022 }
16023
16024 /*
16025 * The first time through, the start address could be anywhere
16026 * within the vm_map_entry we found. So adjust the offset to
16027 * correspond.
16028 */
16029 if (entry->vme_start < start) {
16030 start_offset = start - entry->vme_start;
16031 pmap_offset = start;
16032 } else {
16033 start_offset = 0;
16034 pmap_offset = entry->vme_start;
16035 }
16036 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
16037 start_offset += VME_OFFSET(entry);
16038 end_offset += VME_OFFSET(entry);
16039
16040 assert(!entry->is_sub_map);
16041 object = VME_OBJECT(entry);
16042 if (object == VM_OBJECT_NULL) {
16043 continue;
16044 }
16045
16046
16047 vm_object_lock(object);
16048 if (((object->ref_count == 1) ||
16049 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
16050 object->copy == VM_OBJECT_NULL)) &&
16051 object->shadow == VM_OBJECT_NULL &&
16052 /*
16053 * "iokit_acct" entries are billed for their virtual size
16054 * (rather than for their resident pages only), so they
16055 * wouldn't benefit from making pages reusable, and it
16056 * would be hard to keep track of pages that are both
16057 * "iokit_acct" and "reusable" in the pmap stats and
16058 * ledgers.
16059 */
16060 !(entry->iokit_acct ||
16061 (!entry->is_sub_map && !entry->use_pmap))) {
16062 if (object->ref_count != 1) {
16063 vm_page_stats_reusable.reusable_shared++;
16064 }
16065 kill_pages = 1;
16066 } else {
16067 kill_pages = -1;
16068 }
16069 if (kill_pages != -1) {
16070 vm_object_deactivate_pages(object,
16071 start_offset,
16072 end_offset - start_offset,
16073 kill_pages,
16074 TRUE /*reusable_pages*/,
16075 map->pmap,
16076 pmap_offset);
16077 } else {
16078 vm_page_stats_reusable.reusable_pages_shared++;
16079 }
16080 vm_object_unlock(object);
16081
16082 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
16083 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
16084 /*
16085 * XXX
16086 * We do not hold the VM map exclusively here.
16087 * The "alias" field is not that critical, so it's
16088 * safe to update it here, as long as it is the only
16089 * one that can be modified while holding the VM map
16090 * "shared".
16091 */
16092 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
16093 }
16094 }
16095
16096 vm_map_unlock_read(map);
16097 vm_page_stats_reusable.reusable_pages_success++;
16098 return KERN_SUCCESS;
16099 }
16100
16101
16102 static kern_return_t
16103 vm_map_can_reuse(
16104 vm_map_t map,
16105 vm_map_offset_t start,
16106 vm_map_offset_t end)
16107 {
16108 vm_map_entry_t entry;
16109
16110 /*
16111 * The MADV_CAN_REUSE operation doesn't require any changes to the
16112 * vm_map_entry_t's, so the read lock is sufficient.
16113 */
16114
16115 vm_map_lock_read(map);
16116 assert(map->pmap != kernel_pmap); /* protect alias access */
16117
16118 /*
16119 * The madvise semantics require that the address range be fully
16120 * allocated with no holes. Otherwise, we're required to return
16121 * an error.
16122 */
16123
16124 if (!vm_map_range_check(map, start, end, &entry)) {
16125 vm_map_unlock_read(map);
16126 vm_page_stats_reusable.can_reuse_failure++;
16127 return KERN_INVALID_ADDRESS;
16128 }
16129
16130 /*
16131 * Examine each vm_map_entry_t in the range.
16132 */
16133 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16134 entry = entry->vme_next) {
16135 /*
16136 * Sanity check on the VM map entry.
16137 */
16138 if (!vm_map_entry_is_reusable(entry)) {
16139 vm_map_unlock_read(map);
16140 vm_page_stats_reusable.can_reuse_failure++;
16141 return KERN_INVALID_ADDRESS;
16142 }
16143 }
16144
16145 vm_map_unlock_read(map);
16146 vm_page_stats_reusable.can_reuse_success++;
16147 return KERN_SUCCESS;
16148 }
16149
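/*
 * User-space sketch of the reusable/reuse cycle implemented by the three
 * routines above, assuming Darwin's MADV_FREE_REUSABLE, MADV_FREE_REUSE
 * and MADV_CAN_REUSE advice values map to VM_BEHAVIOR_REUSABLE,
 * VM_BEHAVIOR_REUSE and VM_BEHAVIOR_CAN_REUSE respectively. This is the
 * pattern a malloc-style allocator would follow for a large region.
 */
#include <sys/mman.h>
#include <string.h>

void
allocator_retire_region(void *region, size_t size)
{
	/*
	 * Contents no longer needed: pages may be discarded and stop
	 * counting against the task's resident footprint.
	 */
	(void)madvise(region, size, MADV_FREE_REUSABLE); /* vm_map_reusable_pages() */
}

void
allocator_recycle_region(void *region, size_t size)
{
	/* about to hand the region out again: restore normal accounting */
	(void)madvise(region, size, MADV_FREE_REUSE);    /* vm_map_reuse_pages() */
	memset(region, 0, size); /* previous contents were discardable */
}

int
allocator_region_is_reusable(void *region, size_t size)
{
	/* fails if some entry does not pass vm_map_entry_is_reusable() */
	return madvise(region, size, MADV_CAN_REUSE) == 0; /* vm_map_can_reuse() */
}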
16150
16151 #if MACH_ASSERT
16152 static kern_return_t
16153 vm_map_pageout(
16154 vm_map_t map,
16155 vm_map_offset_t start,
16156 vm_map_offset_t end)
16157 {
16158 vm_map_entry_t entry;
16159
16160 /*
16161 * The MADV_PAGEOUT operation doesn't require any changes to the
16162 * vm_map_entry_t's, so the read lock is sufficient.
16163 */
16164
16165 vm_map_lock_read(map);
16166
16167 /*
16168 * The madvise semantics require that the address range be fully
16169 * allocated with no holes. Otherwise, we're required to return
16170 * an error.
16171 */
16172
16173 if (!vm_map_range_check(map, start, end, &entry)) {
16174 vm_map_unlock_read(map);
16175 return KERN_INVALID_ADDRESS;
16176 }
16177
16178 /*
16179 * Examine each vm_map_entry_t in the range.
16180 */
16181 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16182 entry = entry->vme_next) {
16183 vm_object_t object;
16184
16185 /*
16186 * Sanity check on the VM map entry.
16187 */
16188 if (entry->is_sub_map) {
16189 vm_map_t submap;
16190 vm_map_offset_t submap_start;
16191 vm_map_offset_t submap_end;
16192 vm_map_entry_t submap_entry;
16193
16194 submap = VME_SUBMAP(entry);
16195 submap_start = VME_OFFSET(entry);
16196 submap_end = submap_start + (entry->vme_end -
16197 entry->vme_start);
16198
16199 vm_map_lock_read(submap);
16200
16201 if (!vm_map_range_check(submap,
16202 submap_start,
16203 submap_end,
16204 &submap_entry)) {
16205 vm_map_unlock_read(submap);
16206 vm_map_unlock_read(map);
16207 return KERN_INVALID_ADDRESS;
16208 }
16209
16210 object = VME_OBJECT(submap_entry);
16211 if (submap_entry->is_sub_map ||
16212 object == VM_OBJECT_NULL ||
16213 !object->internal) {
16214 vm_map_unlock_read(submap);
16215 continue;
16216 }
16217
16218 vm_object_pageout(object);
16219
16220 vm_map_unlock_read(submap);
16221 submap = VM_MAP_NULL;
16222 submap_entry = VM_MAP_ENTRY_NULL;
16223 continue;
16224 }
16225
16226 object = VME_OBJECT(entry);
16227 if (entry->is_sub_map ||
16228 object == VM_OBJECT_NULL ||
16229 !object->internal) {
16230 continue;
16231 }
16232
16233 vm_object_pageout(object);
16234 }
16235
16236 vm_map_unlock_read(map);
16237 return KERN_SUCCESS;
16238 }
16239 #endif /* MACH_ASSERT */
16240
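/*
 * User-space sketch for the debug-only path above, assuming Darwin's
 * MADV_PAGEOUT advice value maps to VM_BEHAVIOR_PAGEOUT; on kernels
 * built without MACH_ASSERT the request fails with an argument error.
 */
#include <sys/mman.h>

int
request_pageout(void *region, size_t size)
{
	/* asks the kernel to push the range's internal pages out */
	return madvise(region, size, MADV_PAGEOUT);
}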
16241
16242 /*
16243 * Routine: vm_map_entry_insert
16244 *
16245 * Description: This routine inserts a new vm_map_entry into a locked map.
16246 */
16247 vm_map_entry_t
16248 vm_map_entry_insert(
16249 vm_map_t map,
16250 vm_map_entry_t insp_entry,
16251 vm_map_offset_t start,
16252 vm_map_offset_t end,
16253 vm_object_t object,
16254 vm_object_offset_t offset,
16255 boolean_t needs_copy,
16256 boolean_t is_shared,
16257 boolean_t in_transition,
16258 vm_prot_t cur_protection,
16259 vm_prot_t max_protection,
16260 vm_behavior_t behavior,
16261 vm_inherit_t inheritance,
16262 unsigned short wired_count,
16263 boolean_t no_cache,
16264 boolean_t permanent,
16265 boolean_t no_copy_on_read,
16266 unsigned int superpage_size,
16267 boolean_t clear_map_aligned,
16268 boolean_t is_submap,
16269 boolean_t used_for_jit,
16270 int alias,
16271 boolean_t translated_allow_execute)
16272 {
16273 vm_map_entry_t new_entry;
16274
16275 assert(insp_entry != (vm_map_entry_t)0);
16276 vm_map_lock_assert_exclusive(map);
16277
16278 #if DEVELOPMENT || DEBUG
16279 vm_object_offset_t end_offset = 0;
16280 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
16281 #endif /* DEVELOPMENT || DEBUG */
16282
16283 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
16284
16285 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
16286 new_entry->map_aligned = TRUE;
16287 } else {
16288 new_entry->map_aligned = FALSE;
16289 }
16290 if (clear_map_aligned &&
16291 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
16292 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
16293 new_entry->map_aligned = FALSE;
16294 }
16295
16296 new_entry->vme_start = start;
16297 new_entry->vme_end = end;
16298 if (new_entry->map_aligned) {
16299 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
16300 VM_MAP_PAGE_MASK(map)));
16301 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
16302 VM_MAP_PAGE_MASK(map)));
16303 } else {
16304 assert(page_aligned(new_entry->vme_start));
16305 assert(page_aligned(new_entry->vme_end));
16306 }
16307 assert(new_entry->vme_start < new_entry->vme_end);
16308
16309 VME_OBJECT_SET(new_entry, object);
16310 VME_OFFSET_SET(new_entry, offset);
16311 new_entry->is_shared = is_shared;
16312 new_entry->is_sub_map = is_submap;
16313 new_entry->needs_copy = needs_copy;
16314 new_entry->in_transition = in_transition;
16315 new_entry->needs_wakeup = FALSE;
16316 new_entry->inheritance = inheritance;
16317 new_entry->protection = cur_protection;
16318 new_entry->max_protection = max_protection;
16319 new_entry->behavior = behavior;
16320 new_entry->wired_count = wired_count;
16321 new_entry->user_wired_count = 0;
16322 if (is_submap) {
16323 /*
16324 * submap: "use_pmap" means "nested".
16325 * default: false.
16326 */
16327 new_entry->use_pmap = FALSE;
16328 } else {
16329 /*
16330 * object: "use_pmap" means "use pmap accounting" for footprint.
16331 * default: true.
16332 */
16333 new_entry->use_pmap = TRUE;
16334 }
16335 VME_ALIAS_SET(new_entry, alias);
16336 new_entry->zero_wired_pages = FALSE;
16337 new_entry->no_cache = no_cache;
16338 new_entry->permanent = permanent;
16339 if (superpage_size) {
16340 new_entry->superpage_size = TRUE;
16341 } else {
16342 new_entry->superpage_size = FALSE;
16343 }
16344 if (used_for_jit) {
16345 if (!(map->jit_entry_exists) ||
16346 VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
16347 new_entry->used_for_jit = TRUE;
16348 map->jit_entry_exists = TRUE;
16349 }
16350 } else {
16351 new_entry->used_for_jit = FALSE;
16352 }
16353 if (translated_allow_execute) {
16354 new_entry->translated_allow_execute = TRUE;
16355 } else {
16356 new_entry->translated_allow_execute = FALSE;
16357 }
16358 new_entry->pmap_cs_associated = FALSE;
16359 new_entry->iokit_acct = FALSE;
16360 new_entry->vme_resilient_codesign = FALSE;
16361 new_entry->vme_resilient_media = FALSE;
16362 new_entry->vme_atomic = FALSE;
16363 new_entry->vme_no_copy_on_read = no_copy_on_read;
16364
16365 /*
16366 * Insert the new entry into the list.
16367 */
16368
16369 vm_map_store_entry_link(map, insp_entry, new_entry,
16370 VM_MAP_KERNEL_FLAGS_NONE);
16371 map->size += end - start;
16372
16373 /*
16374 * Update the free space hint and the lookup hint.
16375 */
16376
16377 SAVE_HINT_MAP_WRITE(map, new_entry);
16378 return new_entry;
16379 }
16380
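/*
 * Minimal kernel-side sketch (hypothetical helper, not part of xnu) of the
 * calling convention for vm_map_entry_insert(): the caller holds the map
 * lock exclusively, looks up the insertion point, and passes the defaults
 * most callers use. Assumes [start, end) is map-aligned and not already
 * mapped.
 */
static vm_map_entry_t
insert_anonymous_entry_sketch(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t insp_entry;

	vm_map_lock_assert_exclusive(map);
	/* on a hole, this returns the entry preceding "start" */
	(void) vm_map_lookup_entry(map, start, &insp_entry);

	return vm_map_entry_insert(map, insp_entry, start, end,
	           VM_OBJECT_NULL, 0,           /* object, offset */
	           FALSE, FALSE, FALSE,         /* needs_copy, is_shared, in_transition */
	           VM_PROT_DEFAULT, VM_PROT_ALL,
	           VM_BEHAVIOR_DEFAULT, VM_INHERIT_DEFAULT,
	           0,                           /* wired_count */
	           FALSE, FALSE, FALSE,         /* no_cache, permanent, no_copy_on_read */
	           0,                           /* superpage_size */
	           FALSE,                       /* clear_map_aligned */
	           FALSE, FALSE,                /* is_submap, used_for_jit */
	           VM_KERN_MEMORY_NONE,         /* alias */
	           FALSE);                      /* translated_allow_execute */
}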
16381 int vm_remap_old_path = 0;
16382 int vm_remap_new_path = 0;
16383 /*
16384 * Routine: vm_map_remap_extract
16385 *
16386 * Description: This routine extracts a list of vm_map entries from a map into "map_header".
16387 */
16388 static kern_return_t
16389 vm_map_remap_extract(
16390 vm_map_t map,
16391 vm_map_offset_t addr,
16392 vm_map_size_t size,
16393 vm_prot_t required_protection,
16394 boolean_t copy,
16395 struct vm_map_header *map_header,
16396 vm_prot_t *cur_protection,
16397 vm_prot_t *max_protection,
16398 /* What, no behavior? */
16399 vm_inherit_t inheritance,
16400 vm_map_kernel_flags_t vmk_flags)
16401 {
16402 kern_return_t result;
16403 vm_map_size_t mapped_size;
16404 vm_map_size_t tmp_size;
16405 vm_map_entry_t src_entry; /* result of last map lookup */
16406 vm_map_entry_t new_entry;
16407 vm_object_offset_t offset;
16408 vm_map_offset_t map_address;
16409 vm_map_offset_t src_start; /* start of entry to map */
16410 vm_map_offset_t src_end; /* end of region to be mapped */
16411 vm_object_t object;
16412 vm_map_version_t version;
16413 boolean_t src_needs_copy;
16414 boolean_t new_entry_needs_copy;
16415 vm_map_entry_t saved_src_entry;
16416 boolean_t src_entry_was_wired;
16417 vm_prot_t max_prot_for_prot_copy;
16418 vm_map_offset_t effective_page_mask;
16419 boolean_t pageable, same_map;
16420
16421 pageable = vmk_flags.vmkf_copy_pageable;
16422 same_map = vmk_flags.vmkf_copy_same_map;
16423
16424 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16425
16426 assert(map != VM_MAP_NULL);
16427 assert(size != 0);
16428 assert(size == vm_map_round_page(size, effective_page_mask));
16429 assert(inheritance == VM_INHERIT_NONE ||
16430 inheritance == VM_INHERIT_COPY ||
16431 inheritance == VM_INHERIT_SHARE);
16432 assert(!(required_protection & ~VM_PROT_ALL));
16433
16434 /*
16435 * Compute start and end of region.
16436 */
16437 src_start = vm_map_trunc_page(addr, effective_page_mask);
16438 src_end = vm_map_round_page(src_start + size, effective_page_mask);
16439
16440 /*
16441 * Initialize map_header.
16442 */
16443 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16444 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16445 map_header->nentries = 0;
16446 map_header->entries_pageable = pageable;
16447 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16448 map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
16449 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16450
16451 vm_map_store_init( map_header );
16452
16453 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16454 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
16455 } else {
16456 max_prot_for_prot_copy = VM_PROT_NONE;
16457 }
16458 *cur_protection = VM_PROT_ALL;
16459 *max_protection = VM_PROT_ALL;
16460
16461 map_address = 0;
16462 mapped_size = 0;
16463 result = KERN_SUCCESS;
16464
16465 /*
16466 * The specified source virtual space might correspond to
16467 * multiple map entries, need to loop on them.
16468 */
16469 vm_map_lock(map);
16470 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16471 /*
16472 * This address space uses sub-pages so the range might
16473 * not be re-mappable in an address space with larger
16474 * pages. Re-assemble any broken-up VM map entries to
16475 * improve our chances of making it work.
16476 */
16477 vm_map_simplify_range(map, src_start, src_end);
16478 }
16479 while (mapped_size != size) {
16480 vm_map_size_t entry_size;
16481
16482 /*
16483 * Find the beginning of the region.
16484 */
16485 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16486 result = KERN_INVALID_ADDRESS;
16487 break;
16488 }
16489
16490 if (src_start < src_entry->vme_start ||
16491 (mapped_size && src_start != src_entry->vme_start)) {
16492 result = KERN_INVALID_ADDRESS;
16493 break;
16494 }
16495
16496 tmp_size = size - mapped_size;
16497 if (src_end > src_entry->vme_end) {
16498 tmp_size -= (src_end - src_entry->vme_end);
16499 }
16500
16501 entry_size = (vm_map_size_t)(src_entry->vme_end -
16502 src_entry->vme_start);
16503
16504 if (src_entry->is_sub_map &&
16505 vmk_flags.vmkf_copy_single_object) {
16506 vm_map_t submap;
16507 vm_map_offset_t submap_start;
16508 vm_map_size_t submap_size;
16509
16510 /*
16511 * No check for "required_protection" on "src_entry"
16512 * because the protections that matter are the ones
16513 * on the submap's VM map entry, which will be checked
16514 * during the call to vm_map_remap_extract() below.
16515 */
16516 submap_size = src_entry->vme_end - src_start;
16517 if (submap_size > size) {
16518 submap_size = size;
16519 }
16520 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16521 submap = VME_SUBMAP(src_entry);
16522 vm_map_reference(submap);
16523 vm_map_unlock(map);
16524 src_entry = NULL;
16525 result = vm_map_remap_extract(submap,
16526 submap_start,
16527 submap_size,
16528 required_protection,
16529 copy,
16530 map_header,
16531 cur_protection,
16532 max_protection,
16533 inheritance,
16534 vmk_flags);
16535 vm_map_deallocate(submap);
16536 return result;
16537 }
16538
16539 if ((src_entry->protection & required_protection)
16540 != required_protection) {
16541 if (vmk_flags.vmkf_copy_single_object &&
16542 mapped_size != 0) {
16543 /*
16544 * Single object extraction.
16545 * We can't extract more with the required
16546 * protection but we've extracted some, so
16547 * stop there and declare success.
16548 * The caller should check the size of
16549 * the copy entry we've extracted.
16550 */
16551 result = KERN_SUCCESS;
16552 } else {
16553 /*
16554 * VM range extraction.
16555 * Required protection is not available
16556 * for this part of the range: fail.
16557 */
16558 result = KERN_PROTECTION_FAILURE;
16559 }
16560 break;
16561 }
16562
16563 if (src_entry->is_sub_map &&
16564 VM_MAP_PAGE_SHIFT(VME_SUBMAP(src_entry)) < PAGE_SHIFT) {
16565 vm_map_t submap;
16566 vm_map_offset_t submap_start;
16567 vm_map_size_t submap_size;
16568 vm_map_copy_t submap_copy;
16569 vm_prot_t submap_curprot, submap_maxprot;
16570
16571 vm_remap_new_path++;
16572
16573 /*
16574 * No check for "required_protection" on "src_entry"
16575 * because the protections that matter are the ones
16576 * on the submap's VM map entry, which will be checked
16577 * during the call to vm_map_copy_extract() below.
16578 */
16579 object = VM_OBJECT_NULL;
16580 submap_copy = VM_MAP_COPY_NULL;
16581
16582 /* find equivalent range in the submap */
16583 submap = VME_SUBMAP(src_entry);
16584 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16585 submap_size = tmp_size;
16586 /* extra ref to keep submap alive */
16587 vm_map_reference(submap);
16588
16589 DTRACE_VM6(remap_submap_recurse,
16590 vm_map_t, map,
16591 vm_map_offset_t, addr,
16592 vm_map_size_t, size,
16593 boolean_t, copy,
16594 vm_map_offset_t, submap_start,
16595 vm_map_size_t, submap_size);
16596
16597 /*
16598 * The map can be safely unlocked since we
16599 * already hold a reference on the submap.
16600 *
16601 * No timestamp since we don't care if the map
16602 * gets modified while we're down in the submap.
16603 * We'll resume the extraction at src_start + tmp_size
16604 * anyway.
16605 */
16606 vm_map_unlock(map);
16607 src_entry = NULL; /* not valid once map is unlocked */
16608
16609 result = vm_map_copy_extract(submap,
16610 submap_start,
16611 submap_size,
16612 required_protection,
16613 copy,
16614 &submap_copy,
16615 &submap_curprot,
16616 &submap_maxprot,
16617 inheritance,
16618 vmk_flags);
16619
16620 /* release extra ref on submap */
16621 vm_map_deallocate(submap);
16622 submap = VM_MAP_NULL;
16623
16624 if (result != KERN_SUCCESS) {
16625 vm_map_lock(map);
16626 break;
16627 }
16628
16629 /* transfer submap_copy entries to map_header */
16630 while (vm_map_copy_first_entry(submap_copy) !=
16631 vm_map_copy_to_entry(submap_copy)) {
16632 vm_map_entry_t copy_entry;
16633 vm_map_size_t copy_entry_size;
16634
16635 copy_entry = vm_map_copy_first_entry(submap_copy);
16636 assert(!copy_entry->is_sub_map);
16637 vm_map_copy_entry_unlink(submap_copy, copy_entry);
16638 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
16639 copy_entry->vme_start = map_address;
16640 copy_entry->vme_end = map_address + copy_entry_size;
16641 map_address += copy_entry_size;
16642 mapped_size += copy_entry_size;
16643 src_start += copy_entry_size;
16644 assert(src_start <= src_end);
16645 _vm_map_store_entry_link(map_header,
16646 map_header->links.prev,
16647 copy_entry);
16648 }
16649 /* done with submap_copy */
16650 vm_map_copy_discard(submap_copy);
16651
16652 *cur_protection &= submap_curprot;
16653 *max_protection &= submap_maxprot;
16654
16655 /* re-acquire the map lock and continue to next entry */
16656 vm_map_lock(map);
16657 continue;
16658 } else if (src_entry->is_sub_map) {
16659 vm_remap_old_path++;
16660 DTRACE_VM4(remap_submap,
16661 vm_map_t, map,
16662 vm_map_offset_t, addr,
16663 vm_map_size_t, size,
16664 boolean_t, copy);
16665
16666 vm_map_reference(VME_SUBMAP(src_entry));
16667 object = VM_OBJECT_NULL;
16668 } else {
16669 object = VME_OBJECT(src_entry);
16670 if (src_entry->iokit_acct) {
16671 /*
16672 * This entry uses "IOKit accounting".
16673 */
16674 } else if (object != VM_OBJECT_NULL &&
16675 (object->purgable != VM_PURGABLE_DENY ||
16676 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
16677 /*
16678 * Purgeable objects have their own accounting:
16679 * no pmap accounting for them.
16680 */
16681 assertf(!src_entry->use_pmap,
16682 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16683 map,
16684 src_entry,
16685 (uint64_t)src_entry->vme_start,
16686 (uint64_t)src_entry->vme_end,
16687 src_entry->protection,
16688 src_entry->max_protection,
16689 VME_ALIAS(src_entry));
16690 } else {
16691 /*
16692 * Not IOKit or purgeable:
16693 * must be accounted by pmap stats.
16694 */
16695 assertf(src_entry->use_pmap,
16696 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16697 map,
16698 src_entry,
16699 (uint64_t)src_entry->vme_start,
16700 (uint64_t)src_entry->vme_end,
16701 src_entry->protection,
16702 src_entry->max_protection,
16703 VME_ALIAS(src_entry));
16704 }
16705
16706 if (object == VM_OBJECT_NULL) {
16707 assert(!src_entry->needs_copy);
16708 object = vm_object_allocate(entry_size);
16709 VME_OFFSET_SET(src_entry, 0);
16710 VME_OBJECT_SET(src_entry, object);
16711 assert(src_entry->use_pmap);
16712 } else if (src_entry->wired_count ||
16713 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16714 /*
16715 * A wired memory region should not have
16716 * any pending copy-on-write and needs to
16717 * keep pointing at the VM object that
16718 * contains the wired pages.
16719 * If we're sharing this memory (copy=false),
16720 * we'll share this VM object.
16721 * If we're copying this memory (copy=true),
16722 * we'll call vm_object_copy_slowly() below
16723 * and use the new VM object for the remapping.
16724 *
16725 * Or, we are already using an asymmetric
16726 * copy, and therefore we already have
16727 * the right object.
16728 */
16729 assert(!src_entry->needs_copy);
16730 } else if (src_entry->needs_copy || object->shadowed ||
16731 (object->internal && !object->true_share &&
16732 !src_entry->is_shared &&
16733 object->vo_size > entry_size)) {
16734 VME_OBJECT_SHADOW(src_entry, entry_size);
16735 assert(src_entry->use_pmap);
16736
16737 if (!src_entry->needs_copy &&
16738 (src_entry->protection & VM_PROT_WRITE)) {
16739 vm_prot_t prot;
16740
16741 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16742
16743 prot = src_entry->protection & ~VM_PROT_WRITE;
16744
16745 if (override_nx(map,
16746 VME_ALIAS(src_entry))
16747 && prot) {
16748 prot |= VM_PROT_EXECUTE;
16749 }
16750
16751 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16752
16753 if (map->mapped_in_other_pmaps) {
16754 vm_object_pmap_protect(
16755 VME_OBJECT(src_entry),
16756 VME_OFFSET(src_entry),
16757 entry_size,
16758 PMAP_NULL,
16759 PAGE_SIZE,
16760 src_entry->vme_start,
16761 prot);
16762 #if MACH_ASSERT
16763 } else if (__improbable(map->pmap == PMAP_NULL)) {
16764 extern boolean_t vm_tests_in_progress;
16765 assert(vm_tests_in_progress);
16766 /*
16767 * Some VM tests (in vm_tests.c)
16768 * sometimes want to use a VM
16769 * map without a pmap.
16770 * Otherwise, this should never
16771 * happen.
16772 */
16773 #endif /* MACH_ASSERT */
16774 } else {
16775 pmap_protect(vm_map_pmap(map),
16776 src_entry->vme_start,
16777 src_entry->vme_end,
16778 prot);
16779 }
16780 }
16781
16782 object = VME_OBJECT(src_entry);
16783 src_entry->needs_copy = FALSE;
16784 }
16785
16786
16787 vm_object_lock(object);
16788 vm_object_reference_locked(object); /* object ref. for new entry */
16789 assert(!src_entry->needs_copy);
16790 if (object->copy_strategy ==
16791 MEMORY_OBJECT_COPY_SYMMETRIC) {
16792 /*
16793 * If we want to share this object (copy==0),
16794 * it needs to be COPY_DELAY.
16795 * If we want to copy this object (copy==1),
16796 * we can't just set "needs_copy" on our side
16797 * and expect the other side to do the same
16798 * (symmetrically), so we can't let the object
16799 * stay COPY_SYMMETRIC.
16800 * So we always switch from COPY_SYMMETRIC to
16801 * COPY_DELAY.
16802 */
16803 object->copy_strategy =
16804 MEMORY_OBJECT_COPY_DELAY;
16805 }
16806 vm_object_unlock(object);
16807 }
16808
16809 offset = (VME_OFFSET(src_entry) +
16810 (src_start - src_entry->vme_start));
16811
16812 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
16813 vm_map_entry_copy(map, new_entry, src_entry);
16814 if (new_entry->is_sub_map) {
16815 /* clear address space specifics */
16816 new_entry->use_pmap = FALSE;
16817 } else if (copy) {
16818 /*
16819 * We're dealing with a copy-on-write operation,
16820 * so the resulting mapping should not inherit the
16821 * original mapping's accounting settings.
16822 * "use_pmap" should be reset to its default (TRUE)
16823 * so that the new mapping gets accounted for in
16824 * the task's memory footprint.
16825 */
16826 new_entry->use_pmap = TRUE;
16827 }
16828 /* "iokit_acct" was cleared in vm_map_entry_copy() */
16829 assert(!new_entry->iokit_acct);
16830
16831 new_entry->map_aligned = FALSE;
16832
16833 new_entry->vme_start = map_address;
16834 new_entry->vme_end = map_address + tmp_size;
16835 assert(new_entry->vme_start < new_entry->vme_end);
16836 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16837 /*
16838 * Remapping for vm_map_protect(VM_PROT_COPY)
16839 * to convert a read-only mapping into a
16840 * copy-on-write version of itself but
16841 * with write access:
16842 * keep the original inheritance and add
16843 * VM_PROT_WRITE to the max protection.
16844 */
16845 new_entry->inheritance = src_entry->inheritance;
16846 new_entry->protection &= max_prot_for_prot_copy;
16847 new_entry->max_protection |= VM_PROT_WRITE;
16848 } else {
16849 new_entry->inheritance = inheritance;
16850 }
16851 VME_OFFSET_SET(new_entry, offset);
16852
16853 /*
16854 * The new region has to be copied now if required.
16855 */
16856 RestartCopy:
16857 if (!copy) {
16858 if (src_entry->used_for_jit == TRUE) {
16859 if (same_map) {
16860 #if __APRR_SUPPORTED__
16861 /*
16862 * Disallow re-mapping of any JIT regions on APRR devices.
16863 */
16864 result = KERN_PROTECTION_FAILURE;
16865 break;
16866 #endif /* __APRR_SUPPORTED__*/
16867 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
16868 /*
16869 * Cannot allow an entry describing a JIT
16870 * region to be shared across address spaces.
16871 */
16872 result = KERN_INVALID_ARGUMENT;
16873 break;
16874 }
16875 }
16876
16877 src_entry->is_shared = TRUE;
16878 new_entry->is_shared = TRUE;
16879 if (!(new_entry->is_sub_map)) {
16880 new_entry->needs_copy = FALSE;
16881 }
16882 } else if (src_entry->is_sub_map) {
16883 /* make this a COW sub_map if not already */
16884 assert(new_entry->wired_count == 0);
16885 new_entry->needs_copy = TRUE;
16886 object = VM_OBJECT_NULL;
16887 } else if (src_entry->wired_count == 0 &&
16888 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
16889 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
16890 VME_OFFSET(new_entry),
16891 (new_entry->vme_end -
16892 new_entry->vme_start),
16893 &src_needs_copy,
16894 &new_entry_needs_copy)) {
16895 new_entry->needs_copy = new_entry_needs_copy;
16896 new_entry->is_shared = FALSE;
16897 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16898
16899 /*
16900 * Handle copy_on_write semantics.
16901 */
16902 if (src_needs_copy && !src_entry->needs_copy) {
16903 vm_prot_t prot;
16904
16905 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16906
16907 prot = src_entry->protection & ~VM_PROT_WRITE;
16908
16909 if (override_nx(map,
16910 VME_ALIAS(src_entry))
16911 && prot) {
16912 prot |= VM_PROT_EXECUTE;
16913 }
16914
16915 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16916
16917 vm_object_pmap_protect(object,
16918 offset,
16919 entry_size,
16920 ((src_entry->is_shared
16921 || map->mapped_in_other_pmaps) ?
16922 PMAP_NULL : map->pmap),
16923 VM_MAP_PAGE_SIZE(map),
16924 src_entry->vme_start,
16925 prot);
16926
16927 assert(src_entry->wired_count == 0);
16928 src_entry->needs_copy = TRUE;
16929 }
16930 /*
16931 * Throw away the old object reference of the new entry.
16932 */
16933 vm_object_deallocate(object);
16934 } else {
16935 new_entry->is_shared = FALSE;
16936 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16937
16938 src_entry_was_wired = (src_entry->wired_count > 0);
16939 saved_src_entry = src_entry;
16940 src_entry = VM_MAP_ENTRY_NULL;
16941
16942 /*
16943 * The map can be safely unlocked since we
16944 * already hold a reference on the object.
16945 *
16946 * Record the timestamp of the map for later
16947 * verification, and unlock the map.
16948 */
16949 version.main_timestamp = map->timestamp;
16950 vm_map_unlock(map); /* Increments timestamp once! */
16951
16952 /*
16953 * Perform the copy.
16954 */
16955 if (src_entry_was_wired > 0 ||
16956 (debug4k_no_cow_copyin &&
16957 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
16958 vm_object_lock(object);
16959 result = vm_object_copy_slowly(
16960 object,
16961 offset,
16962 (new_entry->vme_end -
16963 new_entry->vme_start),
16964 THREAD_UNINT,
16965 VME_OBJECT_PTR(new_entry));
16966
16967 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
16968 new_entry->needs_copy = FALSE;
16969 } else {
16970 vm_object_offset_t new_offset;
16971
16972 new_offset = VME_OFFSET(new_entry);
16973 result = vm_object_copy_strategically(
16974 object,
16975 offset,
16976 (new_entry->vme_end -
16977 new_entry->vme_start),
16978 VME_OBJECT_PTR(new_entry),
16979 &new_offset,
16980 &new_entry_needs_copy);
16981 if (new_offset != VME_OFFSET(new_entry)) {
16982 VME_OFFSET_SET(new_entry, new_offset);
16983 }
16984
16985 new_entry->needs_copy = new_entry_needs_copy;
16986 }
16987
16988 /*
16989 * Throw away the old object reference of the new entry.
16990 */
16991 vm_object_deallocate(object);
16992
16993 if (result != KERN_SUCCESS &&
16994 result != KERN_MEMORY_RESTART_COPY) {
16995 _vm_map_entry_dispose(map_header, new_entry);
16996 vm_map_lock(map);
16997 break;
16998 }
16999
17000 /*
17001 * Verify that the map has not substantially
17002 * changed while the copy was being made.
17003 */
17004
17005 vm_map_lock(map);
17006 if (version.main_timestamp + 1 != map->timestamp) {
17007 /*
17008 * Simple version comparison failed.
17009 *
17010 * Retry the lookup and verify that the
17011 * same object/offset are still present.
17012 */
17013 saved_src_entry = VM_MAP_ENTRY_NULL;
17014 vm_object_deallocate(VME_OBJECT(new_entry));
17015 _vm_map_entry_dispose(map_header, new_entry);
17016 if (result == KERN_MEMORY_RESTART_COPY) {
17017 result = KERN_SUCCESS;
17018 }
17019 continue;
17020 }
17021 /* map hasn't changed: src_entry is still valid */
17022 src_entry = saved_src_entry;
17023 saved_src_entry = VM_MAP_ENTRY_NULL;
17024
17025 if (result == KERN_MEMORY_RESTART_COPY) {
17026 vm_object_reference(object);
17027 goto RestartCopy;
17028 }
17029 }
17030
17031 _vm_map_store_entry_link(map_header,
17032 map_header->links.prev, new_entry);
17033
17034 /* Protections for submap mapping are irrelevant here */
17035 if (!src_entry->is_sub_map) {
17036 *cur_protection &= src_entry->protection;
17037 *max_protection &= src_entry->max_protection;
17038 }
17039
17040 map_address += tmp_size;
17041 mapped_size += tmp_size;
17042 src_start += tmp_size;
17043
17044 if (vmk_flags.vmkf_copy_single_object) {
17045 if (mapped_size != size) {
17046 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17047 if (src_entry->vme_next != vm_map_to_entry(map) &&
17048 VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
17049 /* XXX TODO4K */
17050 DEBUG4K_ERROR("could have extended copy to next entry...\n");
17051 }
17052 }
17053 break;
17054 }
17055 } /* end while */
17056
17057 vm_map_unlock(map);
17058 if (result != KERN_SUCCESS) {
17059 /*
17060 * Free all allocated elements.
17061 */
17062 for (src_entry = map_header->links.next;
17063 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17064 src_entry = new_entry) {
17065 new_entry = src_entry->vme_next;
17066 _vm_map_store_entry_unlink(map_header, src_entry);
17067 if (src_entry->is_sub_map) {
17068 vm_map_deallocate(VME_SUBMAP(src_entry));
17069 } else {
17070 vm_object_deallocate(VME_OBJECT(src_entry));
17071 }
17072 _vm_map_entry_dispose(map_header, src_entry);
17073 }
17074 }
17075 return result;
17076 }
17077
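/*
 * User-space sketch of the remap entry point whose kernel path relies on
 * this extraction logic: share a range of the caller's own address space
 * at a new, kernel-chosen address (copy == FALSE means the same physical
 * pages are shared). The helper name is illustrative.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>

kern_return_t
remap_shared_view(mach_vm_address_t src, mach_vm_size_t size,
    mach_vm_address_t *dst)
{
	vm_prot_t cur_prot = VM_PROT_NONE;
	vm_prot_t max_prot = VM_PROT_NONE;

	*dst = 0; /* with VM_FLAGS_ANYWHERE, the kernel picks the address */
	return mach_vm_remap(mach_task_self(), dst, size,
	           0,                   /* alignment mask */
	           VM_FLAGS_ANYWHERE,
	           mach_task_self(), src,
	           FALSE,               /* copy */
	           &cur_prot, &max_prot,
	           VM_INHERIT_DEFAULT);
}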
17078 bool
17079 vm_map_is_exotic(
17080 vm_map_t map)
17081 {
17082 return VM_MAP_IS_EXOTIC(map);
17083 }
17084
17085 bool
17086 vm_map_is_alien(
17087 vm_map_t map)
17088 {
17089 return VM_MAP_IS_ALIEN(map);
17090 }
17091
17092 #if XNU_TARGET_OS_OSX
17093 void
17094 vm_map_mark_alien(
17095 vm_map_t map)
17096 {
17097 vm_map_lock(map);
17098 map->is_alien = true;
17099 vm_map_unlock(map);
17100 }
17101 #endif /* XNU_TARGET_OS_OSX */
17102
17103 void vm_map_copy_to_physcopy(vm_map_copy_t copy_map, vm_map_t target_map);
17104 void
17105 vm_map_copy_to_physcopy(
17106 vm_map_copy_t copy_map,
17107 vm_map_t target_map)
17108 {
17109 vm_map_size_t size;
17110 vm_map_entry_t entry;
17111 vm_map_entry_t new_entry;
17112 vm_object_t new_object;
17113 unsigned int pmap_flags;
17114 pmap_t new_pmap;
17115 vm_map_t new_map;
17116 vm_map_address_t src_start, src_end, src_cur;
17117 vm_map_address_t dst_start, dst_end, dst_cur;
17118 kern_return_t kr;
17119 void *kbuf;
17120
17121 /*
17122 * Perform the equivalent of vm_allocate() and memcpy().
17123 * Replace the mappings in "copy_map" with the newly allocated mapping.
17124 */
17125 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17126
17127 assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));
17128
17129 /* allocate new VM object */
17130 size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
17131 new_object = vm_object_allocate(size);
17132 assert(new_object);
17133
17134 /* allocate new VM map entry */
17135 new_entry = vm_map_copy_entry_create(copy_map, FALSE);
17136 assert(new_entry);
17137
17138 /* finish initializing new VM map entry */
17139 new_entry->protection = VM_PROT_DEFAULT;
17140 new_entry->max_protection = VM_PROT_DEFAULT;
17141 new_entry->use_pmap = TRUE;
17142
17143 /* make new VM map entry point to new VM object */
17144 new_entry->vme_start = 0;
17145 new_entry->vme_end = size;
17146 VME_OBJECT_SET(new_entry, new_object);
17147 VME_OFFSET_SET(new_entry, 0);
17148
17149 /* create a new pmap to map "copy_map" */
17150 pmap_flags = 0;
17151 assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
17152 #if PMAP_CREATE_FORCE_4K_PAGES
17153 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
17154 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
17155 pmap_flags |= PMAP_CREATE_64BIT;
17156 new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
17157 assert(new_pmap);
17158
17159 /* create a new pageable VM map to map "copy_map" */
17160 new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
17161 assert(new_map);
17162 vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
17163
17164 /* map "copy_map" in the new VM map */
17165 src_start = 0;
17166 kr = vm_map_copyout_internal(
17167 new_map,
17168 &src_start,
17169 copy_map,
17170 copy_map->size,
17171 FALSE, /* consume_on_success */
17172 VM_PROT_DEFAULT,
17173 VM_PROT_DEFAULT,
17174 VM_INHERIT_DEFAULT);
17175 assert(kr == KERN_SUCCESS);
17176 src_end = src_start + copy_map->size;
17177
17178 /* map "new_object" in the new VM map */
17179 vm_object_reference(new_object);
17180 dst_start = 0;
17181 kr = vm_map_enter(new_map,
17182 &dst_start,
17183 size,
17184 0, /* mask */
17185 VM_FLAGS_ANYWHERE,
17186 VM_MAP_KERNEL_FLAGS_NONE,
17187 VM_KERN_MEMORY_OSFMK,
17188 new_object,
17189 0, /* offset */
17190 FALSE, /* needs copy */
17191 VM_PROT_DEFAULT,
17192 VM_PROT_DEFAULT,
17193 VM_INHERIT_DEFAULT);
17194 assert(kr == KERN_SUCCESS);
17195 dst_end = dst_start + size;
17196
17197 /* get a kernel buffer */
17198 kbuf = kheap_alloc(KHEAP_TEMP, PAGE_SIZE, Z_WAITOK);
17199 assert(kbuf);
17200
17201 /* physically copy "copy_map" mappings to new VM object */
17202 for (src_cur = src_start, dst_cur = dst_start;
17203 src_cur < src_end;
17204 src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
17205 vm_size_t bytes;
17206
17207 bytes = PAGE_SIZE;
17208 if (src_cur + PAGE_SIZE > src_end) {
17209 /* partial copy for last page */
17210 bytes = src_end - src_cur;
17211 assert(bytes > 0 && bytes < PAGE_SIZE);
17212 /* rest of dst page should be zero-filled */
17213 }
17214 /* get bytes from src mapping */
17215 kr = copyinmap(new_map, src_cur, kbuf, bytes);
17216 if (kr != KERN_SUCCESS) {
17217 DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
17218 }
17219 /* put bytes in dst mapping */
17220 assert(dst_cur < dst_end);
17221 assert(dst_cur + bytes <= dst_end);
17222 kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
17223 if (kr != KERN_SUCCESS) {
17224 DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
17225 }
17226 }
17227
17228 /* free kernel buffer */
17229 kheap_free(KHEAP_TEMP, kbuf, PAGE_SIZE);
17230 kbuf = NULL;
17231
17232 /* destroy new map */
17233 vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
17234 new_map = VM_MAP_NULL;
17235
17236 /* dispose of the old map entries in "copy_map" */
17237 while (vm_map_copy_first_entry(copy_map) !=
17238 vm_map_copy_to_entry(copy_map)) {
17239 entry = vm_map_copy_first_entry(copy_map);
17240 vm_map_copy_entry_unlink(copy_map, entry);
17241 if (entry->is_sub_map) {
17242 vm_map_deallocate(VME_SUBMAP(entry));
17243 } else {
17244 vm_object_deallocate(VME_OBJECT(entry));
17245 }
17246 vm_map_copy_entry_dispose(copy_map, entry);
17247 }
17248
17249 /* change "copy_map"'s page_size to match "target_map" */
17250 copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
17251 copy_map->offset = 0;
17252 copy_map->size = size;
17253
17254 /* insert new map entry in "copy_map" */
17255 assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
17256 vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
17257
17258 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17259 }
17260
17261 void
17262 vm_map_copy_adjust_get_target_copy_map(
17263 vm_map_copy_t copy_map,
17264 vm_map_copy_t *target_copy_map_p);
17265 void
17266 vm_map_copy_adjust_get_target_copy_map(
17267 vm_map_copy_t copy_map,
17268 vm_map_copy_t *target_copy_map_p)
17269 {
17270 vm_map_copy_t target_copy_map;
17271 vm_map_entry_t entry, target_entry;
17272
17273 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17274 /* the caller already has a "target_copy_map": use it */
17275 return;
17276 }
17277
17278 /* the caller wants us to create a new copy of "copy_map" */
17279 target_copy_map = vm_map_copy_allocate();
17280 target_copy_map->type = copy_map->type;
17281 assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17282 target_copy_map->offset = copy_map->offset;
17283 target_copy_map->size = copy_map->size;
17284 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17285 vm_map_store_init(&target_copy_map->cpy_hdr);
17286 for (entry = vm_map_copy_first_entry(copy_map);
17287 entry != vm_map_copy_to_entry(copy_map);
17288 entry = entry->vme_next) {
17289 target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
17290 vm_map_entry_copy_full(target_entry, entry);
17291 if (target_entry->is_sub_map) {
17292 vm_map_reference(VME_SUBMAP(target_entry));
17293 } else {
17294 vm_object_reference(VME_OBJECT(target_entry));
17295 }
17296 vm_map_copy_entry_link(
17297 target_copy_map,
17298 vm_map_copy_last_entry(target_copy_map),
17299 target_entry);
17300 }
17301 entry = VM_MAP_ENTRY_NULL;
17302 *target_copy_map_p = target_copy_map;
17303 }
17304
17305 void
17306 vm_map_copy_trim(
17307 vm_map_copy_t copy_map,
17308 int new_page_shift,
17309 vm_map_offset_t trim_start,
17310 vm_map_offset_t trim_end);
17311 void
17312 vm_map_copy_trim(
17313 vm_map_copy_t copy_map,
17314 int new_page_shift,
17315 vm_map_offset_t trim_start,
17316 vm_map_offset_t trim_end)
17317 {
17318 int copy_page_shift;
17319 vm_map_entry_t entry, next_entry;
17320
17321 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17322 assert(copy_map->cpy_hdr.nentries > 0);
17323
17324 trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
17325 trim_end += vm_map_copy_first_entry(copy_map)->vme_start;
17326
17327 /* use the new page_shift to do the clipping */
17328 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17329 copy_map->cpy_hdr.page_shift = new_page_shift;
17330
17331 for (entry = vm_map_copy_first_entry(copy_map);
17332 entry != vm_map_copy_to_entry(copy_map);
17333 entry = next_entry) {
17334 next_entry = entry->vme_next;
17335 if (entry->vme_end <= trim_start) {
17336 /* entry fully before trim range: skip */
17337 continue;
17338 }
17339 if (entry->vme_start >= trim_end) {
17340 /* entry fully after trim range: done */
17341 break;
17342 }
17343 /* clip entry if needed */
17344 vm_map_copy_clip_start(copy_map, entry, trim_start);
17345 vm_map_copy_clip_end(copy_map, entry, trim_end);
17346 /* dispose of entry */
17347 copy_map->size -= entry->vme_end - entry->vme_start;
17348 vm_map_copy_entry_unlink(copy_map, entry);
17349 if (entry->is_sub_map) {
17350 vm_map_deallocate(VME_SUBMAP(entry));
17351 } else {
17352 vm_object_deallocate(VME_OBJECT(entry));
17353 }
17354 vm_map_copy_entry_dispose(copy_map, entry);
17355 entry = VM_MAP_ENTRY_NULL;
17356 }
17357
17358 /* restore copy_map's original page_shift */
17359 copy_map->cpy_hdr.page_shift = copy_page_shift;
17360 }
17361
17362 /*
17363 * Make any necessary adjustments to "copy_map" to allow it to be
17364 * mapped into "target_map".
17365 * If no changes were necessary, "target_copy_map" points to the
17366 * untouched "copy_map".
17367 * If changes are necessary, changes will be made to "target_copy_map".
17368 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17369 * copy the original "copy_map" to it before applying the changes.
17370 * The caller should discard "target_copy_map" if it's not the same as
17371 * the original "copy_map".
17372 */
17373 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
17374 kern_return_t
17375 vm_map_copy_adjust_to_target(
17376 vm_map_copy_t src_copy_map,
17377 vm_map_offset_t offset,
17378 vm_map_size_t size,
17379 vm_map_t target_map,
17380 boolean_t copy,
17381 vm_map_copy_t *target_copy_map_p,
17382 vm_map_offset_t *overmap_start_p,
17383 vm_map_offset_t *overmap_end_p,
17384 vm_map_offset_t *trimmed_start_p)
17385 {
17386 vm_map_copy_t copy_map, target_copy_map;
17387 vm_map_size_t target_size;
17388 vm_map_size_t src_copy_map_size;
17389 vm_map_size_t overmap_start, overmap_end;
17390 int misalignments;
17391 vm_map_entry_t entry, target_entry;
17392 vm_map_offset_t addr_adjustment;
17393 vm_map_offset_t new_start, new_end;
17394 int copy_page_mask, target_page_mask;
17395 int copy_page_shift, target_page_shift;
17396 vm_map_offset_t trimmed_end;
17397
17398 /*
17399 * Assert that the vm_map_copy is coming from the right
17400 * zone and hasn't been forged
17401 */
17402 vm_map_copy_require(src_copy_map);
17403 assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17404
17405 /*
17406 * Start working with "src_copy_map" but we'll switch
17407 * to "target_copy_map" as soon as we start making adjustments.
17408 */
17409 copy_map = src_copy_map;
17410 src_copy_map_size = src_copy_map->size;
17411
17412 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17413 copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
17414 target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
17415 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17416
17417 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
17418
17419 target_copy_map = *target_copy_map_p;
17420 if (target_copy_map != VM_MAP_COPY_NULL) {
17421 vm_map_copy_require(target_copy_map);
17422 }
17423
17424 if (offset + size > copy_map->size) {
17425 DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
17426 return KERN_INVALID_ARGUMENT;
17427 }
17428
17429 /* trim the end */
17430 trimmed_end = 0;
17431 new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
17432 if (new_end < copy_map->size) {
17433 trimmed_end = src_copy_map_size - new_end;
17434 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
17435 /* get "target_copy_map" if needed and adjust it */
17436 vm_map_copy_adjust_get_target_copy_map(copy_map,
17437 &target_copy_map);
17438 copy_map = target_copy_map;
17439 vm_map_copy_trim(target_copy_map, target_page_shift,
17440 new_end, copy_map->size);
17441 }
17442
17443 /* trim the start */
17444 new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
17445 if (new_start != 0) {
17446 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
17447 /* get "target_copy_map" if needed and adjust it */
17448 vm_map_copy_adjust_get_target_copy_map(copy_map,
17449 &target_copy_map);
17450 copy_map = target_copy_map;
17451 vm_map_copy_trim(target_copy_map, target_page_shift,
17452 0, new_start);
17453 }
17454 *trimmed_start_p = new_start;
17455
17456 /* target_size starts with what's left after trimming */
17457 target_size = copy_map->size;
17458 assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
17459 "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
17460 (uint64_t)target_size, (uint64_t)src_copy_map_size,
17461 (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
17462
17463 /* check for misalignments but don't adjust yet */
17464 misalignments = 0;
17465 overmap_start = 0;
17466 overmap_end = 0;
17467 if (copy_page_shift < target_page_shift) {
17468 /*
17469 * Remapping from 4K to 16K: check the VM object alignments
17470 * throughout the range.
17471 * If the start and end of the range are mis-aligned, we can
17472 * over-map to re-align, and adjust the "overmap" start/end
17473 * and "target_size" of the range accordingly.
17474 * If there is any mis-alignment within the range:
17475 * if "copy":
17476 * we can do immediate-copy instead of copy-on-write,
17477 * else:
17478 * no way to remap and share; fail.
17479 */
17480 for (entry = vm_map_copy_first_entry(copy_map);
17481 entry != vm_map_copy_to_entry(copy_map);
17482 entry = entry->vme_next) {
17483 vm_object_offset_t object_offset_start, object_offset_end;
17484
17485 object_offset_start = VME_OFFSET(entry);
17486 object_offset_end = object_offset_start;
17487 object_offset_end += entry->vme_end - entry->vme_start;
17488 if (object_offset_start & target_page_mask) {
17489 if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
17490 overmap_start++;
17491 } else {
17492 misalignments++;
17493 }
17494 }
17495 if (object_offset_end & target_page_mask) {
17496 if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
17497 overmap_end++;
17498 } else {
17499 misalignments++;
17500 }
17501 }
17502 }
17503 }
17504 entry = VM_MAP_ENTRY_NULL;
17505
17506 /* decide how to deal with misalignments */
17507 assert(overmap_start <= 1);
17508 assert(overmap_end <= 1);
17509 if (!overmap_start && !overmap_end && !misalignments) {
17510 /* copy_map is properly aligned for target_map ... */
17511 if (*trimmed_start_p) {
17512 /* ... but we trimmed it, so still need to adjust */
17513 } else {
17514 /* ... and we didn't trim anything: we're done */
17515 if (target_copy_map == VM_MAP_COPY_NULL) {
17516 target_copy_map = copy_map;
17517 }
17518 *target_copy_map_p = target_copy_map;
17519 *overmap_start_p = 0;
17520 *overmap_end_p = 0;
17521 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17522 return KERN_SUCCESS;
17523 }
17524 } else if (misalignments && !copy) {
17525 /* can't "share" if misaligned */
17526 DEBUG4K_ADJUST("unsupported sharing\n");
17527 #if MACH_ASSERT
17528 if (debug4k_panic_on_misaligned_sharing) {
17529 panic("DEBUG4k %s:%d unsupported sharing\n", __FUNCTION__, __LINE__);
17530 }
17531 #endif /* MACH_ASSERT */
17532 DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
17533 return KERN_NOT_SUPPORTED;
17534 } else {
17535 /* can't virtual-copy if misaligned (but can physical-copy) */
17536 DEBUG4K_ADJUST("mis-aligned copying\n");
17537 }
17538
17539 /* get a "target_copy_map" if needed and switch to it */
17540 vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
17541 copy_map = target_copy_map;
17542
17543 if (misalignments && copy) {
17544 vm_map_size_t target_copy_map_size;
17545
17546 /*
17547 * Can't do copy-on-write with misaligned mappings.
17548 * Replace the mappings with a physical copy of the original
17549 * mappings' contents.
17550 */
17551 target_copy_map_size = target_copy_map->size;
17552 vm_map_copy_to_physcopy(target_copy_map, target_map);
17553 *target_copy_map_p = target_copy_map;
17554 *overmap_start_p = 0;
17555 *overmap_end_p = target_copy_map->size - target_copy_map_size;
17556 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17557 return KERN_SUCCESS;
17558 }
17559
17560 /* apply the adjustments */
17561 misalignments = 0;
17562 overmap_start = 0;
17563 overmap_end = 0;
17564 /* remove copy_map->offset, so that everything starts at offset 0 */
17565 addr_adjustment = copy_map->offset;
17566 /* also remove whatever we trimmed from the start */
17567 addr_adjustment += *trimmed_start_p;
17568 for (target_entry = vm_map_copy_first_entry(target_copy_map);
17569 target_entry != vm_map_copy_to_entry(target_copy_map);
17570 target_entry = target_entry->vme_next) {
17571 vm_object_offset_t object_offset_start, object_offset_end;
17572
17573 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17574 object_offset_start = VME_OFFSET(target_entry);
17575 if (object_offset_start & target_page_mask) {
17576 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17577 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17578 /*
17579 * start of 1st entry is mis-aligned:
17580 * re-adjust by over-mapping.
17581 */
17582 overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
17583 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
17584 VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
17585 } else {
17586 misalignments++;
17587 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17588 assert(copy);
17589 }
17590 }
17591
17592 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17593 target_size += overmap_start;
17594 } else {
17595 target_entry->vme_start += overmap_start;
17596 }
17597 target_entry->vme_end += overmap_start;
17598
17599 object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
17600 if (object_offset_end & target_page_mask) {
17601 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17602 if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
17603 /*
17604 * end of last entry is mis-aligned: re-adjust by over-mapping.
17605 */
17606 overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
17607 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
17608 target_entry->vme_end += overmap_end;
17609 target_size += overmap_end;
17610 } else {
17611 misalignments++;
17612 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17613 assert(copy);
17614 }
17615 }
17616 target_entry->vme_start -= addr_adjustment;
17617 target_entry->vme_end -= addr_adjustment;
17618 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17619 }
17620
17621 target_copy_map->size = target_size;
17622 target_copy_map->offset += overmap_start;
17623 target_copy_map->offset -= addr_adjustment;
17624 target_copy_map->cpy_hdr.page_shift = target_page_shift;
17625
17626 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17627 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17628 assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
17629 assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
17630
17631 *target_copy_map_p = target_copy_map;
17632 *overmap_start_p = overmap_start;
17633 *overmap_end_p = overmap_end;
17634
17635 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17636 return KERN_SUCCESS;
17637 }
17638
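/*
 * Routine: vm_map_range_physical_size
 *
 * Editorial summary (added; derived from the code below): report in
 * *phys_size how much page-aligned memory the range [start, start+size)
 * of "map" translates to.  For a map that already uses the kernel page
 * size this is simply the page-rounded size of the range; for a
 * smaller-page map (e.g. a 4K task on a 16K kernel) the range is
 * extracted and adjusted to the kernel map's page size first, so the
 * result accounts for any over-mapping at the edges.
 */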
17639 kern_return_t
17640 vm_map_range_physical_size(
17641 vm_map_t map,
17642 vm_map_address_t start,
17643 mach_vm_size_t size,
17644 mach_vm_size_t * phys_size)
17645 {
17646 kern_return_t kr;
17647 vm_map_copy_t copy_map, target_copy_map;
17648 vm_map_offset_t adjusted_start, adjusted_end;
17649 vm_map_size_t adjusted_size;
17650 vm_prot_t cur_prot, max_prot;
17651 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17652 vm_map_kernel_flags_t vmk_flags;
17653
17654 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
17655 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
17656 adjusted_size = adjusted_end - adjusted_start;
17657 *phys_size = adjusted_size;
17658 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
17659 return KERN_SUCCESS;
17660 }
17661 if (start == 0) {
17662 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
17663 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
17664 adjusted_size = adjusted_end - adjusted_start;
17665 *phys_size = adjusted_size;
17666 return KERN_SUCCESS;
17667 }
17668 if (adjusted_size == 0) {
17669 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
17670 *phys_size = 0;
17671 return KERN_SUCCESS;
17672 }
17673
17674 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
17675 vmk_flags.vmkf_copy_pageable = TRUE;
17676 vmk_flags.vmkf_copy_same_map = TRUE;
17677 assert(adjusted_size != 0);
17678 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
17679 VM_PROT_NONE, /* required_protection: no check here */
17680 FALSE /* copy */,
17681 &copy_map,
17682 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
17683 vmk_flags);
17684 if (kr != KERN_SUCCESS) {
17685 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17686 //assert(0);
17687 *phys_size = 0;
17688 return kr;
17689 }
17690 assert(copy_map != VM_MAP_COPY_NULL);
17691 target_copy_map = copy_map;
17692 DEBUG4K_ADJUST("adjusting...\n");
17693 kr = vm_map_copy_adjust_to_target(
17694 copy_map,
17695 start - adjusted_start, /* offset */
17696 size, /* size */
17697 kernel_map,
17698 FALSE, /* copy */
17699 &target_copy_map,
17700 &overmap_start,
17701 &overmap_end,
17702 &trimmed_start);
17703 if (kr == KERN_SUCCESS) {
17704 if (target_copy_map->size != *phys_size) {
17705 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
17706 }
17707 *phys_size = target_copy_map->size;
17708 } else {
17709 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17710 //assert(0);
17711 *phys_size = 0;
17712 }
17713 vm_map_copy_discard(copy_map);
17714 copy_map = VM_MAP_COPY_NULL;
17715
17716 return kr;
17717 }
17718
17719
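/*
 * Routine: memory_entry_check_for_adjustment
 *
 * Editorial summary (added; derived from the code below): given a
 * named-entry port, check whether the entry's backing copy map would
 * need over-mapping at its start or end to be mapped into "src_map".
 * Only relevant when "src_map" uses a smaller page size than the
 * kernel (e.g. 4K vs 16K); the required adjustments are returned in
 * *overmap_start / *overmap_end.
 */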
17720 kern_return_t
17721 memory_entry_check_for_adjustment(
17722 vm_map_t src_map,
17723 ipc_port_t port,
17724 vm_map_offset_t *overmap_start,
17725 vm_map_offset_t *overmap_end)
17726 {
17727 kern_return_t kr = KERN_SUCCESS;
17728 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
17729
17730 assert(port);
17731 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
17732
17733 vm_named_entry_t named_entry;
17734
17735 named_entry = (vm_named_entry_t) port->ip_kobject;
17736 named_entry_lock(named_entry);
17737 copy_map = named_entry->backing.copy;
17738 target_copy_map = copy_map;
17739
17740 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
17741 vm_map_offset_t trimmed_start;
17742
17743 trimmed_start = 0;
17744 DEBUG4K_ADJUST("adjusting...\n");
17745 kr = vm_map_copy_adjust_to_target(
17746 copy_map,
17747 0, /* offset */
17748 copy_map->size, /* size */
17749 src_map,
17750 FALSE, /* copy */
17751 &target_copy_map,
17752 overmap_start,
17753 overmap_end,
17754 &trimmed_start);
17755 assert(trimmed_start == 0);
17756 }
17757 named_entry_unlock(named_entry);
17758
17759 return kr;
17760 }
17761
17762
17763 /*
17764 * Routine: vm_remap
17765 *
17766 * Map a portion of a task's address space.
17767 * The mapped region must not overlap more than
17768 * one VM memory object. The protections of the
17769 * mapped region remain the same as in the original
17770 * task and are returned as out parameters; inheritance is supplied by the caller.
17771 * Source and target tasks can be identical.
17772 * Other attributes are the same as for vm_map().
17773 */
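/*
 * Illustrative call sketch (editorial addition, not from the original
 * source; the flag and tag values shown are only examples): share a
 * region of "src_map" into "target_map" at an anywhere-allocated
 * address, with the effective protections returned through
 * cur_prot / max_prot:
 *
 *	vm_map_address_t addr = 0;
 *	vm_prot_t cur_prot, max_prot;
 *
 *	kr = vm_map_remap(target_map, &addr, size,
 *	    0, VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE, src_map, memory_address,
 *	    FALSE, &cur_prot, &max_prot, VM_INHERIT_SHARE);
 */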
17774 kern_return_t
17775 vm_map_remap(
17776 vm_map_t target_map,
17777 vm_map_address_t *address,
17778 vm_map_size_t size,
17779 vm_map_offset_t mask,
17780 int flags,
17781 vm_map_kernel_flags_t vmk_flags,
17782 vm_tag_t tag,
17783 vm_map_t src_map,
17784 vm_map_offset_t memory_address,
17785 boolean_t copy,
17786 vm_prot_t *cur_protection,
17787 vm_prot_t *max_protection,
17788 vm_inherit_t inheritance)
17789 {
17790 kern_return_t result;
17791 vm_map_entry_t entry;
17792 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
17793 vm_map_entry_t new_entry;
17794 vm_map_copy_t copy_map;
17795 vm_map_offset_t offset_in_mapping;
17796 vm_map_size_t target_size = 0;
17797 vm_map_size_t src_page_mask, target_page_mask;
17798 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17799 vm_map_offset_t initial_memory_address;
17800 vm_map_size_t initial_size;
17801
17802 if (target_map == VM_MAP_NULL) {
17803 return KERN_INVALID_ARGUMENT;
17804 }
17805
17806 initial_memory_address = memory_address;
17807 initial_size = size;
17808 src_page_mask = VM_MAP_PAGE_MASK(src_map);
17809 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17810
17811 switch (inheritance) {
17812 case VM_INHERIT_NONE:
17813 case VM_INHERIT_COPY:
17814 case VM_INHERIT_SHARE:
17815 if (size != 0 && src_map != VM_MAP_NULL) {
17816 break;
17817 }
17818 OS_FALLTHROUGH;
17819 default:
17820 return KERN_INVALID_ARGUMENT;
17821 }
17822
17823 if (src_page_mask != target_page_mask) {
17824 if (copy) {
17825 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17826 } else {
17827 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17828 }
17829 }
17830
17831 /*
17832 * If the user is requesting that we return the address of the
17833 * first byte of the data (rather than the base of the page),
17834 * then we use different rounding semantics: specifically,
17835 * we assume that (memory_address, size) describes a region
17836 * all of whose pages we must cover, rather than a base to be truncated
17837 * down and a size to be added to that base. So we figure out
17838 * the highest page that the requested region includes and make
17839 * sure that the size will cover it.
17840 *
17841 * The key example we're worried about is of the form:
17842 *
17843 * memory_address = 0x1ff0, size = 0x20
17844 *
17845 * With the old semantics, we round down the memory_address to 0x1000
17846 * and round up the size to 0x1000, resulting in our covering *only*
17847 * page 0x1000. With the new semantics, we'd realize that the region covers
17848 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
17849 * 0x1000 and page 0x2000 in the region we remap.
17850 */
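	/*
	 * Worked numbers for the example above (editorial addition),
	 * assuming 4K source pages (src_page_mask == 0xfff):
	 *	range_start       = trunc_page(0x1ff0)          = 0x1000
	 *	range_end         = round_page(0x1ff0 + 0x20)   = 0x3000
	 *	size              = range_end - range_start     = 0x2000
	 *	offset_in_mapping = 0x1ff0 - 0x1000             = 0xff0
	 */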
17851 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
17852 vm_map_offset_t range_start, range_end;
17853
17854 range_start = vm_map_trunc_page(memory_address, src_page_mask);
17855 range_end = vm_map_round_page(memory_address + size, src_page_mask);
17856 memory_address = range_start;
17857 size = range_end - range_start;
17858 offset_in_mapping = initial_memory_address - memory_address;
17859 } else {
17860 /*
17861 * IMPORTANT:
17862 * This legacy code path is broken: for the range mentioned
17863 * above [ memory_address = 0x1ff0, size = 0x20 ], which spans
17864 * two 4k pages, it yields [ memory_address = 0x1000,
17865 * size = 0x1000 ], which covers only the first 4k page.
17866 * BUT some code unfortunately depends on this bug, so we
17867 * can't fix it without breaking something.
17868 * New code automatically gets opted into the new
17869 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flag.
17870 */
17871 offset_in_mapping = 0;
17872 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
17873 size = vm_map_round_page(size, src_page_mask);
17874 initial_memory_address = memory_address;
17875 initial_size = size;
17876 }
17877
17878
17879 if (size == 0) {
17880 return KERN_INVALID_ARGUMENT;
17881 }
17882
17883 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
17884 /* must be copy-on-write to be "media resilient" */
17885 if (!copy) {
17886 return KERN_INVALID_ARGUMENT;
17887 }
17888 }
17889
17890 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
17891 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
17892
17893 assert(size != 0);
17894 result = vm_map_copy_extract(src_map,
17895 memory_address,
17896 size,
17897 VM_PROT_NONE, /* required_protection: no check here */
17898 copy, &copy_map,
17899 cur_protection,
17900 max_protection,
17901 inheritance,
17902 vmk_flags);
17903 if (result != KERN_SUCCESS) {
17904 return result;
17905 }
17906 assert(copy_map != VM_MAP_COPY_NULL);
17907
17908 overmap_start = 0;
17909 overmap_end = 0;
17910 trimmed_start = 0;
17911 target_size = size;
17912 if (src_page_mask != target_page_mask) {
17913 vm_map_copy_t target_copy_map;
17914
17915 target_copy_map = copy_map; /* can modify "copy_map" itself */
17916 DEBUG4K_ADJUST("adjusting...\n");
17917 result = vm_map_copy_adjust_to_target(
17918 copy_map,
17919 offset_in_mapping, /* offset */
17920 initial_size,
17921 target_map,
17922 copy,
17923 &target_copy_map,
17924 &overmap_start,
17925 &overmap_end,
17926 &trimmed_start);
17927 if (result != KERN_SUCCESS) {
17928 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
17929 vm_map_copy_discard(copy_map);
17930 return result;
17931 }
17932 if (trimmed_start == 0) {
17933 /* nothing trimmed: no adjustment needed */
17934 } else if (trimmed_start >= offset_in_mapping) {
17935 /* trimmed more than offset_in_mapping: nothing left */
17936 assert(overmap_start == 0);
17937 assert(overmap_end == 0);
17938 offset_in_mapping = 0;
17939 } else {
17940 /* trimmed some of offset_in_mapping: adjust */
17941 assert(overmap_start == 0);
17942 assert(overmap_end == 0);
17943 offset_in_mapping -= trimmed_start;
17944 }
17945 offset_in_mapping += overmap_start;
17946 target_size = target_copy_map->size;
17947 }
17948
17949 /*
17950 * Allocate/check a range of free virtual address
17951 * space for the target
17952 */
17953 *address = vm_map_trunc_page(*address, target_page_mask);
17954 vm_map_lock(target_map);
17955 target_size = vm_map_round_page(target_size, target_page_mask);
17956 result = vm_map_remap_range_allocate(target_map, address,
17957 target_size,
17958 mask, flags, vmk_flags, tag,
17959 &insp_entry);
17960
17961 for (entry = vm_map_copy_first_entry(copy_map);
17962 entry != vm_map_copy_to_entry(copy_map);
17963 entry = new_entry) {
17964 new_entry = entry->vme_next;
17965 vm_map_copy_entry_unlink(copy_map, entry);
17966 if (result == KERN_SUCCESS) {
17967 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17968 /* no codesigning -> read-only access */
17969 entry->max_protection = VM_PROT_READ;
17970 entry->protection = VM_PROT_READ;
17971 entry->vme_resilient_codesign = TRUE;
17972 }
17973 entry->vme_start += *address;
17974 entry->vme_end += *address;
17975 assert(!entry->map_aligned);
17976 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
17977 !entry->is_sub_map &&
17978 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
17979 VME_OBJECT(entry)->internal)) {
17980 entry->vme_resilient_media = TRUE;
17981 }
17982 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
17983 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
17984 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
17985 vm_map_store_entry_link(target_map, insp_entry, entry,
17986 vmk_flags);
17987 insp_entry = entry;
17988 } else {
17989 if (!entry->is_sub_map) {
17990 vm_object_deallocate(VME_OBJECT(entry));
17991 } else {
17992 vm_map_deallocate(VME_SUBMAP(entry));
17993 }
17994 vm_map_copy_entry_dispose(copy_map, entry);
17995 }
17996 }
17997
17998 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17999 *cur_protection = VM_PROT_READ;
18000 *max_protection = VM_PROT_READ;
18001 }
18002
18003 if (target_map->disable_vmentry_reuse == TRUE) {
18004 assert(!target_map->is_nested_map);
18005 if (target_map->highest_entry_end < insp_entry->vme_end) {
18006 target_map->highest_entry_end = insp_entry->vme_end;
18007 }
18008 }
18009
18010 if (result == KERN_SUCCESS) {
18011 target_map->size += target_size;
18012 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
18013
18014 #if PMAP_CS
18015 if (*max_protection & VM_PROT_EXECUTE) {
18016 vm_map_address_t region_start = 0, region_size = 0;
18017 struct pmap_cs_code_directory *region_cd = NULL;
18018 vm_map_address_t base = 0;
18019 struct pmap_cs_lookup_results results = {};
18020 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
18021 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
18022
18023 pmap_cs_lookup(src_map->pmap, memory_address, &results);
18024 region_size = results.region_size;
18025 region_start = results.region_start;
18026 region_cd = results.region_cd_entry;
18027 base = results.base;
18028
18029 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
18030 *cur_protection = VM_PROT_READ;
18031 *max_protection = VM_PROT_READ;
18032 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
18033 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
18034 page_addr, page_addr + assoc_size, *address,
18035 region_start, region_size,
18036 region_cd != NULL ? "not " : "" // Don't leak kernel slide
18037 );
18038 }
18039 }
18040 #endif
18041 }
18042 vm_map_unlock(target_map);
18043
18044 if (result == KERN_SUCCESS && target_map->wiring_required) {
18045 result = vm_map_wire_kernel(target_map, *address,
18046 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18047 TRUE);
18048 }
18049
18050 /*
18051 * If requested, return the address of the data pointed to by the
18052 * request, rather than the base of the resulting page.
18053 */
18054 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18055 *address += offset_in_mapping;
18056 }
18057
18058 if (src_page_mask != target_page_mask) {
18059 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18060 }
18061 vm_map_copy_discard(copy_map);
18062 copy_map = VM_MAP_COPY_NULL;
18063
18064 return result;
18065 }
18066
18067 /*
18068 * Routine: vm_map_remap_range_allocate
18069 *
18070 * Description:
18071 * Allocate a range in the specified virtual address map.
18072 * Returns the address and the map entry just before the allocated
18073 * range.
18074 *
18075 * Map must be locked.
18076 */
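/*
 * Editorial note (added; derived from the code below): with
 * VM_FLAGS_ANYWHERE the routine scans the map (optionally starting at
 * a random address when VM_FLAGS_RANDOM_ADDR is set) for a hole large
 * enough for "size" bytes that satisfies "mask"; otherwise it validates
 * the caller-supplied address, optionally deallocating any existing
 * mappings in that range first when VM_FLAGS_OVERWRITE is set.
 */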
18077
18078 static kern_return_t
18079 vm_map_remap_range_allocate(
18080 vm_map_t map,
18081 vm_map_address_t *address, /* IN/OUT */
18082 vm_map_size_t size,
18083 vm_map_offset_t mask,
18084 int flags,
18085 vm_map_kernel_flags_t vmk_flags,
18086 __unused vm_tag_t tag,
18087 vm_map_entry_t *map_entry) /* OUT */
18088 {
18089 vm_map_entry_t entry;
18090 vm_map_offset_t start;
18091 vm_map_offset_t end;
18092 vm_map_offset_t desired_empty_end;
18093 kern_return_t kr;
18094 vm_map_entry_t hole_entry;
18095
18096 StartAgain:;
18097
18098 start = *address;
18099
18100 if (flags & VM_FLAGS_ANYWHERE) {
18101 if (flags & VM_FLAGS_RANDOM_ADDR) {
18102 /*
18103 * Get a random start address.
18104 */
18105 kr = vm_map_random_address_for_size(map, address, size);
18106 if (kr != KERN_SUCCESS) {
18107 return kr;
18108 }
18109 start = *address;
18110 }
18111
18112 /*
18113 * Calculate the first possible address.
18114 */
18115
18116 if (start < map->min_offset) {
18117 start = map->min_offset;
18118 }
18119 if (start > map->max_offset) {
18120 return KERN_NO_SPACE;
18121 }
18122
18123 /*
18124 * Look for the first possible address;
18125 * if there's already something at this
18126 * address, we have to start after it.
18127 */
18128
18129 if (map->disable_vmentry_reuse == TRUE) {
18130 VM_MAP_HIGHEST_ENTRY(map, entry, start);
18131 } else {
18132 if (map->holelistenabled) {
18133 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
18134
18135 if (hole_entry == NULL) {
18136 /*
18137 * No more space in the map?
18138 */
18139 return KERN_NO_SPACE;
18140 } else {
18141 boolean_t found_hole = FALSE;
18142
18143 do {
18144 if (hole_entry->vme_start >= start) {
18145 start = hole_entry->vme_start;
18146 found_hole = TRUE;
18147 break;
18148 }
18149
18150 if (hole_entry->vme_end > start) {
18151 found_hole = TRUE;
18152 break;
18153 }
18154 hole_entry = hole_entry->vme_next;
18155 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
18156
18157 if (found_hole == FALSE) {
18158 return KERN_NO_SPACE;
18159 }
18160
18161 entry = hole_entry;
18162 }
18163 } else {
18164 assert(first_free_is_valid(map));
18165 if (start == map->min_offset) {
18166 if ((entry = map->first_free) != vm_map_to_entry(map)) {
18167 start = entry->vme_end;
18168 }
18169 } else {
18170 vm_map_entry_t tmp_entry;
18171 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
18172 start = tmp_entry->vme_end;
18173 }
18174 entry = tmp_entry;
18175 }
18176 }
18177 start = vm_map_round_page(start,
18178 VM_MAP_PAGE_MASK(map));
18179 }
18180
18181 /*
18182 * In any case, the "entry" always precedes
18183 * the proposed new region throughout the
18184 * loop:
18185 */
18186
18187 while (TRUE) {
18188 vm_map_entry_t next;
18189
18190 /*
18191 * Find the end of the proposed new region.
18192 * Be sure we didn't go beyond the end, or
18193 * wrap around the address.
18194 */
18195
18196 end = ((start + mask) & ~mask);
18197 end = vm_map_round_page(end,
18198 VM_MAP_PAGE_MASK(map));
18199 if (end < start) {
18200 return KERN_NO_SPACE;
18201 }
18202 start = end;
18203 end += size;
18204
18205 /* We want an entire page of empty space, but don't increase the allocation size. */
18206 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
18207
18208 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
18209 if (map->wait_for_space) {
18210 if (size <= (map->max_offset -
18211 map->min_offset)) {
18212 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
18213 vm_map_unlock(map);
18214 thread_block(THREAD_CONTINUE_NULL);
18215 vm_map_lock(map);
18216 goto StartAgain;
18217 }
18218 }
18219
18220 return KERN_NO_SPACE;
18221 }
18222
18223 next = entry->vme_next;
18224
18225 if (map->holelistenabled) {
18226 if (entry->vme_end >= desired_empty_end) {
18227 break;
18228 }
18229 } else {
18230 /*
18231 * If there are no more entries, we must win.
18232 *
18233 * OR
18234 *
18235 * If there is another entry, it must be
18236 * after the end of the potential new region.
18237 */
18238
18239 if (next == vm_map_to_entry(map)) {
18240 break;
18241 }
18242
18243 if (next->vme_start >= desired_empty_end) {
18244 break;
18245 }
18246 }
18247
18248 /*
18249 * Didn't fit -- move to the next entry.
18250 */
18251
18252 entry = next;
18253
18254 if (map->holelistenabled) {
18255 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
18256 /*
18257 * Wrapped around
18258 */
18259 return KERN_NO_SPACE;
18260 }
18261 start = entry->vme_start;
18262 } else {
18263 start = entry->vme_end;
18264 }
18265 }
18266
18267 if (map->holelistenabled) {
18268 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
18269 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
18270 }
18271 }
18272
18273 *address = start;
18274 } else {
18275 vm_map_entry_t temp_entry;
18276
18277 /*
18278 * Verify that:
18279 * the address doesn't itself violate
18280 * the mask requirement.
18281 */
18282
18283 if ((start & mask) != 0) {
18284 return KERN_NO_SPACE;
18285 }
18286
18287
18288 /*
18289 * ... the address is within bounds
18290 */
18291
18292 end = start + size;
18293
18294 if ((start < map->min_offset) ||
18295 (end > map->max_offset) ||
18296 (start >= end)) {
18297 return KERN_INVALID_ADDRESS;
18298 }
18299
18300 /*
18301 * If we're asked to overwrite whatever was mapped in that
18302 * range, first deallocate that range.
18303 */
18304 if (flags & VM_FLAGS_OVERWRITE) {
18305 vm_map_t zap_map;
18306 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
18307
18308 /*
18309 * We use a "zap_map" to avoid having to unlock
18310 * the "map" in vm_map_delete(), which would compromise
18311 * the atomicity of the "deallocate" and then "remap"
18312 * combination.
18313 */
18314 zap_map = vm_map_create(PMAP_NULL,
18315 start,
18316 end,
18317 map->hdr.entries_pageable);
18318 if (zap_map == VM_MAP_NULL) {
18319 return KERN_RESOURCE_SHORTAGE;
18320 }
18321 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
18322 vm_map_disable_hole_optimization(zap_map);
18323
18324 if (vmk_flags.vmkf_overwrite_immutable) {
18325 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
18326 }
18327 kr = vm_map_delete(map, start, end,
18328 remove_flags,
18329 zap_map);
18330 if (kr == KERN_SUCCESS) {
18331 vm_map_destroy(zap_map,
18332 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18333 zap_map = VM_MAP_NULL;
18334 }
18335 }
18336
18337 /*
18338 * ... the starting address isn't allocated
18339 */
18340
18341 if (vm_map_lookup_entry(map, start, &temp_entry)) {
18342 return KERN_NO_SPACE;
18343 }
18344
18345 entry = temp_entry;
18346
18347 /*
18348 * ... the next region doesn't overlap the
18349 * end point.
18350 */
18351
18352 if ((entry->vme_next != vm_map_to_entry(map)) &&
18353 (entry->vme_next->vme_start < end)) {
18354 return KERN_NO_SPACE;
18355 }
18356 }
18357 *map_entry = entry;
18358 return KERN_SUCCESS;
18359 }
18360
18361 /*
18362 * vm_map_switch:
18363 *
18364 * Set the address map for the current thread to the specified map
18365 */
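/*
 * Illustrative usage sketch (editorial addition): callers that need to
 * temporarily assume another map's identity bracket the operation the
 * way vm_map_write_user() below does:
 *
 *	vm_map_reference(map);
 *	oldmap = vm_map_switch(map);
 *	... copyin() / copyout() against "map" ...
 *	vm_map_switch(oldmap);
 *	vm_map_deallocate(map);
 */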
18366
18367 vm_map_t
18368 vm_map_switch(
18369 vm_map_t map)
18370 {
18371 int mycpu;
18372 thread_t thread = current_thread();
18373 vm_map_t oldmap = thread->map;
18374
18375 mp_disable_preemption();
18376 mycpu = cpu_number();
18377
18378 /*
18379 * Deactivate the current map and activate the requested map
18380 */
18381 PMAP_SWITCH_USER(thread, map, mycpu);
18382
18383 mp_enable_preemption();
18384 return oldmap;
18385 }
18386
18387
18388 /*
18389 * Routine: vm_map_write_user
18390 *
18391 * Description:
18392 * Copy out data from a kernel space into space in the
18393 * destination map. The space must already exist in the
18394 * destination map.
18395 * NOTE: This routine should only be called by threads
18396 * which can block on a page fault, i.e. kernel-mode user
18397 * threads.
18398 *
18399 */
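/*
 * Illustrative sketch (editorial addition; "user_map", "user_dst" and
 * "buf" are hypothetical names):
 *
 *	char buf[64];
 *	... fill buf ...
 *	kr = vm_map_write_user(user_map, buf, user_dst, sizeof(buf));
 *
 * Returns KERN_INVALID_ADDRESS if the destination range is not writable
 * in "user_map"; vm_map_read_user() below is the mirror image.
 */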
18400 kern_return_t
18401 vm_map_write_user(
18402 vm_map_t map,
18403 void *src_p,
18404 vm_map_address_t dst_addr,
18405 vm_size_t size)
18406 {
18407 kern_return_t kr = KERN_SUCCESS;
18408
18409 if (current_map() == map) {
18410 if (copyout(src_p, dst_addr, size)) {
18411 kr = KERN_INVALID_ADDRESS;
18412 }
18413 } else {
18414 vm_map_t oldmap;
18415
18416 /* take on the identity of the target map while doing */
18417 /* the transfer */
18418
18419 vm_map_reference(map);
18420 oldmap = vm_map_switch(map);
18421 if (copyout(src_p, dst_addr, size)) {
18422 kr = KERN_INVALID_ADDRESS;
18423 }
18424 vm_map_switch(oldmap);
18425 vm_map_deallocate(map);
18426 }
18427 return kr;
18428 }
18429
18430 /*
18431 * Routine: vm_map_read_user
18432 *
18433 * Description:
18434 * Copy in data from a user space source map into the
18435 * kernel map. The space must already exist in the
18436 * kernel map.
18437 * NOTE: This routine should only be called by threads
18438 * which can block on a page fault, i.e. kernel-mode user
18439 * threads.
18440 *
18441 */
18442 kern_return_t
18443 vm_map_read_user(
18444 vm_map_t map,
18445 vm_map_address_t src_addr,
18446 void *dst_p,
18447 vm_size_t size)
18448 {
18449 kern_return_t kr = KERN_SUCCESS;
18450
18451 if (current_map() == map) {
18452 if (copyin(src_addr, dst_p, size)) {
18453 kr = KERN_INVALID_ADDRESS;
18454 }
18455 } else {
18456 vm_map_t oldmap;
18457
18458 /* take on the identity of the target map while doing */
18459 /* the transfer */
18460
18461 vm_map_reference(map);
18462 oldmap = vm_map_switch(map);
18463 if (copyin(src_addr, dst_p, size)) {
18464 kr = KERN_INVALID_ADDRESS;
18465 }
18466 vm_map_switch(oldmap);
18467 vm_map_deallocate(map);
18468 }
18469 return kr;
18470 }
18471
18472
18473 /*
18474 * vm_map_check_protection:
18475 *
18476 * Assert that the target map allows the specified
18477 * privilege on the entire address region given.
18478 * The entire region must be allocated.
18479 */
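/*
 * Illustrative sketch (editorial addition): verify that an entire
 * range is mapped with at least read/write access before using it:
 *
 *	if (!vm_map_check_protection(map, start, end,
 *	        VM_PROT_READ | VM_PROT_WRITE)) {
 *		return KERN_PROTECTION_FAILURE;
 *	}
 */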
18480 boolean_t
18481 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18482 vm_map_offset_t end, vm_prot_t protection)
18483 {
18484 vm_map_entry_t entry;
18485 vm_map_entry_t tmp_entry;
18486
18487 vm_map_lock(map);
18488
18489 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18490 vm_map_unlock(map);
18491 return FALSE;
18492 }
18493
18494 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18495 vm_map_unlock(map);
18496 return FALSE;
18497 }
18498
18499 entry = tmp_entry;
18500
18501 while (start < end) {
18502 if (entry == vm_map_to_entry(map)) {
18503 vm_map_unlock(map);
18504 return FALSE;
18505 }
18506
18507 /*
18508 * No holes allowed!
18509 */
18510
18511 if (start < entry->vme_start) {
18512 vm_map_unlock(map);
18513 return FALSE;
18514 }
18515
18516 /*
18517 * Check protection associated with entry.
18518 */
18519
18520 if ((entry->protection & protection) != protection) {
18521 vm_map_unlock(map);
18522 return FALSE;
18523 }
18524
18525 /* go to next entry */
18526
18527 start = entry->vme_end;
18528 entry = entry->vme_next;
18529 }
18530 vm_map_unlock(map);
18531 return TRUE;
18532 }
18533
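/*
 * Illustrative sketch (editorial addition; "addr" is assumed to fall
 * within a writable mapping of a purgeable object owned by the caller):
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *	kr = vm_map_purgable_control(map, addr,
 *	    VM_PURGABLE_SET_STATE, &state);
 */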
18534 kern_return_t
18535 vm_map_purgable_control(
18536 vm_map_t map,
18537 vm_map_offset_t address,
18538 vm_purgable_t control,
18539 int *state)
18540 {
18541 vm_map_entry_t entry;
18542 vm_object_t object;
18543 kern_return_t kr;
18544 boolean_t was_nonvolatile;
18545
18546 /*
18547 * Vet all the input parameters and current type and state of the
18548 * underlying object. Return with an error if anything is amiss.
18549 */
18550 if (map == VM_MAP_NULL) {
18551 return KERN_INVALID_ARGUMENT;
18552 }
18553
18554 if (control != VM_PURGABLE_SET_STATE &&
18555 control != VM_PURGABLE_GET_STATE &&
18556 control != VM_PURGABLE_PURGE_ALL &&
18557 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
18558 return KERN_INVALID_ARGUMENT;
18559 }
18560
18561 if (control == VM_PURGABLE_PURGE_ALL) {
18562 vm_purgeable_object_purge_all();
18563 return KERN_SUCCESS;
18564 }
18565
18566 if ((control == VM_PURGABLE_SET_STATE ||
18567 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
18568 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
18569 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
18570 return KERN_INVALID_ARGUMENT;
18571 }
18572
18573 vm_map_lock_read(map);
18574
18575 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
18576 /*
18577 * Must pass a valid non-submap address.
18578 */
18579 vm_map_unlock_read(map);
18580 return KERN_INVALID_ADDRESS;
18581 }
18582
18583 if ((entry->protection & VM_PROT_WRITE) == 0) {
18584 /*
18585 * Can't apply purgable controls to something you can't write.
18586 */
18587 vm_map_unlock_read(map);
18588 return KERN_PROTECTION_FAILURE;
18589 }
18590
18591 object = VME_OBJECT(entry);
18592 if (object == VM_OBJECT_NULL ||
18593 object->purgable == VM_PURGABLE_DENY) {
18594 /*
18595 * Object must already be present and be purgeable.
18596 */
18597 vm_map_unlock_read(map);
18598 return KERN_INVALID_ARGUMENT;
18599 }
18600
18601 vm_object_lock(object);
18602
18603 #if 00
18604 if (VME_OFFSET(entry) != 0 ||
18605 entry->vme_end - entry->vme_start != object->vo_size) {
18606 /*
18607 * Can only apply purgable controls to the whole (existing)
18608 * object at once.
18609 */
18610 vm_map_unlock_read(map);
18611 vm_object_unlock(object);
18612 return KERN_INVALID_ARGUMENT;
18613 }
18614 #endif
18615
18616 assert(!entry->is_sub_map);
18617 assert(!entry->use_pmap); /* purgeable has its own accounting */
18618
18619 vm_map_unlock_read(map);
18620
18621 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
18622
18623 kr = vm_object_purgable_control(object, control, state);
18624
18625 if (was_nonvolatile &&
18626 object->purgable != VM_PURGABLE_NONVOLATILE &&
18627 map->pmap == kernel_pmap) {
18628 #if DEBUG
18629 object->vo_purgeable_volatilizer = kernel_task;
18630 #endif /* DEBUG */
18631 }
18632
18633 vm_object_unlock(object);
18634
18635 return kr;
18636 }
18637
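/*
 * Routine: vm_map_footprint_query_page_info
 *
 * Editorial summary (added; derived from the code below): compute the
 * footprint "disposition" of the page at "curr_s_offset" within
 * "map_entry", for a task querying its own region footprint.  The map
 * must be locked.  Alternate-accounting cases (owned purgeable or
 * ledger-tagged objects, non-purgeable IOKit mappings, pmap alternate
 * accounting) are handled first; otherwise the disposition is derived
 * from pmap_query_page_info().
 */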
18638 void
18639 vm_map_footprint_query_page_info(
18640 vm_map_t map,
18641 vm_map_entry_t map_entry,
18642 vm_map_offset_t curr_s_offset,
18643 int *disposition_p)
18644 {
18645 int pmap_disp;
18646 vm_object_t object;
18647 int disposition;
18648 int effective_page_size;
18649
18650 vm_map_lock_assert_held(map);
18651 assert(!map->has_corpse_footprint);
18652 assert(curr_s_offset >= map_entry->vme_start);
18653 assert(curr_s_offset < map_entry->vme_end);
18654
18655 object = VME_OBJECT(map_entry);
18656 if (object == VM_OBJECT_NULL) {
18657 *disposition_p = 0;
18658 return;
18659 }
18660
18661 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
18662
18663 pmap_disp = 0;
18664 if (object == VM_OBJECT_NULL) {
18665 /* nothing mapped here: no need to ask */
18666 *disposition_p = 0;
18667 return;
18668 } else if (map_entry->is_sub_map &&
18669 !map_entry->use_pmap) {
18670 /* nested pmap: no footprint */
18671 *disposition_p = 0;
18672 return;
18673 }
18674
18675 /*
18676 * Query the pmap.
18677 */
18678 pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
18679
18680 /*
18681 * Compute this page's disposition.
18682 */
18683 disposition = 0;
18684
18685 /* deal with "alternate accounting" first */
18686 if (!map_entry->is_sub_map &&
18687 object->vo_no_footprint) {
18688 /* does not count in footprint */
18689 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18690 } else if (!map_entry->is_sub_map &&
18691 (object->purgable == VM_PURGABLE_NONVOLATILE ||
18692 (object->purgable == VM_PURGABLE_DENY &&
18693 object->vo_ledger_tag)) &&
18694 VM_OBJECT_OWNER(object) != NULL &&
18695 VM_OBJECT_OWNER(object)->map == map) {
18696 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18697 if ((((curr_s_offset
18698 - map_entry->vme_start
18699 + VME_OFFSET(map_entry))
18700 / effective_page_size) <
18701 (object->resident_page_count +
18702 vm_compressor_pager_get_count(object->pager)))) {
18703 /*
18704 * Non-volatile purgeable object owned
18705 * by this task: report the first
18706 * "#resident + #compressed" pages as
18707 * "resident" (to show that they
18708 * contribute to the footprint) but not
18709 * "dirty" (to avoid double-counting
18710 * with the fake "non-volatile" region
18711 * we'll report at the end of the
18712 * address space to account for all
18713 * (mapped or not) non-volatile memory
18714 * owned by this task).
18715 */
18716 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18717 }
18718 } else if (!map_entry->is_sub_map &&
18719 (object->purgable == VM_PURGABLE_VOLATILE ||
18720 object->purgable == VM_PURGABLE_EMPTY) &&
18721 VM_OBJECT_OWNER(object) != NULL &&
18722 VM_OBJECT_OWNER(object)->map == map) {
18723 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18724 if ((((curr_s_offset
18725 - map_entry->vme_start
18726 + VME_OFFSET(map_entry))
18727 / effective_page_size) <
18728 object->wired_page_count)) {
18729 /*
18730 * Volatile|empty purgeable object owned
18731 * by this task: report the first
18732 * "#wired" pages as "resident" (to
18733 * show that they contribute to the
18734 * footprint) but not "dirty" (to avoid
18735 * double-counting with the fake
18736 * "non-volatile" region we'll report
18737 * at the end of the address space to
18738 * account for all (mapped or not)
18739 * non-volatile memory owned by this
18740 * task).
18741 */
18742 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18743 }
18744 } else if (!map_entry->is_sub_map &&
18745 map_entry->iokit_acct &&
18746 object->internal &&
18747 object->purgable == VM_PURGABLE_DENY) {
18748 /*
18749 * Non-purgeable IOKit memory: phys_footprint
18750 * includes the entire virtual mapping.
18751 */
18752 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18753 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18754 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18755 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
18756 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
18757 /* alternate accounting */
18758 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18759 if (map->pmap->footprint_was_suspended) {
18760 /*
18761 * The assertion below can fail if dyld
18762 * suspended footprint accounting
18763 * while doing some adjustments to
18764 * this page; the mapping would say
18765 * "use pmap accounting" but the page
18766 * would be marked "alternate
18767 * accounting".
18768 */
18769 } else
18770 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18771 {
18772 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18773 }
18774 disposition = 0;
18775 } else {
18776 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
18777 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18778 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18779 disposition |= VM_PAGE_QUERY_PAGE_REF;
18780 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
18781 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18782 } else {
18783 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
18784 }
18785 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
18786 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
18787 }
18788 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
18789 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18790 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18791 }
18792 }
18793
18794 *disposition_p = disposition;
18795 }
18796
18797 kern_return_t
18798 vm_map_page_query_internal(
18799 vm_map_t target_map,
18800 vm_map_offset_t offset,
18801 int *disposition,
18802 int *ref_count)
18803 {
18804 kern_return_t kr;
18805 vm_page_info_basic_data_t info;
18806 mach_msg_type_number_t count;
18807
18808 count = VM_PAGE_INFO_BASIC_COUNT;
18809 kr = vm_map_page_info(target_map,
18810 offset,
18811 VM_PAGE_INFO_BASIC,
18812 (vm_page_info_t) &info,
18813 &count);
18814 if (kr == KERN_SUCCESS) {
18815 *disposition = info.disposition;
18816 *ref_count = info.ref_count;
18817 } else {
18818 *disposition = 0;
18819 *ref_count = 0;
18820 }
18821
18822 return kr;
18823 }
18824
18825 kern_return_t
18826 vm_map_page_info(
18827 vm_map_t map,
18828 vm_map_offset_t offset,
18829 vm_page_info_flavor_t flavor,
18830 vm_page_info_t info,
18831 mach_msg_type_number_t *count)
18832 {
18833 return vm_map_page_range_info_internal(map,
18834 offset, /* start of range */
18835 (offset + 1), /* this will get rounded in the call to the page boundary */
18836 (int)-1, /* effective_page_shift: unspecified */
18837 flavor,
18838 info,
18839 count);
18840 }
18841
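/*
 * Routine: vm_map_page_range_info_internal
 *
 * Editorial summary (added; derived from the code below): fill "info"
 * with one vm_page_info record per effective page for the range
 * [start_offset, end_offset) of "map".  Holes and null objects yield
 * zeroed records, submaps are recursed into, and object shadow chains
 * are walked to locate each page.  Self-footprint queries are instead
 * answered via the footprint disposition helpers (corpse data or the
 * live pmap).
 */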
18842 kern_return_t
18843 vm_map_page_range_info_internal(
18844 vm_map_t map,
18845 vm_map_offset_t start_offset,
18846 vm_map_offset_t end_offset,
18847 int effective_page_shift,
18848 vm_page_info_flavor_t flavor,
18849 vm_page_info_t info,
18850 mach_msg_type_number_t *count)
18851 {
18852 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
18853 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
18854 vm_page_t m = VM_PAGE_NULL;
18855 kern_return_t retval = KERN_SUCCESS;
18856 int disposition = 0;
18857 int ref_count = 0;
18858 int depth = 0, info_idx = 0;
18859 vm_page_info_basic_t basic_info = 0;
18860 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
18861 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
18862 boolean_t do_region_footprint;
18863 ledger_amount_t ledger_resident, ledger_compressed;
18864 int effective_page_size;
18865 vm_map_offset_t effective_page_mask;
18866
18867 switch (flavor) {
18868 case VM_PAGE_INFO_BASIC:
18869 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
18870 /*
18871 * The "vm_page_info_basic_data" structure was not
18872 * properly padded, so allow the size to be off by
18873 * one to maintain backwards binary compatibility...
18874 */
18875 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
18876 return KERN_INVALID_ARGUMENT;
18877 }
18878 }
18879 break;
18880 default:
18881 return KERN_INVALID_ARGUMENT;
18882 }
18883
18884 if (effective_page_shift == -1) {
18885 effective_page_shift = vm_self_region_page_shift_safely(map);
18886 if (effective_page_shift == -1) {
18887 return KERN_INVALID_ARGUMENT;
18888 }
18889 }
18890 effective_page_size = (1 << effective_page_shift);
18891 effective_page_mask = effective_page_size - 1;
18892
18893 do_region_footprint = task_self_region_footprint();
18894 disposition = 0;
18895 ref_count = 0;
18896 depth = 0;
18897 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
18898 retval = KERN_SUCCESS;
18899
18900 offset_in_page = start_offset & effective_page_mask;
18901 start = vm_map_trunc_page(start_offset, effective_page_mask);
18902 end = vm_map_round_page(end_offset, effective_page_mask);
18903
18904 if (end < start) {
18905 return KERN_INVALID_ARGUMENT;
18906 }
18907
18908 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
18909
18910 vm_map_lock_read(map);
18911
18912 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
18913
18914 for (curr_s_offset = start; curr_s_offset < end;) {
18915 /*
18916 * New lookup needs reset of these variables.
18917 */
18918 curr_object = object = VM_OBJECT_NULL;
18919 offset_in_object = 0;
18920 ref_count = 0;
18921 depth = 0;
18922
18923 if (do_region_footprint &&
18924 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
18925 /*
18926 * Request for "footprint" info about a page beyond
18927 * the end of address space: this must be for
18928 * the fake region vm_map_region_recurse_64()
18929 * reported to account for non-volatile purgeable
18930 * memory owned by this task.
18931 */
18932 disposition = 0;
18933
18934 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
18935 (unsigned) ledger_compressed) {
18936 /*
18937 * We haven't reported all the "non-volatile
18938 * compressed" pages yet, so report this fake
18939 * page as "compressed".
18940 */
18941 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18942 } else {
18943 /*
18944 * We've reported all the non-volatile
18945 * compressed page but not all the non-volatile
18946 * pages , so report this fake page as
18947 * "resident dirty".
18948 */
18949 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18950 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18951 disposition |= VM_PAGE_QUERY_PAGE_REF;
18952 }
18953 switch (flavor) {
18954 case VM_PAGE_INFO_BASIC:
18955 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18956 basic_info->disposition = disposition;
18957 basic_info->ref_count = 1;
18958 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
18959 basic_info->offset = 0;
18960 basic_info->depth = 0;
18961
18962 info_idx++;
18963 break;
18964 }
18965 curr_s_offset += effective_page_size;
18966 continue;
18967 }
18968
18969 /*
18970 * First, find the map entry covering "curr_s_offset", going down
18971 * submaps if necessary.
18972 */
18973 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
18974 /* no entry -> no object -> no page */
18975
18976 if (curr_s_offset < vm_map_min(map)) {
18977 /*
18978 * Illegal address that falls below map min.
18979 */
18980 curr_e_offset = MIN(end, vm_map_min(map));
18981 } else if (curr_s_offset >= vm_map_max(map)) {
18982 /*
18983 * Illegal address that falls on/after map max.
18984 */
18985 curr_e_offset = end;
18986 } else if (map_entry == vm_map_to_entry(map)) {
18987 /*
18988 * Hit a hole.
18989 */
18990 if (map_entry->vme_next == vm_map_to_entry(map)) {
18991 /*
18992 * Empty map.
18993 */
18994 curr_e_offset = MIN(map->max_offset, end);
18995 } else {
18996 /*
18997 * Hole at start of the map.
18998 */
18999 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19000 }
19001 } else {
19002 if (map_entry->vme_next == vm_map_to_entry(map)) {
19003 /*
19004 * Hole at the end of the map.
19005 */
19006 curr_e_offset = MIN(map->max_offset, end);
19007 } else {
19008 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19009 }
19010 }
19011
19012 assert(curr_e_offset >= curr_s_offset);
19013
19014 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19015
19016 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19017
19018 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19019
19020 curr_s_offset = curr_e_offset;
19021
19022 info_idx += num_pages;
19023
19024 continue;
19025 }
19026
19027 /* compute offset from this map entry's start */
19028 offset_in_object = curr_s_offset - map_entry->vme_start;
19029
19030 /* compute offset into this map entry's object (or submap) */
19031 offset_in_object += VME_OFFSET(map_entry);
19032
19033 if (map_entry->is_sub_map) {
19034 vm_map_t sub_map = VM_MAP_NULL;
19035 vm_page_info_t submap_info = 0;
19036 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
19037
19038 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
19039
19040 submap_s_offset = offset_in_object;
19041 submap_e_offset = submap_s_offset + range_len;
19042
19043 sub_map = VME_SUBMAP(map_entry);
19044
19045 vm_map_reference(sub_map);
19046 vm_map_unlock_read(map);
19047
19048 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19049
19050 assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
19051 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
19052
19053 retval = vm_map_page_range_info_internal(sub_map,
19054 submap_s_offset,
19055 submap_e_offset,
19056 effective_page_shift,
19057 VM_PAGE_INFO_BASIC,
19058 (vm_page_info_t) submap_info,
19059 count);
19060
19061 assert(retval == KERN_SUCCESS);
19062
19063 vm_map_lock_read(map);
19064 vm_map_deallocate(sub_map);
19065
19066 /* Move the "info" index by the number of pages we inspected.*/
19067 info_idx += range_len >> effective_page_shift;
19068
19069 /* Move our current offset by the size of the range we inspected.*/
19070 curr_s_offset += range_len;
19071
19072 continue;
19073 }
19074
19075 object = VME_OBJECT(map_entry);
19076
19077 if (object == VM_OBJECT_NULL) {
19078 /*
19079 * We don't have an object here and, hence,
19080 * no pages to inspect. We'll fill up the
19081 * info structure appropriately.
19082 */
19083
19084 curr_e_offset = MIN(map_entry->vme_end, end);
19085
19086 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19087
19088 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19089
19090 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19091
19092 curr_s_offset = curr_e_offset;
19093
19094 info_idx += num_pages;
19095
19096 continue;
19097 }
19098
19099 if (do_region_footprint) {
19100 disposition = 0;
19101 if (map->has_corpse_footprint) {
19102 /*
19103 * Query the page info data we saved
19104 * while forking the corpse.
19105 */
19106 vm_map_corpse_footprint_query_page_info(
19107 map,
19108 curr_s_offset,
19109 &disposition);
19110 } else {
19111 /*
19112 * Query the live pmap for footprint info
19113 * about this page.
19114 */
19115 vm_map_footprint_query_page_info(
19116 map,
19117 map_entry,
19118 curr_s_offset,
19119 &disposition);
19120 }
19121 switch (flavor) {
19122 case VM_PAGE_INFO_BASIC:
19123 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19124 basic_info->disposition = disposition;
19125 basic_info->ref_count = 1;
19126 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19127 basic_info->offset = 0;
19128 basic_info->depth = 0;
19129
19130 info_idx++;
19131 break;
19132 }
19133 curr_s_offset += effective_page_size;
19134 continue;
19135 }
19136
19137 vm_object_reference(object);
19138 /*
19139 * Shared mode -- so we can allow other readers
19140 * to grab the lock too.
19141 */
19142 vm_object_lock_shared(object);
19143
19144 curr_e_offset = MIN(map_entry->vme_end, end);
19145
19146 vm_map_unlock_read(map);
19147
19148 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
19149
19150 curr_object = object;
19151
19152 for (; curr_s_offset < curr_e_offset;) {
19153 if (object == curr_object) {
19154 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19155 } else {
19156 ref_count = curr_object->ref_count;
19157 }
19158
19159 curr_offset_in_object = offset_in_object;
19160
19161 for (;;) {
19162 m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
19163
19164 if (m != VM_PAGE_NULL) {
19165 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19166 break;
19167 } else {
19168 if (curr_object->internal &&
19169 curr_object->alive &&
19170 !curr_object->terminating &&
19171 curr_object->pager_ready) {
19172 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
19173 == VM_EXTERNAL_STATE_EXISTS) {
19174 /* the pager has that page */
19175 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19176 break;
19177 }
19178 }
19179
19180 /*
19181 * Go down the VM object shadow chain until we find the page
19182 * we're looking for.
19183 */
19184
19185 if (curr_object->shadow != VM_OBJECT_NULL) {
19186 vm_object_t shadow = VM_OBJECT_NULL;
19187
19188 curr_offset_in_object += curr_object->vo_shadow_offset;
19189 shadow = curr_object->shadow;
19190
19191 vm_object_lock_shared(shadow);
19192 vm_object_unlock(curr_object);
19193
19194 curr_object = shadow;
19195 depth++;
19196 continue;
19197 } else {
19198 break;
19199 }
19200 }
19201 }
19202
19203 /* The ref_count is not strictly accurate: it counts the number of */
19204 /* entities holding a reference on the object; they may not be mapping */
19205 /* the object, or may not be mapping the section holding the */
19206 /* target page, but it is still a ballpark number and, though an */
19207 /* overcount, it picks up the copy-on-write cases. */
19208 
19209 /* We could also get a picture of page sharing from pmap_attributes, */
19210 /* but that would undercount, as only faulted-in mappings would */
19211 /* show up. */
19212
19213 if ((curr_object == object) && curr_object->shadow) {
19214 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
19215 }
19216
19217 if (!curr_object->internal) {
19218 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19219 }
19220
19221 if (m != VM_PAGE_NULL) {
19222 if (m->vmp_fictitious) {
19223 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
19224 } else {
19225 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
19226 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19227 }
19228
19229 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
19230 disposition |= VM_PAGE_QUERY_PAGE_REF;
19231 }
19232
19233 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
19234 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
19235 }
19236
19237 /*
19238 * XXX TODO4K:
19239 * when this routine deals with 4k
19240 * pages, check the appropriate CS bit
19241 * here.
19242 */
19243 if (m->vmp_cs_validated) {
19244 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
19245 }
19246 if (m->vmp_cs_tainted) {
19247 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
19248 }
19249 if (m->vmp_cs_nx) {
19250 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
19251 }
19252 if (m->vmp_reusable || curr_object->all_reusable) {
19253 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19254 }
19255 }
19256 }
19257
19258 switch (flavor) {
19259 case VM_PAGE_INFO_BASIC:
19260 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19261 basic_info->disposition = disposition;
19262 basic_info->ref_count = ref_count;
19263 basic_info->object_id = (vm_object_id_t) (uintptr_t)
19264 VM_KERNEL_ADDRPERM(curr_object);
19265 basic_info->offset =
19266 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
19267 basic_info->depth = depth;
19268
19269 info_idx++;
19270 break;
19271 }
19272
19273 disposition = 0;
19274 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
19275
19276 /*
19277 * Move to next offset in the range and in our object.
19278 */
19279 curr_s_offset += effective_page_size;
19280 offset_in_object += effective_page_size;
19281 curr_offset_in_object = offset_in_object;
19282
19283 if (curr_object != object) {
19284 vm_object_unlock(curr_object);
19285
19286 curr_object = object;
19287
19288 vm_object_lock_shared(curr_object);
19289 } else {
19290 vm_object_lock_yield_shared(curr_object);
19291 }
19292 }
19293
19294 vm_object_unlock(curr_object);
19295 vm_object_deallocate(curr_object);
19296
19297 vm_map_lock_read(map);
19298 }
19299
19300 vm_map_unlock_read(map);
19301 return retval;
19302 }
19303
19304 /*
19305 * vm_map_msync
19306 *
19307 * Synchronises the specified memory range with its backing store
19308 * image by either flushing or cleaning the contents to the appropriate
19309 * memory manager, engaging in a memory object synchronize dialog with
19310 * that manager. The client doesn't return until the manager issues an
19311 * m_o_s_completed message. MIG magically converts the user task parameter
19312 * to the task's address map.
19313 *
19314 * interpretation of sync_flags
19315 * VM_SYNC_INVALIDATE - discard pages, only return precious
19316 * pages to manager.
19317 *
19318 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19319 * - discard pages, write dirty or precious
19320 * pages back to memory manager.
19321 *
19322 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19323 * - write dirty or precious pages back to
19324 * the memory manager.
19325 *
19326 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19327 * is a hole in the region, and we would
19328 * have returned KERN_SUCCESS, return
19329 * KERN_INVALID_ADDRESS instead.
19330 *
19331 * NOTE
19332 * The memory object attributes have not yet been implemented; this
19333 * function will have to deal with the invalidate attribute.
19334 *
19335 * RETURNS
19336 * KERN_INVALID_TASK Bad task parameter
19337 * KERN_INVALID_ARGUMENT Both sync and async were specified.
19338 * KERN_SUCCESS The usual.
19339 * KERN_INVALID_ADDRESS There was a hole in the region.
19340 */
19341
19342 kern_return_t
19343 vm_map_msync(
19344 vm_map_t map,
19345 vm_map_address_t address,
19346 vm_map_size_t size,
19347 vm_sync_t sync_flags)
19348 {
19349 vm_map_entry_t entry;
19350 vm_map_size_t amount_left;
19351 vm_object_offset_t offset;
19352 vm_object_offset_t start_offset, end_offset;
19353 boolean_t do_sync_req;
19354 boolean_t had_hole = FALSE;
19355 vm_map_offset_t pmap_offset;
19356
19357 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19358 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19359 return KERN_INVALID_ARGUMENT;
19360 }
19361
19362 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19363 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19364 }
19365
19366 /*
19367 * align address and size on page boundaries
19368 */
19369 size = (vm_map_round_page(address + size,
19370 VM_MAP_PAGE_MASK(map)) -
19371 vm_map_trunc_page(address,
19372 VM_MAP_PAGE_MASK(map)));
19373 address = vm_map_trunc_page(address,
19374 VM_MAP_PAGE_MASK(map));
19375
19376 if (map == VM_MAP_NULL) {
19377 return KERN_INVALID_TASK;
19378 }
19379
19380 if (size == 0) {
19381 return KERN_SUCCESS;
19382 }
19383
19384 amount_left = size;
19385
19386 while (amount_left > 0) {
19387 vm_object_size_t flush_size;
19388 vm_object_t object;
19389
19390 vm_map_lock(map);
19391 if (!vm_map_lookup_entry(map,
19392 address,
19393 &entry)) {
19394 vm_map_size_t skip;
19395
19396 /*
19397 * hole in the address map.
19398 */
19399 had_hole = TRUE;
19400
19401 if (sync_flags & VM_SYNC_KILLPAGES) {
19402 /*
19403 * For VM_SYNC_KILLPAGES, there should be
19404 * no holes in the range, since we couldn't
19405 * prevent someone else from allocating in
19406 * that hole and we wouldn't want to "kill"
19407 * their pages.
19408 */
19409 vm_map_unlock(map);
19410 break;
19411 }
19412
19413 /*
19414 * Check for empty map.
19415 */
19416 if (entry == vm_map_to_entry(map) &&
19417 entry->vme_next == entry) {
19418 vm_map_unlock(map);
19419 break;
19420 }
19421 /*
19422 * Check that we don't wrap and that
19423 * we have at least one real map entry.
19424 */
19425 if ((map->hdr.nentries == 0) ||
19426 (entry->vme_next->vme_start < address)) {
19427 vm_map_unlock(map);
19428 break;
19429 }
19430 /*
19431 * Move up to the next entry if needed
19432 */
19433 skip = (entry->vme_next->vme_start - address);
19434 if (skip >= amount_left) {
19435 amount_left = 0;
19436 } else {
19437 amount_left -= skip;
19438 }
19439 address = entry->vme_next->vme_start;
19440 vm_map_unlock(map);
19441 continue;
19442 }
19443
19444 offset = address - entry->vme_start;
19445 pmap_offset = address;
19446
19447 /*
19448 * do we have more to flush than is contained in this
19449 * entry?
19450 */
19451 if (amount_left + entry->vme_start + offset > entry->vme_end) {
19452 flush_size = entry->vme_end -
19453 (entry->vme_start + offset);
19454 } else {
19455 flush_size = amount_left;
19456 }
19457 amount_left -= flush_size;
19458 address += flush_size;
19459
19460 if (entry->is_sub_map == TRUE) {
19461 vm_map_t local_map;
19462 vm_map_offset_t local_offset;
19463
19464 local_map = VME_SUBMAP(entry);
19465 local_offset = VME_OFFSET(entry);
19466 vm_map_reference(local_map);
19467 vm_map_unlock(map);
19468 if (vm_map_msync(
19469 local_map,
19470 local_offset,
19471 flush_size,
19472 sync_flags) == KERN_INVALID_ADDRESS) {
19473 had_hole = TRUE;
19474 }
19475 vm_map_deallocate(local_map);
19476 continue;
19477 }
19478 object = VME_OBJECT(entry);
19479
19480 /*
19481 * We can't sync this object if the object has not been
19482 * created yet
19483 */
19484 if (object == VM_OBJECT_NULL) {
19485 vm_map_unlock(map);
19486 continue;
19487 }
19488 offset += VME_OFFSET(entry);
19489
19490 vm_object_lock(object);
19491
19492 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19493 int kill_pages = 0;
19494 boolean_t reusable_pages = FALSE;
19495
19496 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19497 /*
19498 * This is a destructive operation and so we
19499 * err on the side of limiting the range of
19500 * the operation.
19501 */
19502 start_offset = vm_object_round_page(offset);
19503 end_offset = vm_object_trunc_page(offset + flush_size);
19504
19505 if (end_offset <= start_offset) {
19506 vm_object_unlock(object);
19507 vm_map_unlock(map);
19508 continue;
19509 }
19510
19511 pmap_offset += start_offset - offset;
19512 } else {
19513 start_offset = offset;
19514 end_offset = offset + flush_size;
19515 }
19516
19517 if (sync_flags & VM_SYNC_KILLPAGES) {
19518 if (((object->ref_count == 1) ||
19519 ((object->copy_strategy !=
19520 MEMORY_OBJECT_COPY_SYMMETRIC) &&
19521 (object->copy == VM_OBJECT_NULL))) &&
19522 (object->shadow == VM_OBJECT_NULL)) {
19523 if (object->ref_count != 1) {
19524 vm_page_stats_reusable.free_shared++;
19525 }
19526 kill_pages = 1;
19527 } else {
19528 kill_pages = -1;
19529 }
19530 }
19531 if (kill_pages != -1) {
19532 vm_object_deactivate_pages(
19533 object,
19534 start_offset,
19535 (vm_object_size_t) (end_offset - start_offset),
19536 kill_pages,
19537 reusable_pages,
19538 map->pmap,
19539 pmap_offset);
19540 }
19541 vm_object_unlock(object);
19542 vm_map_unlock(map);
19543 continue;
19544 }
19545 /*
19546 * We can't sync this object if there isn't a pager.
19547 * Don't bother to sync internal objects, since there can't
19548 * be any "permanent" storage for these objects anyway.
19549 */
19550 if ((object->pager == MEMORY_OBJECT_NULL) ||
19551 (object->internal) || (object->private)) {
19552 vm_object_unlock(object);
19553 vm_map_unlock(map);
19554 continue;
19555 }
19556 /*
19557 * keep reference on the object until syncing is done
19558 */
19559 vm_object_reference_locked(object);
19560 vm_object_unlock(object);
19561
19562 vm_map_unlock(map);
19563
19564 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19565 start_offset = vm_object_trunc_page(offset);
19566 end_offset = vm_object_round_page(offset + flush_size);
19567 } else {
19568 start_offset = offset;
19569 end_offset = offset + flush_size;
19570 }
19571
19572 do_sync_req = vm_object_sync(object,
19573 start_offset,
19574 (end_offset - start_offset),
19575 sync_flags & VM_SYNC_INVALIDATE,
19576 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19577 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19578 sync_flags & VM_SYNC_SYNCHRONOUS);
19579
19580 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19581 /*
19582 * clear out the clustering and read-ahead hints
19583 */
19584 vm_object_lock(object);
19585
19586 object->pages_created = 0;
19587 object->pages_used = 0;
19588 object->sequential = 0;
19589 object->last_alloc = 0;
19590
19591 vm_object_unlock(object);
19592 }
19593 vm_object_deallocate(object);
19594 } /* while */
19595
19596 /* for proper msync() behaviour */
19597 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19598 return KERN_INVALID_ADDRESS;
19599 }
19600
19601 return KERN_SUCCESS;
19602 } /* vm_map_msync */
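/*
 * A minimal usage sketch (not from the original source): how a kernel
 * caller might drive vm_map_msync() with the flag combinations documented
 * above.  "addr" and "len" are hypothetical values.
 *
 *	kern_return_t kr;
 *	vm_map_t map = current_map();
 *
 *	// Write dirty/precious pages back to the memory manager and wait;
 *	// report any hole in the range as KERN_INVALID_ADDRESS.
 *	kr = vm_map_msync(map, (vm_map_address_t)addr, (vm_map_size_t)len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 *
 *	// Discard the pages, returning only precious pages to the manager.
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_msync(map, (vm_map_address_t)addr,
 *		    (vm_map_size_t)len, VM_SYNC_INVALIDATE);
 *	}
 */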
19603
19604 kern_return_t
19605 vm_named_entry_from_vm_object(
19606 vm_named_entry_t named_entry,
19607 vm_object_t object,
19608 vm_object_offset_t offset,
19609 vm_object_size_t size,
19610 vm_prot_t prot)
19611 {
19612 vm_map_copy_t copy;
19613 vm_map_entry_t copy_entry;
19614
19615 assert(!named_entry->is_sub_map);
19616 assert(!named_entry->is_copy);
19617 assert(!named_entry->is_object);
19618 assert(!named_entry->internal);
19619 assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
19620
19621 copy = vm_map_copy_allocate();
19622 copy->type = VM_MAP_COPY_ENTRY_LIST;
19623 copy->offset = offset;
19624 copy->size = size;
19625 copy->cpy_hdr.page_shift = PAGE_SHIFT;
19626 vm_map_store_init(&copy->cpy_hdr);
19627
19628 copy_entry = vm_map_copy_entry_create(copy, FALSE);
19629 copy_entry->protection = prot;
19630 copy_entry->max_protection = prot;
19631 copy_entry->use_pmap = TRUE;
19632 copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
19633 copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
19634 VME_OBJECT_SET(copy_entry, object);
19635 VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
19636 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);
19637
19638 named_entry->backing.copy = copy;
19639 named_entry->is_object = TRUE;
19640 if (object->internal) {
19641 named_entry->internal = TRUE;
19642 }
19643
19644 DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, object, offset, size, prot);
19645
19646 return KERN_SUCCESS;
19647 }
19648
19649 vm_object_t
19650 vm_named_entry_to_vm_object(
19651 vm_named_entry_t named_entry)
19652 {
19653 vm_map_copy_t copy;
19654 vm_map_entry_t copy_entry;
19655 vm_object_t object;
19656
19657 assert(!named_entry->is_sub_map);
19658 assert(!named_entry->is_copy);
19659 assert(named_entry->is_object);
19660 copy = named_entry->backing.copy;
19661 assert(copy != VM_MAP_COPY_NULL);
19662 assert(copy->cpy_hdr.nentries == 1);
19663 copy_entry = vm_map_copy_first_entry(copy);
19664 assert(!copy_entry->is_sub_map);
19665 object = VME_OBJECT(copy_entry);
19666
19667 DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);
19668
19669 return object;
19670 }
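/*
 * A minimal round-trip sketch (not from the original source) for the two
 * helpers above.  "named_entry", "object", "offset", "size" and "prot" are
 * hypothetical, and the usual named-entry setup and locking are omitted.
 *
 *	kern_return_t kr;
 *
 *	// Wrap "object" in a one-entry VM_MAP_COPY_ENTRY_LIST copy.
 *	kr = vm_named_entry_from_vm_object(named_entry, object,
 *	    offset, size, prot);
 *	assert(kr == KERN_SUCCESS);
 *
 *	// Later, recover the same VM object from the named entry.
 *	vm_object_t obj = vm_named_entry_to_vm_object(named_entry);
 *	assert(obj == object);
 */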
19671
19672 /*
19673 * Routine: convert_port_entry_to_map
19674 * Purpose:
19675 * Convert from a port specifying an entry or a task
19676 * to a map. Doesn't consume the port ref; produces a map ref,
19677 * which may be null. Unlike convert_port_to_map, the
19678 * port may be either task-backed or named-entry-backed.
19679 * Conditions:
19680 * Nothing locked.
19681 */
19682
19683
19684 vm_map_t
19685 convert_port_entry_to_map(
19686 ipc_port_t port)
19687 {
19688 vm_map_t map;
19689 vm_named_entry_t named_entry;
19690 uint32_t try_failed_count = 0;
19691
19692 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19693 while (TRUE) {
19694 ip_lock(port);
19695 if (ip_active(port) && (ip_kotype(port)
19696 == IKOT_NAMED_ENTRY)) {
19697 named_entry =
19698 (vm_named_entry_t) ip_get_kobject(port);
19699 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
19700 ip_unlock(port);
19701
19702 try_failed_count++;
19703 mutex_pause(try_failed_count);
19704 continue;
19705 }
19706 named_entry->ref_count++;
19707 lck_mtx_unlock(&(named_entry)->Lock);
19708 ip_unlock(port);
19709 if ((named_entry->is_sub_map) &&
19710 (named_entry->protection
19711 & VM_PROT_WRITE)) {
19712 map = named_entry->backing.map;
19713 if (map->pmap != PMAP_NULL) {
19714 if (map->pmap == kernel_pmap) {
19715 panic("userspace has access "
19716 "to a kernel map %p", map);
19717 }
19718 pmap_require(map->pmap);
19719 }
19720 } else {
19721 mach_destroy_memory_entry(port);
19722 return VM_MAP_NULL;
19723 }
19724 vm_map_reference_swap(map);
19725 mach_destroy_memory_entry(port);
19726 break;
19727 } else {
19728 return VM_MAP_NULL;
19729 }
19730 }
19731 } else {
19732 map = convert_port_to_map(port);
19733 }
19734
19735 return map;
19736 }
19737
19738 /*
19739 * Routine: convert_port_entry_to_object
19740 * Purpose:
19741 * Convert from a port specifying a named entry to an
19742 * object. Doesn't consume the port ref; produces an object ref,
19743 * which may be null.
19744 * Conditions:
19745 * Nothing locked.
19746 */
19747
19748
19749 vm_object_t
19750 convert_port_entry_to_object(
19751 ipc_port_t port)
19752 {
19753 vm_object_t object = VM_OBJECT_NULL;
19754 vm_named_entry_t named_entry;
19755 uint32_t try_failed_count = 0;
19756
19757 if (IP_VALID(port) &&
19758 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19759 try_again:
19760 ip_lock(port);
19761 if (ip_active(port) &&
19762 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19763 named_entry = (vm_named_entry_t) ip_get_kobject(port);
19764 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
19765 ip_unlock(port);
19766 try_failed_count++;
19767 mutex_pause(try_failed_count);
19768 goto try_again;
19769 }
19770 named_entry->ref_count++;
19771 lck_mtx_unlock(&(named_entry)->Lock);
19772 ip_unlock(port);
19773 if (!(named_entry->is_sub_map) &&
19774 !(named_entry->is_copy) &&
19775 (named_entry->is_object) &&
19776 (named_entry->protection & VM_PROT_WRITE)) {
19777 vm_map_copy_t copy;
19778 vm_map_entry_t copy_entry;
19779
19780 copy = named_entry->backing.copy;
19781 assert(copy->cpy_hdr.nentries == 1);
19782 copy_entry = vm_map_copy_first_entry(copy);
19783 assert(!copy_entry->is_sub_map);
19784 object = VME_OBJECT(copy_entry);
19785 assert(object != VM_OBJECT_NULL);
19786 vm_object_reference(object);
19787 }
19788 mach_destroy_memory_entry(port);
19789 }
19790 }
19791
19792 return object;
19793 }
19794
19795 /*
19796 * Export routines to other components for the things we access locally through
19797 * macros.
19798 */
19799 #undef current_map
19800 vm_map_t
19801 current_map(void)
19802 {
19803 return current_map_fast();
19804 }
19805
19806 /*
19807 * vm_map_reference:
19808 *
19809 * Most code internal to the osfmk will go through a
19810 * macro defining this. This is always here for the
19811 * use of other kernel components.
19812 */
19813 #undef vm_map_reference
19814 void
19815 vm_map_reference(
19816 vm_map_t map)
19817 {
19818 if (map == VM_MAP_NULL) {
19819 return;
19820 }
19821
19822 lck_mtx_lock(&map->s_lock);
19823 #if TASK_SWAPPER
19824 assert(map->res_count > 0);
19825 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
19826 map->res_count++;
19827 #endif
19828 os_ref_retain_locked(&map->map_refcnt);
19829 lck_mtx_unlock(&map->s_lock);
19830 }
19831
19832 /*
19833 * vm_map_deallocate:
19834 *
19835 * Removes a reference from the specified map,
19836 * destroying it if no references remain.
19837 * The map should not be locked.
19838 */
19839 void
19840 vm_map_deallocate(
19841 vm_map_t map)
19842 {
19843 unsigned int ref;
19844
19845 if (map == VM_MAP_NULL) {
19846 return;
19847 }
19848
19849 lck_mtx_lock(&map->s_lock);
19850 ref = os_ref_release_locked(&map->map_refcnt);
19851 if (ref > 0) {
19852 vm_map_res_deallocate(map);
19853 lck_mtx_unlock(&map->s_lock);
19854 return;
19855 }
19856 assert(os_ref_get_count(&map->map_refcnt) == 0);
19857 lck_mtx_unlock(&map->s_lock);
19858
19859 #if TASK_SWAPPER
19860 /*
19861 * The map residence count isn't decremented here because
19862 * the vm_map_delete below will traverse the entire map,
19863 * deleting entries, and the residence counts on objects
19864 * and sharing maps will go away then.
19865 */
19866 #endif
19867
19868 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
19869 }
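/*
 * A minimal sketch (not from the original source) of the typical
 * vm_map_reference()/vm_map_deallocate() pairing, as used by the submap
 * cases earlier in this file when the parent map's lock must be dropped.
 * "map" and "entry" are hypothetical; "entry" is assumed to be a submap
 * entry of a read-locked "map".
 *
 *	vm_map_t sub_map = VME_SUBMAP(entry);
 *
 *	vm_map_reference(sub_map);	// keep the submap alive...
 *	vm_map_unlock_read(map);	// ...while the parent map is unlocked
 *
 *	// ... operate on sub_map here ...
 *
 *	vm_map_lock_read(map);
 *	vm_map_deallocate(sub_map);	// drop the extra reference
 */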
19870
19871 void
19872 vm_map_inspect_deallocate(
19873 vm_map_inspect_t map)
19874 {
19875 vm_map_deallocate((vm_map_t)map);
19876 }
19877
19878 void
19879 vm_map_read_deallocate(
19880 vm_map_read_t map)
19881 {
19882 vm_map_deallocate((vm_map_t)map);
19883 }
19884
19885
19886 void
19887 vm_map_disable_NX(vm_map_t map)
19888 {
19889 if (map == NULL) {
19890 return;
19891 }
19892 if (map->pmap == NULL) {
19893 return;
19894 }
19895
19896 pmap_disable_NX(map->pmap);
19897 }
19898
19899 void
19900 vm_map_disallow_data_exec(vm_map_t map)
19901 {
19902 if (map == NULL) {
19903 return;
19904 }
19905
19906 map->map_disallow_data_exec = TRUE;
19907 }
19908
19909 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19910 * more descriptive.
19911 */
19912 void
19913 vm_map_set_32bit(vm_map_t map)
19914 {
19915 #if defined(__arm__) || defined(__arm64__)
19916 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
19917 #else
19918 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
19919 #endif
19920 }
19921
19922
19923 void
19924 vm_map_set_64bit(vm_map_t map)
19925 {
19926 #if defined(__arm__) || defined(__arm64__)
19927 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
19928 #else
19929 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
19930 #endif
19931 }
19932
19933 /*
19934 * Expand the maximum size of an existing map to the maximum supported.
19935 */
19936 void
19937 vm_map_set_jumbo(vm_map_t map)
19938 {
19939 #if defined (__arm64__) && !defined(CONFIG_ARROW)
19940 vm_map_set_max_addr(map, ~0);
19941 #else /* arm64 */
19942 (void) map;
19943 #endif
19944 }
19945
19946 /*
19947 * This map has a JIT entitlement
19948 */
19949 void
19950 vm_map_set_jit_entitled(vm_map_t map)
19951 {
19952 #if defined (__arm64__)
19953 pmap_set_jit_entitled(map->pmap);
19954 #else /* arm64 */
19955 (void) map;
19956 #endif
19957 }
19958
19959 /*
19960 * Expand the maximum size of an existing map.
19961 */
19962 void
19963 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
19964 {
19965 #if defined(__arm64__)
19966 vm_map_offset_t max_supported_offset = 0;
19967 vm_map_offset_t old_max_offset = map->max_offset;
19968 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
19969
19970 new_max_offset = trunc_page(new_max_offset);
19971
19972 /* The address space cannot be shrunk using this routine. */
19973 if (old_max_offset >= new_max_offset) {
19974 return;
19975 }
19976
19977 if (max_supported_offset < new_max_offset) {
19978 new_max_offset = max_supported_offset;
19979 }
19980
19981 map->max_offset = new_max_offset;
19982
19983 if (map->holes_list->prev->vme_end == old_max_offset) {
19984 /*
19985 * There is already a hole at the end of the map; simply make it bigger.
19986 */
19987 map->holes_list->prev->vme_end = map->max_offset;
19988 } else {
19989 /*
19990 * There is no hole at the end, so we need to create a new hole
19991 * for the new empty space we're creating.
19992 */
19993 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
19994 new_hole->start = old_max_offset;
19995 new_hole->end = map->max_offset;
19996 new_hole->prev = map->holes_list->prev;
19997 new_hole->next = (struct vm_map_entry *)map->holes_list;
19998 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
19999 map->holes_list->prev = (struct vm_map_entry *)new_hole;
20000 }
20001 #else
20002 (void)map;
20003 (void)new_max_offset;
20004 #endif
20005 }
20006
20007 vm_map_offset_t
20008 vm_compute_max_offset(boolean_t is64)
20009 {
20010 #if defined(__arm__) || defined(__arm64__)
20011 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
20012 #else
20013 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
20014 #endif
20015 }
20016
20017 void
20018 vm_map_get_max_aslr_slide_section(
20019 vm_map_t map __unused,
20020 int64_t *max_sections,
20021 int64_t *section_size)
20022 {
20023 #if defined(__arm64__)
20024 *max_sections = 3;
20025 *section_size = ARM_TT_TWIG_SIZE;
20026 #else
20027 *max_sections = 1;
20028 *section_size = 0;
20029 #endif
20030 }
20031
20032 uint64_t
20033 vm_map_get_max_aslr_slide_pages(vm_map_t map)
20034 {
20035 #if defined(__arm64__)
20036 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
20037 * limited embedded address space; this is also meant to minimize pmap
20038 * memory usage on 16KB page systems.
20039 */
20040 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
20041 #else
20042 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20043 #endif
20044 }
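/*
 * Worked example (a reading of the code above, not from the original
 * source): with 16KB pages, VM_MAP_PAGE_SHIFT(map) == 14, so this returns
 * 1 << (24 - 14) == 1024 possible slide positions, i.e. 1024 * 16KB == 16MB
 * of slide range.  With 4KB pages (shift 12) it returns 4096 positions,
 * which is likewise 4096 * 4KB == 16MB.
 */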
20045
20046 uint64_t
20047 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
20048 {
20049 #if defined(__arm64__)
20050 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
20051 * of independent entropy on 16KB page systems.
20052 */
20053 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
20054 #else
20055 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20056 #endif
20057 }
20058
20059 #ifndef __arm__
20060 boolean_t
20061 vm_map_is_64bit(
20062 vm_map_t map)
20063 {
20064 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
20065 }
20066 #endif
20067
20068 boolean_t
20069 vm_map_has_hard_pagezero(
20070 vm_map_t map,
20071 vm_map_offset_t pagezero_size)
20072 {
20073 /*
20074 * XXX FBDP
20075 * We should lock the VM map (for read) here but we can get away
20076 * with it for now because there can't really be any race condition:
20077 * the VM map's min_offset is changed only when the VM map is created
20078 * and when the zero page is established (when the binary gets loaded),
20079 * and this routine gets called only when the task terminates and the
20080 * VM map is being torn down, and when a new map is created via
20081 * load_machfile()/execve().
20082 */
20083 return map->min_offset >= pagezero_size;
20084 }
20085
20086 /*
20087 * Raise a VM map's maximum offset.
20088 */
20089 kern_return_t
20090 vm_map_raise_max_offset(
20091 vm_map_t map,
20092 vm_map_offset_t new_max_offset)
20093 {
20094 kern_return_t ret;
20095
20096 vm_map_lock(map);
20097 ret = KERN_INVALID_ADDRESS;
20098
20099 if (new_max_offset >= map->max_offset) {
20100 if (!vm_map_is_64bit(map)) {
20101 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20102 map->max_offset = new_max_offset;
20103 ret = KERN_SUCCESS;
20104 }
20105 } else {
20106 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20107 map->max_offset = new_max_offset;
20108 ret = KERN_SUCCESS;
20109 }
20110 }
20111 }
20112
20113 vm_map_unlock(map);
20114 return ret;
20115 }
20116
20117
20118 /*
20119 * Raise a VM map's minimum offset.
20120 * To strictly enforce "page zero" reservation.
20121 */
20122 kern_return_t
20123 vm_map_raise_min_offset(
20124 vm_map_t map,
20125 vm_map_offset_t new_min_offset)
20126 {
20127 vm_map_entry_t first_entry;
20128
20129 new_min_offset = vm_map_round_page(new_min_offset,
20130 VM_MAP_PAGE_MASK(map));
20131
20132 vm_map_lock(map);
20133
20134 if (new_min_offset < map->min_offset) {
20135 /*
20136 * Can't move min_offset backwards, as that would expose
20137 * a part of the address space that was previously, and for
20138 * possibly good reasons, inaccessible.
20139 */
20140 vm_map_unlock(map);
20141 return KERN_INVALID_ADDRESS;
20142 }
20143 if (new_min_offset >= map->max_offset) {
20144 /* can't go beyond the end of the address space */
20145 vm_map_unlock(map);
20146 return KERN_INVALID_ADDRESS;
20147 }
20148
20149 first_entry = vm_map_first_entry(map);
20150 if (first_entry != vm_map_to_entry(map) &&
20151 first_entry->vme_start < new_min_offset) {
20152 /*
20153 * Some memory was already allocated below the new
20154 * minimum offset. It's too late to change it now...
20155 */
20156 vm_map_unlock(map);
20157 return KERN_NO_SPACE;
20158 }
20159
20160 map->min_offset = new_min_offset;
20161
20162 assert(map->holes_list);
20163 map->holes_list->start = new_min_offset;
20164 assert(new_min_offset < map->holes_list->end);
20165
20166 vm_map_unlock(map);
20167
20168 return KERN_SUCCESS;
20169 }
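/*
 * A minimal sketch (not from the original source): enforcing a hypothetical
 * 4GB hard page-zero region on a freshly created 64-bit map, before
 * anything has been mapped into it.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)0x100000000ULL);
 *	if (kr == KERN_NO_SPACE) {
 *		// something is already mapped below the requested minimum
 *	} else if (kr == KERN_INVALID_ADDRESS) {
 *		// the requested minimum is below the current one, or past
 *		// the end of the address space
 *	}
 */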
20170
20171 /*
20172 * Set the limit on the maximum amount of user wired memory allowed for this map.
20173 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
20174 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
20175 * to avoid having to reach over to the BSD data structures.
20176 */
20177
20178 void
20179 vm_map_set_user_wire_limit(vm_map_t map,
20180 vm_size_t limit)
20181 {
20182 map->user_wire_limit = limit;
20183 }
20184
20185
20186 void
20187 vm_map_switch_protect(vm_map_t map,
20188 boolean_t val)
20189 {
20190 vm_map_lock(map);
20191 map->switch_protect = val;
20192 vm_map_unlock(map);
20193 }
20194
20195 extern int cs_process_enforcement_enable;
20196 boolean_t
20197 vm_map_cs_enforcement(
20198 vm_map_t map)
20199 {
20200 if (cs_process_enforcement_enable) {
20201 return TRUE;
20202 }
20203 return map->cs_enforcement;
20204 }
20205
20206 void
20207 vm_map_cs_enforcement_set(
20208 vm_map_t map,
20209 boolean_t val)
20210 {
20211 vm_map_lock(map);
20212 map->cs_enforcement = val;
20213 pmap_set_vm_map_cs_enforced(map->pmap, val);
20214 vm_map_unlock(map);
20215 }
20216
20217 /*
20218 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20219 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20220 * bump both counters.
20221 */
20222 void
20223 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
20224 {
20225 pmap_t pmap = vm_map_pmap(map);
20226
20227 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20228 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20229 }
20230
20231 void
20232 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
20233 {
20234 pmap_t pmap = vm_map_pmap(map);
20235
20236 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20237 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20238 }
20239
20240 /* Add (generate) code signature for memory range */
20241 #if CONFIG_DYNAMIC_CODE_SIGNING
20242 kern_return_t
20243 vm_map_sign(vm_map_t map,
20244 vm_map_offset_t start,
20245 vm_map_offset_t end)
20246 {
20247 vm_map_entry_t entry;
20248 vm_page_t m;
20249 vm_object_t object;
20250
20251 /*
20252 * Vet all the input parameters and current type and state of the
20253 * underlying object. Return with an error if anything is amiss.
20254 */
20255 if (map == VM_MAP_NULL) {
20256 return KERN_INVALID_ARGUMENT;
20257 }
20258
20259 vm_map_lock_read(map);
20260
20261 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
20262 /*
20263 * Must pass a valid non-submap address.
20264 */
20265 vm_map_unlock_read(map);
20266 return KERN_INVALID_ADDRESS;
20267 }
20268
20269 if ((entry->vme_start > start) || (entry->vme_end < end)) {
20270 /*
20271 * Map entry doesn't cover the requested range. Not handling
20272 * this situation currently.
20273 */
20274 vm_map_unlock_read(map);
20275 return KERN_INVALID_ARGUMENT;
20276 }
20277
20278 object = VME_OBJECT(entry);
20279 if (object == VM_OBJECT_NULL) {
20280 /*
20281 * Object must already be present or we can't sign.
20282 */
20283 vm_map_unlock_read(map);
20284 return KERN_INVALID_ARGUMENT;
20285 }
20286
20287 vm_object_lock(object);
20288 vm_map_unlock_read(map);
20289
20290 while (start < end) {
20291 uint32_t refmod;
20292
20293 m = vm_page_lookup(object,
20294 start - entry->vme_start + VME_OFFSET(entry));
20295 if (m == VM_PAGE_NULL) {
20296 /* should we try to fault a page here? we can probably
20297 * demand it exists and is locked for this request */
20298 vm_object_unlock(object);
20299 return KERN_FAILURE;
20300 }
20301 /* deal with special page status */
20302 if (m->vmp_busy ||
20303 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
20304 vm_object_unlock(object);
20305 return KERN_FAILURE;
20306 }
20307
20308 /* Page is OK... now "validate" it */
20309 /* This is the place where we'll call out to create a code
20310 * directory, later */
20311 /* XXX TODO4K: deal with 4k subpages individually? */
20312 m->vmp_cs_validated = VMP_CS_ALL_TRUE;
20313
20314 /* The page is now "clean" for codesigning purposes. That means
20315 * we don't consider it as modified (wpmapped) anymore. But
20316 * we'll disconnect the page so we note any future modification
20317 * attempts. */
20318 m->vmp_wpmapped = FALSE;
20319 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
20320
20321 /* Pull the dirty status from the pmap, since we cleared the
20322 * wpmapped bit */
20323 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
20324 SET_PAGE_DIRTY(m, FALSE);
20325 }
20326
20327 /* On to the next page */
20328 start += PAGE_SIZE;
20329 }
20330 vm_object_unlock(object);
20331
20332 return KERN_SUCCESS;
20333 }
20334 #endif
20335
20336 kern_return_t
20337 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20338 {
20339 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
20340 vm_map_entry_t next_entry;
20341 kern_return_t kr = KERN_SUCCESS;
20342 vm_map_t zap_map;
20343
20344 vm_map_lock(map);
20345
20346 /*
20347 * We use a "zap_map" to avoid having to unlock
20348 * the "map" in vm_map_delete().
20349 */
20350 zap_map = vm_map_create(PMAP_NULL,
20351 map->min_offset,
20352 map->max_offset,
20353 map->hdr.entries_pageable);
20354
20355 if (zap_map == VM_MAP_NULL) {
20356 return KERN_RESOURCE_SHORTAGE;
20357 }
20358
20359 vm_map_set_page_shift(zap_map,
20360 VM_MAP_PAGE_SHIFT(map));
20361 vm_map_disable_hole_optimization(zap_map);
20362
20363 for (entry = vm_map_first_entry(map);
20364 entry != vm_map_to_entry(map);
20365 entry = next_entry) {
20366 next_entry = entry->vme_next;
20367
20368 if (VME_OBJECT(entry) &&
20369 !entry->is_sub_map &&
20370 (VME_OBJECT(entry)->internal == TRUE) &&
20371 (VME_OBJECT(entry)->ref_count == 1)) {
20372 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20373 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20374
20375 (void)vm_map_delete(map,
20376 entry->vme_start,
20377 entry->vme_end,
20378 VM_MAP_REMOVE_SAVE_ENTRIES,
20379 zap_map);
20380 }
20381 }
20382
20383 vm_map_unlock(map);
20384
20385 /*
20386 * Get rid of the "zap_map" and all the map entries that
20387 * it may still contain.
20388 */
20389 if (zap_map != VM_MAP_NULL) {
20390 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
20391 zap_map = VM_MAP_NULL;
20392 }
20393
20394 return kr;
20395 }
20396
20397
20398 #if DEVELOPMENT || DEBUG
20399
20400 int
20401 vm_map_disconnect_page_mappings(
20402 vm_map_t map,
20403 boolean_t do_unnest)
20404 {
20405 vm_map_entry_t entry;
20406 int page_count = 0;
20407
20408 if (do_unnest == TRUE) {
20409 #ifndef NO_NESTED_PMAP
20410 vm_map_lock(map);
20411
20412 for (entry = vm_map_first_entry(map);
20413 entry != vm_map_to_entry(map);
20414 entry = entry->vme_next) {
20415 if (entry->is_sub_map && entry->use_pmap) {
20416 /*
20417 * Make sure the range between the start of this entry and
20418 * the end of this entry is no longer nested, so that
20419 * we will only remove mappings from the pmap in use by
20420 * this task.
20421 */
20422 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
20423 }
20424 }
20425 vm_map_unlock(map);
20426 #endif
20427 }
20428 vm_map_lock_read(map);
20429
20430 page_count = map->pmap->stats.resident_count;
20431
20432 for (entry = vm_map_first_entry(map);
20433 entry != vm_map_to_entry(map);
20434 entry = entry->vme_next) {
20435 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
20436 (VME_OBJECT(entry)->phys_contiguous))) {
20437 continue;
20438 }
20439 if (entry->is_sub_map) {
20440 assert(!entry->use_pmap);
20441 }
20442
20443 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
20444 }
20445 vm_map_unlock_read(map);
20446
20447 return page_count;
20448 }
20449
20450 kern_return_t
20451 vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
20452 {
20453 vm_object_t object = NULL;
20454 vm_object_offset_t offset;
20455 vm_prot_t prot;
20456 boolean_t wired;
20457 vm_map_version_t version;
20458 vm_map_t real_map;
20459 int result = KERN_FAILURE;
20460
20461 vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
20462 vm_map_lock(map);
20463
20464 result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
20465 OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
20466 NULL, &real_map, NULL);
20467 if (object == NULL) {
20468 result = KERN_MEMORY_ERROR;
20469 } else if (object->pager) {
20470 result = vm_compressor_pager_inject_error(object->pager,
20471 offset);
20472 } else {
20473 result = KERN_MEMORY_PRESENT;
20474 }
20475
20476 if (object != NULL) {
20477 vm_object_unlock(object);
20478 }
20479
20480 if (real_map != map) {
20481 vm_map_unlock(real_map);
20482 }
20483 vm_map_unlock(map);
20484
20485 return result;
20486 }
20487
20488 #endif
20489
20490
20491 #if CONFIG_FREEZE
20492
20493
20494 extern struct freezer_context freezer_context_global;
20495 AbsoluteTime c_freezer_last_yield_ts = 0;
20496
20497 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20498 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20499
20500 kern_return_t
20501 vm_map_freeze(
20502 task_t task,
20503 unsigned int *purgeable_count,
20504 unsigned int *wired_count,
20505 unsigned int *clean_count,
20506 unsigned int *dirty_count,
20507 unsigned int dirty_budget,
20508 unsigned int *shared_count,
20509 int *freezer_error_code,
20510 boolean_t eval_only)
20511 {
20512 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
20513 kern_return_t kr = KERN_SUCCESS;
20514 boolean_t evaluation_phase = TRUE;
20515 vm_object_t cur_shared_object = NULL;
20516 int cur_shared_obj_ref_cnt = 0;
20517 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
20518
20519 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
20520
20521 /*
20522 * We need the exclusive lock here so that we can
20523 * block any page faults or lookups while we are
20524 * in the middle of freezing this vm map.
20525 */
20526 vm_map_t map = task->map;
20527
20528 vm_map_lock(map);
20529
20530 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
20531
20532 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20533 if (vm_compressor_low_on_space()) {
20534 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20535 }
20536
20537 if (vm_swap_low_on_space()) {
20538 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20539 }
20540
20541 kr = KERN_NO_SPACE;
20542 goto done;
20543 }
20544
20545 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
20546 /*
20547 * In-memory compressor backing the freezer. No disk.
20548 * So no need to do the evaluation phase.
20549 */
20550 evaluation_phase = FALSE;
20551
20552 if (eval_only == TRUE) {
20553 /*
20554 * We don't support 'eval_only' mode
20555 * in this non-swap config.
20556 */
20557 *freezer_error_code = FREEZER_ERROR_GENERIC;
20558 kr = KERN_INVALID_ARGUMENT;
20559 goto done;
20560 }
20561
20562 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20563 clock_get_uptime(&c_freezer_last_yield_ts);
20564 }
20565 again:
20566
20567 for (entry2 = vm_map_first_entry(map);
20568 entry2 != vm_map_to_entry(map);
20569 entry2 = entry2->vme_next) {
20570 vm_object_t src_object = VME_OBJECT(entry2);
20571
20572 if (src_object &&
20573 !entry2->is_sub_map &&
20574 !src_object->phys_contiguous) {
20575 /* If eligible, scan the entry, moving eligible pages over to our parent object */
20576
20577 if (src_object->internal == TRUE) {
20578 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
20579 /*
20580 * We skip purgeable objects during evaluation phase only.
20581 * If we decide to freeze this process, we'll explicitly
20582 * purge these objects before we go around again with
20583 * 'evaluation_phase' set to FALSE.
20584 */
20585
20586 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
20587 /*
20588 * We want to purge objects that may not belong to this task but are mapped
20589 * in this task alone. Since we already purged this task's purgeable memory
20590 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
20591 * on this task's purgeable objects. Hence the check for only volatile objects.
20592 */
20593 if (evaluation_phase == FALSE &&
20594 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
20595 (src_object->ref_count == 1)) {
20596 vm_object_lock(src_object);
20597 vm_object_purge(src_object, 0);
20598 vm_object_unlock(src_object);
20599 }
20600 continue;
20601 }
20602
20603 /*
20604 * Pages belonging to this object could be swapped to disk.
20605 * Make sure it's not a shared object because we could end
20606 * up just bringing it back in again.
20607 *
20608 * We try to optimize somewhat by checking for objects that are mapped
20609 * more than once within our own map. But we don't do full searches;
20610 * we just look at the entries following our current entry.
20611 */
20612
20613 if (src_object->ref_count > 1) {
20614 if (src_object != cur_shared_object) {
20615 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20616 dirty_shared_count += obj_pages_snapshot;
20617
20618 cur_shared_object = src_object;
20619 cur_shared_obj_ref_cnt = 1;
20620 continue;
20621 } else {
20622 cur_shared_obj_ref_cnt++;
20623 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
20624 /*
20625 * Fall through to below and treat this object as private.
20626 * So deduct its pages from our shared total and add it to the
20627 * private total.
20628 */
20629
20630 dirty_shared_count -= obj_pages_snapshot;
20631 dirty_private_count += obj_pages_snapshot;
20632 } else {
20633 continue;
20634 }
20635 }
20636 }
20637
20638
20639 if (src_object->ref_count == 1) {
20640 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20641 }
20642
20643 if (evaluation_phase == TRUE) {
20644 continue;
20645 }
20646 }
20647
20648 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
20649 *wired_count += src_object->wired_page_count;
20650
20651 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20652 if (vm_compressor_low_on_space()) {
20653 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20654 }
20655
20656 if (vm_swap_low_on_space()) {
20657 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20658 }
20659
20660 kr = KERN_NO_SPACE;
20661 break;
20662 }
20663 if (paged_out_count >= dirty_budget) {
20664 break;
20665 }
20666 dirty_budget -= paged_out_count;
20667 }
20668 }
20669 }
20670
20671 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
20672 if (evaluation_phase) {
20673 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
20674
20675 if (dirty_shared_count > shared_pages_threshold) {
20676 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
20677 kr = KERN_FAILURE;
20678 goto done;
20679 }
20680
20681 if (dirty_shared_count &&
20682 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
20683 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
20684 kr = KERN_FAILURE;
20685 goto done;
20686 }
20687
20688 evaluation_phase = FALSE;
20689 dirty_shared_count = dirty_private_count = 0;
20690
20691 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20692 clock_get_uptime(&c_freezer_last_yield_ts);
20693
20694 if (eval_only) {
20695 kr = KERN_SUCCESS;
20696 goto done;
20697 }
20698
20699 vm_purgeable_purge_task_owned(task);
20700
20701 goto again;
20702 } else {
20703 kr = KERN_SUCCESS;
20704 }
20705
20706 done:
20707 vm_map_unlock(map);
20708
20709 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
20710 vm_object_compressed_freezer_done();
20711 }
20712 return kr;
20713 }
20714
20715 #endif
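/*
 * Worked example (not from the original source) of the evaluation-phase
 * ratio gate in vm_map_freeze() above, assuming a hypothetical
 * memorystatus_freeze_private_shared_pages_ratio of 2: with 300 dirty
 * private pages and 200 dirty shared pages, 300 / 200 == 1 in integer
 * arithmetic, which is below the ratio, so evaluation fails with
 * FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO.  With 500 private and 200 shared
 * pages, 500 / 200 == 2, the gate passes, and the second pass (with
 * evaluation_phase == FALSE) goes on to compress the dirty pages.
 */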
20716
20717 /*
20718 * vm_map_entry_should_cow_for_true_share:
20719 *
20720 * Determines if the map entry should be clipped and setup for copy-on-write
20721 * to avoid applying "true_share" to a large VM object when only a subset is
20722 * targeted.
20723 *
20724 * For now, we target only the map entries created for the Objective C
20725 * Garbage Collector, which initially have the following properties:
20726 * - alias == VM_MEMORY_MALLOC
20727 * - wired_count == 0
20728 * - !needs_copy
20729 * and a VM object with:
20730 * - internal
20731 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20732 * - !true_share
20733 * - vo_size == ANON_CHUNK_SIZE
20734 *
20735 * Only non-kernel map entries.
20736 */
20737 boolean_t
20738 vm_map_entry_should_cow_for_true_share(
20739 vm_map_entry_t entry)
20740 {
20741 vm_object_t object;
20742
20743 if (entry->is_sub_map) {
20744 /* entry does not point at a VM object */
20745 return FALSE;
20746 }
20747
20748 if (entry->needs_copy) {
20749 /* already set for copy_on_write: done! */
20750 return FALSE;
20751 }
20752
20753 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
20754 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
20755 /* not a malloc heap or Obj-C Garbage Collector heap */
20756 return FALSE;
20757 }
20758
20759 if (entry->wired_count) {
20760 /* wired: can't change the map entry... */
20761 vm_counters.should_cow_but_wired++;
20762 return FALSE;
20763 }
20764
20765 object = VME_OBJECT(entry);
20766
20767 if (object == VM_OBJECT_NULL) {
20768 /* no object yet... */
20769 return FALSE;
20770 }
20771
20772 if (!object->internal) {
20773 /* not an internal object */
20774 return FALSE;
20775 }
20776
20777 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
20778 /* not the default copy strategy */
20779 return FALSE;
20780 }
20781
20782 if (object->true_share) {
20783 /* already true_share: too late to avoid it */
20784 return FALSE;
20785 }
20786
20787 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
20788 object->vo_size != ANON_CHUNK_SIZE) {
20789 /* ... not an object created for the ObjC Garbage Collector */
20790 return FALSE;
20791 }
20792
20793 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
20794 object->vo_size != 2048 * 4096) {
20795 /* ... not a "MALLOC_SMALL" heap */
20796 return FALSE;
20797 }
20798
20799 /*
20800 * All the criteria match: we have a large object being targeted for "true_share".
20801 * To limit the adverse side-effects linked with "true_share", tell the caller to
20802 * try to avoid setting up the entire object for "true_share" by clipping the
20803 * targeted range and setting it up for copy-on-write.
20804 */
20805 return TRUE;
20806 }
20807
20808 vm_map_offset_t
20809 vm_map_round_page_mask(
20810 vm_map_offset_t offset,
20811 vm_map_offset_t mask)
20812 {
20813 return VM_MAP_ROUND_PAGE(offset, mask);
20814 }
20815
20816 vm_map_offset_t
20817 vm_map_trunc_page_mask(
20818 vm_map_offset_t offset,
20819 vm_map_offset_t mask)
20820 {
20821 return VM_MAP_TRUNC_PAGE(offset, mask);
20822 }
20823
20824 boolean_t
20825 vm_map_page_aligned(
20826 vm_map_offset_t offset,
20827 vm_map_offset_t mask)
20828 {
20829 return ((offset) & mask) == 0;
20830 }
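/*
 * Worked example (not from the original source), assuming the usual
 * mask-based rounding where "mask" is page_size - 1.  With a 16KB page
 * mask of 0x3fff:
 *
 *	vm_map_trunc_page_mask(0x5432, 0x3fff) == 0x4000
 *	vm_map_round_page_mask(0x5432, 0x3fff) == 0x8000
 *	vm_map_page_aligned(0x4000, 0x3fff)    == TRUE
 *	vm_map_page_aligned(0x5432, 0x3fff)    == FALSE
 */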
20831
20832 int
20833 vm_map_page_shift(
20834 vm_map_t map)
20835 {
20836 return VM_MAP_PAGE_SHIFT(map);
20837 }
20838
20839 int
20840 vm_map_page_size(
20841 vm_map_t map)
20842 {
20843 return VM_MAP_PAGE_SIZE(map);
20844 }
20845
20846 vm_map_offset_t
20847 vm_map_page_mask(
20848 vm_map_t map)
20849 {
20850 return VM_MAP_PAGE_MASK(map);
20851 }
20852
20853 kern_return_t
20854 vm_map_set_page_shift(
20855 vm_map_t map,
20856 int pageshift)
20857 {
20858 if (map->hdr.nentries != 0) {
20859 /* too late to change page size */
20860 return KERN_FAILURE;
20861 }
20862
20863 map->hdr.page_shift = pageshift;
20864
20865 return KERN_SUCCESS;
20866 }
20867
20868 kern_return_t
20869 vm_map_query_volatile(
20870 vm_map_t map,
20871 mach_vm_size_t *volatile_virtual_size_p,
20872 mach_vm_size_t *volatile_resident_size_p,
20873 mach_vm_size_t *volatile_compressed_size_p,
20874 mach_vm_size_t *volatile_pmap_size_p,
20875 mach_vm_size_t *volatile_compressed_pmap_size_p)
20876 {
20877 mach_vm_size_t volatile_virtual_size;
20878 mach_vm_size_t volatile_resident_count;
20879 mach_vm_size_t volatile_compressed_count;
20880 mach_vm_size_t volatile_pmap_count;
20881 mach_vm_size_t volatile_compressed_pmap_count;
20882 mach_vm_size_t resident_count;
20883 vm_map_entry_t entry;
20884 vm_object_t object;
20885
20886 /* map should be locked by caller */
20887
20888 volatile_virtual_size = 0;
20889 volatile_resident_count = 0;
20890 volatile_compressed_count = 0;
20891 volatile_pmap_count = 0;
20892 volatile_compressed_pmap_count = 0;
20893
20894 for (entry = vm_map_first_entry(map);
20895 entry != vm_map_to_entry(map);
20896 entry = entry->vme_next) {
20897 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
20898
20899 if (entry->is_sub_map) {
20900 continue;
20901 }
20902 if (!(entry->protection & VM_PROT_WRITE)) {
20903 continue;
20904 }
20905 object = VME_OBJECT(entry);
20906 if (object == VM_OBJECT_NULL) {
20907 continue;
20908 }
20909 if (object->purgable != VM_PURGABLE_VOLATILE &&
20910 object->purgable != VM_PURGABLE_EMPTY) {
20911 continue;
20912 }
20913 if (VME_OFFSET(entry)) {
20914 /*
20915 * If the map entry has been split and the object now
20916 * appears several times in the VM map, we don't want
20917 * to count the object's resident_page_count more than
20918 * once. We count it only for the first one, starting
20919 * at offset 0, and ignore the other VM map entries.
20920 */
20921 continue;
20922 }
20923 resident_count = object->resident_page_count;
20924 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
20925 resident_count = 0;
20926 } else {
20927 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
20928 }
20929
20930 volatile_virtual_size += entry->vme_end - entry->vme_start;
20931 volatile_resident_count += resident_count;
20932 if (object->pager) {
20933 volatile_compressed_count +=
20934 vm_compressor_pager_get_count(object->pager);
20935 }
20936 pmap_compressed_bytes = 0;
20937 pmap_resident_bytes =
20938 pmap_query_resident(map->pmap,
20939 entry->vme_start,
20940 entry->vme_end,
20941 &pmap_compressed_bytes);
20942 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
20943 volatile_compressed_pmap_count += (pmap_compressed_bytes
20944 / PAGE_SIZE);
20945 }
20946
20947 /* map is still locked on return */
20948
20949 *volatile_virtual_size_p = volatile_virtual_size;
20950 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
20951 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
20952 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
20953 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
20954
20955 return KERN_SUCCESS;
20956 }
20957
20958 void
20959 vm_map_sizes(vm_map_t map,
20960 vm_map_size_t * psize,
20961 vm_map_size_t * pfree,
20962 vm_map_size_t * plargest_free)
20963 {
20964 vm_map_entry_t entry;
20965 vm_map_offset_t prev;
20966 vm_map_size_t free, total_free, largest_free;
20967 boolean_t end;
20968
20969 if (!map) {
20970 *psize = *pfree = *plargest_free = 0;
20971 return;
20972 }
20973 total_free = largest_free = 0;
20974
20975 vm_map_lock_read(map);
20976 if (psize) {
20977 *psize = map->max_offset - map->min_offset;
20978 }
20979
20980 prev = map->min_offset;
20981 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
20982 end = (entry == vm_map_to_entry(map));
20983
20984 if (end) {
20985 free = entry->vme_end - prev;
20986 } else {
20987 free = entry->vme_start - prev;
20988 }
20989
20990 total_free += free;
20991 if (free > largest_free) {
20992 largest_free = free;
20993 }
20994
20995 if (end) {
20996 break;
20997 }
20998 prev = entry->vme_end;
20999 }
21000 vm_map_unlock_read(map);
21001 if (pfree) {
21002 *pfree = total_free;
21003 }
21004 if (plargest_free) {
21005 *plargest_free = largest_free;
21006 }
21007 }
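/*
 * Worked example (not from the original source) for vm_map_sizes() above.
 * Consider a hypothetical map with min_offset 0x1000, max_offset 0x10000
 * and two entries covering [0x2000, 0x3000) and [0x8000, 0xc000):
 *
 *	*psize         == 0x10000 - 0x1000 == 0xf000
 *	free gaps      == 0x1000 (before the first entry)
 *	                + 0x5000 (between the entries)
 *	                + 0x4000 (after the last entry)
 *	*pfree         == 0xa000
 *	*plargest_free == 0x5000
 */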
21008
21009 #if VM_SCAN_FOR_SHADOW_CHAIN
21010 int vm_map_shadow_max(vm_map_t map);
21011 int
21012 vm_map_shadow_max(
21013 vm_map_t map)
21014 {
21015 int shadows, shadows_max;
21016 vm_map_entry_t entry;
21017 vm_object_t object, next_object;
21018
21019 if (map == NULL) {
21020 return 0;
21021 }
21022
21023 shadows_max = 0;
21024
21025 vm_map_lock_read(map);
21026
21027 for (entry = vm_map_first_entry(map);
21028 entry != vm_map_to_entry(map);
21029 entry = entry->vme_next) {
21030 if (entry->is_sub_map) {
21031 continue;
21032 }
21033 object = VME_OBJECT(entry);
21034 if (object == NULL) {
21035 continue;
21036 }
21037 vm_object_lock_shared(object);
21038 for (shadows = 0;
21039 object->shadow != NULL;
21040 shadows++, object = next_object) {
21041 next_object = object->shadow;
21042 vm_object_lock_shared(next_object);
21043 vm_object_unlock(object);
21044 }
21045 vm_object_unlock(object);
21046 if (shadows > shadows_max) {
21047 shadows_max = shadows;
21048 }
21049 }
21050
21051 vm_map_unlock_read(map);
21052
21053 return shadows_max;
21054 }
21055 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
21056
21057 void
21058 vm_commit_pagezero_status(vm_map_t lmap)
21059 {
21060 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
21061 }
21062
21063 #if XNU_TARGET_OS_OSX
21064 void
21065 vm_map_set_high_start(
21066 vm_map_t map,
21067 vm_map_offset_t high_start)
21068 {
21069 map->vmmap_high_start = high_start;
21070 }
21071 #endif /* XNU_TARGET_OS_OSX */
21072
21073 #if PMAP_CS
21074 kern_return_t
21075 vm_map_entry_cs_associate(
21076 vm_map_t map,
21077 vm_map_entry_t entry,
21078 vm_map_kernel_flags_t vmk_flags)
21079 {
21080 vm_object_t cs_object, cs_shadow;
21081 vm_object_offset_t cs_offset;
21082 void *cs_blobs;
21083 struct vnode *cs_vnode;
21084 kern_return_t cs_ret;
21085
21086 if (map->pmap == NULL ||
21087 entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
21088 pmap_cs_exempt(map->pmap) ||
21089 VME_OBJECT(entry) == VM_OBJECT_NULL ||
21090 !(entry->protection & VM_PROT_EXECUTE)) {
21091 return KERN_SUCCESS;
21092 }
21093
21094 vm_map_lock_assert_exclusive(map);
21095
21096 if (entry->used_for_jit) {
21097 cs_ret = pmap_cs_associate(map->pmap,
21098 PMAP_CS_ASSOCIATE_JIT,
21099 entry->vme_start,
21100 entry->vme_end - entry->vme_start,
21101 0);
21102 goto done;
21103 }
21104
21105 if (vmk_flags.vmkf_remap_prot_copy) {
21106 cs_ret = pmap_cs_associate(map->pmap,
21107 PMAP_CS_ASSOCIATE_COW,
21108 entry->vme_start,
21109 entry->vme_end - entry->vme_start,
21110 0);
21111 goto done;
21112 }
21113
21114 vm_object_lock_shared(VME_OBJECT(entry));
21115 cs_offset = VME_OFFSET(entry);
21116 for (cs_object = VME_OBJECT(entry);
21117 (cs_object != VM_OBJECT_NULL &&
21118 !cs_object->code_signed);
21119 cs_object = cs_shadow) {
21120 cs_shadow = cs_object->shadow;
21121 if (cs_shadow != VM_OBJECT_NULL) {
21122 cs_offset += cs_object->vo_shadow_offset;
21123 vm_object_lock_shared(cs_shadow);
21124 }
21125 vm_object_unlock(cs_object);
21126 }
21127 if (cs_object == VM_OBJECT_NULL) {
21128 return KERN_SUCCESS;
21129 }
21130
21131 cs_offset += cs_object->paging_offset;
21132 cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
21133 cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
21134 &cs_blobs);
21135 assert(cs_ret == KERN_SUCCESS);
21136 cs_ret = cs_associate_blob_with_mapping(map->pmap,
21137 entry->vme_start,
21138 (entry->vme_end -
21139 entry->vme_start),
21140 cs_offset,
21141 cs_blobs);
21142 vm_object_unlock(cs_object);
21143 cs_object = VM_OBJECT_NULL;
21144
21145 done:
21146 if (cs_ret == KERN_SUCCESS) {
21147 DTRACE_VM2(vm_map_entry_cs_associate_success,
21148 vm_map_offset_t, entry->vme_start,
21149 vm_map_offset_t, entry->vme_end);
21150 if (vm_map_executable_immutable) {
21151 /*
21152 * Prevent this executable
21153 * mapping from being unmapped
21154 * or modified.
21155 */
21156 entry->permanent = TRUE;
21157 }
21158 /*
21159 * pmap says it will check the
21160 * code-signing validity of pages
21161 * faulted in via this mapping, so
21162 * this map entry should be marked so
21163 * that vm_fault() bypasses code-signing
21164 * validation for faults coming through
21165 * this mapping.
21166 */
21167 entry->pmap_cs_associated = TRUE;
21168 } else if (cs_ret == KERN_NOT_SUPPORTED) {
21169 /*
21170 * pmap won't check the code-signing
21171 * validity of pages faulted in via
21172 * this mapping, so VM should keep
21173 * doing it.
21174 */
21175 DTRACE_VM3(vm_map_entry_cs_associate_off,
21176 vm_map_offset_t, entry->vme_start,
21177 vm_map_offset_t, entry->vme_end,
21178 int, cs_ret);
21179 } else {
21180 /*
21181 * A real error: do not allow
21182 * execution in this mapping.
21183 */
21184 DTRACE_VM3(vm_map_entry_cs_associate_failure,
21185 vm_map_offset_t, entry->vme_start,
21186 vm_map_offset_t, entry->vme_end,
21187 int, cs_ret);
21188 entry->protection &= ~VM_PROT_EXECUTE;
21189 entry->max_protection &= ~VM_PROT_EXECUTE;
21190 }
21191
21192 return cs_ret;
21193 }
21194 #endif /* PMAP_CS */
21195
21196 /*
21197 * FORKED CORPSE FOOTPRINT
21198 *
21199 * A forked corpse gets a copy of the original VM map but its pmap is mostly
21200 * empty since it never ran and never got to fault in any pages.
21201 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21202 * a forked corpse would therefore return very little information.
21203 *
21204 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21205 * to vm_map_fork() to collect footprint information from the original VM map
21206 * and its pmap, and store it in the forked corpse's VM map. That information
21207 * is stored in place of the VM map's "hole list" since we'll never need to
21208 * look up holes in the corpse's map.
21209 *
21210 * The corpse's footprint info looks like this:
21211 *
21212 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21213 * as follows:
21214 * +---------------------------------------+
21215 * header-> | cf_size |
21216 * +-------------------+-------------------+
21217 * | cf_last_region | cf_last_zeroes |
21218 * +-------------------+-------------------+
21219 * region1-> | cfr_vaddr |
21220 * +-------------------+-------------------+
21221 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21222 * +---------------------------------------+
21223 * | d4 | d5 | ... |
21224 * +---------------------------------------+
21225 * | ... |
21226 * +-------------------+-------------------+
21227 * | dy | dz | na | na | cfr_vaddr... | <-region2
21228 * +-------------------+-------------------+
21229 * | cfr_vaddr (ctd) | cfr_num_pages |
21230 * +---------------------------------------+
21231 * | d0 | d1 ... |
21232 * +---------------------------------------+
21233 * ...
21234 * +---------------------------------------+
21235 * last region-> | cfr_vaddr |
21236 * +---------------------------------------+
21237 | cfr_num_pages | d0 | d1 | d2 | d3 |
21238 * +---------------------------------------+
21239 * ...
21240 * +---------------------------------------+
21241 * | dx | dy | dz | na | na | na | na | na |
21242 * +---------------------------------------+
21243 *
21244 * where:
21245 * cf_size: total size of the buffer (rounded to page size)
21246 * cf_last_region: offset in the buffer of the last "region" sub-header
21247 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21248 * of the last region
21249 * cfr_vaddr: virtual address of the start of the covered "region"
21250 * cfr_num_pages: number of pages in the covered "region"
21251 * d*: disposition of the page at that virtual address
21252 * Regions in the buffer are word-aligned.
21253 *
21254 * We estimate the size of the buffer based on the number of memory regions
21255 * and the virtual size of the address space. While copying each memory region
21256 * during vm_map_fork(), we also collect the footprint info for that region
21257 * and store it in the buffer, packing it as much as possible (coalescing
21258 * contiguous memory regions to avoid having too many region headers and
21259 * avoiding long streaks of "zero" page dispositions by splitting footprint
21260 * "regions", so the number of regions in the footprint buffer might not match
21261 * the number of memory regions in the address space.
21262 *
21263 * We also have to copy the original task's "nonvolatile" ledgers since that's
21264 * part of the footprint and will need to be reported to any tool asking for
21265 * the footprint information of the forked corpse.
21266 */
21267
21268 uint64_t vm_map_corpse_footprint_count = 0;
21269 uint64_t vm_map_corpse_footprint_size_avg = 0;
21270 uint64_t vm_map_corpse_footprint_size_max = 0;
21271 uint64_t vm_map_corpse_footprint_full = 0;
21272 uint64_t vm_map_corpse_footprint_no_buf = 0;
21273
21274 struct vm_map_corpse_footprint_header {
21275 vm_size_t cf_size; /* allocated buffer size */
21276 uint32_t cf_last_region; /* offset of last region in buffer */
21277 union {
21278 uint32_t cfu_last_zeroes; /* during creation:
21279 * number of "zero" dispositions at
21280 * end of last region */
21281 uint32_t cfu_hint_region; /* during lookup:
21282 * offset of last looked up region */
21283 #define cf_last_zeroes cfu.cfu_last_zeroes
21284 #define cf_hint_region cfu.cfu_hint_region
21285 } cfu;
21286 };
21287 typedef uint8_t cf_disp_t;
21288 struct vm_map_corpse_footprint_region {
21289 vm_map_offset_t cfr_vaddr; /* region start virtual address */
21290 uint32_t cfr_num_pages; /* number of pages in this "region" */
21291 cf_disp_t cfr_disposition[0]; /* disposition of each page */
21292 } __attribute__((packed));
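
/*
 * Illustrative sketch (not part of the original source): given the buffer
 * offset of a footprint "region", compute the buffer offset at which the
 * next region header starts, mirroring the walk done by the collection and
 * lookup code below. The roundup() to sizeof(int) is what "Regions in the
 * buffer are word-aligned" means in the layout comment above.
 */
__unused static uint32_t
vm_map_corpse_footprint_example_next_region_offset(
	struct vm_map_corpse_footprint_header *footprint_header,
	uint32_t region_offset)
{
	struct vm_map_corpse_footprint_region *footprint_region;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + region_offset);
	/* skip the region's header ... */
	region_offset += sizeof(*footprint_region);
	/* ... and its per-page dispositions ... */
	region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
	/* ... then align to the next word boundary */
	region_offset = roundup(region_offset, sizeof(int));
	return region_offset;
}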
21293
21294 static cf_disp_t
21295 vm_page_disposition_to_cf_disp(
21296 int disposition)
21297 {
21298 assert(sizeof(cf_disp_t) == 1);
21299 /* relocate bits that don't fit in a "uint8_t" */
21300 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21301 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21302 }
21303 /* cast gets rid of extra bits */
21304 return (cf_disp_t) disposition;
21305 }
21306
21307 static int
21308 vm_page_cf_disp_to_disposition(
21309 cf_disp_t cf_disp)
21310 {
21311 int disposition;
21312
21313 assert(sizeof(cf_disp_t) == 1);
21314 disposition = (int) cf_disp;
21315 /* move relocated bits back in place */
21316 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21317 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21318 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21319 }
21320 return disposition;
21321 }
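
/*
 * Illustrative sketch (not part of the original source): the two helpers
 * above are meant to round-trip for the dispositions recorded in a corpse
 * footprint, e.g. a "present + reusable" page survives the 8-bit packing.
 */
__unused static boolean_t
vm_page_cf_disp_example_roundtrip(void)
{
	int disposition;
	cf_disp_t cf_disp;

	disposition = VM_PAGE_QUERY_PAGE_PRESENT | VM_PAGE_QUERY_PAGE_REUSABLE;
	cf_disp = vm_page_disposition_to_cf_disp(disposition);
	/* the packed value fits in one byte and decodes back to the original */
	return vm_page_cf_disp_to_disposition(cf_disp) == disposition;
}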
21322
21323 /*
21324 * vm_map_corpse_footprint_new_region:
21325 * closes the current footprint "region" and creates a new one
21326 *
21327 * Returns NULL if there's not enough space in the buffer for a new region.
21328 */
21329 static struct vm_map_corpse_footprint_region *
21330 vm_map_corpse_footprint_new_region(
21331 struct vm_map_corpse_footprint_header *footprint_header)
21332 {
21333 uintptr_t footprint_edge;
21334 uint32_t new_region_offset;
21335 struct vm_map_corpse_footprint_region *footprint_region;
21336 struct vm_map_corpse_footprint_region *new_footprint_region;
21337
21338 footprint_edge = ((uintptr_t)footprint_header +
21339 footprint_header->cf_size);
21340 footprint_region = ((struct vm_map_corpse_footprint_region *)
21341 ((char *)footprint_header +
21342 footprint_header->cf_last_region));
21343 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
21344 footprint_edge);
21345
21346 /* get rid of trailing zeroes in the last region */
21347 assert(footprint_region->cfr_num_pages >=
21348 footprint_header->cf_last_zeroes);
21349 footprint_region->cfr_num_pages -=
21350 footprint_header->cf_last_zeroes;
21351 footprint_header->cf_last_zeroes = 0;
21352
21353 /* reuse this region if it's now empty */
21354 if (footprint_region->cfr_num_pages == 0) {
21355 return footprint_region;
21356 }
21357
21358 /* compute offset of new region */
21359 new_region_offset = footprint_header->cf_last_region;
21360 new_region_offset += sizeof(*footprint_region);
21361 new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21362 new_region_offset = roundup(new_region_offset, sizeof(int));
21363
21364 /* check if we're going over the edge */
21365 if (((uintptr_t)footprint_header +
21366 new_region_offset +
21367 sizeof(*footprint_region)) >=
21368 footprint_edge) {
21369 /* over the edge: no new region */
21370 return NULL;
21371 }
21372
21373 /* adjust offset of last region in header */
21374 footprint_header->cf_last_region = new_region_offset;
21375
21376 new_footprint_region = (struct vm_map_corpse_footprint_region *)
21377 ((char *)footprint_header +
21378 footprint_header->cf_last_region);
21379 new_footprint_region->cfr_vaddr = 0;
21380 new_footprint_region->cfr_num_pages = 0;
21381 /* caller needs to initialize new region */
21382
21383 return new_footprint_region;
21384 }
21385
21386 /*
21387 * vm_map_corpse_footprint_collect:
21388 * collects footprint information for "old_entry" in "old_map" and
21389 * stores it in "new_map"'s vmmap_corpse_footprint buffer.
21390 */
21391 kern_return_t
21392 vm_map_corpse_footprint_collect(
21393 vm_map_t old_map,
21394 vm_map_entry_t old_entry,
21395 vm_map_t new_map)
21396 {
21397 vm_map_offset_t va;
21398 kern_return_t kr;
21399 struct vm_map_corpse_footprint_header *footprint_header;
21400 struct vm_map_corpse_footprint_region *footprint_region;
21401 struct vm_map_corpse_footprint_region *new_footprint_region;
21402 cf_disp_t *next_disp_p;
21403 uintptr_t footprint_edge;
21404 uint32_t num_pages_tmp;
21405 int effective_page_size;
21406
21407 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));
21408
21409 va = old_entry->vme_start;
21410
21411 vm_map_lock_assert_exclusive(old_map);
21412 vm_map_lock_assert_exclusive(new_map);
21413
21414 assert(new_map->has_corpse_footprint);
21415 assert(!old_map->has_corpse_footprint);
21416 if (!new_map->has_corpse_footprint ||
21417 old_map->has_corpse_footprint) {
21418 /*
21419 * This can only transfer footprint info from a
21420 * map with a live pmap to a map with a corpse footprint.
21421 */
21422 return KERN_NOT_SUPPORTED;
21423 }
21424
21425 if (new_map->vmmap_corpse_footprint == NULL) {
21426 vm_offset_t buf;
21427 vm_size_t buf_size;
21428
21429 buf = 0;
21430 buf_size = (sizeof(*footprint_header) +
21431 (old_map->hdr.nentries
21432 *
21433 (sizeof(*footprint_region) +
21434 +3)) /* potential alignment for each region */
21435 +
21436 ((old_map->size / effective_page_size)
21437 *
21438 sizeof(cf_disp_t))); /* disposition for each page */
21439 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
21440 buf_size = round_page(buf_size);
21441
21442 /* limit buffer to 1 page to validate overflow detection */
21443 // buf_size = PAGE_SIZE;
21444
21445 /* limit size to a somewhat sane amount */
21446 #if XNU_TARGET_OS_OSX
21447 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
21448 #else /* XNU_TARGET_OS_OSX */
21449 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
21450 #endif /* XNU_TARGET_OS_OSX */
21451 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
21452 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
21453 }
21454
21455 /*
21456 * Allocate the pageable buffer (with a trailing guard page).
21457 * It will be zero-filled on demand.
21458 */
21459 kr = kernel_memory_allocate(kernel_map,
21460 &buf,
21461 (buf_size
21462 + PAGE_SIZE), /* trailing guard page */
21463 0, /* mask */
21464 KMA_PAGEABLE | KMA_GUARD_LAST,
21465 VM_KERN_MEMORY_DIAG);
21466 if (kr != KERN_SUCCESS) {
21467 vm_map_corpse_footprint_no_buf++;
21468 return kr;
21469 }
21470
21471 /* initialize header and 1st region */
21472 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
21473 new_map->vmmap_corpse_footprint = footprint_header;
21474
21475 footprint_header->cf_size = buf_size;
21476 footprint_header->cf_last_region =
21477 sizeof(*footprint_header);
21478 footprint_header->cf_last_zeroes = 0;
21479
21480 footprint_region = (struct vm_map_corpse_footprint_region *)
21481 ((char *)footprint_header +
21482 footprint_header->cf_last_region);
21483 footprint_region->cfr_vaddr = 0;
21484 footprint_region->cfr_num_pages = 0;
21485 } else {
21486 /* retrieve header and last region */
21487 footprint_header = (struct vm_map_corpse_footprint_header *)
21488 new_map->vmmap_corpse_footprint;
21489 footprint_region = (struct vm_map_corpse_footprint_region *)
21490 ((char *)footprint_header +
21491 footprint_header->cf_last_region);
21492 }
21493 footprint_edge = ((uintptr_t)footprint_header +
21494 footprint_header->cf_size);
21495
21496 if ((footprint_region->cfr_vaddr +
21497 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
21498 effective_page_size))
21499 != old_entry->vme_start) {
21500 uint64_t num_pages_delta, num_pages_delta_size;
21501 uint32_t region_offset_delta_size;
21502
21503 /*
21504 * Not the next contiguous virtual address:
21505 * start a new region or store "zero" dispositions for
21506 * the missing pages?
21507 */
21508 /* size of gap in actual page dispositions */
21509 num_pages_delta = ((old_entry->vme_start -
21510 footprint_region->cfr_vaddr) / effective_page_size)
21511 - footprint_region->cfr_num_pages;
21512 num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
21513 /* size of gap as a new footprint region header */
21514 region_offset_delta_size =
21515 (sizeof(*footprint_region) +
21516 roundup(((footprint_region->cfr_num_pages -
21517 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
21518 sizeof(int)) -
21519 ((footprint_region->cfr_num_pages -
21520 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
21521 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
21522 if (region_offset_delta_size < num_pages_delta_size ||
21523 os_add3_overflow(footprint_region->cfr_num_pages,
21524 (uint32_t) num_pages_delta,
21525 1,
21526 &num_pages_tmp)) {
21527 /*
21528 * Storing data for this gap would take more space
21529 * than inserting a new footprint region header:
21530 * let's start a new region and save space. If it's a
21531 * tie, let's avoid using a new region, since that
21532 * would require more region hops to find the right
21533 * range during lookups.
21534 *
21535 * If the current region's cfr_num_pages would overflow
21536 * if we added "zero" page dispositions for the gap,
21537 * no choice but to start a new region.
21538 */
21539 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
21540 new_footprint_region =
21541 vm_map_corpse_footprint_new_region(footprint_header);
21542 /* check that we're not going over the edge */
21543 if (new_footprint_region == NULL) {
21544 goto over_the_edge;
21545 }
21546 footprint_region = new_footprint_region;
21547 /* initialize new region as empty */
21548 footprint_region->cfr_vaddr = old_entry->vme_start;
21549 footprint_region->cfr_num_pages = 0;
21550 } else {
21551 /*
21552 * Store "zero" page dispositions for the missing
21553 * pages.
21554 */
21555 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
21556 for (; num_pages_delta > 0; num_pages_delta--) {
21557 next_disp_p = (cf_disp_t *)
21558 ((uintptr_t) footprint_region +
21559 sizeof(*footprint_region));
21560 next_disp_p += footprint_region->cfr_num_pages;
21561 /* check that we're not going over the edge */
21562 if ((uintptr_t)next_disp_p >= footprint_edge) {
21563 goto over_the_edge;
21564 }
21565 /* store "zero" disposition for this gap page */
21566 footprint_region->cfr_num_pages++;
21567 *next_disp_p = (cf_disp_t) 0;
21568 footprint_header->cf_last_zeroes++;
21569 }
21570 }
21571 }
21572
21573 for (va = old_entry->vme_start;
21574 va < old_entry->vme_end;
21575 va += effective_page_size) {
21576 int disposition;
21577 cf_disp_t cf_disp;
21578
21579 vm_map_footprint_query_page_info(old_map,
21580 old_entry,
21581 va,
21582 &disposition);
21583 cf_disp = vm_page_disposition_to_cf_disp(disposition);
21584
21585 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
21586
21587 if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
21588 /*
21589 * Ignore "zero" dispositions at start of
21590 * region: just move start of region.
21591 */
21592 footprint_region->cfr_vaddr += effective_page_size;
21593 continue;
21594 }
21595
21596 /* would region's cfr_num_pages overflow? */
21597 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
21598 &num_pages_tmp)) {
21599 /* overflow: create a new region */
21600 new_footprint_region =
21601 vm_map_corpse_footprint_new_region(
21602 footprint_header);
21603 if (new_footprint_region == NULL) {
21604 goto over_the_edge;
21605 }
21606 footprint_region = new_footprint_region;
21607 footprint_region->cfr_vaddr = va;
21608 footprint_region->cfr_num_pages = 0;
21609 }
21610
21611 next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
21612 sizeof(*footprint_region));
21613 next_disp_p += footprint_region->cfr_num_pages;
21614 /* check that we're not going over the edge */
21615 if ((uintptr_t)next_disp_p >= footprint_edge) {
21616 goto over_the_edge;
21617 }
21618 /* store this disposition */
21619 *next_disp_p = cf_disp;
21620 footprint_region->cfr_num_pages++;
21621
21622 if (cf_disp != 0) {
21623 /* non-zero disp: break the current zero streak */
21624 footprint_header->cf_last_zeroes = 0;
21625 /* done */
21626 continue;
21627 }
21628
21629 /* zero disp: add to the current streak of zeroes */
21630 footprint_header->cf_last_zeroes++;
21631 if ((footprint_header->cf_last_zeroes +
21632 roundup(((footprint_region->cfr_num_pages -
21633 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
21634 (sizeof(int) - 1),
21635 sizeof(int))) <
21636 (sizeof(*footprint_header))) {
21637 /*
21638 * There are not enough trailing "zero" dispositions
21639 * (+ the extra padding we would need for the previous
21640 * region); creating a new region would not save space
21641 * at this point, so let's keep this "zero" disposition
21642 * in this region and reconsider later.
21643 */
21644 continue;
21645 }
21646 /*
21647 * Create a new region to avoid having too many consecutive
21648 * "zero" dispositions.
21649 */
21650 new_footprint_region =
21651 vm_map_corpse_footprint_new_region(footprint_header);
21652 if (new_footprint_region == NULL) {
21653 goto over_the_edge;
21654 }
21655 footprint_region = new_footprint_region;
21656 /* initialize the new region as empty ... */
21657 footprint_region->cfr_num_pages = 0;
21658 /* ... and skip this "zero" disp */
21659 footprint_region->cfr_vaddr = va + effective_page_size;
21660 }
21661
21662 return KERN_SUCCESS;
21663
21664 over_the_edge:
21665 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
21666 vm_map_corpse_footprint_full++;
21667 return KERN_RESOURCE_SHORTAGE;
21668 }
21669
21670 /*
21671 * vm_map_corpse_footprint_collect_done:
21672 * completes the footprint collection by getting rid of any remaining
21673 * trailing "zero" dispositions and trimming the unused part of the
21674 * kernel buffer
21675 */
21676 void
21677 vm_map_corpse_footprint_collect_done(
21678 vm_map_t new_map)
21679 {
21680 struct vm_map_corpse_footprint_header *footprint_header;
21681 struct vm_map_corpse_footprint_region *footprint_region;
21682 vm_size_t buf_size, actual_size;
21683 kern_return_t kr;
21684
21685 assert(new_map->has_corpse_footprint);
21686 if (!new_map->has_corpse_footprint ||
21687 new_map->vmmap_corpse_footprint == NULL) {
21688 return;
21689 }
21690
21691 footprint_header = (struct vm_map_corpse_footprint_header *)
21692 new_map->vmmap_corpse_footprint;
21693 buf_size = footprint_header->cf_size;
21694
21695 footprint_region = (struct vm_map_corpse_footprint_region *)
21696 ((char *)footprint_header +
21697 footprint_header->cf_last_region);
21698
21699 /* get rid of trailing zeroes in last region */
21700 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
21701 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
21702 footprint_header->cf_last_zeroes = 0;
21703
21704 actual_size = (vm_size_t)(footprint_header->cf_last_region +
21705 sizeof(*footprint_region) +
21706 (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));
21707
21708 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
21709 vm_map_corpse_footprint_size_avg =
21710 (((vm_map_corpse_footprint_size_avg *
21711 vm_map_corpse_footprint_count) +
21712 actual_size) /
21713 (vm_map_corpse_footprint_count + 1));
21714 vm_map_corpse_footprint_count++;
21715 if (actual_size > vm_map_corpse_footprint_size_max) {
21716 vm_map_corpse_footprint_size_max = actual_size;
21717 }
21718
21719 actual_size = round_page(actual_size);
21720 if (buf_size > actual_size) {
21721 kr = vm_deallocate(kernel_map,
21722 ((vm_address_t)footprint_header +
21723 actual_size +
21724 PAGE_SIZE), /* trailing guard page */
21725 (buf_size - actual_size));
21726 assertf(kr == KERN_SUCCESS,
21727 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21728 footprint_header,
21729 (uint64_t) buf_size,
21730 (uint64_t) actual_size,
21731 kr);
21732 kr = vm_protect(kernel_map,
21733 ((vm_address_t)footprint_header +
21734 actual_size),
21735 PAGE_SIZE,
21736 FALSE, /* set_maximum */
21737 VM_PROT_NONE);
21738 assertf(kr == KERN_SUCCESS,
21739 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21740 footprint_header,
21741 (uint64_t) buf_size,
21742 (uint64_t) actual_size,
21743 kr);
21744 }
21745
21746 footprint_header->cf_size = actual_size;
21747 }
21748
21749 /*
21750 * vm_map_corpse_footprint_query_page_info:
21751 * retrieves the disposition of the page at virtual address "va"
21752 * in the forked corpse's VM map
21753 *
21754 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
21755 */
21756 kern_return_t
21757 vm_map_corpse_footprint_query_page_info(
21758 vm_map_t map,
21759 vm_map_offset_t va,
21760 int *disposition_p)
21761 {
21762 struct vm_map_corpse_footprint_header *footprint_header;
21763 struct vm_map_corpse_footprint_region *footprint_region;
21764 uint32_t footprint_region_offset;
21765 vm_map_offset_t region_start, region_end;
21766 int disp_idx;
21767 kern_return_t kr;
21768 int effective_page_size;
21769 cf_disp_t cf_disp;
21770
21771 if (!map->has_corpse_footprint) {
21772 *disposition_p = 0;
21773 kr = KERN_INVALID_ARGUMENT;
21774 goto done;
21775 }
21776
21777 footprint_header = map->vmmap_corpse_footprint;
21778 if (footprint_header == NULL) {
21779 *disposition_p = 0;
21780 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21781 kr = KERN_INVALID_ARGUMENT;
21782 goto done;
21783 }
21784
21785 /* start looking at the hint ("cf_hint_region") */
21786 footprint_region_offset = footprint_header->cf_hint_region;
21787
21788 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
21789
21790 lookup_again:
21791 if (footprint_region_offset < sizeof(*footprint_header)) {
21792 /* hint too low: start from 1st region */
21793 footprint_region_offset = sizeof(*footprint_header);
21794 }
21795 if (footprint_region_offset >= footprint_header->cf_last_region) {
21796 /* hint too high: re-start from 1st region */
21797 footprint_region_offset = sizeof(*footprint_header);
21798 }
21799 footprint_region = (struct vm_map_corpse_footprint_region *)
21800 ((char *)footprint_header + footprint_region_offset);
21801 region_start = footprint_region->cfr_vaddr;
21802 region_end = (region_start +
21803 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21804 effective_page_size));
21805 if (va < region_start &&
21806 footprint_region_offset != sizeof(*footprint_header)) {
21807 /* our range starts before the hint region */
21808
21809 /* reset the hint (in a racy way...) */
21810 footprint_header->cf_hint_region = sizeof(*footprint_header);
21811 /* lookup "va" again from 1st region */
21812 footprint_region_offset = sizeof(*footprint_header);
21813 goto lookup_again;
21814 }
21815
21816 while (va >= region_end) {
21817 if (footprint_region_offset >= footprint_header->cf_last_region) {
21818 break;
21819 }
21820 /* skip the region's header */
21821 footprint_region_offset += sizeof(*footprint_region);
21822 /* skip the region's page dispositions */
21823 footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21824 /* align to next word boundary */
21825 footprint_region_offset =
21826 roundup(footprint_region_offset,
21827 sizeof(int));
21828 footprint_region = (struct vm_map_corpse_footprint_region *)
21829 ((char *)footprint_header + footprint_region_offset);
21830 region_start = footprint_region->cfr_vaddr;
21831 region_end = (region_start +
21832 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21833 effective_page_size));
21834 }
21835 if (va < region_start || va >= region_end) {
21836 /* page not found */
21837 *disposition_p = 0;
21838 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21839 kr = KERN_SUCCESS;
21840 goto done;
21841 }
21842
21843 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
21844 footprint_header->cf_hint_region = footprint_region_offset;
21845
21846 /* get page disposition for "va" in this region */
21847 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
21848 cf_disp = footprint_region->cfr_disposition[disp_idx];
21849 *disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
21850 kr = KERN_SUCCESS;
21851 done:
21852 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21853 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
21854 DTRACE_VM4(footprint_query_page_info,
21855 vm_map_t, map,
21856 vm_map_offset_t, va,
21857 int, *disposition_p,
21858 kern_return_t, kr);
21859
21860 return kr;
21861 }
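
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller could use vm_map_corpse_footprint_query_page_info() to count how
 * many pages of a corpse mapping were resident in the original task, by
 * testing the VM_PAGE_QUERY_PAGE_PRESENT bit of each returned disposition.
 */
__unused static unsigned int
vm_map_corpse_footprint_example_count_present(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_offset_t va;
	int disposition;
	int effective_page_size;
	unsigned int num_present;

	num_present = 0;
	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
	for (va = start; va < end; va += effective_page_size) {
		if (vm_map_corpse_footprint_query_page_info(map, va,
		    &disposition) != KERN_SUCCESS) {
			continue;
		}
		if (disposition & VM_PAGE_QUERY_PAGE_PRESENT) {
			num_present++;
		}
	}
	return num_present;
}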
21862
21863 void
21864 vm_map_corpse_footprint_destroy(
21865 vm_map_t map)
21866 {
21867 if (map->has_corpse_footprint &&
21868 map->vmmap_corpse_footprint != 0) {
21869 struct vm_map_corpse_footprint_header *footprint_header;
21870 vm_size_t buf_size;
21871 kern_return_t kr;
21872
21873 footprint_header = map->vmmap_corpse_footprint;
21874 buf_size = footprint_header->cf_size;
21875 kr = vm_deallocate(kernel_map,
21876 (vm_offset_t) map->vmmap_corpse_footprint,
21877 ((vm_size_t) buf_size
21878 + PAGE_SIZE)); /* trailing guard page */
21879 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
21880 map->vmmap_corpse_footprint = 0;
21881 map->has_corpse_footprint = FALSE;
21882 }
21883 }
21884
21885 /*
21886 * vm_map_copy_footprint_ledgers:
21887 * copies any ledger that's relevant to the memory footprint of "old_task"
21888 * into the forked corpse's task ("new_task")
21889 */
21890 void
21891 vm_map_copy_footprint_ledgers(
21892 task_t old_task,
21893 task_t new_task)
21894 {
21895 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
21896 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
21897 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
21898 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
21899 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
21900 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
21901 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
21902 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
21903 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
21904 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
21905 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
21906 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
21907 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
21908 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
21909 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
21910 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
21911 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
21912 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
21913 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
21914 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
21915 }
21916
21917 /*
21918 * vm_map_copy_ledger:
21919 * copy a single ledger from "old_task" to "new_task"
21920 */
21921 void
21922 vm_map_copy_ledger(
21923 task_t old_task,
21924 task_t new_task,
21925 int ledger_entry)
21926 {
21927 ledger_amount_t old_balance, new_balance, delta;
21928
21929 assert(new_task->map->has_corpse_footprint);
21930 if (!new_task->map->has_corpse_footprint) {
21931 return;
21932 }
21933
21934 /* turn off sanity checks for the ledger we're about to mess with */
21935 ledger_disable_panic_on_negative(new_task->ledger,
21936 ledger_entry);
21937
21938 /* adjust "new_task" to match "old_task" */
21939 ledger_get_balance(old_task->ledger,
21940 ledger_entry,
21941 &old_balance);
21942 ledger_get_balance(new_task->ledger,
21943 ledger_entry,
21944 &new_balance);
21945 if (new_balance == old_balance) {
21946 /* new == old: done */
21947 } else if (new_balance > old_balance) {
21948 /* new > old ==> new -= new - old */
21949 delta = new_balance - old_balance;
21950 ledger_debit(new_task->ledger,
21951 ledger_entry,
21952 delta);
21953 } else {
21954 /* new < old ==> new += old - new */
21955 delta = old_balance - new_balance;
21956 ledger_credit(new_task->ledger,
21957 ledger_entry,
21958 delta);
21959 }
21960 }
21961
21962 #if MACH_ASSERT
21963
21964 extern int pmap_ledgers_panic;
21965 extern int pmap_ledgers_panic_leeway;
21966
21967 #define LEDGER_DRIFT(__LEDGER) \
21968 int __LEDGER##_over; \
21969 ledger_amount_t __LEDGER##_over_total; \
21970 ledger_amount_t __LEDGER##_over_max; \
21971 int __LEDGER##_under; \
21972 ledger_amount_t __LEDGER##_under_total; \
21973 ledger_amount_t __LEDGER##_under_max
21974
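/*
 * Ledger-drift statistics: vm_map_pmap_check_ledgers() below bumps the
 * "over"/"under" counters for every ledger it finds with a non-zero
 * balance (typically when a pmap is being destroyed).
 */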
21975 struct {
21976 uint64_t num_pmaps_checked;
21977
21978 LEDGER_DRIFT(phys_footprint);
21979 LEDGER_DRIFT(internal);
21980 LEDGER_DRIFT(internal_compressed);
21981 LEDGER_DRIFT(iokit_mapped);
21982 LEDGER_DRIFT(alternate_accounting);
21983 LEDGER_DRIFT(alternate_accounting_compressed);
21984 LEDGER_DRIFT(page_table);
21985 LEDGER_DRIFT(purgeable_volatile);
21986 LEDGER_DRIFT(purgeable_nonvolatile);
21987 LEDGER_DRIFT(purgeable_volatile_compressed);
21988 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
21989 LEDGER_DRIFT(tagged_nofootprint);
21990 LEDGER_DRIFT(tagged_footprint);
21991 LEDGER_DRIFT(tagged_nofootprint_compressed);
21992 LEDGER_DRIFT(tagged_footprint_compressed);
21993 LEDGER_DRIFT(network_volatile);
21994 LEDGER_DRIFT(network_nonvolatile);
21995 LEDGER_DRIFT(network_volatile_compressed);
21996 LEDGER_DRIFT(network_nonvolatile_compressed);
21997 LEDGER_DRIFT(media_nofootprint);
21998 LEDGER_DRIFT(media_footprint);
21999 LEDGER_DRIFT(media_nofootprint_compressed);
22000 LEDGER_DRIFT(media_footprint_compressed);
22001 LEDGER_DRIFT(graphics_nofootprint);
22002 LEDGER_DRIFT(graphics_footprint);
22003 LEDGER_DRIFT(graphics_nofootprint_compressed);
22004 LEDGER_DRIFT(graphics_footprint_compressed);
22005 LEDGER_DRIFT(neural_nofootprint);
22006 LEDGER_DRIFT(neural_footprint);
22007 LEDGER_DRIFT(neural_nofootprint_compressed);
22008 LEDGER_DRIFT(neural_footprint_compressed);
22009 } pmap_ledgers_drift;
22010
22011 void
22012 vm_map_pmap_check_ledgers(
22013 pmap_t pmap,
22014 ledger_t ledger,
22015 int pid,
22016 char *procname)
22017 {
22018 ledger_amount_t bal;
22019 boolean_t do_panic;
22020
22021 do_panic = FALSE;
22022
22023 pmap_ledgers_drift.num_pmaps_checked++;
22024
22025 #define LEDGER_CHECK_BALANCE(__LEDGER) \
22026 MACRO_BEGIN \
22027 int panic_on_negative = TRUE; \
22028 ledger_get_balance(ledger, \
22029 task_ledgers.__LEDGER, \
22030 &bal); \
22031 ledger_get_panic_on_negative(ledger, \
22032 task_ledgers.__LEDGER, \
22033 &panic_on_negative); \
22034 if (bal != 0) { \
22035 if (panic_on_negative || \
22036 (pmap_ledgers_panic && \
22037 pmap_ledgers_panic_leeway > 0 && \
22038 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
22039 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
22040 do_panic = TRUE; \
22041 } \
22042 printf("LEDGER BALANCE proc %d (%s) " \
22043 "\"%s\" = %lld\n", \
22044 pid, procname, #__LEDGER, bal); \
22045 if (bal > 0) { \
22046 pmap_ledgers_drift.__LEDGER##_over++; \
22047 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
22048 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
22049 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
22050 } \
22051 } else if (bal < 0) { \
22052 pmap_ledgers_drift.__LEDGER##_under++; \
22053 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
22054 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
22055 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
22056 } \
22057 } \
22058 } \
22059 MACRO_END
22060
22061 LEDGER_CHECK_BALANCE(phys_footprint);
22062 LEDGER_CHECK_BALANCE(internal);
22063 LEDGER_CHECK_BALANCE(internal_compressed);
22064 LEDGER_CHECK_BALANCE(iokit_mapped);
22065 LEDGER_CHECK_BALANCE(alternate_accounting);
22066 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
22067 LEDGER_CHECK_BALANCE(page_table);
22068 LEDGER_CHECK_BALANCE(purgeable_volatile);
22069 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
22070 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
22071 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
22072 LEDGER_CHECK_BALANCE(tagged_nofootprint);
22073 LEDGER_CHECK_BALANCE(tagged_footprint);
22074 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
22075 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
22076 LEDGER_CHECK_BALANCE(network_volatile);
22077 LEDGER_CHECK_BALANCE(network_nonvolatile);
22078 LEDGER_CHECK_BALANCE(network_volatile_compressed);
22079 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
22080 LEDGER_CHECK_BALANCE(media_nofootprint);
22081 LEDGER_CHECK_BALANCE(media_footprint);
22082 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
22083 LEDGER_CHECK_BALANCE(media_footprint_compressed);
22084 LEDGER_CHECK_BALANCE(graphics_nofootprint);
22085 LEDGER_CHECK_BALANCE(graphics_footprint);
22086 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
22087 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
22088 LEDGER_CHECK_BALANCE(neural_nofootprint);
22089 LEDGER_CHECK_BALANCE(neural_footprint);
22090 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
22091 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
22092
22093 if (do_panic) {
22094 if (pmap_ledgers_panic) {
22095 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
22096 pmap, pid, procname);
22097 } else {
22098 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
22099 pmap, pid, procname);
22100 }
22101 }
22102 }
22103 #endif /* MACH_ASSERT */