[apple/xnu.git] / osfmk / vm / vm_map.c (xnu-7195.60.75)
1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc_internal.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
114
115 #include <san/kasan.h>
116
117 #include <sys/codesign.h>
118 #include <sys/mman.h>
119
120 #include <libkern/section_keywords.h>
121 #if DEVELOPMENT || DEBUG
122 extern int proc_selfcsflags(void);
123 int panic_on_unsigned_execute = 0;
124 #endif /* DEVELOPMENT || DEBUG */
125
126 #if MACH_ASSERT
127 int debug4k_filter = 0;
128 char debug4k_proc_name[1024] = "";
129 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
130 int debug4k_panic_on_misaligned_sharing = 0;
131 const char *debug4k_category_name[] = {
132 "error", /* 0 */
133 "life", /* 1 */
134 "load", /* 2 */
135 "fault", /* 3 */
136 "copy", /* 4 */
137 "share", /* 5 */
138 "adjust", /* 6 */
139 "pmap", /* 7 */
140 "mementry", /* 8 */
141 "iokit", /* 9 */
142 "upl", /* 10 */
143 "exc", /* 11 */
144 "vfs" /* 12 */
145 };
146 #endif /* MACH_ASSERT */
147 int debug4k_no_cow_copyin = 0;
148
149
150 #if __arm64__
151 extern const int fourk_binary_compatibility_unsafe;
152 extern const int fourk_binary_compatibility_allow_wx;
153 #endif /* __arm64__ */
154 extern int proc_selfpid(void);
155 extern char *proc_name_address(void *p);
156
157 #if VM_MAP_DEBUG_APPLE_PROTECT
158 int vm_map_debug_apple_protect = 0;
159 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
160 #if VM_MAP_DEBUG_FOURK
161 int vm_map_debug_fourk = 0;
162 #endif /* VM_MAP_DEBUG_FOURK */
163
164 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
165 int vm_map_executable_immutable_verbose = 0;
166
167 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
168
169 extern u_int32_t random(void); /* from <libkern/libkern.h> */
170 /* Internal prototypes
171 */
172
173 static void vm_map_simplify_range(
174 vm_map_t map,
175 vm_map_offset_t start,
176 vm_map_offset_t end); /* forward */
177
178 static boolean_t vm_map_range_check(
179 vm_map_t map,
180 vm_map_offset_t start,
181 vm_map_offset_t end,
182 vm_map_entry_t *entry);
183
184 static vm_map_entry_t _vm_map_entry_create(
185 struct vm_map_header *map_header, boolean_t map_locked);
186
187 static void _vm_map_entry_dispose(
188 struct vm_map_header *map_header,
189 vm_map_entry_t entry);
190
191 static void vm_map_pmap_enter(
192 vm_map_t map,
193 vm_map_offset_t addr,
194 vm_map_offset_t end_addr,
195 vm_object_t object,
196 vm_object_offset_t offset,
197 vm_prot_t protection);
198
199 static void _vm_map_clip_end(
200 struct vm_map_header *map_header,
201 vm_map_entry_t entry,
202 vm_map_offset_t end);
203
204 static void _vm_map_clip_start(
205 struct vm_map_header *map_header,
206 vm_map_entry_t entry,
207 vm_map_offset_t start);
208
209 static void vm_map_entry_delete(
210 vm_map_t map,
211 vm_map_entry_t entry);
212
213 static kern_return_t vm_map_delete(
214 vm_map_t map,
215 vm_map_offset_t start,
216 vm_map_offset_t end,
217 int flags,
218 vm_map_t zap_map);
219
220 static void vm_map_copy_insert(
221 vm_map_t map,
222 vm_map_entry_t after_where,
223 vm_map_copy_t copy);
224
225 static kern_return_t vm_map_copy_overwrite_unaligned(
226 vm_map_t dst_map,
227 vm_map_entry_t entry,
228 vm_map_copy_t copy,
229 vm_map_address_t start,
230 boolean_t discard_on_success);
231
232 static kern_return_t vm_map_copy_overwrite_aligned(
233 vm_map_t dst_map,
234 vm_map_entry_t tmp_entry,
235 vm_map_copy_t copy,
236 vm_map_offset_t start,
237 pmap_t pmap);
238
239 static kern_return_t vm_map_copyin_kernel_buffer(
240 vm_map_t src_map,
241 vm_map_address_t src_addr,
242 vm_map_size_t len,
243 boolean_t src_destroy,
244 vm_map_copy_t *copy_result); /* OUT */
245
246 static kern_return_t vm_map_copyout_kernel_buffer(
247 vm_map_t map,
248 vm_map_address_t *addr, /* IN/OUT */
249 vm_map_copy_t copy,
250 vm_map_size_t copy_size,
251 boolean_t overwrite,
252 boolean_t consume_on_success);
253
254 static void vm_map_fork_share(
255 vm_map_t old_map,
256 vm_map_entry_t old_entry,
257 vm_map_t new_map);
258
259 static boolean_t vm_map_fork_copy(
260 vm_map_t old_map,
261 vm_map_entry_t *old_entry_p,
262 vm_map_t new_map,
263 int vm_map_copyin_flags);
264
265 static kern_return_t vm_map_wire_nested(
266 vm_map_t map,
267 vm_map_offset_t start,
268 vm_map_offset_t end,
269 vm_prot_t caller_prot,
270 vm_tag_t tag,
271 boolean_t user_wire,
272 pmap_t map_pmap,
273 vm_map_offset_t pmap_addr,
274 ppnum_t *physpage_p);
275
276 static kern_return_t vm_map_unwire_nested(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end,
280 boolean_t user_wire,
281 pmap_t map_pmap,
282 vm_map_offset_t pmap_addr);
283
284 static kern_return_t vm_map_overwrite_submap_recurse(
285 vm_map_t dst_map,
286 vm_map_offset_t dst_addr,
287 vm_map_size_t dst_size);
288
289 static kern_return_t vm_map_copy_overwrite_nested(
290 vm_map_t dst_map,
291 vm_map_offset_t dst_addr,
292 vm_map_copy_t copy,
293 boolean_t interruptible,
294 pmap_t pmap,
295 boolean_t discard_on_success);
296
297 static kern_return_t vm_map_remap_extract(
298 vm_map_t map,
299 vm_map_offset_t addr,
300 vm_map_size_t size,
301 vm_prot_t required_protection,
302 boolean_t copy,
303 struct vm_map_header *map_header,
304 vm_prot_t *cur_protection,
305 vm_prot_t *max_protection,
306 vm_inherit_t inheritance,
307 vm_map_kernel_flags_t vmk_flags);
308
309 static kern_return_t vm_map_remap_range_allocate(
310 vm_map_t map,
311 vm_map_address_t *address,
312 vm_map_size_t size,
313 vm_map_offset_t mask,
314 int flags,
315 vm_map_kernel_flags_t vmk_flags,
316 vm_tag_t tag,
317 vm_map_entry_t *map_entry);
318
319 static void vm_map_region_look_for_page(
320 vm_map_t map,
321 vm_map_offset_t va,
322 vm_object_t object,
323 vm_object_offset_t offset,
324 int max_refcnt,
325 unsigned short depth,
326 vm_region_extended_info_t extended,
327 mach_msg_type_number_t count);
328
329 static int vm_map_region_count_obj_refs(
330 vm_map_entry_t entry,
331 vm_object_t object);
332
333
334 static kern_return_t vm_map_willneed(
335 vm_map_t map,
336 vm_map_offset_t start,
337 vm_map_offset_t end);
338
339 static kern_return_t vm_map_reuse_pages(
340 vm_map_t map,
341 vm_map_offset_t start,
342 vm_map_offset_t end);
343
344 static kern_return_t vm_map_reusable_pages(
345 vm_map_t map,
346 vm_map_offset_t start,
347 vm_map_offset_t end);
348
349 static kern_return_t vm_map_can_reuse(
350 vm_map_t map,
351 vm_map_offset_t start,
352 vm_map_offset_t end);
353
354 #if MACH_ASSERT
355 static kern_return_t vm_map_pageout(
356 vm_map_t map,
357 vm_map_offset_t start,
358 vm_map_offset_t end);
359 #endif /* MACH_ASSERT */
360
361 kern_return_t vm_map_corpse_footprint_collect(
362 vm_map_t old_map,
363 vm_map_entry_t old_entry,
364 vm_map_t new_map);
365 void vm_map_corpse_footprint_collect_done(
366 vm_map_t new_map);
367 void vm_map_corpse_footprint_destroy(
368 vm_map_t map);
369 kern_return_t vm_map_corpse_footprint_query_page_info(
370 vm_map_t map,
371 vm_map_offset_t va,
372 int *disposition_p);
373 void vm_map_footprint_query_page_info(
374 vm_map_t map,
375 vm_map_entry_t map_entry,
376 vm_map_offset_t curr_s_offset,
377 int *disposition_p);
378
379 static const struct vm_map_entry vm_map_entry_template = {
380 .behavior = VM_BEHAVIOR_DEFAULT,
381 .inheritance = VM_INHERIT_DEFAULT,
382 };
383
384 pid_t find_largest_process_vm_map_entries(void);
385
386 /*
387 * Macros to copy a vm_map_entry. We must be careful to correctly
388 * manage the wired page count. vm_map_entry_copy() creates a new
389 * map entry to the same memory - the wired count in the new entry
390 * must be set to zero. vm_map_entry_copy_full() creates a new
391 * entry that is identical to the old entry. This preserves the
392 * wire count; it's used for map splitting and zone changing in
393 * vm_map_copyout.
394 */
395
396 static inline void
397 vm_map_entry_copy_pmap_cs_assoc(
398 vm_map_t map __unused,
399 vm_map_entry_t new __unused,
400 vm_map_entry_t old __unused)
401 {
402 /* when pmap_cs is not enabled, assert as a sanity check */
403 assert(new->pmap_cs_associated == FALSE);
404 }
405
406 /*
407 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
408 * But for security reasons on some platforms, we don't want the
409 * new mapping to be "used for jit", so we reset the flag here.
410 */
411 static inline void
412 vm_map_entry_copy_code_signing(
413 vm_map_t map,
414 vm_map_entry_t new,
415 vm_map_entry_t old __unused)
416 {
417 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
418 assert(new->used_for_jit == old->used_for_jit);
419 } else {
420 new->used_for_jit = FALSE;
421 }
422 }
423
424 static inline void
425 vm_map_entry_copy(
426 vm_map_t map,
427 vm_map_entry_t new,
428 vm_map_entry_t old)
429 {
430 boolean_t _vmec_reserved = new->from_reserved_zone;
431 *new = *old;
432 new->is_shared = FALSE;
433 new->needs_wakeup = FALSE;
434 new->in_transition = FALSE;
435 new->wired_count = 0;
436 new->user_wired_count = 0;
437 new->permanent = FALSE;
438 vm_map_entry_copy_code_signing(map, new, old);
439 vm_map_entry_copy_pmap_cs_assoc(map, new, old);
440 new->from_reserved_zone = _vmec_reserved;
441 if (new->iokit_acct) {
442 assertf(!new->use_pmap, "old %p new %p\n", old, new);
443 new->iokit_acct = FALSE;
444 new->use_pmap = TRUE;
445 }
446 new->vme_resilient_codesign = FALSE;
447 new->vme_resilient_media = FALSE;
448 new->vme_atomic = FALSE;
449 new->vme_no_copy_on_read = FALSE;
450 }
451
452 static inline void
453 vm_map_entry_copy_full(
454 vm_map_entry_t new,
455 vm_map_entry_t old)
456 {
457 boolean_t _vmecf_reserved = new->from_reserved_zone;
458 *new = *old;
459 new->from_reserved_zone = _vmecf_reserved;
460 }
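/*
 * Illustrative sketch (hypothetical helper, not part of the original file)
 * contrasting the two copies described above: a new entry aliasing the same
 * memory must not inherit the wired counts, while an entry merely
 * transplanted between headers (as in vm_map_copyout) keeps them.
 */
#if 0 /* example only, not built */
static void
example_entry_copies(vm_map_t map, vm_map_entry_t src,
    vm_map_entry_t alias, vm_map_entry_t transplanted)
{
	vm_map_entry_copy(map, alias, src);          /* wired_count reset to 0 */
	vm_map_entry_copy_full(transplanted, src);   /* wire counts preserved */
}
#endif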
461
462 /*
463 * Unlike lck_rw_lock_shared_to_exclusive(), which returns FALSE/0 on failure,
464 * vm_map_lock_read_to_write() returns zero on success and non-zero on failure.
465 */
466 __attribute__((always_inline))
467 int
468 vm_map_lock_read_to_write(vm_map_t map)
469 {
470 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
471 DTRACE_VM(vm_map_lock_upgrade);
472 return 0;
473 }
474 return 1;
475 }
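/*
 * Usage sketch (hypothetical caller, not part of the original file):
 * on a non-zero return the shared lock has already been dropped by
 * lck_rw_lock_shared_to_exclusive(), so the caller must take the write
 * lock explicitly and revalidate anything it looked up under the read lock.
 */
#if 0 /* example only, not built */
static void
example_upgrade(vm_map_t map)
{
	vm_map_lock_read(map);
	/* ... read-only inspection ... */
	if (vm_map_lock_read_to_write(map)) {
		/* upgrade failed: the read lock is gone */
		vm_map_lock(map);
		/* ... re-lookup any entries cached under the read lock ... */
	}
	/* ... modify the map under the write lock ... */
	vm_map_unlock(map);
}
#endif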
476
477 __attribute__((always_inline))
478 boolean_t
479 vm_map_try_lock(vm_map_t map)
480 {
481 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
482 DTRACE_VM(vm_map_lock_w);
483 return TRUE;
484 }
485 return FALSE;
486 }
487
488 __attribute__((always_inline))
489 boolean_t
490 vm_map_try_lock_read(vm_map_t map)
491 {
492 if (lck_rw_try_lock_shared(&(map)->lock)) {
493 DTRACE_VM(vm_map_lock_r);
494 return TRUE;
495 }
496 return FALSE;
497 }
498
499 /*
500 * Routines to get the page size the caller should
501 * use while inspecting the target address space.
502 * Use the "_safely" variant if the caller is dealing with a user-provided
503 * array whose size depends on the page size, to avoid any overflow or
504 * underflow of a user-allocated buffer.
505 */
506 int
507 vm_self_region_page_shift_safely(
508 vm_map_t target_map)
509 {
510 int effective_page_shift = 0;
511
512 if (PAGE_SIZE == (4096)) {
513 /* x86_64 and 4k watches: always use 4k */
514 return PAGE_SHIFT;
515 }
516 /* did caller provide an explicit page size for this thread to use? */
517 effective_page_shift = thread_self_region_page_shift();
518 if (effective_page_shift) {
519 /* use the explicitly-provided page size */
520 return effective_page_shift;
521 }
522 /* no explicit page size: use the caller's page size... */
523 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
524 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
525 /* page size match: safe to use */
526 return effective_page_shift;
527 }
528 /* page size mismatch */
529 return -1;
530 }
531 int
532 vm_self_region_page_shift(
533 vm_map_t target_map)
534 {
535 int effective_page_shift;
536
537 effective_page_shift = vm_self_region_page_shift_safely(target_map);
538 if (effective_page_shift == -1) {
539 /* no safe value but OK to guess for caller */
540 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
541 VM_MAP_PAGE_SHIFT(target_map));
542 }
543 return effective_page_shift;
544 }
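/*
 * Usage sketch (hypothetical caller, not part of the original file):
 * a routine that sizes a user-supplied buffer from the page size should
 * use the "_safely" variant and refuse on a mismatch instead of guessing.
 */
#if 0 /* example only, not built */
static kern_return_t
example_count_pages(
	vm_map_t        target_map,
	vm_map_size_t   region_size,
	unsigned int    *page_count) /* OUT */
{
	int shift = vm_self_region_page_shift_safely(target_map);

	if (shift == -1) {
		/* page size mismatch: bail out rather than risk a buffer overrun */
		return KERN_INVALID_ARGUMENT;
	}
	*page_count = (unsigned int)(region_size >> shift);
	return KERN_SUCCESS;
}
#endif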
545
546
547 /*
548 * Decide if we want to allow processes to execute from their data or stack areas.
549 * override_nx() returns true if we do. Data/stack execution can be enabled independently
550 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
551 * or allow_stack_exec to enable data execution for that type of data area for that particular
552 * ABI (or both by or'ing the flags together). These are initialized in the architecture
553 * specific pmap files since the default behavior varies according to architecture. The
554 * main reason it varies is because of the need to provide binary compatibility with old
555 * applications that were written before these restrictions came into being. In the old
556 * days, an app could execute anything it could read, but this has slowly been tightened
557 * up over time. The default behavior is:
558 *
559 * 32-bit PPC apps may execute from both stack and data areas
560 * 32-bit Intel apps may execute from data areas but not stack
561 * 64-bit PPC/Intel apps may not execute from either data or stack
562 *
563 * An application on any architecture may override these defaults by explicitly
564 * adding PROT_EXEC permission to the page in question with the mprotect(2)
565 * system call. This code here just determines what happens when an app tries to
566 * execute from a page that lacks execute permission.
567 *
568 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
569 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
570 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
571 * execution from data areas for a particular binary even if the arch normally permits it. As
572 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
573 * to support some complicated use cases, notably browsers with out-of-process plugins that
574 * are not all NX-safe.
575 */
576
577 extern int allow_data_exec, allow_stack_exec;
578
579 int
580 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
581 {
582 int current_abi;
583
584 if (map->pmap == kernel_pmap) {
585 return FALSE;
586 }
587
588 /*
589 * Determine if the app is running in 32 or 64 bit mode.
590 */
591
592 if (vm_map_is_64bit(map)) {
593 current_abi = VM_ABI_64;
594 } else {
595 current_abi = VM_ABI_32;
596 }
597
598 /*
599 * Determine if we should allow the execution based on whether it's a
600 * stack or data area and the current architecture.
601 */
602
603 if (user_tag == VM_MEMORY_STACK) {
604 return allow_stack_exec & current_abi;
605 }
606
607 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
608 }
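/*
 * Worked example (hypothetical policy values, not part of the original file):
 * override_nx() simply ANDs the process's ABI bit against the relevant policy
 * word, so with the settings below a 32-bit process may execute from its data
 * area (assuming map_disallow_data_exec is not set) but not from its stack,
 * and a 64-bit process may execute from neither.
 */
#if 0 /* example only, not built */
static void
example_nx_policy(void)
{
	allow_data_exec  = VM_ABI_32;  /* data execution: 32-bit processes only */
	allow_stack_exec = 0;          /* stack execution: nobody */
	/*
	 * override_nx(map_of_32bit_task, VM_MEMORY_STACK)  -> 0 (deny)
	 * override_nx(map_of_32bit_task, VM_MEMORY_MALLOC) -> non-zero (allow)
	 * override_nx(map_of_64bit_task, VM_MEMORY_MALLOC) -> 0 (deny)
	 */
}
#endif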
609
610
611 /*
612 * Virtual memory maps provide for the mapping, protection,
613 * and sharing of virtual memory objects. In addition,
614 * this module provides for an efficient virtual copy of
615 * memory from one map to another.
616 *
617 * Synchronization is required prior to most operations.
618 *
619 * Maps consist of an ordered doubly-linked list of simple
620 * entries; a single hint is used to speed up lookups.
621 *
622 * Sharing maps have been deleted from this version of Mach.
623 * All shared objects are now mapped directly into the respective
624 * maps. This requires a change in the copy on write strategy;
625 * the asymmetric (delayed) strategy is used for shared temporary
626 * objects instead of the symmetric (shadow) strategy. All maps
627 * are now "top level" maps (either task map, kernel map or submap
628 * of the kernel map).
629 *
630 * Since portions of maps are specified by start/end addresses,
631 * which may not align with existing map entries, all
632 * routines merely "clip" entries to these start/end values.
633 * [That is, an entry is split into two, bordering at a
634 * start or end value.] Note that these clippings may not
635 * always be necessary (as the two resulting entries are then
636 * not changed); however, the clipping is done for convenience.
637 * No attempt is currently made to "glue back together" two
638 * abutting entries.
639 *
640 * The symmetric (shadow) copy strategy implements virtual copy
641 * by copying VM object references from one map to
642 * another, and then marking both regions as copy-on-write.
643 * It is important to note that only one writeable reference
644 * to a VM object region exists in any map when this strategy
645 * is used -- this means that shadow object creation can be
646 * delayed until a write operation occurs. The asymmetric (delayed)
647 * strategy allows multiple maps to have writeable references to
648 * the same region of a vm object, and hence cannot delay creating
649 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
650 * Copying of permanent objects is completely different; see
651 * vm_object_copy_strategically() in vm_object.c.
652 */
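/*
 * Illustrative sketch (hypothetical helper, not part of the original file)
 * of the clipping idiom described above, as used by e.g.
 * vm_map_apple_protected() below: split the entry at "start" and "end" so
 * that the resulting entry covers at most [start, end) before operating on it.
 */
#if 0 /* example only, not built */
static void
example_clip_range(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	vm_map_entry_t entry;

	vm_map_lock(map);
	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);
		/* "entry" now starts at "start" and ends at or before "end" */
	}
	vm_map_unlock(map);
}
#endif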
653
654 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone; /* zone for vm_map structures */
655 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
656 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone; /* zone for vm_map_copy structures */
657
658 SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_zone; /* zone for vm_map_entry structures */
659 SECURITY_READ_ONLY_LATE(zone_t) vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
660
661 #define VM_MAP_ZONE_NAME "maps"
662 #define VM_MAP_ZFLAGS ( \
663 ZC_NOENCRYPT | \
664 ZC_NOGC | \
665 ZC_NOGZALLOC | \
666 ZC_ALLOW_FOREIGN)
667
668 #define VME_RESERVED_ZONE_NAME "Reserved VM map entries"
669 #define VM_MAP_RESERVED_ZFLAGS ( \
670 ZC_NOENCRYPT | \
671 ZC_ALLOW_FOREIGN | \
672 ZC_NOCALLOUT | \
673 ZC_NOGZALLOC | \
674 ZC_KASAN_NOQUARANTINE | \
675 ZC_NOGC)
676
677 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
678 #define VM_MAP_HOLES_ZFLAGS ( \
679 ZC_NOENCRYPT | \
680 ZC_NOGC | \
681 ZC_NOGZALLOC | \
682 ZC_ALLOW_FOREIGN)
683
684 /*
685 * Asserts that a vm_map_copy object is coming from the
686 * vm_map_copy_zone to ensure that it isn't a fake constructed
687 * anywhere else.
688 */
689 static inline void
690 vm_map_copy_require(struct vm_map_copy *copy)
691 {
692 zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
693 }
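/*
 * Usage sketch (hypothetical caller, not part of the original file): a
 * routine handed a vm_map_copy_t from an untrusted source (e.g. out of an
 * IPC message) validates its provenance before using it, so a forged
 * structure that did not come from vm_map_copy_zone panics instead of
 * being consumed.
 */
#if 0 /* example only, not built */
static void
example_consume_copy(vm_map_copy_t copy)
{
	vm_map_copy_require(copy);
	/* ... now safe to look at copy->type, copy->size, ... */
}
#endif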
694
695 /*
696 * Placeholder object for submap operations. This object is dropped
697 * into the range by a call to vm_map_find, and removed when
698 * vm_map_submap creates the submap.
699 */
700
701 vm_object_t vm_submap_object;
702
703 static __startup_data vm_offset_t map_data;
704 static __startup_data vm_size_t map_data_size;
705 static __startup_data vm_offset_t kentry_data;
706 static __startup_data vm_size_t kentry_data_size;
707 static __startup_data vm_offset_t map_holes_data;
708 static __startup_data vm_size_t map_holes_data_size;
709
710 #if XNU_TARGET_OS_OSX
711 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
712 #else /* XNU_TARGET_OS_OSX */
713 #define NO_COALESCE_LIMIT 0
714 #endif /* XNU_TARGET_OS_OSX */
715
716 /* Skip acquiring locks if we're in the midst of a kernel core dump */
717 unsigned int not_in_kdp = 1;
718
719 unsigned int vm_map_set_cache_attr_count = 0;
720
721 kern_return_t
722 vm_map_set_cache_attr(
723 vm_map_t map,
724 vm_map_offset_t va)
725 {
726 vm_map_entry_t map_entry;
727 vm_object_t object;
728 kern_return_t kr = KERN_SUCCESS;
729
730 vm_map_lock_read(map);
731
732 if (!vm_map_lookup_entry(map, va, &map_entry) ||
733 map_entry->is_sub_map) {
734 /*
735 * that memory is not properly mapped
736 */
737 kr = KERN_INVALID_ARGUMENT;
738 goto done;
739 }
740 object = VME_OBJECT(map_entry);
741
742 if (object == VM_OBJECT_NULL) {
743 /*
744 * there should be a VM object here at this point
745 */
746 kr = KERN_INVALID_ARGUMENT;
747 goto done;
748 }
749 vm_object_lock(object);
750 object->set_cache_attr = TRUE;
751 vm_object_unlock(object);
752
753 vm_map_set_cache_attr_count++;
754 done:
755 vm_map_unlock_read(map);
756
757 return kr;
758 }
759
760
761 #if CONFIG_CODE_DECRYPTION
762 /*
763 * vm_map_apple_protected:
764 * This remaps the requested part of the object with an object backed by
765 * the decrypting pager.
766 * crypt_info contains entry points and session data for the crypt module.
767 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
768 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
769 */
770 kern_return_t
771 vm_map_apple_protected(
772 vm_map_t map,
773 vm_map_offset_t start,
774 vm_map_offset_t end,
775 vm_object_offset_t crypto_backing_offset,
776 struct pager_crypt_info *crypt_info,
777 uint32_t cryptid)
778 {
779 boolean_t map_locked;
780 kern_return_t kr;
781 vm_map_entry_t map_entry;
782 struct vm_map_entry tmp_entry;
783 memory_object_t unprotected_mem_obj;
784 vm_object_t protected_object;
785 vm_map_offset_t map_addr;
786 vm_map_offset_t start_aligned, end_aligned;
787 vm_object_offset_t crypto_start, crypto_end;
788 int vm_flags;
789 vm_map_kernel_flags_t vmk_flags;
790
791 vm_flags = 0;
792 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
793
794 map_locked = FALSE;
795 unprotected_mem_obj = MEMORY_OBJECT_NULL;
796
797 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
798 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
799 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
800 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
801
802 #if __arm64__
803 /*
804 * "start" and "end" might be 4K-aligned but not 16K-aligned,
805 * so we might have to loop and establish up to 3 mappings:
806 *
807 * + the first 16K-page, which might overlap with the previous
808 * 4K-aligned mapping,
809 * + the center,
810 * + the last 16K-page, which might overlap with the next
811 * 4K-aligned mapping.
812 * Each of these mapping might be backed by a vnode pager (if
813 * properly page-aligned) or a "fourk_pager", itself backed by a
814 * vnode pager (if 4K-aligned but not page-aligned).
815 */
816 #endif /* __arm64__ */
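/*
 * Worked example (illustrative numbers, assuming a 16K kernel page size):
 * start = 0x5000, end = 0x19000 align out to [0x4000, 0x1C000), so the loop
 * can establish up to three mappings: the first 16K page [0x4000, 0x8000)
 * and the last 16K page [0x18000, 0x1C000), which overlap neighbouring
 * 4K-aligned mappings and may need a "fourk_pager", plus the fully covered
 * middle [0x8000, 0x18000).
 */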
817
818 map_addr = start_aligned;
819 for (map_addr = start_aligned;
820 map_addr < end;
821 map_addr = tmp_entry.vme_end) {
822 vm_map_lock(map);
823 map_locked = TRUE;
824
825 /* lookup the protected VM object */
826 if (!vm_map_lookup_entry(map,
827 map_addr,
828 &map_entry) ||
829 map_entry->is_sub_map ||
830 VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
831 /* that memory is not properly mapped */
832 kr = KERN_INVALID_ARGUMENT;
833 goto done;
834 }
835
836 /* ensure mapped memory is mapped as executable,
837 * except for the model decryption flow */
838 if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
839 !(map_entry->protection & VM_PROT_EXECUTE)) {
840 kr = KERN_INVALID_ARGUMENT;
841 goto done;
842 }
843
844 /* get the protected object to be decrypted */
845 protected_object = VME_OBJECT(map_entry);
846 if (protected_object == VM_OBJECT_NULL) {
847 /* there should be a VM object here at this point */
848 kr = KERN_INVALID_ARGUMENT;
849 goto done;
850 }
851 /* ensure protected object stays alive while map is unlocked */
852 vm_object_reference(protected_object);
853
854 /* limit the map entry to the area we want to cover */
855 vm_map_clip_start(map, map_entry, start_aligned);
856 vm_map_clip_end(map, map_entry, end_aligned);
857
858 tmp_entry = *map_entry;
859 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
860 vm_map_unlock(map);
861 map_locked = FALSE;
862
863 /*
864 * This map entry might be only partially encrypted
865 * (if not fully "page-aligned").
866 */
867 crypto_start = 0;
868 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
869 if (tmp_entry.vme_start < start) {
870 if (tmp_entry.vme_start != start_aligned) {
871 kr = KERN_INVALID_ADDRESS;
872 }
873 crypto_start += (start - tmp_entry.vme_start);
874 }
875 if (tmp_entry.vme_end > end) {
876 if (tmp_entry.vme_end != end_aligned) {
877 kr = KERN_INVALID_ADDRESS;
878 }
879 crypto_end -= (tmp_entry.vme_end - end);
880 }
881
882 /*
883 * This "extra backing offset" is needed to get the decryption
884 * routine to use the right key. It adjusts for the possibly
885 * relative offset of an interposed "4K" pager...
886 */
887 if (crypto_backing_offset == (vm_object_offset_t) -1) {
888 crypto_backing_offset = VME_OFFSET(&tmp_entry);
889 }
890
891 /*
892 * Lookup (and create if necessary) the protected memory object
893 * matching that VM object.
894 * If successful, this also grabs a reference on the memory object,
895 * to guarantee that it doesn't go away before we get a chance to map
896 * it.
897 */
898 unprotected_mem_obj = apple_protect_pager_setup(
899 protected_object,
900 VME_OFFSET(&tmp_entry),
901 crypto_backing_offset,
902 crypt_info,
903 crypto_start,
904 crypto_end);
905
906 /* release extra ref on protected object */
907 vm_object_deallocate(protected_object);
908
909 if (unprotected_mem_obj == NULL) {
910 kr = KERN_FAILURE;
911 goto done;
912 }
913
914 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
915 /* can overwrite an immutable mapping */
916 vmk_flags.vmkf_overwrite_immutable = TRUE;
917 #if __arm64__
918 if (tmp_entry.used_for_jit &&
919 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
920 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
921 fourk_binary_compatibility_unsafe &&
922 fourk_binary_compatibility_allow_wx) {
923 printf("** FOURK_COMPAT [%d]: "
924 "allowing write+execute at 0x%llx\n",
925 proc_selfpid(), tmp_entry.vme_start);
926 vmk_flags.vmkf_map_jit = TRUE;
927 }
928 #endif /* __arm64__ */
929
930 /* map this memory object in place of the current one */
931 map_addr = tmp_entry.vme_start;
932 kr = vm_map_enter_mem_object(map,
933 &map_addr,
934 (tmp_entry.vme_end -
935 tmp_entry.vme_start),
936 (mach_vm_offset_t) 0,
937 vm_flags,
938 vmk_flags,
939 VM_KERN_MEMORY_NONE,
940 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
941 0,
942 TRUE,
943 tmp_entry.protection,
944 tmp_entry.max_protection,
945 tmp_entry.inheritance);
946 assertf(kr == KERN_SUCCESS,
947 "kr = 0x%x\n", kr);
948 assertf(map_addr == tmp_entry.vme_start,
949 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
950 (uint64_t)map_addr,
951 (uint64_t) tmp_entry.vme_start,
952 &tmp_entry);
953
954 #if VM_MAP_DEBUG_APPLE_PROTECT
955 if (vm_map_debug_apple_protect) {
956 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
957 " backing:[object:%p,offset:0x%llx,"
958 "crypto_backing_offset:0x%llx,"
959 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
960 map,
961 (uint64_t) map_addr,
962 (uint64_t) (map_addr + (tmp_entry.vme_end -
963 tmp_entry.vme_start)),
964 unprotected_mem_obj,
965 protected_object,
966 VME_OFFSET(&tmp_entry),
967 crypto_backing_offset,
968 crypto_start,
969 crypto_end);
970 }
971 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
972
973 /*
974 * Release the reference obtained by
975 * apple_protect_pager_setup().
976 * The mapping (if it succeeded) is now holding a reference on
977 * the memory object.
978 */
979 memory_object_deallocate(unprotected_mem_obj);
980 unprotected_mem_obj = MEMORY_OBJECT_NULL;
981
982 /* continue with next map entry */
983 crypto_backing_offset += (tmp_entry.vme_end -
984 tmp_entry.vme_start);
985 crypto_backing_offset -= crypto_start;
986 }
987 kr = KERN_SUCCESS;
988
989 done:
990 if (map_locked) {
991 vm_map_unlock(map);
992 }
993 return kr;
994 }
995 #endif /* CONFIG_CODE_DECRYPTION */
996
997
998 LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
999 LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
1000 LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);
1001
1002 #if XNU_TARGET_OS_OSX
1003 int malloc_no_cow = 0;
1004 #else /* XNU_TARGET_OS_OSX */
1005 int malloc_no_cow = 1;
1006 #endif /* XNU_TARGET_OS_OSX */
1007 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
1008 #if DEBUG
1009 int vm_check_map_sanity = 0;
1010 #endif
1011
1012 /*
1013 * vm_map_init:
1014 *
1015 * Initialize the vm_map module. Must be called before
1016 * any other vm_map routines.
1017 *
1018 * Map and entry structures are allocated from zones -- we must
1019 * initialize those zones.
1020 *
1021 * There are three zones of interest:
1022 *
1023 * vm_map_zone: used to allocate maps.
1024 * vm_map_entry_zone: used to allocate map entries.
1025 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1026 *
1027 * The kernel allocates map entries from a special zone that is initially
1028 * "crammed" with memory. It would be difficult (perhaps impossible) for
1029 * the kernel to allocate more memory to an entry zone when it became
1030 * empty since the very act of allocating memory implies the creation
1031 * of a new entry.
1032 */
1033 __startup_func
1034 void
1035 vm_map_init(void)
1036 {
1037 const char *mez_name = "VM map entries";
1038
1039
1040 #if MACH_ASSERT
1041 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1042 sizeof(debug4k_filter));
1043 #endif /* MACH_ASSERT */
1044
1045 vm_map_zone = zone_create(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1046 VM_MAP_ZFLAGS);
1047
1048 vm_map_entry_zone = zone_create(mez_name, sizeof(struct vm_map_entry),
1049 ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT);
1050
1051 /*
1052 * Don't quarantine because we always need elements available
1053 * Disallow GC on this zone... to aid the GC.
1054 */
1055 vm_map_entry_reserved_zone = zone_create_ext(VME_RESERVED_ZONE_NAME,
1056 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
1057 ZONE_ID_ANY, ^(zone_t z) {
1058 zone_set_noexpand(z, 64 * kentry_data_size);
1059 });
1060
1061 vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1062 ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1063
1064 vm_map_holes_zone = zone_create(VM_MAP_HOLES_ZONE_NAME,
1065 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS);
1066
1067 /*
1068 * Add the stolen memory to zones, adjust zone size and stolen counts.
1069 */
1070 zcram(vm_map_zone, map_data, map_data_size);
1071 zcram(vm_map_entry_reserved_zone, kentry_data, kentry_data_size);
1072 zcram(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1073
1074 /*
1075 * Since these are covered by zones, remove them from stolen page accounting.
1076 */
1077 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1078
1079 #if VM_MAP_DEBUG_APPLE_PROTECT
1080 PE_parse_boot_argn("vm_map_debug_apple_protect",
1081 &vm_map_debug_apple_protect,
1082 sizeof(vm_map_debug_apple_protect));
1083 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1084 #if VM_MAP_DEBUG_FOURK
1085 PE_parse_boot_argn("vm_map_debug_fourk",
1086 &vm_map_debug_fourk,
1087 sizeof(vm_map_debug_fourk));
1088 #endif /* VM_MAP_DEBUG_FOURK */
1089 PE_parse_boot_argn("vm_map_executable_immutable",
1090 &vm_map_executable_immutable,
1091 sizeof(vm_map_executable_immutable));
1092 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
1093 &vm_map_executable_immutable_verbose,
1094 sizeof(vm_map_executable_immutable_verbose));
1095
1096 PE_parse_boot_argn("malloc_no_cow",
1097 &malloc_no_cow,
1098 sizeof(malloc_no_cow));
1099 if (malloc_no_cow) {
1100 vm_memory_malloc_no_cow_mask = 0ULL;
1101 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1102 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1103 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1104 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1105 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1106 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1107 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1108 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1109 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1110 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1111 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1112 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1113 &vm_memory_malloc_no_cow_mask,
1114 sizeof(vm_memory_malloc_no_cow_mask));
1115 }
1116
1117 #if DEBUG
1118 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1119 if (vm_check_map_sanity) {
1120 kprintf("VM sanity checking enabled\n");
1121 } else {
1122 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1123 }
1124 #endif /* DEBUG */
1125
1126 #if DEVELOPMENT || DEBUG
1127 PE_parse_boot_argn("panic_on_unsigned_execute",
1128 &panic_on_unsigned_execute,
1129 sizeof(panic_on_unsigned_execute));
1130 #endif /* DEVELOPMENT || DEBUG */
1131 }
1132
1133 __startup_func
1134 static void
1135 vm_map_steal_memory(void)
1136 {
1137 uint16_t kentry_initial_pages;
1138
1139 map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
1140 sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);
1141
1142 /*
1143 * kentry_initial_pages corresponds to the number of kernel map entries
1144 * required during bootstrap until the asynchronous replenishment
1145 * scheme is activated and/or entries are available from the general
1146 * map entry pool.
1147 */
1148 #if defined(__LP64__)
1149 kentry_initial_pages = 10;
1150 #else
1151 kentry_initial_pages = 6;
1152 #endif
1153
1154 #if CONFIG_GZALLOC
1155 /* If using the guard allocator, reserve more memory for the kernel
1156 * reserved map entry pool.
1157 */
1158 if (gzalloc_enabled()) {
1159 kentry_initial_pages *= 1024;
1160 }
1161 #endif
1162
1163 kentry_data_size = zone_get_foreign_alloc_size(VME_RESERVED_ZONE_NAME,
1164 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
1165 kentry_initial_pages);
1166
1167 map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
1168 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1169 kentry_initial_pages);
1170
1171 /*
1172 * Steal a contiguous range of memory so that a simple range check
1173 * can validate foreign addresses being freed/crammed to these
1174 * zones
1175 */
1176 vm_size_t total_size;
1177 if (os_add3_overflow(map_data_size, kentry_data_size,
1178 map_holes_data_size, &total_size)) {
1179 panic("vm_map_steal_memory: overflow in amount of memory requested");
1180 }
1181 map_data = zone_foreign_mem_init(total_size);
1182 kentry_data = map_data + map_data_size;
1183 map_holes_data = kentry_data + kentry_data_size;
1184 }
1185 STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
1186
1187 boolean_t vm_map_supports_hole_optimization = FALSE;
1188
1189 void
1190 vm_kernel_reserved_entry_init(void)
1191 {
1192 zone_prio_refill_configure(vm_map_entry_reserved_zone);
1193
1194 /*
1195 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1196 */
1197 zone_prio_refill_configure(vm_map_holes_zone);
1198 vm_map_supports_hole_optimization = TRUE;
1199 }
1200
1201 void
1202 vm_map_disable_hole_optimization(vm_map_t map)
1203 {
1204 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1205
1206 if (map->holelistenabled) {
1207 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1208
1209 while (hole_entry != NULL) {
1210 next_hole_entry = hole_entry->vme_next;
1211
1212 hole_entry->vme_next = NULL;
1213 hole_entry->vme_prev = NULL;
1214 zfree(vm_map_holes_zone, hole_entry);
1215
1216 if (next_hole_entry == head_entry) {
1217 hole_entry = NULL;
1218 } else {
1219 hole_entry = next_hole_entry;
1220 }
1221 }
1222
1223 map->holes_list = NULL;
1224 map->holelistenabled = FALSE;
1225
1226 map->first_free = vm_map_first_entry(map);
1227 SAVE_HINT_HOLE_WRITE(map, NULL);
1228 }
1229 }
1230
1231 boolean_t
1232 vm_kernel_map_is_kernel(vm_map_t map)
1233 {
1234 return map->pmap == kernel_pmap;
1235 }
1236
1237 /*
1238 * vm_map_create:
1239 *
1240 * Creates and returns a new empty VM map with
1241 * the given physical map structure, and having
1242 * the given lower and upper address bounds.
1243 */
1244
1245 vm_map_t
1246 vm_map_create(
1247 pmap_t pmap,
1248 vm_map_offset_t min,
1249 vm_map_offset_t max,
1250 boolean_t pageable)
1251 {
1252 int options;
1253
1254 options = 0;
1255 if (pageable) {
1256 options |= VM_MAP_CREATE_PAGEABLE;
1257 }
1258 return vm_map_create_options(pmap, min, max, options);
1259 }
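/*
 * Usage sketch (hypothetical caller, not part of the original file):
 * create an empty pageable map over an existing pmap; real callers then
 * populate it with vm_map_enter() and friends, and eventually tear it
 * down with vm_map_destroy().
 */
#if 0 /* example only, not built */
static vm_map_t
example_create_map(pmap_t pmap, vm_map_offset_t min, vm_map_offset_t max)
{
	vm_map_t map;

	map = vm_map_create(pmap, min, max, TRUE); /* TRUE => pageable entries */
	/* ... populate the new map ... */
	return map;
}
#endif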
1260
1261 vm_map_t
1262 vm_map_create_options(
1263 pmap_t pmap,
1264 vm_map_offset_t min,
1265 vm_map_offset_t max,
1266 int options)
1267 {
1268 vm_map_t result;
1269 struct vm_map_links *hole_entry = NULL;
1270
1271 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1272 /* unknown option */
1273 return VM_MAP_NULL;
1274 }
1275
1276 result = (vm_map_t) zalloc(vm_map_zone);
1277 if (result == VM_MAP_NULL) {
1278 panic("vm_map_create");
1279 }
1280
1281 vm_map_first_entry(result) = vm_map_to_entry(result);
1282 vm_map_last_entry(result) = vm_map_to_entry(result);
1283 result->hdr.nentries = 0;
1284 if (options & VM_MAP_CREATE_PAGEABLE) {
1285 result->hdr.entries_pageable = TRUE;
1286 } else {
1287 result->hdr.entries_pageable = FALSE;
1288 }
1289
1290 vm_map_store_init( &(result->hdr));
1291
1292 result->hdr.page_shift = PAGE_SHIFT;
1293
1294 result->size = 0;
1295 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1296 result->user_wire_size = 0;
1297 #if XNU_TARGET_OS_OSX
1298 result->vmmap_high_start = 0;
1299 #endif
1300 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1301 #if TASK_SWAPPER
1302 result->res_count = 1;
1303 result->sw_state = MAP_SW_IN;
1304 #endif /* TASK_SWAPPER */
1305 result->pmap = pmap;
1306 result->min_offset = min;
1307 result->max_offset = max;
1308 result->wiring_required = FALSE;
1309 result->no_zero_fill = FALSE;
1310 result->mapped_in_other_pmaps = FALSE;
1311 result->wait_for_space = FALSE;
1312 result->switch_protect = FALSE;
1313 result->disable_vmentry_reuse = FALSE;
1314 result->map_disallow_data_exec = FALSE;
1315 result->is_nested_map = FALSE;
1316 result->map_disallow_new_exec = FALSE;
1317 result->terminated = FALSE;
1318 result->cs_enforcement = FALSE;
1319 result->highest_entry_end = 0;
1320 result->first_free = vm_map_to_entry(result);
1321 result->hint = vm_map_to_entry(result);
1322 result->jit_entry_exists = FALSE;
1323 result->is_alien = FALSE;
1324 result->reserved_regions = FALSE;
1325
1326 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1327 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1328 result->has_corpse_footprint = TRUE;
1329 result->holelistenabled = FALSE;
1330 result->vmmap_corpse_footprint = NULL;
1331 } else {
1332 result->has_corpse_footprint = FALSE;
1333 if (vm_map_supports_hole_optimization) {
1334 hole_entry = zalloc(vm_map_holes_zone);
1335
1336 hole_entry->start = min;
1337 #if defined(__arm__) || defined(__arm64__)
1338 hole_entry->end = result->max_offset;
1339 #else
1340 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1341 #endif
1342 result->holes_list = result->hole_hint = hole_entry;
1343 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1344 result->holelistenabled = TRUE;
1345 } else {
1346 result->holelistenabled = FALSE;
1347 }
1348 }
1349
1350 vm_map_lock_init(result);
1351 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1352
1353 return result;
1354 }
1355
1356 vm_map_size_t
1357 vm_map_adjusted_size(vm_map_t map)
1358 {
1359 struct vm_reserved_region *regions = NULL;
1360 size_t num_regions = 0;
1361 mach_vm_size_t reserved_size = 0, map_size = 0;
1362
1363 if (map == NULL || (map->size == 0)) {
1364 return 0;
1365 }
1366
1367 map_size = map->size;
1368
1369 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1370 /*
1371 * No special reserved regions or not an exotic map or the task
1372 * is terminating and these special regions might have already
1373 * been deallocated.
1374 */
1375 return map_size;
1376 }
1377
1378 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
1379 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1380
1381 while (num_regions) {
1382 reserved_size += regions[--num_regions].vmrr_size;
1383 }
1384
1385 /*
1386 * There are a few places where the map is being switched out due to
1387 * 'termination' without that bit being set (e.g. exec and corpse purging).
1388 * In those cases, we could have the map's regions being deallocated on
1389 * a core while some accounting process is trying to get the map's size.
1390 * So this assert can't be enabled till all those places are uniform in
1391 * their use of the 'map->terminated' bit.
1392 *
1393 * assert(map_size >= reserved_size);
1394 */
1395
1396 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1397 }
1398
1399 /*
1400 * vm_map_entry_create: [ internal use only ]
1401 *
1402 * Allocates a VM map entry for insertion in the
1403 * given map (or map copy). No fields are filled.
1404 */
1405 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1406
1407 #define vm_map_copy_entry_create(copy, map_locked) \
1408 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1409 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1410
1411 static vm_map_entry_t
1412 _vm_map_entry_create(
1413 struct vm_map_header *map_header, boolean_t __unused map_locked)
1414 {
1415 zone_t zone;
1416 vm_map_entry_t entry;
1417
1418 zone = vm_map_entry_zone;
1419
1420 assert(map_header->entries_pageable ? !map_locked : TRUE);
1421
1422 if (map_header->entries_pageable) {
1423 entry = (vm_map_entry_t) zalloc(zone);
1424 } else {
1425 entry = (vm_map_entry_t) zalloc_noblock(zone);
1426
1427 if (entry == VM_MAP_ENTRY_NULL) {
1428 zone = vm_map_entry_reserved_zone;
1429 entry = (vm_map_entry_t) zalloc(zone);
1430 OSAddAtomic(1, &reserved_zalloc_count);
1431 } else {
1432 OSAddAtomic(1, &nonreserved_zalloc_count);
1433 }
1434 }
1435
1436 if (entry == VM_MAP_ENTRY_NULL) {
1437 panic("vm_map_entry_create");
1438 }
1439 *entry = vm_map_entry_template;
1440 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1441
1442 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1443 #if MAP_ENTRY_CREATION_DEBUG
1444 entry->vme_creation_maphdr = map_header;
1445 backtrace(&entry->vme_creation_bt[0],
1446 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1447 #endif
1448 return entry;
1449 }
1450
1451 /*
1452 * vm_map_entry_dispose: [ internal use only ]
1453 *
1454 * Inverse of vm_map_entry_create.
1455 *
1456 * write map lock held so no need to
1457 * do anything special to insure correctness
1458 * of the stores
1459 */
1460 #define vm_map_entry_dispose(map, entry) \
1461 _vm_map_entry_dispose(&(map)->hdr, (entry))
1462
1463 #define vm_map_copy_entry_dispose(copy, entry) \
1464 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1465
1466 static void
1467 _vm_map_entry_dispose(
1468 struct vm_map_header *map_header,
1469 vm_map_entry_t entry)
1470 {
1471 zone_t zone;
1472
1473 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1474 zone = vm_map_entry_zone;
1475 } else {
1476 zone = vm_map_entry_reserved_zone;
1477 }
1478
1479 if (!map_header->entries_pageable) {
1480 if (zone == vm_map_entry_zone) {
1481 OSAddAtomic(-1, &nonreserved_zalloc_count);
1482 } else {
1483 OSAddAtomic(-1, &reserved_zalloc_count);
1484 }
1485 }
1486
1487 zfree(zone, entry);
1488 }
1489
1490 #if MACH_ASSERT
1491 static boolean_t first_free_check = FALSE;
1492 boolean_t
1493 first_free_is_valid(
1494 vm_map_t map)
1495 {
1496 if (!first_free_check) {
1497 return TRUE;
1498 }
1499
1500 return first_free_is_valid_store( map );
1501 }
1502 #endif /* MACH_ASSERT */
1503
1504
1505 #define vm_map_copy_entry_link(copy, after_where, entry) \
1506 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1507
1508 #define vm_map_copy_entry_unlink(copy, entry) \
1509 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1510
1511 #if MACH_ASSERT && TASK_SWAPPER
1512 /*
1513 * vm_map_res_reference:
1514 *
1515 * Adds another valid residence count to the given map.
1516 *
1517 * Map is locked so this function can be called from
1518 * vm_map_swapin.
1519 *
1520 */
1521 void
1522 vm_map_res_reference(vm_map_t map)
1523 {
1524 /* assert map is locked */
1525 assert(map->res_count >= 0);
1526 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1527 if (map->res_count == 0) {
1528 lck_mtx_unlock(&map->s_lock);
1529 vm_map_lock(map);
1530 vm_map_swapin(map);
1531 lck_mtx_lock(&map->s_lock);
1532 ++map->res_count;
1533 vm_map_unlock(map);
1534 } else {
1535 ++map->res_count;
1536 }
1537 }
1538
1539 /*
1540 * vm_map_reference_swap:
1541 *
1542 * Adds valid reference and residence counts to the given map.
1543 *
1544 * The map may not be in memory (i.e. zero residence count).
1545 *
1546 */
1547 void
1548 vm_map_reference_swap(vm_map_t map)
1549 {
1550 assert(map != VM_MAP_NULL);
1551 lck_mtx_lock(&map->s_lock);
1552 assert(map->res_count >= 0);
1553 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1554 os_ref_retain_locked(&map->map_refcnt);
1555 vm_map_res_reference(map);
1556 lck_mtx_unlock(&map->s_lock);
1557 }
1558
1559 /*
1560 * vm_map_res_deallocate:
1561 *
1562 * Decrement residence count on a map; possibly causing swapout.
1563 *
1564 * The map must be in memory (i.e. non-zero residence count).
1565 *
1566 * The map is locked, so this function is callable from vm_map_deallocate.
1567 *
1568 */
1569 void
1570 vm_map_res_deallocate(vm_map_t map)
1571 {
1572 assert(map->res_count > 0);
1573 if (--map->res_count == 0) {
1574 lck_mtx_unlock(&map->s_lock);
1575 vm_map_lock(map);
1576 vm_map_swapout(map);
1577 vm_map_unlock(map);
1578 lck_mtx_lock(&map->s_lock);
1579 }
1580 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1581 }
1582 #endif /* MACH_ASSERT && TASK_SWAPPER */
1583
1584 /*
1585 * vm_map_destroy:
1586 *
1587 * Actually destroy a map.
1588 */
1589 void
1590 vm_map_destroy(
1591 vm_map_t map,
1592 int flags)
1593 {
1594 vm_map_lock(map);
1595
1596 /* final cleanup: no need to unnest shared region */
1597 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1598 /* final cleanup: ok to remove immutable mappings */
1599 flags |= VM_MAP_REMOVE_IMMUTABLE;
1600 /* final cleanup: allow gaps in range */
1601 flags |= VM_MAP_REMOVE_GAPS_OK;
1602
1603 /* clean up regular map entries */
1604 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1605 flags, VM_MAP_NULL);
1606 /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1607 #if !defined(__arm__)
1608 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1609 flags, VM_MAP_NULL);
1610 #endif /* !__arm__ */
1611
1612 vm_map_disable_hole_optimization(map);
1613 vm_map_corpse_footprint_destroy(map);
1614
1615 vm_map_unlock(map);
1616
1617 assert(map->hdr.nentries == 0);
1618
1619 if (map->pmap) {
1620 pmap_destroy(map->pmap);
1621 }
1622
1623 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1624 /*
1625 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1626 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1627 * structure or kalloc'ed via lck_mtx_init.
1628 * An example is s_lock_ext within struct _vm_map.
1629 *
1630 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1631 * can add another tag to detect embedded vs alloc'ed indirect external
1632 * mutexes but that'll be additional checks in the lock path and require
1633 * updating dependencies for the old vs new tag.
1634 *
1635 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1636 * just when lock debugging is ON, we choose to forego explicitly destroying
1637 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1638 * count on vm_map_lck_grp, which has no serious side-effect.
1639 */
1640 } else {
1641 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1642 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1643 }
1644
1645 zfree(vm_map_zone, map);
1646 }
1647
1648 /*
1649 * Returns pid of the task with the largest number of VM map entries.
1650 * Used in the zone-map-exhaustion jetsam path.
1651 */
1652 pid_t
1653 find_largest_process_vm_map_entries(void)
1654 {
1655 pid_t victim_pid = -1;
1656 int max_vm_map_entries = 0;
1657 task_t task = TASK_NULL;
1658 queue_head_t *task_list = &tasks;
1659
1660 lck_mtx_lock(&tasks_threads_lock);
1661 queue_iterate(task_list, task, task_t, tasks) {
1662 if (task == kernel_task || !task->active) {
1663 continue;
1664 }
1665
1666 vm_map_t task_map = task->map;
1667 if (task_map != VM_MAP_NULL) {
1668 int task_vm_map_entries = task_map->hdr.nentries;
1669 if (task_vm_map_entries > max_vm_map_entries) {
1670 max_vm_map_entries = task_vm_map_entries;
1671 victim_pid = pid_from_task(task);
1672 }
1673 }
1674 }
1675 lck_mtx_unlock(&tasks_threads_lock);
1676
1677 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1678 return victim_pid;
1679 }
1680
1681 #if TASK_SWAPPER
1682 /*
1683 * vm_map_swapin/vm_map_swapout
1684 *
1685 * Swap a map in and out, either referencing or releasing its resources.
1686 * These functions are internal use only; however, they must be exported
1687 * because they may be called from macros, which are exported.
1688 *
1689 * In the case of swapout, there could be races on the residence count,
1690 * so if the residence count is up, we return, assuming that a
1691 * vm_map_deallocate() call in the near future will bring us back.
1692 *
1693 * Locking:
1694 * -- We use the map write lock for synchronization among races.
1695 * -- The map write lock, and not the simple s_lock, protects the
1696 * swap state of the map.
1697 * -- If a map entry is a share map, then we hold both locks, in
1698 * hierarchical order.
1699 *
1700 * Synchronization Notes:
1701 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1702 * will block on the map lock and proceed when swapout is through.
1703 * 2) A vm_map_reference() call at this time is illegal, and will
1704 * cause a panic. vm_map_reference() is only allowed on resident
1705 * maps, since it refuses to block.
1706 * 3) A vm_map_swapin() call during a swapin will block, and
1707 * proceed when the first swapin is done, turning into a nop.
1708 * This is the reason the res_count is not incremented until
1709 * after the swapin is complete.
1710 * 4) There is a timing hole after the checks of the res_count, before
1711 * the map lock is taken, during which a swapin may get the lock
1712 * before a swapout about to happen. If this happens, the swapin
1713 * will detect the state and increment the reference count, causing
1714 * the swapout to be a nop, thereby delaying it until a later
1715 * vm_map_deallocate. If the swapout gets the lock first, then
1716 * the swapin will simply block until the swapout is done, and
1717 * then proceed.
1718 *
1719 * Because vm_map_swapin() is potentially an expensive operation, it
1720 * should be used with caution.
1721 *
1722 * Invariants:
1723 * 1) A map with a residence count of zero is either swapped, or
1724 * being swapped.
1725 * 2) A map with a non-zero residence count is either resident,
1726 * or being swapped in.
1727 */
1728
1729 int vm_map_swap_enable = 1;
1730
1731 void
1732 vm_map_swapin(vm_map_t map)
1733 {
1734 vm_map_entry_t entry;
1735
1736 if (!vm_map_swap_enable) { /* debug */
1737 return;
1738 }
1739
1740 /*
1741 * Map is locked
1742 * First deal with various races.
1743 */
1744 if (map->sw_state == MAP_SW_IN) {
1745 /*
1746 * we raced with swapout and won. Returning will incr.
1747 * the res_count, turning the swapout into a nop.
1748 */
1749 return;
1750 }
1751
1752 /*
1753 * The residence count must be zero. If we raced with another
1754 * swapin, the state would have been IN; if we raced with a
1755 * swapout (after another competing swapin), we must have lost
1756 * the race to get here (see above comment), in which case
1757 * res_count is still 0.
1758 */
1759 assert(map->res_count == 0);
1760
1761 /*
1762 * There are no intermediate states of a map going out or
1763 * coming in, since the map is locked during the transition.
1764 */
1765 assert(map->sw_state == MAP_SW_OUT);
1766
1767 /*
1768 * We now operate upon each map entry. If the entry is a sub-
1769 * or share-map, we call vm_map_res_reference upon it.
1770 * If the entry is an object, we call vm_object_res_reference
1771 * (this may iterate through the shadow chain).
1772 * Note that we hold the map locked the entire time,
1773 * even if we get back here via a recursive call in
1774 * vm_map_res_reference.
1775 */
1776 entry = vm_map_first_entry(map);
1777
1778 while (entry != vm_map_to_entry(map)) {
1779 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1780 if (entry->is_sub_map) {
1781 vm_map_t lmap = VME_SUBMAP(entry);
1782 lck_mtx_lock(&lmap->s_lock);
1783 vm_map_res_reference(lmap);
1784 lck_mtx_unlock(&lmap->s_lock);
1785 } else {
1786 vm_object_t object = VME_OBJECT(entry);
1787 vm_object_lock(object);
1788 /*
1789 * This call may iterate through the
1790 * shadow chain.
1791 */
1792 vm_object_res_reference(object);
1793 vm_object_unlock(object);
1794 }
1795 }
1796 entry = entry->vme_next;
1797 }
1798 assert(map->sw_state == MAP_SW_OUT);
1799 map->sw_state = MAP_SW_IN;
1800 }
1801
1802 void
1803 vm_map_swapout(vm_map_t map)
1804 {
1805 vm_map_entry_t entry;
1806
1807 /*
1808 * Map is locked
1809 * First deal with various races.
1810 * If we raced with a swapin and lost, the residence count
1811 * will have been incremented to 1, and we simply return.
1812 */
1813 lck_mtx_lock(&map->s_lock);
1814 if (map->res_count != 0) {
1815 lck_mtx_unlock(&map->s_lock);
1816 return;
1817 }
1818 lck_mtx_unlock(&map->s_lock);
1819
1820 /*
1821 * There are no intermediate states of a map going out or
1822 * coming in, since the map is locked during the transition.
1823 */
1824 assert(map->sw_state == MAP_SW_IN);
1825
1826 if (!vm_map_swap_enable) {
1827 return;
1828 }
1829
1830 /*
1831 * We now operate upon each map entry. If the entry is a sub-
1832 * or share-map, we call vm_map_res_deallocate upon it.
1833 * If the entry is an object, we call vm_object_res_deallocate
1834 * (this may iterate through the shadow chain).
1835 * Note that we hold the map locked the entire time,
1836 * even if we get back here via a recursive call in
1837 * vm_map_res_deallocate.
1838 */
1839 entry = vm_map_first_entry(map);
1840
1841 while (entry != vm_map_to_entry(map)) {
1842 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1843 if (entry->is_sub_map) {
1844 vm_map_t lmap = VME_SUBMAP(entry);
1845 lck_mtx_lock(&lmap->s_lock);
1846 vm_map_res_deallocate(lmap);
1847 lck_mtx_unlock(&lmap->s_lock);
1848 } else {
1849 vm_object_t object = VME_OBJECT(entry);
1850 vm_object_lock(object);
1851 /*
1852 * This call may take a long time,
1853 * since it could actively push
1854 * out pages (if we implement it
1855 * that way).
1856 */
1857 vm_object_res_deallocate(object);
1858 vm_object_unlock(object);
1859 }
1860 }
1861 entry = entry->vme_next;
1862 }
1863 assert(map->sw_state == MAP_SW_IN);
1864 map->sw_state = MAP_SW_OUT;
1865 }
1866
1867 #endif /* TASK_SWAPPER */
1868
1869 /*
1870 * vm_map_lookup_entry: [ internal use only ]
1871 *
1872 * Calls into the vm map store layer to find the map
1873 * entry containing (or immediately preceding) the
1874 * specified address in the given map; the entry is returned
1875 * in the "entry" parameter. The boolean
1876 * result indicates whether the address is
1877 * actually contained in the map.
1878 */
1879 boolean_t
1880 vm_map_lookup_entry(
1881 vm_map_t map,
1882 vm_map_offset_t address,
1883 vm_map_entry_t *entry) /* OUT */
1884 {
1885 return vm_map_store_lookup_entry( map, address, entry );
1886 }
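/*
 * Illustrative sketch (not compiled): typical use of
 * vm_map_lookup_entry().  "map" and "addr" are assumed to be a locked
 * map and an address of interest; on TRUE the returned entry contains
 * "addr", on FALSE it is the entry immediately preceding the unmapped
 * address.
 */
#if 0
	vm_map_entry_t entry;

	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* "addr" lies within [entry->vme_start, entry->vme_end) */
	} else {
		/* "addr" is in a hole; "entry" precedes it */
	}
#endif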
1887
1888 /*
1889 * Routine: vm_map_find_space
1890 * Purpose:
1891 * Allocate a range in the specified virtual address map,
1892 * returning the entry allocated for that range.
1893 * Used by kmem_alloc, etc.
1894 *
1895 * The map must NOT be locked on entry. It will be returned locked
1896 * on KERN_SUCCESS, unlocked on failure.
1897 *
1898 * If an entry is allocated, the object/offset fields
1899 * are initialized to zero.
1900 *
1901 * If the VM_MAP_FIND_LAST_FREE flag is set, allocate from the end of
1902 * the map. This is currently only used for allocating memory for zones
1903 * backing one of the kalloc heaps. (rdar://65832263)
1904 */
1905 kern_return_t
1906 vm_map_find_space(
1907 vm_map_t map,
1908 vm_map_offset_t *address, /* OUT */
1909 vm_map_size_t size,
1910 vm_map_offset_t mask,
1911 int flags,
1912 vm_map_kernel_flags_t vmk_flags,
1913 vm_tag_t tag,
1914 vm_map_entry_t *o_entry) /* OUT */
1915 {
1916 vm_map_entry_t entry, new_entry, hole_entry;
1917 vm_map_offset_t start;
1918 vm_map_offset_t end;
1919
1920 if (size == 0) {
1921 *address = 0;
1922 return KERN_INVALID_ARGUMENT;
1923 }
1924
1925 new_entry = vm_map_entry_create(map, FALSE);
1926 vm_map_lock(map);
1927
1928 if (flags & VM_MAP_FIND_LAST_FREE) {
1929 assert(!map->disable_vmentry_reuse);
1930 /* TODO: Make backward lookup generic and support guard pages */
1931 assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
1932 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
1933
1934 /* Allocate space from end of map */
1935 vm_map_store_find_last_free(map, &entry);
1936
1937 if (!entry) {
1938 goto noSpace;
1939 }
1940
1941 if (entry == vm_map_to_entry(map)) {
1942 end = map->max_offset;
1943 } else {
1944 end = entry->vme_start;
1945 }
1946
1947 while (TRUE) {
1948 vm_map_entry_t prev;
1949
1950 start = end - size;
1951
1952 if ((start < map->min_offset) || end < start) {
1953 goto noSpace;
1954 }
1955
1956 prev = entry->vme_prev;
1957 entry = prev;
1958
1959 if (prev == vm_map_to_entry(map)) {
1960 break;
1961 }
1962
1963 if (prev->vme_end <= start) {
1964 break;
1965 }
1966
1967 /*
1968 * Didn't fit -- try the preceding entry.
1969 */
1970
1971 end = entry->vme_start;
1972 }
1973 } else {
1974 if (vmk_flags.vmkf_guard_after) {
1975 /* account for the back guard page in the size */
1976 size += VM_MAP_PAGE_SIZE(map);
1977 }
1978
1979 /*
1980 * Look for the first possible address; if there's already
1981 * something at this address, we have to start after it.
1982 */
1983
1984 if (map->disable_vmentry_reuse == TRUE) {
1985 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1986 } else {
1987 if (map->holelistenabled) {
1988 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1989
1990 if (hole_entry == NULL) {
1991 /*
1992 * No more space in the map?
1993 */
1994 goto noSpace;
1995 }
1996
1997 entry = hole_entry;
1998 start = entry->vme_start;
1999 } else {
2000 assert(first_free_is_valid(map));
2001 if ((entry = map->first_free) == vm_map_to_entry(map)) {
2002 start = map->min_offset;
2003 } else {
2004 start = entry->vme_end;
2005 }
2006 }
2007 }
2008
2009 /*
2010 * In any case, the "entry" always precedes
2011 * the proposed new region throughout the loop:
2012 */
2013
2014 while (TRUE) {
2015 vm_map_entry_t next;
2016
2017 /*
2018 * Find the end of the proposed new region.
2019 * Be sure we didn't go beyond the end, or
2020 * wrap around the address.
2021 */
2022
2023 if (vmk_flags.vmkf_guard_before) {
2024 /* reserve space for the front guard page */
2025 start += VM_MAP_PAGE_SIZE(map);
2026 }
2027 end = ((start + mask) & ~mask);
2028
2029 if (end < start) {
2030 goto noSpace;
2031 }
2032 start = end;
2033 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
2034 end += size;
2035 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
2036
2037 if ((end > map->max_offset) || (end < start)) {
2038 goto noSpace;
2039 }
2040
2041 next = entry->vme_next;
2042
2043 if (map->holelistenabled) {
2044 if (entry->vme_end >= end) {
2045 break;
2046 }
2047 } else {
2048 /*
2049 * If there are no more entries, we must win.
2050 *
2051 * OR
2052 *
2053 * If there is another entry, it must be
2054 * after the end of the potential new region.
2055 */
2056
2057 if (next == vm_map_to_entry(map)) {
2058 break;
2059 }
2060
2061 if (next->vme_start >= end) {
2062 break;
2063 }
2064 }
2065
2066 /*
2067 * Didn't fit -- move to the next entry.
2068 */
2069
2070 entry = next;
2071
2072 if (map->holelistenabled) {
2073 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2074 /*
2075 * Wrapped around
2076 */
2077 goto noSpace;
2078 }
2079 start = entry->vme_start;
2080 } else {
2081 start = entry->vme_end;
2082 }
2083 }
2084
2085 if (vmk_flags.vmkf_guard_before) {
2086 /* go back for the front guard page */
2087 start -= VM_MAP_PAGE_SIZE(map);
2088 }
2089 }
2090
2091 if (map->holelistenabled) {
2092 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2093 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2094 }
2095 }
2096
2097 /*
2098 * At this point,
2099 * "start" and "end" should define the endpoints of the
2100 * available new range, and
2101 * "entry" should refer to the region before the new
2102 * range, and
2103 *
2104 * the map should be locked.
2105 */
2106
2107 *address = start;
2108
2109 assert(start < end);
2110 new_entry->vme_start = start;
2111 new_entry->vme_end = end;
2112 assert(page_aligned(new_entry->vme_start));
2113 assert(page_aligned(new_entry->vme_end));
2114 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
2115 VM_MAP_PAGE_MASK(map)));
2116 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
2117 VM_MAP_PAGE_MASK(map)));
2118
2119 new_entry->is_shared = FALSE;
2120 new_entry->is_sub_map = FALSE;
2121 new_entry->use_pmap = TRUE;
2122 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
2123 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
2124
2125 new_entry->needs_copy = FALSE;
2126
2127 new_entry->inheritance = VM_INHERIT_DEFAULT;
2128 new_entry->protection = VM_PROT_DEFAULT;
2129 new_entry->max_protection = VM_PROT_ALL;
2130 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
2131 new_entry->wired_count = 0;
2132 new_entry->user_wired_count = 0;
2133
2134 new_entry->in_transition = FALSE;
2135 new_entry->needs_wakeup = FALSE;
2136 new_entry->no_cache = FALSE;
2137 new_entry->permanent = FALSE;
2138 new_entry->superpage_size = FALSE;
2139 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
2140 new_entry->map_aligned = TRUE;
2141 } else {
2142 new_entry->map_aligned = FALSE;
2143 }
2144
2145 new_entry->used_for_jit = FALSE;
2146 new_entry->pmap_cs_associated = FALSE;
2147 new_entry->zero_wired_pages = FALSE;
2148 new_entry->iokit_acct = FALSE;
2149 new_entry->vme_resilient_codesign = FALSE;
2150 new_entry->vme_resilient_media = FALSE;
2151 if (vmk_flags.vmkf_atomic_entry) {
2152 new_entry->vme_atomic = TRUE;
2153 } else {
2154 new_entry->vme_atomic = FALSE;
2155 }
2156
2157 VME_ALIAS_SET(new_entry, tag);
2158
2159 /*
2160 * Insert the new entry into the list
2161 */
2162
2163 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
2164
2165 map->size += size;
2166
2167 /*
2168 * Update the lookup hint
2169 */
2170 SAVE_HINT_MAP_WRITE(map, new_entry);
2171
2172 *o_entry = new_entry;
2173 return KERN_SUCCESS;
2174
2175 noSpace:
2176
2177 vm_map_entry_dispose(map, new_entry);
2178 vm_map_unlock(map);
2179 return KERN_NO_SPACE;
2180 }
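/*
 * Illustrative sketch (not compiled): how a kernel client might call
 * vm_map_find_space(), per the contract above (map unlocked on entry,
 * returned locked on KERN_SUCCESS).  The use of kernel_map and the
 * VM_KERN_MEMORY_KALLOC tag here is only an assumption for the example.
 */
#if 0
	vm_map_offset_t addr;
	vm_map_entry_t  entry;
	kern_return_t   kr;

	kr = vm_map_find_space(kernel_map, &addr, size,
	    (vm_map_offset_t)0,                 /* mask: no alignment constraint */
	    0,                                  /* flags */
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_KALLOC,
	    &entry);
	if (kr == KERN_SUCCESS) {
		/* object/offset of "entry" are zero-initialized; set them up... */
		vm_map_unlock(kernel_map);      /* caller must drop the lock */
	}
#endif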
2181
2182 int vm_map_pmap_enter_print = FALSE;
2183 int vm_map_pmap_enter_enable = FALSE;
2184
2185 /*
2186 * Routine: vm_map_pmap_enter [internal only]
2187 *
2188 * Description:
2189 * Force pages from the specified object to be entered into
2190 * the pmap at the specified address if they are present.
2191 * As soon as a page is not found in the object, the scan ends.
2192 *
2193 * Returns:
2194 * Nothing.
2195 *
2196 * In/out conditions:
2197 * The source map should not be locked on entry.
2198 */
2199 __unused static void
2200 vm_map_pmap_enter(
2201 vm_map_t map,
2202 vm_map_offset_t addr,
2203 vm_map_offset_t end_addr,
2204 vm_object_t object,
2205 vm_object_offset_t offset,
2206 vm_prot_t protection)
2207 {
2208 int type_of_fault;
2209 kern_return_t kr;
2210 struct vm_object_fault_info fault_info = {};
2211
2212 if (map->pmap == 0) {
2213 return;
2214 }
2215
2216 assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);
2217
2218 while (addr < end_addr) {
2219 vm_page_t m;
2220
2221
2222 /*
2223 * TODO:
2224 * From vm_map_enter(), we come into this function without the map
2225 * lock held or the object lock held.
2226 * We haven't taken a reference on the object either.
2227 * We should do a proper lookup on the map to make sure
2228 * that things are sane before we go locking objects that
2229 * could have been deallocated from under us.
2230 */
2231
2232 vm_object_lock(object);
2233
2234 m = vm_page_lookup(object, offset);
2235
2236 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2237 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2238 vm_object_unlock(object);
2239 return;
2240 }
2241
2242 if (vm_map_pmap_enter_print) {
2243 printf("vm_map_pmap_enter:");
2244 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2245 map, (unsigned long long)addr, object, (unsigned long long)offset);
2246 }
2247 type_of_fault = DBG_CACHE_HIT_FAULT;
2248 kr = vm_fault_enter(m, map->pmap,
2249 addr,
2250 PAGE_SIZE, 0,
2251 protection, protection,
2252 VM_PAGE_WIRED(m),
2253 FALSE, /* change_wiring */
2254 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2255 &fault_info,
2256 NULL, /* need_retry */
2257 &type_of_fault);
2258
2259 vm_object_unlock(object);
2260
2261 offset += PAGE_SIZE_64;
2262 addr += PAGE_SIZE;
2263 }
2264 }
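/*
 * Illustrative sketch (not compiled): vm_map_pmap_enter() is __unused
 * in this version; if it were called, a pre-fault of an already
 * looked-up entry might look like this ("map" and "entry" are assumed
 * names).
 */
#if 0
	vm_map_pmap_enter(map,
	    entry->vme_start,
	    entry->vme_end,
	    VME_OBJECT(entry),
	    VME_OFFSET(entry),
	    entry->protection);
#endif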
2265
2266 boolean_t vm_map_pmap_is_empty(
2267 vm_map_t map,
2268 vm_map_offset_t start,
2269 vm_map_offset_t end);
2270 boolean_t
2271 vm_map_pmap_is_empty(
2272 vm_map_t map,
2273 vm_map_offset_t start,
2274 vm_map_offset_t end)
2275 {
2276 #ifdef MACHINE_PMAP_IS_EMPTY
2277 return pmap_is_empty(map->pmap, start, end);
2278 #else /* MACHINE_PMAP_IS_EMPTY */
2279 vm_map_offset_t offset;
2280 ppnum_t phys_page;
2281
2282 if (map->pmap == NULL) {
2283 return TRUE;
2284 }
2285
2286 for (offset = start;
2287 offset < end;
2288 offset += PAGE_SIZE) {
2289 phys_page = pmap_find_phys(map->pmap, offset);
2290 if (phys_page) {
2291 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2292 "page %d at 0x%llx\n",
2293 map, (long long)start, (long long)end,
2294 phys_page, (long long)offset);
2295 return FALSE;
2296 }
2297 }
2298 return TRUE;
2299 #endif /* MACHINE_PMAP_IS_EMPTY */
2300 }
2301
2302 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2303 kern_return_t
2304 vm_map_random_address_for_size(
2305 vm_map_t map,
2306 vm_map_offset_t *address,
2307 vm_map_size_t size)
2308 {
2309 kern_return_t kr = KERN_SUCCESS;
2310 int tries = 0;
2311 vm_map_offset_t random_addr = 0;
2312 vm_map_offset_t hole_end;
2313
2314 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2315 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2316 vm_map_size_t vm_hole_size = 0;
2317 vm_map_size_t addr_space_size;
2318
2319 addr_space_size = vm_map_max(map) - vm_map_min(map);
2320
2321 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
2322
2323 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2324 random_addr = ((vm_map_offset_t)random()) << VM_MAP_PAGE_SHIFT(map);
2325 random_addr = vm_map_trunc_page(
2326 vm_map_min(map) + (random_addr % addr_space_size),
2327 VM_MAP_PAGE_MASK(map));
2328
2329 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2330 if (prev_entry == vm_map_to_entry(map)) {
2331 next_entry = vm_map_first_entry(map);
2332 } else {
2333 next_entry = prev_entry->vme_next;
2334 }
2335 if (next_entry == vm_map_to_entry(map)) {
2336 hole_end = vm_map_max(map);
2337 } else {
2338 hole_end = next_entry->vme_start;
2339 }
2340 vm_hole_size = hole_end - random_addr;
2341 if (vm_hole_size >= size) {
2342 *address = random_addr;
2343 break;
2344 }
2345 }
2346 tries++;
2347 }
2348
2349 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2350 kr = KERN_NO_SPACE;
2351 }
2352 return kr;
2353 }
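/*
 * Illustrative sketch (not compiled): vm_map_random_address_for_size()
 * is called with the map locked and a map-aligned size (see its use
 * under VM_FLAGS_RANDOM_ADDR in vm_map_enter() below); KERN_NO_SPACE
 * means no suitable hole was found within
 * MAX_TRIES_TO_GET_RANDOM_ADDRESS attempts.
 */
#if 0
	vm_map_offset_t addr;
	kern_return_t   kr;

	vm_map_lock(map);
	kr = vm_map_random_address_for_size(map, &addr, size);
	if (kr == KERN_SUCCESS) {
		/* a hole of at least "size" bytes starts at "addr" */
	}
	vm_map_unlock(map);
#endif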
2354
2355 static boolean_t
2356 vm_memory_malloc_no_cow(
2357 int alias)
2358 {
2359 uint64_t alias_mask;
2360
2361 if (alias > 63) {
2362 return FALSE;
2363 }
2364
2365 alias_mask = 1ULL << alias;
2366 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2367 return TRUE;
2368 }
2369 return FALSE;
2370 }
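/*
 * Illustrative sketch (not compiled): vm_memory_malloc_no_cow_mask is
 * treated as a 64-bit bitmap indexed by VM user tag, so a set bit for
 * a given malloc alias makes the check return TRUE, which steers
 * vm_map_enter() below into giving that mapping its own object with
 * MEMORY_OBJECT_COPY_NONE instead of coalescing/copy-on-write.
 */
#if 0
	if (vm_memory_malloc_no_cow(user_alias)) {
		/* this alias must not be backed by copy-on-write memory */
	}
#endif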
2371
2372 /*
2373 * Routine: vm_map_enter
2374 *
2375 * Description:
2376 * Allocate a range in the specified virtual address map.
2377 * The resulting range will refer to memory defined by
2378 * the given memory object and offset into that object.
2379 *
2380 * Arguments are as defined in the vm_map call.
2381 */
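/*
 * Illustrative sketch (not compiled): a minimal anonymous, zero-fill
 * allocation through vm_map_enter(), letting the kernel pick the
 * address (VM_FLAGS_ANYWHERE).  "map", "size" and the choice of tag
 * are assumptions for the example.
 */
#if 0
	vm_map_offset_t addr = 0;
	kern_return_t   kr;

	kr = vm_map_enter(map, &addr, size,
	    (vm_map_offset_t)0,                 /* mask */
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,                /* alias/tag */
	    VM_OBJECT_NULL,                     /* lazily allocated object */
	    (vm_object_offset_t)0,
	    FALSE,                              /* needs_copy */
	    VM_PROT_DEFAULT,
	    VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
#endif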
2382 static unsigned int vm_map_enter_restore_successes = 0;
2383 static unsigned int vm_map_enter_restore_failures = 0;
2384 kern_return_t
2385 vm_map_enter(
2386 vm_map_t map,
2387 vm_map_offset_t *address, /* IN/OUT */
2388 vm_map_size_t size,
2389 vm_map_offset_t mask,
2390 int flags,
2391 vm_map_kernel_flags_t vmk_flags,
2392 vm_tag_t alias,
2393 vm_object_t object,
2394 vm_object_offset_t offset,
2395 boolean_t needs_copy,
2396 vm_prot_t cur_protection,
2397 vm_prot_t max_protection,
2398 vm_inherit_t inheritance)
2399 {
2400 vm_map_entry_t entry, new_entry;
2401 vm_map_offset_t start, tmp_start, tmp_offset;
2402 vm_map_offset_t end, tmp_end;
2403 vm_map_offset_t tmp2_start, tmp2_end;
2404 vm_map_offset_t desired_empty_end;
2405 vm_map_offset_t step;
2406 kern_return_t result = KERN_SUCCESS;
2407 vm_map_t zap_old_map = VM_MAP_NULL;
2408 vm_map_t zap_new_map = VM_MAP_NULL;
2409 boolean_t map_locked = FALSE;
2410 boolean_t pmap_empty = TRUE;
2411 boolean_t new_mapping_established = FALSE;
2412 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2413 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2414 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2415 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2416 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2417 boolean_t is_submap = vmk_flags.vmkf_submap;
2418 boolean_t permanent = vmk_flags.vmkf_permanent;
2419 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2420 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2421 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2422 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
2423 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2424 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2425 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2426 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2427 vm_tag_t user_alias;
2428 vm_map_offset_t effective_min_offset, effective_max_offset;
2429 kern_return_t kr;
2430 boolean_t clear_map_aligned = FALSE;
2431 vm_map_entry_t hole_entry;
2432 vm_map_size_t chunk_size = 0;
2433
2434 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2435
2436 if (flags & VM_FLAGS_4GB_CHUNK) {
2437 #if defined(__LP64__)
2438 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2439 #else /* __LP64__ */
2440 chunk_size = ANON_CHUNK_SIZE;
2441 #endif /* __LP64__ */
2442 } else {
2443 chunk_size = ANON_CHUNK_SIZE;
2444 }
2445
2446 if (superpage_size) {
2447 switch (superpage_size) {
2448 /*
2449 * Note that the current implementation only supports
2450 * a single size for superpages, SUPERPAGE_SIZE, per
2451 * architecture. As soon as more sizes are to be
2452 * supported, SUPERPAGE_SIZE has to be replaced
2453 * with a lookup of the size depending on superpage_size.
2454 */
2455 #ifdef __x86_64__
2456 case SUPERPAGE_SIZE_ANY:
2457 /* handle it like 2 MB and round up to page size */
2458 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2459 OS_FALLTHROUGH;
2460 case SUPERPAGE_SIZE_2MB:
2461 break;
2462 #endif
2463 default:
2464 return KERN_INVALID_ARGUMENT;
2465 }
2466 mask = SUPERPAGE_SIZE - 1;
2467 if (size & (SUPERPAGE_SIZE - 1)) {
2468 return KERN_INVALID_ARGUMENT;
2469 }
2470 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2471 }
2472
2473
2474 if ((cur_protection & VM_PROT_WRITE) &&
2475 (cur_protection & VM_PROT_EXECUTE) &&
2476 #if XNU_TARGET_OS_OSX
2477 map->pmap != kernel_pmap &&
2478 (cs_process_global_enforcement() ||
2479 (vmk_flags.vmkf_cs_enforcement_override
2480 ? vmk_flags.vmkf_cs_enforcement
2481 : (vm_map_cs_enforcement(map)
2482 #if __arm64__
2483 || !VM_MAP_IS_EXOTIC(map)
2484 #endif /* __arm64__ */
2485 ))) &&
2486 #endif /* XNU_TARGET_OS_OSX */
2487 (VM_MAP_POLICY_WX_FAIL(map) ||
2488 VM_MAP_POLICY_WX_STRIP_X(map)) &&
2489 !entry_for_jit) {
2490 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2491
2492 DTRACE_VM3(cs_wx,
2493 uint64_t, 0,
2494 uint64_t, 0,
2495 vm_prot_t, cur_protection);
2496 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2497 proc_selfpid(),
2498 (current_task()->bsd_info
2499 ? proc_name_address(current_task()->bsd_info)
2500 : "?"),
2501 __FUNCTION__,
2502 (vm_protect_wx_fail ? "failing" : "turning off execute"));
2503 cur_protection &= ~VM_PROT_EXECUTE;
2504 if (vm_protect_wx_fail) {
2505 return KERN_PROTECTION_FAILURE;
2506 }
2507 }
2508
2509 /*
2510 * If the task has requested executable lockdown,
2511 * deny any new executable mapping.
2512 */
2513 if (map->map_disallow_new_exec == TRUE) {
2514 if (cur_protection & VM_PROT_EXECUTE) {
2515 return KERN_PROTECTION_FAILURE;
2516 }
2517 }
2518
2519 if (resilient_codesign) {
2520 assert(!is_submap);
2521 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
2522 if ((cur_protection | max_protection) & reject_prot) {
2523 return KERN_PROTECTION_FAILURE;
2524 }
2525 }
2526
2527 if (resilient_media) {
2528 assert(!is_submap);
2529 // assert(!needs_copy);
2530 if (object != VM_OBJECT_NULL &&
2531 !object->internal) {
2532 /*
2533 * This mapping is directly backed by an external
2534 * memory manager (e.g. a vnode pager for a file):
2535 * we would not have any safe place to inject
2536 * a zero-filled page if an actual page is not
2537 * available, without possibly impacting the actual
2538 * contents of the mapped object (e.g. the file),
2539 * so we can't provide any media resiliency here.
2540 */
2541 return KERN_INVALID_ARGUMENT;
2542 }
2543 }
2544
2545 if (is_submap) {
2546 if (purgable) {
2547 /* submaps can not be purgeable */
2548 return KERN_INVALID_ARGUMENT;
2549 }
2550 if (object == VM_OBJECT_NULL) {
2551 /* submaps can not be created lazily */
2552 return KERN_INVALID_ARGUMENT;
2553 }
2554 }
2555 if (vmk_flags.vmkf_already) {
2556 /*
2557 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2558 * is already present. For it to be meaningful, the requested
2559 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2560 * we shouldn't try to remove what was mapped there first
2561 * (!VM_FLAGS_OVERWRITE).
2562 */
2563 if ((flags & VM_FLAGS_ANYWHERE) ||
2564 (flags & VM_FLAGS_OVERWRITE)) {
2565 return KERN_INVALID_ARGUMENT;
2566 }
2567 }
2568
2569 effective_min_offset = map->min_offset;
2570
2571 if (vmk_flags.vmkf_beyond_max) {
2572 /*
2573 * Allow an insertion beyond the map's max offset.
2574 */
2575 #if !defined(__arm__)
2576 if (vm_map_is_64bit(map)) {
2577 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2578 } else
2579 #endif /* __arm__ */
2580 effective_max_offset = 0x00000000FFFFF000ULL;
2581 } else {
2582 #if XNU_TARGET_OS_OSX
2583 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2584 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2585 } else {
2586 effective_max_offset = map->max_offset;
2587 }
2588 #else /* XNU_TARGET_OS_OSX */
2589 effective_max_offset = map->max_offset;
2590 #endif /* XNU_TARGET_OS_OSX */
2591 }
2592
2593 if (size == 0 ||
2594 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2595 *address = 0;
2596 return KERN_INVALID_ARGUMENT;
2597 }
2598
2599 if (map->pmap == kernel_pmap) {
2600 user_alias = VM_KERN_MEMORY_NONE;
2601 } else {
2602 user_alias = alias;
2603 }
2604
2605 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2606 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2607 }
2608
2609 #define RETURN(value) { result = value; goto BailOut; }
2610
2611 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2612 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2613 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2614 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2615 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2616 }
2617
2618 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2619 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2620 /*
2621 * In most cases, the caller rounds the size up to the
2622 * map's page size.
2623 * If we get a size that is explicitly not map-aligned here,
2624 * we'll have to respect the caller's wish and mark the
2625 * mapping as "not map-aligned" to avoid tripping the
2626 * map alignment checks later.
2627 */
2628 clear_map_aligned = TRUE;
2629 }
2630 if (!anywhere &&
2631 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2632 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2633 /*
2634 * We've been asked to map at a fixed address and that
2635 * address is not aligned to the map's specific alignment.
2636 * The caller should know what it's doing (i.e. most likely
2637 * mapping some fragmented copy map, transferring memory from
2638 * a VM map with a different alignment), so clear map_aligned
2639 * for this new VM map entry and proceed.
2640 */
2641 clear_map_aligned = TRUE;
2642 }
2643
2644 /*
2645 * Only zero-fill objects are allowed to be purgable.
2646 * LP64todo - limit purgable objects to 32-bits for now
2647 */
2648 if (purgable &&
2649 (offset != 0 ||
2650 (object != VM_OBJECT_NULL &&
2651 (object->vo_size != size ||
2652 object->purgable == VM_PURGABLE_DENY))
2653 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2654 return KERN_INVALID_ARGUMENT;
2655 }
2656
2657 if (!anywhere && overwrite) {
2658 /*
2659 * Create a temporary VM map to hold the old mappings in the
2660 * affected area while we create the new one.
2661 * This avoids releasing the VM map lock in
2662 * vm_map_entry_delete() and allows atomicity
2663 * when we want to replace some mappings with a new one.
2664 * It also allows us to restore the old VM mappings if the
2665 * new mapping fails.
2666 */
2667 zap_old_map = vm_map_create(PMAP_NULL,
2668 *address,
2669 *address + size,
2670 map->hdr.entries_pageable);
2671 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2672 vm_map_disable_hole_optimization(zap_old_map);
2673 }
2674
2675 StartAgain:;
2676
2677 start = *address;
2678
2679 if (anywhere) {
2680 vm_map_lock(map);
2681 map_locked = TRUE;
2682
2683 if (entry_for_jit) {
2684 if (map->jit_entry_exists &&
2685 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
2686 result = KERN_INVALID_ARGUMENT;
2687 goto BailOut;
2688 }
2689 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2690 random_address = TRUE;
2691 }
2692 }
2693
2694 if (random_address) {
2695 /*
2696 * Get a random start address.
2697 */
2698 result = vm_map_random_address_for_size(map, address, size);
2699 if (result != KERN_SUCCESS) {
2700 goto BailOut;
2701 }
2702 start = *address;
2703 }
2704 #if XNU_TARGET_OS_OSX
2705 else if ((start == 0 || start == vm_map_min(map)) &&
2706 !map->disable_vmentry_reuse &&
2707 map->vmmap_high_start != 0) {
2708 start = map->vmmap_high_start;
2709 }
2710 #endif /* XNU_TARGET_OS_OSX */
2711
2712
2713 /*
2714 * Calculate the first possible address.
2715 */
2716
2717 if (start < effective_min_offset) {
2718 start = effective_min_offset;
2719 }
2720 if (start > effective_max_offset) {
2721 RETURN(KERN_NO_SPACE);
2722 }
2723
2724 /*
2725 * Look for the first possible address;
2726 * if there's already something at this
2727 * address, we have to start after it.
2728 */
2729
2730 if (map->disable_vmentry_reuse == TRUE) {
2731 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2732 } else {
2733 if (map->holelistenabled) {
2734 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2735
2736 if (hole_entry == NULL) {
2737 /*
2738 * No more space in the map?
2739 */
2740 result = KERN_NO_SPACE;
2741 goto BailOut;
2742 } else {
2743 boolean_t found_hole = FALSE;
2744
2745 do {
2746 if (hole_entry->vme_start >= start) {
2747 start = hole_entry->vme_start;
2748 found_hole = TRUE;
2749 break;
2750 }
2751
2752 if (hole_entry->vme_end > start) {
2753 found_hole = TRUE;
2754 break;
2755 }
2756 hole_entry = hole_entry->vme_next;
2757 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2758
2759 if (found_hole == FALSE) {
2760 result = KERN_NO_SPACE;
2761 goto BailOut;
2762 }
2763
2764 entry = hole_entry;
2765
2766 if (start == 0) {
2767 start += PAGE_SIZE_64;
2768 }
2769 }
2770 } else {
2771 assert(first_free_is_valid(map));
2772
2773 entry = map->first_free;
2774
2775 if (entry == vm_map_to_entry(map)) {
2776 entry = NULL;
2777 } else {
2778 if (entry->vme_next == vm_map_to_entry(map)) {
2779 /*
2780 * Hole at the end of the map.
2781 */
2782 entry = NULL;
2783 } else {
2784 if (start < (entry->vme_next)->vme_start) {
2785 start = entry->vme_end;
2786 start = vm_map_round_page(start,
2787 VM_MAP_PAGE_MASK(map));
2788 } else {
2789 /*
2790 * Need to do a lookup.
2791 */
2792 entry = NULL;
2793 }
2794 }
2795 }
2796
2797 if (entry == NULL) {
2798 vm_map_entry_t tmp_entry;
2799 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2800 assert(!entry_for_jit);
2801 start = tmp_entry->vme_end;
2802 start = vm_map_round_page(start,
2803 VM_MAP_PAGE_MASK(map));
2804 }
2805 entry = tmp_entry;
2806 }
2807 }
2808 }
2809
2810 /*
2811 * In any case, the "entry" always precedes
2812 * the proposed new region throughout the
2813 * loop:
2814 */
2815
2816 while (TRUE) {
2817 vm_map_entry_t next;
2818
2819 /*
2820 * Find the end of the proposed new region.
2821 * Be sure we didn't go beyond the end, or
2822 * wrap around the address.
2823 */
2824
2825 end = ((start + mask) & ~mask);
2826 end = vm_map_round_page(end,
2827 VM_MAP_PAGE_MASK(map));
2828 if (end < start) {
2829 RETURN(KERN_NO_SPACE);
2830 }
2831 start = end;
2832 assert(VM_MAP_PAGE_ALIGNED(start,
2833 VM_MAP_PAGE_MASK(map)));
2834 end += size;
2835
2836 /* We want an entire page of empty space, but don't increase the allocation size. */
2837 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2838
2839 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2840 if (map->wait_for_space) {
2841 assert(!keep_map_locked);
2842 if (size <= (effective_max_offset -
2843 effective_min_offset)) {
2844 assert_wait((event_t)map,
2845 THREAD_ABORTSAFE);
2846 vm_map_unlock(map);
2847 map_locked = FALSE;
2848 thread_block(THREAD_CONTINUE_NULL);
2849 goto StartAgain;
2850 }
2851 }
2852 RETURN(KERN_NO_SPACE);
2853 }
2854
2855 next = entry->vme_next;
2856
2857 if (map->holelistenabled) {
2858 if (entry->vme_end >= desired_empty_end) {
2859 break;
2860 }
2861 } else {
2862 /*
2863 * If there are no more entries, we must win.
2864 *
2865 * OR
2866 *
2867 * If there is another entry, it must be
2868 * after the end of the potential new region.
2869 */
2870
2871 if (next == vm_map_to_entry(map)) {
2872 break;
2873 }
2874
2875 if (next->vme_start >= desired_empty_end) {
2876 break;
2877 }
2878 }
2879
2880 /*
2881 * Didn't fit -- move to the next entry.
2882 */
2883
2884 entry = next;
2885
2886 if (map->holelistenabled) {
2887 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2888 /*
2889 * Wrapped around
2890 */
2891 result = KERN_NO_SPACE;
2892 goto BailOut;
2893 }
2894 start = entry->vme_start;
2895 } else {
2896 start = entry->vme_end;
2897 }
2898
2899 start = vm_map_round_page(start,
2900 VM_MAP_PAGE_MASK(map));
2901 }
2902
2903 if (map->holelistenabled) {
2904 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2905 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2906 }
2907 }
2908
2909 *address = start;
2910 assert(VM_MAP_PAGE_ALIGNED(*address,
2911 VM_MAP_PAGE_MASK(map)));
2912 } else {
2913 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2914 !overwrite &&
2915 user_alias == VM_MEMORY_REALLOC) {
2916 /*
2917 * Force realloc() to switch to a new allocation,
2918 * to prevent 4k-fragmented virtual ranges.
2919 */
2920 // DEBUG4K_ERROR("no realloc in place");
2921 return KERN_NO_SPACE;
2922 }
2923
2924 /*
2925 * Verify that:
2926 * the address doesn't itself violate
2927 * the mask requirement.
2928 */
2929
2930 vm_map_lock(map);
2931 map_locked = TRUE;
2932 if ((start & mask) != 0) {
2933 RETURN(KERN_NO_SPACE);
2934 }
2935
2936 /*
2937 * ... the address is within bounds
2938 */
2939
2940 end = start + size;
2941
2942 if ((start < effective_min_offset) ||
2943 (end > effective_max_offset) ||
2944 (start >= end)) {
2945 RETURN(KERN_INVALID_ADDRESS);
2946 }
2947
2948 if (overwrite && zap_old_map != VM_MAP_NULL) {
2949 int remove_flags;
2950 /*
2951 * Fixed mapping and "overwrite" flag: attempt to
2952 * remove all existing mappings in the specified
2953 * address range, saving them in our "zap_old_map".
2954 */
2955 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2956 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2957 if (vmk_flags.vmkf_overwrite_immutable) {
2958 /* we can overwrite immutable mappings */
2959 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2960 }
2961 (void) vm_map_delete(map, start, end,
2962 remove_flags,
2963 zap_old_map);
2964 }
2965
2966 /*
2967 * ... the starting address isn't allocated
2968 */
2969
2970 if (vm_map_lookup_entry(map, start, &entry)) {
2971 if (!(vmk_flags.vmkf_already)) {
2972 RETURN(KERN_NO_SPACE);
2973 }
2974 /*
2975 * Check if what's already there is what we want.
2976 */
2977 tmp_start = start;
2978 tmp_offset = offset;
2979 if (entry->vme_start < start) {
2980 tmp_start -= start - entry->vme_start;
2981 tmp_offset -= start - entry->vme_start;
2982 }
2983 for (; entry->vme_start < end;
2984 entry = entry->vme_next) {
2985 /*
2986 * Check if the mapping's attributes
2987 * match the existing map entry.
2988 */
2989 if (entry == vm_map_to_entry(map) ||
2990 entry->vme_start != tmp_start ||
2991 entry->is_sub_map != is_submap ||
2992 VME_OFFSET(entry) != tmp_offset ||
2993 entry->needs_copy != needs_copy ||
2994 entry->protection != cur_protection ||
2995 entry->max_protection != max_protection ||
2996 entry->inheritance != inheritance ||
2997 entry->iokit_acct != iokit_acct ||
2998 VME_ALIAS(entry) != alias) {
2999 /* not the same mapping ! */
3000 RETURN(KERN_NO_SPACE);
3001 }
3002 /*
3003 * Check if the same object is being mapped.
3004 */
3005 if (is_submap) {
3006 if (VME_SUBMAP(entry) !=
3007 (vm_map_t) object) {
3008 /* not the same submap */
3009 RETURN(KERN_NO_SPACE);
3010 }
3011 } else {
3012 if (VME_OBJECT(entry) != object) {
3013 /* not the same VM object... */
3014 vm_object_t obj2;
3015
3016 obj2 = VME_OBJECT(entry);
3017 if ((obj2 == VM_OBJECT_NULL ||
3018 obj2->internal) &&
3019 (object == VM_OBJECT_NULL ||
3020 object->internal)) {
3021 /*
3022 * ... but both are
3023 * anonymous memory,
3024 * so equivalent.
3025 */
3026 } else {
3027 RETURN(KERN_NO_SPACE);
3028 }
3029 }
3030 }
3031
3032 tmp_offset += entry->vme_end - entry->vme_start;
3033 tmp_start += entry->vme_end - entry->vme_start;
3034 if (entry->vme_end >= end) {
3035 /* reached the end of our mapping */
3036 break;
3037 }
3038 }
3039 /* it all matches: let's use what's already there ! */
3040 RETURN(KERN_MEMORY_PRESENT);
3041 }
3042
3043 /*
3044 * ... the next region doesn't overlap the
3045 * end point.
3046 */
3047
3048 if ((entry->vme_next != vm_map_to_entry(map)) &&
3049 (entry->vme_next->vme_start < end)) {
3050 RETURN(KERN_NO_SPACE);
3051 }
3052 }
3053
3054 /*
3055 * At this point,
3056 * "start" and "end" should define the endpoints of the
3057 * available new range, and
3058 * "entry" should refer to the region before the new
3059 * range, and
3060 *
3061 * the map should be locked.
3062 */
3063
3064 /*
3065 * See whether we can avoid creating a new entry (and object) by
3066 * extending one of our neighbors. [So far, we only attempt to
3067 * extend from below.] Note that we can never extend/join
3068 * purgable objects because they need to remain distinct
3069 * entities in order to implement their "volatile object"
3070 * semantics.
3071 */
3072
3073 if (purgable ||
3074 entry_for_jit ||
3075 vm_memory_malloc_no_cow(user_alias)) {
3076 if (object == VM_OBJECT_NULL) {
3077 object = vm_object_allocate(size);
3078 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3079 object->true_share = FALSE;
3080 if (purgable) {
3081 task_t owner;
3082 object->purgable = VM_PURGABLE_NONVOLATILE;
3083 if (map->pmap == kernel_pmap) {
3084 /*
3085 * Purgeable mappings made in a kernel
3086 * map are "owned" by the kernel itself
3087 * rather than the current user task
3088 * because they're likely to be used by
3089 * more than this user task (see
3090 * execargs_purgeable_allocate(), for
3091 * example).
3092 */
3093 owner = kernel_task;
3094 } else {
3095 owner = current_task();
3096 }
3097 assert(object->vo_owner == NULL);
3098 assert(object->resident_page_count == 0);
3099 assert(object->wired_page_count == 0);
3100 vm_object_lock(object);
3101 vm_purgeable_nonvolatile_enqueue(object, owner);
3102 vm_object_unlock(object);
3103 }
3104 offset = (vm_object_offset_t)0;
3105 }
3106 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
3107 /* no coalescing if address space uses sub-pages */
3108 } else if ((is_submap == FALSE) &&
3109 (object == VM_OBJECT_NULL) &&
3110 (entry != vm_map_to_entry(map)) &&
3111 (entry->vme_end == start) &&
3112 (!entry->is_shared) &&
3113 (!entry->is_sub_map) &&
3114 (!entry->in_transition) &&
3115 (!entry->needs_wakeup) &&
3116 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
3117 (entry->protection == cur_protection) &&
3118 (entry->max_protection == max_protection) &&
3119 (entry->inheritance == inheritance) &&
3120 ((user_alias == VM_MEMORY_REALLOC) ||
3121 (VME_ALIAS(entry) == alias)) &&
3122 (entry->no_cache == no_cache) &&
3123 (entry->permanent == permanent) &&
3124 /* no coalescing for immutable executable mappings */
3125 !((entry->protection & VM_PROT_EXECUTE) &&
3126 entry->permanent) &&
3127 (!entry->superpage_size && !superpage_size) &&
3128 /*
3129 * No coalescing if not map-aligned, to avoid propagating
3130 * that condition any further than needed:
3131 */
3132 (!entry->map_aligned || !clear_map_aligned) &&
3133 (!entry->zero_wired_pages) &&
3134 (!entry->used_for_jit && !entry_for_jit) &&
3135 (!entry->pmap_cs_associated) &&
3136 (entry->iokit_acct == iokit_acct) &&
3137 (!entry->vme_resilient_codesign) &&
3138 (!entry->vme_resilient_media) &&
3139 (!entry->vme_atomic) &&
3140 (entry->vme_no_copy_on_read == no_copy_on_read) &&
3141
3142 ((entry->vme_end - entry->vme_start) + size <=
3143 (user_alias == VM_MEMORY_REALLOC ?
3144 ANON_CHUNK_SIZE :
3145 NO_COALESCE_LIMIT)) &&
3146
3147 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3148 if (vm_object_coalesce(VME_OBJECT(entry),
3149 VM_OBJECT_NULL,
3150 VME_OFFSET(entry),
3151 (vm_object_offset_t) 0,
3152 (vm_map_size_t)(entry->vme_end - entry->vme_start),
3153 (vm_map_size_t)(end - entry->vme_end))) {
3154 /*
3155 * Coalesced the two objects - can extend
3156 * the previous map entry to include the
3157 * new range.
3158 */
3159 map->size += (end - entry->vme_end);
3160 assert(entry->vme_start < end);
3161 assert(VM_MAP_PAGE_ALIGNED(end,
3162 VM_MAP_PAGE_MASK(map)));
3163 if (__improbable(vm_debug_events)) {
3164 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
3165 }
3166 entry->vme_end = end;
3167 if (map->holelistenabled) {
3168 vm_map_store_update_first_free(map, entry, TRUE);
3169 } else {
3170 vm_map_store_update_first_free(map, map->first_free, TRUE);
3171 }
3172 new_mapping_established = TRUE;
3173 RETURN(KERN_SUCCESS);
3174 }
3175 }
3176
3177 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
3178 new_entry = NULL;
3179
3180 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
3181 tmp2_end = tmp2_start + step;
3182 /*
3183 * Create a new entry
3184 *
3185 * XXX FBDP
3186 * The reserved "page zero" in each process's address space can
3187 * be arbitrarily large. Splitting it into separate objects and
3188 * therefore different VM map entries serves no purpose and just
3189 * slows down operations on the VM map, so let's not split the
3190 * allocation into chunks if the max protection is NONE. That
3191 * memory should never be accessible, so it will never get to the
3192 * default pager.
3193 */
3194 tmp_start = tmp2_start;
3195 if (object == VM_OBJECT_NULL &&
3196 size > chunk_size &&
3197 max_protection != VM_PROT_NONE &&
3198 superpage_size == 0) {
3199 tmp_end = tmp_start + chunk_size;
3200 } else {
3201 tmp_end = tmp2_end;
3202 }
3203 do {
3204 new_entry = vm_map_entry_insert(map,
3205 entry, tmp_start, tmp_end,
3206 object, offset, needs_copy,
3207 FALSE, FALSE,
3208 cur_protection, max_protection,
3209 VM_BEHAVIOR_DEFAULT,
3210 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3211 VM_INHERIT_NONE : inheritance),
3212 0,
3213 no_cache,
3214 permanent,
3215 no_copy_on_read,
3216 superpage_size,
3217 clear_map_aligned,
3218 is_submap,
3219 entry_for_jit,
3220 alias,
3221 translated_allow_execute);
3222
3223 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3224
3225 if (resilient_codesign) {
3226 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
3227 if (!((cur_protection | max_protection) & reject_prot)) {
3228 new_entry->vme_resilient_codesign = TRUE;
3229 }
3230 }
3231
3232 if (resilient_media &&
3233 (object == VM_OBJECT_NULL ||
3234 object->internal)) {
3235 new_entry->vme_resilient_media = TRUE;
3236 }
3237
3238 assert(!new_entry->iokit_acct);
3239 if (!is_submap &&
3240 object != VM_OBJECT_NULL &&
3241 (object->purgable != VM_PURGABLE_DENY ||
3242 object->vo_ledger_tag)) {
3243 assert(new_entry->use_pmap);
3244 assert(!new_entry->iokit_acct);
3245 /*
3246 * Turn off pmap accounting since
3247 * purgeable (or tagged) objects have their
3248 * own ledgers.
3249 */
3250 new_entry->use_pmap = FALSE;
3251 } else if (!is_submap &&
3252 iokit_acct &&
3253 object != VM_OBJECT_NULL &&
3254 object->internal) {
3255 /* alternate accounting */
3256 assert(!new_entry->iokit_acct);
3257 assert(new_entry->use_pmap);
3258 new_entry->iokit_acct = TRUE;
3259 new_entry->use_pmap = FALSE;
3260 DTRACE_VM4(
3261 vm_map_iokit_mapped_region,
3262 vm_map_t, map,
3263 vm_map_offset_t, new_entry->vme_start,
3264 vm_map_offset_t, new_entry->vme_end,
3265 int, VME_ALIAS(new_entry));
3266 vm_map_iokit_mapped_region(
3267 map,
3268 (new_entry->vme_end -
3269 new_entry->vme_start));
3270 } else if (!is_submap) {
3271 assert(!new_entry->iokit_acct);
3272 assert(new_entry->use_pmap);
3273 }
3274
3275 if (is_submap) {
3276 vm_map_t submap;
3277 boolean_t submap_is_64bit;
3278 boolean_t use_pmap;
3279
3280 assert(new_entry->is_sub_map);
3281 assert(!new_entry->use_pmap);
3282 assert(!new_entry->iokit_acct);
3283 submap = (vm_map_t) object;
3284 submap_is_64bit = vm_map_is_64bit(submap);
3285 use_pmap = vmk_flags.vmkf_nested_pmap;
3286 #ifndef NO_NESTED_PMAP
3287 if (use_pmap && submap->pmap == NULL) {
3288 ledger_t ledger = map->pmap->ledger;
3289 /* we need a sub pmap to nest... */
3290 submap->pmap = pmap_create_options(ledger, 0,
3291 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3292 if (submap->pmap == NULL) {
3293 /* let's proceed without nesting... */
3294 }
3295 #if defined(__arm__) || defined(__arm64__)
3296 else {
3297 pmap_set_nested(submap->pmap);
3298 }
3299 #endif
3300 }
3301 if (use_pmap && submap->pmap != NULL) {
3302 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3303 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3304 kr = KERN_FAILURE;
3305 } else {
3306 kr = pmap_nest(map->pmap,
3307 submap->pmap,
3308 tmp_start,
3309 tmp_end - tmp_start);
3310 }
3311 if (kr != KERN_SUCCESS) {
3312 printf("vm_map_enter: "
3313 "pmap_nest(0x%llx,0x%llx) "
3314 "error 0x%x\n",
3315 (long long)tmp_start,
3316 (long long)tmp_end,
3317 kr);
3318 } else {
3319 /* we're now nested ! */
3320 new_entry->use_pmap = TRUE;
3321 pmap_empty = FALSE;
3322 }
3323 }
3324 #endif /* NO_NESTED_PMAP */
3325 }
3326 entry = new_entry;
3327
3328 if (superpage_size) {
3329 vm_page_t pages, m;
3330 vm_object_t sp_object;
3331 vm_object_offset_t sp_offset;
3332
3333 VME_OFFSET_SET(entry, 0);
3334
3335 /* allocate one superpage */
3336 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3337 if (kr != KERN_SUCCESS) {
3338 /* deallocate whole range... */
3339 new_mapping_established = TRUE;
3340 /* ... but only up to "tmp_end" */
3341 size -= end - tmp_end;
3342 RETURN(kr);
3343 }
3344
3345 /* create one vm_object per superpage */
3346 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3347 sp_object->phys_contiguous = TRUE;
3348 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3349 VME_OBJECT_SET(entry, sp_object);
3350 assert(entry->use_pmap);
3351
3352 /* enter the base pages into the object */
3353 vm_object_lock(sp_object);
3354 for (sp_offset = 0;
3355 sp_offset < SUPERPAGE_SIZE;
3356 sp_offset += PAGE_SIZE) {
3357 m = pages;
3358 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3359 pages = NEXT_PAGE(m);
3360 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3361 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3362 }
3363 vm_object_unlock(sp_object);
3364 }
3365 } while (tmp_end != tmp2_end &&
3366 (tmp_start = tmp_end) &&
3367 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3368 tmp_end + chunk_size : tmp2_end));
3369 }
3370
3371 new_mapping_established = TRUE;
3372
3373 BailOut:
3374 assert(map_locked == TRUE);
3375
3376 if (result == KERN_SUCCESS) {
3377 vm_prot_t pager_prot;
3378 memory_object_t pager;
3379
3380 #if DEBUG
3381 if (pmap_empty &&
3382 !(vmk_flags.vmkf_no_pmap_check)) {
3383 assert(vm_map_pmap_is_empty(map,
3384 *address,
3385 *address + size));
3386 }
3387 #endif /* DEBUG */
3388
3389 /*
3390 * For "named" VM objects, let the pager know that the
3391 * memory object is being mapped. Some pagers need to keep
3392 * track of this, to know when they can reclaim the memory
3393 * object, for example.
3394 * VM calls memory_object_map() for each mapping (specifying
3395 * the protection of each mapping) and calls
3396 * memory_object_last_unmap() when all the mappings are gone.
3397 */
3398 pager_prot = max_protection;
3399 if (needs_copy) {
3400 /*
3401 * Copy-On-Write mapping: won't modify
3402 * the memory object.
3403 */
3404 pager_prot &= ~VM_PROT_WRITE;
3405 }
3406 if (!is_submap &&
3407 object != VM_OBJECT_NULL &&
3408 object->named &&
3409 object->pager != MEMORY_OBJECT_NULL) {
3410 vm_object_lock(object);
3411 pager = object->pager;
3412 if (object->named &&
3413 pager != MEMORY_OBJECT_NULL) {
3414 assert(object->pager_ready);
3415 vm_object_mapping_wait(object, THREAD_UNINT);
3416 vm_object_mapping_begin(object);
3417 vm_object_unlock(object);
3418
3419 kr = memory_object_map(pager, pager_prot);
3420 assert(kr == KERN_SUCCESS);
3421
3422 vm_object_lock(object);
3423 vm_object_mapping_end(object);
3424 }
3425 vm_object_unlock(object);
3426 }
3427 }
3428
3429 assert(map_locked == TRUE);
3430
3431 if (!keep_map_locked) {
3432 vm_map_unlock(map);
3433 map_locked = FALSE;
3434 }
3435
3436 /*
3437 * We can't hold the map lock if we enter this block.
3438 */
3439
3440 if (result == KERN_SUCCESS) {
3441 /* Wire down the new entry if the user
3442 * requested all new map entries be wired.
3443 */
3444 if ((map->wiring_required) || (superpage_size)) {
3445 assert(!keep_map_locked);
3446 pmap_empty = FALSE; /* pmap won't be empty */
3447 kr = vm_map_wire_kernel(map, start, end,
3448 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3449 TRUE);
3450 result = kr;
3451 }
3452
3453 }
3454
3455 if (result != KERN_SUCCESS) {
3456 if (new_mapping_established) {
3457 /*
3458 * We have to get rid of the new mappings since we
3459 * won't make them available to the user.
3460 * Try to do that atomically, to minimize the risk
3461 * that someone else creates new mappings in that range.
3462 */
3463 zap_new_map = vm_map_create(PMAP_NULL,
3464 *address,
3465 *address + size,
3466 map->hdr.entries_pageable);
3467 vm_map_set_page_shift(zap_new_map,
3468 VM_MAP_PAGE_SHIFT(map));
3469 vm_map_disable_hole_optimization(zap_new_map);
3470
3471 if (!map_locked) {
3472 vm_map_lock(map);
3473 map_locked = TRUE;
3474 }
3475 (void) vm_map_delete(map, *address, *address + size,
3476 (VM_MAP_REMOVE_SAVE_ENTRIES |
3477 VM_MAP_REMOVE_NO_MAP_ALIGN),
3478 zap_new_map);
3479 }
3480 if (zap_old_map != VM_MAP_NULL &&
3481 zap_old_map->hdr.nentries != 0) {
3482 vm_map_entry_t entry1, entry2;
3483
3484 /*
3485 * The new mapping failed. Attempt to restore
3486 * the old mappings, saved in the "zap_old_map".
3487 */
3488 if (!map_locked) {
3489 vm_map_lock(map);
3490 map_locked = TRUE;
3491 }
3492
3493 /* first check if the coast is still clear */
3494 start = vm_map_first_entry(zap_old_map)->vme_start;
3495 end = vm_map_last_entry(zap_old_map)->vme_end;
3496 if (vm_map_lookup_entry(map, start, &entry1) ||
3497 vm_map_lookup_entry(map, end, &entry2) ||
3498 entry1 != entry2) {
3499 /*
3500 * Part of that range has already been
3501 * re-mapped: we can't restore the old
3502 * mappings...
3503 */
3504 vm_map_enter_restore_failures++;
3505 } else {
3506 /*
3507 * Transfer the saved map entries from
3508 * "zap_old_map" to the original "map",
3509 * inserting them all after "entry1".
3510 */
3511 for (entry2 = vm_map_first_entry(zap_old_map);
3512 entry2 != vm_map_to_entry(zap_old_map);
3513 entry2 = vm_map_first_entry(zap_old_map)) {
3514 vm_map_size_t entry_size;
3515
3516 entry_size = (entry2->vme_end -
3517 entry2->vme_start);
3518 vm_map_store_entry_unlink(zap_old_map,
3519 entry2);
3520 zap_old_map->size -= entry_size;
3521 vm_map_store_entry_link(map, entry1, entry2,
3522 VM_MAP_KERNEL_FLAGS_NONE);
3523 map->size += entry_size;
3524 entry1 = entry2;
3525 }
3526 if (map->wiring_required) {
3527 /*
3528 * XXX TODO: we should rewire the
3529 * old pages here...
3530 */
3531 }
3532 vm_map_enter_restore_successes++;
3533 }
3534 }
3535 }
3536
3537 /*
3538 * The caller is responsible for releasing the lock if it requested to
3539 * keep the map locked.
3540 */
3541 if (map_locked && !keep_map_locked) {
3542 vm_map_unlock(map);
3543 }
3544
3545 /*
3546 * Get rid of the "zap_maps" and all the map entries that
3547 * they may still contain.
3548 */
3549 if (zap_old_map != VM_MAP_NULL) {
3550 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3551 zap_old_map = VM_MAP_NULL;
3552 }
3553 if (zap_new_map != VM_MAP_NULL) {
3554 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3555 zap_new_map = VM_MAP_NULL;
3556 }
3557
3558 return result;
3559
3560 #undef RETURN
3561 }
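/*
 * Illustrative sketch (not compiled): the fixed-address +
 * VM_FLAGS_OVERWRITE path above atomically replaces existing mappings,
 * saving them in "zap_old_map" so they can be restored if the new
 * mapping fails.  A caller relying on that behavior might look like
 * this ("map", "addr" and "size" are assumed; VM_FLAGS_FIXED simply
 * means "not VM_FLAGS_ANYWHERE").
 */
#if 0
	kern_return_t kr;

	kr = vm_map_enter(map, &addr, size,
	    (vm_map_offset_t)0,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,
	    VM_OBJECT_NULL,
	    (vm_object_offset_t)0,
	    FALSE,
	    VM_PROT_DEFAULT,
	    VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
	/* on failure, the previous mappings in [addr, addr + size) are restored */
#endif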
3562
3563 #if __arm64__
3564 extern const struct memory_object_pager_ops fourk_pager_ops;
3565 kern_return_t
3566 vm_map_enter_fourk(
3567 vm_map_t map,
3568 vm_map_offset_t *address, /* IN/OUT */
3569 vm_map_size_t size,
3570 vm_map_offset_t mask,
3571 int flags,
3572 vm_map_kernel_flags_t vmk_flags,
3573 vm_tag_t alias,
3574 vm_object_t object,
3575 vm_object_offset_t offset,
3576 boolean_t needs_copy,
3577 vm_prot_t cur_protection,
3578 vm_prot_t max_protection,
3579 vm_inherit_t inheritance)
3580 {
3581 vm_map_entry_t entry, new_entry;
3582 vm_map_offset_t start, fourk_start;
3583 vm_map_offset_t end, fourk_end;
3584 vm_map_size_t fourk_size;
3585 kern_return_t result = KERN_SUCCESS;
3586 vm_map_t zap_old_map = VM_MAP_NULL;
3587 vm_map_t zap_new_map = VM_MAP_NULL;
3588 boolean_t map_locked = FALSE;
3589 boolean_t pmap_empty = TRUE;
3590 boolean_t new_mapping_established = FALSE;
3591 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3592 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3593 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3594 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3595 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3596 boolean_t is_submap = vmk_flags.vmkf_submap;
3597 boolean_t permanent = vmk_flags.vmkf_permanent;
3598 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3599 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3600 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3601 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
3602 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3603 vm_map_offset_t effective_min_offset, effective_max_offset;
3604 kern_return_t kr;
3605 boolean_t clear_map_aligned = FALSE;
3606 memory_object_t fourk_mem_obj;
3607 vm_object_t fourk_object;
3608 vm_map_offset_t fourk_pager_offset;
3609 int fourk_pager_index_start, fourk_pager_index_num;
3610 int cur_idx;
3611 boolean_t fourk_copy;
3612 vm_object_t copy_object;
3613 vm_object_offset_t copy_offset;
3614
3615 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3616 panic("%s:%d\n", __FUNCTION__, __LINE__);
3617 }
3618 fourk_mem_obj = MEMORY_OBJECT_NULL;
3619 fourk_object = VM_OBJECT_NULL;
3620
3621 if (superpage_size) {
3622 return KERN_NOT_SUPPORTED;
3623 }
3624
3625 if ((cur_protection & VM_PROT_WRITE) &&
3626 (cur_protection & VM_PROT_EXECUTE) &&
3627 #if XNU_TARGET_OS_OSX
3628 map->pmap != kernel_pmap &&
3629 (vm_map_cs_enforcement(map)
3630 #if __arm64__
3631 || !VM_MAP_IS_EXOTIC(map)
3632 #endif /* __arm64__ */
3633 ) &&
3634 #endif /* XNU_TARGET_OS_OSX */
3635 !entry_for_jit) {
3636 DTRACE_VM3(cs_wx,
3637 uint64_t, 0,
3638 uint64_t, 0,
3639 vm_prot_t, cur_protection);
3640 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3641 "turning off execute\n",
3642 proc_selfpid(),
3643 (current_task()->bsd_info
3644 ? proc_name_address(current_task()->bsd_info)
3645 : "?"),
3646 __FUNCTION__);
3647 cur_protection &= ~VM_PROT_EXECUTE;
3648 }
3649
3650 /*
3651 * If the task has requested executable lockdown,
3652 * deny any new executable mapping.
3653 */
3654 if (map->map_disallow_new_exec == TRUE) {
3655 if (cur_protection & VM_PROT_EXECUTE) {
3656 return KERN_PROTECTION_FAILURE;
3657 }
3658 }
3659
3660 if (is_submap) {
3661 return KERN_NOT_SUPPORTED;
3662 }
3663 if (vmk_flags.vmkf_already) {
3664 return KERN_NOT_SUPPORTED;
3665 }
3666 if (purgable || entry_for_jit) {
3667 return KERN_NOT_SUPPORTED;
3668 }
3669
3670 effective_min_offset = map->min_offset;
3671
3672 if (vmk_flags.vmkf_beyond_max) {
3673 return KERN_NOT_SUPPORTED;
3674 } else {
3675 effective_max_offset = map->max_offset;
3676 }
3677
3678 if (size == 0 ||
3679 (offset & FOURK_PAGE_MASK) != 0) {
3680 *address = 0;
3681 return KERN_INVALID_ARGUMENT;
3682 }
3683
3684 #define RETURN(value) { result = value; goto BailOut; }
3685
3686 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3687 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3688
3689 if (!anywhere && overwrite) {
3690 return KERN_NOT_SUPPORTED;
3691 }
3692 if (!anywhere && overwrite) {
3693 /*
3694 * Create a temporary VM map to hold the old mappings in the
3695 * affected area while we create the new one.
3696 * This avoids releasing the VM map lock in
3697 * vm_map_entry_delete() and allows atomicity
3698 * when we want to replace some mappings with a new one.
3699 * It also allows us to restore the old VM mappings if the
3700 * new mapping fails.
3701 */
3702 zap_old_map = vm_map_create(PMAP_NULL,
3703 *address,
3704 *address + size,
3705 map->hdr.entries_pageable);
3706 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3707 vm_map_disable_hole_optimization(zap_old_map);
3708 }
3709
3710 fourk_start = *address;
3711 fourk_size = size;
3712 fourk_end = fourk_start + fourk_size;
3713
3714 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3715 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3716 size = end - start;
3717
3718 if (anywhere) {
3719 return KERN_NOT_SUPPORTED;
3720 } else {
3721 /*
3722 * Verify that:
3723 * the address doesn't itself violate
3724 * the mask requirement.
3725 */
3726
3727 vm_map_lock(map);
3728 map_locked = TRUE;
3729 if ((start & mask) != 0) {
3730 RETURN(KERN_NO_SPACE);
3731 }
3732
3733 /*
3734 * ... the address is within bounds
3735 */
3736
3737 end = start + size;
3738
3739 if ((start < effective_min_offset) ||
3740 (end > effective_max_offset) ||
3741 (start >= end)) {
3742 RETURN(KERN_INVALID_ADDRESS);
3743 }
3744
3745 if (overwrite && zap_old_map != VM_MAP_NULL) {
3746 /*
3747 * Fixed mapping and "overwrite" flag: attempt to
3748 * remove all existing mappings in the specified
3749 * address range, saving them in our "zap_old_map".
3750 */
3751 (void) vm_map_delete(map, start, end,
3752 (VM_MAP_REMOVE_SAVE_ENTRIES |
3753 VM_MAP_REMOVE_NO_MAP_ALIGN),
3754 zap_old_map);
3755 }
3756
3757 /*
3758 * ... the starting address isn't allocated
3759 */
3760 if (vm_map_lookup_entry(map, start, &entry)) {
3761 vm_object_t cur_object, shadow_object;
3762
3763 /*
3764 * We might already have some 4K mappings
3765 * in a 16K page here.
3766 */
3767
3768 if (entry->vme_end - entry->vme_start
3769 != SIXTEENK_PAGE_SIZE) {
3770 RETURN(KERN_NO_SPACE);
3771 }
3772 if (entry->is_sub_map) {
3773 RETURN(KERN_NO_SPACE);
3774 }
3775 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3776 RETURN(KERN_NO_SPACE);
3777 }
3778
3779 /* go all the way down the shadow chain */
3780 cur_object = VME_OBJECT(entry);
3781 vm_object_lock(cur_object);
3782 while (cur_object->shadow != VM_OBJECT_NULL) {
3783 shadow_object = cur_object->shadow;
3784 vm_object_lock(shadow_object);
3785 vm_object_unlock(cur_object);
3786 cur_object = shadow_object;
3787 shadow_object = VM_OBJECT_NULL;
3788 }
3789 if (cur_object->internal ||
3790 cur_object->pager == NULL) {
3791 vm_object_unlock(cur_object);
3792 RETURN(KERN_NO_SPACE);
3793 }
3794 if (cur_object->pager->mo_pager_ops
3795 != &fourk_pager_ops) {
3796 vm_object_unlock(cur_object);
3797 RETURN(KERN_NO_SPACE);
3798 }
3799 fourk_object = cur_object;
3800 fourk_mem_obj = fourk_object->pager;
3801
3802 /* keep the "4K" object alive */
3803 vm_object_reference_locked(fourk_object);
3804 memory_object_reference(fourk_mem_obj);
3805 vm_object_unlock(fourk_object);
3806
3807 /* merge permissions */
3808 entry->protection |= cur_protection;
3809 entry->max_protection |= max_protection;
3810 if ((entry->protection & (VM_PROT_WRITE |
3811 VM_PROT_EXECUTE)) ==
3812 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3813 fourk_binary_compatibility_unsafe &&
3814 fourk_binary_compatibility_allow_wx) {
3815 /* write+execute: need to be "jit" */
3816 entry->used_for_jit = TRUE;
3817 }
3818 goto map_in_fourk_pager;
3819 }
3820
3821 /*
3822 * ... the next region doesn't overlap the
3823 * end point.
3824 */
3825
3826 if ((entry->vme_next != vm_map_to_entry(map)) &&
3827 (entry->vme_next->vme_start < end)) {
3828 RETURN(KERN_NO_SPACE);
3829 }
3830 }
3831
3832 /*
3833 * At this point,
3834 * "start" and "end" should define the endpoints of the
3835 * available new range, and
3836 * "entry" should refer to the region before the new
3837 * range, and
3838 *
3839 * the map should be locked.
3840 */
3841
3842 /* create a new "4K" pager */
3843 fourk_mem_obj = fourk_pager_create();
3844 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3845 assert(fourk_object);
3846
3847 /* keep the "4K" object alive */
3848 vm_object_reference(fourk_object);
3849
3850 /* create a "copy" object, to map the "4K" object copy-on-write */
3851 fourk_copy = TRUE;
3852 result = vm_object_copy_strategically(fourk_object,
3853 0,
3854 end - start,
3855 &copy_object,
3856 &copy_offset,
3857 &fourk_copy);
3858 assert(result == KERN_SUCCESS);
3859 assert(copy_object != VM_OBJECT_NULL);
3860 assert(copy_offset == 0);
3861
3862 /* map the "4K" pager's copy object */
3863 new_entry =
3864 vm_map_entry_insert(map, entry,
3865 vm_map_trunc_page(start,
3866 VM_MAP_PAGE_MASK(map)),
3867 vm_map_round_page(end,
3868 VM_MAP_PAGE_MASK(map)),
3869 copy_object,
3870 0, /* offset */
3871 FALSE, /* needs_copy */
3872 FALSE,
3873 FALSE,
3874 cur_protection, max_protection,
3875 VM_BEHAVIOR_DEFAULT,
3876 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3877 VM_INHERIT_NONE : inheritance),
3878 0,
3879 no_cache,
3880 permanent,
3881 no_copy_on_read,
3882 superpage_size,
3883 clear_map_aligned,
3884 is_submap,
3885 FALSE, /* jit */
3886 alias,
3887 translated_allow_execute);
3888 entry = new_entry;
3889
3890 #if VM_MAP_DEBUG_FOURK
3891 if (vm_map_debug_fourk) {
3892 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3893 map,
3894 (uint64_t) entry->vme_start,
3895 (uint64_t) entry->vme_end,
3896 fourk_mem_obj);
3897 }
3898 #endif /* VM_MAP_DEBUG_FOURK */
3899
3900 new_mapping_established = TRUE;
3901
3902 map_in_fourk_pager:
3903 /* "map" the original "object" where it belongs in the "4K" pager */
3904 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3905 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3906 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3907 fourk_pager_index_num = 4;
3908 } else {
3909 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3910 }
3911 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3912 fourk_pager_index_num = 4 - fourk_pager_index_start;
3913 }
3914 for (cur_idx = 0;
3915 cur_idx < fourk_pager_index_num;
3916 cur_idx++) {
3917 vm_object_t old_object;
3918 vm_object_offset_t old_offset;
3919
3920 kr = fourk_pager_populate(fourk_mem_obj,
3921 TRUE, /* overwrite */
3922 fourk_pager_index_start + cur_idx,
3923 object,
3924 (object
3925 ? (offset +
3926 (cur_idx * FOURK_PAGE_SIZE))
3927 : 0),
3928 &old_object,
3929 &old_offset);
3930 #if VM_MAP_DEBUG_FOURK
3931 if (vm_map_debug_fourk) {
3932 if (old_object == (vm_object_t) -1 &&
3933 old_offset == (vm_object_offset_t) -1) {
3934 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3935 "pager [%p:0x%llx] "
3936 "populate[%d] "
3937 "[object:%p,offset:0x%llx]\n",
3938 map,
3939 (uint64_t) entry->vme_start,
3940 (uint64_t) entry->vme_end,
3941 fourk_mem_obj,
3942 VME_OFFSET(entry),
3943 fourk_pager_index_start + cur_idx,
3944 object,
3945 (object
3946 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3947 : 0));
3948 } else {
3949 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3950 "pager [%p:0x%llx] "
3951 "populate[%d] [object:%p,offset:0x%llx] "
3952 "old [%p:0x%llx]\n",
3953 map,
3954 (uint64_t) entry->vme_start,
3955 (uint64_t) entry->vme_end,
3956 fourk_mem_obj,
3957 VME_OFFSET(entry),
3958 fourk_pager_index_start + cur_idx,
3959 object,
3960 (object
3961 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3962 : 0),
3963 old_object,
3964 old_offset);
3965 }
3966 }
3967 #endif /* VM_MAP_DEBUG_FOURK */
3968
3969 assert(kr == KERN_SUCCESS);
3970 if (object != old_object &&
3971 object != VM_OBJECT_NULL &&
3972 object != (vm_object_t) -1) {
3973 vm_object_reference(object);
3974 }
3975 if (object != old_object &&
3976 old_object != VM_OBJECT_NULL &&
3977 old_object != (vm_object_t) -1) {
3978 vm_object_deallocate(old_object);
3979 }
3980 }
3981
3982 BailOut:
3983 assert(map_locked == TRUE);
3984
3985 if (result == KERN_SUCCESS) {
3986 vm_prot_t pager_prot;
3987 memory_object_t pager;
3988
3989 #if DEBUG
3990 if (pmap_empty &&
3991 !(vmk_flags.vmkf_no_pmap_check)) {
3992 assert(vm_map_pmap_is_empty(map,
3993 *address,
3994 *address + size));
3995 }
3996 #endif /* DEBUG */
3997
3998 /*
3999 * For "named" VM objects, let the pager know that the
4000 * memory object is being mapped. Some pagers need to keep
4001 * track of this, to know when they can reclaim the memory
4002 * object, for example.
4003 * VM calls memory_object_map() for each mapping (specifying
4004 * the protection of each mapping) and calls
4005 * memory_object_last_unmap() when all the mappings are gone.
4006 */
4007 pager_prot = max_protection;
4008 if (needs_copy) {
4009 /*
4010 * Copy-On-Write mapping: won't modify
4011 * the memory object.
4012 */
4013 pager_prot &= ~VM_PROT_WRITE;
4014 }
4015 if (!is_submap &&
4016 object != VM_OBJECT_NULL &&
4017 object->named &&
4018 object->pager != MEMORY_OBJECT_NULL) {
4019 vm_object_lock(object);
4020 pager = object->pager;
4021 if (object->named &&
4022 pager != MEMORY_OBJECT_NULL) {
4023 assert(object->pager_ready);
4024 vm_object_mapping_wait(object, THREAD_UNINT);
4025 vm_object_mapping_begin(object);
4026 vm_object_unlock(object);
4027
4028 kr = memory_object_map(pager, pager_prot);
4029 assert(kr == KERN_SUCCESS);
4030
4031 vm_object_lock(object);
4032 vm_object_mapping_end(object);
4033 }
4034 vm_object_unlock(object);
4035 }
4036 if (!is_submap &&
4037 fourk_object != VM_OBJECT_NULL &&
4038 fourk_object->named &&
4039 fourk_object->pager != MEMORY_OBJECT_NULL) {
4040 vm_object_lock(fourk_object);
4041 pager = fourk_object->pager;
4042 if (fourk_object->named &&
4043 pager != MEMORY_OBJECT_NULL) {
4044 assert(fourk_object->pager_ready);
4045 vm_object_mapping_wait(fourk_object,
4046 THREAD_UNINT);
4047 vm_object_mapping_begin(fourk_object);
4048 vm_object_unlock(fourk_object);
4049
4050 kr = memory_object_map(pager, VM_PROT_READ);
4051 assert(kr == KERN_SUCCESS);
4052
4053 vm_object_lock(fourk_object);
4054 vm_object_mapping_end(fourk_object);
4055 }
4056 vm_object_unlock(fourk_object);
4057 }
4058 }
4059
4060 if (fourk_object != VM_OBJECT_NULL) {
4061 vm_object_deallocate(fourk_object);
4062 fourk_object = VM_OBJECT_NULL;
4063 memory_object_deallocate(fourk_mem_obj);
4064 fourk_mem_obj = MEMORY_OBJECT_NULL;
4065 }
4066
4067 assert(map_locked == TRUE);
4068
4069 if (!keep_map_locked) {
4070 vm_map_unlock(map);
4071 map_locked = FALSE;
4072 }
4073
4074 /*
4075 * We can't hold the map lock if we enter this block.
4076 */
4077
4078 if (result == KERN_SUCCESS) {
4079 /* Wire down the new entry if the user
4080 * requested all new map entries be wired.
4081 */
4082 if ((map->wiring_required) || (superpage_size)) {
4083 assert(!keep_map_locked);
4084 pmap_empty = FALSE; /* pmap won't be empty */
4085 kr = vm_map_wire_kernel(map, start, end,
4086 new_entry->protection, VM_KERN_MEMORY_MLOCK,
4087 TRUE);
4088 result = kr;
4089 }
4090
4091 }
4092
4093 if (result != KERN_SUCCESS) {
4094 if (new_mapping_established) {
4095 /*
4096 * We have to get rid of the new mappings since we
4097 * won't make them available to the user.
4098 * Try to do that atomically, to minimize the risk
4099 * that someone else creates new mappings in that range.
4100 */
4101 zap_new_map = vm_map_create(PMAP_NULL,
4102 *address,
4103 *address + size,
4104 map->hdr.entries_pageable);
4105 vm_map_set_page_shift(zap_new_map,
4106 VM_MAP_PAGE_SHIFT(map));
4107 vm_map_disable_hole_optimization(zap_new_map);
4108
4109 if (!map_locked) {
4110 vm_map_lock(map);
4111 map_locked = TRUE;
4112 }
4113 (void) vm_map_delete(map, *address, *address + size,
4114 (VM_MAP_REMOVE_SAVE_ENTRIES |
4115 VM_MAP_REMOVE_NO_MAP_ALIGN),
4116 zap_new_map);
4117 }
4118 if (zap_old_map != VM_MAP_NULL &&
4119 zap_old_map->hdr.nentries != 0) {
4120 vm_map_entry_t entry1, entry2;
4121
4122 /*
4123 * The new mapping failed. Attempt to restore
4124 * the old mappings, saved in the "zap_old_map".
4125 */
4126 if (!map_locked) {
4127 vm_map_lock(map);
4128 map_locked = TRUE;
4129 }
4130
4131 /* first check if the coast is still clear */
4132 start = vm_map_first_entry(zap_old_map)->vme_start;
4133 end = vm_map_last_entry(zap_old_map)->vme_end;
4134 if (vm_map_lookup_entry(map, start, &entry1) ||
4135 vm_map_lookup_entry(map, end, &entry2) ||
4136 entry1 != entry2) {
4137 /*
4138 * Part of that range has already been
4139 * re-mapped: we can't restore the old
4140 * mappings...
4141 */
4142 vm_map_enter_restore_failures++;
4143 } else {
4144 /*
4145 * Transfer the saved map entries from
4146 * "zap_old_map" to the original "map",
4147 * inserting them all after "entry1".
4148 */
4149 for (entry2 = vm_map_first_entry(zap_old_map);
4150 entry2 != vm_map_to_entry(zap_old_map);
4151 entry2 = vm_map_first_entry(zap_old_map)) {
4152 vm_map_size_t entry_size;
4153
4154 entry_size = (entry2->vme_end -
4155 entry2->vme_start);
4156 vm_map_store_entry_unlink(zap_old_map,
4157 entry2);
4158 zap_old_map->size -= entry_size;
4159 vm_map_store_entry_link(map, entry1, entry2,
4160 VM_MAP_KERNEL_FLAGS_NONE);
4161 map->size += entry_size;
4162 entry1 = entry2;
4163 }
4164 if (map->wiring_required) {
4165 /*
4166 * XXX TODO: we should rewire the
4167 * old pages here...
4168 */
4169 }
4170 vm_map_enter_restore_successes++;
4171 }
4172 }
4173 }
4174
4175 /*
4176 * The caller is responsible for releasing the lock if it requested to
4177 * keep the map locked.
4178 */
4179 if (map_locked && !keep_map_locked) {
4180 vm_map_unlock(map);
4181 }
4182
4183 /*
4184 * Get rid of the "zap_maps" and all the map entries that
4185 * they may still contain.
4186 */
4187 if (zap_old_map != VM_MAP_NULL) {
4188 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4189 zap_old_map = VM_MAP_NULL;
4190 }
4191 if (zap_new_map != VM_MAP_NULL) {
4192 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4193 zap_new_map = VM_MAP_NULL;
4194 }
4195
4196 return result;
4197
4198 #undef RETURN
4199 }
4200 #endif /* __arm64__ */
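/*
 * Illustrative sketch (not part of the xnu sources): the 4K-in-16K index
 * arithmetic used by the "fourk" pager populate loop above, restated as a
 * standalone userspace program.  It assumes a 4K subpage within a 16K
 * native page; the EX_* macros and the sample values are hypothetical
 * stand-ins for FOURK_PAGE_SIZE / SIXTEENK_PAGE_MASK.
 */
#if 0 /* example only, never compiled into the kernel */
#include <stdio.h>
#include <stdint.h>

#define EX_FOURK_PAGE_SIZE      0x1000ULL
#define EX_SIXTEENK_PAGE_SIZE   0x4000ULL
#define EX_SIXTEENK_PAGE_MASK   0x3FFFULL

int
main(void)
{
    uint64_t fourk_start = 0x2000;   /* 4K-aligned start within a 16K page */
    uint64_t fourk_size  = 0x3000;   /* three 4K subpages requested */
    uint64_t pager_offset = fourk_start & EX_SIXTEENK_PAGE_MASK;
    int index_start = (int)(pager_offset / EX_FOURK_PAGE_SIZE);
    int index_num;

    if (fourk_size > EX_SIXTEENK_PAGE_SIZE) {
        index_num = 4;
    } else {
        index_num = (int)(fourk_size / EX_FOURK_PAGE_SIZE);
    }
    if (index_start + index_num > 4) {
        /* clamp to the four 4K slots of a single 16K page */
        index_num = 4 - index_start;
    }
    printf("populate slots [%d..%d] of the 16K page\n",
        index_start, index_start + index_num - 1);
    return 0;
}
#endif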
4201
4202 /*
4203 * Counters for the prefault optimization.
4204 */
4205 int64_t vm_prefault_nb_pages = 0;
4206 int64_t vm_prefault_nb_bailout = 0;
4207
4208 static kern_return_t
4209 vm_map_enter_mem_object_helper(
4210 vm_map_t target_map,
4211 vm_map_offset_t *address,
4212 vm_map_size_t initial_size,
4213 vm_map_offset_t mask,
4214 int flags,
4215 vm_map_kernel_flags_t vmk_flags,
4216 vm_tag_t tag,
4217 ipc_port_t port,
4218 vm_object_offset_t offset,
4219 boolean_t copy,
4220 vm_prot_t cur_protection,
4221 vm_prot_t max_protection,
4222 vm_inherit_t inheritance,
4223 upl_page_list_ptr_t page_list,
4224 unsigned int page_list_count)
4225 {
4226 vm_map_address_t map_addr;
4227 vm_map_size_t map_size;
4228 vm_object_t object;
4229 vm_object_size_t size;
4230 kern_return_t result;
4231 boolean_t mask_cur_protection, mask_max_protection;
4232 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
4233 vm_map_offset_t offset_in_mapping = 0;
4234 #if __arm64__
4235 boolean_t fourk = vmk_flags.vmkf_fourk;
4236 #endif /* __arm64__ */
4237
4238 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4239 /* XXX TODO4K prefaulting depends on page size... */
4240 try_prefault = FALSE;
4241 }
4242
4243 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4244
4245 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4246 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4247 cur_protection &= ~VM_PROT_IS_MASK;
4248 max_protection &= ~VM_PROT_IS_MASK;
4249
4250 /*
4251 * Check arguments for validity
4252 */
4253 if ((target_map == VM_MAP_NULL) ||
4254 (cur_protection & ~VM_PROT_ALL) ||
4255 (max_protection & ~VM_PROT_ALL) ||
4256 (inheritance > VM_INHERIT_LAST_VALID) ||
4257 (try_prefault && (copy || !page_list)) ||
4258 initial_size == 0) {
4259 return KERN_INVALID_ARGUMENT;
4260 }
4261
4262 #if __arm64__
4263 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4264 /* no "fourk" if map is using a sub-page page size */
4265 fourk = FALSE;
4266 }
4267 if (fourk) {
4268 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4269 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4270 } else
4271 #endif /* __arm64__ */
4272 {
4273 map_addr = vm_map_trunc_page(*address,
4274 VM_MAP_PAGE_MASK(target_map));
4275 map_size = vm_map_round_page(initial_size,
4276 VM_MAP_PAGE_MASK(target_map));
4277 }
4278 size = vm_object_round_page(initial_size);
4279
4280 /*
4281 * Find the vm object (if any) corresponding to this port.
4282 */
4283 if (!IP_VALID(port)) {
4284 object = VM_OBJECT_NULL;
4285 offset = 0;
4286 copy = FALSE;
4287 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4288 vm_named_entry_t named_entry;
4289 vm_object_offset_t data_offset;
4290
4291 named_entry = (vm_named_entry_t) ip_get_kobject(port);
4292
4293 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4294 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4295 data_offset = named_entry->data_offset;
4296 offset += named_entry->data_offset;
4297 } else {
4298 data_offset = 0;
4299 }
4300
4301 /* a few checks to make sure user is obeying rules */
4302 if (size == 0) {
4303 if (offset >= named_entry->size) {
4304 return KERN_INVALID_RIGHT;
4305 }
4306 size = named_entry->size - offset;
4307 }
4308 if (mask_max_protection) {
4309 max_protection &= named_entry->protection;
4310 }
4311 if (mask_cur_protection) {
4312 cur_protection &= named_entry->protection;
4313 }
4314 if ((named_entry->protection & max_protection) !=
4315 max_protection) {
4316 return KERN_INVALID_RIGHT;
4317 }
4318 if ((named_entry->protection & cur_protection) !=
4319 cur_protection) {
4320 return KERN_INVALID_RIGHT;
4321 }
4322 if (offset + size < offset) {
4323 /* overflow */
4324 return KERN_INVALID_ARGUMENT;
4325 }
4326 if (named_entry->size < (offset + initial_size)) {
4327 return KERN_INVALID_ARGUMENT;
4328 }
4329
4330 if (named_entry->is_copy) {
4331 /* for a vm_map_copy, we can only map it whole */
4332 if ((size != named_entry->size) &&
4333 (vm_map_round_page(size,
4334 VM_MAP_PAGE_MASK(target_map)) ==
4335 named_entry->size)) {
4336 /* XXX FBDP use the rounded size... */
4337 size = vm_map_round_page(
4338 size,
4339 VM_MAP_PAGE_MASK(target_map));
4340 }
4341 }
4342
4343 /* the caller's "offset" parameter is relative to the start of the */
4344 /* named entry; convert it into an offset within the backing object */
4345 offset = offset + named_entry->offset;
4346
4347 if (!VM_MAP_PAGE_ALIGNED(size,
4348 VM_MAP_PAGE_MASK(target_map))) {
4349 /*
4350 * Let's not map more than requested;
4351 * vm_map_enter() will handle this "not map-aligned"
4352 * case.
4353 */
4354 map_size = size;
4355 }
4356
4357 named_entry_lock(named_entry);
4358 if (named_entry->is_sub_map) {
4359 vm_map_t submap;
4360
4361 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4362 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4363 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4364 }
4365
4366 submap = named_entry->backing.map;
4367 vm_map_reference(submap);
4368 named_entry_unlock(named_entry);
4369
4370 vmk_flags.vmkf_submap = TRUE;
4371
4372 result = vm_map_enter(target_map,
4373 &map_addr,
4374 map_size,
4375 mask,
4376 flags,
4377 vmk_flags,
4378 tag,
4379 (vm_object_t)(uintptr_t) submap,
4380 offset,
4381 copy,
4382 cur_protection,
4383 max_protection,
4384 inheritance);
4385 if (result != KERN_SUCCESS) {
4386 vm_map_deallocate(submap);
4387 } else {
4388 /*
4389 * No need to lock "submap" just to check its
4390 * "mapped" flag: that flag is never reset
4391 * once it's been set and if we race, we'll
4392 * just end up setting it twice, which is OK.
4393 */
4394 if (submap->mapped_in_other_pmaps == FALSE &&
4395 vm_map_pmap(submap) != PMAP_NULL &&
4396 vm_map_pmap(submap) !=
4397 vm_map_pmap(target_map)) {
4398 /*
4399 * This submap is being mapped in a map
4400 * that uses a different pmap.
4401 * Set its "mapped_in_other_pmaps" flag
4402 * to indicate that we now need to
4403 * remove mappings from all pmaps rather
4404 * than just the submap's pmap.
4405 */
4406 vm_map_lock(submap);
4407 submap->mapped_in_other_pmaps = TRUE;
4408 vm_map_unlock(submap);
4409 }
4410 *address = map_addr;
4411 }
4412 return result;
4413 } else if (named_entry->is_copy) {
4414 kern_return_t kr;
4415 vm_map_copy_t copy_map;
4416 vm_map_entry_t copy_entry;
4417 vm_map_offset_t copy_addr;
4418 vm_map_copy_t target_copy_map;
4419 vm_map_offset_t overmap_start, overmap_end;
4420 vm_map_offset_t trimmed_start;
4421 vm_map_size_t target_size;
4422
4423 if (flags & ~(VM_FLAGS_FIXED |
4424 VM_FLAGS_ANYWHERE |
4425 VM_FLAGS_OVERWRITE |
4426 VM_FLAGS_RETURN_4K_DATA_ADDR |
4427 VM_FLAGS_RETURN_DATA_ADDR |
4428 VM_FLAGS_ALIAS_MASK)) {
4429 named_entry_unlock(named_entry);
4430 return KERN_INVALID_ARGUMENT;
4431 }
4432
4433 copy_map = named_entry->backing.copy;
4434 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4435 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4436 /* unsupported type; should not happen */
4437 printf("vm_map_enter_mem_object: "
4438 "memory_entry->backing.copy "
4439 "unsupported type 0x%x\n",
4440 copy_map->type);
4441 named_entry_unlock(named_entry);
4442 return KERN_INVALID_ARGUMENT;
4443 }
4444
4445 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4446 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4447 }
4448
4449 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4450 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4451 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4452 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4453 offset_in_mapping &= ~((signed)(0xFFF));
4454 }
4455 }
4456
4457 target_copy_map = VM_MAP_COPY_NULL;
4458 target_size = copy_map->size;
4459 overmap_start = 0;
4460 overmap_end = 0;
4461 trimmed_start = 0;
4462 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4463 DEBUG4K_ADJUST("adjusting...\n");
4464 kr = vm_map_copy_adjust_to_target(
4465 copy_map,
4466 offset /* includes data_offset */,
4467 initial_size,
4468 target_map,
4469 copy,
4470 &target_copy_map,
4471 &overmap_start,
4472 &overmap_end,
4473 &trimmed_start);
4474 if (kr != KERN_SUCCESS) {
4475 named_entry_unlock(named_entry);
4476 return kr;
4477 }
4478 target_size = target_copy_map->size;
4479 if (trimmed_start >= data_offset) {
4480 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4481 } else {
4482 data_offset -= trimmed_start;
4483 }
4484 } else {
4485 target_copy_map = copy_map;
4486 }
4487
4488 /* reserve a contiguous range */
4489 kr = vm_map_enter(target_map,
4490 &map_addr,
4491 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4492 mask,
4493 flags & (VM_FLAGS_ANYWHERE |
4494 VM_FLAGS_OVERWRITE |
4495 VM_FLAGS_RETURN_4K_DATA_ADDR |
4496 VM_FLAGS_RETURN_DATA_ADDR),
4497 vmk_flags,
4498 tag,
4499 VM_OBJECT_NULL,
4500 0,
4501 FALSE, /* copy */
4502 cur_protection,
4503 max_protection,
4504 inheritance);
4505 if (kr != KERN_SUCCESS) {
4506 DEBUG4K_ERROR("kr 0x%x\n", kr);
4507 if (target_copy_map != copy_map) {
4508 vm_map_copy_discard(target_copy_map);
4509 target_copy_map = VM_MAP_COPY_NULL;
4510 }
4511 named_entry_unlock(named_entry);
4512 return kr;
4513 }
4514
4515 copy_addr = map_addr;
4516
4517 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4518 copy_entry != vm_map_copy_to_entry(target_copy_map);
4519 copy_entry = copy_entry->vme_next) {
4520 int remap_flags;
4521 vm_map_kernel_flags_t vmk_remap_flags;
4522 vm_map_t copy_submap;
4523 vm_object_t copy_object;
4524 vm_map_size_t copy_size;
4525 vm_object_offset_t copy_offset;
4526 int copy_vm_alias;
4527
4528 remap_flags = 0;
4529 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4530
4531 copy_object = VME_OBJECT(copy_entry);
4532 copy_offset = VME_OFFSET(copy_entry);
4533 copy_size = (copy_entry->vme_end -
4534 copy_entry->vme_start);
4535 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4536 if (copy_vm_alias == 0) {
4537 /*
4538 * Caller does not want a specific
4539 * alias for this new mapping: use
4540 * the alias of the original mapping.
4541 */
4542 copy_vm_alias = VME_ALIAS(copy_entry);
4543 }
4544
4545 /* sanity check */
4546 if ((copy_addr + copy_size) >
4547 (map_addr +
4548 overmap_start + overmap_end +
4549 named_entry->size /* XXX full size */)) {
4550 /* over-mapping too much !? */
4551 kr = KERN_INVALID_ARGUMENT;
4552 DEBUG4K_ERROR("kr 0x%x\n", kr);
4553 /* abort */
4554 break;
4555 }
4556
4557 /* take a reference on the object */
4558 if (copy_entry->is_sub_map) {
4559 vmk_remap_flags.vmkf_submap = TRUE;
4560 copy_submap = VME_SUBMAP(copy_entry);
4561 vm_map_lock(copy_submap);
4562 vm_map_reference(copy_submap);
4563 vm_map_unlock(copy_submap);
4564 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4565 } else if (!copy &&
4566 copy_object != VM_OBJECT_NULL &&
4567 (copy_entry->needs_copy ||
4568 copy_object->shadowed ||
4569 (!copy_object->true_share &&
4570 !copy_entry->is_shared &&
4571 copy_object->vo_size > copy_size))) {
4572 /*
4573 * We need to resolve our side of this
4574 * "symmetric" copy-on-write now; we
4575 * need a new object to map and share,
4576 * instead of the current one which
4577 * might still be shared with the
4578 * original mapping.
4579 *
4580 * Note: A "vm_map_copy_t" does not
4581 * have a lock but we're protected by
4582 * the named entry's lock here.
4583 */
4584 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4585 VME_OBJECT_SHADOW(copy_entry, copy_size);
4586 if (!copy_entry->needs_copy &&
4587 copy_entry->protection & VM_PROT_WRITE) {
4588 vm_prot_t prot;
4589
4590 prot = copy_entry->protection & ~VM_PROT_WRITE;
4591 vm_object_pmap_protect(copy_object,
4592 copy_offset,
4593 copy_size,
4594 PMAP_NULL,
4595 PAGE_SIZE,
4596 0,
4597 prot);
4598 }
4599
4600 copy_entry->needs_copy = FALSE;
4601 copy_entry->is_shared = TRUE;
4602 copy_object = VME_OBJECT(copy_entry);
4603 copy_offset = VME_OFFSET(copy_entry);
4604 vm_object_lock(copy_object);
4605 vm_object_reference_locked(copy_object);
4606 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4607 /* we're about to make a shared mapping of this object */
4608 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4609 copy_object->true_share = TRUE;
4610 }
4611 vm_object_unlock(copy_object);
4612 } else {
4613 /*
4614 * We already have the right object
4615 * to map.
4616 */
4617 copy_object = VME_OBJECT(copy_entry);
4618 vm_object_reference(copy_object);
4619 }
4620
4621 /* over-map the object into destination */
4622 remap_flags |= flags;
4623 remap_flags |= VM_FLAGS_FIXED;
4624 remap_flags |= VM_FLAGS_OVERWRITE;
4625 remap_flags &= ~VM_FLAGS_ANYWHERE;
4626 if (!copy && !copy_entry->is_sub_map) {
4627 /*
4628 * copy-on-write should have been
4629 * resolved at this point, or we would
4630 * end up sharing instead of copying.
4631 */
4632 assert(!copy_entry->needs_copy);
4633 }
4634 #if XNU_TARGET_OS_OSX
4635 if (copy_entry->used_for_jit) {
4636 vmk_remap_flags.vmkf_map_jit = TRUE;
4637 }
4638 #endif /* XNU_TARGET_OS_OSX */
4639
4640 assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4641 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4642 kr = vm_map_enter(target_map,
4643 &copy_addr,
4644 copy_size,
4645 (vm_map_offset_t) 0,
4646 remap_flags,
4647 vmk_remap_flags,
4648 (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4649 copy_object,
4650 copy_offset,
4651 ((copy_object == NULL) ? FALSE : copy),
4652 cur_protection,
4653 max_protection,
4654 inheritance);
4655 if (kr != KERN_SUCCESS) {
4656 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4657 if (copy_entry->is_sub_map) {
4658 vm_map_deallocate(copy_submap);
4659 } else {
4660 vm_object_deallocate(copy_object);
4661 }
4662 /* abort */
4663 break;
4664 }
4665
4666 /* next mapping */
4667 copy_addr += copy_size;
4668 }
4669
4670 if (kr == KERN_SUCCESS) {
4671 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4672 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4673 *address = map_addr + offset_in_mapping;
4674 } else {
4675 *address = map_addr;
4676 }
4677 if (overmap_start) {
4678 *address += overmap_start;
4679 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4680 }
4681 }
4682 named_entry_unlock(named_entry);
4683 if (target_copy_map != copy_map) {
4684 vm_map_copy_discard(target_copy_map);
4685 target_copy_map = VM_MAP_COPY_NULL;
4686 }
4687
4688 if (kr != KERN_SUCCESS) {
4689 if (!(flags & VM_FLAGS_OVERWRITE)) {
4690 /* deallocate the contiguous range */
4691 (void) vm_deallocate(target_map,
4692 map_addr,
4693 map_size);
4694 }
4695 }
4696
4697 return kr;
4698 }
4699
4700 if (named_entry->is_object) {
4701 unsigned int access;
4702 vm_prot_t protections;
4703 unsigned int wimg_mode;
4704
4705 /* we are mapping a VM object */
4706
4707 protections = named_entry->protection & VM_PROT_ALL;
4708 access = GET_MAP_MEM(named_entry->protection);
4709
4710 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4711 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4712 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4713 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4714 offset_in_mapping &= ~((signed)(0xFFF));
4715 }
4716 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4717 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4718 }
4719
4720 object = vm_named_entry_to_vm_object(named_entry);
4721 assert(object != VM_OBJECT_NULL);
4722 vm_object_lock(object);
4723 named_entry_unlock(named_entry);
4724
4725 vm_object_reference_locked(object);
4726
4727 wimg_mode = object->wimg_bits;
4728 vm_prot_to_wimg(access, &wimg_mode);
4729 if (object->wimg_bits != wimg_mode) {
4730 vm_object_change_wimg_mode(object, wimg_mode);
4731 }
4732
4733 vm_object_unlock(object);
4734 } else {
4735 panic("invalid VM named entry %p", named_entry);
4736 }
4737 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4738 /*
4739 * JMM - This is temporary until we unify named entries
4740 * and raw memory objects.
4741 *
4742 * Detected fake ip_kotype for a memory object. In
4743 * this case, the port isn't really a port at all, but
4744 * instead is just a raw memory object.
4745 */
4746 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4747 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4748 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4749 }
4750
4751 object = memory_object_to_vm_object((memory_object_t)port);
4752 if (object == VM_OBJECT_NULL) {
4753 return KERN_INVALID_OBJECT;
4754 }
4755 vm_object_reference(object);
4756
4757 /* wait for object (if any) to be ready */
4758 if (object != VM_OBJECT_NULL) {
4759 if (object == kernel_object) {
4760 printf("Warning: Attempt to map kernel object"
4761 " by a non-private kernel entity\n");
4762 return KERN_INVALID_OBJECT;
4763 }
4764 if (!object->pager_ready) {
4765 vm_object_lock(object);
4766
4767 while (!object->pager_ready) {
4768 vm_object_wait(object,
4769 VM_OBJECT_EVENT_PAGER_READY,
4770 THREAD_UNINT);
4771 vm_object_lock(object);
4772 }
4773 vm_object_unlock(object);
4774 }
4775 }
4776 } else {
4777 return KERN_INVALID_OBJECT;
4778 }
4779
4780 if (object != VM_OBJECT_NULL &&
4781 object->named &&
4782 object->pager != MEMORY_OBJECT_NULL &&
4783 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4784 memory_object_t pager;
4785 vm_prot_t pager_prot;
4786 kern_return_t kr;
4787
4788 /*
4789 * For "named" VM objects, let the pager know that the
4790 * memory object is being mapped. Some pagers need to keep
4791 * track of this, to know when they can reclaim the memory
4792 * object, for example.
4793 * VM calls memory_object_map() for each mapping (specifying
4794 * the protection of each mapping) and calls
4795 * memory_object_last_unmap() when all the mappings are gone.
4796 */
4797 pager_prot = max_protection;
4798 if (copy) {
4799 /*
4800 * Copy-On-Write mapping: won't modify the
4801 * memory object.
4802 */
4803 pager_prot &= ~VM_PROT_WRITE;
4804 }
4805 vm_object_lock(object);
4806 pager = object->pager;
4807 if (object->named &&
4808 pager != MEMORY_OBJECT_NULL &&
4809 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4810 assert(object->pager_ready);
4811 vm_object_mapping_wait(object, THREAD_UNINT);
4812 vm_object_mapping_begin(object);
4813 vm_object_unlock(object);
4814
4815 kr = memory_object_map(pager, pager_prot);
4816 assert(kr == KERN_SUCCESS);
4817
4818 vm_object_lock(object);
4819 vm_object_mapping_end(object);
4820 }
4821 vm_object_unlock(object);
4822 }
4823
4824 /*
4825 * Perform the copy if requested
4826 */
4827
4828 if (copy) {
4829 vm_object_t new_object;
4830 vm_object_offset_t new_offset;
4831
4832 result = vm_object_copy_strategically(object, offset,
4833 map_size,
4834 &new_object, &new_offset,
4835 &copy);
4836
4837
4838 if (result == KERN_MEMORY_RESTART_COPY) {
4839 boolean_t success;
4840 boolean_t src_needs_copy;
4841
4842 /*
4843 * XXX
4844 * We currently ignore src_needs_copy.
4845 * This really is the issue of how to make
4846 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4847 * non-kernel users to use. Solution forthcoming.
4848 * In the meantime, since we don't allow non-kernel
4849 * memory managers to specify symmetric copy,
4850 * we won't run into problems here.
4851 */
4852 new_object = object;
4853 new_offset = offset;
4854 success = vm_object_copy_quickly(&new_object,
4855 new_offset,
4856 map_size,
4857 &src_needs_copy,
4858 &copy);
4859 assert(success);
4860 result = KERN_SUCCESS;
4861 }
4862 /*
4863 * Throw away the reference to the
4864 * original object, as it won't be mapped.
4865 */
4866
4867 vm_object_deallocate(object);
4868
4869 if (result != KERN_SUCCESS) {
4870 return result;
4871 }
4872
4873 object = new_object;
4874 offset = new_offset;
4875 }
4876
4877 /*
4878 * If non-kernel users want to try to prefault pages, the mapping and prefault
4879 * needs to be atomic.
4880 */
4881 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4882 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4883
4884 #if __arm64__
4885 if (fourk) {
4886 /* map this object in a "4K" pager */
4887 result = vm_map_enter_fourk(target_map,
4888 &map_addr,
4889 map_size,
4890 (vm_map_offset_t) mask,
4891 flags,
4892 vmk_flags,
4893 tag,
4894 object,
4895 offset,
4896 copy,
4897 cur_protection,
4898 max_protection,
4899 inheritance);
4900 } else
4901 #endif /* __arm64__ */
4902 {
4903 result = vm_map_enter(target_map,
4904 &map_addr, map_size,
4905 (vm_map_offset_t)mask,
4906 flags,
4907 vmk_flags,
4908 tag,
4909 object, offset,
4910 copy,
4911 cur_protection, max_protection,
4912 inheritance);
4913 }
4914 if (result != KERN_SUCCESS) {
4915 vm_object_deallocate(object);
4916 }
4917
4918 /*
4919 * Try to prefault, and do not forget to release the vm map lock.
4920 */
4921 if (result == KERN_SUCCESS && try_prefault) {
4922 mach_vm_address_t va = map_addr;
4923 kern_return_t kr = KERN_SUCCESS;
4924 unsigned int i = 0;
4925 int pmap_options;
4926
4927 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4928 if (object->internal) {
4929 pmap_options |= PMAP_OPTIONS_INTERNAL;
4930 }
4931
4932 for (i = 0; i < page_list_count; ++i) {
4933 if (!UPL_VALID_PAGE(page_list, i)) {
4934 if (kernel_prefault) {
4935 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4936 result = KERN_MEMORY_ERROR;
4937 break;
4938 }
4939 } else {
4940 /*
4941 * If this call fails, we should stop trying to
4942 * optimize, as subsequent calls are likely to
4943 * fail too.
4944 *
4945 * We do not report an error for such a failure,
4946 * though: prefaulting is an optimization, not
4947 * something critical.
4948 */
4949 kr = pmap_enter_options(target_map->pmap,
4950 va, UPL_PHYS_PAGE(page_list, i),
4951 cur_protection, VM_PROT_NONE,
4952 0, TRUE, pmap_options, NULL);
4953 if (kr != KERN_SUCCESS) {
4954 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4955 if (kernel_prefault) {
4956 result = kr;
4957 }
4958 break;
4959 }
4960 OSIncrementAtomic64(&vm_prefault_nb_pages);
4961 }
4962
4963 /* Next virtual address */
4964 va += PAGE_SIZE;
4965 }
4966 if (vmk_flags.vmkf_keep_map_locked) {
4967 vm_map_unlock(target_map);
4968 }
4969 }
4970
4971 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4972 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4973 *address = map_addr + offset_in_mapping;
4974 } else {
4975 *address = map_addr;
4976 }
4977 return result;
4978 }
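/*
 * Illustrative userspace sketch (not part of the xnu sources): one way the
 * IKOT_NAMED_ENTRY path of vm_map_enter_mem_object_helper() above can be
 * exercised from user code, by wrapping an existing region in a named memory
 * entry and mapping it a second time through mach_vm_map().  Headers and
 * error handling are kept minimal; treat this as a hedged sketch rather than
 * a reference client.
 */
#if 0 /* example only, never compiled into the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

int
main(void)
{
    mach_vm_address_t addr = 0, alias = 0;
    mach_vm_size_t size = 0x4000;
    memory_object_size_t entry_size = size;
    mach_port_t entry = MACH_PORT_NULL;
    kern_return_t kr;

    /* back the region with anonymous memory */
    kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE);
    if (kr != KERN_SUCCESS) {
        return 1;
    }

    /* wrap it in a named entry (a send right of type IKOT_NAMED_ENTRY) */
    kr = mach_make_memory_entry_64(mach_task_self(), &entry_size, addr,
        VM_PROT_READ | VM_PROT_WRITE, &entry, MACH_PORT_NULL);
    if (kr != KERN_SUCCESS) {
        return 1;
    }

    /* map the named entry again; the kernel ends up in the helper above */
    kr = mach_vm_map(mach_task_self(), &alias, size, 0, VM_FLAGS_ANYWHERE,
        entry, 0, FALSE, VM_PROT_READ | VM_PROT_WRITE,
        VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_DEFAULT);
    printf("kr=%d original=0x%llx alias=0x%llx\n", kr,
        (unsigned long long)addr, (unsigned long long)alias);
    return kr == KERN_SUCCESS ? 0 : 1;
}
#endif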
4979
4980 kern_return_t
4981 vm_map_enter_mem_object(
4982 vm_map_t target_map,
4983 vm_map_offset_t *address,
4984 vm_map_size_t initial_size,
4985 vm_map_offset_t mask,
4986 int flags,
4987 vm_map_kernel_flags_t vmk_flags,
4988 vm_tag_t tag,
4989 ipc_port_t port,
4990 vm_object_offset_t offset,
4991 boolean_t copy,
4992 vm_prot_t cur_protection,
4993 vm_prot_t max_protection,
4994 vm_inherit_t inheritance)
4995 {
4996 kern_return_t ret;
4997
4998 ret = vm_map_enter_mem_object_helper(target_map,
4999 address,
5000 initial_size,
5001 mask,
5002 flags,
5003 vmk_flags,
5004 tag,
5005 port,
5006 offset,
5007 copy,
5008 cur_protection,
5009 max_protection,
5010 inheritance,
5011 NULL,
5012 0);
5013
5014 #if KASAN
5015 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
5016 kasan_notify_address(*address, initial_size);
5017 }
5018 #endif
5019
5020 return ret;
5021 }
5022
5023 kern_return_t
5024 vm_map_enter_mem_object_prefault(
5025 vm_map_t target_map,
5026 vm_map_offset_t *address,
5027 vm_map_size_t initial_size,
5028 vm_map_offset_t mask,
5029 int flags,
5030 vm_map_kernel_flags_t vmk_flags,
5031 vm_tag_t tag,
5032 ipc_port_t port,
5033 vm_object_offset_t offset,
5034 vm_prot_t cur_protection,
5035 vm_prot_t max_protection,
5036 upl_page_list_ptr_t page_list,
5037 unsigned int page_list_count)
5038 {
5039 kern_return_t ret;
5040
5041 ret = vm_map_enter_mem_object_helper(target_map,
5042 address,
5043 initial_size,
5044 mask,
5045 flags,
5046 vmk_flags,
5047 tag,
5048 port,
5049 offset,
5050 FALSE,
5051 cur_protection,
5052 max_protection,
5053 VM_INHERIT_DEFAULT,
5054 page_list,
5055 page_list_count);
5056
5057 #if KASAN
5058 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
5059 kasan_notify_address(*address, initial_size);
5060 }
5061 #endif
5062
5063 return ret;
5064 }
5065
5066
5067 kern_return_t
5068 vm_map_enter_mem_object_control(
5069 vm_map_t target_map,
5070 vm_map_offset_t *address,
5071 vm_map_size_t initial_size,
5072 vm_map_offset_t mask,
5073 int flags,
5074 vm_map_kernel_flags_t vmk_flags,
5075 vm_tag_t tag,
5076 memory_object_control_t control,
5077 vm_object_offset_t offset,
5078 boolean_t copy,
5079 vm_prot_t cur_protection,
5080 vm_prot_t max_protection,
5081 vm_inherit_t inheritance)
5082 {
5083 vm_map_address_t map_addr;
5084 vm_map_size_t map_size;
5085 vm_object_t object;
5086 vm_object_size_t size;
5087 kern_return_t result;
5088 memory_object_t pager;
5089 vm_prot_t pager_prot;
5090 kern_return_t kr;
5091 #if __arm64__
5092 boolean_t fourk = vmk_flags.vmkf_fourk;
5093 #endif /* __arm64__ */
5094
5095 /*
5096 * Check arguments for validity
5097 */
5098 if ((target_map == VM_MAP_NULL) ||
5099 (cur_protection & ~VM_PROT_ALL) ||
5100 (max_protection & ~VM_PROT_ALL) ||
5101 (inheritance > VM_INHERIT_LAST_VALID) ||
5102 initial_size == 0) {
5103 return KERN_INVALID_ARGUMENT;
5104 }
5105
5106 #if __arm64__
5107 if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
5108 fourk = FALSE;
5109 }
5110
5111 if (fourk) {
5112 map_addr = vm_map_trunc_page(*address,
5113 FOURK_PAGE_MASK);
5114 map_size = vm_map_round_page(initial_size,
5115 FOURK_PAGE_MASK);
5116 } else
5117 #endif /* __arm64__ */
5118 {
5119 map_addr = vm_map_trunc_page(*address,
5120 VM_MAP_PAGE_MASK(target_map));
5121 map_size = vm_map_round_page(initial_size,
5122 VM_MAP_PAGE_MASK(target_map));
5123 }
5124 size = vm_object_round_page(initial_size);
5125
5126 object = memory_object_control_to_vm_object(control);
5127
5128 if (object == VM_OBJECT_NULL) {
5129 return KERN_INVALID_OBJECT;
5130 }
5131
5132 if (object == kernel_object) {
5133 printf("Warning: Attempt to map kernel object"
5134 " by a non-private kernel entity\n");
5135 return KERN_INVALID_OBJECT;
5136 }
5137
5138 vm_object_lock(object);
5139 object->ref_count++;
5140 vm_object_res_reference(object);
5141
5142 /*
5143 * For "named" VM objects, let the pager know that the
5144 * memory object is being mapped. Some pagers need to keep
5145 * track of this, to know when they can reclaim the memory
5146 * object, for example.
5147 * VM calls memory_object_map() for each mapping (specifying
5148 * the protection of each mapping) and calls
5149 * memory_object_last_unmap() when all the mappings are gone.
5150 */
5151 pager_prot = max_protection;
5152 if (copy) {
5153 pager_prot &= ~VM_PROT_WRITE;
5154 }
5155 pager = object->pager;
5156 if (object->named &&
5157 pager != MEMORY_OBJECT_NULL &&
5158 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
5159 assert(object->pager_ready);
5160 vm_object_mapping_wait(object, THREAD_UNINT);
5161 vm_object_mapping_begin(object);
5162 vm_object_unlock(object);
5163
5164 kr = memory_object_map(pager, pager_prot);
5165 assert(kr == KERN_SUCCESS);
5166
5167 vm_object_lock(object);
5168 vm_object_mapping_end(object);
5169 }
5170 vm_object_unlock(object);
5171
5172 /*
5173 * Perform the copy if requested
5174 */
5175
5176 if (copy) {
5177 vm_object_t new_object;
5178 vm_object_offset_t new_offset;
5179
5180 result = vm_object_copy_strategically(object, offset, size,
5181 &new_object, &new_offset,
5182 &copy);
5183
5184
5185 if (result == KERN_MEMORY_RESTART_COPY) {
5186 boolean_t success;
5187 boolean_t src_needs_copy;
5188
5189 /*
5190 * XXX
5191 * We currently ignore src_needs_copy.
5192 * This really is the issue of how to make
5193 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5194 * non-kernel users to use. Solution forthcoming.
5195 * In the meantime, since we don't allow non-kernel
5196 * memory managers to specify symmetric copy,
5197 * we won't run into problems here.
5198 */
5199 new_object = object;
5200 new_offset = offset;
5201 success = vm_object_copy_quickly(&new_object,
5202 new_offset, size,
5203 &src_needs_copy,
5204 &copy);
5205 assert(success);
5206 result = KERN_SUCCESS;
5207 }
5208 /*
5209 * Throw away the reference to the
5210 * original object, as it won't be mapped.
5211 */
5212
5213 vm_object_deallocate(object);
5214
5215 if (result != KERN_SUCCESS) {
5216 return result;
5217 }
5218
5219 object = new_object;
5220 offset = new_offset;
5221 }
5222
5223 #if __arm64__
5224 if (fourk) {
5225 result = vm_map_enter_fourk(target_map,
5226 &map_addr,
5227 map_size,
5228 (vm_map_offset_t)mask,
5229 flags,
5230 vmk_flags,
5231 tag,
5232 object, offset,
5233 copy,
5234 cur_protection, max_protection,
5235 inheritance);
5236 } else
5237 #endif /* __arm64__ */
5238 {
5239 result = vm_map_enter(target_map,
5240 &map_addr, map_size,
5241 (vm_map_offset_t)mask,
5242 flags,
5243 vmk_flags,
5244 tag,
5245 object, offset,
5246 copy,
5247 cur_protection, max_protection,
5248 inheritance);
5249 }
5250 if (result != KERN_SUCCESS) {
5251 vm_object_deallocate(object);
5252 }
5253 *address = map_addr;
5254
5255 return result;
5256 }
5257
5258
5259 #if VM_CPM
5260
5261 #ifdef MACH_ASSERT
5262 extern pmap_paddr_t avail_start, avail_end;
5263 #endif
5264
5265 /*
5266 * Allocate memory in the specified map, with the caveat that
5267 * the memory is physically contiguous. This call may fail
5268 * if the system can't find sufficient contiguous memory.
5269 * This call may cause or lead to heart-stopping amounts of
5270 * paging activity.
5271 *
5272 * Memory obtained from this call should be freed in the
5273 * normal way, viz., via vm_deallocate.
5274 */
5275 kern_return_t
5276 vm_map_enter_cpm(
5277 vm_map_t map,
5278 vm_map_offset_t *addr,
5279 vm_map_size_t size,
5280 int flags)
5281 {
5282 vm_object_t cpm_obj;
5283 pmap_t pmap;
5284 vm_page_t m, pages;
5285 kern_return_t kr;
5286 vm_map_offset_t va, start, end, offset;
5287 #if MACH_ASSERT
5288 vm_map_offset_t prev_addr = 0;
5289 #endif /* MACH_ASSERT */
5290
5291 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5292 vm_tag_t tag;
5293
5294 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5295 /* XXX TODO4K do we need to support this? */
5296 *addr = 0;
5297 return KERN_NOT_SUPPORTED;
5298 }
5299
5300 VM_GET_FLAGS_ALIAS(flags, tag);
5301
5302 if (size == 0) {
5303 *addr = 0;
5304 return KERN_SUCCESS;
5305 }
5306 if (anywhere) {
5307 *addr = vm_map_min(map);
5308 } else {
5309 *addr = vm_map_trunc_page(*addr,
5310 VM_MAP_PAGE_MASK(map));
5311 }
5312 size = vm_map_round_page(size,
5313 VM_MAP_PAGE_MASK(map));
5314
5315 /*
5316 * LP64todo - cpm_allocate should probably allow
5317 * allocations of >4GB, but not with the current
5318 * algorithm, so just cast down the size for now.
5319 */
5320 if (size > VM_MAX_ADDRESS) {
5321 return KERN_RESOURCE_SHORTAGE;
5322 }
5323 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5324 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5325 return kr;
5326 }
5327
5328 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5329 assert(cpm_obj != VM_OBJECT_NULL);
5330 assert(cpm_obj->internal);
5331 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5332 assert(cpm_obj->can_persist == FALSE);
5333 assert(cpm_obj->pager_created == FALSE);
5334 assert(cpm_obj->pageout == FALSE);
5335 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5336
5337 /*
5338 * Insert pages into object.
5339 */
5340
5341 vm_object_lock(cpm_obj);
5342 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5343 m = pages;
5344 pages = NEXT_PAGE(m);
5345 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5346
5347 assert(!m->vmp_gobbled);
5348 assert(!m->vmp_wanted);
5349 assert(!m->vmp_pageout);
5350 assert(!m->vmp_tabled);
5351 assert(VM_PAGE_WIRED(m));
5352 assert(m->vmp_busy);
5353 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5354
5355 m->vmp_busy = FALSE;
5356 vm_page_insert(m, cpm_obj, offset);
5357 }
5358 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5359 vm_object_unlock(cpm_obj);
5360
5361 /*
5362 * Hang onto a reference on the object in case a
5363 * multi-threaded application for some reason decides
5364 * to deallocate the portion of the address space into
5365 * which we will insert this object.
5366 *
5367 * Unfortunately, we must insert the object now before
5368 * we can talk to the pmap module about which addresses
5369 * must be wired down. Hence, the race with a multi-
5370 * threaded app.
5371 */
5372 vm_object_reference(cpm_obj);
5373
5374 /*
5375 * Insert object into map.
5376 */
5377
5378 kr = vm_map_enter(
5379 map,
5380 addr,
5381 size,
5382 (vm_map_offset_t)0,
5383 flags,
5384 VM_MAP_KERNEL_FLAGS_NONE,
5385 cpm_obj,
5386 (vm_object_offset_t)0,
5387 FALSE,
5388 VM_PROT_ALL,
5389 VM_PROT_ALL,
5390 VM_INHERIT_DEFAULT);
5391
5392 if (kr != KERN_SUCCESS) {
5393 /*
5394 * A CPM object doesn't have can_persist set,
5395 * so all we have to do is deallocate it to
5396 * free up these pages.
5397 */
5398 assert(cpm_obj->pager_created == FALSE);
5399 assert(cpm_obj->can_persist == FALSE);
5400 assert(cpm_obj->pageout == FALSE);
5401 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5402 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5403 vm_object_deallocate(cpm_obj); /* kill creation ref */
5404 }
5405
5406 /*
5407 * Inform the physical mapping system that the
5408 * range of addresses may not fault, so that
5409 * page tables and such can be locked down as well.
5410 */
5411 start = *addr;
5412 end = start + size;
5413 pmap = vm_map_pmap(map);
5414 pmap_pageable(pmap, start, end, FALSE);
5415
5416 /*
5417 * Enter each page into the pmap, to avoid faults.
5418 * Note that this loop could be coded more efficiently,
5419 * if the need arose, rather than looking up each page
5420 * again.
5421 */
5422 for (offset = 0, va = start; offset < size;
5423 va += PAGE_SIZE, offset += PAGE_SIZE) {
5424 int type_of_fault;
5425
5426 vm_object_lock(cpm_obj);
5427 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5428 assert(m != VM_PAGE_NULL);
5429
5430 vm_page_zero_fill(m);
5431
5432 type_of_fault = DBG_ZERO_FILL_FAULT;
5433
5434 vm_fault_enter(m, pmap, va,
5435 PAGE_SIZE, 0,
5436 VM_PROT_ALL, VM_PROT_WRITE,
5437 VM_PAGE_WIRED(m),
5438 FALSE, /* change_wiring */
5439 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5440 FALSE, /* no_cache */
5441 FALSE, /* cs_bypass */
5442 0, /* user_tag */
5443 0, /* pmap_options */
5444 NULL, /* need_retry */
5445 &type_of_fault);
5446
5447 vm_object_unlock(cpm_obj);
5448 }
5449
5450 #if MACH_ASSERT
5451 /*
5452 * Verify ordering in address space.
5453 */
5454 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5455 vm_object_lock(cpm_obj);
5456 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5457 vm_object_unlock(cpm_obj);
5458 if (m == VM_PAGE_NULL) {
5459 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5460 cpm_obj, (uint64_t)offset);
5461 }
5462 assert(m->vmp_tabled);
5463 assert(!m->vmp_busy);
5464 assert(!m->vmp_wanted);
5465 assert(!m->vmp_fictitious);
5466 assert(!m->vmp_private);
5467 assert(!m->vmp_absent);
5468 assert(!m->vmp_error);
5469 assert(!m->vmp_cleaning);
5470 assert(!m->vmp_laundry);
5471 assert(!m->vmp_precious);
5472 assert(!m->vmp_clustered);
5473 if (offset != 0) {
5474 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5475 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5476 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5477 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5478 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5479 panic("vm_allocate_cpm: pages not contig!");
5480 }
5481 }
5482 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5483 }
5484 #endif /* MACH_ASSERT */
5485
5486 vm_object_deallocate(cpm_obj); /* kill extra ref */
5487
5488 return kr;
5489 }
5490
5491
5492 #else /* VM_CPM */
5493
5494 /*
5495 * Interface is defined in all cases, but unless the kernel
5496 * is built explicitly for this option, the interface does
5497 * nothing.
5498 */
5499
5500 kern_return_t
5501 vm_map_enter_cpm(
5502 __unused vm_map_t map,
5503 __unused vm_map_offset_t *addr,
5504 __unused vm_map_size_t size,
5505 __unused int flags)
5506 {
5507 return KERN_FAILURE;
5508 }
5509 #endif /* VM_CPM */
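/*
 * Illustrative sketch (not part of the xnu sources): the physical
 * contiguity check performed under MACH_ASSERT in vm_map_enter_cpm()
 * above, restated over a plain array of page frame numbers.  The array
 * contents are hypothetical.
 */
#if 0 /* example only, never compiled into the kernel */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static bool
frames_are_contiguous(const uint64_t *ppnum, size_t count)
{
    for (size_t i = 1; i < count; i++) {
        if (ppnum[i] != ppnum[i - 1] + 1) {
            return false;   /* same check as the prev_addr + 1 assert */
        }
    }
    return true;
}

int
main(void)
{
    uint64_t frames[] = { 0x1000, 0x1001, 0x1002, 0x1003 };
    printf("contiguous: %d\n",
        frames_are_contiguous(frames, sizeof(frames) / sizeof(frames[0])));
    return 0;
}
#endif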
5510
5511 /* Not used without nested pmaps */
5512 #ifndef NO_NESTED_PMAP
5513 /*
5514 * Clip and unnest a portion of a nested submap mapping.
5515 */
5516
5517
5518 static void
5519 vm_map_clip_unnest(
5520 vm_map_t map,
5521 vm_map_entry_t entry,
5522 vm_map_offset_t start_unnest,
5523 vm_map_offset_t end_unnest)
5524 {
5525 vm_map_offset_t old_start_unnest = start_unnest;
5526 vm_map_offset_t old_end_unnest = end_unnest;
5527
5528 assert(entry->is_sub_map);
5529 assert(VME_SUBMAP(entry) != NULL);
5530 assert(entry->use_pmap);
5531
5532 /*
5533 * Query the platform for the optimal unnest range.
5534 * DRK: There's some duplication of effort here, since
5535 * callers may have adjusted the range to some extent. This
5536 * routine was introduced to support 1GiB subtree nesting
5537 * for x86 platforms, which can also nest on 2MiB boundaries
5538 * depending on size/alignment.
5539 */
5540 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5541 assert(VME_SUBMAP(entry)->is_nested_map);
5542 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5543 log_unnest_badness(map,
5544 old_start_unnest,
5545 old_end_unnest,
5546 VME_SUBMAP(entry)->is_nested_map,
5547 (entry->vme_start +
5548 VME_SUBMAP(entry)->lowest_unnestable_start -
5549 VME_OFFSET(entry)));
5550 }
5551
5552 if (entry->vme_start > start_unnest ||
5553 entry->vme_end < end_unnest) {
5554 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5555 "bad nested entry: start=0x%llx end=0x%llx\n",
5556 (long long)start_unnest, (long long)end_unnest,
5557 (long long)entry->vme_start, (long long)entry->vme_end);
5558 }
5559
5560 if (start_unnest > entry->vme_start) {
5561 _vm_map_clip_start(&map->hdr,
5562 entry,
5563 start_unnest);
5564 if (map->holelistenabled) {
5565 vm_map_store_update_first_free(map, NULL, FALSE);
5566 } else {
5567 vm_map_store_update_first_free(map, map->first_free, FALSE);
5568 }
5569 }
5570 if (entry->vme_end > end_unnest) {
5571 _vm_map_clip_end(&map->hdr,
5572 entry,
5573 end_unnest);
5574 if (map->holelistenabled) {
5575 vm_map_store_update_first_free(map, NULL, FALSE);
5576 } else {
5577 vm_map_store_update_first_free(map, map->first_free, FALSE);
5578 }
5579 }
5580
5581 pmap_unnest(map->pmap,
5582 entry->vme_start,
5583 entry->vme_end - entry->vme_start);
5584 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5585 /* clean up parent map/maps */
5586 vm_map_submap_pmap_clean(
5587 map, entry->vme_start,
5588 entry->vme_end,
5589 VME_SUBMAP(entry),
5590 VME_OFFSET(entry));
5591 }
5592 entry->use_pmap = FALSE;
5593 if ((map->pmap != kernel_pmap) &&
5594 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5595 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5596 }
5597 }
5598 #endif /* NO_NESTED_PMAP */
5599
5600 /*
5601 * vm_map_clip_start: [ internal use only ]
5602 *
5603 * Asserts that the given entry begins at or after
5604 * the specified address; if necessary,
5605 * it splits the entry into two.
5606 */
5607 void
5608 vm_map_clip_start(
5609 vm_map_t map,
5610 vm_map_entry_t entry,
5611 vm_map_offset_t startaddr)
5612 {
5613 #ifndef NO_NESTED_PMAP
5614 if (entry->is_sub_map &&
5615 entry->use_pmap &&
5616 startaddr >= entry->vme_start) {
5617 vm_map_offset_t start_unnest, end_unnest;
5618
5619 /*
5620 * Make sure "startaddr" is no longer in a nested range
5621 * before we clip. Unnest only the minimum range the platform
5622 * can handle.
5623 * vm_map_clip_unnest may perform additional adjustments to
5624 * the unnest range.
5625 */
5626 start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
5627 end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
5628 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5629 }
5630 #endif /* NO_NESTED_PMAP */
5631 if (startaddr > entry->vme_start) {
5632 if (VME_OBJECT(entry) &&
5633 !entry->is_sub_map &&
5634 VME_OBJECT(entry)->phys_contiguous) {
5635 pmap_remove(map->pmap,
5636 (addr64_t)(entry->vme_start),
5637 (addr64_t)(entry->vme_end));
5638 }
5639 if (entry->vme_atomic) {
5640 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5641 }
5642
5643 DTRACE_VM5(
5644 vm_map_clip_start,
5645 vm_map_t, map,
5646 vm_map_offset_t, entry->vme_start,
5647 vm_map_offset_t, entry->vme_end,
5648 vm_map_offset_t, startaddr,
5649 int, VME_ALIAS(entry));
5650
5651 _vm_map_clip_start(&map->hdr, entry, startaddr);
5652 if (map->holelistenabled) {
5653 vm_map_store_update_first_free(map, NULL, FALSE);
5654 } else {
5655 vm_map_store_update_first_free(map, map->first_free, FALSE);
5656 }
5657 }
5658 }
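/*
 * Illustrative sketch (not part of the xnu sources): the power-of-two
 * rounding used above to widen an unnest request to whole shared-region
 * nesting units.  vm_map_clip_start() rounds the start down and unnests
 * one region; vm_map_clip_end() instead rounds the end up.  The 32MB
 * region size is a hypothetical stand-in for pmap_shared_region_size_min().
 */
#if 0 /* example only, never compiled into the kernel */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t region = 0x2000000;           /* hypothetical nesting unit */
    uint64_t startaddr = 0x7fff12345000;
    uint64_t endaddr   = 0x7fff1234a000;

    /* vm_map_clip_start(): round the start down, unnest one region */
    uint64_t start_unnest = startaddr & ~(region - 1);
    uint64_t end_unnest   = start_unnest + region;

    /* vm_map_clip_end(): keep the entry start, round the end up */
    uint64_t end_unnest_up = (endaddr + region - 1) & ~(region - 1);

    printf("clip_start unnests [0x%llx, 0x%llx); clip_end rounds end to 0x%llx\n",
        (unsigned long long)start_unnest,
        (unsigned long long)end_unnest,
        (unsigned long long)end_unnest_up);
    return 0;
}
#endif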
5659
5660
5661 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5662 MACRO_BEGIN \
5663 if ((startaddr) > (entry)->vme_start) \
5664 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5665 MACRO_END
5666
5667 /*
5668 * This routine is called only when it is known that
5669 * the entry must be split.
5670 */
5671 static void
5672 _vm_map_clip_start(
5673 struct vm_map_header *map_header,
5674 vm_map_entry_t entry,
5675 vm_map_offset_t start)
5676 {
5677 vm_map_entry_t new_entry;
5678
5679 /*
5680 * Split off the front portion --
5681 * note that we must insert the new
5682 * entry BEFORE this one, so that
5683 * this entry has the specified starting
5684 * address.
5685 */
5686
5687 if (entry->map_aligned) {
5688 assert(VM_MAP_PAGE_ALIGNED(start,
5689 VM_MAP_HDR_PAGE_MASK(map_header)));
5690 }
5691
5692 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5693 vm_map_entry_copy_full(new_entry, entry);
5694
5695 new_entry->vme_end = start;
5696 assert(new_entry->vme_start < new_entry->vme_end);
5697 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5698 assert(start < entry->vme_end);
5699 entry->vme_start = start;
5700
5701 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5702
5703 if (entry->is_sub_map) {
5704 vm_map_reference(VME_SUBMAP(new_entry));
5705 } else {
5706 vm_object_reference(VME_OBJECT(new_entry));
5707 }
5708 }
5709
5710
5711 /*
5712 * vm_map_clip_end: [ internal use only ]
5713 *
5714 * Asserts that the given entry ends at or before
5715 * the specified address; if necessary,
5716 * it splits the entry into two.
5717 */
5718 void
5719 vm_map_clip_end(
5720 vm_map_t map,
5721 vm_map_entry_t entry,
5722 vm_map_offset_t endaddr)
5723 {
5724 if (endaddr > entry->vme_end) {
5725 /*
5726 * Within the scope of this clipping, limit "endaddr" to
5727 * the end of this map entry...
5728 */
5729 endaddr = entry->vme_end;
5730 }
5731 #ifndef NO_NESTED_PMAP
5732 if (entry->is_sub_map && entry->use_pmap) {
5733 vm_map_offset_t start_unnest, end_unnest;
5734
5735 /*
5736 * Make sure the range between the start of this entry and
5737 * the new "endaddr" is no longer nested before we clip.
5738 * Unnest only the minimum range the platform can handle.
5739 * vm_map_clip_unnest may perform additional adjustments to
5740 * the unnest range.
5741 */
5742 start_unnest = entry->vme_start;
5743 end_unnest =
5744 (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
5745 ~(pmap_shared_region_size_min(map->pmap) - 1);
5746 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5747 }
5748 #endif /* NO_NESTED_PMAP */
5749 if (endaddr < entry->vme_end) {
5750 if (VME_OBJECT(entry) &&
5751 !entry->is_sub_map &&
5752 VME_OBJECT(entry)->phys_contiguous) {
5753 pmap_remove(map->pmap,
5754 (addr64_t)(entry->vme_start),
5755 (addr64_t)(entry->vme_end));
5756 }
5757 if (entry->vme_atomic) {
5758 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5759 }
5760 DTRACE_VM5(
5761 vm_map_clip_end,
5762 vm_map_t, map,
5763 vm_map_offset_t, entry->vme_start,
5764 vm_map_offset_t, entry->vme_end,
5765 vm_map_offset_t, endaddr,
5766 int, VME_ALIAS(entry));
5767
5768 _vm_map_clip_end(&map->hdr, entry, endaddr);
5769 if (map->holelistenabled) {
5770 vm_map_store_update_first_free(map, NULL, FALSE);
5771 } else {
5772 vm_map_store_update_first_free(map, map->first_free, FALSE);
5773 }
5774 }
5775 }
5776
5777
5778 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5779 MACRO_BEGIN \
5780 if ((endaddr) < (entry)->vme_end) \
5781 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5782 MACRO_END
5783
5784 /*
5785 * This routine is called only when it is known that
5786 * the entry must be split.
5787 */
5788 static void
5789 _vm_map_clip_end(
5790 struct vm_map_header *map_header,
5791 vm_map_entry_t entry,
5792 vm_map_offset_t end)
5793 {
5794 vm_map_entry_t new_entry;
5795
5796 /*
5797 * Create a new entry and insert it
5798 * AFTER the specified entry
5799 */
5800
5801 if (entry->map_aligned) {
5802 assert(VM_MAP_PAGE_ALIGNED(end,
5803 VM_MAP_HDR_PAGE_MASK(map_header)));
5804 }
5805
5806 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5807 vm_map_entry_copy_full(new_entry, entry);
5808
5809 assert(entry->vme_start < end);
5810 new_entry->vme_start = entry->vme_end = end;
5811 VME_OFFSET_SET(new_entry,
5812 VME_OFFSET(new_entry) + (end - entry->vme_start));
5813 assert(new_entry->vme_start < new_entry->vme_end);
5814
5815 _vm_map_store_entry_link(map_header, entry, new_entry);
5816
5817 if (entry->is_sub_map) {
5818 vm_map_reference(VME_SUBMAP(new_entry));
5819 } else {
5820 vm_object_reference(VME_OBJECT(new_entry));
5821 }
5822 }
5823
5824
5825 /*
5826 * VM_MAP_RANGE_CHECK: [ internal use only ]
5827 *
5828 * Asserts that the starting and ending region
5829 * addresses fall within the valid range of the map.
5830 */
5831 #define VM_MAP_RANGE_CHECK(map, start, end) \
5832 MACRO_BEGIN \
5833 if (start < vm_map_min(map)) \
5834 start = vm_map_min(map); \
5835 if (end > vm_map_max(map)) \
5836 end = vm_map_max(map); \
5837 if (start > end) \
5838 start = end; \
5839 MACRO_END
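/*
 * Editorial note (not part of the original source): a minimal sketch of how
 * VM_MAP_RANGE_CHECK clamps a caller-supplied range, assuming a map whose
 * valid range is [0x1000, 0x9000):
 *
 *     vm_map_offset_t start = 0x0800, end = 0xA000;
 *     VM_MAP_RANGE_CHECK(map, start, end);
 *     // start == 0x1000, end == 0x9000
 *
 *     start = 0xB000; end = 0xC000;
 *     VM_MAP_RANGE_CHECK(map, start, end);
 *     // start == end == 0x9000: an empty range rather than an error
 */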
5840
5841 /*
5842 * vm_map_range_check: [ internal use only ]
5843 *
5844 * Check that the region defined by the specified start and
5845 * end addresses is wholly contained within a single map
5846 * entry or set of adjacent map entries of the specified map,
5847 * i.e. the specified region contains no unmapped space.
5848 * If any or all of the region is unmapped, FALSE is returned.
5849 * Otherwise, TRUE is returned and if the output argument 'entry'
5850 * is not NULL it points to the map entry containing the start
5851 * of the region.
5852 *
5853 * The map is locked for reading on entry and is left locked.
5854 */
5855 static boolean_t
5856 vm_map_range_check(
5857 vm_map_t map,
5858 vm_map_offset_t start,
5859 vm_map_offset_t end,
5860 vm_map_entry_t *entry)
5861 {
5862 vm_map_entry_t cur;
5863 vm_map_offset_t prev;
5864
5865 /*
5866 * Basic sanity checks first
5867 */
5868 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5869 return FALSE;
5870 }
5871
5872 /*
5873 * Check first if the region starts within a valid
5874 * mapping for the map.
5875 */
5876 if (!vm_map_lookup_entry(map, start, &cur)) {
5877 return FALSE;
5878 }
5879
5880 /*
5881 * Optimize for the case that the region is contained
5882 * in a single map entry.
5883 */
5884 if (entry != (vm_map_entry_t *) NULL) {
5885 *entry = cur;
5886 }
5887 if (end <= cur->vme_end) {
5888 return TRUE;
5889 }
5890
5891 /*
5892 * If the region is not wholly contained within a
5893 * single entry, walk the entries looking for holes.
5894 */
5895 prev = cur->vme_end;
5896 cur = cur->vme_next;
5897 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5898 if (end <= cur->vme_end) {
5899 return TRUE;
5900 }
5901 prev = cur->vme_end;
5902 cur = cur->vme_next;
5903 }
5904 return FALSE;
5905 }
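/*
 * Editorial sketch (not part of the original source): typical use of
 * vm_map_range_check() by a caller that already holds the map lock for
 * reading; "do_something_with" is a hypothetical placeholder.
 *
 *     vm_map_entry_t entry;
 *
 *     vm_map_lock_read(map);
 *     if (!vm_map_range_check(map, start, end, &entry)) {
 *             vm_map_unlock_read(map);
 *             return KERN_INVALID_ADDRESS;
 *     }
 *     // "entry" points at the map entry containing "start" and
 *     // [start, end) is known to contain no unmapped space.
 *     do_something_with(entry);
 *     vm_map_unlock_read(map);
 */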
5906
5907 /*
5908 * vm_map_submap: [ kernel use only ]
5909 *
5910 * Mark the given range as handled by a subordinate map.
5911 *
5912 * This range must have been created with vm_map_find using
5913 * the vm_submap_object, and no other operations may have been
5914 * performed on this range prior to calling vm_map_submap.
5915 *
5916 * Only a limited number of operations can be performed
5917 * within this range after calling vm_map_submap:
5918 * vm_fault
5919 * [Don't try vm_map_copyin!]
5920 *
5921 * To remove a submapping, one must first remove the
5922 * range from the superior map, and then destroy the
5923 * submap (if desired). [Better yet, don't try it.]
5924 */
5925 kern_return_t
5926 vm_map_submap(
5927 vm_map_t map,
5928 vm_map_offset_t start,
5929 vm_map_offset_t end,
5930 vm_map_t submap,
5931 vm_map_offset_t offset,
5932 #ifdef NO_NESTED_PMAP
5933 __unused
5934 #endif /* NO_NESTED_PMAP */
5935 boolean_t use_pmap)
5936 {
5937 vm_map_entry_t entry;
5938 kern_return_t result = KERN_INVALID_ARGUMENT;
5939 vm_object_t object;
5940
5941 vm_map_lock(map);
5942
5943 if (!vm_map_lookup_entry(map, start, &entry)) {
5944 entry = entry->vme_next;
5945 }
5946
5947 if (entry == vm_map_to_entry(map) ||
5948 entry->is_sub_map) {
5949 vm_map_unlock(map);
5950 return KERN_INVALID_ARGUMENT;
5951 }
5952
5953 vm_map_clip_start(map, entry, start);
5954 vm_map_clip_end(map, entry, end);
5955
5956 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5957 (!entry->is_sub_map) &&
5958 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5959 (object->resident_page_count == 0) &&
5960 (object->copy == VM_OBJECT_NULL) &&
5961 (object->shadow == VM_OBJECT_NULL) &&
5962 (!object->pager_created)) {
5963 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5964 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5965 vm_object_deallocate(object);
5966 entry->is_sub_map = TRUE;
5967 entry->use_pmap = FALSE;
5968 VME_SUBMAP_SET(entry, submap);
5969 vm_map_reference(submap);
5970 if (submap->mapped_in_other_pmaps == FALSE &&
5971 vm_map_pmap(submap) != PMAP_NULL &&
5972 vm_map_pmap(submap) != vm_map_pmap(map)) {
5973 /*
5974 * This submap is being mapped in a map
5975 * that uses a different pmap.
5976 * Set its "mapped_in_other_pmaps" flag
5977 * to indicate that we now need to
5978 * remove mappings from all pmaps rather
5979 * than just the submap's pmap.
5980 */
5981 submap->mapped_in_other_pmaps = TRUE;
5982 }
5983
5984 #ifndef NO_NESTED_PMAP
5985 if (use_pmap) {
5986 /* nest if platform code will allow */
5987 if (submap->pmap == NULL) {
5988 ledger_t ledger = map->pmap->ledger;
5989 submap->pmap = pmap_create_options(ledger,
5990 (vm_map_size_t) 0, 0);
5991 if (submap->pmap == PMAP_NULL) {
5992 vm_map_unlock(map);
5993 return KERN_NO_SPACE;
5994 }
5995 #if defined(__arm__) || defined(__arm64__)
5996 pmap_set_nested(submap->pmap);
5997 #endif
5998 }
5999 result = pmap_nest(map->pmap,
6000 (VME_SUBMAP(entry))->pmap,
6001 (addr64_t)start,
6002 (uint64_t)(end - start));
6003 if (result) {
6004 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
6005 }
6006 entry->use_pmap = TRUE;
6007 }
6008 #else /* NO_NESTED_PMAP */
6009 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
6010 #endif /* NO_NESTED_PMAP */
6011 result = KERN_SUCCESS;
6012 }
6013 vm_map_unlock(map);
6014
6015 return result;
6016 }
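/*
 * Editorial sketch (not part of the original source): the nesting protocol
 * described above, in outline.  Error handling is elided and the range
 * [start, end) is assumed to have already been reserved in "parent_map"
 * with the vm_submap_object as described in the block comment.
 *
 *     kern_return_t kr;
 *
 *     kr = vm_map_submap(parent_map,      // superior map
 *         start, end,                     // reserved range in parent_map
 *         child_map,                      // subordinate map
 *         0,                              // offset into child_map
 *         TRUE);                          // request pmap nesting if allowed
 *     // On success the parent entry now refers to child_map; only a
 *     // limited set of operations (e.g. vm_fault) is valid on the range.
 */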
6017
6018 /*
6019 * vm_map_protect:
6020 *
6021 * Sets the protection of the specified address
6022 * region in the target map. If "set_max" is
6023 * specified, the maximum protection is to be set;
6024 * otherwise, only the current protection is affected.
6025 */
6026 kern_return_t
6027 vm_map_protect(
6028 vm_map_t map,
6029 vm_map_offset_t start,
6030 vm_map_offset_t end,
6031 vm_prot_t new_prot,
6032 boolean_t set_max)
6033 {
6034 vm_map_entry_t current;
6035 vm_map_offset_t prev;
6036 vm_map_entry_t entry;
6037 vm_prot_t new_max;
6038 int pmap_options = 0;
6039 kern_return_t kr;
6040
6041 if (new_prot & VM_PROT_COPY) {
6042 vm_map_offset_t new_start;
6043 vm_prot_t cur_prot, max_prot;
6044 vm_map_kernel_flags_t kflags;
6045
6046 /* LP64todo - see below */
6047 if (start >= map->max_offset) {
6048 return KERN_INVALID_ADDRESS;
6049 }
6050
6051 if ((new_prot & VM_PROT_EXECUTE) &&
6052 map->pmap != kernel_pmap &&
6053 (vm_map_cs_enforcement(map)
6054 #if XNU_TARGET_OS_OSX && __arm64__
6055 || !VM_MAP_IS_EXOTIC(map)
6056 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
6057 ) &&
6058 VM_MAP_POLICY_WX_FAIL(map)) {
6059 DTRACE_VM3(cs_wx,
6060 uint64_t, (uint64_t) start,
6061 uint64_t, (uint64_t) end,
6062 vm_prot_t, new_prot);
6063 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6064 proc_selfpid(),
6065 (current_task()->bsd_info
6066 ? proc_name_address(current_task()->bsd_info)
6067 : "?"),
6068 __FUNCTION__);
6069 return KERN_PROTECTION_FAILURE;
6070 }
6071
6072 /*
6073 * Let vm_map_remap_extract() know that it will need to:
6074 * + make a copy of the mapping
6075 * + add VM_PROT_WRITE to the max protections
6076 * + remove any protections that are no longer allowed from the
6077 * max protections (to avoid any WRITE/EXECUTE conflict, for
6078 * example).
6079 * Note that "max_prot" is an IN/OUT parameter only for this
6080 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
6081 * only.
6082 */
6083 max_prot = new_prot & VM_PROT_ALL;
6084 kflags = VM_MAP_KERNEL_FLAGS_NONE;
6085 kflags.vmkf_remap_prot_copy = TRUE;
6086 kflags.vmkf_overwrite_immutable = TRUE;
6087 new_start = start;
6088 kr = vm_map_remap(map,
6089 &new_start,
6090 end - start,
6091 0, /* mask */
6092 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
6093 kflags,
6094 0,
6095 map,
6096 start,
6097 TRUE, /* copy-on-write remapping! */
6098 &cur_prot,
6099 &max_prot,
6100 VM_INHERIT_DEFAULT);
6101 if (kr != KERN_SUCCESS) {
6102 return kr;
6103 }
6104 new_prot &= ~VM_PROT_COPY;
6105 }
6106
6107 vm_map_lock(map);
6108
6109 /* LP64todo - remove this check when vm_map_commpage64()
6110 * no longer has to stuff in a map_entry for the commpage
6111 * above the map's max_offset.
6112 */
6113 if (start >= map->max_offset) {
6114 vm_map_unlock(map);
6115 return KERN_INVALID_ADDRESS;
6116 }
6117
6118 while (1) {
6119 /*
6120 * Lookup the entry. If it doesn't start in a valid
6121 * entry, return an error.
6122 */
6123 if (!vm_map_lookup_entry(map, start, &entry)) {
6124 vm_map_unlock(map);
6125 return KERN_INVALID_ADDRESS;
6126 }
6127
6128 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
6129 start = SUPERPAGE_ROUND_DOWN(start);
6130 continue;
6131 }
6132 break;
6133 }
6134 if (entry->superpage_size) {
6135 end = SUPERPAGE_ROUND_UP(end);
6136 }
6137
6138 /*
6139 * Make a first pass to check for protection and address
6140 * violations.
6141 */
6142
6143 current = entry;
6144 prev = current->vme_start;
6145 while ((current != vm_map_to_entry(map)) &&
6146 (current->vme_start < end)) {
6147 /*
6148 * If there is a hole, return an error.
6149 */
6150 if (current->vme_start != prev) {
6151 vm_map_unlock(map);
6152 return KERN_INVALID_ADDRESS;
6153 }
6154
6155 new_max = current->max_protection;
6156 if ((new_prot & new_max) != new_prot) {
6157 vm_map_unlock(map);
6158 return KERN_PROTECTION_FAILURE;
6159 }
6160
6161 if ((new_prot & VM_PROT_WRITE) &&
6162 (new_prot & VM_PROT_EXECUTE) &&
6163 #if XNU_TARGET_OS_OSX
6164 map->pmap != kernel_pmap &&
6165 (vm_map_cs_enforcement(map)
6166 #if __arm64__
6167 || !VM_MAP_IS_EXOTIC(map)
6168 #endif /* __arm64__ */
6169 ) &&
6170 #endif /* XNU_TARGET_OS_OSX */
6171 !(current->used_for_jit)) {
6172 DTRACE_VM3(cs_wx,
6173 uint64_t, (uint64_t) current->vme_start,
6174 uint64_t, (uint64_t) current->vme_end,
6175 vm_prot_t, new_prot);
6176 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6177 proc_selfpid(),
6178 (current_task()->bsd_info
6179 ? proc_name_address(current_task()->bsd_info)
6180 : "?"),
6181 __FUNCTION__);
6182 new_prot &= ~VM_PROT_EXECUTE;
6183 if (VM_MAP_POLICY_WX_FAIL(map)) {
6184 vm_map_unlock(map);
6185 return KERN_PROTECTION_FAILURE;
6186 }
6187 }
6188
6189 /*
6190 * If the task has requested executable lockdown,
6191 * deny both:
6192 * - adding executable protections OR
6193 * - adding write protections to an existing executable mapping.
6194 */
6195 if (map->map_disallow_new_exec == TRUE) {
6196 if ((new_prot & VM_PROT_EXECUTE) ||
6197 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
6198 vm_map_unlock(map);
6199 return KERN_PROTECTION_FAILURE;
6200 }
6201 }
6202
6203 prev = current->vme_end;
6204 current = current->vme_next;
6205 }
6206
6207 #if __arm64__
6208 if (end > prev &&
6209 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
6210 vm_map_entry_t prev_entry;
6211
6212 prev_entry = current->vme_prev;
6213 if (prev_entry != vm_map_to_entry(map) &&
6214 !prev_entry->map_aligned &&
6215 (vm_map_round_page(prev_entry->vme_end,
6216 VM_MAP_PAGE_MASK(map))
6217 == end)) {
6218 /*
6219 * The last entry in our range is not "map-aligned"
6220 * but it would have reached all the way to "end"
6221 * if it had been map-aligned, so this is not really
6222 * a hole in the range and we can proceed.
6223 */
6224 prev = end;
6225 }
6226 }
6227 #endif /* __arm64__ */
6228
6229 if (end > prev) {
6230 vm_map_unlock(map);
6231 return KERN_INVALID_ADDRESS;
6232 }
6233
6234 /*
6235 * Go back and fix up protections.
6236 * Clip to start here if the range starts within
6237 * the entry.
6238 */
6239
6240 current = entry;
6241 if (current != vm_map_to_entry(map)) {
6242 /* clip and unnest if necessary */
6243 vm_map_clip_start(map, current, start);
6244 }
6245
6246 while ((current != vm_map_to_entry(map)) &&
6247 (current->vme_start < end)) {
6248 vm_prot_t old_prot;
6249
6250 vm_map_clip_end(map, current, end);
6251
6252 if (current->is_sub_map) {
6253 /* clipping did unnest if needed */
6254 assert(!current->use_pmap);
6255 }
6256
6257 old_prot = current->protection;
6258
6259 if (set_max) {
6260 current->max_protection = new_prot;
6261 current->protection = new_prot & old_prot;
6262 } else {
6263 current->protection = new_prot;
6264 }
6265
6266 /*
6267 * Update physical map if necessary.
6268 * If the request is to turn off write protection,
6269 * we won't do it for real (in pmap). This is because
6270 * it would cause copy-on-write to fail. We've already
6271 * set the new protection in the map, so if a
6272 * write-protect fault occurred, it will be fixed up
6273 * properly, COW or not.
6274 */
6275 if (current->protection != old_prot) {
6276 /* Look one level in: we support nested pmaps */
6277 /* from mapped submaps which are direct entries */
6278 /* in our map */
6279
6280 vm_prot_t prot;
6281
6282 prot = current->protection;
6283 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
6284 prot &= ~VM_PROT_WRITE;
6285 } else {
6286 assert(!VME_OBJECT(current)->code_signed);
6287 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6288 }
6289
6290 if (override_nx(map, VME_ALIAS(current)) && prot) {
6291 prot |= VM_PROT_EXECUTE;
6292 }
6293
6294 #if DEVELOPMENT || DEBUG
6295 if (!(old_prot & VM_PROT_EXECUTE) &&
6296 (prot & VM_PROT_EXECUTE) &&
6297 panic_on_unsigned_execute &&
6298 (proc_selfcsflags() & CS_KILL)) {
6299 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
6300 }
6301 #endif /* DEVELOPMENT || DEBUG */
6302
6303 if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
6304 if (current->wired_count) {
6305 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
6306 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6307 }
6308
6309 /* If the pmap layer cares about this
6310 * protection type, force a fault for
6311 * each page so that vm_fault will
6312 * repopulate the page with the full
6313 * set of protections.
6314 */
6315 /*
6316 * TODO: We don't seem to need this,
6317 * but this is due to an internal
6318 * implementation detail of
6319 * pmap_protect. Do we want to rely
6320 * on this?
6321 */
6322 prot = VM_PROT_NONE;
6323 }
6324
6325 if (current->is_sub_map && current->use_pmap) {
6326 pmap_protect(VME_SUBMAP(current)->pmap,
6327 current->vme_start,
6328 current->vme_end,
6329 prot);
6330 } else {
6331 if (prot & VM_PROT_WRITE) {
6332 if (VME_OBJECT(current) == compressor_object) {
6333 /*
6334 * For write requests on the
6335 * compressor, we will ask the
6336 * pmap layer to prevent us from
6337 * taking a write fault when we
6338 * attempt to access the mapping
6339 * next.
6340 */
6341 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6342 }
6343 }
6344
6345 pmap_protect_options(map->pmap,
6346 current->vme_start,
6347 current->vme_end,
6348 prot,
6349 pmap_options,
6350 NULL);
6351 }
6352 }
6353 current = current->vme_next;
6354 }
6355
6356 current = entry;
6357 while ((current != vm_map_to_entry(map)) &&
6358 (current->vme_start <= end)) {
6359 vm_map_simplify_entry(map, current);
6360 current = current->vme_next;
6361 }
6362
6363 vm_map_unlock(map);
6364 return KERN_SUCCESS;
6365 }
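/*
 * Editorial sketch (not part of the original source): how a caller might
 * use the routine above to drop write access on a range, versus lowering
 * the maximum protection.
 *
 *     // Change only the current protection of [start, end):
 *     kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 *     // Lower the maximum protection instead (set_max == TRUE); the
 *     // current protection is also intersected with the new maximum:
 *     kr = vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
 */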
6366
6367 /*
6368 * vm_map_inherit:
6369 *
6370 * Sets the inheritance of the specified address
6371 * range in the target map. Inheritance
6372 * affects how the map will be shared with
6373 * child maps at the time of vm_map_fork.
6374 */
6375 kern_return_t
6376 vm_map_inherit(
6377 vm_map_t map,
6378 vm_map_offset_t start,
6379 vm_map_offset_t end,
6380 vm_inherit_t new_inheritance)
6381 {
6382 vm_map_entry_t entry;
6383 vm_map_entry_t temp_entry;
6384
6385 vm_map_lock(map);
6386
6387 VM_MAP_RANGE_CHECK(map, start, end);
6388
6389 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6390 entry = temp_entry;
6391 } else {
6392 temp_entry = temp_entry->vme_next;
6393 entry = temp_entry;
6394 }
6395
6396 /* first check entire range for submaps which can't support the */
6397 /* given inheritance. */
6398 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6399 if (entry->is_sub_map) {
6400 if (new_inheritance == VM_INHERIT_COPY) {
6401 vm_map_unlock(map);
6402 return KERN_INVALID_ARGUMENT;
6403 }
6404 }
6405
6406 entry = entry->vme_next;
6407 }
6408
6409 entry = temp_entry;
6410 if (entry != vm_map_to_entry(map)) {
6411 /* clip and unnest if necessary */
6412 vm_map_clip_start(map, entry, start);
6413 }
6414
6415 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6416 vm_map_clip_end(map, entry, end);
6417 if (entry->is_sub_map) {
6418 /* clip did unnest if needed */
6419 assert(!entry->use_pmap);
6420 }
6421
6422 entry->inheritance = new_inheritance;
6423
6424 entry = entry->vme_next;
6425 }
6426
6427 vm_map_unlock(map);
6428 return KERN_SUCCESS;
6429 }
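/*
 * Editorial sketch (not part of the original source): marking a range so
 * that a child map created by vm_map_fork() shares it with the parent
 * rather than copying it.
 *
 *     kr = vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
 *     // VM_INHERIT_COPY and VM_INHERIT_NONE are the other common choices;
 *     // note that VM_INHERIT_COPY is rejected above for submap entries.
 */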
6430
6431 /*
6432 * Update the accounting for the amount of wired memory in this map. If the user has
6433 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6434 */
6435
6436 static kern_return_t
6437 add_wire_counts(
6438 vm_map_t map,
6439 vm_map_entry_t entry,
6440 boolean_t user_wire)
6441 {
6442 vm_map_size_t size;
6443
6444 if (user_wire) {
6445 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6446
6447 /*
6448 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6449 * this map entry.
6450 */
6451
6452 if (entry->user_wired_count == 0) {
6453 size = entry->vme_end - entry->vme_start;
6454
6455 /*
6456 * Since this is the first time the user is wiring this map entry, check to see if we're
6457 * exceeding the user wire limits. There is a per map limit which is the smaller of either
6458 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value. There is also
6459 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6460 * limit, then we fail.
6461 */
6462
6463 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6464 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6465 if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6466 os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
6467 } else {
6468 os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
6469 }
6470 return KERN_RESOURCE_SHORTAGE;
6471 }
6472
6473 /*
6474 * The first time the user wires an entry, we also increment the wired_count and add this to
6475 * the total that has been wired in the map.
6476 */
6477
6478 if (entry->wired_count >= MAX_WIRE_COUNT) {
6479 return KERN_FAILURE;
6480 }
6481
6482 entry->wired_count++;
6483 map->user_wire_size += size;
6484 }
6485
6486 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6487 return KERN_FAILURE;
6488 }
6489
6490 entry->user_wired_count++;
6491 } else {
6492 /*
6493 * The kernel is wiring the memory. Just bump the count and continue.
6494 */
6495
6496 if (entry->wired_count >= MAX_WIRE_COUNT) {
6497 panic("vm_map_wire: too many wirings");
6498 }
6499
6500 entry->wired_count++;
6501 }
6502
6503 return KERN_SUCCESS;
6504 }
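/*
 * Editorial note (not part of the original source): the user-wire admission
 * check above can be read as two limits, both of which must hold for the
 * first wire of an entry of "size" bytes:
 *
 *     size + map->user_wire_size       <= MIN(map->user_wire_limit,
 *                                             vm_per_task_user_wire_limit)
 *     size + ptoa_64(total_wire_count) <= vm_global_user_wire_limit
 *
 * where total_wire_count is vm_page_wire_count + vm_lopage_free_count.
 * Exceeding either limit makes the wire attempt fail with
 * KERN_RESOURCE_SHORTAGE.
 */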
6505
6506 /*
6507 * Update the memory wiring accounting now that the given map entry is being unwired.
6508 */
6509
6510 static void
6511 subtract_wire_counts(
6512 vm_map_t map,
6513 vm_map_entry_t entry,
6514 boolean_t user_wire)
6515 {
6516 if (user_wire) {
6517 /*
6518 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6519 */
6520
6521 if (entry->user_wired_count == 1) {
6522 /*
6523 * We're removing the last user wire reference. Decrement the wired_count and the total
6524 * user wired memory for this map.
6525 */
6526
6527 assert(entry->wired_count >= 1);
6528 entry->wired_count--;
6529 map->user_wire_size -= entry->vme_end - entry->vme_start;
6530 }
6531
6532 assert(entry->user_wired_count >= 1);
6533 entry->user_wired_count--;
6534 } else {
6535 /*
6536 * The kernel is unwiring the memory. Just update the count.
6537 */
6538
6539 assert(entry->wired_count >= 1);
6540 entry->wired_count--;
6541 }
6542 }
6543
6544 int cs_executable_wire = 0;
6545
6546 /*
6547 * vm_map_wire:
6548 *
6549 * Sets the pageability of the specified address range in the
6550 * target map as wired. Regions specified as not pageable require
6551 * locked-down physical memory and physical page maps. The
6552 * access_type variable indicates types of accesses that must not
6553 * generate page faults. This is checked against protection of
6554 * memory being locked-down.
6555 *
6556 * The map must not be locked, but a reference must remain to the
6557 * map throughout the call.
6558 */
6559 static kern_return_t
6560 vm_map_wire_nested(
6561 vm_map_t map,
6562 vm_map_offset_t start,
6563 vm_map_offset_t end,
6564 vm_prot_t caller_prot,
6565 vm_tag_t tag,
6566 boolean_t user_wire,
6567 pmap_t map_pmap,
6568 vm_map_offset_t pmap_addr,
6569 ppnum_t *physpage_p)
6570 {
6571 vm_map_entry_t entry;
6572 vm_prot_t access_type;
6573 struct vm_map_entry *first_entry, tmp_entry;
6574 vm_map_t real_map;
6575 vm_map_offset_t s, e;
6576 kern_return_t rc;
6577 boolean_t need_wakeup;
6578 boolean_t main_map = FALSE;
6579 wait_interrupt_t interruptible_state;
6580 thread_t cur_thread;
6581 unsigned int last_timestamp;
6582 vm_map_size_t size;
6583 boolean_t wire_and_extract;
6584 vm_prot_t extra_prots;
6585
6586 extra_prots = VM_PROT_COPY;
6587 extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6588 #if XNU_TARGET_OS_OSX
6589 if (map->pmap == kernel_pmap ||
6590 !vm_map_cs_enforcement(map)) {
6591 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6592 }
6593 #endif /* XNU_TARGET_OS_OSX */
6594
6595 access_type = (caller_prot & VM_PROT_ALL);
6596
6597 wire_and_extract = FALSE;
6598 if (physpage_p != NULL) {
6599 /*
6600 * The caller wants the physical page number of the
6601 * wired page. We return only one physical page number
6602 * so this works for only one page at a time.
6603 */
6604 if ((end - start) != PAGE_SIZE) {
6605 return KERN_INVALID_ARGUMENT;
6606 }
6607 wire_and_extract = TRUE;
6608 *physpage_p = 0;
6609 }
6610
6611 vm_map_lock(map);
6612 if (map_pmap == NULL) {
6613 main_map = TRUE;
6614 }
6615 last_timestamp = map->timestamp;
6616
6617 VM_MAP_RANGE_CHECK(map, start, end);
6618 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6619 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6620
6621 if (start == end) {
6622 /* We wired what the caller asked for, zero pages */
6623 vm_map_unlock(map);
6624 return KERN_SUCCESS;
6625 }
6626
6627 need_wakeup = FALSE;
6628 cur_thread = current_thread();
6629
6630 s = start;
6631 rc = KERN_SUCCESS;
6632
6633 if (vm_map_lookup_entry(map, s, &first_entry)) {
6634 entry = first_entry;
6635 /*
6636 * vm_map_clip_start will be done later.
6637 * We don't want to unnest any nested submaps here !
6638 */
6639 } else {
6640 /* Start address is not in map */
6641 rc = KERN_INVALID_ADDRESS;
6642 goto done;
6643 }
6644
6645 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6646 /*
6647 * At this point, we have wired from "start" to "s".
6648 * We still need to wire from "s" to "end".
6649 *
6650 * "entry" hasn't been clipped, so it could start before "s"
6651 * and/or end after "end".
6652 */
6653
6654 /* "e" is how far we want to wire in this entry */
6655 e = entry->vme_end;
6656 if (e > end) {
6657 e = end;
6658 }
6659
6660 /*
6661 * If another thread is wiring/unwiring this entry then
6662 * block after informing other thread to wake us up.
6663 */
6664 if (entry->in_transition) {
6665 wait_result_t wait_result;
6666
6667 /*
6668 * We have not clipped the entry. Make sure that
6669 * the start address is in range so that the lookup
6670 * below will succeed.
6671 * "s" is the current starting point: we've already
6672 * wired from "start" to "s" and we still have
6673 * to wire from "s" to "end".
6674 */
6675
6676 entry->needs_wakeup = TRUE;
6677
6678 /*
6679 * wake up anybody waiting on entries that we have
6680 * already wired.
6681 */
6682 if (need_wakeup) {
6683 vm_map_entry_wakeup(map);
6684 need_wakeup = FALSE;
6685 }
6686 /*
6687 * User wiring is interruptible
6688 */
6689 wait_result = vm_map_entry_wait(map,
6690 (user_wire) ? THREAD_ABORTSAFE :
6691 THREAD_UNINT);
6692 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6693 /*
6694 * undo the wirings we have done so far
6695 * We do not clear the needs_wakeup flag,
6696 * because we cannot tell if we were the
6697 * only one waiting.
6698 */
6699 rc = KERN_FAILURE;
6700 goto done;
6701 }
6702
6703 /*
6704 * Cannot avoid a lookup here. Reset the timestamp.
6705 */
6706 last_timestamp = map->timestamp;
6707
6708 /*
6709 * The entry could have been clipped, look it up again.
6710 * The worst that can happen is that it may not exist anymore.
6711 */
6712 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6713 /*
6714 * User: undo everything up to the previous
6715 * entry. let vm_map_unwire worry about
6716 * checking the validity of the range.
6717 */
6718 rc = KERN_FAILURE;
6719 goto done;
6720 }
6721 entry = first_entry;
6722 continue;
6723 }
6724
6725 if (entry->is_sub_map) {
6726 vm_map_offset_t sub_start;
6727 vm_map_offset_t sub_end;
6728 vm_map_offset_t local_start;
6729 vm_map_offset_t local_end;
6730 pmap_t pmap;
6731
6732 if (wire_and_extract) {
6733 /*
6734 * Wiring would result in copy-on-write
6735 * which would not be compatible with
6736 * the sharing we have with the original
6737 * provider of this memory.
6738 */
6739 rc = KERN_INVALID_ARGUMENT;
6740 goto done;
6741 }
6742
6743 vm_map_clip_start(map, entry, s);
6744 vm_map_clip_end(map, entry, end);
6745
6746 sub_start = VME_OFFSET(entry);
6747 sub_end = entry->vme_end;
6748 sub_end += VME_OFFSET(entry) - entry->vme_start;
6749
6750 local_end = entry->vme_end;
6751 if (map_pmap == NULL) {
6752 vm_object_t object;
6753 vm_object_offset_t offset;
6754 vm_prot_t prot;
6755 boolean_t wired;
6756 vm_map_entry_t local_entry;
6757 vm_map_version_t version;
6758 vm_map_t lookup_map;
6759
6760 if (entry->use_pmap) {
6761 pmap = VME_SUBMAP(entry)->pmap;
6762 /* ppc implementation requires that */
6763 /* submap's pmap address ranges line */
6764 /* up with parent map */
6765 #ifdef notdef
6766 pmap_addr = sub_start;
6767 #endif
6768 pmap_addr = s;
6769 } else {
6770 pmap = map->pmap;
6771 pmap_addr = s;
6772 }
6773
6774 if (entry->wired_count) {
6775 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6776 goto done;
6777 }
6778
6779 /*
6780 * The map was not unlocked:
6781 * no need to goto re-lookup.
6782 * Just go directly to next entry.
6783 */
6784 entry = entry->vme_next;
6785 s = entry->vme_start;
6786 continue;
6787 }
6788
6789 /* call vm_map_lookup_locked to */
6790 /* cause any needs copy to be */
6791 /* evaluated */
6792 local_start = entry->vme_start;
6793 lookup_map = map;
6794 vm_map_lock_write_to_read(map);
6795 rc = vm_map_lookup_locked(
6796 &lookup_map, local_start,
6797 (access_type | extra_prots),
6798 OBJECT_LOCK_EXCLUSIVE,
6799 &version, &object,
6800 &offset, &prot, &wired,
6801 NULL,
6802 &real_map, NULL);
6803 if (rc != KERN_SUCCESS) {
6804 vm_map_unlock_read(lookup_map);
6805 assert(map_pmap == NULL);
6806 vm_map_unwire(map, start,
6807 s, user_wire);
6808 return rc;
6809 }
6810 vm_object_unlock(object);
6811 if (real_map != lookup_map) {
6812 vm_map_unlock(real_map);
6813 }
6814 vm_map_unlock_read(lookup_map);
6815 vm_map_lock(map);
6816
6817 /* we unlocked, so must re-lookup */
6818 if (!vm_map_lookup_entry(map,
6819 local_start,
6820 &local_entry)) {
6821 rc = KERN_FAILURE;
6822 goto done;
6823 }
6824
6825 /*
6826 * entry could have been "simplified",
6827 * so re-clip
6828 */
6829 entry = local_entry;
6830 assert(s == local_start);
6831 vm_map_clip_start(map, entry, s);
6832 vm_map_clip_end(map, entry, end);
6833 /* re-compute "e" */
6834 e = entry->vme_end;
6835 if (e > end) {
6836 e = end;
6837 }
6838
6839 /* did we have a change of type? */
6840 if (!entry->is_sub_map) {
6841 last_timestamp = map->timestamp;
6842 continue;
6843 }
6844 } else {
6845 local_start = entry->vme_start;
6846 pmap = map_pmap;
6847 }
6848
6849 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6850 goto done;
6851 }
6852
6853 entry->in_transition = TRUE;
6854
6855 vm_map_unlock(map);
6856 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6857 sub_start, sub_end,
6858 caller_prot, tag,
6859 user_wire, pmap, pmap_addr,
6860 NULL);
6861 vm_map_lock(map);
6862
6863 /*
6864 * Find the entry again. It could have been clipped
6865 * after we unlocked the map.
6866 */
6867 if (!vm_map_lookup_entry(map, local_start,
6868 &first_entry)) {
6869 panic("vm_map_wire: re-lookup failed");
6870 }
6871 entry = first_entry;
6872
6873 assert(local_start == s);
6874 /* re-compute "e" */
6875 e = entry->vme_end;
6876 if (e > end) {
6877 e = end;
6878 }
6879
6880 last_timestamp = map->timestamp;
6881 while ((entry != vm_map_to_entry(map)) &&
6882 (entry->vme_start < e)) {
6883 assert(entry->in_transition);
6884 entry->in_transition = FALSE;
6885 if (entry->needs_wakeup) {
6886 entry->needs_wakeup = FALSE;
6887 need_wakeup = TRUE;
6888 }
6889 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6890 subtract_wire_counts(map, entry, user_wire);
6891 }
6892 entry = entry->vme_next;
6893 }
6894 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6895 goto done;
6896 }
6897
6898 /* no need to relookup again */
6899 s = entry->vme_start;
6900 continue;
6901 }
6902
6903 /*
6904 * If this entry is already wired then increment
6905 * the appropriate wire reference count.
6906 */
6907 if (entry->wired_count) {
6908 if ((entry->protection & access_type) != access_type) {
6909 /* found a protection problem */
6910
6911 /*
6912 * XXX FBDP
6913 * We should always return an error
6914 * in this case but since we didn't
6915 * enforce it before, let's do
6916 * it only for the new "wire_and_extract"
6917 * code path for now...
6918 */
6919 if (wire_and_extract) {
6920 rc = KERN_PROTECTION_FAILURE;
6921 goto done;
6922 }
6923 }
6924
6925 /*
6926 * entry is already wired down, get our reference
6927 * after clipping to our range.
6928 */
6929 vm_map_clip_start(map, entry, s);
6930 vm_map_clip_end(map, entry, end);
6931
6932 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6933 goto done;
6934 }
6935
6936 if (wire_and_extract) {
6937 vm_object_t object;
6938 vm_object_offset_t offset;
6939 vm_page_t m;
6940
6941 /*
6942 * We don't have to "wire" the page again
6943 * but we still have to "extract" its
6944 * physical page number, after some sanity
6945 * checks.
6946 */
6947 assert((entry->vme_end - entry->vme_start)
6948 == PAGE_SIZE);
6949 assert(!entry->needs_copy);
6950 assert(!entry->is_sub_map);
6951 assert(VME_OBJECT(entry));
6952 if (((entry->vme_end - entry->vme_start)
6953 != PAGE_SIZE) ||
6954 entry->needs_copy ||
6955 entry->is_sub_map ||
6956 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6957 rc = KERN_INVALID_ARGUMENT;
6958 goto done;
6959 }
6960
6961 object = VME_OBJECT(entry);
6962 offset = VME_OFFSET(entry);
6963 /* need exclusive lock to update m->dirty */
6964 if (entry->protection & VM_PROT_WRITE) {
6965 vm_object_lock(object);
6966 } else {
6967 vm_object_lock_shared(object);
6968 }
6969 m = vm_page_lookup(object, offset);
6970 assert(m != VM_PAGE_NULL);
6971 assert(VM_PAGE_WIRED(m));
6972 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6973 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6974 if (entry->protection & VM_PROT_WRITE) {
6975 vm_object_lock_assert_exclusive(
6976 object);
6977 m->vmp_dirty = TRUE;
6978 }
6979 } else {
6980 /* not already wired !? */
6981 *physpage_p = 0;
6982 }
6983 vm_object_unlock(object);
6984 }
6985
6986 /* map was not unlocked: no need to relookup */
6987 entry = entry->vme_next;
6988 s = entry->vme_start;
6989 continue;
6990 }
6991
6992 /*
6993 * Unwired entry or wire request transmitted via submap
6994 */
6995
6996 /*
6997 * Wiring would copy the pages to the shadow object.
6998 * The shadow object would not be code-signed so
6999 * attempting to execute code from these copied pages
7000 * would trigger a code-signing violation.
7001 */
7002
7003 if ((entry->protection & VM_PROT_EXECUTE)
7004 #if XNU_TARGET_OS_OSX
7005 &&
7006 map->pmap != kernel_pmap &&
7007 (vm_map_cs_enforcement(map)
7008 #if __arm64__
7009 || !VM_MAP_IS_EXOTIC(map)
7010 #endif /* __arm64__ */
7011 )
7012 #endif /* XNU_TARGET_OS_OSX */
7013 ) {
7014 #if MACH_ASSERT
7015 printf("pid %d[%s] wiring executable range from "
7016 "0x%llx to 0x%llx: rejected to preserve "
7017 "code-signing\n",
7018 proc_selfpid(),
7019 (current_task()->bsd_info
7020 ? proc_name_address(current_task()->bsd_info)
7021 : "?"),
7022 (uint64_t) entry->vme_start,
7023 (uint64_t) entry->vme_end);
7024 #endif /* MACH_ASSERT */
7025 DTRACE_VM2(cs_executable_wire,
7026 uint64_t, (uint64_t)entry->vme_start,
7027 uint64_t, (uint64_t)entry->vme_end);
7028 cs_executable_wire++;
7029 rc = KERN_PROTECTION_FAILURE;
7030 goto done;
7031 }
7032
7033 /*
7034 * Perform actions of vm_map_lookup that need the write
7035 * lock on the map: create a shadow object for a
7036 * copy-on-write region, or an object for a zero-fill
7037 * region.
7038 */
7039 size = entry->vme_end - entry->vme_start;
7040 /*
7041 * If wiring a copy-on-write page, we need to copy it now
7042 * even if we're only (currently) requesting read access.
7043 * This is aggressive, but once it's wired we can't move it.
7044 */
7045 if (entry->needs_copy) {
7046 if (wire_and_extract) {
7047 /*
7048 * We're supposed to share with the original
7049 * provider so should not be "needs_copy"
7050 */
7051 rc = KERN_INVALID_ARGUMENT;
7052 goto done;
7053 }
7054
7055 VME_OBJECT_SHADOW(entry, size);
7056 entry->needs_copy = FALSE;
7057 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
7058 if (wire_and_extract) {
7059 /*
7060 * We're supposed to share with the original
7061 * provider so should already have an object.
7062 */
7063 rc = KERN_INVALID_ARGUMENT;
7064 goto done;
7065 }
7066 VME_OBJECT_SET(entry, vm_object_allocate(size));
7067 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
7068 assert(entry->use_pmap);
7069 }
7070
7071 vm_map_clip_start(map, entry, s);
7072 vm_map_clip_end(map, entry, end);
7073
7074 /* re-compute "e" */
7075 e = entry->vme_end;
7076 if (e > end) {
7077 e = end;
7078 }
7079
7080 /*
7081 * Check for holes and protection mismatch.
7082 * Holes: Next entry should be contiguous unless this
7083 * is the end of the region.
7084 * Protection: Access requested must be allowed, unless
7085 * wiring is by protection class
7086 */
7087 if ((entry->vme_end < end) &&
7088 ((entry->vme_next == vm_map_to_entry(map)) ||
7089 (entry->vme_next->vme_start > entry->vme_end))) {
7090 /* found a hole */
7091 rc = KERN_INVALID_ADDRESS;
7092 goto done;
7093 }
7094 if ((entry->protection & access_type) != access_type) {
7095 /* found a protection problem */
7096 rc = KERN_PROTECTION_FAILURE;
7097 goto done;
7098 }
7099
7100 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
7101
7102 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
7103 goto done;
7104 }
7105
7106 entry->in_transition = TRUE;
7107
7108 /*
7109 * This entry might get split once we unlock the map.
7110 * In vm_fault_wire(), we need the current range as
7111 * defined by this entry. In order for this to work
7112 * along with a simultaneous clip operation, we make a
7113 * temporary copy of this entry and use that for the
7114 * wiring. Note that the underlying objects do not
7115 * change during a clip.
7116 */
7117 tmp_entry = *entry;
7118
7119 /*
7120 * The in_transition state guarantees that the entry
7121 * (or entries for this range, if a split occurred) will be
7122 * there when the map lock is acquired for the second time.
7123 */
7124 vm_map_unlock(map);
7125
7126 if (!user_wire && cur_thread != THREAD_NULL) {
7127 interruptible_state = thread_interrupt_level(THREAD_UNINT);
7128 } else {
7129 interruptible_state = THREAD_UNINT;
7130 }
7131
7132 if (map_pmap) {
7133 rc = vm_fault_wire(map,
7134 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
7135 physpage_p);
7136 } else {
7137 rc = vm_fault_wire(map,
7138 &tmp_entry, caller_prot, tag, map->pmap,
7139 tmp_entry.vme_start,
7140 physpage_p);
7141 }
7142
7143 if (!user_wire && cur_thread != THREAD_NULL) {
7144 thread_interrupt_level(interruptible_state);
7145 }
7146
7147 vm_map_lock(map);
7148
7149 if (last_timestamp + 1 != map->timestamp) {
7150 /*
7151 * Find the entry again. It could have been clipped
7152 * after we unlocked the map.
7153 */
7154 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7155 &first_entry)) {
7156 panic("vm_map_wire: re-lookup failed");
7157 }
7158
7159 entry = first_entry;
7160 }
7161
7162 last_timestamp = map->timestamp;
7163
7164 while ((entry != vm_map_to_entry(map)) &&
7165 (entry->vme_start < tmp_entry.vme_end)) {
7166 assert(entry->in_transition);
7167 entry->in_transition = FALSE;
7168 if (entry->needs_wakeup) {
7169 entry->needs_wakeup = FALSE;
7170 need_wakeup = TRUE;
7171 }
7172 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7173 subtract_wire_counts(map, entry, user_wire);
7174 }
7175 entry = entry->vme_next;
7176 }
7177
7178 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7179 goto done;
7180 }
7181
7182 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
7183 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
7184 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
7185 /* found a "new" hole */
7186 s = tmp_entry.vme_end;
7187 rc = KERN_INVALID_ADDRESS;
7188 goto done;
7189 }
7190
7191 s = entry->vme_start;
7192 } /* end while loop through map entries */
7193
7194 done:
7195 if (rc == KERN_SUCCESS) {
7196 /* repair any damage we may have made to the VM map */
7197 vm_map_simplify_range(map, start, end);
7198 }
7199
7200 vm_map_unlock(map);
7201
7202 /*
7203 * wake up anybody waiting on entries we wired.
7204 */
7205 if (need_wakeup) {
7206 vm_map_entry_wakeup(map);
7207 }
7208
7209 if (rc != KERN_SUCCESS) {
7210 /* undo what has been wired so far */
7211 vm_map_unwire_nested(map, start, s, user_wire,
7212 map_pmap, pmap_addr);
7213 if (physpage_p) {
7214 *physpage_p = 0;
7215 }
7216 }
7217
7218 return rc;
7219 }
7220
7221 kern_return_t
7222 vm_map_wire_external(
7223 vm_map_t map,
7224 vm_map_offset_t start,
7225 vm_map_offset_t end,
7226 vm_prot_t caller_prot,
7227 boolean_t user_wire)
7228 {
7229 kern_return_t kret;
7230
7231 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7232 user_wire, (pmap_t)NULL, 0, NULL);
7233 return kret;
7234 }
7235
7236 kern_return_t
7237 vm_map_wire_kernel(
7238 vm_map_t map,
7239 vm_map_offset_t start,
7240 vm_map_offset_t end,
7241 vm_prot_t caller_prot,
7242 vm_tag_t tag,
7243 boolean_t user_wire)
7244 {
7245 kern_return_t kret;
7246
7247 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7248 user_wire, (pmap_t)NULL, 0, NULL);
7249 return kret;
7250 }
7251
7252 kern_return_t
7253 vm_map_wire_and_extract_external(
7254 vm_map_t map,
7255 vm_map_offset_t start,
7256 vm_prot_t caller_prot,
7257 boolean_t user_wire,
7258 ppnum_t *physpage_p)
7259 {
7260 kern_return_t kret;
7261
7262 kret = vm_map_wire_nested(map,
7263 start,
7264 start + VM_MAP_PAGE_SIZE(map),
7265 caller_prot,
7266 vm_tag_bt(),
7267 user_wire,
7268 (pmap_t)NULL,
7269 0,
7270 physpage_p);
7271 if (kret != KERN_SUCCESS &&
7272 physpage_p != NULL) {
7273 *physpage_p = 0;
7274 }
7275 return kret;
7276 }
7277
7278 kern_return_t
7279 vm_map_wire_and_extract_kernel(
7280 vm_map_t map,
7281 vm_map_offset_t start,
7282 vm_prot_t caller_prot,
7283 vm_tag_t tag,
7284 boolean_t user_wire,
7285 ppnum_t *physpage_p)
7286 {
7287 kern_return_t kret;
7288
7289 kret = vm_map_wire_nested(map,
7290 start,
7291 start + VM_MAP_PAGE_SIZE(map),
7292 caller_prot,
7293 tag,
7294 user_wire,
7295 (pmap_t)NULL,
7296 0,
7297 physpage_p);
7298 if (kret != KERN_SUCCESS &&
7299 physpage_p != NULL) {
7300 *physpage_p = 0;
7301 }
7302 return kret;
7303 }
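/*
 * Editorial sketch (not part of the original source): wiring a single page
 * and retrieving its physical page number with the routine above.  The tag
 * value is illustrative.
 *
 *     ppnum_t phys_page = 0;
 *     kern_return_t kr;
 *
 *     kr = vm_map_wire_and_extract_kernel(map,
 *         addr,                           // exactly one map page is wired
 *         VM_PROT_READ | VM_PROT_WRITE,
 *         VM_KERN_MEMORY_OSFMK,           // illustrative vm_tag_t
 *         FALSE,                          // kernel (not user) wiring
 *         &phys_page);
 *     if (kr == KERN_SUCCESS) {
 *             // phys_page holds the wired page's physical page number
 *     }
 */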
7304
7305 /*
7306 * vm_map_unwire:
7307 *
7308 * Sets the pageability of the specified address range in the target
7309 * as pageable. Regions specified must have been wired previously.
7310 *
7311 * The map must not be locked, but a reference must remain to the map
7312 * throughout the call.
7313 *
7314 * Kernel will panic on failures. User unwire ignores holes and
7315 * unwired and in-transition entries to avoid losing memory by leaving
7316 * it unwired.
7317 */
7318 static kern_return_t
7319 vm_map_unwire_nested(
7320 vm_map_t map,
7321 vm_map_offset_t start,
7322 vm_map_offset_t end,
7323 boolean_t user_wire,
7324 pmap_t map_pmap,
7325 vm_map_offset_t pmap_addr)
7326 {
7327 vm_map_entry_t entry;
7328 struct vm_map_entry *first_entry, tmp_entry;
7329 boolean_t need_wakeup;
7330 boolean_t main_map = FALSE;
7331 unsigned int last_timestamp;
7332
7333 vm_map_lock(map);
7334 if (map_pmap == NULL) {
7335 main_map = TRUE;
7336 }
7337 last_timestamp = map->timestamp;
7338
7339 VM_MAP_RANGE_CHECK(map, start, end);
7340 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7341 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7342
7343 if (start == end) {
7344 /* We unwired what the caller asked for: zero pages */
7345 vm_map_unlock(map);
7346 return KERN_SUCCESS;
7347 }
7348
7349 if (vm_map_lookup_entry(map, start, &first_entry)) {
7350 entry = first_entry;
7351 /*
7352 * vm_map_clip_start will be done later.
7353 * We don't want to unnest any nested sub maps here !
7354 */
7355 } else {
7356 if (!user_wire) {
7357 panic("vm_map_unwire: start not found");
7358 }
7359 /* Start address is not in map. */
7360 vm_map_unlock(map);
7361 return KERN_INVALID_ADDRESS;
7362 }
7363
7364 if (entry->superpage_size) {
7365 /* superpages are always wired */
7366 vm_map_unlock(map);
7367 return KERN_INVALID_ADDRESS;
7368 }
7369
7370 need_wakeup = FALSE;
7371 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7372 if (entry->in_transition) {
7373 /*
7374 * 1)
7375 * Another thread is wiring down this entry. Note
7376 * that if it is not for the other thread we would
7377 * be unwiring an unwired entry. This is not
7378 * permitted. If we wait, we will be unwiring memory
7379 * we did not wire.
7380 *
7381 * 2)
7382 * Another thread is unwiring this entry. We did not
7383 * have a reference to it, because if we did, this
7384 * entry will not be getting unwired now.
7385 */
7386 if (!user_wire) {
7387 /*
7388 * XXX FBDP
7389 * This could happen: there could be some
7390 * overlapping vslock/vsunlock operations
7391 * going on.
7392 * We should probably just wait and retry,
7393 * but then we have to be careful that this
7394 * entry could get "simplified" after
7395 * "in_transition" gets unset and before
7396 * we re-lookup the entry, so we would
7397 * have to re-clip the entry to avoid
7398 * re-unwiring what we have already unwired...
7399 * See vm_map_wire_nested().
7400 *
7401 * Or we could just ignore "in_transition"
7402 * here and proceed to decrement the wired
7403 * count(s) on this entry. That should be fine
7404 * as long as "wired_count" doesn't drop all
7405 * the way to 0 (and we should panic if THAT
7406 * happens).
7407 */
7408 panic("vm_map_unwire: in_transition entry");
7409 }
7410
7411 entry = entry->vme_next;
7412 continue;
7413 }
7414
7415 if (entry->is_sub_map) {
7416 vm_map_offset_t sub_start;
7417 vm_map_offset_t sub_end;
7418 vm_map_offset_t local_end;
7419 pmap_t pmap;
7420
7421 vm_map_clip_start(map, entry, start);
7422 vm_map_clip_end(map, entry, end);
7423
7424 sub_start = VME_OFFSET(entry);
7425 sub_end = entry->vme_end - entry->vme_start;
7426 sub_end += VME_OFFSET(entry);
7427 local_end = entry->vme_end;
7428 if (map_pmap == NULL) {
7429 if (entry->use_pmap) {
7430 pmap = VME_SUBMAP(entry)->pmap;
7431 pmap_addr = sub_start;
7432 } else {
7433 pmap = map->pmap;
7434 pmap_addr = start;
7435 }
7436 if (entry->wired_count == 0 ||
7437 (user_wire && entry->user_wired_count == 0)) {
7438 if (!user_wire) {
7439 panic("vm_map_unwire: entry is unwired");
7440 }
7441 entry = entry->vme_next;
7442 continue;
7443 }
7444
7445 /*
7446 * Check for holes
7447 * Holes: Next entry should be contiguous unless
7448 * this is the end of the region.
7449 */
7450 if (((entry->vme_end < end) &&
7451 ((entry->vme_next == vm_map_to_entry(map)) ||
7452 (entry->vme_next->vme_start
7453 > entry->vme_end)))) {
7454 if (!user_wire) {
7455 panic("vm_map_unwire: non-contiguous region");
7456 }
7457 /*
7458 * entry = entry->vme_next;
7459 * continue;
7460 */
7461 }
7462
7463 subtract_wire_counts(map, entry, user_wire);
7464
7465 if (entry->wired_count != 0) {
7466 entry = entry->vme_next;
7467 continue;
7468 }
7469
7470 entry->in_transition = TRUE;
7471 tmp_entry = *entry;/* see comment in vm_map_wire() */
7472
7473 /*
7474 * We can unlock the map now. The in_transition state
7475 * guarantees the existence of the entry.
7476 */
7477 vm_map_unlock(map);
7478 vm_map_unwire_nested(VME_SUBMAP(entry),
7479 sub_start, sub_end, user_wire, pmap, pmap_addr);
7480 vm_map_lock(map);
7481
7482 if (last_timestamp + 1 != map->timestamp) {
7483 /*
7484 * Find the entry again. It could have been
7485 * clipped or deleted after we unlocked the map.
7486 */
7487 if (!vm_map_lookup_entry(map,
7488 tmp_entry.vme_start,
7489 &first_entry)) {
7490 if (!user_wire) {
7491 panic("vm_map_unwire: re-lookup failed");
7492 }
7493 entry = first_entry->vme_next;
7494 } else {
7495 entry = first_entry;
7496 }
7497 }
7498 last_timestamp = map->timestamp;
7499
7500 /*
7501 * clear transition bit for all constituent entries
7502 * that were in the original entry (saved in
7503 * tmp_entry). Also check for waiters.
7504 */
7505 while ((entry != vm_map_to_entry(map)) &&
7506 (entry->vme_start < tmp_entry.vme_end)) {
7507 assert(entry->in_transition);
7508 entry->in_transition = FALSE;
7509 if (entry->needs_wakeup) {
7510 entry->needs_wakeup = FALSE;
7511 need_wakeup = TRUE;
7512 }
7513 entry = entry->vme_next;
7514 }
7515 continue;
7516 } else {
7517 vm_map_unlock(map);
7518 vm_map_unwire_nested(VME_SUBMAP(entry),
7519 sub_start, sub_end, user_wire, map_pmap,
7520 pmap_addr);
7521 vm_map_lock(map);
7522
7523 if (last_timestamp + 1 != map->timestamp) {
7524 /*
7525 * Find the entry again. It could have been
7526 * clipped or deleted after we unlocked the map.
7527 */
7528 if (!vm_map_lookup_entry(map,
7529 tmp_entry.vme_start,
7530 &first_entry)) {
7531 if (!user_wire) {
7532 panic("vm_map_unwire: re-lookup failed");
7533 }
7534 entry = first_entry->vme_next;
7535 } else {
7536 entry = first_entry;
7537 }
7538 }
7539 last_timestamp = map->timestamp;
7540 }
7541 }
7542
7543
7544 if ((entry->wired_count == 0) ||
7545 (user_wire && entry->user_wired_count == 0)) {
7546 if (!user_wire) {
7547 panic("vm_map_unwire: entry is unwired");
7548 }
7549
7550 entry = entry->vme_next;
7551 continue;
7552 }
7553
7554 assert(entry->wired_count > 0 &&
7555 (!user_wire || entry->user_wired_count > 0));
7556
7557 vm_map_clip_start(map, entry, start);
7558 vm_map_clip_end(map, entry, end);
7559
7560 /*
7561 * Check for holes
7562 * Holes: Next entry should be contiguous unless
7563 * this is the end of the region.
7564 */
7565 if (((entry->vme_end < end) &&
7566 ((entry->vme_next == vm_map_to_entry(map)) ||
7567 (entry->vme_next->vme_start > entry->vme_end)))) {
7568 if (!user_wire) {
7569 panic("vm_map_unwire: non-contiguous region");
7570 }
7571 entry = entry->vme_next;
7572 continue;
7573 }
7574
7575 subtract_wire_counts(map, entry, user_wire);
7576
7577 if (entry->wired_count != 0) {
7578 entry = entry->vme_next;
7579 continue;
7580 }
7581
7582 if (entry->zero_wired_pages) {
7583 entry->zero_wired_pages = FALSE;
7584 }
7585
7586 entry->in_transition = TRUE;
7587 tmp_entry = *entry; /* see comment in vm_map_wire() */
7588
7589 /*
7590 * We can unlock the map now. The in_transition state
7591 * guarantees the existence of the entry.
7592 */
7593 vm_map_unlock(map);
7594 if (map_pmap) {
7595 vm_fault_unwire(map,
7596 &tmp_entry, FALSE, map_pmap, pmap_addr);
7597 } else {
7598 vm_fault_unwire(map,
7599 &tmp_entry, FALSE, map->pmap,
7600 tmp_entry.vme_start);
7601 }
7602 vm_map_lock(map);
7603
7604 if (last_timestamp + 1 != map->timestamp) {
7605 /*
7606 * Find the entry again. It could have been clipped
7607 * or deleted after we unlocked the map.
7608 */
7609 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7610 &first_entry)) {
7611 if (!user_wire) {
7612 panic("vm_map_unwire: re-lookup failed");
7613 }
7614 entry = first_entry->vme_next;
7615 } else {
7616 entry = first_entry;
7617 }
7618 }
7619 last_timestamp = map->timestamp;
7620
7621 /*
7622 * clear transition bit for all constituent entries that
7623 * were in the original entry (saved in tmp_entry). Also
7624 * check for waiters.
7625 */
7626 while ((entry != vm_map_to_entry(map)) &&
7627 (entry->vme_start < tmp_entry.vme_end)) {
7628 assert(entry->in_transition);
7629 entry->in_transition = FALSE;
7630 if (entry->needs_wakeup) {
7631 entry->needs_wakeup = FALSE;
7632 need_wakeup = TRUE;
7633 }
7634 entry = entry->vme_next;
7635 }
7636 }
7637
7638 /*
7639 * We might have fragmented the address space when we wired this
7640 * range of addresses. Attempt to re-coalesce these VM map entries
7641 * with their neighbors now that they're no longer wired.
7642 * Under some circumstances, address space fragmentation can
7643 * prevent VM object shadow chain collapsing, which can cause
7644 * swap space leaks.
7645 */
7646 vm_map_simplify_range(map, start, end);
7647
7648 vm_map_unlock(map);
7649 /*
7650 * wake up anybody waiting on entries that we have unwired.
7651 */
7652 if (need_wakeup) {
7653 vm_map_entry_wakeup(map);
7654 }
7655 return KERN_SUCCESS;
7656 }
7657
7658 kern_return_t
7659 vm_map_unwire(
7660 vm_map_t map,
7661 vm_map_offset_t start,
7662 vm_map_offset_t end,
7663 boolean_t user_wire)
7664 {
7665 return vm_map_unwire_nested(map, start, end,
7666 user_wire, (pmap_t)NULL, 0);
7667 }
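/*
 * Editorial sketch (not part of the original source): a kernel wiring
 * paired with the corresponding unwire.  The tag is illustrative, and
 * user_wire is FALSE in both calls so only the kernel wire count is
 * adjusted.
 *
 *     kr = vm_map_wire_kernel(map, start, end,
 *         VM_PROT_READ, VM_KERN_MEMORY_OSFMK, FALSE);
 *     if (kr == KERN_SUCCESS) {
 *             // ... access the wired range ...
 *             kr = vm_map_unwire(map, start, end, FALSE);
 *     }
 */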
7668
7669
7670 /*
7671 * vm_map_entry_delete: [ internal use only ]
7672 *
7673 * Deallocate the given entry from the target map.
7674 */
7675 static void
7676 vm_map_entry_delete(
7677 vm_map_t map,
7678 vm_map_entry_t entry)
7679 {
7680 vm_map_offset_t s, e;
7681 vm_object_t object;
7682 vm_map_t submap;
7683
7684 s = entry->vme_start;
7685 e = entry->vme_end;
7686 assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7687 assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7688 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7689 assert(page_aligned(s));
7690 assert(page_aligned(e));
7691 }
7692 if (entry->map_aligned == TRUE) {
7693 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7694 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7695 }
7696 assert(entry->wired_count == 0);
7697 assert(entry->user_wired_count == 0);
7698 assert(!entry->permanent);
7699
7700 if (entry->is_sub_map) {
7701 object = NULL;
7702 submap = VME_SUBMAP(entry);
7703 } else {
7704 submap = NULL;
7705 object = VME_OBJECT(entry);
7706 }
7707
7708 vm_map_store_entry_unlink(map, entry);
7709 map->size -= e - s;
7710
7711 vm_map_entry_dispose(map, entry);
7712
7713 vm_map_unlock(map);
7714 /*
7715 * Deallocate the object only after removing all
7716 * pmap entries pointing to its pages.
7717 */
7718 if (submap) {
7719 vm_map_deallocate(submap);
7720 } else {
7721 vm_object_deallocate(object);
7722 }
7723 }
7724
7725 void
7726 vm_map_submap_pmap_clean(
7727 vm_map_t map,
7728 vm_map_offset_t start,
7729 vm_map_offset_t end,
7730 vm_map_t sub_map,
7731 vm_map_offset_t offset)
7732 {
7733 vm_map_offset_t submap_start;
7734 vm_map_offset_t submap_end;
7735 vm_map_size_t remove_size;
7736 vm_map_entry_t entry;
7737
7738 submap_end = offset + (end - start);
7739 submap_start = offset;
7740
7741 vm_map_lock_read(sub_map);
7742 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7743 remove_size = (entry->vme_end - entry->vme_start);
7744 if (offset > entry->vme_start) {
7745 remove_size -= offset - entry->vme_start;
7746 }
7747
7748
7749 if (submap_end < entry->vme_end) {
7750 remove_size -=
7751 entry->vme_end - submap_end;
7752 }
7753 if (entry->is_sub_map) {
7754 vm_map_submap_pmap_clean(
7755 sub_map,
7756 start,
7757 start + remove_size,
7758 VME_SUBMAP(entry),
7759 VME_OFFSET(entry));
7760 } else {
7761 if (map->mapped_in_other_pmaps &&
7762 os_ref_get_count(&map->map_refcnt) != 0 &&
7763 VME_OBJECT(entry) != NULL) {
7764 vm_object_pmap_protect_options(
7765 VME_OBJECT(entry),
7766 (VME_OFFSET(entry) +
7767 offset -
7768 entry->vme_start),
7769 remove_size,
7770 PMAP_NULL,
7771 PAGE_SIZE,
7772 entry->vme_start,
7773 VM_PROT_NONE,
7774 PMAP_OPTIONS_REMOVE);
7775 } else {
7776 pmap_remove(map->pmap,
7777 (addr64_t)start,
7778 (addr64_t)(start + remove_size));
7779 }
7780 }
7781 }
7782
7783 entry = entry->vme_next;
7784
7785 while ((entry != vm_map_to_entry(sub_map))
7786 && (entry->vme_start < submap_end)) {
7787 remove_size = (entry->vme_end - entry->vme_start);
7788 if (submap_end < entry->vme_end) {
7789 remove_size -= entry->vme_end - submap_end;
7790 }
7791 if (entry->is_sub_map) {
7792 vm_map_submap_pmap_clean(
7793 sub_map,
7794 (start + entry->vme_start) - offset,
7795 ((start + entry->vme_start) - offset) + remove_size,
7796 VME_SUBMAP(entry),
7797 VME_OFFSET(entry));
7798 } else {
7799 if (map->mapped_in_other_pmaps &&
7800 os_ref_get_count(&map->map_refcnt) != 0 &&
7801 VME_OBJECT(entry) != NULL) {
7802 vm_object_pmap_protect_options(
7803 VME_OBJECT(entry),
7804 VME_OFFSET(entry),
7805 remove_size,
7806 PMAP_NULL,
7807 PAGE_SIZE,
7808 entry->vme_start,
7809 VM_PROT_NONE,
7810 PMAP_OPTIONS_REMOVE);
7811 } else {
7812 pmap_remove(map->pmap,
7813 (addr64_t)((start + entry->vme_start)
7814 - offset),
7815 (addr64_t)(((start + entry->vme_start)
7816 - offset) + remove_size));
7817 }
7818 }
7819 entry = entry->vme_next;
7820 }
7821 vm_map_unlock_read(sub_map);
7822 return;
7823 }
7824
7825 /*
7826 * virt_memory_guard_ast:
7827 *
7828 * Handle the AST callout for a virtual memory guard.
7829 * raise an EXC_GUARD exception and terminate the task
7830 * if configured to do so.
7831 */
7832 void
7833 virt_memory_guard_ast(
7834 thread_t thread,
7835 mach_exception_data_type_t code,
7836 mach_exception_data_type_t subcode)
7837 {
7838 task_t task = thread->task;
7839 assert(task != kernel_task);
7840 assert(task == current_task());
7841 uint32_t behavior;
7842
7843 behavior = task->task_exc_guard;
7844
7845 /* Is delivery enabled */
7846 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7847 return;
7848 }
7849
7850 /* If only once, make sure we're that once */
7851 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7852 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7853
7854 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7855 break;
7856 }
7857 behavior = task->task_exc_guard;
7858 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7859 return;
7860 }
7861 }
7862
7863 /* Raise exception via corpse fork or synchronously */
7864 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7865 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7866 task_violated_guard(code, subcode, NULL);
7867 } else {
7868 task_exception_notify(EXC_GUARD, code, subcode);
7869 }
7870
7871 /* Terminate the task if desired */
7872 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7873 task_bsdtask_kill(current_task());
7874 }
7875 }
7876
7877 /*
7878 * vm_map_guard_exception:
7879 *
7880 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7881 *
7882 * Right now, we do this when we find nothing mapped, or a
7883 * gap in the mapping when a user address space deallocate
7884 * was requested. We report the address of the first gap found.
7885 */
7886 static void
7887 vm_map_guard_exception(
7888 vm_map_offset_t gap_start,
7889 unsigned reason)
7890 {
7891 mach_exception_code_t code = 0;
7892 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7893 unsigned int target = 0; /* should we pass in pid associated with map? */
7894 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7895 boolean_t fatal = FALSE;
7896
7897 task_t task = current_task();
7898
7899 /* Can't deliver exceptions to kernel task */
7900 if (task == kernel_task) {
7901 return;
7902 }
7903
7904 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7905 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7906 EXC_GUARD_ENCODE_TARGET(code, target);
7907
7908 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7909 fatal = TRUE;
7910 }
7911 thread_guard_violation(current_thread(), code, subcode, fatal);
7912 }
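/*
 * For reference, the call pattern used later in this file (from
 * vm_map_delete(), when a gap is found in a range being deallocated)
 * looks roughly like this:
 *
 *	if (gap_start != FIND_GAP && gap_start != GAPS_OK &&
 *	    !(flags & VM_MAP_REMOVE_GAPS_OK)) {
 *		vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
 *	}
 */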
7913
7914 /*
7915 * vm_map_delete: [ internal use only ]
7916 *
7917 * Deallocates the given address range from the target map.
7918 * Removes all user wirings. Unwires one kernel wiring if
7919 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7920 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7921 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7922 *
7923 * This routine is called with map locked and leaves map locked.
7924 */
7925 static kern_return_t
7926 vm_map_delete(
7927 vm_map_t map,
7928 vm_map_offset_t start,
7929 vm_map_offset_t end,
7930 int flags,
7931 vm_map_t zap_map)
7932 {
7933 vm_map_entry_t entry, next;
7934 struct vm_map_entry *first_entry, tmp_entry;
7935 vm_map_offset_t s;
7936 vm_object_t object;
7937 boolean_t need_wakeup;
7938 unsigned int last_timestamp = ~0; /* unlikely value */
7939 int interruptible;
7940 vm_map_offset_t gap_start;
7941 __unused vm_map_offset_t save_start = start;
7942 __unused vm_map_offset_t save_end = end;
7943 const vm_map_offset_t FIND_GAP = 1; /* a non-page-aligned value */
7944 const vm_map_offset_t GAPS_OK = 2; /* a different non-page-aligned value */
7945
7946 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
7947 gap_start = FIND_GAP;
7948 } else {
7949 gap_start = GAPS_OK;
7950 }
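/*
 * gap_start acts as a small state machine: it starts as one of the two
 * non-page-aligned sentinels above, is overwritten with the address of
 * the first gap found (if any), and is compared against both sentinels
 * at the end of this function before raising kGUARD_EXC_DEALLOC_GAP.
 */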
7951
7952 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7953 THREAD_ABORTSAFE : THREAD_UNINT;
7954
7955 /*
7956 * All our DMA I/O operations in IOKit are currently done by
7957 * wiring through the map entries of the task requesting the I/O.
7958 * Because of this, we must always wait for kernel wirings
7959 * to go away on the entries before deleting them.
7960 *
7961 * Any caller who wants to actually remove a kernel wiring
7962 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7963 * properly remove one wiring instead of blasting through
7964 * them all.
7965 */
7966 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7967
7968 while (1) {
7969 /*
7970 * Find the start of the region, and clip it
7971 */
7972 if (vm_map_lookup_entry(map, start, &first_entry)) {
7973 entry = first_entry;
7974 if (map == kalloc_map &&
7975 (entry->vme_start != start ||
7976 entry->vme_end != end)) {
7977 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7978 "mismatched entry %p [0x%llx:0x%llx]\n",
7979 map,
7980 (uint64_t)start,
7981 (uint64_t)end,
7982 entry,
7983 (uint64_t)entry->vme_start,
7984 (uint64_t)entry->vme_end);
7985 }
7986
7987 /*
7988 * If in a superpage, extend the range to include the start of the mapping.
7989 */
7990 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7991 start = SUPERPAGE_ROUND_DOWN(start);
7992 continue;
7993 }
7994
7995 if (start == entry->vme_start) {
7996 /*
7997 * No need to clip. We don't want to cause
7998 * any unnecessary unnesting in this case...
7999 */
8000 } else {
8001 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8002 entry->map_aligned &&
8003 !VM_MAP_PAGE_ALIGNED(
8004 start,
8005 VM_MAP_PAGE_MASK(map))) {
8006 /*
8007 * The entry will no longer be
8008 * map-aligned after clipping
8009 * and the caller said it's OK.
8010 */
8011 entry->map_aligned = FALSE;
8012 }
8013 if (map == kalloc_map) {
8014 panic("vm_map_delete(%p,0x%llx,0x%llx):"
8015 " clipping %p at 0x%llx\n",
8016 map,
8017 (uint64_t)start,
8018 (uint64_t)end,
8019 entry,
8020 (uint64_t)start);
8021 }
8022 vm_map_clip_start(map, entry, start);
8023 }
8024
8025 /*
8026 * Fix the lookup hint now, rather than each
8027 * time through the loop.
8028 */
8029 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8030 } else {
8031 if (map->pmap == kernel_pmap &&
8032 os_ref_get_count(&map->map_refcnt) != 0) {
8033 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8034 "no map entry at 0x%llx\n",
8035 map,
8036 (uint64_t)start,
8037 (uint64_t)end,
8038 (uint64_t)start);
8039 }
8040 entry = first_entry->vme_next;
8041 if (gap_start == FIND_GAP) {
8042 gap_start = start;
8043 }
8044 }
8045 break;
8046 }
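/*
 * The surrounding while (1) exists only so that the superpage case above
 * can restart the lookup after rounding "start" down; every other path
 * falls through to the break.
 */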
8047 if (entry->superpage_size) {
8048 end = SUPERPAGE_ROUND_UP(end);
8049 }
8050
8051 need_wakeup = FALSE;
8052 /*
8053 * Step through all entries in this region
8054 */
8055 s = entry->vme_start;
8056 while ((entry != vm_map_to_entry(map)) && (s < end)) {
8057 /*
8058 * At this point, we have deleted all the memory entries
8059 * between "start" and "s". We still need to delete
8060 * all memory entries between "s" and "end".
8061 * While we were blocked and the map was unlocked, some
8062 * new memory entries could have been re-allocated between
8063 * "start" and "s" and we don't want to mess with those.
8064 * Some of those entries could even have been re-assembled
8065 * with an entry after "s" (in vm_map_simplify_entry()), so
8066 * we may have to vm_map_clip_start() again.
8067 */
8068
8069 if (entry->vme_start >= s) {
8070 /*
8071 * This entry starts on or after "s"
8072 * so no need to clip its start.
8073 */
8074 } else {
8075 /*
8076 * This entry has been re-assembled by a
8077 * vm_map_simplify_entry(). We need to
8078 * re-clip its start.
8079 */
8080 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8081 entry->map_aligned &&
8082 !VM_MAP_PAGE_ALIGNED(s,
8083 VM_MAP_PAGE_MASK(map))) {
8084 /*
8085 * The entry will no longer be map-aligned
8086 * after clipping and the caller said it's OK.
8087 */
8088 entry->map_aligned = FALSE;
8089 }
8090 if (map == kalloc_map) {
8091 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8092 "clipping %p at 0x%llx\n",
8093 map,
8094 (uint64_t)start,
8095 (uint64_t)end,
8096 entry,
8097 (uint64_t)s);
8098 }
8099 vm_map_clip_start(map, entry, s);
8100 }
8101 if (entry->vme_end <= end) {
8102 /*
8103 * This entry is going away completely, so no need
8104 * to clip and possibly cause an unnecessary unnesting.
8105 */
8106 } else {
8107 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8108 entry->map_aligned &&
8109 !VM_MAP_PAGE_ALIGNED(end,
8110 VM_MAP_PAGE_MASK(map))) {
8111 /*
8112 * The entry will no longer be map-aligned
8113 * after clipping and the caller said it's OK.
8114 */
8115 entry->map_aligned = FALSE;
8116 }
8117 if (map == kalloc_map) {
8118 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8119 "clipping %p at 0x%llx\n",
8120 map,
8121 (uint64_t)start,
8122 (uint64_t)end,
8123 entry,
8124 (uint64_t)end);
8125 }
8126 vm_map_clip_end(map, entry, end);
8127 }
8128
8129 if (entry->permanent) {
8130 if (map->pmap == kernel_pmap) {
8131 panic("%s(%p,0x%llx,0x%llx): "
8132 "attempt to remove permanent "
8133 "VM map entry "
8134 "%p [0x%llx:0x%llx]\n",
8135 __FUNCTION__,
8136 map,
8137 (uint64_t) start,
8138 (uint64_t) end,
8139 entry,
8140 (uint64_t) entry->vme_start,
8141 (uint64_t) entry->vme_end);
8142 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8143 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
8144 entry->permanent = FALSE;
8145 } else {
8146 if (vm_map_executable_immutable_verbose) {
8147 printf("%d[%s] %s(0x%llx,0x%llx): "
8148 "permanent entry [0x%llx:0x%llx] "
8149 "prot 0x%x/0x%x\n",
8150 proc_selfpid(),
8151 (current_task()->bsd_info
8152 ? proc_name_address(current_task()->bsd_info)
8153 : "?"),
8154 __FUNCTION__,
8155 (uint64_t) start,
8156 (uint64_t) end,
8157 (uint64_t)entry->vme_start,
8158 (uint64_t)entry->vme_end,
8159 entry->protection,
8160 entry->max_protection);
8161 }
8162 /*
8163 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
8164 */
8165 DTRACE_VM5(vm_map_delete_permanent,
8166 vm_map_offset_t, entry->vme_start,
8167 vm_map_offset_t, entry->vme_end,
8168 vm_prot_t, entry->protection,
8169 vm_prot_t, entry->max_protection,
8170 int, VME_ALIAS(entry));
8171 }
8172 }
8173
8174
8175 if (entry->in_transition) {
8176 wait_result_t wait_result;
8177
8178 /*
8179 * Another thread is wiring/unwiring this entry.
8180 * Let the other thread know we are waiting.
8181 */
8182 assert(s == entry->vme_start);
8183 entry->needs_wakeup = TRUE;
8184
8185 /*
8186 * wake up anybody waiting on entries that we have
8187 * already unwired/deleted.
8188 */
8189 if (need_wakeup) {
8190 vm_map_entry_wakeup(map);
8191 need_wakeup = FALSE;
8192 }
8193
8194 wait_result = vm_map_entry_wait(map, interruptible);
8195
8196 if (interruptible &&
8197 wait_result == THREAD_INTERRUPTED) {
8198 /*
8199 * We do not clear the needs_wakeup flag,
8200 * since we cannot tell if we were the only one.
8201 */
8202 return KERN_ABORTED;
8203 }
8204
8205 /*
8206 * The entry could have been clipped or it
8207 * may not exist anymore. Look it up again.
8208 */
8209 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8210 /*
8211 * User: use the next entry
8212 */
8213 if (gap_start == FIND_GAP) {
8214 gap_start = s;
8215 }
8216 entry = first_entry->vme_next;
8217 s = entry->vme_start;
8218 } else {
8219 entry = first_entry;
8220 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8221 }
8222 last_timestamp = map->timestamp;
8223 continue;
8224 } /* end in_transition */
8225
8226 if (entry->wired_count) {
8227 boolean_t user_wire;
8228
8229 user_wire = entry->user_wired_count > 0;
8230
8231 /*
8232 * Remove a kernel wiring if requested
8233 */
8234 if (flags & VM_MAP_REMOVE_KUNWIRE) {
8235 entry->wired_count--;
8236 }
8237
8238 /*
8239 * Remove all user wirings for proper accounting
8240 */
8241 if (entry->user_wired_count > 0) {
8242 while (entry->user_wired_count) {
8243 subtract_wire_counts(map, entry, user_wire);
8244 }
8245 }
8246
8247 if (entry->wired_count != 0) {
8248 assert(map != kernel_map);
8249 /*
8250 * Cannot continue. Typical case is when
8251 * a user thread has physical io pending
8252 * on this page. Either wait for the
8253 * kernel wiring to go away or return an
8254 * error.
8255 */
8256 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
8257 wait_result_t wait_result;
8258
8259 assert(s == entry->vme_start);
8260 entry->needs_wakeup = TRUE;
8261 wait_result = vm_map_entry_wait(map,
8262 interruptible);
8263
8264 if (interruptible &&
8265 wait_result == THREAD_INTERRUPTED) {
8266 /*
8267 * We do not clear the
8268 * needs_wakeup flag, since we
8269 * cannot tell if we were the
8270 * only one.
8271 */
8272 return KERN_ABORTED;
8273 }
8274
8275 /*
8276 * The entry could have been clipped or
8277 * it may not exist anymore. Look it
8278 * up again.
8279 */
8280 if (!vm_map_lookup_entry(map, s,
8281 &first_entry)) {
8282 assert(map != kernel_map);
8283 /*
8284 * User: use the next entry
8285 */
8286 if (gap_start == FIND_GAP) {
8287 gap_start = s;
8288 }
8289 entry = first_entry->vme_next;
8290 s = entry->vme_start;
8291 } else {
8292 entry = first_entry;
8293 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8294 }
8295 last_timestamp = map->timestamp;
8296 continue;
8297 } else {
8298 return KERN_FAILURE;
8299 }
8300 }
8301
8302 entry->in_transition = TRUE;
8303 /*
8304 * copy current entry. see comment in vm_map_wire()
8305 */
8306 tmp_entry = *entry;
8307 assert(s == entry->vme_start);
8308
8309 /*
8310 * We can unlock the map now. The in_transition
8311 * state guarantees existence of the entry.
8312 */
8313 vm_map_unlock(map);
8314
8315 if (tmp_entry.is_sub_map) {
8316 vm_map_t sub_map;
8317 vm_map_offset_t sub_start, sub_end;
8318 pmap_t pmap;
8319 vm_map_offset_t pmap_addr;
8320
8321
8322 sub_map = VME_SUBMAP(&tmp_entry);
8323 sub_start = VME_OFFSET(&tmp_entry);
8324 sub_end = sub_start + (tmp_entry.vme_end -
8325 tmp_entry.vme_start);
8326 if (tmp_entry.use_pmap) {
8327 pmap = sub_map->pmap;
8328 pmap_addr = tmp_entry.vme_start;
8329 } else {
8330 pmap = map->pmap;
8331 pmap_addr = tmp_entry.vme_start;
8332 }
8333 (void) vm_map_unwire_nested(sub_map,
8334 sub_start, sub_end,
8335 user_wire,
8336 pmap, pmap_addr);
8337 } else {
8338 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8339 pmap_protect_options(
8340 map->pmap,
8341 tmp_entry.vme_start,
8342 tmp_entry.vme_end,
8343 VM_PROT_NONE,
8344 PMAP_OPTIONS_REMOVE,
8345 NULL);
8346 }
8347 vm_fault_unwire(map, &tmp_entry,
8348 VME_OBJECT(&tmp_entry) == kernel_object,
8349 map->pmap, tmp_entry.vme_start);
8350 }
8351
8352 vm_map_lock(map);
8353
8354 if (last_timestamp + 1 != map->timestamp) {
8355 /*
8356 * Find the entry again. It could have
8357 * been clipped after we unlocked the map.
8358 */
8359 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8360 assert((map != kernel_map) &&
8361 (!entry->is_sub_map));
8362 if (gap_start == FIND_GAP) {
8363 gap_start = s;
8364 }
8365 first_entry = first_entry->vme_next;
8366 s = first_entry->vme_start;
8367 } else {
8368 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8369 }
8370 } else {
8371 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8372 first_entry = entry;
8373 }
8374
8375 last_timestamp = map->timestamp;
8376
8377 entry = first_entry;
8378 while ((entry != vm_map_to_entry(map)) &&
8379 (entry->vme_start < tmp_entry.vme_end)) {
8380 assert(entry->in_transition);
8381 entry->in_transition = FALSE;
8382 if (entry->needs_wakeup) {
8383 entry->needs_wakeup = FALSE;
8384 need_wakeup = TRUE;
8385 }
8386 entry = entry->vme_next;
8387 }
8388 /*
8389 * We have unwired the entry(s). Go back and
8390 * delete them.
8391 */
8392 entry = first_entry;
8393 continue;
8394 }
8395
8396 /* entry is unwired */
8397 assert(entry->wired_count == 0);
8398 assert(entry->user_wired_count == 0);
8399
8400 assert(s == entry->vme_start);
8401
8402 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8403 /*
8404 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8405 * vm_map_delete(), some map entries might have been
8406 * transferred to a "zap_map", which doesn't have a
8407 * pmap. The original pmap has already been flushed
8408 * in the vm_map_delete() call targeting the original
8409 * map, but when we get to destroying the "zap_map",
8410 * we don't have any pmap to flush, so let's just skip
8411 * all this.
8412 */
8413 } else if (entry->is_sub_map) {
8414 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8415 "map %p (%d) entry %p submap %p (%d)\n",
8416 map, VM_MAP_PAGE_SHIFT(map), entry,
8417 VME_SUBMAP(entry),
8418 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8419 if (entry->use_pmap) {
8420 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) == VM_MAP_PAGE_SHIFT(map),
8421 "map %p (%d) entry %p submap %p (%d)\n",
8422 map, VM_MAP_PAGE_SHIFT(map), entry,
8423 VME_SUBMAP(entry),
8424 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8425 #ifndef NO_NESTED_PMAP
8426 int pmap_flags;
8427
8428 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8429 /*
8430 * This is the final cleanup of the
8431 * address space being terminated.
8432 * No new mappings are expected and
8433 * we don't really need to unnest the
8434 * shared region (and lose the "global"
8435 * pmap mappings, if applicable).
8436 *
8437 * Tell the pmap layer that we're
8438 * "clean" wrt nesting.
8439 */
8440 pmap_flags = PMAP_UNNEST_CLEAN;
8441 } else {
8442 /*
8443 * We're unmapping part of the nested
8444 * shared region, so we can't keep the
8445 * nested pmap.
8446 */
8447 pmap_flags = 0;
8448 }
8449 pmap_unnest_options(
8450 map->pmap,
8451 (addr64_t)entry->vme_start,
8452 entry->vme_end - entry->vme_start,
8453 pmap_flags);
8454 #endif /* NO_NESTED_PMAP */
8455 if (map->mapped_in_other_pmaps &&
8456 os_ref_get_count(&map->map_refcnt) != 0) {
8457 /* clean up parent map/maps */
8458 vm_map_submap_pmap_clean(
8459 map, entry->vme_start,
8460 entry->vme_end,
8461 VME_SUBMAP(entry),
8462 VME_OFFSET(entry));
8463 }
8464 } else {
8465 vm_map_submap_pmap_clean(
8466 map, entry->vme_start, entry->vme_end,
8467 VME_SUBMAP(entry),
8468 VME_OFFSET(entry));
8469 }
8470 } else if (VME_OBJECT(entry) != kernel_object &&
8471 VME_OBJECT(entry) != compressor_object) {
8472 object = VME_OBJECT(entry);
8473 if (map->mapped_in_other_pmaps &&
8474 os_ref_get_count(&map->map_refcnt) != 0) {
8475 vm_object_pmap_protect_options(
8476 object, VME_OFFSET(entry),
8477 entry->vme_end - entry->vme_start,
8478 PMAP_NULL,
8479 PAGE_SIZE,
8480 entry->vme_start,
8481 VM_PROT_NONE,
8482 PMAP_OPTIONS_REMOVE);
8483 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8484 (map->pmap == kernel_pmap)) {
8485 /* Remove the translations associated
8486 * with this range unless the entry
8487 * has no object. For the kernel map
8488 * (or a descendant), remove them even
8489 * without an object, since the platform
8490 * could create "backdoor" mappings
8491 * invisible to the VM. It is expected
8492 * that objectless, non-kernel ranges
8493 * do not have such VM-invisible
8494 * translations.
8495 */
8496 pmap_remove_options(map->pmap,
8497 (addr64_t)entry->vme_start,
8498 (addr64_t)entry->vme_end,
8499 PMAP_OPTIONS_REMOVE);
8500 }
8501 }
8502
8503 if (entry->iokit_acct) {
8504 /* alternate accounting */
8505 DTRACE_VM4(vm_map_iokit_unmapped_region,
8506 vm_map_t, map,
8507 vm_map_offset_t, entry->vme_start,
8508 vm_map_offset_t, entry->vme_end,
8509 int, VME_ALIAS(entry));
8510 vm_map_iokit_unmapped_region(map,
8511 (entry->vme_end -
8512 entry->vme_start));
8513 entry->iokit_acct = FALSE;
8514 entry->use_pmap = FALSE;
8515 }
8516
8517 /*
8518 * All pmap mappings for this map entry must have been
8519 * cleared by now.
8520 */
8521 #if DEBUG
8522 assert(vm_map_pmap_is_empty(map,
8523 entry->vme_start,
8524 entry->vme_end));
8525 #endif /* DEBUG */
8526
8527 next = entry->vme_next;
8528
8529 if (map->pmap == kernel_pmap &&
8530 os_ref_get_count(&map->map_refcnt) != 0 &&
8531 entry->vme_end < end &&
8532 (next == vm_map_to_entry(map) ||
8533 next->vme_start != entry->vme_end)) {
8534 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8535 "hole after %p at 0x%llx\n",
8536 map,
8537 (uint64_t)start,
8538 (uint64_t)end,
8539 entry,
8540 (uint64_t)entry->vme_end);
8541 }
8542
8543 /*
8544 * If the desired range didn't end with "entry", then there is a gap if
8545 * we wrapped around to the start of the map or if "entry" and "next"
8546 * aren't contiguous.
8547 *
8548 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8549 * For example, on devices with 4K hardware pages, map entry sizes are now all 16K.
8550 */
8551 if (gap_start == FIND_GAP &&
8552 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8553 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8554 gap_start = entry->vme_end;
8555 }
8556 s = next->vme_start;
8557 last_timestamp = map->timestamp;
8558
8559 if (entry->permanent) {
8560 /*
8561 * A permanent entry cannot be removed, so leave it
8562 * in place but remove all access permissions.
8563 */
8564 entry->protection = VM_PROT_NONE;
8565 entry->max_protection = VM_PROT_NONE;
8566 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8567 zap_map != VM_MAP_NULL) {
8568 vm_map_size_t entry_size;
8569 /*
8570 * The caller wants to save the affected VM map entries
8571 * into the "zap_map". The caller will take care of
8572 * these entries.
8573 */
8574 /* unlink the entry from "map" ... */
8575 vm_map_store_entry_unlink(map, entry);
8576 /* ... and add it to the end of the "zap_map" */
8577 vm_map_store_entry_link(zap_map,
8578 vm_map_last_entry(zap_map),
8579 entry,
8580 VM_MAP_KERNEL_FLAGS_NONE);
8581 entry_size = entry->vme_end - entry->vme_start;
8582 map->size -= entry_size;
8583 zap_map->size += entry_size;
8584 /* we didn't unlock the map, so no timestamp increase */
8585 last_timestamp--;
8586 } else {
8587 vm_map_entry_delete(map, entry);
8588 /* vm_map_entry_delete unlocks the map */
8589 vm_map_lock(map);
8590 }
8591
8592 entry = next;
8593
8594 if (entry == vm_map_to_entry(map)) {
8595 break;
8596 }
8597 if (last_timestamp + 1 != map->timestamp) {
8598 /*
8599 * We are responsible for deleting everything
8600 * from the given space. If someone has interfered,
8601 * we pick up where we left off. Back fills should
8602 * be all right for anyone, except map_delete, and
8603 * we have to assume that the task has been fully
8604 * disabled before we get here.
8605 */
8606 if (!vm_map_lookup_entry(map, s, &entry)) {
8607 entry = entry->vme_next;
8608
8609 /*
8610 * Nothing found for s. If we weren't already done, then there is a gap.
8611 */
8612 if (gap_start == FIND_GAP && s < end) {
8613 gap_start = s;
8614 }
8615 s = entry->vme_start;
8616 } else {
8617 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8618 }
8619 /*
8620 * others can not only allocate behind us, we can
8621 * also see entries coalesce while we don't hold the map lock
8622 */
8623 if (entry == vm_map_to_entry(map)) {
8624 break;
8625 }
8626 }
8627 last_timestamp = map->timestamp;
8628 }
8629
8630 if (map->wait_for_space) {
8631 thread_wakeup((event_t) map);
8632 }
8633 /*
8634 * wake up anybody waiting on entries that we have already deleted.
8635 */
8636 if (need_wakeup) {
8637 vm_map_entry_wakeup(map);
8638 }
8639
8640 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8641 DTRACE_VM3(kern_vm_deallocate_gap,
8642 vm_map_offset_t, gap_start,
8643 vm_map_offset_t, save_start,
8644 vm_map_offset_t, save_end);
8645 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8646 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8647 }
8648 }
8649
8650 return KERN_SUCCESS;
8651 }
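/*
 * Quick reference, derived from the body of vm_map_delete() above, of the
 * flags it reacts to:
 *
 *	VM_MAP_REMOVE_KUNWIRE		drop one kernel wiring per entry
 *	VM_MAP_REMOVE_INTERRUPTIBLE	sleep THREAD_ABORTSAFE instead of THREAD_UNINT
 *	VM_MAP_REMOVE_WAIT_FOR_KWIRE	wait for remaining kernel wirings (always set here)
 *	VM_MAP_REMOVE_NO_MAP_ALIGN	allow clipping to break map alignment
 *	VM_MAP_REMOVE_IMMUTABLE		allow removal of "permanent" entries
 *	VM_MAP_REMOVE_NO_UNNESTING	unnest with PMAP_UNNEST_CLEAN (final teardown)
 *	VM_MAP_REMOVE_NO_PMAP_CLEANUP	skip pmap flushing (e.g. for "zap maps")
 *	VM_MAP_REMOVE_SAVE_ENTRIES	move entries to "zap_map" instead of freeing them
 *	VM_MAP_REMOVE_GAPS_OK		don't raise EXC_GUARD on gaps in the range
 */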
8652
8653
8654 /*
8655 * vm_map_terminate:
8656 *
8657 * Clean out a task's map.
8658 */
8659 kern_return_t
8660 vm_map_terminate(
8661 vm_map_t map)
8662 {
8663 vm_map_lock(map);
8664 map->terminated = TRUE;
8665 vm_map_unlock(map);
8666
8667 return vm_map_remove(map,
8668 map->min_offset,
8669 map->max_offset,
8670 /*
8671 * Final cleanup:
8672 * + no unnesting
8673 * + remove immutable mappings
8674 * + allow gaps in range
8675 */
8676 (VM_MAP_REMOVE_NO_UNNESTING |
8677 VM_MAP_REMOVE_IMMUTABLE |
8678 VM_MAP_REMOVE_GAPS_OK));
8679 }
8680
8681 /*
8682 * vm_map_remove:
8683 *
8684 * Remove the given address range from the target map.
8685 * This is the exported form of vm_map_delete.
8686 */
8687 kern_return_t
8688 vm_map_remove(
8689 vm_map_t map,
8690 vm_map_offset_t start,
8691 vm_map_offset_t end,
8692 boolean_t flags)
8693 {
8694 kern_return_t result;
8695
8696 vm_map_lock(map);
8697 VM_MAP_RANGE_CHECK(map, start, end);
8698 /*
8699 * For the zone maps, the kernel controls the allocation/freeing of memory.
8700 * Any free to the zone maps should be within the bounds of the map and
8701 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8702 * free to the zone maps into a no-op, there is a problem and we should
8703 * panic.
8704 */
8705 if ((start == end) && zone_maps_owned(start, 1)) {
8706 panic("Nothing being freed to a zone map. start = end = %p\n", (void *)start);
8707 }
8708 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8709 vm_map_unlock(map);
8710
8711 return result;
8712 }
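/*
 * A minimal usage sketch (illustrative only; "map", "addr" and "size" are
 * placeholders, and VM_MAP_REMOVE_NO_FLAGS is assumed to be the "no
 * flags" value from vm_map.h): a kernel caller tearing down a range it
 * mapped earlier would typically do
 *
 *	kr = vm_map_remove(map, addr, addr + size, VM_MAP_REMOVE_NO_FLAGS);
 *
 * adding VM_MAP_REMOVE_KUNWIRE if it still holds a kernel wiring on the
 * range.
 */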
8713
8714 /*
8715 * vm_map_remove_locked:
8716 *
8717 * Remove the given address range from the target locked map.
8718 * This is the exported form of vm_map_delete.
8719 */
8720 kern_return_t
8721 vm_map_remove_locked(
8722 vm_map_t map,
8723 vm_map_offset_t start,
8724 vm_map_offset_t end,
8725 boolean_t flags)
8726 {
8727 kern_return_t result;
8728
8729 VM_MAP_RANGE_CHECK(map, start, end);
8730 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8731 return result;
8732 }
8733
8734
8735 /*
8736 * Routine: vm_map_copy_allocate
8737 *
8738 * Description:
8739 * Allocates and initializes a map copy object.
8740 */
8741 static vm_map_copy_t
8742 vm_map_copy_allocate(void)
8743 {
8744 vm_map_copy_t new_copy;
8745
8746 new_copy = zalloc(vm_map_copy_zone);
8747 bzero(new_copy, sizeof(*new_copy));
8748 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8749 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8750 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8751 return new_copy;
8752 }
8753
8754 /*
8755 * Routine: vm_map_copy_discard
8756 *
8757 * Description:
8758 * Dispose of a map copy object (returned by
8759 * vm_map_copyin).
8760 */
8761 void
8762 vm_map_copy_discard(
8763 vm_map_copy_t copy)
8764 {
8765 if (copy == VM_MAP_COPY_NULL) {
8766 return;
8767 }
8768
8769 switch (copy->type) {
8770 case VM_MAP_COPY_ENTRY_LIST:
8771 while (vm_map_copy_first_entry(copy) !=
8772 vm_map_copy_to_entry(copy)) {
8773 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8774
8775 vm_map_copy_entry_unlink(copy, entry);
8776 if (entry->is_sub_map) {
8777 vm_map_deallocate(VME_SUBMAP(entry));
8778 } else {
8779 vm_object_deallocate(VME_OBJECT(entry));
8780 }
8781 vm_map_copy_entry_dispose(copy, entry);
8782 }
8783 break;
8784 case VM_MAP_COPY_OBJECT:
8785 vm_object_deallocate(copy->cpy_object);
8786 break;
8787 case VM_MAP_COPY_KERNEL_BUFFER:
8788
8789 /*
8790 * The data buffer for this copy was allocated separately from
8791 * KHEAP_DATA_BUFFERS; free it here. The vm_map_copy_t itself
8792 * comes from vm_map_copy_zone and is freed below.
8793 */
8794 if (copy->size > msg_ool_size_small || copy->offset) {
8795 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8796 (long long)copy->size, (long long)copy->offset);
8797 }
8798 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy->size);
8799 }
8800 zfree(vm_map_copy_zone, copy);
8801 }
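/*
 * Ownership note, a sketch of the usual pattern rather than a new rule:
 * a vm_map_copy_t produced by vm_map_copyin() is consumed exactly once,
 * either by a successful vm_map_copyout()/vm_map_copy_overwrite() or by
 * an explicit discard on the error path, e.g.
 *
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);
 *	}
 */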
8802
8803 /*
8804 * Routine: vm_map_copy_copy
8805 *
8806 * Description:
8807 * Move the information in a map copy object to
8808 * a new map copy object, leaving the old one
8809 * empty.
8810 *
8811 * This is used by kernel routines that need
8812 * to look at out-of-line data (in copyin form)
8813 * before deciding whether to return SUCCESS.
8814 * If the routine returns FAILURE, the original
8815 * copy object will be deallocated; therefore,
8816 * these routines must make a copy of the copy
8817 * object and leave the original empty so that
8818 * deallocation will not fail.
8819 */
8820 vm_map_copy_t
8821 vm_map_copy_copy(
8822 vm_map_copy_t copy)
8823 {
8824 vm_map_copy_t new_copy;
8825
8826 if (copy == VM_MAP_COPY_NULL) {
8827 return VM_MAP_COPY_NULL;
8828 }
8829
8830 /*
8831 * Allocate a new copy object, and copy the information
8832 * from the old one into it.
8833 */
8834
8835 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8836 memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8837 #if __has_feature(ptrauth_calls)
8838 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8839 new_copy->cpy_kdata = copy->cpy_kdata;
8840 }
8841 #endif
8842
8843 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8844 /*
8845 * The links in the entry chain must be
8846 * changed to point to the new copy object.
8847 */
8848 vm_map_copy_first_entry(copy)->vme_prev
8849 = vm_map_copy_to_entry(new_copy);
8850 vm_map_copy_last_entry(copy)->vme_next
8851 = vm_map_copy_to_entry(new_copy);
8852 }
8853
8854 /*
8855 * Change the old copy object into one that contains
8856 * nothing to be deallocated.
8857 */
8858 copy->type = VM_MAP_COPY_OBJECT;
8859 copy->cpy_object = VM_OBJECT_NULL;
8860
8861 /*
8862 * Return the new object.
8863 */
8864 return new_copy;
8865 }
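/*
 * A sketch of the pattern described in the comment above ("saved" and
 * "failed" are illustrative, not names from this file): a routine that
 * must examine copyin data before deciding its return value would do
 * roughly
 *
 *	vm_map_copy_t saved = vm_map_copy_copy(copy);
 *	(... inspect the out-of-line data via "saved" ...)
 *	if (failed) {
 *		vm_map_copy_discard(saved);
 *		return KERN_FAILURE;
 *	}
 *
 * The original "copy" is left empty, so the caller's deallocation on the
 * failure path cannot free the data twice.
 */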
8866
8867 static kern_return_t
8868 vm_map_overwrite_submap_recurse(
8869 vm_map_t dst_map,
8870 vm_map_offset_t dst_addr,
8871 vm_map_size_t dst_size)
8872 {
8873 vm_map_offset_t dst_end;
8874 vm_map_entry_t tmp_entry;
8875 vm_map_entry_t entry;
8876 kern_return_t result;
8877 boolean_t encountered_sub_map = FALSE;
8878
8879
8880
8881 /*
8882 * Verify that the destination is all writeable
8883 * initially. We have to trunc the destination
8884 * address and round the copy size or we'll end up
8885 * splitting entries in strange ways.
8886 */
8887
8888 dst_end = vm_map_round_page(dst_addr + dst_size,
8889 VM_MAP_PAGE_MASK(dst_map));
8890 vm_map_lock(dst_map);
8891
8892 start_pass_1:
8893 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8894 vm_map_unlock(dst_map);
8895 return KERN_INVALID_ADDRESS;
8896 }
8897
8898 vm_map_clip_start(dst_map,
8899 tmp_entry,
8900 vm_map_trunc_page(dst_addr,
8901 VM_MAP_PAGE_MASK(dst_map)));
8902 if (tmp_entry->is_sub_map) {
8903 /* clipping did unnest if needed */
8904 assert(!tmp_entry->use_pmap);
8905 }
8906
8907 for (entry = tmp_entry;;) {
8908 vm_map_entry_t next;
8909
8910 next = entry->vme_next;
8911 while (entry->is_sub_map) {
8912 vm_map_offset_t sub_start;
8913 vm_map_offset_t sub_end;
8914 vm_map_offset_t local_end;
8915
8916 if (entry->in_transition) {
8917 /*
8918 * Say that we are waiting, and wait for entry.
8919 */
8920 entry->needs_wakeup = TRUE;
8921 vm_map_entry_wait(dst_map, THREAD_UNINT);
8922
8923 goto start_pass_1;
8924 }
8925
8926 encountered_sub_map = TRUE;
8927 sub_start = VME_OFFSET(entry);
8928
8929 if (entry->vme_end < dst_end) {
8930 sub_end = entry->vme_end;
8931 } else {
8932 sub_end = dst_end;
8933 }
8934 sub_end -= entry->vme_start;
8935 sub_end += VME_OFFSET(entry);
8936 local_end = entry->vme_end;
8937 vm_map_unlock(dst_map);
8938
8939 result = vm_map_overwrite_submap_recurse(
8940 VME_SUBMAP(entry),
8941 sub_start,
8942 sub_end - sub_start);
8943
8944 if (result != KERN_SUCCESS) {
8945 return result;
8946 }
8947 if (dst_end <= entry->vme_end) {
8948 return KERN_SUCCESS;
8949 }
8950 vm_map_lock(dst_map);
8951 if (!vm_map_lookup_entry(dst_map, local_end,
8952 &tmp_entry)) {
8953 vm_map_unlock(dst_map);
8954 return KERN_INVALID_ADDRESS;
8955 }
8956 entry = tmp_entry;
8957 next = entry->vme_next;
8958 }
8959
8960 if (!(entry->protection & VM_PROT_WRITE)) {
8961 vm_map_unlock(dst_map);
8962 return KERN_PROTECTION_FAILURE;
8963 }
8964
8965 /*
8966 * If the entry is in transition, we must wait
8967 * for it to exit that state. Anything could happen
8968 * when we unlock the map, so start over.
8969 */
8970 if (entry->in_transition) {
8971 /*
8972 * Say that we are waiting, and wait for entry.
8973 */
8974 entry->needs_wakeup = TRUE;
8975 vm_map_entry_wait(dst_map, THREAD_UNINT);
8976
8977 goto start_pass_1;
8978 }
8979
8980 /*
8981 * our range is contained completely within this map entry
8982 */
8983 if (dst_end <= entry->vme_end) {
8984 vm_map_unlock(dst_map);
8985 return KERN_SUCCESS;
8986 }
8987 /*
8988 * check that range specified is contiguous region
8989 */
8990 if ((next == vm_map_to_entry(dst_map)) ||
8991 (next->vme_start != entry->vme_end)) {
8992 vm_map_unlock(dst_map);
8993 return KERN_INVALID_ADDRESS;
8994 }
8995
8996 /*
8997 * Check for permanent objects in the destination.
8998 */
8999 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9000 ((!VME_OBJECT(entry)->internal) ||
9001 (VME_OBJECT(entry)->true_share))) {
9002 if (encountered_sub_map) {
9003 vm_map_unlock(dst_map);
9004 return KERN_FAILURE;
9005 }
9006 }
9007
9008
9009 entry = next;
9010 }/* for */
9011 vm_map_unlock(dst_map);
9012 return KERN_SUCCESS;
9013 }
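/*
 * Note, summarizing the loop above: this recursion only checks that the
 * destination range is writable and contiguous, and fails if a permanent
 * object is found once a submap has been encountered; the overwrite
 * itself is performed by vm_map_copy_overwrite_nested() below.
 */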
9014
9015 /*
9016 * Routine: vm_map_copy_overwrite
9017 *
9018 * Description:
9019 * Copy the memory described by the map copy
9020 * object (copy; returned by vm_map_copyin) onto
9021 * the specified destination region (dst_map, dst_addr).
9022 * The destination must be writeable.
9023 *
9024 * Unlike vm_map_copyout, this routine actually
9025 * writes over previously-mapped memory. If the
9026 * previous mapping was to a permanent (user-supplied)
9027 * memory object, it is preserved.
9028 *
9029 * The attributes (protection and inheritance) of the
9030 * destination region are preserved.
9031 *
9032 * If successful, consumes the copy object.
9033 * Otherwise, the caller is responsible for it.
9034 *
9035 * Implementation notes:
9036 * To overwrite aligned temporary virtual memory, it is
9037 * sufficient to remove the previous mapping and insert
9038 * the new copy. This replacement is done either on
9039 * the whole region (if no permanent virtual memory
9040 * objects are embedded in the destination region) or
9041 * in individual map entries.
9042 *
9043 * To overwrite permanent virtual memory, it is necessary
9044 * to copy each page, as the external memory management
9045 * interface currently does not provide any optimizations.
9046 *
9047 * Unaligned memory also has to be copied. It is possible
9048 * to use 'vm_trickery' to copy the aligned data. This is
9049 * not done but not hard to implement.
9050 *
9051 * Once a page of permanent memory has been overwritten,
9052 * it is impossible to interrupt this function; otherwise,
9053 * the call would be neither atomic nor location-independent.
9054 * The kernel-state portion of a user thread must be
9055 * interruptible.
9056 *
9057 * It may be expensive to forward all requests that might
9058 * overwrite permanent memory (vm_write, vm_copy) to
9059 * uninterruptible kernel threads. This routine may be
9060 * called by interruptible threads; however, success is
9061 * not guaranteed -- if the request cannot be performed
9062 * atomically and interruptibly, an error indication is
9063 * returned.
9064 */
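/*
 * A minimal caller-side sketch (illustrative; "dst_map", "dst_addr" and
 * "copy" are placeholders): after vm_map_copyin() of the source range,
 * the public entry point further below is used as
 *
 *	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
 *	    copy->size, FALSE);
 *
 * On KERN_SUCCESS the copy object has been consumed; on failure the
 * caller still owns it and must vm_map_copy_discard() it.
 */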
9065
9066 static kern_return_t
9067 vm_map_copy_overwrite_nested(
9068 vm_map_t dst_map,
9069 vm_map_address_t dst_addr,
9070 vm_map_copy_t copy,
9071 boolean_t interruptible,
9072 pmap_t pmap,
9073 boolean_t discard_on_success)
9074 {
9075 vm_map_offset_t dst_end;
9076 vm_map_entry_t tmp_entry;
9077 vm_map_entry_t entry;
9078 kern_return_t kr;
9079 boolean_t aligned = TRUE;
9080 boolean_t contains_permanent_objects = FALSE;
9081 boolean_t encountered_sub_map = FALSE;
9082 vm_map_offset_t base_addr;
9083 vm_map_size_t copy_size;
9084 vm_map_size_t total_size;
9085 int copy_page_shift;
9086
9087
9088 /*
9089 * Check for null copy object.
9090 */
9091
9092 if (copy == VM_MAP_COPY_NULL) {
9093 return KERN_SUCCESS;
9094 }
9095
9096 /*
9097 * Assert that the vm_map_copy is coming from the right
9098 * zone and hasn't been forged
9099 */
9100 vm_map_copy_require(copy);
9101
9102 /*
9103 * Check for special kernel buffer allocated
9104 * by new_ipc_kmsg_copyin.
9105 */
9106
9107 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
9108 return vm_map_copyout_kernel_buffer(
9109 dst_map, &dst_addr,
9110 copy, copy->size, TRUE, discard_on_success);
9111 }
9112
9113 /*
9114 * Only works for entry lists at the moment. Will
9115 * support page lists later.
9116 */
9117
9118 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9119
9120 if (copy->size == 0) {
9121 if (discard_on_success) {
9122 vm_map_copy_discard(copy);
9123 }
9124 return KERN_SUCCESS;
9125 }
9126
9127 copy_page_shift = copy->cpy_hdr.page_shift;
9128
9129 /*
9130 * Verify that the destination is all writeable
9131 * initially. We have to trunc the destination
9132 * address and round the copy size or we'll end up
9133 * splitting entries in strange ways.
9134 */
9135
9136 if (!VM_MAP_PAGE_ALIGNED(copy->size,
9137 VM_MAP_PAGE_MASK(dst_map)) ||
9138 !VM_MAP_PAGE_ALIGNED(copy->offset,
9139 VM_MAP_PAGE_MASK(dst_map)) ||
9140 !VM_MAP_PAGE_ALIGNED(dst_addr,
9141 VM_MAP_PAGE_MASK(dst_map)) ||
9142 copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
9143 aligned = FALSE;
9144 dst_end = vm_map_round_page(dst_addr + copy->size,
9145 VM_MAP_PAGE_MASK(dst_map));
9146 } else {
9147 dst_end = dst_addr + copy->size;
9148 }
9149
9150 vm_map_lock(dst_map);
9151
9152 /* LP64todo - remove this check when vm_map_commpage64()
9153 * no longer has to stuff in a map_entry for the commpage
9154 * above the map's max_offset.
9155 */
9156 if (dst_addr >= dst_map->max_offset) {
9157 vm_map_unlock(dst_map);
9158 return KERN_INVALID_ADDRESS;
9159 }
9160
9161 start_pass_1:
9162 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9163 vm_map_unlock(dst_map);
9164 return KERN_INVALID_ADDRESS;
9165 }
9166 vm_map_clip_start(dst_map,
9167 tmp_entry,
9168 vm_map_trunc_page(dst_addr,
9169 VM_MAP_PAGE_MASK(dst_map)));
9170 for (entry = tmp_entry;;) {
9171 vm_map_entry_t next = entry->vme_next;
9172
9173 while (entry->is_sub_map) {
9174 vm_map_offset_t sub_start;
9175 vm_map_offset_t sub_end;
9176 vm_map_offset_t local_end;
9177
9178 if (entry->in_transition) {
9179 /*
9180 * Say that we are waiting, and wait for entry.
9181 */
9182 entry->needs_wakeup = TRUE;
9183 vm_map_entry_wait(dst_map, THREAD_UNINT);
9184
9185 goto start_pass_1;
9186 }
9187
9188 local_end = entry->vme_end;
9189 if (!(entry->needs_copy)) {
9190 /* if needs_copy we are a COW submap */
9191 /* in such a case we just replace so */
9192 /* there is no need for the follow- */
9193 /* ing check. */
9194 encountered_sub_map = TRUE;
9195 sub_start = VME_OFFSET(entry);
9196
9197 if (entry->vme_end < dst_end) {
9198 sub_end = entry->vme_end;
9199 } else {
9200 sub_end = dst_end;
9201 }
9202 sub_end -= entry->vme_start;
9203 sub_end += VME_OFFSET(entry);
9204 vm_map_unlock(dst_map);
9205
9206 kr = vm_map_overwrite_submap_recurse(
9207 VME_SUBMAP(entry),
9208 sub_start,
9209 sub_end - sub_start);
9210 if (kr != KERN_SUCCESS) {
9211 return kr;
9212 }
9213 vm_map_lock(dst_map);
9214 }
9215
9216 if (dst_end <= entry->vme_end) {
9217 goto start_overwrite;
9218 }
9219 if (!vm_map_lookup_entry(dst_map, local_end,
9220 &entry)) {
9221 vm_map_unlock(dst_map);
9222 return KERN_INVALID_ADDRESS;
9223 }
9224 next = entry->vme_next;
9225 }
9226
9227 if (!(entry->protection & VM_PROT_WRITE)) {
9228 vm_map_unlock(dst_map);
9229 return KERN_PROTECTION_FAILURE;
9230 }
9231
9232 /*
9233 * If the entry is in transition, we must wait
9234 * for it to exit that state. Anything could happen
9235 * when we unlock the map, so start over.
9236 */
9237 if (entry->in_transition) {
9238 /*
9239 * Say that we are waiting, and wait for entry.
9240 */
9241 entry->needs_wakeup = TRUE;
9242 vm_map_entry_wait(dst_map, THREAD_UNINT);
9243
9244 goto start_pass_1;
9245 }
9246
9247 /*
9248 * our range is contained completely within this map entry
9249 */
9250 if (dst_end <= entry->vme_end) {
9251 break;
9252 }
9253 /*
9254 * check that range specified is contiguous region
9255 */
9256 if ((next == vm_map_to_entry(dst_map)) ||
9257 (next->vme_start != entry->vme_end)) {
9258 vm_map_unlock(dst_map);
9259 return KERN_INVALID_ADDRESS;
9260 }
9261
9262
9263 /*
9264 * Check for permanent objects in the destination.
9265 */
9266 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9267 ((!VME_OBJECT(entry)->internal) ||
9268 (VME_OBJECT(entry)->true_share))) {
9269 contains_permanent_objects = TRUE;
9270 }
9271
9272 entry = next;
9273 }/* for */
9274
9275 start_overwrite:
9276 /*
9277 * If there are permanent objects in the destination, then
9278 * the copy cannot be interrupted.
9279 */
9280
9281 if (interruptible && contains_permanent_objects) {
9282 vm_map_unlock(dst_map);
9283 return KERN_FAILURE; /* XXX */
9284 }
9285
9286 /*
9287 *
9288 * Make a second pass, overwriting the data
9289 * At the beginning of each loop iteration,
9290 * the next entry to be overwritten is "tmp_entry"
9291 * (initially, the value returned from the lookup above),
9292 * and the starting address expected in that entry
9293 * is "start".
9294 */
9295
9296 total_size = copy->size;
9297 if (encountered_sub_map) {
9298 copy_size = 0;
9299 /* re-calculate tmp_entry since we've had the map */
9300 /* unlocked */
9301 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9302 vm_map_unlock(dst_map);
9303 return KERN_INVALID_ADDRESS;
9304 }
9305 } else {
9306 copy_size = copy->size;
9307 }
9308
9309 base_addr = dst_addr;
9310 while (TRUE) {
9311 /* deconstruct the copy object and do in parts */
9312 /* only in sub_map, interruptible case */
9313 vm_map_entry_t copy_entry;
9314 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
9315 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
9316 int nentries;
9317 int remaining_entries = 0;
9318 vm_map_offset_t new_offset = 0;
9319
9320 for (entry = tmp_entry; copy_size == 0;) {
9321 vm_map_entry_t next;
9322
9323 next = entry->vme_next;
9324
9325 /* tmp_entry and base address are moved along */
9326 /* each time we encounter a sub-map. Otherwise */
9327 /* entry can outpace tmp_entry, and the copy_size */
9328 /* may reflect the distance between them */
9329 /* if the current entry is found to be in transition */
9330 /* we will start over at the beginning or the last */
9331 /* encounter of a submap as dictated by base_addr */
9332 /* we will zero copy_size accordingly. */
9333 if (entry->in_transition) {
9334 /*
9335 * Say that we are waiting, and wait for entry.
9336 */
9337 entry->needs_wakeup = TRUE;
9338 vm_map_entry_wait(dst_map, THREAD_UNINT);
9339
9340 if (!vm_map_lookup_entry(dst_map, base_addr,
9341 &tmp_entry)) {
9342 vm_map_unlock(dst_map);
9343 return KERN_INVALID_ADDRESS;
9344 }
9345 copy_size = 0;
9346 entry = tmp_entry;
9347 continue;
9348 }
9349 if (entry->is_sub_map) {
9350 vm_map_offset_t sub_start;
9351 vm_map_offset_t sub_end;
9352 vm_map_offset_t local_end;
9353
9354 if (entry->needs_copy) {
9355 /* if this is a COW submap */
9356 /* just back the range with an */
9357 /* anonymous entry */
9358 if (entry->vme_end < dst_end) {
9359 sub_end = entry->vme_end;
9360 } else {
9361 sub_end = dst_end;
9362 }
9363 if (entry->vme_start < base_addr) {
9364 sub_start = base_addr;
9365 } else {
9366 sub_start = entry->vme_start;
9367 }
9368 vm_map_clip_end(
9369 dst_map, entry, sub_end);
9370 vm_map_clip_start(
9371 dst_map, entry, sub_start);
9372 assert(!entry->use_pmap);
9373 assert(!entry->iokit_acct);
9374 entry->use_pmap = TRUE;
9375 entry->is_sub_map = FALSE;
9376 vm_map_deallocate(
9377 VME_SUBMAP(entry));
9378 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9379 VME_OFFSET_SET(entry, 0);
9380 entry->is_shared = FALSE;
9381 entry->needs_copy = FALSE;
9382 entry->protection = VM_PROT_DEFAULT;
9383 entry->max_protection = VM_PROT_ALL;
9384 entry->wired_count = 0;
9385 entry->user_wired_count = 0;
9386 if (entry->inheritance
9387 == VM_INHERIT_SHARE) {
9388 entry->inheritance = VM_INHERIT_COPY;
9389 }
9390 continue;
9391 }
9392 /* first take care of any non-sub_map */
9393 /* entries to send */
9394 if (base_addr < entry->vme_start) {
9395 /* stuff to send */
9396 copy_size =
9397 entry->vme_start - base_addr;
9398 break;
9399 }
9400 sub_start = VME_OFFSET(entry);
9401
9402 if (entry->vme_end < dst_end) {
9403 sub_end = entry->vme_end;
9404 } else {
9405 sub_end = dst_end;
9406 }
9407 sub_end -= entry->vme_start;
9408 sub_end += VME_OFFSET(entry);
9409 local_end = entry->vme_end;
9410 vm_map_unlock(dst_map);
9411 copy_size = sub_end - sub_start;
9412
9413 /* adjust the copy object */
9414 if (total_size > copy_size) {
9415 vm_map_size_t local_size = 0;
9416 vm_map_size_t entry_size;
9417
9418 nentries = 1;
9419 new_offset = copy->offset;
9420 copy_entry = vm_map_copy_first_entry(copy);
9421 while (copy_entry !=
9422 vm_map_copy_to_entry(copy)) {
9423 entry_size = copy_entry->vme_end -
9424 copy_entry->vme_start;
9425 if ((local_size < copy_size) &&
9426 ((local_size + entry_size)
9427 >= copy_size)) {
9428 vm_map_copy_clip_end(copy,
9429 copy_entry,
9430 copy_entry->vme_start +
9431 (copy_size - local_size));
9432 entry_size = copy_entry->vme_end -
9433 copy_entry->vme_start;
9434 local_size += entry_size;
9435 new_offset += entry_size;
9436 }
9437 if (local_size >= copy_size) {
9438 next_copy = copy_entry->vme_next;
9439 copy_entry->vme_next =
9440 vm_map_copy_to_entry(copy);
9441 previous_prev =
9442 copy->cpy_hdr.links.prev;
9443 copy->cpy_hdr.links.prev = copy_entry;
9444 copy->size = copy_size;
9445 remaining_entries =
9446 copy->cpy_hdr.nentries;
9447 remaining_entries -= nentries;
9448 copy->cpy_hdr.nentries = nentries;
9449 break;
9450 } else {
9451 local_size += entry_size;
9452 new_offset += entry_size;
9453 nentries++;
9454 }
9455 copy_entry = copy_entry->vme_next;
9456 }
9457 }
9458
9459 if ((entry->use_pmap) && (pmap == NULL)) {
9460 kr = vm_map_copy_overwrite_nested(
9461 VME_SUBMAP(entry),
9462 sub_start,
9463 copy,
9464 interruptible,
9465 VME_SUBMAP(entry)->pmap,
9466 TRUE);
9467 } else if (pmap != NULL) {
9468 kr = vm_map_copy_overwrite_nested(
9469 VME_SUBMAP(entry),
9470 sub_start,
9471 copy,
9472 interruptible, pmap,
9473 TRUE);
9474 } else {
9475 kr = vm_map_copy_overwrite_nested(
9476 VME_SUBMAP(entry),
9477 sub_start,
9478 copy,
9479 interruptible,
9480 dst_map->pmap,
9481 TRUE);
9482 }
9483 if (kr != KERN_SUCCESS) {
9484 if (next_copy != NULL) {
9485 copy->cpy_hdr.nentries +=
9486 remaining_entries;
9487 copy->cpy_hdr.links.prev->vme_next =
9488 next_copy;
9489 copy->cpy_hdr.links.prev
9490 = previous_prev;
9491 copy->size = total_size;
9492 }
9493 return kr;
9494 }
9495 if (dst_end <= local_end) {
9496 return KERN_SUCCESS;
9497 }
9498 /* otherwise copy no longer exists, it was */
9499 /* destroyed after successful copy_overwrite */
9500 copy = vm_map_copy_allocate();
9501 copy->type = VM_MAP_COPY_ENTRY_LIST;
9502 copy->offset = new_offset;
9503 copy->cpy_hdr.page_shift = copy_page_shift;
9504
9505 /*
9506 * XXX FBDP
9507 * this does not seem to deal with
9508 * the VM map store (R&B tree)
9509 */
9510
9511 total_size -= copy_size;
9512 copy_size = 0;
9513 /* put back remainder of copy in container */
9514 if (next_copy != NULL) {
9515 copy->cpy_hdr.nentries = remaining_entries;
9516 copy->cpy_hdr.links.next = next_copy;
9517 copy->cpy_hdr.links.prev = previous_prev;
9518 copy->size = total_size;
9519 next_copy->vme_prev =
9520 vm_map_copy_to_entry(copy);
9521 next_copy = NULL;
9522 }
9523 base_addr = local_end;
9524 vm_map_lock(dst_map);
9525 if (!vm_map_lookup_entry(dst_map,
9526 local_end, &tmp_entry)) {
9527 vm_map_unlock(dst_map);
9528 return KERN_INVALID_ADDRESS;
9529 }
9530 entry = tmp_entry;
9531 continue;
9532 }
9533 if (dst_end <= entry->vme_end) {
9534 copy_size = dst_end - base_addr;
9535 break;
9536 }
9537
9538 if ((next == vm_map_to_entry(dst_map)) ||
9539 (next->vme_start != entry->vme_end)) {
9540 vm_map_unlock(dst_map);
9541 return KERN_INVALID_ADDRESS;
9542 }
9543
9544 entry = next;
9545 }/* for */
9546
9547 next_copy = NULL;
9548 nentries = 1;
9549
9550 /* adjust the copy object */
9551 if (total_size > copy_size) {
9552 vm_map_size_t local_size = 0;
9553 vm_map_size_t entry_size;
9554
9555 new_offset = copy->offset;
9556 copy_entry = vm_map_copy_first_entry(copy);
9557 while (copy_entry != vm_map_copy_to_entry(copy)) {
9558 entry_size = copy_entry->vme_end -
9559 copy_entry->vme_start;
9560 if ((local_size < copy_size) &&
9561 ((local_size + entry_size)
9562 >= copy_size)) {
9563 vm_map_copy_clip_end(copy, copy_entry,
9564 copy_entry->vme_start +
9565 (copy_size - local_size));
9566 entry_size = copy_entry->vme_end -
9567 copy_entry->vme_start;
9568 local_size += entry_size;
9569 new_offset += entry_size;
9570 }
9571 if (local_size >= copy_size) {
9572 next_copy = copy_entry->vme_next;
9573 copy_entry->vme_next =
9574 vm_map_copy_to_entry(copy);
9575 previous_prev =
9576 copy->cpy_hdr.links.prev;
9577 copy->cpy_hdr.links.prev = copy_entry;
9578 copy->size = copy_size;
9579 remaining_entries =
9580 copy->cpy_hdr.nentries;
9581 remaining_entries -= nentries;
9582 copy->cpy_hdr.nentries = nentries;
9583 break;
9584 } else {
9585 local_size += entry_size;
9586 new_offset += entry_size;
9587 nentries++;
9588 }
9589 copy_entry = copy_entry->vme_next;
9590 }
9591 }
9592
9593 if (aligned) {
9594 pmap_t local_pmap;
9595
9596 if (pmap) {
9597 local_pmap = pmap;
9598 } else {
9599 local_pmap = dst_map->pmap;
9600 }
9601
9602 if ((kr = vm_map_copy_overwrite_aligned(
9603 dst_map, tmp_entry, copy,
9604 base_addr, local_pmap)) != KERN_SUCCESS) {
9605 if (next_copy != NULL) {
9606 copy->cpy_hdr.nentries +=
9607 remaining_entries;
9608 copy->cpy_hdr.links.prev->vme_next =
9609 next_copy;
9610 copy->cpy_hdr.links.prev =
9611 previous_prev;
9612 copy->size += copy_size;
9613 }
9614 return kr;
9615 }
9616 vm_map_unlock(dst_map);
9617 } else {
9618 /*
9619 * Performance gain:
9620 *
9621 * if the copy and dst address are misaligned but the same
9622 * offset within the page we can copy_not_aligned the
9623 * misaligned parts and copy aligned the rest. If they are
9624 * aligned but len is unaligned we simply need to copy
9625 * the end bit unaligned. We'll need to split the misaligned
9626 * bits of the region in this case!
9627 */
9628 /* ALWAYS UNLOCKS THE dst_map MAP */
9629 kr = vm_map_copy_overwrite_unaligned(
9630 dst_map,
9631 tmp_entry,
9632 copy,
9633 base_addr,
9634 discard_on_success);
9635 if (kr != KERN_SUCCESS) {
9636 if (next_copy != NULL) {
9637 copy->cpy_hdr.nentries +=
9638 remaining_entries;
9639 copy->cpy_hdr.links.prev->vme_next =
9640 next_copy;
9641 copy->cpy_hdr.links.prev =
9642 previous_prev;
9643 copy->size += copy_size;
9644 }
9645 return kr;
9646 }
9647 }
9648 total_size -= copy_size;
9649 if (total_size == 0) {
9650 break;
9651 }
9652 base_addr += copy_size;
9653 copy_size = 0;
9654 copy->offset = new_offset;
9655 if (next_copy != NULL) {
9656 copy->cpy_hdr.nentries = remaining_entries;
9657 copy->cpy_hdr.links.next = next_copy;
9658 copy->cpy_hdr.links.prev = previous_prev;
9659 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9660 copy->size = total_size;
9661 }
9662 vm_map_lock(dst_map);
9663 while (TRUE) {
9664 if (!vm_map_lookup_entry(dst_map,
9665 base_addr, &tmp_entry)) {
9666 vm_map_unlock(dst_map);
9667 return KERN_INVALID_ADDRESS;
9668 }
9669 if (tmp_entry->in_transition) {
9670 entry->needs_wakeup = TRUE;
9671 vm_map_entry_wait(dst_map, THREAD_UNINT);
9672 } else {
9673 break;
9674 }
9675 }
9676 vm_map_clip_start(dst_map,
9677 tmp_entry,
9678 vm_map_trunc_page(base_addr,
9679 VM_MAP_PAGE_MASK(dst_map)));
9680
9681 entry = tmp_entry;
9682 } /* while */
9683
9684 /*
9685 * Throw away the vm_map_copy object
9686 */
9687 if (discard_on_success) {
9688 vm_map_copy_discard(copy);
9689 }
9690
9691 return KERN_SUCCESS;
9692 }/* vm_map_copy_overwrite */
9693
9694 kern_return_t
9695 vm_map_copy_overwrite(
9696 vm_map_t dst_map,
9697 vm_map_offset_t dst_addr,
9698 vm_map_copy_t copy,
9699 vm_map_size_t copy_size,
9700 boolean_t interruptible)
9701 {
9702 vm_map_size_t head_size, tail_size;
9703 vm_map_copy_t head_copy, tail_copy;
9704 vm_map_offset_t head_addr, tail_addr;
9705 vm_map_entry_t entry;
9706 kern_return_t kr;
9707 vm_map_offset_t effective_page_mask, effective_page_size;
9708 int copy_page_shift;
9709
9710 head_size = 0;
9711 tail_size = 0;
9712 head_copy = NULL;
9713 tail_copy = NULL;
9714 head_addr = 0;
9715 tail_addr = 0;
9716
9717 if (interruptible ||
9718 copy == VM_MAP_COPY_NULL ||
9719 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9720 /*
9721 * We can't split the "copy" map if we're interruptible
9722 * or if we don't have a "copy" map...
9723 */
9724 blunt_copy:
9725 return vm_map_copy_overwrite_nested(dst_map,
9726 dst_addr,
9727 copy,
9728 interruptible,
9729 (pmap_t) NULL,
9730 TRUE);
9731 }
9732
9733 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9734 if (copy_page_shift < PAGE_SHIFT ||
9735 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9736 goto blunt_copy;
9737 }
9738
9739 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9740 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9741 } else {
9742 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9743 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9744 effective_page_mask);
9745 }
9746 effective_page_size = effective_page_mask + 1;
9747
9748 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9749 /*
9750 * Too small to bother with optimizing...
9751 */
9752 goto blunt_copy;
9753 }
9754
9755 if ((dst_addr & effective_page_mask) !=
9756 (copy->offset & effective_page_mask)) {
9757 /*
9758 * Incompatible mis-alignment of source and destination...
9759 */
9760 goto blunt_copy;
9761 }
9762
9763 /*
9764 * Proper alignment or identical mis-alignment at the beginning.
9765 * Let's try and do a small unaligned copy first (if needed)
9766 * and then an aligned copy for the rest.
9767 */
9768 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9769 head_addr = dst_addr;
9770 head_size = (effective_page_size -
9771 (copy->offset & effective_page_mask));
9772 head_size = MIN(head_size, copy_size);
9773 }
9774 if (!vm_map_page_aligned(copy->offset + copy_size,
9775 effective_page_mask)) {
9776 /*
9777 * Mis-alignment at the end.
9778 * Do an aligned copy up to the last page and
9779 * then an unaligned copy for the remaining bytes.
9780 */
9781 tail_size = ((copy->offset + copy_size) &
9782 effective_page_mask);
9783 tail_size = MIN(tail_size, copy_size);
9784 tail_addr = dst_addr + copy_size - tail_size;
9785 assert(tail_addr >= head_addr + head_size);
9786 }
9787 assert(head_size + tail_size <= copy_size);
9788
9789 if (head_size + tail_size == copy_size) {
9790 /*
9791 * It's all unaligned, no optimization possible...
9792 */
9793 goto blunt_copy;
9794 }
9795
9796 /*
9797 * Can't optimize if there are any submaps in the
9798 * destination due to the way we free the "copy" map
9799 * progressively in vm_map_copy_overwrite_nested()
9800 * in that case.
9801 */
9802 vm_map_lock_read(dst_map);
9803 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9804 vm_map_unlock_read(dst_map);
9805 goto blunt_copy;
9806 }
9807 for (;
9808 (entry != vm_map_copy_to_entry(copy) &&
9809 entry->vme_start < dst_addr + copy_size);
9810 entry = entry->vme_next) {
9811 if (entry->is_sub_map) {
9812 vm_map_unlock_read(dst_map);
9813 goto blunt_copy;
9814 }
9815 }
9816 vm_map_unlock_read(dst_map);
9817
9818 if (head_size) {
9819 /*
9820 * Unaligned copy of the first "head_size" bytes, to reach
9821 * a page boundary.
9822 */
9823
9824 /*
9825 * Extract "head_copy" out of "copy".
9826 */
9827 head_copy = vm_map_copy_allocate();
9828 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9829 head_copy->cpy_hdr.entries_pageable =
9830 copy->cpy_hdr.entries_pageable;
9831 vm_map_store_init(&head_copy->cpy_hdr);
9832 head_copy->cpy_hdr.page_shift = copy_page_shift;
9833
9834 entry = vm_map_copy_first_entry(copy);
9835 if (entry->vme_end < copy->offset + head_size) {
9836 head_size = entry->vme_end - copy->offset;
9837 }
9838
9839 head_copy->offset = copy->offset;
9840 head_copy->size = head_size;
9841 copy->offset += head_size;
9842 copy->size -= head_size;
9843 copy_size -= head_size;
9844 assert(copy_size > 0);
9845
9846 vm_map_copy_clip_end(copy, entry, copy->offset);
9847 vm_map_copy_entry_unlink(copy, entry);
9848 vm_map_copy_entry_link(head_copy,
9849 vm_map_copy_to_entry(head_copy),
9850 entry);
9851
9852 /*
9853 * Do the unaligned copy.
9854 */
9855 kr = vm_map_copy_overwrite_nested(dst_map,
9856 head_addr,
9857 head_copy,
9858 interruptible,
9859 (pmap_t) NULL,
9860 FALSE);
9861 if (kr != KERN_SUCCESS) {
9862 goto done;
9863 }
9864 }
9865
9866 if (tail_size) {
9867 /*
9868 * Extract "tail_copy" out of "copy".
9869 */
9870 tail_copy = vm_map_copy_allocate();
9871 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9872 tail_copy->cpy_hdr.entries_pageable =
9873 copy->cpy_hdr.entries_pageable;
9874 vm_map_store_init(&tail_copy->cpy_hdr);
9875 tail_copy->cpy_hdr.page_shift = copy_page_shift;
9876
9877 tail_copy->offset = copy->offset + copy_size - tail_size;
9878 tail_copy->size = tail_size;
9879
9880 copy->size -= tail_size;
9881 copy_size -= tail_size;
9882 assert(copy_size > 0);
9883
9884 entry = vm_map_copy_last_entry(copy);
9885 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9886 entry = vm_map_copy_last_entry(copy);
9887 vm_map_copy_entry_unlink(copy, entry);
9888 vm_map_copy_entry_link(tail_copy,
9889 vm_map_copy_last_entry(tail_copy),
9890 entry);
9891 }
9892
9893 /*
9894 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9895 * we want to avoid TOCTOU issues w.r.t copy->size but
9896 * we don't need to change vm_map_copy_overwrite_nested()
9897 * and all other vm_map_copy_overwrite variants.
9898 *
9899 * So we assign the original copy_size that was passed into
9900 * this routine back to copy.
9901 *
9902 * This use of local 'copy_size' passed into this routine is
9903 * to try and protect against TOCTOU attacks where the kernel
9904 * has been exploited. We don't expect this to be an issue
9905 * during normal system operation.
9906 */
9907 assertf(copy->size == copy_size,
9908 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
9909 copy->size = copy_size;
9910
9911 /*
9912 * Copy most (or possibly all) of the data.
9913 */
9914 kr = vm_map_copy_overwrite_nested(dst_map,
9915 dst_addr + head_size,
9916 copy,
9917 interruptible,
9918 (pmap_t) NULL,
9919 FALSE);
9920 if (kr != KERN_SUCCESS) {
9921 goto done;
9922 }
9923
9924 if (tail_size) {
9925 kr = vm_map_copy_overwrite_nested(dst_map,
9926 tail_addr,
9927 tail_copy,
9928 interruptible,
9929 (pmap_t) NULL,
9930 FALSE);
9931 }
9932
9933 done:
9934 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9935 if (kr == KERN_SUCCESS) {
9936 /*
9937 * Discard all the copy maps.
9938 */
9939 if (head_copy) {
9940 vm_map_copy_discard(head_copy);
9941 head_copy = NULL;
9942 }
9943 vm_map_copy_discard(copy);
9944 if (tail_copy) {
9945 vm_map_copy_discard(tail_copy);
9946 tail_copy = NULL;
9947 }
9948 } else {
9949 /*
9950 * Re-assemble the original copy map.
9951 */
9952 if (head_copy) {
9953 entry = vm_map_copy_first_entry(head_copy);
9954 vm_map_copy_entry_unlink(head_copy, entry);
9955 vm_map_copy_entry_link(copy,
9956 vm_map_copy_to_entry(copy),
9957 entry);
9958 copy->offset -= head_size;
9959 copy->size += head_size;
9960 vm_map_copy_discard(head_copy);
9961 head_copy = NULL;
9962 }
9963 if (tail_copy) {
9964 entry = vm_map_copy_last_entry(tail_copy);
9965 vm_map_copy_entry_unlink(tail_copy, entry);
9966 vm_map_copy_entry_link(copy,
9967 vm_map_copy_last_entry(copy),
9968 entry);
9969 copy->size += tail_size;
9970 vm_map_copy_discard(tail_copy);
9971 tail_copy = NULL;
9972 }
9973 }
9974 return kr;
9975 }
9976
9977
9978 /*
9979 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9980 *
9981 * Description:
9982 * Physically copy unaligned data
9983 *
9984 * Implementation:
9985 * Unaligned parts of pages have to be physically copied. We use
9986 * a modified form of vm_fault_copy (which understands non-aligned
9987 * page offsets and sizes) to do the copy. We attempt to copy as
9988 * much memory in one go as possible; however, vm_fault_copy copies
9989 * within a single memory object, so we have to find the smallest of
9990 * "amount left", "source object data size" and "target object data
9991 * size". With unaligned data we don't need to split regions, so the
9992 * source (copy) object should be a single map entry; the target range
9993 * may be split over multiple map entries, however. In any event we
9994 * are pessimistic about these assumptions.
9995 *
9996 * Assumptions:
9997 * dst_map is locked on entry and is returned locked on success,
9998 * unlocked on error.
9999 */
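/*
 * Illustrative sketch (not part of the build): how the routine below picks
 * the size of each physical-copy chunk -- the smallest of the bytes left
 * overall, the bytes left in the current source copy entry, and the bytes
 * left in the current destination entry. The helper name is hypothetical;
 * the real routine computes this inline.
 */
#if 0
static inline vm_map_size_t
vm_map_copy_overwrite_unaligned_chunk_size(
	vm_map_size_t amount_left,
	vm_map_size_t src_size,
	vm_map_size_t dst_size)
{
	vm_map_size_t chunk;

	/* bounded by both the current source and destination entries */
	chunk = (dst_size < src_size) ? dst_size : src_size;
	/* and never more than what remains to be copied overall */
	if (chunk > amount_left) {
		chunk = amount_left;
	}
	return chunk;
}
#endif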
10000
10001 static kern_return_t
10002 vm_map_copy_overwrite_unaligned(
10003 vm_map_t dst_map,
10004 vm_map_entry_t entry,
10005 vm_map_copy_t copy,
10006 vm_map_offset_t start,
10007 boolean_t discard_on_success)
10008 {
10009 vm_map_entry_t copy_entry;
10010 vm_map_entry_t copy_entry_next;
10011 vm_map_version_t version;
10012 vm_object_t dst_object;
10013 vm_object_offset_t dst_offset;
10014 vm_object_offset_t src_offset;
10015 vm_object_offset_t entry_offset;
10016 vm_map_offset_t entry_end;
10017 vm_map_size_t src_size,
10018 dst_size,
10019 copy_size,
10020 amount_left;
10021 kern_return_t kr = KERN_SUCCESS;
10022
10023
10024 copy_entry = vm_map_copy_first_entry(copy);
10025
10026 vm_map_lock_write_to_read(dst_map);
10027
10028 src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
10029 amount_left = copy->size;
10030 /*
10031 * Unaligned, so we never clipped this entry; we need the offset into
10032 * the vm_object, not just into the data.
10033 */
10034 while (amount_left > 0) {
10035 if (entry == vm_map_to_entry(dst_map)) {
10036 vm_map_unlock_read(dst_map);
10037 return KERN_INVALID_ADDRESS;
10038 }
10039
10040 /* "start" must be within the current map entry */
10041 assert((start >= entry->vme_start) && (start < entry->vme_end));
10042
10043 dst_offset = start - entry->vme_start;
10044
10045 dst_size = entry->vme_end - start;
10046
10047 src_size = copy_entry->vme_end -
10048 (copy_entry->vme_start + src_offset);
10049
10050 if (dst_size < src_size) {
10051 /*
10052 * we can only copy dst_size bytes before
10053 * we have to get the next destination entry
10054 */
10055 copy_size = dst_size;
10056 } else {
10057 /*
10058 * we can only copy src_size bytes before
10059 * we have to get the next source copy entry
10060 */
10061 copy_size = src_size;
10062 }
10063
10064 if (copy_size > amount_left) {
10065 copy_size = amount_left;
10066 }
10067 /*
10068 * Entry needs copy: create a shadow object for the
10069 * copy-on-write region.
10070 */
10071 if (entry->needs_copy &&
10072 ((entry->protection & VM_PROT_WRITE) != 0)) {
10073 if (vm_map_lock_read_to_write(dst_map)) {
10074 vm_map_lock_read(dst_map);
10075 goto RetryLookup;
10076 }
10077 VME_OBJECT_SHADOW(entry,
10078 (vm_map_size_t)(entry->vme_end
10079 - entry->vme_start));
10080 entry->needs_copy = FALSE;
10081 vm_map_lock_write_to_read(dst_map);
10082 }
10083 dst_object = VME_OBJECT(entry);
10084 /*
10085 * Unlike with the virtual (aligned) copy, we're going
10086 * to fault on it, therefore we need a target object.
10087 */
10088 if (dst_object == VM_OBJECT_NULL) {
10089 if (vm_map_lock_read_to_write(dst_map)) {
10090 vm_map_lock_read(dst_map);
10091 goto RetryLookup;
10092 }
10093 dst_object = vm_object_allocate((vm_map_size_t)
10094 entry->vme_end - entry->vme_start);
10095 VME_OBJECT_SET(entry, dst_object);
10096 VME_OFFSET_SET(entry, 0);
10097 assert(entry->use_pmap);
10098 vm_map_lock_write_to_read(dst_map);
10099 }
10100 /*
10101 * Take an object reference and unlock map. The "entry" may
10102 * disappear or change when the map is unlocked.
10103 */
10104 vm_object_reference(dst_object);
10105 version.main_timestamp = dst_map->timestamp;
10106 entry_offset = VME_OFFSET(entry);
10107 entry_end = entry->vme_end;
10108 vm_map_unlock_read(dst_map);
10109 /*
10110 * Copy as much as possible in one pass
10111 */
10112 kr = vm_fault_copy(
10113 VME_OBJECT(copy_entry),
10114 VME_OFFSET(copy_entry) + src_offset,
10115 &copy_size,
10116 dst_object,
10117 entry_offset + dst_offset,
10118 dst_map,
10119 &version,
10120 THREAD_UNINT );
10121
10122 start += copy_size;
10123 src_offset += copy_size;
10124 amount_left -= copy_size;
10125 /*
10126 * Release the object reference
10127 */
10128 vm_object_deallocate(dst_object);
10129 /*
10130 * If a hard error occurred, return it now
10131 */
10132 if (kr != KERN_SUCCESS) {
10133 return kr;
10134 }
10135
10136 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
10137 || amount_left == 0) {
10138 /*
10139 * all done with this copy entry, dispose.
10140 */
10141 copy_entry_next = copy_entry->vme_next;
10142
10143 if (discard_on_success) {
10144 vm_map_copy_entry_unlink(copy, copy_entry);
10145 assert(!copy_entry->is_sub_map);
10146 vm_object_deallocate(VME_OBJECT(copy_entry));
10147 vm_map_copy_entry_dispose(copy, copy_entry);
10148 }
10149
10150 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
10151 amount_left) {
10152 /*
10153 * not finished copying but ran out of source
10154 */
10155 return KERN_INVALID_ADDRESS;
10156 }
10157
10158 copy_entry = copy_entry_next;
10159
10160 src_offset = 0;
10161 }
10162
10163 if (amount_left == 0) {
10164 return KERN_SUCCESS;
10165 }
10166
10167 vm_map_lock_read(dst_map);
10168 if (version.main_timestamp == dst_map->timestamp) {
10169 if (start == entry_end) {
10170 /*
10171 * destination region is split. Use the version
10172 * information to avoid a lookup in the normal
10173 * case.
10174 */
10175 entry = entry->vme_next;
10176 /*
10177 * should be contiguous. Fail if we encounter
10178 * a hole in the destination.
10179 */
10180 if (start != entry->vme_start) {
10181 vm_map_unlock_read(dst_map);
10182 return KERN_INVALID_ADDRESS;
10183 }
10184 }
10185 } else {
10186 /*
10187 * Map version check failed.
10188 * we must look up the entry because somebody
10189 * might have changed the map behind our backs.
10190 */
10191 RetryLookup:
10192 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
10193 vm_map_unlock_read(dst_map);
10194 return KERN_INVALID_ADDRESS;
10195 }
10196 }
10197 }/* while */
10198
10199 return KERN_SUCCESS;
10200 }/* vm_map_copy_overwrite_unaligned */
10201
10202 /*
10203 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10204 *
10205 * Description:
10206 * Does all the vm_trickery possible for whole pages.
10207 *
10208 * Implementation:
10209 *
10210 * If there are no permanent objects in the destination,
10211 * and the source and destination map entry zones match,
10212 * and the destination map entry is not shared,
10213 * then the map entries can be deleted and replaced
10214 * with those from the copy. The following code is the
10215 * basic idea of what to do, but there are lots of annoying
10216 * little details about getting protection and inheritance
10217 * right. Should add protection, inheritance, and sharing checks
10218 * to the above pass and make sure that no wiring is involved.
10219 */
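/*
 * Illustrative sketch (not part of the build): the test the routine below
 * applies to decide whether the destination entry's memory is "temporary
 * unshared" and can simply be replaced by the copy entry's object instead
 * of being physically copied. The helper name is hypothetical; the real
 * routine evaluates this condition inline.
 */
#if 0
static inline boolean_t
vm_map_copy_overwrite_aligned_can_replace(
	vm_map_entry_t entry,
	vm_object_t object)
{
	return (!entry->is_shared &&
	       ((object == VM_OBJECT_NULL) ||
	       (object->internal && !object->true_share))) ||
	       entry->needs_copy;
}
#endif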
10220
10221 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
10222 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
10223 int vm_map_copy_overwrite_aligned_src_large = 0;
10224
10225 static kern_return_t
10226 vm_map_copy_overwrite_aligned(
10227 vm_map_t dst_map,
10228 vm_map_entry_t tmp_entry,
10229 vm_map_copy_t copy,
10230 vm_map_offset_t start,
10231 __unused pmap_t pmap)
10232 {
10233 vm_object_t object;
10234 vm_map_entry_t copy_entry;
10235 vm_map_size_t copy_size;
10236 vm_map_size_t size;
10237 vm_map_entry_t entry;
10238
10239 while ((copy_entry = vm_map_copy_first_entry(copy))
10240 != vm_map_copy_to_entry(copy)) {
10241 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
10242
10243 entry = tmp_entry;
10244 if (entry->is_sub_map) {
10245 /* unnested when clipped earlier */
10246 assert(!entry->use_pmap);
10247 }
10248 if (entry == vm_map_to_entry(dst_map)) {
10249 vm_map_unlock(dst_map);
10250 return KERN_INVALID_ADDRESS;
10251 }
10252 size = (entry->vme_end - entry->vme_start);
10253 /*
10254 * Make sure that no holes popped up in the
10255 * address map, and that the protection is
10256 * still valid, in case the map was unlocked
10257 * earlier.
10258 */
10259
10260 if ((entry->vme_start != start) || ((entry->is_sub_map)
10261 && !entry->needs_copy)) {
10262 vm_map_unlock(dst_map);
10263 return KERN_INVALID_ADDRESS;
10264 }
10265 assert(entry != vm_map_to_entry(dst_map));
10266
10267 /*
10268 * Check protection again
10269 */
10270
10271 if (!(entry->protection & VM_PROT_WRITE)) {
10272 vm_map_unlock(dst_map);
10273 return KERN_PROTECTION_FAILURE;
10274 }
10275
10276 /*
10277 * Adjust to source size first
10278 */
10279
10280 if (copy_size < size) {
10281 if (entry->map_aligned &&
10282 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
10283 VM_MAP_PAGE_MASK(dst_map))) {
10284 /* no longer map-aligned */
10285 entry->map_aligned = FALSE;
10286 }
10287 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10288 size = copy_size;
10289 }
10290
10291 /*
10292 * Adjust to destination size
10293 */
10294
10295 if (size < copy_size) {
10296 vm_map_copy_clip_end(copy, copy_entry,
10297 copy_entry->vme_start + size);
10298 copy_size = size;
10299 }
10300
10301 assert((entry->vme_end - entry->vme_start) == size);
10302 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10303 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10304
10305 /*
10306 * If the destination contains temporary unshared memory,
10307 * we can perform the copy by throwing it away and
10308 * installing the source data.
10309 */
10310
10311 object = VME_OBJECT(entry);
10312 if ((!entry->is_shared &&
10313 ((object == VM_OBJECT_NULL) ||
10314 (object->internal && !object->true_share))) ||
10315 entry->needs_copy) {
10316 vm_object_t old_object = VME_OBJECT(entry);
10317 vm_object_offset_t old_offset = VME_OFFSET(entry);
10318 vm_object_offset_t offset;
10319
10320 /*
10321 * Ensure that the source and destination aren't
10322 * identical
10323 */
10324 if (old_object == VME_OBJECT(copy_entry) &&
10325 old_offset == VME_OFFSET(copy_entry)) {
10326 vm_map_copy_entry_unlink(copy, copy_entry);
10327 vm_map_copy_entry_dispose(copy, copy_entry);
10328
10329 if (old_object != VM_OBJECT_NULL) {
10330 vm_object_deallocate(old_object);
10331 }
10332
10333 start = tmp_entry->vme_end;
10334 tmp_entry = tmp_entry->vme_next;
10335 continue;
10336 }
10337
10338 #if XNU_TARGET_OS_OSX
10339 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10340 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10341 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10342 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
10343 copy_size <= __TRADEOFF1_COPY_SIZE) {
10344 /*
10345 * Virtual vs. Physical copy tradeoff #1.
10346 *
10347 * Copying only a few pages out of a large
10348 * object: do a physical copy instead of
10349 * a virtual copy, to avoid possibly keeping
10350 * the entire large object alive because of
10351 * those few copy-on-write pages.
10352 */
10353 vm_map_copy_overwrite_aligned_src_large++;
10354 goto slow_copy;
10355 }
10356 #endif /* XNU_TARGET_OS_OSX */
10357
10358 if ((dst_map->pmap != kernel_pmap) &&
10359 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
10360 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
10361 vm_object_t new_object, new_shadow;
10362
10363 /*
10364 * We're about to map something over a mapping
10365 * established by malloc()...
10366 */
10367 new_object = VME_OBJECT(copy_entry);
10368 if (new_object != VM_OBJECT_NULL) {
10369 vm_object_lock_shared(new_object);
10370 }
10371 while (new_object != VM_OBJECT_NULL &&
10372 #if XNU_TARGET_OS_OSX
10373 !new_object->true_share &&
10374 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10375 #endif /* XNU_TARGET_OS_OSX */
10376 new_object->internal) {
10377 new_shadow = new_object->shadow;
10378 if (new_shadow == VM_OBJECT_NULL) {
10379 break;
10380 }
10381 vm_object_lock_shared(new_shadow);
10382 vm_object_unlock(new_object);
10383 new_object = new_shadow;
10384 }
10385 if (new_object != VM_OBJECT_NULL) {
10386 if (!new_object->internal) {
10387 /*
10388 * The new mapping is backed
10389 * by an external object. We
10390 * don't want malloc'ed memory
10391 * to be replaced with such a
10392 * non-anonymous mapping, so
10393 * let's go off the optimized
10394 * path...
10395 */
10396 vm_map_copy_overwrite_aligned_src_not_internal++;
10397 vm_object_unlock(new_object);
10398 goto slow_copy;
10399 }
10400 #if XNU_TARGET_OS_OSX
10401 if (new_object->true_share ||
10402 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10403 /*
10404 * Same if there's a "true_share"
10405 * object in the shadow chain, or
10406 * an object with a non-default
10407 * (SYMMETRIC) copy strategy.
10408 */
10409 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10410 vm_object_unlock(new_object);
10411 goto slow_copy;
10412 }
10413 #endif /* XNU_TARGET_OS_OSX */
10414 vm_object_unlock(new_object);
10415 }
10416 /*
10417 * The new mapping is still backed by
10418 * anonymous (internal) memory, so it's
10419 * OK to substitute it for the original
10420 * malloc() mapping.
10421 */
10422 }
10423
10424 if (old_object != VM_OBJECT_NULL) {
10425 if (entry->is_sub_map) {
10426 if (entry->use_pmap) {
10427 #ifndef NO_NESTED_PMAP
10428 pmap_unnest(dst_map->pmap,
10429 (addr64_t)entry->vme_start,
10430 entry->vme_end - entry->vme_start);
10431 #endif /* NO_NESTED_PMAP */
10432 if (dst_map->mapped_in_other_pmaps) {
10433 /* clean up parent */
10434 /* map/maps */
10435 vm_map_submap_pmap_clean(
10436 dst_map, entry->vme_start,
10437 entry->vme_end,
10438 VME_SUBMAP(entry),
10439 VME_OFFSET(entry));
10440 }
10441 } else {
10442 vm_map_submap_pmap_clean(
10443 dst_map, entry->vme_start,
10444 entry->vme_end,
10445 VME_SUBMAP(entry),
10446 VME_OFFSET(entry));
10447 }
10448 vm_map_deallocate(VME_SUBMAP(entry));
10449 } else {
10450 if (dst_map->mapped_in_other_pmaps) {
10451 vm_object_pmap_protect_options(
10452 VME_OBJECT(entry),
10453 VME_OFFSET(entry),
10454 entry->vme_end
10455 - entry->vme_start,
10456 PMAP_NULL,
10457 PAGE_SIZE,
10458 entry->vme_start,
10459 VM_PROT_NONE,
10460 PMAP_OPTIONS_REMOVE);
10461 } else {
10462 pmap_remove_options(
10463 dst_map->pmap,
10464 (addr64_t)(entry->vme_start),
10465 (addr64_t)(entry->vme_end),
10466 PMAP_OPTIONS_REMOVE);
10467 }
10468 vm_object_deallocate(old_object);
10469 }
10470 }
10471
10472 if (entry->iokit_acct) {
10473 /* keep using iokit accounting */
10474 entry->use_pmap = FALSE;
10475 } else {
10476 /* use pmap accounting */
10477 entry->use_pmap = TRUE;
10478 }
10479 entry->is_sub_map = FALSE;
10480 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10481 object = VME_OBJECT(entry);
10482 entry->needs_copy = copy_entry->needs_copy;
10483 entry->wired_count = 0;
10484 entry->user_wired_count = 0;
10485 offset = VME_OFFSET(copy_entry);
10486 VME_OFFSET_SET(entry, offset);
10487
10488 vm_map_copy_entry_unlink(copy, copy_entry);
10489 vm_map_copy_entry_dispose(copy, copy_entry);
10490
10491 /*
10492 * We could try to push pages into the pmap at this point, BUT
10493 * this optimization only saved on average 2 us per page if ALL
10494 * the pages in the source were currently mapped
10495 * and ALL the pages in the dest were touched; if fewer than 2/3
10496 * of the pages were touched, this optimization actually cost more
10497 * cycles. It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10498 */
10499
10500 /*
10501 * Set up for the next iteration. The map
10502 * has not been unlocked, so the next
10503 * address should be at the end of this
10504 * entry, and the next map entry should be
10505 * the one following it.
10506 */
10507
10508 start = tmp_entry->vme_end;
10509 tmp_entry = tmp_entry->vme_next;
10510 } else {
10511 vm_map_version_t version;
10512 vm_object_t dst_object;
10513 vm_object_offset_t dst_offset;
10514 kern_return_t r;
10515
10516 slow_copy:
10517 if (entry->needs_copy) {
10518 VME_OBJECT_SHADOW(entry,
10519 (entry->vme_end -
10520 entry->vme_start));
10521 entry->needs_copy = FALSE;
10522 }
10523
10524 dst_object = VME_OBJECT(entry);
10525 dst_offset = VME_OFFSET(entry);
10526
10527 /*
10528 * Take an object reference, and record
10529 * the map version information so that the
10530 * map can be safely unlocked.
10531 */
10532
10533 if (dst_object == VM_OBJECT_NULL) {
10534 /*
10535 * We would usually have just taken the
10536 * optimized path above if the destination
10537 * object has not been allocated yet. But we
10538 * now disable that optimization if the copy
10539 * entry's object is not backed by anonymous
10540 * memory to avoid replacing malloc'ed
10541 * (i.e. re-usable) anonymous memory with a
10542 * not-so-anonymous mapping.
10543 * So we have to handle this case here and
10544 * allocate a new VM object for this map entry.
10545 */
10546 dst_object = vm_object_allocate(
10547 entry->vme_end - entry->vme_start);
10548 dst_offset = 0;
10549 VME_OBJECT_SET(entry, dst_object);
10550 VME_OFFSET_SET(entry, dst_offset);
10551 assert(entry->use_pmap);
10552 }
10553
10554 vm_object_reference(dst_object);
10555
10556 /* account for unlock bumping up timestamp */
10557 version.main_timestamp = dst_map->timestamp + 1;
10558
10559 vm_map_unlock(dst_map);
10560
10561 /*
10562 * Copy as much as possible in one pass
10563 */
10564
10565 copy_size = size;
10566 r = vm_fault_copy(
10567 VME_OBJECT(copy_entry),
10568 VME_OFFSET(copy_entry),
10569 &copy_size,
10570 dst_object,
10571 dst_offset,
10572 dst_map,
10573 &version,
10574 THREAD_UNINT );
10575
10576 /*
10577 * Release the object reference
10578 */
10579
10580 vm_object_deallocate(dst_object);
10581
10582 /*
10583 * If a hard error occurred, return it now
10584 */
10585
10586 if (r != KERN_SUCCESS) {
10587 return r;
10588 }
10589
10590 if (copy_size != 0) {
10591 /*
10592 * Dispose of the copied region
10593 */
10594
10595 vm_map_copy_clip_end(copy, copy_entry,
10596 copy_entry->vme_start + copy_size);
10597 vm_map_copy_entry_unlink(copy, copy_entry);
10598 vm_object_deallocate(VME_OBJECT(copy_entry));
10599 vm_map_copy_entry_dispose(copy, copy_entry);
10600 }
10601
10602 /*
10603 * Pick up in the destination map where we left off.
10604 *
10605 * Use the version information to avoid a lookup
10606 * in the normal case.
10607 */
10608
10609 start += copy_size;
10610 vm_map_lock(dst_map);
10611 if (version.main_timestamp == dst_map->timestamp &&
10612 copy_size != 0) {
10613 /* We can safely use saved tmp_entry value */
10614
10615 if (tmp_entry->map_aligned &&
10616 !VM_MAP_PAGE_ALIGNED(
10617 start,
10618 VM_MAP_PAGE_MASK(dst_map))) {
10619 /* no longer map-aligned */
10620 tmp_entry->map_aligned = FALSE;
10621 }
10622 vm_map_clip_end(dst_map, tmp_entry, start);
10623 tmp_entry = tmp_entry->vme_next;
10624 } else {
10625 /* Must do lookup of tmp_entry */
10626
10627 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10628 vm_map_unlock(dst_map);
10629 return KERN_INVALID_ADDRESS;
10630 }
10631 if (tmp_entry->map_aligned &&
10632 !VM_MAP_PAGE_ALIGNED(
10633 start,
10634 VM_MAP_PAGE_MASK(dst_map))) {
10635 /* no longer map-aligned */
10636 tmp_entry->map_aligned = FALSE;
10637 }
10638 vm_map_clip_start(dst_map, tmp_entry, start);
10639 }
10640 }
10641 }/* while */
10642
10643 return KERN_SUCCESS;
10644 }/* vm_map_copy_overwrite_aligned */
10645
10646 /*
10647 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10648 *
10649 * Description:
10650 * Copy in data to a kernel buffer from space in the
10651 * source map. The original space may be optionally
10652 * deallocated.
10653 *
10654 * If successful, returns a new copy object.
10655 */
10656 static kern_return_t
10657 vm_map_copyin_kernel_buffer(
10658 vm_map_t src_map,
10659 vm_map_offset_t src_addr,
10660 vm_map_size_t len,
10661 boolean_t src_destroy,
10662 vm_map_copy_t *copy_result)
10663 {
10664 kern_return_t kr;
10665 vm_map_copy_t copy;
10666
10667 if (len > msg_ool_size_small) {
10668 return KERN_INVALID_ARGUMENT;
10669 }
10670
10671 copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
10672 if (copy == VM_MAP_COPY_NULL) {
10673 return KERN_RESOURCE_SHORTAGE;
10674 }
10675 copy->cpy_kdata = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
10676 if (copy->cpy_kdata == NULL) {
10677 zfree(vm_map_copy_zone, copy);
10678 return KERN_RESOURCE_SHORTAGE;
10679 }
10680
10681 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10682 copy->size = len;
10683 copy->offset = 0;
10684
10685 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10686 if (kr != KERN_SUCCESS) {
10687 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, len);
10688 zfree(vm_map_copy_zone, copy);
10689 return kr;
10690 }
10691 if (src_destroy) {
10692 (void) vm_map_remove(
10693 src_map,
10694 vm_map_trunc_page(src_addr,
10695 VM_MAP_PAGE_MASK(src_map)),
10696 vm_map_round_page(src_addr + len,
10697 VM_MAP_PAGE_MASK(src_map)),
10698 (VM_MAP_REMOVE_INTERRUPTIBLE |
10699 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10700 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10701 }
10702 *copy_result = copy;
10703 return KERN_SUCCESS;
10704 }
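/*
 * Illustrative sketch (not part of the build): the invariants a kernel-buffer
 * copy object created above is expected to satisfy when it is later consumed
 * by vm_map_copyout_kernel_buffer(), which panics on a size/offset mismatch.
 * These checks would live in a hypothetical consumer; they are not actual
 * assertions in this file.
 */
#if 0
	assert(copy->type == VM_MAP_COPY_KERNEL_BUFFER);
	assert(copy->offset == 0);
	assert(copy->size <= msg_ool_size_small);
	assert(copy->cpy_kdata != NULL);
#endif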
10705
10706 /*
10707 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10708 *
10709 * Description:
10710 * Copy out data from a kernel buffer into space in the
10711 * destination map. The space may be optionally dynamically
10712 * allocated.
10713 *
10714 * If successful, consumes the copy object.
10715 * Otherwise, the caller is responsible for it.
10716 */
10717 static int vm_map_copyout_kernel_buffer_failures = 0;
10718 static kern_return_t
10719 vm_map_copyout_kernel_buffer(
10720 vm_map_t map,
10721 vm_map_address_t *addr, /* IN/OUT */
10722 vm_map_copy_t copy,
10723 vm_map_size_t copy_size,
10724 boolean_t overwrite,
10725 boolean_t consume_on_success)
10726 {
10727 kern_return_t kr = KERN_SUCCESS;
10728 thread_t thread = current_thread();
10729
10730 assert(copy->size == copy_size);
10731
10732 /*
10733 * check for corrupted vm_map_copy structure
10734 */
10735 if (copy_size > msg_ool_size_small || copy->offset) {
10736 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10737 (long long)copy->size, (long long)copy->offset);
10738 }
10739
10740 if (!overwrite) {
10741 /*
10742 * Allocate space in the target map for the data
10743 */
10744 *addr = 0;
10745 kr = vm_map_enter(map,
10746 addr,
10747 vm_map_round_page(copy_size,
10748 VM_MAP_PAGE_MASK(map)),
10749 (vm_map_offset_t) 0,
10750 VM_FLAGS_ANYWHERE,
10751 VM_MAP_KERNEL_FLAGS_NONE,
10752 VM_KERN_MEMORY_NONE,
10753 VM_OBJECT_NULL,
10754 (vm_object_offset_t) 0,
10755 FALSE,
10756 VM_PROT_DEFAULT,
10757 VM_PROT_ALL,
10758 VM_INHERIT_DEFAULT);
10759 if (kr != KERN_SUCCESS) {
10760 return kr;
10761 }
10762 #if KASAN
10763 if (map->pmap == kernel_pmap) {
10764 kasan_notify_address(*addr, copy->size);
10765 }
10766 #endif
10767 }
10768
10769 /*
10770 * Copyout the data from the kernel buffer to the target map.
10771 */
10772 if (thread->map == map) {
10773 /*
10774 * If the target map is the current map, just do
10775 * the copy.
10776 */
10777 assert((vm_size_t)copy_size == copy_size);
10778 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10779 kr = KERN_INVALID_ADDRESS;
10780 }
10781 } else {
10782 vm_map_t oldmap;
10783
10784 /*
10785 * If the target map is another map, assume the
10786 * target's address space identity for the duration
10787 * of the copy.
10788 */
10789 vm_map_reference(map);
10790 oldmap = vm_map_switch(map);
10791
10792 assert((vm_size_t)copy_size == copy_size);
10793 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10794 vm_map_copyout_kernel_buffer_failures++;
10795 kr = KERN_INVALID_ADDRESS;
10796 }
10797
10798 (void) vm_map_switch(oldmap);
10799 vm_map_deallocate(map);
10800 }
10801
10802 if (kr != KERN_SUCCESS) {
10803 /* the copy failed, clean up */
10804 if (!overwrite) {
10805 /*
10806 * Deallocate the space we allocated in the target map.
10807 */
10808 (void) vm_map_remove(
10809 map,
10810 vm_map_trunc_page(*addr,
10811 VM_MAP_PAGE_MASK(map)),
10812 vm_map_round_page((*addr +
10813 vm_map_round_page(copy_size,
10814 VM_MAP_PAGE_MASK(map))),
10815 VM_MAP_PAGE_MASK(map)),
10816 VM_MAP_REMOVE_NO_FLAGS);
10817 *addr = 0;
10818 }
10819 } else {
10820 /* copy was successful, discard the copy structure */
10821 if (consume_on_success) {
10822 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy_size);
10823 zfree(vm_map_copy_zone, copy);
10824 }
10825 }
10826
10827 return kr;
10828 }
10829
10830 /*
10831 * Routine: vm_map_copy_insert [internal use only]
10832 *
10833 * Description:
10834 * Link a copy chain ("copy") into a map at the
10835 * specified location (after "where").
10836 * Side effects:
10837 * The copy chain is destroyed.
10838 */
10839 static void
10840 vm_map_copy_insert(
10841 vm_map_t map,
10842 vm_map_entry_t after_where,
10843 vm_map_copy_t copy)
10844 {
10845 vm_map_entry_t entry;
10846
10847 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10848 entry = vm_map_copy_first_entry(copy);
10849 vm_map_copy_entry_unlink(copy, entry);
10850 vm_map_store_entry_link(map, after_where, entry,
10851 VM_MAP_KERNEL_FLAGS_NONE);
10852 after_where = entry;
10853 }
10854 zfree(vm_map_copy_zone, copy);
10855 }
10856
10857 void
10858 vm_map_copy_remap(
10859 vm_map_t map,
10860 vm_map_entry_t where,
10861 vm_map_copy_t copy,
10862 vm_map_offset_t adjustment,
10863 vm_prot_t cur_prot,
10864 vm_prot_t max_prot,
10865 vm_inherit_t inheritance)
10866 {
10867 vm_map_entry_t copy_entry, new_entry;
10868
10869 for (copy_entry = vm_map_copy_first_entry(copy);
10870 copy_entry != vm_map_copy_to_entry(copy);
10871 copy_entry = copy_entry->vme_next) {
10872 /* get a new VM map entry for the map */
10873 new_entry = vm_map_entry_create(map,
10874 !map->hdr.entries_pageable);
10875 /* copy the "copy entry" to the new entry */
10876 vm_map_entry_copy(map, new_entry, copy_entry);
10877 /* adjust "start" and "end" */
10878 new_entry->vme_start += adjustment;
10879 new_entry->vme_end += adjustment;
10880 /* clear some attributes */
10881 new_entry->inheritance = inheritance;
10882 new_entry->protection = cur_prot;
10883 new_entry->max_protection = max_prot;
10884 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10885 /* take an extra reference on the entry's "object" */
10886 if (new_entry->is_sub_map) {
10887 assert(!new_entry->use_pmap); /* not nested */
10888 vm_map_lock(VME_SUBMAP(new_entry));
10889 vm_map_reference(VME_SUBMAP(new_entry));
10890 vm_map_unlock(VME_SUBMAP(new_entry));
10891 } else {
10892 vm_object_reference(VME_OBJECT(new_entry));
10893 }
10894 /* insert the new entry in the map */
10895 vm_map_store_entry_link(map, where, new_entry,
10896 VM_MAP_KERNEL_FLAGS_NONE);
10897 /* continue inserting the "copy entries" after the new entry */
10898 where = new_entry;
10899 }
10900 }
10901
10902
10903 /*
10904 * Returns true if *size matches (or is in the range of) copy->size.
10905 * Upon returning true, the *size field is updated with the actual size of the
10906 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types).
10907 */
10908 boolean_t
10909 vm_map_copy_validate_size(
10910 vm_map_t dst_map,
10911 vm_map_copy_t copy,
10912 vm_map_size_t *size)
10913 {
10914 if (copy == VM_MAP_COPY_NULL) {
10915 return FALSE;
10916 }
10917 vm_map_size_t copy_sz = copy->size;
10918 vm_map_size_t sz = *size;
10919 switch (copy->type) {
10920 case VM_MAP_COPY_OBJECT:
10921 case VM_MAP_COPY_KERNEL_BUFFER:
10922 if (sz == copy_sz) {
10923 return TRUE;
10924 }
10925 break;
10926 case VM_MAP_COPY_ENTRY_LIST:
10927 /*
10928 * potential page-size rounding prevents us from exactly
10929 * validating this flavor of vm_map_copy, but we can at least
10930 * assert that it's within a range.
10931 */
10932 if (copy_sz >= sz &&
10933 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10934 *size = copy_sz;
10935 return TRUE;
10936 }
10937 break;
10938 default:
10939 break;
10940 }
10941 return FALSE;
10942 }
10943
10944 /*
10945 * Routine: vm_map_copyout_size
10946 *
10947 * Description:
10948 * Copy out a copy chain ("copy") into newly-allocated
10949 * space in the destination map. Uses a prevalidated
10950 * size for the copy object (vm_map_copy_validate_size).
10951 *
10952 * If successful, consumes the copy object.
10953 * Otherwise, the caller is responsible for it.
10954 */
10955 kern_return_t
10956 vm_map_copyout_size(
10957 vm_map_t dst_map,
10958 vm_map_address_t *dst_addr, /* OUT */
10959 vm_map_copy_t copy,
10960 vm_map_size_t copy_size)
10961 {
10962 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10963 TRUE, /* consume_on_success */
10964 VM_PROT_DEFAULT,
10965 VM_PROT_ALL,
10966 VM_INHERIT_DEFAULT);
10967 }
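/*
 * Illustrative sketch (not part of the build, hypothetical caller): the
 * expected pairing of vm_map_copy_validate_size() with vm_map_copyout_size(),
 * so that the size handed to the copyout is the prevalidated one.
 */
#if 0
static kern_return_t
example_copyout_validated(
	vm_map_t dst_map,
	vm_map_copy_t copy,
	vm_map_size_t claimed_size,
	vm_map_address_t *addr)
{
	vm_map_size_t size = claimed_size;

	if (!vm_map_copy_validate_size(dst_map, copy, &size)) {
		return KERN_FAILURE;
	}
	/* "size" now reflects the copy object's actual size */
	return vm_map_copyout_size(dst_map, addr, copy, size);
}
#endif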
10968
10969 /*
10970 * Routine: vm_map_copyout
10971 *
10972 * Description:
10973 * Copy out a copy chain ("copy") into newly-allocated
10974 * space in the destination map.
10975 *
10976 * If successful, consumes the copy object.
10977 * Otherwise, the caller is responsible for it.
10978 */
10979 kern_return_t
10980 vm_map_copyout(
10981 vm_map_t dst_map,
10982 vm_map_address_t *dst_addr, /* OUT */
10983 vm_map_copy_t copy)
10984 {
10985 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10986 TRUE, /* consume_on_success */
10987 VM_PROT_DEFAULT,
10988 VM_PROT_ALL,
10989 VM_INHERIT_DEFAULT);
10990 }
10991
10992 kern_return_t
10993 vm_map_copyout_internal(
10994 vm_map_t dst_map,
10995 vm_map_address_t *dst_addr, /* OUT */
10996 vm_map_copy_t copy,
10997 vm_map_size_t copy_size,
10998 boolean_t consume_on_success,
10999 vm_prot_t cur_protection,
11000 vm_prot_t max_protection,
11001 vm_inherit_t inheritance)
11002 {
11003 vm_map_size_t size;
11004 vm_map_size_t adjustment;
11005 vm_map_offset_t start;
11006 vm_object_offset_t vm_copy_start;
11007 vm_map_entry_t last;
11008 vm_map_entry_t entry;
11009 vm_map_entry_t hole_entry;
11010 vm_map_copy_t original_copy;
11011
11012 /*
11013 * Check for null copy object.
11014 */
11015
11016 if (copy == VM_MAP_COPY_NULL) {
11017 *dst_addr = 0;
11018 return KERN_SUCCESS;
11019 }
11020
11021 /*
11022 * Assert that the vm_map_copy is coming from the right
11023 * zone and hasn't been forged
11024 */
11025 vm_map_copy_require(copy);
11026
11027 if (copy->size != copy_size) {
11028 *dst_addr = 0;
11029 return KERN_FAILURE;
11030 }
11031
11032 /*
11033 * Check for special copy object, created
11034 * by vm_map_copyin_object.
11035 */
11036
11037 if (copy->type == VM_MAP_COPY_OBJECT) {
11038 vm_object_t object = copy->cpy_object;
11039 kern_return_t kr;
11040 vm_object_offset_t offset;
11041
11042 offset = vm_object_trunc_page(copy->offset);
11043 size = vm_map_round_page((copy_size +
11044 (vm_map_size_t)(copy->offset -
11045 offset)),
11046 VM_MAP_PAGE_MASK(dst_map));
11047 *dst_addr = 0;
11048 kr = vm_map_enter(dst_map, dst_addr, size,
11049 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
11050 VM_MAP_KERNEL_FLAGS_NONE,
11051 VM_KERN_MEMORY_NONE,
11052 object, offset, FALSE,
11053 VM_PROT_DEFAULT, VM_PROT_ALL,
11054 VM_INHERIT_DEFAULT);
11055 if (kr != KERN_SUCCESS) {
11056 return kr;
11057 }
11058 /* Account for non-pagealigned copy object */
11059 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
11060 if (consume_on_success) {
11061 zfree(vm_map_copy_zone, copy);
11062 }
11063 return KERN_SUCCESS;
11064 }
11065
11066 /*
11067 * Check for special kernel buffer allocated
11068 * by new_ipc_kmsg_copyin.
11069 */
11070
11071 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
11072 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
11073 copy, copy_size, FALSE,
11074 consume_on_success);
11075 }
11076
11077 original_copy = copy;
11078 if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
11079 kern_return_t kr;
11080 vm_map_copy_t target_copy;
11081 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
11082
11083 target_copy = VM_MAP_COPY_NULL;
11084 DEBUG4K_ADJUST("adjusting...\n");
11085 kr = vm_map_copy_adjust_to_target(
11086 copy,
11087 0, /* offset */
11088 copy->size, /* size */
11089 dst_map,
11090 TRUE, /* copy */
11091 &target_copy,
11092 &overmap_start,
11093 &overmap_end,
11094 &trimmed_start);
11095 if (kr != KERN_SUCCESS) {
11096 DEBUG4K_COPY("adjust failed 0x%x\n", kr);
11097 return kr;
11098 }
11099 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
11100 if (target_copy != copy) {
11101 copy = target_copy;
11102 }
11103 copy_size = copy->size;
11104 }
11105
11106 /*
11107 * Find space for the data
11108 */
11109
11110 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
11111 VM_MAP_COPY_PAGE_MASK(copy));
11112 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
11113 VM_MAP_COPY_PAGE_MASK(copy))
11114 - vm_copy_start;
11115
11116
11117 StartAgain:;
11118
11119 vm_map_lock(dst_map);
11120 if (dst_map->disable_vmentry_reuse == TRUE) {
11121 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
11122 last = entry;
11123 } else {
11124 if (dst_map->holelistenabled) {
11125 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
11126
11127 if (hole_entry == NULL) {
11128 /*
11129 * No more space in the map?
11130 */
11131 vm_map_unlock(dst_map);
11132 return KERN_NO_SPACE;
11133 }
11134
11135 last = hole_entry;
11136 start = last->vme_start;
11137 } else {
11138 assert(first_free_is_valid(dst_map));
11139 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
11140 vm_map_min(dst_map) : last->vme_end;
11141 }
11142 start = vm_map_round_page(start,
11143 VM_MAP_PAGE_MASK(dst_map));
11144 }
11145
11146 while (TRUE) {
11147 vm_map_entry_t next = last->vme_next;
11148 vm_map_offset_t end = start + size;
11149
11150 if ((end > dst_map->max_offset) || (end < start)) {
11151 if (dst_map->wait_for_space) {
11152 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
11153 assert_wait((event_t) dst_map,
11154 THREAD_INTERRUPTIBLE);
11155 vm_map_unlock(dst_map);
11156 thread_block(THREAD_CONTINUE_NULL);
11157 goto StartAgain;
11158 }
11159 }
11160 vm_map_unlock(dst_map);
11161 return KERN_NO_SPACE;
11162 }
11163
11164 if (dst_map->holelistenabled) {
11165 if (last->vme_end >= end) {
11166 break;
11167 }
11168 } else {
11169 /*
11170 * If there are no more entries, we must win.
11171 *
11172 * OR
11173 *
11174 * If there is another entry, it must be
11175 * after the end of the potential new region.
11176 */
11177
11178 if (next == vm_map_to_entry(dst_map)) {
11179 break;
11180 }
11181
11182 if (next->vme_start >= end) {
11183 break;
11184 }
11185 }
11186
11187 last = next;
11188
11189 if (dst_map->holelistenabled) {
11190 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
11191 /*
11192 * Wrapped around
11193 */
11194 vm_map_unlock(dst_map);
11195 return KERN_NO_SPACE;
11196 }
11197 start = last->vme_start;
11198 } else {
11199 start = last->vme_end;
11200 }
11201 start = vm_map_round_page(start,
11202 VM_MAP_PAGE_MASK(dst_map));
11203 }
11204
11205 if (dst_map->holelistenabled) {
11206 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
11207 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
11208 }
11209 }
11210
11211
11212 adjustment = start - vm_copy_start;
11213 if (!consume_on_success) {
11214 /*
11215 * We're not allowed to consume "copy", so we'll have to
11216 * copy its map entries into the destination map below.
11217 * No need to re-allocate map entries from the correct
11218 * (pageable or not) zone, since we'll get new map entries
11219 * during the transfer.
11220 * We'll also adjust the map entries' "start" and "end"
11221 * during the transfer, to keep "copy"'s entries consistent
11222 * with its "offset".
11223 */
11224 goto after_adjustments;
11225 }
11226
11227 /*
11228 * Since we're going to just drop the map
11229 * entries from the copy into the destination
11230 * map, they must come from the same pool.
11231 */
11232
11233 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
11234 /*
11235 * Mismatches occur when dealing with the default
11236 * pager.
11237 */
11238 zone_t old_zone;
11239 vm_map_entry_t next, new;
11240
11241 /*
11242 * Find the zone that the copies were allocated from
11243 */
11244
11245 entry = vm_map_copy_first_entry(copy);
11246
11247 /*
11248 * Reinitialize the copy so that vm_map_copy_entry_link
11249 * will work.
11250 */
11251 vm_map_store_copy_reset(copy, entry);
11252 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
11253
11254 /*
11255 * Copy each entry.
11256 */
11257 while (entry != vm_map_copy_to_entry(copy)) {
11258 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11259 vm_map_entry_copy_full(new, entry);
11260 new->vme_no_copy_on_read = FALSE;
11261 assert(!new->iokit_acct);
11262 if (new->is_sub_map) {
11263 /* clr address space specifics */
11264 new->use_pmap = FALSE;
11265 }
11266 vm_map_copy_entry_link(copy,
11267 vm_map_copy_last_entry(copy),
11268 new);
11269 next = entry->vme_next;
11270 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
11271 zfree(old_zone, entry);
11272 entry = next;
11273 }
11274 }
11275
11276 /*
11277 * Adjust the addresses in the copy chain, and
11278 * reset the region attributes.
11279 */
11280
11281 for (entry = vm_map_copy_first_entry(copy);
11282 entry != vm_map_copy_to_entry(copy);
11283 entry = entry->vme_next) {
11284 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
11285 /*
11286 * We're injecting this copy entry into a map that
11287 * has the standard page alignment, so clear
11288 * "map_aligned" (which might have been inherited
11289 * from the original map entry).
11290 */
11291 entry->map_aligned = FALSE;
11292 }
11293
11294 entry->vme_start += adjustment;
11295 entry->vme_end += adjustment;
11296
11297 if (entry->map_aligned) {
11298 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
11299 VM_MAP_PAGE_MASK(dst_map)));
11300 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
11301 VM_MAP_PAGE_MASK(dst_map)));
11302 }
11303
11304 entry->inheritance = VM_INHERIT_DEFAULT;
11305 entry->protection = VM_PROT_DEFAULT;
11306 entry->max_protection = VM_PROT_ALL;
11307 entry->behavior = VM_BEHAVIOR_DEFAULT;
11308
11309 /*
11310 * If the entry is now wired,
11311 * map the pages into the destination map.
11312 */
11313 if (entry->wired_count != 0) {
11314 vm_map_offset_t va;
11315 vm_object_offset_t offset;
11316 vm_object_t object;
11317 vm_prot_t prot;
11318 int type_of_fault;
11319
11320 /* TODO4K would need to use actual page size */
11321 assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);
11322
11323 object = VME_OBJECT(entry);
11324 offset = VME_OFFSET(entry);
11325 va = entry->vme_start;
11326
11327 pmap_pageable(dst_map->pmap,
11328 entry->vme_start,
11329 entry->vme_end,
11330 TRUE);
11331
11332 while (va < entry->vme_end) {
11333 vm_page_t m;
11334 struct vm_object_fault_info fault_info = {};
11335
11336 /*
11337 * Look up the page in the object.
11338 * Assert that the page will be found in the
11339 * top object:
11340 * either
11341 * the object was newly created by
11342 * vm_object_copy_slowly, and has
11343 * copies of all of the pages from
11344 * the source object
11345 * or
11346 * the object was moved from the old
11347 * map entry; because the old map
11348 * entry was wired, all of the pages
11349 * were in the top-level object.
11350 * (XXX not true if we wire pages for
11351 * reading)
11352 */
11353 vm_object_lock(object);
11354
11355 m = vm_page_lookup(object, offset);
11356 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
11357 m->vmp_absent) {
11358 panic("vm_map_copyout: wiring %p", m);
11359 }
11360
11361 prot = entry->protection;
11362
11363 if (override_nx(dst_map, VME_ALIAS(entry)) &&
11364 prot) {
11365 prot |= VM_PROT_EXECUTE;
11366 }
11367
11368 type_of_fault = DBG_CACHE_HIT_FAULT;
11369
11370 fault_info.user_tag = VME_ALIAS(entry);
11371 fault_info.pmap_options = 0;
11372 if (entry->iokit_acct ||
11373 (!entry->is_sub_map && !entry->use_pmap)) {
11374 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11375 }
11376
11377 vm_fault_enter(m,
11378 dst_map->pmap,
11379 va,
11380 PAGE_SIZE, 0,
11381 prot,
11382 prot,
11383 VM_PAGE_WIRED(m),
11384 FALSE, /* change_wiring */
11385 VM_KERN_MEMORY_NONE, /* tag - not wiring */
11386 &fault_info,
11387 NULL, /* need_retry */
11388 &type_of_fault);
11389
11390 vm_object_unlock(object);
11391
11392 offset += PAGE_SIZE_64;
11393 va += PAGE_SIZE;
11394 }
11395 }
11396 }
11397
11398 after_adjustments:
11399
11400 /*
11401 * Correct the page alignment for the result
11402 */
11403
11404 *dst_addr = start + (copy->offset - vm_copy_start);
11405
11406 #if KASAN
11407 kasan_notify_address(*dst_addr, size);
11408 #endif
11409
11410 /*
11411 * Update the hints and the map size
11412 */
11413
11414 if (consume_on_success) {
11415 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
11416 } else {
11417 SAVE_HINT_MAP_WRITE(dst_map, last);
11418 }
11419
11420 dst_map->size += size;
11421
11422 /*
11423 * Link in the copy
11424 */
11425
11426 if (consume_on_success) {
11427 vm_map_copy_insert(dst_map, last, copy);
11428 if (copy != original_copy) {
11429 vm_map_copy_discard(original_copy);
11430 original_copy = VM_MAP_COPY_NULL;
11431 }
11432 } else {
11433 vm_map_copy_remap(dst_map, last, copy, adjustment,
11434 cur_protection, max_protection,
11435 inheritance);
11436 if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
11437 vm_map_copy_discard(copy);
11438 copy = original_copy;
11439 }
11440 }
11441
11442
11443 vm_map_unlock(dst_map);
11444
11445 /*
11446 * XXX If wiring_required, call vm_map_pageable
11447 */
11448
11449 return KERN_SUCCESS;
11450 }
11451
11452 /*
11453 * Routine: vm_map_copyin
11454 *
11455 * Description:
11456 * see vm_map_copyin_common. Exported via Unsupported.exports.
11457 *
11458 */
11459
11460 #undef vm_map_copyin
11461
11462 kern_return_t
11463 vm_map_copyin(
11464 vm_map_t src_map,
11465 vm_map_address_t src_addr,
11466 vm_map_size_t len,
11467 boolean_t src_destroy,
11468 vm_map_copy_t *copy_result) /* OUT */
11469 {
11470 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11471 FALSE, copy_result, FALSE);
11472 }
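/*
 * Illustrative sketch (not part of the build, hypothetical helper): copying a
 * region between two maps with vm_map_copyin() and vm_map_copyout(). On
 * success the copy object is consumed by the copyout; on failure the caller
 * remains responsible for it and must discard it.
 */
#if 0
static kern_return_t
example_copy_region(
	vm_map_t src_map,
	vm_map_t dst_map,
	vm_map_address_t src_addr,
	vm_map_size_t len,
	vm_map_address_t *dst_addr)
{
	vm_map_copy_t copy;
	kern_return_t kr;

	kr = vm_map_copyin(src_map, src_addr, len,
	    FALSE, /* src_destroy */
	    &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* not consumed on failure: discard it ourselves */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif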
11473
11474 /*
11475 * Routine: vm_map_copyin_common
11476 *
11477 * Description:
11478 * Copy the specified region (src_addr, len) from the
11479 * source address space (src_map), possibly removing
11480 * the region from the source address space (src_destroy).
11481 *
11482 * Returns:
11483 * A vm_map_copy_t object (copy_result), suitable for
11484 * insertion into another address space (using vm_map_copyout),
11485 * copying over another address space region (using
11486 * vm_map_copy_overwrite). If the copy is unused, it
11487 * should be destroyed (using vm_map_copy_discard).
11488 *
11489 * In/out conditions:
11490 * The source map should not be locked on entry.
11491 */
11492
11493 typedef struct submap_map {
11494 vm_map_t parent_map;
11495 vm_map_offset_t base_start;
11496 vm_map_offset_t base_end;
11497 vm_map_size_t base_len;
11498 struct submap_map *next;
11499 } submap_map_t;
11500
11501 kern_return_t
11502 vm_map_copyin_common(
11503 vm_map_t src_map,
11504 vm_map_address_t src_addr,
11505 vm_map_size_t len,
11506 boolean_t src_destroy,
11507 __unused boolean_t src_volatile,
11508 vm_map_copy_t *copy_result, /* OUT */
11509 boolean_t use_maxprot)
11510 {
11511 int flags;
11512
11513 flags = 0;
11514 if (src_destroy) {
11515 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11516 }
11517 if (use_maxprot) {
11518 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11519 }
11520 return vm_map_copyin_internal(src_map,
11521 src_addr,
11522 len,
11523 flags,
11524 copy_result);
11525 }
11526 kern_return_t
11527 vm_map_copyin_internal(
11528 vm_map_t src_map,
11529 vm_map_address_t src_addr,
11530 vm_map_size_t len,
11531 int flags,
11532 vm_map_copy_t *copy_result) /* OUT */
11533 {
11534 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11535 * in multi-level lookup, this
11536 * entry contains the actual
11537 * vm_object/offset.
11538 */
11539 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11540
11541 vm_map_offset_t src_start; /* Start of current entry --
11542 * where copy is taking place now
11543 */
11544 vm_map_offset_t src_end; /* End of entire region to be
11545 * copied */
11546 vm_map_offset_t src_base;
11547 vm_map_t base_map = src_map;
11548 boolean_t map_share = FALSE;
11549 submap_map_t *parent_maps = NULL;
11550
11551 vm_map_copy_t copy; /* Resulting copy */
11552 vm_map_address_t copy_addr;
11553 vm_map_size_t copy_size;
11554 boolean_t src_destroy;
11555 boolean_t use_maxprot;
11556 boolean_t preserve_purgeable;
11557 boolean_t entry_was_shared;
11558 vm_map_entry_t saved_src_entry;
11559
11560 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11561 return KERN_INVALID_ARGUMENT;
11562 }
11563
11564 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11565 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11566 preserve_purgeable =
11567 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11568
11569 /*
11570 * Check for copies of zero bytes.
11571 */
11572
11573 if (len == 0) {
11574 *copy_result = VM_MAP_COPY_NULL;
11575 return KERN_SUCCESS;
11576 }
11577
11578 /*
11579 * Check that the end address doesn't overflow
11580 */
11581 src_end = src_addr + len;
11582 if (src_end < src_addr) {
11583 return KERN_INVALID_ADDRESS;
11584 }
11585
11586 /*
11587 * Compute (page aligned) start and end of region
11588 */
11589 src_start = vm_map_trunc_page(src_addr,
11590 VM_MAP_PAGE_MASK(src_map));
11591 src_end = vm_map_round_page(src_end,
11592 VM_MAP_PAGE_MASK(src_map));
11593
11594 /*
11595 * If the copy is sufficiently small, use a kernel buffer instead
11596 * of making a virtual copy. The theory being that the cost of
11597 * setting up VM (and taking C-O-W faults) dominates the copy costs
11598 * for small regions.
11599 */
11600 if ((len < msg_ool_size_small) &&
11601 !use_maxprot &&
11602 !preserve_purgeable &&
11603 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11604 /*
11605 * Since the "msg_ool_size_small" threshold was increased and
11606 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11607 * address space limits, we revert to doing a virtual copy if the
11608 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11609 * of the commpage would now fail when it used to work.
11610 */
11611 (src_start >= vm_map_min(src_map) &&
11612 src_start < vm_map_max(src_map) &&
11613 src_end >= vm_map_min(src_map) &&
11614 src_end < vm_map_max(src_map))) {
11615 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11616 src_destroy, copy_result);
11617 }
11618
11619 /*
11620 * Allocate a header element for the list.
11621 *
11622 * Use the start and end in the header to
11623 * remember the endpoints prior to rounding.
11624 */
11625
11626 copy = vm_map_copy_allocate();
11627 copy->type = VM_MAP_COPY_ENTRY_LIST;
11628 copy->cpy_hdr.entries_pageable = TRUE;
11629 copy->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(src_map);
11630
11631 vm_map_store_init( &(copy->cpy_hdr));
11632
11633 copy->offset = src_addr;
11634 copy->size = len;
11635
11636 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11637
11638 #define RETURN(x) \
11639 MACRO_BEGIN \
11640 vm_map_unlock(src_map); \
11641 if(src_map != base_map) \
11642 vm_map_deallocate(src_map); \
11643 if (new_entry != VM_MAP_ENTRY_NULL) \
11644 vm_map_copy_entry_dispose(copy,new_entry); \
11645 vm_map_copy_discard(copy); \
11646 { \
11647 submap_map_t *_ptr; \
11648 \
11649 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11650 parent_maps=parent_maps->next; \
11651 if (_ptr->parent_map != base_map) \
11652 vm_map_deallocate(_ptr->parent_map); \
11653 kfree(_ptr, sizeof(submap_map_t)); \
11654 } \
11655 } \
11656 MACRO_RETURN(x); \
11657 MACRO_END
11658
11659 /*
11660 * Find the beginning of the region.
11661 */
11662
11663 vm_map_lock(src_map);
11664
11665 /*
11666 * Look up the original "src_addr" rather than the truncated
11667 * "src_start", in case "src_start" falls in a non-map-aligned
11668 * map entry *before* the map entry that contains "src_addr"...
11669 */
11670 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11671 RETURN(KERN_INVALID_ADDRESS);
11672 }
11673 if (!tmp_entry->is_sub_map) {
11674 /*
11675 * ... but clip to the map-rounded "src_start" rather than
11676 * "src_addr" to preserve map-alignment. We'll adjust the
11677 * first copy entry at the end, if needed.
11678 */
11679 vm_map_clip_start(src_map, tmp_entry, src_start);
11680 }
11681 if (src_start < tmp_entry->vme_start) {
11682 /*
11683 * Move "src_start" up to the start of the
11684 * first map entry to copy.
11685 */
11686 src_start = tmp_entry->vme_start;
11687 }
11688 /* set for later submap fix-up */
11689 copy_addr = src_start;
11690
11691 /*
11692 * Go through entries until we get to the end.
11693 */
11694
11695 while (TRUE) {
11696 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11697 vm_map_size_t src_size; /* Size of source
11698 * map entry (in both
11699 * maps)
11700 */
11701
11702 vm_object_t src_object; /* Object to copy */
11703 vm_object_offset_t src_offset;
11704
11705 boolean_t src_needs_copy; /* Should source map
11706 * be made read-only
11707 * for copy-on-write?
11708 */
11709
11710 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11711
11712 boolean_t was_wired; /* Was source wired? */
11713 vm_map_version_t version; /* Version before locks
11714 * dropped to make copy
11715 */
11716 kern_return_t result; /* Return value from
11717 * copy_strategically.
11718 */
11719 while (tmp_entry->is_sub_map) {
11720 vm_map_size_t submap_len;
11721 submap_map_t *ptr;
11722
11723 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11724 ptr->next = parent_maps;
11725 parent_maps = ptr;
11726 ptr->parent_map = src_map;
11727 ptr->base_start = src_start;
11728 ptr->base_end = src_end;
11729 submap_len = tmp_entry->vme_end - src_start;
11730 if (submap_len > (src_end - src_start)) {
11731 submap_len = src_end - src_start;
11732 }
11733 ptr->base_len = submap_len;
11734
11735 src_start -= tmp_entry->vme_start;
11736 src_start += VME_OFFSET(tmp_entry);
11737 src_end = src_start + submap_len;
11738 src_map = VME_SUBMAP(tmp_entry);
11739 vm_map_lock(src_map);
11740 /* keep an outstanding reference for all maps in */
11741 /* the parents tree except the base map */
11742 vm_map_reference(src_map);
11743 vm_map_unlock(ptr->parent_map);
11744 if (!vm_map_lookup_entry(
11745 src_map, src_start, &tmp_entry)) {
11746 RETURN(KERN_INVALID_ADDRESS);
11747 }
11748 map_share = TRUE;
11749 if (!tmp_entry->is_sub_map) {
11750 vm_map_clip_start(src_map, tmp_entry, src_start);
11751 }
11752 src_entry = tmp_entry;
11753 }
11754 /* we are now in the lowest level submap... */
11755
11756 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11757 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11758 /* This is not supported for now. In the future */
11759 /* we will need to detect the phys_contig */
11760 /* condition and then upgrade copy_slowly */
11761 /* to do a physical copy from the device-memory- */
11762 /* based object. We can piggy-back off of */
11763 /* the was_wired boolean to set up the */
11764 /* proper handling. */
11765 RETURN(KERN_PROTECTION_FAILURE);
11766 }
11767 /*
11768 * Create a new address map entry to hold the result.
11769 * Fill in the fields from the appropriate source entries.
11770 * We must unlock the source map to do this if we need
11771 * to allocate a map entry.
11772 */
11773 if (new_entry == VM_MAP_ENTRY_NULL) {
11774 version.main_timestamp = src_map->timestamp;
11775 vm_map_unlock(src_map);
11776
11777 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11778
11779 vm_map_lock(src_map);
11780 if ((version.main_timestamp + 1) != src_map->timestamp) {
11781 if (!vm_map_lookup_entry(src_map, src_start,
11782 &tmp_entry)) {
11783 RETURN(KERN_INVALID_ADDRESS);
11784 }
11785 if (!tmp_entry->is_sub_map) {
11786 vm_map_clip_start(src_map, tmp_entry, src_start);
11787 }
11788 continue; /* restart w/ new tmp_entry */
11789 }
11790 }
11791
11792 /*
11793 * Verify that the region can be read.
11794 */
11795 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11796 !use_maxprot) ||
11797 (src_entry->max_protection & VM_PROT_READ) == 0) {
11798 RETURN(KERN_PROTECTION_FAILURE);
11799 }
11800
11801 /*
11802 * Clip against the endpoints of the entire region.
11803 */
11804
11805 vm_map_clip_end(src_map, src_entry, src_end);
11806
11807 src_size = src_entry->vme_end - src_start;
11808 src_object = VME_OBJECT(src_entry);
11809 src_offset = VME_OFFSET(src_entry);
11810 was_wired = (src_entry->wired_count != 0);
11811
11812 vm_map_entry_copy(src_map, new_entry, src_entry);
11813 if (new_entry->is_sub_map) {
11814 /* clr address space specifics */
11815 new_entry->use_pmap = FALSE;
11816 } else {
11817 /*
11818 * We're dealing with a copy-on-write operation,
11819 * so the resulting mapping should not inherit the
11820 * original mapping's accounting settings.
11821 * "iokit_acct" should have been cleared in
11822 * vm_map_entry_copy().
11823 * "use_pmap" should be reset to its default (TRUE)
11824 * so that the new mapping gets accounted for in
11825 * the task's memory footprint.
11826 */
11827 assert(!new_entry->iokit_acct);
11828 new_entry->use_pmap = TRUE;
11829 }
11830
11831 /*
11832 * Attempt non-blocking copy-on-write optimizations.
11833 */
11834
11835 /*
11836 * If we are destroying the source, and the object
11837 * is internal, we could move the object reference
11838 * from the source to the copy. The copy is
11839 * copy-on-write only if the source is.
11840 * We make another reference to the object, because
11841 * destroying the source entry will deallocate it.
11842 *
11843 * This memory transfer has to be atomic (to prevent
11844 * the VM object from being shared or copied while
11845 * it's being moved here), so we could only do this
11846 * if we won't have to unlock the VM map until the
11847 * original mapping has been fully removed.
11848 */
11849
11850 RestartCopy:
11851 if ((src_object == VM_OBJECT_NULL ||
11852 (!was_wired && !map_share && !tmp_entry->is_shared
11853 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11854 vm_object_copy_quickly(
11855 VME_OBJECT_PTR(new_entry),
11856 src_offset,
11857 src_size,
11858 &src_needs_copy,
11859 &new_entry_needs_copy)) {
11860 new_entry->needs_copy = new_entry_needs_copy;
11861
11862 /*
11863 * Handle copy-on-write obligations
11864 */
11865
11866 if (src_needs_copy && !tmp_entry->needs_copy) {
11867 vm_prot_t prot;
11868
11869 prot = src_entry->protection & ~VM_PROT_WRITE;
11870
11871 if (override_nx(src_map, VME_ALIAS(src_entry))
11872 && prot) {
11873 prot |= VM_PROT_EXECUTE;
11874 }
11875
11876 vm_object_pmap_protect(
11877 src_object,
11878 src_offset,
11879 src_size,
11880 (src_entry->is_shared ?
11881 PMAP_NULL
11882 : src_map->pmap),
11883 VM_MAP_PAGE_SIZE(src_map),
11884 src_entry->vme_start,
11885 prot);
11886
11887 assert(tmp_entry->wired_count == 0);
11888 tmp_entry->needs_copy = TRUE;
11889 }
11890
11891 /*
11892 * The map has never been unlocked, so it's safe
11893 * to move to the next entry rather than doing
11894 * another lookup.
11895 */
11896
11897 goto CopySuccessful;
11898 }
11899
11900 entry_was_shared = tmp_entry->is_shared;
11901
11902 /*
11903 * Take an object reference, so that we may
11904 * release the map lock(s).
11905 */
11906
11907 assert(src_object != VM_OBJECT_NULL);
11908 vm_object_reference(src_object);
11909
11910 /*
11911 * Record the timestamp for later verification.
11912 * Unlock the map.
11913 */
11914
11915 version.main_timestamp = src_map->timestamp;
11916 vm_map_unlock(src_map); /* Increments timestamp once! */
11917 saved_src_entry = src_entry;
11918 tmp_entry = VM_MAP_ENTRY_NULL;
11919 src_entry = VM_MAP_ENTRY_NULL;
11920
11921 /*
11922 * Perform the copy
11923 */
11924
11925 if (was_wired ||
11926 (debug4k_no_cow_copyin &&
11927 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
11928 CopySlowly:
11929 vm_object_lock(src_object);
11930 result = vm_object_copy_slowly(
11931 src_object,
11932 src_offset,
11933 src_size,
11934 THREAD_UNINT,
11935 VME_OBJECT_PTR(new_entry));
11936 VME_OFFSET_SET(new_entry,
11937 src_offset - vm_object_trunc_page(src_offset));
11938 new_entry->needs_copy = FALSE;
11939 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11940 (entry_was_shared || map_share)) {
11941 vm_object_t new_object;
11942
11943 vm_object_lock_shared(src_object);
11944 new_object = vm_object_copy_delayed(
11945 src_object,
11946 src_offset,
11947 src_size,
11948 TRUE);
11949 if (new_object == VM_OBJECT_NULL) {
11950 goto CopySlowly;
11951 }
11952
11953 VME_OBJECT_SET(new_entry, new_object);
11954 assert(new_entry->wired_count == 0);
11955 new_entry->needs_copy = TRUE;
11956 assert(!new_entry->iokit_acct);
11957 assert(new_object->purgable == VM_PURGABLE_DENY);
11958 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11959 result = KERN_SUCCESS;
11960 } else {
11961 vm_object_offset_t new_offset;
11962 new_offset = VME_OFFSET(new_entry);
11963 result = vm_object_copy_strategically(src_object,
11964 src_offset,
11965 src_size,
11966 VME_OBJECT_PTR(new_entry),
11967 &new_offset,
11968 &new_entry_needs_copy);
11969 if (new_offset != VME_OFFSET(new_entry)) {
11970 VME_OFFSET_SET(new_entry, new_offset);
11971 }
11972
11973 new_entry->needs_copy = new_entry_needs_copy;
11974 }
11975
11976 if (result == KERN_SUCCESS &&
11977 ((preserve_purgeable &&
11978 src_object->purgable != VM_PURGABLE_DENY) ||
11979 new_entry->used_for_jit)) {
11980 /*
11981 * Purgeable objects should be COPY_NONE, true share;
11982 * this should be propagated to the copy.
11983 *
11984 * Also force mappings the pmap specially protects to
11985 * be COPY_NONE; trying to COW these mappings would
11986 * change the effective protections, which could have
11987 * side effects if the pmap layer relies on the
11988 * specified protections.
11989 */
11990
11991 vm_object_t new_object;
11992
11993 new_object = VME_OBJECT(new_entry);
11994 assert(new_object != src_object);
11995 vm_object_lock(new_object);
11996 assert(new_object->ref_count == 1);
11997 assert(new_object->shadow == VM_OBJECT_NULL);
11998 assert(new_object->copy == VM_OBJECT_NULL);
11999 assert(new_object->vo_owner == NULL);
12000
12001 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
12002
12003 if (preserve_purgeable &&
12004 src_object->purgable != VM_PURGABLE_DENY) {
12005 new_object->true_share = TRUE;
12006
12007 /* start as non-volatile with no owner... */
12008 new_object->purgable = VM_PURGABLE_NONVOLATILE;
12009 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
12010 /* ... and move to src_object's purgeable state */
12011 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
12012 int state;
12013 state = src_object->purgable;
12014 vm_object_purgable_control(
12015 new_object,
12016 VM_PURGABLE_SET_STATE_FROM_KERNEL,
12017 &state);
12018 }
12019 /* no pmap accounting for purgeable objects */
12020 new_entry->use_pmap = FALSE;
12021 }
12022
12023 vm_object_unlock(new_object);
12024 new_object = VM_OBJECT_NULL;
12025 }
12026
12027 if (result != KERN_SUCCESS &&
12028 result != KERN_MEMORY_RESTART_COPY) {
12029 vm_map_lock(src_map);
12030 RETURN(result);
12031 }
12032
12033 /*
12034 * Throw away the extra reference
12035 */
12036
12037 vm_object_deallocate(src_object);
12038
12039 /*
12040 * Verify that the map has not substantially
12041 * changed while the copy was being made.
12042 */
12043
12044 vm_map_lock(src_map);
12045
12046 if ((version.main_timestamp + 1) == src_map->timestamp) {
12047 /* src_map hasn't changed: src_entry is still valid */
12048 src_entry = saved_src_entry;
12049 goto VerificationSuccessful;
12050 }
12051
12052 /*
12053 * Simple version comparison failed.
12054 *
12055 * Retry the lookup and verify that the
12056 * same object/offset are still present.
12057 *
12058 * [Note: a memory manager that colludes with
12059 * the calling task can detect that we have
12060 * cheated. While the map was unlocked, the
12061 * mapping could have been changed and restored.]
12062 */
12063
12064 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
12065 if (result != KERN_MEMORY_RESTART_COPY) {
12066 vm_object_deallocate(VME_OBJECT(new_entry));
12067 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
12068 /* reset accounting state */
12069 new_entry->iokit_acct = FALSE;
12070 new_entry->use_pmap = TRUE;
12071 }
12072 RETURN(KERN_INVALID_ADDRESS);
12073 }
12074
12075 src_entry = tmp_entry;
12076 vm_map_clip_start(src_map, src_entry, src_start);
12077
12078 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12079 !use_maxprot) ||
12080 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12081 goto VerificationFailed;
12082 }
12083
12084 if (src_entry->vme_end < new_entry->vme_end) {
12085 /*
12086 * This entry might have been shortened
12087 * (vm_map_clip_end) or been replaced with
12088 * an entry that ends closer to "src_start"
12089 * than before.
12090 * Adjust "new_entry" accordingly; copying
12091 * less memory would be correct but we also
12092 * redo the copy (see below) if the new entry
12093 * no longer points at the same object/offset.
12094 */
12095 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12096 VM_MAP_COPY_PAGE_MASK(copy)));
12097 new_entry->vme_end = src_entry->vme_end;
12098 src_size = new_entry->vme_end - src_start;
12099 } else if (src_entry->vme_end > new_entry->vme_end) {
12100 /*
12101 * This entry might have been extended
12102 * (vm_map_entry_simplify() or coalesce)
12103 * or been replaced with an entry that ends farther
12104 * from "src_start" than before.
12105 *
12106 * We've called vm_object_copy_*() only on
12107 * the previous <start:end> range, so we can't
12108 * just extend new_entry. We have to re-do
12109 * the copy based on the new entry as if it was
12110 * pointing at a different object/offset (see
12111 * "Verification failed" below).
12112 */
12113 }
12114
12115 if ((VME_OBJECT(src_entry) != src_object) ||
12116 (VME_OFFSET(src_entry) != src_offset) ||
12117 (src_entry->vme_end > new_entry->vme_end)) {
12118 /*
12119 * Verification failed.
12120 *
12121 * Start over with this top-level entry.
12122 */
12123
12124 VerificationFailed: ;
12125
12126 vm_object_deallocate(VME_OBJECT(new_entry));
12127 tmp_entry = src_entry;
12128 continue;
12129 }
12130
12131 /*
12132 * Verification succeeded.
12133 */
12134
12135 VerificationSuccessful:;
12136
12137 if (result == KERN_MEMORY_RESTART_COPY) {
12138 goto RestartCopy;
12139 }
12140
12141 /*
12142 * Copy succeeded.
12143 */
12144
12145 CopySuccessful: ;
12146
12147 /*
12148 * Link in the new copy entry.
12149 */
12150
12151 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12152 new_entry);
12153
12154 /*
12155 * Determine whether the entire region
12156 * has been copied.
12157 */
12158 src_base = src_start;
12159 src_start = new_entry->vme_end;
12160 new_entry = VM_MAP_ENTRY_NULL;
12161 while ((src_start >= src_end) && (src_end != 0)) {
12162 submap_map_t *ptr;
12163
12164 if (src_map == base_map) {
12165 /* back to the top */
12166 break;
12167 }
12168
12169 ptr = parent_maps;
12170 assert(ptr != NULL);
12171 parent_maps = parent_maps->next;
12172
12173 /* fix up the damage we did in that submap */
12174 vm_map_simplify_range(src_map,
12175 src_base,
12176 src_end);
12177
12178 vm_map_unlock(src_map);
12179 vm_map_deallocate(src_map);
12180 vm_map_lock(ptr->parent_map);
12181 src_map = ptr->parent_map;
12182 src_base = ptr->base_start;
12183 src_start = ptr->base_start + ptr->base_len;
12184 src_end = ptr->base_end;
12185 if (!vm_map_lookup_entry(src_map,
12186 src_start,
12187 &tmp_entry) &&
12188 (src_end > src_start)) {
12189 RETURN(KERN_INVALID_ADDRESS);
12190 }
12191 kfree(ptr, sizeof(submap_map_t));
12192 if (parent_maps == NULL) {
12193 map_share = FALSE;
12194 }
12195 src_entry = tmp_entry->vme_prev;
12196 }
12197
12198 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12199 (src_start >= src_addr + len) &&
12200 (src_addr + len != 0)) {
12201 /*
12202 * Stop copying now, even though we haven't reached
12203 * "src_end". We'll adjust the end of the last copy
12204 * entry at the end, if needed.
12205 *
12206 * If src_map's alignment is different from the
12207 * system's page-alignment, there could be
12208 * extra non-map-aligned map entries between
12209 * the original (non-rounded) "src_addr + len"
12210 * and the rounded "src_end".
12211 * We do not want to copy those map entries since
12212 * they're not part of the copied range.
12213 */
12214 break;
12215 }
12216
12217 if ((src_start >= src_end) && (src_end != 0)) {
12218 break;
12219 }
12220
12221 /*
12222 * Verify that there are no gaps in the region
12223 */
12224
12225 tmp_entry = src_entry->vme_next;
12226 if ((tmp_entry->vme_start != src_start) ||
12227 (tmp_entry == vm_map_to_entry(src_map))) {
12228 RETURN(KERN_INVALID_ADDRESS);
12229 }
12230 }
12231
12232 /*
12233 * If the source should be destroyed, do it now, since the
12234 * copy was successful.
12235 */
12236 if (src_destroy) {
12237 (void) vm_map_delete(
12238 src_map,
12239 vm_map_trunc_page(src_addr,
12240 VM_MAP_PAGE_MASK(src_map)),
12241 src_end,
12242 ((src_map == kernel_map) ?
12243 VM_MAP_REMOVE_KUNWIRE :
12244 VM_MAP_REMOVE_NO_FLAGS),
12245 VM_MAP_NULL);
12246 } else {
12247 /* fix up the damage we did in the base map */
12248 vm_map_simplify_range(
12249 src_map,
12250 vm_map_trunc_page(src_addr,
12251 VM_MAP_PAGE_MASK(src_map)),
12252 vm_map_round_page(src_end,
12253 VM_MAP_PAGE_MASK(src_map)));
12254 }
12255
12256 vm_map_unlock(src_map);
12257 tmp_entry = VM_MAP_ENTRY_NULL;
12258
12259 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12260 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12261 vm_map_offset_t original_start, original_offset, original_end;
12262
12263 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12264
12265 /* adjust alignment of first copy_entry's "vme_start" */
12266 tmp_entry = vm_map_copy_first_entry(copy);
12267 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12268 vm_map_offset_t adjustment;
12269
12270 original_start = tmp_entry->vme_start;
12271 original_offset = VME_OFFSET(tmp_entry);
12272
12273 /* map-align the start of the first copy entry... */
12274 adjustment = (tmp_entry->vme_start -
12275 vm_map_trunc_page(
12276 tmp_entry->vme_start,
12277 VM_MAP_PAGE_MASK(src_map)));
12278 tmp_entry->vme_start -= adjustment;
12279 VME_OFFSET_SET(tmp_entry,
12280 VME_OFFSET(tmp_entry) - adjustment);
12281 copy_addr -= adjustment;
12282 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12283 /* ... adjust for mis-aligned start of copy range */
12284 adjustment =
12285 (vm_map_trunc_page(copy->offset,
12286 PAGE_MASK) -
12287 vm_map_trunc_page(copy->offset,
12288 VM_MAP_PAGE_MASK(src_map)));
12289 if (adjustment) {
12290 assert(page_aligned(adjustment));
12291 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12292 tmp_entry->vme_start += adjustment;
12293 VME_OFFSET_SET(tmp_entry,
12294 (VME_OFFSET(tmp_entry) +
12295 adjustment));
12296 copy_addr += adjustment;
12297 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12298 }
12299
12300 /*
12301 * Assert that the adjustments haven't exposed
12302 * more than was originally copied...
12303 */
12304 assert(tmp_entry->vme_start >= original_start);
12305 assert(VME_OFFSET(tmp_entry) >= original_offset);
12306 /*
12307 * ... and that it did not adjust outside of
12308 * a single 16K page.
12309 */
12310 assert(vm_map_trunc_page(tmp_entry->vme_start,
12311 VM_MAP_PAGE_MASK(src_map)) ==
12312 vm_map_trunc_page(original_start,
12313 VM_MAP_PAGE_MASK(src_map)));
12314 }
12315
12316 /* adjust alignment of last copy_entry's "vme_end" */
12317 tmp_entry = vm_map_copy_last_entry(copy);
12318 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12319 vm_map_offset_t adjustment;
12320
12321 original_end = tmp_entry->vme_end;
12322
12323 /* map-align the end of the last copy entry... */
12324 tmp_entry->vme_end =
12325 vm_map_round_page(tmp_entry->vme_end,
12326 VM_MAP_PAGE_MASK(src_map));
12327 /* ... adjust for mis-aligned end of copy range */
12328 adjustment =
12329 (vm_map_round_page((copy->offset +
12330 copy->size),
12331 VM_MAP_PAGE_MASK(src_map)) -
12332 vm_map_round_page((copy->offset +
12333 copy->size),
12334 PAGE_MASK));
12335 if (adjustment) {
12336 assert(page_aligned(adjustment));
12337 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12338 tmp_entry->vme_end -= adjustment;
12339 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12340 }
12341
12342 /*
12343 * Assert that the adjustments haven't exposed
12344 * more than was originally copied...
12345 */
12346 assert(tmp_entry->vme_end <= original_end);
12347 /*
12348 * ... and that it did not adjust outside of
12349 * a single 16K page.
12350 */
12351 assert(vm_map_round_page(tmp_entry->vme_end,
12352 VM_MAP_PAGE_MASK(src_map)) ==
12353 vm_map_round_page(original_end,
12354 VM_MAP_PAGE_MASK(src_map)));
12355 }
12356 }
12357
12358 /* Fix-up start and end points in copy. This is necessary */
12359 /* when the various entries in the copy object were picked */
12360 /* up from different sub-maps */
12361
12362 tmp_entry = vm_map_copy_first_entry(copy);
12363 copy_size = 0; /* compute actual size */
12364 while (tmp_entry != vm_map_copy_to_entry(copy)) {
12365 assert(VM_MAP_PAGE_ALIGNED(
12366 copy_addr + (tmp_entry->vme_end -
12367 tmp_entry->vme_start),
12368 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12369 assert(VM_MAP_PAGE_ALIGNED(
12370 copy_addr,
12371 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12372
12373 /*
12374 * The copy_entries will be injected directly into the
12375 * destination map and might not be "map aligned" there...
12376 */
12377 tmp_entry->map_aligned = FALSE;
12378
12379 tmp_entry->vme_end = copy_addr +
12380 (tmp_entry->vme_end - tmp_entry->vme_start);
12381 tmp_entry->vme_start = copy_addr;
12382 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12383 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12384 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12385 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12386 }
12387
12388 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12389 copy_size < copy->size) {
12390 /*
12391 * The actual size of the VM map copy is smaller than what
12392 * was requested by the caller. This must be because some
12393 * PAGE_SIZE-sized pages are missing at the end of the last
12394 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12395 * The caller might not have been aware of those missing
12396 * pages and might not want to be aware of it, which is
12397 * fine as long as they don't try to access (and crash on)
12398 * those missing pages.
12399 * Let's adjust the size of the "copy", to avoid failing
12400 * in vm_map_copyout() or vm_map_copy_overwrite().
12401 */
12402 assert(vm_map_round_page(copy_size,
12403 VM_MAP_PAGE_MASK(src_map)) ==
12404 vm_map_round_page(copy->size,
12405 VM_MAP_PAGE_MASK(src_map)));
12406 copy->size = copy_size;
12407 }
12408
12409 *copy_result = copy;
12410 return KERN_SUCCESS;
12411
12412 #undef RETURN
12413 }
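/*
 * Illustrative sketch, assuming the classic Mach copyin/copyout pair
 * (vm_map_copyin() and vm_map_copyout()): the entry list built by the
 * copyin path above is what vm_map_copyout() later links into a
 * destination map, which is why "copy->size" may be trimmed to the
 * actual copied size before returning.  A kernel client moving a
 * range between maps would typically do:
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len,
 *	    FALSE,			// src_destroy: leave the source mapped
 *	    &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS) {
 *			vm_map_copy_discard(copy);	// copy not consumed on failure
 *		}
 *	}
 */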
12414
12415 kern_return_t
12416 vm_map_copy_extract(
12417 vm_map_t src_map,
12418 vm_map_address_t src_addr,
12419 vm_map_size_t len,
12420 vm_prot_t required_prot,
12421 boolean_t do_copy,
12422 vm_map_copy_t *copy_result, /* OUT */
12423 vm_prot_t *cur_prot, /* OUT */
12424 vm_prot_t *max_prot, /* OUT */
12425 vm_inherit_t inheritance,
12426 vm_map_kernel_flags_t vmk_flags)
12427 {
12428 vm_map_copy_t copy;
12429 kern_return_t kr;
12430
12431 /*
12432 * Check for copies of zero bytes.
12433 */
12434
12435 if (len == 0) {
12436 *copy_result = VM_MAP_COPY_NULL;
12437 return KERN_SUCCESS;
12438 }
12439
12440 /*
12441 * Check that the end address doesn't overflow
12442 */
12443 if (src_addr + len < src_addr) {
12444 return KERN_INVALID_ADDRESS;
12445 }
12446
12447 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12448 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12449 }
12450
12451 /*
12452 * Allocate a header element for the list.
12453 *
12454 * Use the start and end in the header to
12455 * remember the endpoints prior to rounding.
12456 */
12457
12458 copy = vm_map_copy_allocate();
12459 copy->type = VM_MAP_COPY_ENTRY_LIST;
12460 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12461
12462 vm_map_store_init(&copy->cpy_hdr);
12463
12464 copy->offset = 0;
12465 copy->size = len;
12466
12467 kr = vm_map_remap_extract(src_map,
12468 src_addr,
12469 len,
12470 required_prot,
12471 do_copy, /* copy */
12472 &copy->cpy_hdr,
12473 cur_prot,
12474 max_prot,
12475 inheritance,
12476 vmk_flags);
12477 if (kr != KERN_SUCCESS) {
12478 vm_map_copy_discard(copy);
12479 return kr;
12480 }
12481 assert((*cur_prot & required_prot) == required_prot);
12482 assert((*max_prot & required_prot) == required_prot);
12483
12484 *copy_result = copy;
12485 return KERN_SUCCESS;
12486 }
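/*
 * Illustrative sketch of a minimal call, based on the signature above;
 * the flag and inheritance values are examples only:
 *
 *	vm_map_copy_t copy;
 *	vm_prot_t cur_prot, max_prot;
 *	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
 *	kern_return_t kr;
 *
 *	vmk_flags.vmkf_copy_pageable = TRUE;
 *	kr = vm_map_copy_extract(src_map, src_addr, len,
 *	    VM_PROT_READ,		// required_prot: fail unless readable
 *	    FALSE,			// do_copy: share rather than copy
 *	    &copy, &cur_prot, &max_prot,
 *	    VM_INHERIT_DEFAULT, vmk_flags);
 *
 * On success, the assertions above guarantee that both "cur_prot" and
 * "max_prot" include the required protection.
 */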
12487
12488 /*
12489 * vm_map_copyin_object:
12490 *
12491 * Create a copy object from an object.
12492 * Our caller donates an object reference.
12493 */
12494
12495 kern_return_t
12496 vm_map_copyin_object(
12497 vm_object_t object,
12498 vm_object_offset_t offset, /* offset of region in object */
12499 vm_object_size_t size, /* size of region in object */
12500 vm_map_copy_t *copy_result) /* OUT */
12501 {
12502 vm_map_copy_t copy; /* Resulting copy */
12503
12504 /*
12505 * We drop the object into a special copy object
12506 * that contains the object directly.
12507 */
12508
12509 copy = vm_map_copy_allocate();
12510 copy->type = VM_MAP_COPY_OBJECT;
12511 copy->cpy_object = object;
12512 copy->offset = offset;
12513 copy->size = size;
12514
12515 *copy_result = copy;
12516 return KERN_SUCCESS;
12517 }
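/*
 * Illustrative sketch: because the caller donates its object
 * reference, a typical use is to wrap a freshly allocated object and
 * hand the resulting VM_MAP_COPY_OBJECT copy to vm_map_copyout()
 * (assuming the usual Mach copyout interface):
 *
 *	vm_object_t object;
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *
 *	object = vm_object_allocate(size);	// reference donated below
 *	(void) vm_map_copyin_object(object, 0, size, &copy);
 *	if (vm_map_copyout(dst_map, &dst_addr, copy) != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);	// drops the donated reference
 *	}
 *
 * No entry list is built here; the copy carries the object, offset and
 * size directly.
 */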
12518
12519 static void
12520 vm_map_fork_share(
12521 vm_map_t old_map,
12522 vm_map_entry_t old_entry,
12523 vm_map_t new_map)
12524 {
12525 vm_object_t object;
12526 vm_map_entry_t new_entry;
12527
12528 /*
12529 * New sharing code. New map entry
12530 * references original object. Internal
12531 * objects use the asynchronous copy algorithm for
12532 * future copies. First make sure we have
12533 * the right object. If we need a shadow,
12534 * or someone else already has one, then
12535 * make a new shadow and share it.
12536 */
12537
12538 object = VME_OBJECT(old_entry);
12539 if (old_entry->is_sub_map) {
12540 assert(old_entry->wired_count == 0);
12541 #ifndef NO_NESTED_PMAP
12542 if (old_entry->use_pmap) {
12543 kern_return_t result;
12544
12545 result = pmap_nest(new_map->pmap,
12546 (VME_SUBMAP(old_entry))->pmap,
12547 (addr64_t)old_entry->vme_start,
12548 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12549 if (result) {
12550 panic("vm_map_fork_share: pmap_nest failed!");
12551 }
12552 }
12553 #endif /* NO_NESTED_PMAP */
12554 } else if (object == VM_OBJECT_NULL) {
12555 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12556 old_entry->vme_start));
12557 VME_OFFSET_SET(old_entry, 0);
12558 VME_OBJECT_SET(old_entry, object);
12559 old_entry->use_pmap = TRUE;
12560 // assert(!old_entry->needs_copy);
12561 } else if (object->copy_strategy !=
12562 MEMORY_OBJECT_COPY_SYMMETRIC) {
12563 /*
12564 * We are already using an asymmetric
12565 * copy, and therefore we already have
12566 * the right object.
12567 */
12568
12569 assert(!old_entry->needs_copy);
12570 } else if (old_entry->needs_copy || /* case 1 */
12571 object->shadowed || /* case 2 */
12572 (!object->true_share && /* case 3 */
12573 !old_entry->is_shared &&
12574 (object->vo_size >
12575 (vm_map_size_t)(old_entry->vme_end -
12576 old_entry->vme_start)))) {
12577 /*
12578 * We need to create a shadow.
12579 * There are three cases here.
12580 * In the first case, we need to
12581 * complete a deferred symmetrical
12582 * copy that we participated in.
12583 * In the second and third cases,
12584 * we need to create the shadow so
12585 * that changes that we make to the
12586 * object do not interfere with
12587 * any symmetrical copies which
12588 * have occurred (case 2) or which
12589 * might occur (case 3).
12590 *
12591 * The first case is when we had
12592 * deferred shadow object creation
12593 * via the entry->needs_copy mechanism.
12594 * This mechanism only works when
12595 * only one entry points to the source
12596 * object, and we are about to create
12597 * a second entry pointing to the
12598 * same object. The problem is that
12599 * there is no way of mapping from
12600 * an object to the entries pointing
12601 * to it. (Deferred shadow creation
12602 * works with one entry because it occurs
12603 * at fault time, and we walk from the
12604 * entry to the object when handling
12605 * the fault.)
12606 *
12607 * The second case is when the object
12608 * to be shared has already been copied
12609 * with a symmetric copy, but we point
12610 * directly to the object without
12611 * needs_copy set in our entry. (This
12612 * can happen because different ranges
12613 * of an object can be pointed to by
12614 * different entries. In particular,
12615 * a single entry pointing to an object
12616 * can be split by a call to vm_inherit,
12617 * which, combined with task_create, can
12618 * result in the different entries
12619 * having different needs_copy values.)
12620 * The shadowed flag in the object allows
12621 * us to detect this case. The problem
12622 * with this case is that if this object
12623 * has or will have shadows, then we
12624 * must not perform an asymmetric copy
12625 * of this object, since such a copy
12626 * allows the object to be changed, which
12627 * will break the previous symmetrical
12628 * copies (which rely upon the object
12629 * not changing). In a sense, the shadowed
12630 * flag says "don't change this object".
12631 * We fix this by creating a shadow
12632 * object for this object, and sharing
12633 * that. This works because we are free
12634 * to change the shadow object (and thus
12635 * to use an asymmetric copy strategy);
12636 * this is also semantically correct,
12637 * since this object is temporary, and
12638 * therefore a copy of the object is
12639 * as good as the object itself. (This
12640 * is not true for permanent objects,
12641 * since the pager needs to see changes,
12642 * which won't happen if the changes
12643 * are made to a copy.)
12644 *
12645 * The third case is when the object
12646 * to be shared has parts sticking
12647 * outside of the entry we're working
12648 * with, and thus may in the future
12649 * be subject to a symmetrical copy.
12650 * (This is a preemptive version of
12651 * case 2.)
12652 */
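/*
 * Condensed view of the three cases tested below:
 *
 *	old_entry->needs_copy			-> case 1: finish a deferred
 *						   symmetric copy
 *	object->shadowed			-> case 2: protect copies that
 *						   already happened
 *	!true_share && !is_shared &&
 *	    vo_size > entry size		-> case 3: preempt copies that
 *						   might still happen
 *
 * In every case the remedy is the same: shadow the object and share
 * the shadow instead of the original.
 */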
12653 VME_OBJECT_SHADOW(old_entry,
12654 (vm_map_size_t) (old_entry->vme_end -
12655 old_entry->vme_start));
12656
12657 /*
12658 * If we're making a shadow for other than
12659 * copy on write reasons, then we have
12660 * to remove write permission.
12661 */
12662
12663 if (!old_entry->needs_copy &&
12664 (old_entry->protection & VM_PROT_WRITE)) {
12665 vm_prot_t prot;
12666
12667 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
12668
12669 prot = old_entry->protection & ~VM_PROT_WRITE;
12670
12671 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
12672
12673 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12674 prot |= VM_PROT_EXECUTE;
12675 }
12676
12677
12678 if (old_map->mapped_in_other_pmaps) {
12679 vm_object_pmap_protect(
12680 VME_OBJECT(old_entry),
12681 VME_OFFSET(old_entry),
12682 (old_entry->vme_end -
12683 old_entry->vme_start),
12684 PMAP_NULL,
12685 PAGE_SIZE,
12686 old_entry->vme_start,
12687 prot);
12688 } else {
12689 pmap_protect(old_map->pmap,
12690 old_entry->vme_start,
12691 old_entry->vme_end,
12692 prot);
12693 }
12694 }
12695
12696 old_entry->needs_copy = FALSE;
12697 object = VME_OBJECT(old_entry);
12698 }
12699
12700
12701 /*
12702 * If object was using a symmetric copy strategy,
12703 * change its copy strategy to the default
12704 * asymmetric copy strategy, which is copy_delay
12705 * in the non-norma case and copy_call in the
12706 * norma case. Bump the reference count for the
12707 * new entry.
12708 */
12709
12710 if (old_entry->is_sub_map) {
12711 vm_map_lock(VME_SUBMAP(old_entry));
12712 vm_map_reference(VME_SUBMAP(old_entry));
12713 vm_map_unlock(VME_SUBMAP(old_entry));
12714 } else {
12715 vm_object_lock(object);
12716 vm_object_reference_locked(object);
12717 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12718 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12719 }
12720 vm_object_unlock(object);
12721 }
12722
12723 /*
12724 * Clone the entry, using object ref from above.
12725 * Mark both entries as shared.
12726 */
12727
12728 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12729 * map or descendants */
12730 vm_map_entry_copy(old_map, new_entry, old_entry);
12731 old_entry->is_shared = TRUE;
12732 new_entry->is_shared = TRUE;
12733
12734 /*
12735 * We're dealing with a shared mapping, so the resulting mapping
12736 * should inherit some of the original mapping's accounting settings.
12737 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12738 * "use_pmap" should stay the same as before (if it hasn't been reset
12739 * to TRUE when we cleared "iokit_acct").
12740 */
12741 assert(!new_entry->iokit_acct);
12742
12743 /*
12744 * If the old entry's inheritance is VM_INHERIT_NONE,
12745 * the new entry is for a corpse fork; remove the
12746 * write permission from the new entry.
12747 */
12748 if (old_entry->inheritance == VM_INHERIT_NONE) {
12749 new_entry->protection &= ~VM_PROT_WRITE;
12750 new_entry->max_protection &= ~VM_PROT_WRITE;
12751 }
12752
12753 /*
12754 * Insert the entry into the new map -- we
12755 * know we're inserting at the end of the new
12756 * map.
12757 */
12758
12759 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12760 VM_MAP_KERNEL_FLAGS_NONE);
12761
12762 /*
12763 * Update the physical map
12764 */
12765
12766 if (old_entry->is_sub_map) {
12767 /* Bill Angell pmap support goes here */
12768 } else {
12769 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12770 old_entry->vme_end - old_entry->vme_start,
12771 old_entry->vme_start);
12772 }
12773 }
12774
12775 static boolean_t
12776 vm_map_fork_copy(
12777 vm_map_t old_map,
12778 vm_map_entry_t *old_entry_p,
12779 vm_map_t new_map,
12780 int vm_map_copyin_flags)
12781 {
12782 vm_map_entry_t old_entry = *old_entry_p;
12783 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12784 vm_map_offset_t start = old_entry->vme_start;
12785 vm_map_copy_t copy;
12786 vm_map_entry_t last = vm_map_last_entry(new_map);
12787
12788 vm_map_unlock(old_map);
12789 /*
12790 * Use maxprot version of copyin because we
12791 * care about whether this memory can ever
12792 * be accessed, not just whether it's accessible
12793 * right now.
12794 */
12795 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12796 if (vm_map_copyin_internal(old_map, start, entry_size,
12797 vm_map_copyin_flags, &copy)
12798 != KERN_SUCCESS) {
12799 /*
12800 * The map might have changed while it
12801 * was unlocked, check it again. Skip
12802 * any blank space or permanently
12803 * unreadable region.
12804 */
12805 vm_map_lock(old_map);
12806 if (!vm_map_lookup_entry(old_map, start, &last) ||
12807 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12808 last = last->vme_next;
12809 }
12810 *old_entry_p = last;
12811
12812 /*
12813 * XXX For some error returns, want to
12814 * XXX skip to the next element. Note
12815 * that INVALID_ADDRESS and
12816 * PROTECTION_FAILURE are handled above.
12817 */
12818
12819 return FALSE;
12820 }
12821
12822 /*
12823 * Assert that the vm_map_copy is coming from the right
12824 * zone and hasn't been forged
12825 */
12826 vm_map_copy_require(copy);
12827
12828 /*
12829 * Insert the copy into the new map
12830 */
12831 vm_map_copy_insert(new_map, last, copy);
12832
12833 /*
12834 * Pick up the traversal at the end of
12835 * the copied region.
12836 */
12837
12838 vm_map_lock(old_map);
12839 start += entry_size;
12840 if (!vm_map_lookup_entry(old_map, start, &last)) {
12841 last = last->vme_next;
12842 } else {
12843 if (last->vme_start == start) {
12844 /*
12845 * No need to clip here and we don't
12846 * want to cause any unnecessary
12847 * unnesting...
12848 */
12849 } else {
12850 vm_map_clip_start(old_map, last, start);
12851 }
12852 }
12853 *old_entry_p = last;
12854
12855 return TRUE;
12856 }
12857
12858 /*
12859 * vm_map_fork:
12860 *
12861 * Create and return a new map based on the old
12862 * map, according to the inheritance values on the
12863 * regions in that map and the options.
12864 *
12865 * The source map must not be locked.
12866 */
12867 vm_map_t
12868 vm_map_fork(
12869 ledger_t ledger,
12870 vm_map_t old_map,
12871 int options)
12872 {
12873 pmap_t new_pmap;
12874 vm_map_t new_map;
12875 vm_map_entry_t old_entry;
12876 vm_map_size_t new_size = 0, entry_size;
12877 vm_map_entry_t new_entry;
12878 boolean_t src_needs_copy;
12879 boolean_t new_entry_needs_copy;
12880 boolean_t pmap_is64bit;
12881 int vm_map_copyin_flags;
12882 vm_inherit_t old_entry_inheritance;
12883 int map_create_options;
12884 kern_return_t footprint_collect_kr;
12885
12886 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12887 VM_MAP_FORK_PRESERVE_PURGEABLE |
12888 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12889 /* unsupported option */
12890 return VM_MAP_NULL;
12891 }
12892
12893 pmap_is64bit =
12894 #if defined(__i386__) || defined(__x86_64__)
12895 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12896 #elif defined(__arm64__)
12897 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12898 #elif defined(__arm__)
12899 FALSE;
12900 #else
12901 #error Unknown architecture.
12902 #endif
12903
12904 unsigned int pmap_flags = 0;
12905 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12906 #if defined(HAS_APPLE_PAC)
12907 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12908 #endif
12909 #if PMAP_CREATE_FORCE_4K_PAGES
12910 if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
12911 PAGE_SIZE != FOURK_PAGE_SIZE) {
12912 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
12913 }
12914 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12915 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12916
12917 vm_map_reference_swap(old_map);
12918 vm_map_lock(old_map);
12919
12920 map_create_options = 0;
12921 if (old_map->hdr.entries_pageable) {
12922 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12923 }
12924 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12925 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12926 footprint_collect_kr = KERN_SUCCESS;
12927 }
12928 new_map = vm_map_create_options(new_pmap,
12929 old_map->min_offset,
12930 old_map->max_offset,
12931 map_create_options);
12932 /* inherit cs_enforcement */
12933 vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
12934 vm_map_lock(new_map);
12935 vm_commit_pagezero_status(new_map);
12936 /* inherit the parent map's page size */
12937 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12938 for (
12939 old_entry = vm_map_first_entry(old_map);
12940 old_entry != vm_map_to_entry(old_map);
12941 ) {
12942 entry_size = old_entry->vme_end - old_entry->vme_start;
12943
12944 old_entry_inheritance = old_entry->inheritance;
12945 /*
12946 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12947 * share VM_INHERIT_NONE entries that are not backed by a
12948 * device pager.
12949 */
12950 if (old_entry_inheritance == VM_INHERIT_NONE &&
12951 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12952 (old_entry->protection & VM_PROT_READ) &&
12953 !(!old_entry->is_sub_map &&
12954 VME_OBJECT(old_entry) != NULL &&
12955 VME_OBJECT(old_entry)->pager != NULL &&
12956 is_device_pager_ops(
12957 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12958 old_entry_inheritance = VM_INHERIT_SHARE;
12959 }
12960
12961 if (old_entry_inheritance != VM_INHERIT_NONE &&
12962 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12963 footprint_collect_kr == KERN_SUCCESS) {
12964 /*
12965 * The corpse won't have old_map->pmap to query
12966 * footprint information, so collect that data now
12967 * and store it in new_map->vmmap_corpse_footprint
12968 * for later autopsy.
12969 */
12970 footprint_collect_kr =
12971 vm_map_corpse_footprint_collect(old_map,
12972 old_entry,
12973 new_map);
12974 }
12975
12976 switch (old_entry_inheritance) {
12977 case VM_INHERIT_NONE:
12978 break;
12979
12980 case VM_INHERIT_SHARE:
12981 vm_map_fork_share(old_map, old_entry, new_map);
12982 new_size += entry_size;
12983 break;
12984
12985 case VM_INHERIT_COPY:
12986
12987 /*
12988 * Inline the copy_quickly case;
12989 * upon failure, fall back on call
12990 * to vm_map_fork_copy.
12991 */
12992
12993 if (old_entry->is_sub_map) {
12994 break;
12995 }
12996 if ((old_entry->wired_count != 0) ||
12997 ((VME_OBJECT(old_entry) != NULL) &&
12998 (VME_OBJECT(old_entry)->true_share))) {
12999 goto slow_vm_map_fork_copy;
13000 }
13001
13002 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
13003 vm_map_entry_copy(old_map, new_entry, old_entry);
13004
13005 if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
13006 new_map->jit_entry_exists = TRUE;
13007 }
13008
13009 if (new_entry->is_sub_map) {
13010 /* clear address space specifics */
13011 new_entry->use_pmap = FALSE;
13012 } else {
13013 /*
13014 * We're dealing with a copy-on-write operation,
13015 * so the resulting mapping should not inherit
13016 * the original mapping's accounting settings.
13017 * "iokit_acct" should have been cleared in
13018 * vm_map_entry_copy().
13019 * "use_pmap" should be reset to its default
13020 * (TRUE) so that the new mapping gets
13021 * accounted for in the task's memory footprint.
13022 */
13023 assert(!new_entry->iokit_acct);
13024 new_entry->use_pmap = TRUE;
13025 }
13026
13027 if (!vm_object_copy_quickly(
13028 VME_OBJECT_PTR(new_entry),
13029 VME_OFFSET(old_entry),
13030 (old_entry->vme_end -
13031 old_entry->vme_start),
13032 &src_needs_copy,
13033 &new_entry_needs_copy)) {
13034 vm_map_entry_dispose(new_map, new_entry);
13035 goto slow_vm_map_fork_copy;
13036 }
13037
13038 /*
13039 * Handle copy-on-write obligations
13040 */
13041
13042 if (src_needs_copy && !old_entry->needs_copy) {
13043 vm_prot_t prot;
13044
13045 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
13046
13047 prot = old_entry->protection & ~VM_PROT_WRITE;
13048
13049 if (override_nx(old_map, VME_ALIAS(old_entry))
13050 && prot) {
13051 prot |= VM_PROT_EXECUTE;
13052 }
13053
13054 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
13055
13056 vm_object_pmap_protect(
13057 VME_OBJECT(old_entry),
13058 VME_OFFSET(old_entry),
13059 (old_entry->vme_end -
13060 old_entry->vme_start),
13061 ((old_entry->is_shared
13062 || old_map->mapped_in_other_pmaps)
13063 ? PMAP_NULL :
13064 old_map->pmap),
13065 VM_MAP_PAGE_SIZE(old_map),
13066 old_entry->vme_start,
13067 prot);
13068
13069 assert(old_entry->wired_count == 0);
13070 old_entry->needs_copy = TRUE;
13071 }
13072 new_entry->needs_copy = new_entry_needs_copy;
13073
13074 /*
13075 * Insert the entry at the end
13076 * of the map.
13077 */
13078
13079 vm_map_store_entry_link(new_map,
13080 vm_map_last_entry(new_map),
13081 new_entry,
13082 VM_MAP_KERNEL_FLAGS_NONE);
13083 new_size += entry_size;
13084 break;
13085
13086 slow_vm_map_fork_copy:
13087 vm_map_copyin_flags = 0;
13088 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
13089 vm_map_copyin_flags |=
13090 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
13091 }
13092 if (vm_map_fork_copy(old_map,
13093 &old_entry,
13094 new_map,
13095 vm_map_copyin_flags)) {
13096 new_size += entry_size;
13097 }
13098 continue;
13099 }
13100 old_entry = old_entry->vme_next;
13101 }
13102
13103 #if defined(__arm64__)
13104 pmap_insert_sharedpage(new_map->pmap);
13105 #endif /* __arm64__ */
13106
13107 new_map->size = new_size;
13108
13109 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
13110 vm_map_corpse_footprint_collect_done(new_map);
13111 }
13112
13113 /* Propagate JIT entitlement for the pmap layer. */
13114 if (pmap_get_jit_entitled(old_map->pmap)) {
13115 /* Tell the pmap that it supports JIT. */
13116 pmap_set_jit_entitled(new_map->pmap);
13117 }
13118
13119 vm_map_unlock(new_map);
13120 vm_map_unlock(old_map);
13121 vm_map_deallocate(old_map);
13122
13123 return new_map;
13124 }
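/*
 * Illustrative sketch of a fork-time call, assuming the caller holds a
 * task with valid "ledger" and "map" fields (those names are
 * assumptions, not taken from this file):
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_fork(task->ledger, task->map,
 *	    VM_MAP_FORK_PRESERVE_PURGEABLE |
 *	    VM_MAP_FORK_CORPSE_FOOTPRINT);
 *	if (new_map == VM_MAP_NULL) {
 *		// an unsupported option bit was passed
 *	}
 *
 * Each entry of the old map is then handled according to its
 * (possibly overridden) inheritance: NONE is skipped, SHARE goes
 * through vm_map_fork_share(), and COPY goes through the quick
 * vm_object_copy_quickly() path or falls back to vm_map_fork_copy().
 */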
13125
13126 /*
13127 * vm_map_exec:
13128 *
13129 * Setup the "new_map" with the proper execution environment according
13130 * to the type of executable (platform, 64bit, chroot environment).
13131 * Map the comm page and shared region, etc...
13132 */
13133 kern_return_t
13134 vm_map_exec(
13135 vm_map_t new_map,
13136 task_t task,
13137 boolean_t is64bit,
13138 void *fsroot,
13139 cpu_type_t cpu,
13140 cpu_subtype_t cpu_subtype,
13141 boolean_t reslide)
13142 {
13143 SHARED_REGION_TRACE_DEBUG(
13144 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13145 (void *)VM_KERNEL_ADDRPERM(current_task()),
13146 (void *)VM_KERNEL_ADDRPERM(new_map),
13147 (void *)VM_KERNEL_ADDRPERM(task),
13148 (void *)VM_KERNEL_ADDRPERM(fsroot),
13149 cpu,
13150 cpu_subtype));
13151 (void) vm_commpage_enter(new_map, task, is64bit);
13152
13153 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide);
13154
13155 SHARED_REGION_TRACE_DEBUG(
13156 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13157 (void *)VM_KERNEL_ADDRPERM(current_task()),
13158 (void *)VM_KERNEL_ADDRPERM(new_map),
13159 (void *)VM_KERNEL_ADDRPERM(task),
13160 (void *)VM_KERNEL_ADDRPERM(fsroot),
13161 cpu,
13162 cpu_subtype));
13163
13164 /*
13165 * Some devices have region(s) of memory that shouldn't get allocated by
13166 * user processes. The following code creates dummy vm_map_entry_t's for each
13167 * of the regions that need to be reserved to prevent any allocations in
13168 * those regions.
13169 */
13170 kern_return_t kr = KERN_FAILURE;
13171 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
13172 vmk_flags.vmkf_permanent = TRUE;
13173 vmk_flags.vmkf_beyond_max = TRUE;
13174
13175 struct vm_reserved_region *regions = NULL;
13176 size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
13177 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13178
13179 for (size_t i = 0; i < num_regions; ++i) {
13180 kr = vm_map_enter(
13181 new_map,
13182 &regions[i].vmrr_addr,
13183 regions[i].vmrr_size,
13184 (vm_map_offset_t)0,
13185 VM_FLAGS_FIXED,
13186 vmk_flags,
13187 VM_KERN_MEMORY_NONE,
13188 VM_OBJECT_NULL,
13189 (vm_object_offset_t)0,
13190 FALSE,
13191 VM_PROT_NONE,
13192 VM_PROT_NONE,
13193 VM_INHERIT_NONE);
13194
13195 if (kr != KERN_SUCCESS) {
13196 panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13197 }
13198 }
13199
13200 new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13201
13202 return KERN_SUCCESS;
13203 }
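/*
 * Illustrative sketch of a call from the image-activation path; the
 * cpu type, subtype and fsroot values are placeholders:
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_exec(new_map, task,
 *	    TRUE,			// is64bit
 *	    NULL,			// fsroot: no chroot in this sketch
 *	    CPU_TYPE_ARM64,
 *	    CPU_SUBTYPE_ARM64_ALL,
 *	    FALSE);			// reslide
 *
 * Besides the comm page and shared region, this also reserves any
 * machine-specific regions reported by ml_get_vm_reserved_regions(),
 * so that later user allocations cannot land there.
 */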
13204
13205 /*
13206 * vm_map_lookup_locked:
13207 *
13208 * Finds the VM object, offset, and
13209 * protection for a given virtual address in the
13210 * specified map, assuming a page fault of the
13211 * type specified.
13212 *
13213 * Returns the (object, offset, protection) for
13214 * this address, whether it is wired down, and whether
13215 * this map has the only reference to the data in question.
13216 * In order to later verify this lookup, a "version"
13217 * is returned.
13218 * If contended != NULL, *contended will be set to
13219 * true iff the thread had to spin or block to acquire
13220 * an exclusive lock.
13221 *
13222 * The map MUST be locked by the caller and WILL be
13223 * locked on exit. In order to guarantee the
13224 * existence of the returned object, it is returned
13225 * locked.
13226 *
13227 * If a lookup is requested with "write protection"
13228 * specified, the map may be changed to perform virtual
13229 * copying operations, although the data referenced will
13230 * remain the same.
13231 */
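/*
 * Typical caller pattern, sketched under the assumption that the
 * vm_map_verify() interface and the OBJECT_LOCK_EXCLUSIVE lock-type
 * constant are available as in the fault path; the map must already be
 * locked on entry:
 *
 *	vm_map_version_t version;
 *	vm_object_t object;
 *	vm_object_offset_t offset;
 *	vm_prot_t prot;
 *	boolean_t wired;
 *	vm_map_t real_map;
 *	struct vm_object_fault_info fault_info;
 *	kern_return_t kr;
 *
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset,
 *	    &prot, &wired, &fault_info, &real_map, NULL);
 *	// ... resolve the fault with the object locked ...
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed underneath us: redo the lookup
 *	}
 */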
13232 kern_return_t
13233 vm_map_lookup_locked(
13234 vm_map_t *var_map, /* IN/OUT */
13235 vm_map_offset_t vaddr,
13236 vm_prot_t fault_type,
13237 int object_lock_type,
13238 vm_map_version_t *out_version, /* OUT */
13239 vm_object_t *object, /* OUT */
13240 vm_object_offset_t *offset, /* OUT */
13241 vm_prot_t *out_prot, /* OUT */
13242 boolean_t *wired, /* OUT */
13243 vm_object_fault_info_t fault_info, /* OUT */
13244 vm_map_t *real_map, /* OUT */
13245 bool *contended) /* OUT */
13246 {
13247 vm_map_entry_t entry;
13248 vm_map_t map = *var_map;
13249 vm_map_t old_map = *var_map;
13250 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
13251 vm_map_offset_t cow_parent_vaddr = 0;
13252 vm_map_offset_t old_start = 0;
13253 vm_map_offset_t old_end = 0;
13254 vm_prot_t prot;
13255 boolean_t mask_protections;
13256 boolean_t force_copy;
13257 boolean_t no_force_copy_if_executable;
13258 vm_prot_t original_fault_type;
13259 vm_map_size_t fault_page_mask;
13260
13261 /*
13262 * VM_PROT_MASK means that the caller wants us to use "fault_type"
13263 * as a mask against the mapping's actual protections, not as an
13264 * absolute value.
13265 */
13266 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13267 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13268 no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13269 fault_type &= VM_PROT_ALL;
13270 original_fault_type = fault_type;
13271 if (contended) {
13272 *contended = false;
13273 }
13274
13275 *real_map = map;
13276
13277 fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13278 vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13279
13280 RetryLookup:
13281 fault_type = original_fault_type;
13282
13283 /*
13284 * If the map has an interesting hint, try it before calling
13285 * full blown lookup routine.
13286 */
13287 entry = map->hint;
13288
13289 if ((entry == vm_map_to_entry(map)) ||
13290 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13291 vm_map_entry_t tmp_entry;
13292
13293 /*
13294 * Entry was either not a valid hint, or the vaddr
13295 * was not contained in the entry, so do a full lookup.
13296 */
13297 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13298 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13299 vm_map_unlock(cow_sub_map_parent);
13300 }
13301 if ((*real_map != map)
13302 && (*real_map != cow_sub_map_parent)) {
13303 vm_map_unlock(*real_map);
13304 }
13305 return KERN_INVALID_ADDRESS;
13306 }
13307
13308 entry = tmp_entry;
13309 }
13310 if (map == old_map) {
13311 old_start = entry->vme_start;
13312 old_end = entry->vme_end;
13313 }
13314
13315 /*
13316 * Handle submaps. Drop lock on upper map, submap is
13317 * returned locked.
13318 */
13319
13320 submap_recurse:
13321 if (entry->is_sub_map) {
13322 vm_map_offset_t local_vaddr;
13323 vm_map_offset_t end_delta;
13324 vm_map_offset_t start_delta;
13325 vm_map_entry_t submap_entry, saved_submap_entry;
13326 vm_object_offset_t submap_entry_offset;
13327 vm_object_size_t submap_entry_size;
13328 vm_prot_t subentry_protection;
13329 vm_prot_t subentry_max_protection;
13330 boolean_t subentry_no_copy_on_read;
13331 boolean_t mapped_needs_copy = FALSE;
13332 vm_map_version_t version;
13333
13334 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13335 "map %p (%d) entry %p submap %p (%d)\n",
13336 map, VM_MAP_PAGE_SHIFT(map), entry,
13337 VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13338
13339 local_vaddr = vaddr;
13340
13341 if ((entry->use_pmap &&
13342 !((fault_type & VM_PROT_WRITE) ||
13343 force_copy))) {
13344 /* if real_map equals map we unlock below */
13345 if ((*real_map != map) &&
13346 (*real_map != cow_sub_map_parent)) {
13347 vm_map_unlock(*real_map);
13348 }
13349 *real_map = VME_SUBMAP(entry);
13350 }
13351
13352 if (entry->needs_copy &&
13353 ((fault_type & VM_PROT_WRITE) ||
13354 force_copy)) {
13355 if (!mapped_needs_copy) {
13356 if (vm_map_lock_read_to_write(map)) {
13357 vm_map_lock_read(map);
13358 *real_map = map;
13359 goto RetryLookup;
13360 }
13361 vm_map_lock_read(VME_SUBMAP(entry));
13362 *var_map = VME_SUBMAP(entry);
13363 cow_sub_map_parent = map;
13364 /* reset base to map before cow object */
13365 /* this is the map which will accept */
13366 /* the new cow object */
13367 old_start = entry->vme_start;
13368 old_end = entry->vme_end;
13369 cow_parent_vaddr = vaddr;
13370 mapped_needs_copy = TRUE;
13371 } else {
13372 vm_map_lock_read(VME_SUBMAP(entry));
13373 *var_map = VME_SUBMAP(entry);
13374 if ((cow_sub_map_parent != map) &&
13375 (*real_map != map)) {
13376 vm_map_unlock(map);
13377 }
13378 }
13379 } else {
13380 vm_map_lock_read(VME_SUBMAP(entry));
13381 *var_map = VME_SUBMAP(entry);
13382 /* leave the map locked if it is a target */
13383 /* cow sub_map above; otherwise, just */
13384 /* follow the maps down to the object. */
13385 /* Here we unlock, knowing we are not */
13386 /* revisiting the map. */
13387 if ((*real_map != map) && (map != cow_sub_map_parent)) {
13388 vm_map_unlock_read(map);
13389 }
13390 }
13391
13392 map = *var_map;
13393
13394 /* calculate the offset in the submap for vaddr */
13395 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13396 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13397 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13398 (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13399
13400 RetrySubMap:
13401 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13402 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13403 vm_map_unlock(cow_sub_map_parent);
13404 }
13405 if ((*real_map != map)
13406 && (*real_map != cow_sub_map_parent)) {
13407 vm_map_unlock(*real_map);
13408 }
13409 *real_map = map;
13410 return KERN_INVALID_ADDRESS;
13411 }
13412
13413 /* find the attenuated shadow of the underlying object */
13414 /* on our target map */
13415
13416 /* In English: the submap object may extend beyond the */
13417 /* region mapped by the entry, or may only fill a portion */
13418 /* of it. For our purposes, we only care if the object */
13419 /* doesn't fill. In this case the area which will */
13420 /* ultimately be clipped in the top map will only need */
13421 /* to be as big as the portion of the underlying entry */
13422 /* which is mapped */
13423 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13424 submap_entry->vme_start - VME_OFFSET(entry) : 0;
13425
13426 end_delta =
13427 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13428 submap_entry->vme_end ?
13429 0 : (VME_OFFSET(entry) +
13430 (old_end - old_start))
13431 - submap_entry->vme_end;
13432
13433 old_start += start_delta;
13434 old_end -= end_delta;
13435
13436 if (submap_entry->is_sub_map) {
13437 entry = submap_entry;
13438 vaddr = local_vaddr;
13439 goto submap_recurse;
13440 }
13441
13442 if (((fault_type & VM_PROT_WRITE) ||
13443 force_copy)
13444 && cow_sub_map_parent) {
13445 vm_object_t sub_object, copy_object;
13446 vm_object_offset_t copy_offset;
13447 vm_map_offset_t local_start;
13448 vm_map_offset_t local_end;
13449 boolean_t copied_slowly = FALSE;
13450 vm_object_offset_t copied_slowly_phys_offset = 0;
13451 kern_return_t kr = KERN_SUCCESS;
13452
13453 if (vm_map_lock_read_to_write(map)) {
13454 vm_map_lock_read(map);
13455 old_start -= start_delta;
13456 old_end += end_delta;
13457 goto RetrySubMap;
13458 }
13459
13460
13461 sub_object = VME_OBJECT(submap_entry);
13462 if (sub_object == VM_OBJECT_NULL) {
13463 sub_object =
13464 vm_object_allocate(
13465 (vm_map_size_t)
13466 (submap_entry->vme_end -
13467 submap_entry->vme_start));
13468 VME_OBJECT_SET(submap_entry, sub_object);
13469 VME_OFFSET_SET(submap_entry, 0);
13470 assert(!submap_entry->is_sub_map);
13471 assert(submap_entry->use_pmap);
13472 }
13473 local_start = local_vaddr -
13474 (cow_parent_vaddr - old_start);
13475 local_end = local_vaddr +
13476 (old_end - cow_parent_vaddr);
13477 vm_map_clip_start(map, submap_entry, local_start);
13478 vm_map_clip_end(map, submap_entry, local_end);
13479 if (submap_entry->is_sub_map) {
13480 /* unnesting was done when clipping */
13481 assert(!submap_entry->use_pmap);
13482 }
13483
13484 /* This is the COW case, let's connect */
13485 /* an entry in our space to the underlying */
13486 /* object in the submap, bypassing the */
13487 /* submap. */
13488
13489 if (submap_entry->wired_count != 0 ||
13490 (sub_object->copy_strategy !=
13491 MEMORY_OBJECT_COPY_SYMMETRIC)) {
13492 if ((submap_entry->protection & VM_PROT_EXECUTE) &&
13493 no_force_copy_if_executable) {
13494 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13495 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13496 vm_map_unlock(cow_sub_map_parent);
13497 }
13498 if ((*real_map != map)
13499 && (*real_map != cow_sub_map_parent)) {
13500 vm_map_unlock(*real_map);
13501 }
13502 *real_map = map;
13503 vm_map_lock_write_to_read(map);
13504 kr = KERN_PROTECTION_FAILURE;
13505 DTRACE_VM4(submap_no_copy_executable,
13506 vm_map_t, map,
13507 vm_object_offset_t, submap_entry_offset,
13508 vm_object_size_t, submap_entry_size,
13509 int, kr);
13510 return kr;
13511 }
13512
13513 vm_object_reference(sub_object);
13514
13515 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13516 "submap_entry %p offset 0x%llx\n",
13517 submap_entry, VME_OFFSET(submap_entry));
13518 submap_entry_offset = VME_OFFSET(submap_entry);
13519 submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13520
13521 DTRACE_VM6(submap_copy_slowly,
13522 vm_map_t, cow_sub_map_parent,
13523 vm_map_offset_t, vaddr,
13524 vm_map_t, map,
13525 vm_object_size_t, submap_entry_size,
13526 int, submap_entry->wired_count,
13527 int, sub_object->copy_strategy);
13528
13529 saved_submap_entry = submap_entry;
13530 version.main_timestamp = map->timestamp;
13531 vm_map_unlock(map); /* Increments timestamp by 1 */
13532 submap_entry = VM_MAP_ENTRY_NULL;
13533
13534 vm_object_lock(sub_object);
13535 kr = vm_object_copy_slowly(sub_object,
13536 submap_entry_offset,
13537 submap_entry_size,
13538 FALSE,
13539 &copy_object);
13540 copied_slowly = TRUE;
13541 /* 4k: account for extra offset in physical page */
13542 copied_slowly_phys_offset = submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13543 vm_object_deallocate(sub_object);
13544
13545 vm_map_lock(map);
13546
13547 if (kr != KERN_SUCCESS &&
13548 kr != KERN_MEMORY_RESTART_COPY) {
13549 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13550 vm_map_unlock(cow_sub_map_parent);
13551 }
13552 if ((*real_map != map)
13553 && (*real_map != cow_sub_map_parent)) {
13554 vm_map_unlock(*real_map);
13555 }
13556 *real_map = map;
13557 vm_object_deallocate(copy_object);
13558 copy_object = VM_OBJECT_NULL;
13559 vm_map_lock_write_to_read(map);
13560 DTRACE_VM4(submap_copy_slowly,
13561 vm_object_t, sub_object,
13562 vm_object_offset_t, submap_entry_offset,
13563 vm_object_size_t, submap_entry_size,
13564 int, kr);
13565 return kr;
13566 }
13567
13568 if ((kr == KERN_SUCCESS) &&
13569 (version.main_timestamp + 1) == map->timestamp) {
13570 submap_entry = saved_submap_entry;
13571 } else {
13572 saved_submap_entry = NULL;
13573 old_start -= start_delta;
13574 old_end += end_delta;
13575 vm_object_deallocate(copy_object);
13576 copy_object = VM_OBJECT_NULL;
13577 vm_map_lock_write_to_read(map);
13578 goto RetrySubMap;
13579 }
13580 } else {
13581 /* set up shadow object */
13582 copy_object = sub_object;
13583 vm_object_lock(sub_object);
13584 vm_object_reference_locked(sub_object);
13585 sub_object->shadowed = TRUE;
13586 vm_object_unlock(sub_object);
13587
13588 assert(submap_entry->wired_count == 0);
13589 submap_entry->needs_copy = TRUE;
13590
13591 prot = submap_entry->protection;
13592 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13593 prot = prot & ~VM_PROT_WRITE;
13594 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13595
13596 if (override_nx(old_map,
13597 VME_ALIAS(submap_entry))
13598 && prot) {
13599 prot |= VM_PROT_EXECUTE;
13600 }
13601
13602 vm_object_pmap_protect(
13603 sub_object,
13604 VME_OFFSET(submap_entry),
13605 submap_entry->vme_end -
13606 submap_entry->vme_start,
13607 (submap_entry->is_shared
13608 || map->mapped_in_other_pmaps) ?
13609 PMAP_NULL : map->pmap,
13610 VM_MAP_PAGE_SIZE(map),
13611 submap_entry->vme_start,
13612 prot);
13613 }
13614
13615 /*
13616 * Adjust the fault offset to the submap entry.
13617 */
13618 copy_offset = (local_vaddr -
13619 submap_entry->vme_start +
13620 VME_OFFSET(submap_entry));
13621
13622 /* This works differently from the */
13623 /* normal submap case. We go back */
13624 /* to the parent of the cow map and */
13625 /* clip out the target portion of */
13626 /* the sub_map, substituting the */
13627 /* new copy object. */
13628
13629 subentry_protection = submap_entry->protection;
13630 subentry_max_protection = submap_entry->max_protection;
13631 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13632 vm_map_unlock(map);
13633 submap_entry = NULL; /* not valid after map unlock */
13634
13635 local_start = old_start;
13636 local_end = old_end;
13637 map = cow_sub_map_parent;
13638 *var_map = cow_sub_map_parent;
13639 vaddr = cow_parent_vaddr;
13640 cow_sub_map_parent = NULL;
13641
13642 if (!vm_map_lookup_entry(map,
13643 vaddr, &entry)) {
13644 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13645 vm_map_unlock(cow_sub_map_parent);
13646 }
13647 if ((*real_map != map)
13648 && (*real_map != cow_sub_map_parent)) {
13649 vm_map_unlock(*real_map);
13650 }
13651 *real_map = map;
13652 vm_object_deallocate(
13653 copy_object);
13654 copy_object = VM_OBJECT_NULL;
13655 vm_map_lock_write_to_read(map);
13656 DTRACE_VM4(submap_lookup_post_unlock,
13657 uint64_t, (uint64_t)entry->vme_start,
13658 uint64_t, (uint64_t)entry->vme_end,
13659 vm_map_offset_t, vaddr,
13660 int, copied_slowly);
13661 return KERN_INVALID_ADDRESS;
13662 }
13663
13664 /* clip out the portion of space */
13665 /* mapped by the sub map which */
13666 /* corresponds to the underlying */
13667 /* object */
13668
13669 /*
13670 * Clip (and unnest) the smallest nested chunk
13671 * possible around the faulting address...
13672 */
13673 local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
13674 local_end = local_start + pmap_shared_region_size_min(map->pmap);
13675 /*
13676 * ... but don't go beyond the "old_start" to "old_end"
13677 * range, to avoid spanning over another VM region
13678 * with a possibly different VM object and/or offset.
13679 */
13680 if (local_start < old_start) {
13681 local_start = old_start;
13682 }
13683 if (local_end > old_end) {
13684 local_end = old_end;
13685 }
13686 /*
13687 * Adjust copy_offset to the start of the range.
13688 */
13689 copy_offset -= (vaddr - local_start);
13690
13691 vm_map_clip_start(map, entry, local_start);
13692 vm_map_clip_end(map, entry, local_end);
13693 if (entry->is_sub_map) {
13694 /* unnesting was done when clipping */
13695 assert(!entry->use_pmap);
13696 }
13697
13698 /* substitute copy object for */
13699 /* shared map entry */
13700 vm_map_deallocate(VME_SUBMAP(entry));
13701 assert(!entry->iokit_acct);
13702 entry->is_sub_map = FALSE;
13703 entry->use_pmap = TRUE;
13704 VME_OBJECT_SET(entry, copy_object);
13705
13706 /* propagate the submap entry's protections */
13707 if (entry->protection != VM_PROT_READ) {
13708 /*
13709 * Someone has already altered the top entry's
13710 * protections via vm_protect(VM_PROT_COPY).
13711 * Respect these new values and ignore the
13712 * submap entry's protections.
13713 */
13714 } else {
13715 /*
13716 * Regular copy-on-write: propagate the submap
13717 * entry's protections to the top map entry.
13718 */
13719 entry->protection |= subentry_protection;
13720 }
13721 entry->max_protection |= subentry_max_protection;
13722 /* propagate no_copy_on_read */
13723 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13724
13725 if ((entry->protection & VM_PROT_WRITE) &&
13726 (entry->protection & VM_PROT_EXECUTE) &&
13727 #if XNU_TARGET_OS_OSX
13728 map->pmap != kernel_pmap &&
13729 (vm_map_cs_enforcement(map)
13730 #if __arm64__
13731 || !VM_MAP_IS_EXOTIC(map)
13732 #endif /* __arm64__ */
13733 ) &&
13734 #endif /* XNU_TARGET_OS_OSX */
13735 !(entry->used_for_jit) &&
13736 VM_MAP_POLICY_WX_STRIP_X(map)) {
13737 DTRACE_VM3(cs_wx,
13738 uint64_t, (uint64_t)entry->vme_start,
13739 uint64_t, (uint64_t)entry->vme_end,
13740 vm_prot_t, entry->protection);
13741 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13742 proc_selfpid(),
13743 (current_task()->bsd_info
13744 ? proc_name_address(current_task()->bsd_info)
13745 : "?"),
13746 __FUNCTION__);
13747 entry->protection &= ~VM_PROT_EXECUTE;
13748 }
13749
13750 if (copied_slowly) {
13751 VME_OFFSET_SET(entry, local_start - old_start + copied_slowly_phys_offset);
13752 entry->needs_copy = FALSE;
13753 entry->is_shared = FALSE;
13754 } else {
13755 VME_OFFSET_SET(entry, copy_offset);
13756 assert(entry->wired_count == 0);
13757 entry->needs_copy = TRUE;
13758 if (entry->inheritance == VM_INHERIT_SHARE) {
13759 entry->inheritance = VM_INHERIT_COPY;
13760 }
13761 if (map != old_map) {
13762 entry->is_shared = TRUE;
13763 }
13764 }
13765 if (entry->inheritance == VM_INHERIT_SHARE) {
13766 entry->inheritance = VM_INHERIT_COPY;
13767 }
13768
13769 vm_map_lock_write_to_read(map);
13770 } else {
13771 if ((cow_sub_map_parent)
13772 && (cow_sub_map_parent != *real_map)
13773 && (cow_sub_map_parent != map)) {
13774 vm_map_unlock(cow_sub_map_parent);
13775 }
13776 entry = submap_entry;
13777 vaddr = local_vaddr;
13778 }
13779 }
13780
13781 /*
13782 * Check whether this task is allowed to have
13783 * this page.
13784 */
13785
13786 prot = entry->protection;
13787
13788 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13789 /*
13790 * HACK -- if not a stack, then allow execution
13791 */
13792 prot |= VM_PROT_EXECUTE;
13793 }
13794
13795 if (mask_protections) {
13796 fault_type &= prot;
13797 if (fault_type == VM_PROT_NONE) {
13798 goto protection_failure;
13799 }
13800 }
13801 if (((fault_type & prot) != fault_type)
13802 #if __arm64__
13803 /* prefetch abort in execute-only page */
13804 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13805 #endif
13806 ) {
13807 protection_failure:
13808 if (*real_map != map) {
13809 vm_map_unlock(*real_map);
13810 }
13811 *real_map = map;
13812
13813 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13814 log_stack_execution_failure((addr64_t)vaddr, prot);
13815 }
13816
13817 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13818 return KERN_PROTECTION_FAILURE;
13819 }
13820
13821 /*
13822 * If this page is not pageable, we have to get
13823 * it for all possible accesses.
13824 */
13825
13826 *wired = (entry->wired_count != 0);
13827 if (*wired) {
13828 fault_type = prot;
13829 }
13830
13831 /*
13832 * If the entry was copy-on-write, we either ...
13833 */
13834
13835 if (entry->needs_copy) {
13836 /*
13837 * If we want to write the page, we may as well
13838 * handle that now since we've got the map locked.
13839 *
13840 * If we don't need to write the page, we just
13841 * demote the permissions allowed.
13842 */
13843
13844 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13845 /*
13846 * Make a new object, and place it in the
13847 * object chain. Note that no new references
13848 * have appeared -- one just moved from the
13849 * map to the new object.
13850 */
13851
13852 if (vm_map_lock_read_to_write(map)) {
13853 vm_map_lock_read(map);
13854 goto RetryLookup;
13855 }
13856
13857 if (VME_OBJECT(entry)->shadowed == FALSE) {
13858 vm_object_lock(VME_OBJECT(entry));
13859 VME_OBJECT(entry)->shadowed = TRUE;
13860 vm_object_unlock(VME_OBJECT(entry));
13861 }
13862 VME_OBJECT_SHADOW(entry,
13863 (vm_map_size_t) (entry->vme_end -
13864 entry->vme_start));
13865 entry->needs_copy = FALSE;
13866
13867 vm_map_lock_write_to_read(map);
13868 }
13869 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13870 /*
13871 * We're attempting to read a copy-on-write
13872 * page -- don't allow writes.
13873 */
13874
13875 prot &= (~VM_PROT_WRITE);
13876 }
13877 }
13878
13879 /*
13880 * Create an object if necessary.
13881 */
13882 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13883 if (vm_map_lock_read_to_write(map)) {
13884 vm_map_lock_read(map);
13885 goto RetryLookup;
13886 }
13887
13888 VME_OBJECT_SET(entry,
13889 vm_object_allocate(
13890 (vm_map_size_t)(entry->vme_end -
13891 entry->vme_start)));
13892 VME_OFFSET_SET(entry, 0);
13893 assert(entry->use_pmap);
13894 vm_map_lock_write_to_read(map);
13895 }
13896
13897 /*
13898 * Return the object/offset from this entry. If the entry
13899 * was copy-on-write or empty, it has been fixed up. Also
13900 * return the protection.
13901 */
13902
13903 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13904 *object = VME_OBJECT(entry);
13905 *out_prot = prot;
13906 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
13907
13908 if (fault_info) {
13909 fault_info->interruptible = THREAD_UNINT; /* for now... */
13910 /* ... the caller will change "interruptible" if needed */
13911 fault_info->cluster_size = 0;
13912 fault_info->user_tag = VME_ALIAS(entry);
13913 fault_info->pmap_options = 0;
13914 if (entry->iokit_acct ||
13915 (!entry->is_sub_map && !entry->use_pmap)) {
13916 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13917 }
13918 fault_info->behavior = entry->behavior;
13919 fault_info->lo_offset = VME_OFFSET(entry);
13920 fault_info->hi_offset =
13921 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13922 fault_info->no_cache = entry->no_cache;
13923 fault_info->stealth = FALSE;
13924 fault_info->io_sync = FALSE;
13925 if (entry->used_for_jit ||
13926 entry->vme_resilient_codesign) {
13927 fault_info->cs_bypass = TRUE;
13928 } else {
13929 fault_info->cs_bypass = FALSE;
13930 }
13931 fault_info->pmap_cs_associated = FALSE;
13932 #if CONFIG_PMAP_CS
13933 if (entry->pmap_cs_associated) {
13934 /*
13935 * The pmap layer will validate this page
13936 * before allowing it to be executed from.
13937 */
13938 fault_info->pmap_cs_associated = TRUE;
13939 }
13940 #endif /* CONFIG_PMAP_CS */
13941 fault_info->mark_zf_absent = FALSE;
13942 fault_info->batch_pmap_op = FALSE;
13943 fault_info->resilient_media = entry->vme_resilient_media;
13944 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13945 if (entry->translated_allow_execute) {
13946 fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
13947 }
13948 }
13949
13950 /*
13951 * Lock the object to prevent it from disappearing
13952 */
13953 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13954 if (contended == NULL) {
13955 vm_object_lock(*object);
13956 } else {
13957 *contended = vm_object_lock_check_contended(*object);
13958 }
13959 } else {
13960 vm_object_lock_shared(*object);
13961 }
13962
13963 /*
13964 * Save the version number
13965 */
13966
13967 out_version->main_timestamp = map->timestamp;
13968
13969 return KERN_SUCCESS;
13970 }
13971
13972
13973 /*
13974 * vm_map_verify:
13975 *
13976 * Verifies that the map in question has not changed
13977 * since the given version. The map has to be locked
13978 * ("shared" mode is fine) before calling this function
13979 * and it will be returned locked too.
13980 */
13981 boolean_t
13982 vm_map_verify(
13983 vm_map_t map,
13984 vm_map_version_t *version) /* REF */
13985 {
13986 boolean_t result;
13987
13988 vm_map_lock_assert_held(map);
13989 result = (map->timestamp == version->main_timestamp);
13990
13991 return result;
13992 }
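/*
 * Editor's note -- a minimal sketch (not part of the original source) of how
 * a caller is expected to use the version returned by the lookup routine
 * above together with vm_map_verify().  The helper name and the "redo the
 * lookup" step are hypothetical; only vm_map_lock_read()/vm_map_unlock_read()
 * and vm_map_verify() are interfaces actually defined in this file.
 */
#if 0   /* illustrative only */
static void
example_use_of_version(vm_map_t map, vm_map_version_t *version)
{
	boolean_t still_valid;

	vm_map_lock_read(map);          /* "shared" mode is sufficient */
	still_valid = vm_map_verify(map, version);
	if (!still_valid) {
		/*
		 * The map's timestamp changed since the lookup: any cached
		 * entry/object/offset may be stale, so the caller has to
		 * redo the lookup before relying on them.
		 */
	}
	vm_map_unlock_read(map);
}
#endif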
13993
13994 /*
13995 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13996 * Goes away after the regular vm_region_recurse function migrates to
13997 * 64 bits
13998 * vm_region_recurse: A form of vm_region which follows the
13999 * submaps in a target map
14000 *
14001 */
14002
14003 kern_return_t
14004 vm_map_region_recurse_64(
14005 vm_map_t map,
14006 vm_map_offset_t *address, /* IN/OUT */
14007 vm_map_size_t *size, /* OUT */
14008 natural_t *nesting_depth, /* IN/OUT */
14009 vm_region_submap_info_64_t submap_info, /* IN/OUT */
14010 mach_msg_type_number_t *count) /* IN/OUT */
14011 {
14012 mach_msg_type_number_t original_count;
14013 vm_region_extended_info_data_t extended;
14014 vm_map_entry_t tmp_entry;
14015 vm_map_offset_t user_address;
14016 unsigned int user_max_depth;
14017
14018 /*
14019 * "curr_entry" is the VM map entry preceding or including the
14020 * address we're looking for.
14021 * "curr_map" is the map or sub-map containing "curr_entry".
14022 * "curr_address" is the equivalent of the top map's "user_address"
14023 * in the current map.
14024 * "curr_offset" is the cumulated offset of "curr_map" in the
14025 * target task's address space.
14026 * "curr_depth" is the depth of "curr_map" in the chain of
14027 * sub-maps.
14028 *
14029 * "curr_max_below" and "curr_max_above" limit the range (around
14030 * "curr_address") we should take into account in the current (sub)map.
14031 * They limit the range to what's visible through the map entries
14032 * we've traversed from the top map to the current map.
14033 *
14034 */
14035 vm_map_entry_t curr_entry;
14036 vm_map_address_t curr_address;
14037 vm_map_offset_t curr_offset;
14038 vm_map_t curr_map;
14039 unsigned int curr_depth;
14040 vm_map_offset_t curr_max_below, curr_max_above;
14041 vm_map_offset_t curr_skip;
14042
14043 /*
14044 * "next_" is the same as "curr_" but for the VM region immediately
14045 * after the address we're looking for. We need to keep track of this
14046 * too because we want to return info about that region if the
14047 * address we're looking for is not mapped.
14048 */
14049 vm_map_entry_t next_entry;
14050 vm_map_offset_t next_offset;
14051 vm_map_offset_t next_address;
14052 vm_map_t next_map;
14053 unsigned int next_depth;
14054 vm_map_offset_t next_max_below, next_max_above;
14055 vm_map_offset_t next_skip;
14056
14057 boolean_t look_for_pages;
14058 vm_region_submap_short_info_64_t short_info;
14059 boolean_t do_region_footprint;
14060 int effective_page_size, effective_page_shift;
14061
14062 if (map == VM_MAP_NULL) {
14063 /* no address space to work on */
14064 return KERN_INVALID_ARGUMENT;
14065 }
14066
14067 effective_page_shift = vm_self_region_page_shift(map);
14068 effective_page_size = (1 << effective_page_shift);
14069
14070 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14071 /*
14072 * "info" structure is not big enough and
14073 * would overflow
14074 */
14075 return KERN_INVALID_ARGUMENT;
14076 }
14077
14078 do_region_footprint = task_self_region_footprint();
14079 original_count = *count;
14080
14081 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14082 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14083 look_for_pages = FALSE;
14084 short_info = (vm_region_submap_short_info_64_t) submap_info;
14085 submap_info = NULL;
14086 } else {
14087 look_for_pages = TRUE;
14088 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14089 short_info = NULL;
14090
14091 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14092 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14093 }
14094 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14095 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14096 }
14097 }
14098
14099 user_address = *address;
14100 user_max_depth = *nesting_depth;
14101
14102 if (not_in_kdp) {
14103 vm_map_lock_read(map);
14104 }
14105
14106 recurse_again:
14107 curr_entry = NULL;
14108 curr_map = map;
14109 curr_address = user_address;
14110 curr_offset = 0;
14111 curr_skip = 0;
14112 curr_depth = 0;
14113 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14114 curr_max_below = curr_address;
14115
14116 next_entry = NULL;
14117 next_map = NULL;
14118 next_address = 0;
14119 next_offset = 0;
14120 next_skip = 0;
14121 next_depth = 0;
14122 next_max_above = (vm_map_offset_t) -1;
14123 next_max_below = (vm_map_offset_t) -1;
14124
14125 for (;;) {
14126 if (vm_map_lookup_entry(curr_map,
14127 curr_address,
14128 &tmp_entry)) {
14129 /* tmp_entry contains the address we're looking for */
14130 curr_entry = tmp_entry;
14131 } else {
14132 vm_map_offset_t skip;
14133 /*
14134 * The address is not mapped. "tmp_entry" is the
14135 * map entry preceding the address. We want the next
14136 * one, if it exists.
14137 */
14138 curr_entry = tmp_entry->vme_next;
14139
14140 if (curr_entry == vm_map_to_entry(curr_map) ||
14141 (curr_entry->vme_start >=
14142 curr_address + curr_max_above)) {
14143 /* no next entry at this level: stop looking */
14144 if (not_in_kdp) {
14145 vm_map_unlock_read(curr_map);
14146 }
14147 curr_entry = NULL;
14148 curr_map = NULL;
14149 curr_skip = 0;
14150 curr_offset = 0;
14151 curr_depth = 0;
14152 curr_max_above = 0;
14153 curr_max_below = 0;
14154 break;
14155 }
14156
14157 /* adjust current address and offset */
14158 skip = curr_entry->vme_start - curr_address;
14159 curr_address = curr_entry->vme_start;
14160 curr_skip += skip;
14161 curr_offset += skip;
14162 curr_max_above -= skip;
14163 curr_max_below = 0;
14164 }
14165
14166 /*
14167 * Is the next entry at this level closer to the address (or
14168 * deeper in the submap chain) than the one we had
14169 * so far ?
14170 */
14171 tmp_entry = curr_entry->vme_next;
14172 if (tmp_entry == vm_map_to_entry(curr_map)) {
14173 /* no next entry at this level */
14174 } else if (tmp_entry->vme_start >=
14175 curr_address + curr_max_above) {
14176 /*
14177 * tmp_entry is beyond the scope of what we mapped of
14178 * this submap in the upper level: ignore it.
14179 */
14180 } else if ((next_entry == NULL) ||
14181 (tmp_entry->vme_start + curr_offset <=
14182 next_entry->vme_start + next_offset)) {
14183 /*
14184 * We didn't have a "next_entry" or this one is
14185 * closer to the address we're looking for:
14186 * use this "tmp_entry" as the new "next_entry".
14187 */
14188 if (next_entry != NULL) {
14189 /* unlock the last "next_map" */
14190 if (next_map != curr_map && not_in_kdp) {
14191 vm_map_unlock_read(next_map);
14192 }
14193 }
14194 next_entry = tmp_entry;
14195 next_map = curr_map;
14196 next_depth = curr_depth;
14197 next_address = next_entry->vme_start;
14198 next_skip = curr_skip;
14199 next_skip += (next_address - curr_address);
14200 next_offset = curr_offset;
14201 next_offset += (next_address - curr_address);
14202 next_max_above = MIN(next_max_above, curr_max_above);
14203 next_max_above = MIN(next_max_above,
14204 next_entry->vme_end - next_address);
14205 next_max_below = MIN(next_max_below, curr_max_below);
14206 next_max_below = MIN(next_max_below,
14207 next_address - next_entry->vme_start);
14208 }
14209
14210 /*
14211 * "curr_max_{above,below}" allow us to keep track of the
14212 * portion of the submap that is actually mapped at this level:
14213 * the rest of that submap is irrelevant to us, since it's not
14214 * mapped here.
14215 * The relevant portion of the map starts at
14216 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14217 */
14218 curr_max_above = MIN(curr_max_above,
14219 curr_entry->vme_end - curr_address);
14220 curr_max_below = MIN(curr_max_below,
14221 curr_address - curr_entry->vme_start);
14222
14223 if (!curr_entry->is_sub_map ||
14224 curr_depth >= user_max_depth) {
14225 /*
14226 * We hit a leaf map or we reached the maximum depth
14227 * we could, so stop looking. Keep the current map
14228 * locked.
14229 */
14230 break;
14231 }
14232
14233 /*
14234 * Get down to the next submap level.
14235 */
14236
14237 /*
14238 * Lock the next level and unlock the current level,
14239 * unless we need to keep it locked to access the "next_entry"
14240 * later.
14241 */
14242 if (not_in_kdp) {
14243 vm_map_lock_read(VME_SUBMAP(curr_entry));
14244 }
14245 if (curr_map == next_map) {
14246 /* keep "next_map" locked in case we need it */
14247 } else {
14248 /* release this map */
14249 if (not_in_kdp) {
14250 vm_map_unlock_read(curr_map);
14251 }
14252 }
14253
14254 /*
14255 * Adjust the offset. "curr_entry" maps the submap
14256 * at relative address "curr_entry->vme_start" in the
14257 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14258 * bytes of the submap.
14259 * "curr_offset" always represents the offset of a virtual
14260 * address in the curr_map relative to the absolute address
14261 * space (i.e. the top-level VM map).
14262 */
14263 curr_offset +=
14264 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14265 curr_address = user_address + curr_offset;
14266 /* switch to the submap */
14267 curr_map = VME_SUBMAP(curr_entry);
14268 curr_depth++;
14269 curr_entry = NULL;
14270 }
14271
14272 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14273 // so probably should be a real 32b ID vs. ptr.
14274 // Current users just check for equality
14275
14276 if (curr_entry == NULL) {
14277 /* no VM region contains the address... */
14278
14279 if (do_region_footprint && /* we want footprint numbers */
14280 next_entry == NULL && /* & there are no more regions */
14281 /* & we haven't already provided our fake region: */
14282 user_address <= vm_map_last_entry(map)->vme_end) {
14283 ledger_amount_t ledger_resident, ledger_compressed;
14284
14285 /*
14286 * Add a fake memory region to account for
14287 * purgeable and/or ledger-tagged memory that
14288 * counts towards this task's memory footprint,
14289 * i.e. the resident/compressed pages of non-volatile
14290 * objects owned by that task.
14291 */
14292 task_ledgers_footprint(map->pmap->ledger,
14293 &ledger_resident,
14294 &ledger_compressed);
14295 if (ledger_resident + ledger_compressed == 0) {
14296 /* no purgeable memory usage to report */
14297 return KERN_INVALID_ADDRESS;
14298 }
14299 /* fake region to show nonvolatile footprint */
14300 if (look_for_pages) {
14301 submap_info->protection = VM_PROT_DEFAULT;
14302 submap_info->max_protection = VM_PROT_DEFAULT;
14303 submap_info->inheritance = VM_INHERIT_DEFAULT;
14304 submap_info->offset = 0;
14305 submap_info->user_tag = -1;
14306 submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14307 submap_info->pages_shared_now_private = 0;
14308 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14309 submap_info->pages_dirtied = submap_info->pages_resident;
14310 submap_info->ref_count = 1;
14311 submap_info->shadow_depth = 0;
14312 submap_info->external_pager = 0;
14313 submap_info->share_mode = SM_PRIVATE;
14314 submap_info->is_submap = 0;
14315 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14316 submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14317 submap_info->user_wired_count = 0;
14318 submap_info->pages_reusable = 0;
14319 } else {
14320 short_info->user_tag = -1;
14321 short_info->offset = 0;
14322 short_info->protection = VM_PROT_DEFAULT;
14323 short_info->inheritance = VM_INHERIT_DEFAULT;
14324 short_info->max_protection = VM_PROT_DEFAULT;
14325 short_info->behavior = VM_BEHAVIOR_DEFAULT;
14326 short_info->user_wired_count = 0;
14327 short_info->is_submap = 0;
14328 short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14329 short_info->external_pager = 0;
14330 short_info->shadow_depth = 0;
14331 short_info->share_mode = SM_PRIVATE;
14332 short_info->ref_count = 1;
14333 }
14334 *nesting_depth = 0;
14335 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14336 // *address = user_address;
14337 *address = vm_map_last_entry(map)->vme_end;
14338 return KERN_SUCCESS;
14339 }
14340
14341 if (next_entry == NULL) {
14342 /* ... and no VM region follows it either */
14343 return KERN_INVALID_ADDRESS;
14344 }
14345 /* ... gather info about the next VM region */
14346 curr_entry = next_entry;
14347 curr_map = next_map; /* still locked ... */
14348 curr_address = next_address;
14349 curr_skip = next_skip;
14350 curr_offset = next_offset;
14351 curr_depth = next_depth;
14352 curr_max_above = next_max_above;
14353 curr_max_below = next_max_below;
14354 } else {
14355 /* we won't need "next_entry" after all */
14356 if (next_entry != NULL) {
14357 /* release "next_map" */
14358 if (next_map != curr_map && not_in_kdp) {
14359 vm_map_unlock_read(next_map);
14360 }
14361 }
14362 }
14363 next_entry = NULL;
14364 next_map = NULL;
14365 next_offset = 0;
14366 next_skip = 0;
14367 next_depth = 0;
14368 next_max_below = -1;
14369 next_max_above = -1;
14370
14371 if (curr_entry->is_sub_map &&
14372 curr_depth < user_max_depth) {
14373 /*
14374 * We're not as deep as we could be: we must have
14375 * gone back up after not finding anything mapped
14376 * below the original top-level map entry's range.
14377 * Let's move "curr_address" forward and recurse again.
14378 */
14379 user_address = curr_address;
14380 goto recurse_again;
14381 }
14382
14383 *nesting_depth = curr_depth;
14384 *size = curr_max_above + curr_max_below;
14385 *address = user_address + curr_skip - curr_max_below;
14386
14387 if (look_for_pages) {
14388 submap_info->user_tag = VME_ALIAS(curr_entry);
14389 submap_info->offset = VME_OFFSET(curr_entry);
14390 submap_info->protection = curr_entry->protection;
14391 submap_info->inheritance = curr_entry->inheritance;
14392 submap_info->max_protection = curr_entry->max_protection;
14393 submap_info->behavior = curr_entry->behavior;
14394 submap_info->user_wired_count = curr_entry->user_wired_count;
14395 submap_info->is_submap = curr_entry->is_sub_map;
14396 submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14397 } else {
14398 short_info->user_tag = VME_ALIAS(curr_entry);
14399 short_info->offset = VME_OFFSET(curr_entry);
14400 short_info->protection = curr_entry->protection;
14401 short_info->inheritance = curr_entry->inheritance;
14402 short_info->max_protection = curr_entry->max_protection;
14403 short_info->behavior = curr_entry->behavior;
14404 short_info->user_wired_count = curr_entry->user_wired_count;
14405 short_info->is_submap = curr_entry->is_sub_map;
14406 short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14407 }
14408
14409 extended.pages_resident = 0;
14410 extended.pages_swapped_out = 0;
14411 extended.pages_shared_now_private = 0;
14412 extended.pages_dirtied = 0;
14413 extended.pages_reusable = 0;
14414 extended.external_pager = 0;
14415 extended.shadow_depth = 0;
14416 extended.share_mode = SM_EMPTY;
14417 extended.ref_count = 0;
14418
14419 if (not_in_kdp) {
14420 if (!curr_entry->is_sub_map) {
14421 vm_map_offset_t range_start, range_end;
14422 range_start = MAX((curr_address - curr_max_below),
14423 curr_entry->vme_start);
14424 range_end = MIN((curr_address + curr_max_above),
14425 curr_entry->vme_end);
14426 vm_map_region_walk(curr_map,
14427 range_start,
14428 curr_entry,
14429 (VME_OFFSET(curr_entry) +
14430 (range_start -
14431 curr_entry->vme_start)),
14432 range_end - range_start,
14433 &extended,
14434 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14435 if (extended.external_pager &&
14436 extended.ref_count == 2 &&
14437 extended.share_mode == SM_SHARED) {
14438 extended.share_mode = SM_PRIVATE;
14439 }
14440 } else {
14441 if (curr_entry->use_pmap) {
14442 extended.share_mode = SM_TRUESHARED;
14443 } else {
14444 extended.share_mode = SM_PRIVATE;
14445 }
14446 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
14447 }
14448 }
14449
14450 if (look_for_pages) {
14451 submap_info->pages_resident = extended.pages_resident;
14452 submap_info->pages_swapped_out = extended.pages_swapped_out;
14453 submap_info->pages_shared_now_private =
14454 extended.pages_shared_now_private;
14455 submap_info->pages_dirtied = extended.pages_dirtied;
14456 submap_info->external_pager = extended.external_pager;
14457 submap_info->shadow_depth = extended.shadow_depth;
14458 submap_info->share_mode = extended.share_mode;
14459 submap_info->ref_count = extended.ref_count;
14460
14461 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14462 submap_info->pages_reusable = extended.pages_reusable;
14463 }
14464 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14465 submap_info->object_id_full = (VME_OBJECT(curr_entry) != NULL) ? (vm_object_id_t) VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
14466 }
14467 } else {
14468 short_info->external_pager = extended.external_pager;
14469 short_info->shadow_depth = extended.shadow_depth;
14470 short_info->share_mode = extended.share_mode;
14471 short_info->ref_count = extended.ref_count;
14472 }
14473
14474 if (not_in_kdp) {
14475 vm_map_unlock_read(curr_map);
14476 }
14477
14478 return KERN_SUCCESS;
14479 }
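/*
 * Editor's note -- vm_map_region_recurse_64() is normally reached from user
 * space through the mach_vm_region_recurse() call.  The sketch below is a
 * user-space program (not kernel code) that walks its own address space,
 * descending into submaps; it assumes a macOS build against <mach/mach_vm.h>.
 */
#if 0   /* user-space sketch, not kernel code */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

int
main(void)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t size = 0;
	natural_t depth = 0;
	vm_region_submap_info_data_64_t info;
	mach_msg_type_number_t count;

	for (;;) {
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (mach_vm_region_recurse(mach_task_self(), &addr, &size,
		    &depth, (vm_region_recurse_info_t)&info,
		    &count) != KERN_SUCCESS) {
			break;  /* typically KERN_INVALID_ADDRESS: no more regions */
		}
		printf("0x%016llx-0x%016llx depth %u tag %u\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    depth, info.user_tag);
		if (info.is_submap) {
			depth++;        /* re-query the same address, one level deeper */
		} else {
			addr += size;   /* advance to the next region */
		}
	}
	return 0;
}
#endif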
14480
14481 /*
14482 * vm_region:
14483 *
14484 * User call to obtain information about a region in
14485 * a task's address map. Currently, only one flavor is
14486 * supported.
14487 *
14488 * XXX The reserved and behavior fields cannot be filled
14489 * in until the vm merge from the IK is completed, and
14490 * vm_reserve is implemented.
14491 */
14492
14493 kern_return_t
14494 vm_map_region(
14495 vm_map_t map,
14496 vm_map_offset_t *address, /* IN/OUT */
14497 vm_map_size_t *size, /* OUT */
14498 vm_region_flavor_t flavor, /* IN */
14499 vm_region_info_t info, /* OUT */
14500 mach_msg_type_number_t *count, /* IN/OUT */
14501 mach_port_t *object_name) /* OUT */
14502 {
14503 vm_map_entry_t tmp_entry;
14504 vm_map_entry_t entry;
14505 vm_map_offset_t start;
14506
14507 if (map == VM_MAP_NULL) {
14508 return KERN_INVALID_ARGUMENT;
14509 }
14510
14511 switch (flavor) {
14512 case VM_REGION_BASIC_INFO:
14513 /* legacy for old 32-bit objects info */
14514 {
14515 vm_region_basic_info_t basic;
14516
14517 if (*count < VM_REGION_BASIC_INFO_COUNT) {
14518 return KERN_INVALID_ARGUMENT;
14519 }
14520
14521 basic = (vm_region_basic_info_t) info;
14522 *count = VM_REGION_BASIC_INFO_COUNT;
14523
14524 vm_map_lock_read(map);
14525
14526 start = *address;
14527 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14528 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14529 vm_map_unlock_read(map);
14530 return KERN_INVALID_ADDRESS;
14531 }
14532 } else {
14533 entry = tmp_entry;
14534 }
14535
14536 start = entry->vme_start;
14537
14538 basic->offset = (uint32_t)VME_OFFSET(entry);
14539 basic->protection = entry->protection;
14540 basic->inheritance = entry->inheritance;
14541 basic->max_protection = entry->max_protection;
14542 basic->behavior = entry->behavior;
14543 basic->user_wired_count = entry->user_wired_count;
14544 basic->reserved = entry->is_sub_map;
14545 *address = start;
14546 *size = (entry->vme_end - start);
14547
14548 if (object_name) {
14549 *object_name = IP_NULL;
14550 }
14551 if (entry->is_sub_map) {
14552 basic->shared = FALSE;
14553 } else {
14554 basic->shared = entry->is_shared;
14555 }
14556
14557 vm_map_unlock_read(map);
14558 return KERN_SUCCESS;
14559 }
14560
14561 case VM_REGION_BASIC_INFO_64:
14562 {
14563 vm_region_basic_info_64_t basic;
14564
14565 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
14566 return KERN_INVALID_ARGUMENT;
14567 }
14568
14569 basic = (vm_region_basic_info_64_t) info;
14570 *count = VM_REGION_BASIC_INFO_COUNT_64;
14571
14572 vm_map_lock_read(map);
14573
14574 start = *address;
14575 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14576 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14577 vm_map_unlock_read(map);
14578 return KERN_INVALID_ADDRESS;
14579 }
14580 } else {
14581 entry = tmp_entry;
14582 }
14583
14584 start = entry->vme_start;
14585
14586 basic->offset = VME_OFFSET(entry);
14587 basic->protection = entry->protection;
14588 basic->inheritance = entry->inheritance;
14589 basic->max_protection = entry->max_protection;
14590 basic->behavior = entry->behavior;
14591 basic->user_wired_count = entry->user_wired_count;
14592 basic->reserved = entry->is_sub_map;
14593 *address = start;
14594 *size = (entry->vme_end - start);
14595
14596 if (object_name) {
14597 *object_name = IP_NULL;
14598 }
14599 if (entry->is_sub_map) {
14600 basic->shared = FALSE;
14601 } else {
14602 basic->shared = entry->is_shared;
14603 }
14604
14605 vm_map_unlock_read(map);
14606 return KERN_SUCCESS;
14607 }
14608 case VM_REGION_EXTENDED_INFO:
14609 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
14610 return KERN_INVALID_ARGUMENT;
14611 }
14612 OS_FALLTHROUGH;
14613 case VM_REGION_EXTENDED_INFO__legacy:
14614 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
14615 return KERN_INVALID_ARGUMENT;
14616 }
14617
14618 {
14619 vm_region_extended_info_t extended;
14620 mach_msg_type_number_t original_count;
14621 int effective_page_size, effective_page_shift;
14622
14623 extended = (vm_region_extended_info_t) info;
14624
14625 effective_page_shift = vm_self_region_page_shift(map);
14626 effective_page_size = (1 << effective_page_shift);
14627
14628 vm_map_lock_read(map);
14629
14630 start = *address;
14631 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14632 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14633 vm_map_unlock_read(map);
14634 return KERN_INVALID_ADDRESS;
14635 }
14636 } else {
14637 entry = tmp_entry;
14638 }
14639 start = entry->vme_start;
14640
14641 extended->protection = entry->protection;
14642 extended->user_tag = VME_ALIAS(entry);
14643 extended->pages_resident = 0;
14644 extended->pages_swapped_out = 0;
14645 extended->pages_shared_now_private = 0;
14646 extended->pages_dirtied = 0;
14647 extended->external_pager = 0;
14648 extended->shadow_depth = 0;
14649
14650 original_count = *count;
14651 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14652 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14653 } else {
14654 extended->pages_reusable = 0;
14655 *count = VM_REGION_EXTENDED_INFO_COUNT;
14656 }
14657
14658 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
14659
14660 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14661 extended->share_mode = SM_PRIVATE;
14662 }
14663
14664 if (object_name) {
14665 *object_name = IP_NULL;
14666 }
14667 *address = start;
14668 *size = (entry->vme_end - start);
14669
14670 vm_map_unlock_read(map);
14671 return KERN_SUCCESS;
14672 }
14673 case VM_REGION_TOP_INFO:
14674 {
14675 vm_region_top_info_t top;
14676
14677 if (*count < VM_REGION_TOP_INFO_COUNT) {
14678 return KERN_INVALID_ARGUMENT;
14679 }
14680
14681 top = (vm_region_top_info_t) info;
14682 *count = VM_REGION_TOP_INFO_COUNT;
14683
14684 vm_map_lock_read(map);
14685
14686 start = *address;
14687 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14688 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14689 vm_map_unlock_read(map);
14690 return KERN_INVALID_ADDRESS;
14691 }
14692 } else {
14693 entry = tmp_entry;
14694 }
14695 start = entry->vme_start;
14696
14697 top->private_pages_resident = 0;
14698 top->shared_pages_resident = 0;
14699
14700 vm_map_region_top_walk(entry, top);
14701
14702 if (object_name) {
14703 *object_name = IP_NULL;
14704 }
14705 *address = start;
14706 *size = (entry->vme_end - start);
14707
14708 vm_map_unlock_read(map);
14709 return KERN_SUCCESS;
14710 }
14711 default:
14712 return KERN_INVALID_ARGUMENT;
14713 }
14714 }
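/*
 * Editor's note -- vm_map_region() backs the user-visible mach_vm_region()
 * call.  The sketch below (user-space, not kernel code) queries the first
 * region of the caller's own task with the VM_REGION_BASIC_INFO_64 flavor;
 * it assumes a macOS build against <mach/mach_vm.h>.
 */
#if 0   /* user-space sketch, not kernel code */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

int
main(void)
{
	mach_vm_address_t addr = 1;     /* first region at or above address 1 */
	mach_vm_size_t size = 0;
	vm_region_basic_info_data_64_t info;
	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t object_name = MACH_PORT_NULL;       /* unused by the kernel */

	if (mach_vm_region(mach_task_self(), &addr, &size,
	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info,
	    &count, &object_name) == KERN_SUCCESS) {
		printf("first region 0x%llx..0x%llx prot %d shared %d\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    info.protection, info.shared);
	}
	return 0;
}
#endif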
14715
14716 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14717 MIN((entry_size), \
14718 ((obj)->all_reusable ? \
14719 (obj)->wired_page_count : \
14720 (obj)->resident_page_count - (obj)->reusable_page_count))
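/*
 * Editor's note -- a worked example with hypothetical numbers: for an entry
 * spanning 64 pages over an object with resident_page_count = 50,
 * reusable_page_count = 10 and all_reusable == FALSE,
 * OBJ_RESIDENT_COUNT(obj, 64) == MIN(64, 50 - 10) == 40.  If the object were
 * marked all_reusable, only its wired_page_count would be counted instead.
 */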
14721
14722 void
14723 vm_map_region_top_walk(
14724 vm_map_entry_t entry,
14725 vm_region_top_info_t top)
14726 {
14727 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14728 top->share_mode = SM_EMPTY;
14729 top->ref_count = 0;
14730 top->obj_id = 0;
14731 return;
14732 }
14733
14734 {
14735 struct vm_object *obj, *tmp_obj;
14736 int ref_count;
14737 uint32_t entry_size;
14738
14739 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14740
14741 obj = VME_OBJECT(entry);
14742
14743 vm_object_lock(obj);
14744
14745 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14746 ref_count--;
14747 }
14748
14749 assert(obj->reusable_page_count <= obj->resident_page_count);
14750 if (obj->shadow) {
14751 if (ref_count == 1) {
14752 top->private_pages_resident =
14753 OBJ_RESIDENT_COUNT(obj, entry_size);
14754 } else {
14755 top->shared_pages_resident =
14756 OBJ_RESIDENT_COUNT(obj, entry_size);
14757 }
14758 top->ref_count = ref_count;
14759 top->share_mode = SM_COW;
14760
14761 while ((tmp_obj = obj->shadow)) {
14762 vm_object_lock(tmp_obj);
14763 vm_object_unlock(obj);
14764 obj = tmp_obj;
14765
14766 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14767 ref_count--;
14768 }
14769
14770 assert(obj->reusable_page_count <= obj->resident_page_count);
14771 top->shared_pages_resident +=
14772 OBJ_RESIDENT_COUNT(obj, entry_size);
14773 top->ref_count += ref_count - 1;
14774 }
14775 } else {
14776 if (entry->superpage_size) {
14777 top->share_mode = SM_LARGE_PAGE;
14778 top->shared_pages_resident = 0;
14779 top->private_pages_resident = entry_size;
14780 } else if (entry->needs_copy) {
14781 top->share_mode = SM_COW;
14782 top->shared_pages_resident =
14783 OBJ_RESIDENT_COUNT(obj, entry_size);
14784 } else {
14785 if (ref_count == 1 ||
14786 (ref_count == 2 && obj->named)) {
14787 top->share_mode = SM_PRIVATE;
14788 top->private_pages_resident =
14789 OBJ_RESIDENT_COUNT(obj,
14790 entry_size);
14791 } else {
14792 top->share_mode = SM_SHARED;
14793 top->shared_pages_resident =
14794 OBJ_RESIDENT_COUNT(obj,
14795 entry_size);
14796 }
14797 }
14798 top->ref_count = ref_count;
14799 }
14800 /* XXX K64: obj_id will be truncated */
14801 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14802
14803 vm_object_unlock(obj);
14804 }
14805 }
14806
14807 void
14808 vm_map_region_walk(
14809 vm_map_t map,
14810 vm_map_offset_t va,
14811 vm_map_entry_t entry,
14812 vm_object_offset_t offset,
14813 vm_object_size_t range,
14814 vm_region_extended_info_t extended,
14815 boolean_t look_for_pages,
14816 mach_msg_type_number_t count)
14817 {
14818 struct vm_object *obj, *tmp_obj;
14819 vm_map_offset_t last_offset;
14820 int i;
14821 int ref_count;
14822 struct vm_object *shadow_object;
14823 unsigned short shadow_depth;
14824 boolean_t do_region_footprint;
14825 int effective_page_size, effective_page_shift;
14826 vm_map_offset_t effective_page_mask;
14827
14828 do_region_footprint = task_self_region_footprint();
14829
14830 if ((VME_OBJECT(entry) == 0) ||
14831 (entry->is_sub_map) ||
14832 (VME_OBJECT(entry)->phys_contiguous &&
14833 !entry->superpage_size)) {
14834 extended->share_mode = SM_EMPTY;
14835 extended->ref_count = 0;
14836 return;
14837 }
14838
14839 if (entry->superpage_size) {
14840 extended->shadow_depth = 0;
14841 extended->share_mode = SM_LARGE_PAGE;
14842 extended->ref_count = 1;
14843 extended->external_pager = 0;
14844
14845 /* TODO4K: Superpage in 4k mode? */
14846 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14847 extended->shadow_depth = 0;
14848 return;
14849 }
14850
14851 effective_page_shift = vm_self_region_page_shift(map);
14852 effective_page_size = (1 << effective_page_shift);
14853 effective_page_mask = effective_page_size - 1;
14854
14855 offset = vm_map_trunc_page(offset, effective_page_mask);
14856
14857 obj = VME_OBJECT(entry);
14858
14859 vm_object_lock(obj);
14860
14861 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14862 ref_count--;
14863 }
14864
14865 if (look_for_pages) {
14866 for (last_offset = offset + range;
14867 offset < last_offset;
14868 offset += effective_page_size, va += effective_page_size) {
14869 if (do_region_footprint) {
14870 int disp;
14871
14872 disp = 0;
14873 if (map->has_corpse_footprint) {
14874 /*
14875 * Query the page info data we saved
14876 * while forking the corpse.
14877 */
14878 vm_map_corpse_footprint_query_page_info(
14879 map,
14880 va,
14881 &disp);
14882 } else {
14883 /*
14884 * Query the pmap.
14885 */
14886 vm_map_footprint_query_page_info(
14887 map,
14888 entry,
14889 va,
14890 &disp);
14891 }
14892 if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
14893 extended->pages_resident++;
14894 }
14895 if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
14896 extended->pages_reusable++;
14897 }
14898 if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
14899 extended->pages_dirtied++;
14900 }
14901 if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14902 extended->pages_swapped_out++;
14903 }
14904 continue;
14905 }
14906
14907 vm_map_region_look_for_page(map, va, obj,
14908 vm_object_trunc_page(offset), ref_count,
14909 0, extended, count);
14910 }
14911
14912 if (do_region_footprint) {
14913 goto collect_object_info;
14914 }
14915 } else {
14916 collect_object_info:
14917 shadow_object = obj->shadow;
14918 shadow_depth = 0;
14919
14920 if (!(obj->internal)) {
14921 extended->external_pager = 1;
14922 }
14923
14924 if (shadow_object != VM_OBJECT_NULL) {
14925 vm_object_lock(shadow_object);
14926 for (;
14927 shadow_object != VM_OBJECT_NULL;
14928 shadow_depth++) {
14929 vm_object_t next_shadow;
14930
14931 if (!(shadow_object->internal)) {
14932 extended->external_pager = 1;
14933 }
14934
14935 next_shadow = shadow_object->shadow;
14936 if (next_shadow) {
14937 vm_object_lock(next_shadow);
14938 }
14939 vm_object_unlock(shadow_object);
14940 shadow_object = next_shadow;
14941 }
14942 }
14943 extended->shadow_depth = shadow_depth;
14944 }
14945
14946 if (extended->shadow_depth || entry->needs_copy) {
14947 extended->share_mode = SM_COW;
14948 } else {
14949 if (ref_count == 1) {
14950 extended->share_mode = SM_PRIVATE;
14951 } else {
14952 if (obj->true_share) {
14953 extended->share_mode = SM_TRUESHARED;
14954 } else {
14955 extended->share_mode = SM_SHARED;
14956 }
14957 }
14958 }
14959 extended->ref_count = ref_count - extended->shadow_depth;
14960
14961 for (i = 0; i < extended->shadow_depth; i++) {
14962 if ((tmp_obj = obj->shadow) == 0) {
14963 break;
14964 }
14965 vm_object_lock(tmp_obj);
14966 vm_object_unlock(obj);
14967
14968 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
14969 ref_count--;
14970 }
14971
14972 extended->ref_count += ref_count;
14973 obj = tmp_obj;
14974 }
14975 vm_object_unlock(obj);
14976
14977 if (extended->share_mode == SM_SHARED) {
14978 vm_map_entry_t cur;
14979 vm_map_entry_t last;
14980 int my_refs;
14981
14982 obj = VME_OBJECT(entry);
14983 last = vm_map_to_entry(map);
14984 my_refs = 0;
14985
14986 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14987 ref_count--;
14988 }
14989 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
14990 my_refs += vm_map_region_count_obj_refs(cur, obj);
14991 }
14992
14993 if (my_refs == ref_count) {
14994 extended->share_mode = SM_PRIVATE_ALIASED;
14995 } else if (my_refs > 1) {
14996 extended->share_mode = SM_SHARED_ALIASED;
14997 }
14998 }
14999 }
15000
15001
15002 /* object is locked on entry and locked on return */
15003
15004
15005 static void
15006 vm_map_region_look_for_page(
15007 __unused vm_map_t map,
15008 __unused vm_map_offset_t va,
15009 vm_object_t object,
15010 vm_object_offset_t offset,
15011 int max_refcnt,
15012 unsigned short depth,
15013 vm_region_extended_info_t extended,
15014 mach_msg_type_number_t count)
15015 {
15016 vm_page_t p;
15017 vm_object_t shadow;
15018 int ref_count;
15019 vm_object_t caller_object;
15020
15021 shadow = object->shadow;
15022 caller_object = object;
15023
15024
15025 while (TRUE) {
15026 if (!(object->internal)) {
15027 extended->external_pager = 1;
15028 }
15029
15030 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
15031 if (shadow && (max_refcnt == 1)) {
15032 extended->pages_shared_now_private++;
15033 }
15034
15035 if (!p->vmp_fictitious &&
15036 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
15037 extended->pages_dirtied++;
15038 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
15039 if (p->vmp_reusable || object->all_reusable) {
15040 extended->pages_reusable++;
15041 }
15042 }
15043
15044 extended->pages_resident++;
15045
15046 if (object != caller_object) {
15047 vm_object_unlock(object);
15048 }
15049
15050 return;
15051 }
15052 if (object->internal &&
15053 object->alive &&
15054 !object->terminating &&
15055 object->pager_ready) {
15056 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
15057 == VM_EXTERNAL_STATE_EXISTS) {
15058 /* the pager has that page */
15059 extended->pages_swapped_out++;
15060 if (object != caller_object) {
15061 vm_object_unlock(object);
15062 }
15063 return;
15064 }
15065 }
15066
15067 if (shadow) {
15068 vm_object_lock(shadow);
15069
15070 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
15071 ref_count--;
15072 }
15073
15074 if (++depth > extended->shadow_depth) {
15075 extended->shadow_depth = depth;
15076 }
15077
15078 if (ref_count > max_refcnt) {
15079 max_refcnt = ref_count;
15080 }
15081
15082 if (object != caller_object) {
15083 vm_object_unlock(object);
15084 }
15085
15086 offset = offset + object->vo_shadow_offset;
15087 object = shadow;
15088 shadow = object->shadow;
15089 continue;
15090 }
15091 if (object != caller_object) {
15092 vm_object_unlock(object);
15093 }
15094 break;
15095 }
15096 }
15097
15098 static int
15099 vm_map_region_count_obj_refs(
15100 vm_map_entry_t entry,
15101 vm_object_t object)
15102 {
15103 int ref_count;
15104 vm_object_t chk_obj;
15105 vm_object_t tmp_obj;
15106
15107 if (VME_OBJECT(entry) == 0) {
15108 return 0;
15109 }
15110
15111 if (entry->is_sub_map) {
15112 return 0;
15113 } else {
15114 ref_count = 0;
15115
15116 chk_obj = VME_OBJECT(entry);
15117 vm_object_lock(chk_obj);
15118
15119 while (chk_obj) {
15120 if (chk_obj == object) {
15121 ref_count++;
15122 }
15123 tmp_obj = chk_obj->shadow;
15124 if (tmp_obj) {
15125 vm_object_lock(tmp_obj);
15126 }
15127 vm_object_unlock(chk_obj);
15128
15129 chk_obj = tmp_obj;
15130 }
15131 }
15132 return ref_count;
15133 }
15134
15135
15136 /*
15137 * Routine: vm_map_simplify
15138 *
15139 * Description:
15140 * Attempt to simplify the map representation in
15141 * the vicinity of the given starting address.
15142 * Note:
15143 * This routine is intended primarily to keep the
15144 * kernel maps more compact -- they generally don't
15145 * benefit from the "expand a map entry" technology
15146 * at allocation time because the adjacent entry
15147 * is often wired down.
15148 */
15149 void
15150 vm_map_simplify_entry(
15151 vm_map_t map,
15152 vm_map_entry_t this_entry)
15153 {
15154 vm_map_entry_t prev_entry;
15155
15156 counter(c_vm_map_simplify_entry_called++);
15157
15158 prev_entry = this_entry->vme_prev;
15159
15160 if ((this_entry != vm_map_to_entry(map)) &&
15161 (prev_entry != vm_map_to_entry(map)) &&
15162
15163 (prev_entry->vme_end == this_entry->vme_start) &&
15164
15165 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
15166 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
15167 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
15168 prev_entry->vme_start))
15169 == VME_OFFSET(this_entry)) &&
15170
15171 (prev_entry->behavior == this_entry->behavior) &&
15172 (prev_entry->needs_copy == this_entry->needs_copy) &&
15173 (prev_entry->protection == this_entry->protection) &&
15174 (prev_entry->max_protection == this_entry->max_protection) &&
15175 (prev_entry->inheritance == this_entry->inheritance) &&
15176 (prev_entry->use_pmap == this_entry->use_pmap) &&
15177 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
15178 (prev_entry->no_cache == this_entry->no_cache) &&
15179 (prev_entry->permanent == this_entry->permanent) &&
15180 (prev_entry->map_aligned == this_entry->map_aligned) &&
15181 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
15182 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
15183 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
15184 /* from_reserved_zone: OK if that field doesn't match */
15185 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
15186 (prev_entry->vme_resilient_codesign ==
15187 this_entry->vme_resilient_codesign) &&
15188 (prev_entry->vme_resilient_media ==
15189 this_entry->vme_resilient_media) &&
15190 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
15191
15192 (prev_entry->wired_count == this_entry->wired_count) &&
15193 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
15194
15195 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
15196 (prev_entry->in_transition == FALSE) &&
15197 (this_entry->in_transition == FALSE) &&
15198 (prev_entry->needs_wakeup == FALSE) &&
15199 (this_entry->needs_wakeup == FALSE) &&
15200 (prev_entry->is_shared == this_entry->is_shared) &&
15201 (prev_entry->superpage_size == FALSE) &&
15202 (this_entry->superpage_size == FALSE)
15203 ) {
15204 vm_map_store_entry_unlink(map, prev_entry);
15205 assert(prev_entry->vme_start < this_entry->vme_end);
15206 if (prev_entry->map_aligned) {
15207 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
15208 VM_MAP_PAGE_MASK(map)));
15209 }
15210 this_entry->vme_start = prev_entry->vme_start;
15211 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
15212
15213 if (map->holelistenabled) {
15214 vm_map_store_update_first_free(map, this_entry, TRUE);
15215 }
15216
15217 if (prev_entry->is_sub_map) {
15218 vm_map_deallocate(VME_SUBMAP(prev_entry));
15219 } else {
15220 vm_object_deallocate(VME_OBJECT(prev_entry));
15221 }
15222 vm_map_entry_dispose(map, prev_entry);
15223 SAVE_HINT_MAP_WRITE(map, this_entry);
15224 counter(c_vm_map_simplified++);
15225 }
15226 }
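/*
 * Editor's note -- a hypothetical example of a successful coalesce: two
 * adjacent entries [0x1000, 0x2000) and [0x2000, 0x3000), backed by the same
 * VM object at offsets 0 and 0x1000 and with identical protections and flags,
 * are merged into a single entry [0x1000, 0x3000) at offset 0; the previous
 * entry is unlinked and its object or submap reference dropped, as above.
 */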
15227
15228 void
15229 vm_map_simplify(
15230 vm_map_t map,
15231 vm_map_offset_t start)
15232 {
15233 vm_map_entry_t this_entry;
15234
15235 vm_map_lock(map);
15236 if (vm_map_lookup_entry(map, start, &this_entry)) {
15237 vm_map_simplify_entry(map, this_entry);
15238 vm_map_simplify_entry(map, this_entry->vme_next);
15239 }
15240 counter(c_vm_map_simplify_called++);
15241 vm_map_unlock(map);
15242 }
15243
15244 static void
15245 vm_map_simplify_range(
15246 vm_map_t map,
15247 vm_map_offset_t start,
15248 vm_map_offset_t end)
15249 {
15250 vm_map_entry_t entry;
15251
15252 /*
15253 * The map should be locked (for "write") by the caller.
15254 */
15255
15256 if (start >= end) {
15257 /* invalid address range */
15258 return;
15259 }
15260
15261 start = vm_map_trunc_page(start,
15262 VM_MAP_PAGE_MASK(map));
15263 end = vm_map_round_page(end,
15264 VM_MAP_PAGE_MASK(map));
15265
15266 if (!vm_map_lookup_entry(map, start, &entry)) {
15267 /* "start" is not mapped and "entry" ends before "start" */
15268 if (entry == vm_map_to_entry(map)) {
15269 /* start with first entry in the map */
15270 entry = vm_map_first_entry(map);
15271 } else {
15272 /* start with next entry */
15273 entry = entry->vme_next;
15274 }
15275 }
15276
15277 while (entry != vm_map_to_entry(map) &&
15278 entry->vme_start <= end) {
15279 /* try and coalesce "entry" with its previous entry */
15280 vm_map_simplify_entry(map, entry);
15281 entry = entry->vme_next;
15282 }
15283 }
15284
15285
15286 /*
15287 * Routine: vm_map_machine_attribute
15288 * Purpose:
15289 * Provide machine-specific attributes to mappings,
15290 * such as cacheability etc. for machines that provide
15291 * them. NUMA architectures and machines with big/strange
15292 * caches will use this.
15293 * Note:
15294 * Responsibilities for locking and checking are handled here,
15295 * everything else in the pmap module. If any non-volatile
15296 * information must be kept, the pmap module should handle
15297 * it itself. [This assumes that attributes do not
15298 * need to be inherited, which seems ok to me]
15299 */
15300 kern_return_t
15301 vm_map_machine_attribute(
15302 vm_map_t map,
15303 vm_map_offset_t start,
15304 vm_map_offset_t end,
15305 vm_machine_attribute_t attribute,
15306 vm_machine_attribute_val_t* value) /* IN/OUT */
15307 {
15308 kern_return_t ret;
15309 vm_map_size_t sync_size;
15310 vm_map_entry_t entry;
15311
15312 if (start < vm_map_min(map) || end > vm_map_max(map)) {
15313 return KERN_INVALID_ADDRESS;
15314 }
15315
15316 /* Figure how much memory we need to flush (in page increments) */
15317 sync_size = end - start;
15318
15319 vm_map_lock(map);
15320
15321 if (attribute != MATTR_CACHE) {
15322 /* If we don't have to find physical addresses, we */
15323 /* don't have to do an explicit traversal here. */
15324 ret = pmap_attribute(map->pmap, start, end - start,
15325 attribute, value);
15326 vm_map_unlock(map);
15327 return ret;
15328 }
15329
15330 ret = KERN_SUCCESS; /* Assume it all worked */
15331
15332 while (sync_size) {
15333 if (vm_map_lookup_entry(map, start, &entry)) {
15334 vm_map_size_t sub_size;
15335 if ((entry->vme_end - start) > sync_size) {
15336 sub_size = sync_size;
15337 sync_size = 0;
15338 } else {
15339 sub_size = entry->vme_end - start;
15340 sync_size -= sub_size;
15341 }
15342 if (entry->is_sub_map) {
15343 vm_map_offset_t sub_start;
15344 vm_map_offset_t sub_end;
15345
15346 sub_start = (start - entry->vme_start)
15347 + VME_OFFSET(entry);
15348 sub_end = sub_start + sub_size;
15349 vm_map_machine_attribute(
15350 VME_SUBMAP(entry),
15351 sub_start,
15352 sub_end,
15353 attribute, value);
15354 } else {
15355 if (VME_OBJECT(entry)) {
15356 vm_page_t m;
15357 vm_object_t object;
15358 vm_object_t base_object;
15359 vm_object_t last_object;
15360 vm_object_offset_t offset;
15361 vm_object_offset_t base_offset;
15362 vm_map_size_t range;
15363 range = sub_size;
15364 offset = (start - entry->vme_start)
15365 + VME_OFFSET(entry);
15366 offset = vm_object_trunc_page(offset);
15367 base_offset = offset;
15368 object = VME_OBJECT(entry);
15369 base_object = object;
15370 last_object = NULL;
15371
15372 vm_object_lock(object);
15373
15374 while (range) {
15375 m = vm_page_lookup(
15376 object, offset);
15377
15378 if (m && !m->vmp_fictitious) {
15379 ret =
15380 pmap_attribute_cache_sync(
15381 VM_PAGE_GET_PHYS_PAGE(m),
15382 PAGE_SIZE,
15383 attribute, value);
15384 } else if (object->shadow) {
15385 offset = offset + object->vo_shadow_offset;
15386 last_object = object;
15387 object = object->shadow;
15388 vm_object_lock(last_object->shadow);
15389 vm_object_unlock(last_object);
15390 continue;
15391 }
15392 if (range < PAGE_SIZE) {
15393 range = 0;
15394 } else {
15395 range -= PAGE_SIZE;
15396 }
15397
15398 if (base_object != object) {
15399 vm_object_unlock(object);
15400 vm_object_lock(base_object);
15401 object = base_object;
15402 }
15403 /* Bump to the next page */
15404 base_offset += PAGE_SIZE;
15405 offset = base_offset;
15406 }
15407 vm_object_unlock(object);
15408 }
15409 }
15410 start += sub_size;
15411 } else {
15412 vm_map_unlock(map);
15413 return KERN_FAILURE;
15414 }
15415 }
15416
15417 vm_map_unlock(map);
15418
15419 return ret;
15420 }
15421
15422 /*
15423 * vm_map_behavior_set:
15424 *
15425 * Sets the paging reference behavior of the specified address
15426 * range in the target map. Paging reference behavior affects
15427 * how pagein operations resulting from faults on the map will be
15428 * clustered.
15429 */
15430 kern_return_t
15431 vm_map_behavior_set(
15432 vm_map_t map,
15433 vm_map_offset_t start,
15434 vm_map_offset_t end,
15435 vm_behavior_t new_behavior)
15436 {
15437 vm_map_entry_t entry;
15438 vm_map_entry_t temp_entry;
15439
15440 if (start > end ||
15441 start < vm_map_min(map) ||
15442 end > vm_map_max(map)) {
15443 return KERN_NO_SPACE;
15444 }
15445
15446 switch (new_behavior) {
15447 /*
15448 * This first block of behaviors all set a persistent state on the specified
15449 * memory range. All we have to do here is to record the desired behavior
15450 * in the vm_map_entry_t's.
15451 */
15452
15453 case VM_BEHAVIOR_DEFAULT:
15454 case VM_BEHAVIOR_RANDOM:
15455 case VM_BEHAVIOR_SEQUENTIAL:
15456 case VM_BEHAVIOR_RSEQNTL:
15457 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
15458 vm_map_lock(map);
15459
15460 /*
15461 * The entire address range must be valid for the map.
15462 * Note that vm_map_range_check() does a
15463 * vm_map_lookup_entry() internally and returns the
15464 * entry containing the start of the address range if
15465 * the entire range is valid.
15466 */
15467 if (vm_map_range_check(map, start, end, &temp_entry)) {
15468 entry = temp_entry;
15469 vm_map_clip_start(map, entry, start);
15470 } else {
15471 vm_map_unlock(map);
15472 return KERN_INVALID_ADDRESS;
15473 }
15474
15475 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
15476 vm_map_clip_end(map, entry, end);
15477 if (entry->is_sub_map) {
15478 assert(!entry->use_pmap);
15479 }
15480
15481 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
15482 entry->zero_wired_pages = TRUE;
15483 } else {
15484 entry->behavior = new_behavior;
15485 }
15486 entry = entry->vme_next;
15487 }
15488
15489 vm_map_unlock(map);
15490 break;
15491
15492 /*
15493 * The rest of these are different from the above in that they cause
15494 * an immediate action to take place as opposed to setting a behavior that
15495 * affects future actions.
15496 */
15497
15498 case VM_BEHAVIOR_WILLNEED:
15499 return vm_map_willneed(map, start, end);
15500
15501 case VM_BEHAVIOR_DONTNEED:
15502 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
15503
15504 case VM_BEHAVIOR_FREE:
15505 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
15506
15507 case VM_BEHAVIOR_REUSABLE:
15508 return vm_map_reusable_pages(map, start, end);
15509
15510 case VM_BEHAVIOR_REUSE:
15511 return vm_map_reuse_pages(map, start, end);
15512
15513 case VM_BEHAVIOR_CAN_REUSE:
15514 return vm_map_can_reuse(map, start, end);
15515
15516 #if MACH_ASSERT
15517 case VM_BEHAVIOR_PAGEOUT:
15518 return vm_map_pageout(map, start, end);
15519 #endif /* MACH_ASSERT */
15520
15521 default:
15522 return KERN_INVALID_ARGUMENT;
15523 }
15524
15525 return KERN_SUCCESS;
15526 }
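/*
 * Editor's note -- from user space these behaviors are normally reached
 * through madvise(2).  The sketch below is a user-space program (not kernel
 * code); the MADV_* to VM_BEHAVIOR_* correspondence noted in the comments
 * reflects the BSD madvise() shim as the editor understands it.
 */
#if 0   /* user-space sketch, not kernel code */
#include <sys/mman.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 16 * (size_t)getpagesize();
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (buf == MAP_FAILED) {
		return 1;
	}

	memset(buf, 0xaa, len);                 /* dirty the pages            */
	madvise(buf, len, MADV_FREE_REUSABLE);  /* ~ VM_BEHAVIOR_REUSABLE     */
	madvise(buf, len, MADV_FREE_REUSE);     /* ~ VM_BEHAVIOR_REUSE        */
	madvise(buf, len, MADV_WILLNEED);       /* ~ VM_BEHAVIOR_WILLNEED     */

	munmap(buf, len);
	return 0;
}
#endif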
15527
15528
15529 /*
15530 * Internals for madvise(MADV_WILLNEED) system call.
15531 *
15532 * The implementation is to:
15533 * a) read ahead if the mapping corresponds to a mapped regular file, or
15534 * b) fault in the pages (zero-fill, decompress etc.) if it's an anonymous mapping
15535 */
15536
15537
15538 static kern_return_t
15539 vm_map_willneed(
15540 vm_map_t map,
15541 vm_map_offset_t start,
15542 vm_map_offset_t end
15543 )
15544 {
15545 vm_map_entry_t entry;
15546 vm_object_t object;
15547 memory_object_t pager;
15548 struct vm_object_fault_info fault_info = {};
15549 kern_return_t kr;
15550 vm_object_size_t len;
15551 vm_object_offset_t offset;
15552
15553 fault_info.interruptible = THREAD_UNINT; /* ignored value */
15554 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
15555 fault_info.stealth = TRUE;
15556
15557 /*
15558 * The MADV_WILLNEED operation doesn't require any changes to the
15559 * vm_map_entry_t's, so the read lock is sufficient.
15560 */
15561
15562 vm_map_lock_read(map);
15563
15564 /*
15565 * The madvise semantics require that the address range be fully
15566 * allocated with no holes. Otherwise, we're required to return
15567 * an error.
15568 */
15569
15570 if (!vm_map_range_check(map, start, end, &entry)) {
15571 vm_map_unlock_read(map);
15572 return KERN_INVALID_ADDRESS;
15573 }
15574
15575 /*
15576 * Examine each vm_map_entry_t in the range.
15577 */
15578 for (; entry != vm_map_to_entry(map) && start < end;) {
15579 /*
15580 * The first time through, the start address could be anywhere
15581 * within the vm_map_entry we found. So adjust the offset to
15582 * correspond. After that, the offset will always be zero to
15583 * correspond to the beginning of the current vm_map_entry.
15584 */
15585 offset = (start - entry->vme_start) + VME_OFFSET(entry);
15586
15587 /*
15588 * Set the length so we don't go beyond the end of the
15589 * map_entry or beyond the end of the range we were given.
15590 * This range could also span multiple map entries, all of which
15591 * map different files, so make sure we only do the right amount
15592 * of I/O for each object. Note that it's possible for there
15593 * to be multiple map entries all referring to the same object
15594 * but with different page permissions, but it's not worth
15595 * trying to optimize that case.
15596 */
15597 len = MIN(entry->vme_end - start, end - start);
15598
15599 if ((vm_size_t) len != len) {
15600 /* 32-bit overflow */
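/* clamp to the largest page-aligned value that fits in vm_size_t */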
15601 len = (vm_size_t) (0 - PAGE_SIZE);
15602 }
15603 fault_info.cluster_size = (vm_size_t) len;
15604 fault_info.lo_offset = offset;
15605 fault_info.hi_offset = offset + len;
15606 fault_info.user_tag = VME_ALIAS(entry);
15607 fault_info.pmap_options = 0;
15608 if (entry->iokit_acct ||
15609 (!entry->is_sub_map && !entry->use_pmap)) {
15610 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15611 }
15612
15613 /*
15614 * If the entry is a submap OR there's no read permission
15615 * to this mapping, then just skip it.
15616 */
15617 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15618 entry = entry->vme_next;
15619 start = entry->vme_start;
15620 continue;
15621 }
15622
15623 object = VME_OBJECT(entry);
15624
15625 if (object == NULL ||
15626 (object && object->internal)) {
15627 /*
15628 * Memory range backed by anonymous memory.
15629 */
15630 vm_size_t region_size = 0, effective_page_size = 0;
15631 vm_map_offset_t addr = 0, effective_page_mask = 0;
15632
15633 region_size = len;
15634 addr = start;
15635
15636 effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
15637 effective_page_size = effective_page_mask + 1;
15638
15639 vm_map_unlock_read(map);
15640
15641 while (region_size) {
15642 vm_pre_fault(
15643 vm_map_trunc_page(addr, effective_page_mask),
15644 VM_PROT_READ | VM_PROT_WRITE);
15645
15646 region_size -= effective_page_size;
15647 addr += effective_page_size;
15648 }
15649 } else {
15650 /*
15651 * Find the file object backing this map entry. If there is
15652 * none, then we simply ignore the "will need" advice for this
15653 * entry and go on to the next one.
15654 */
15655 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15656 entry = entry->vme_next;
15657 start = entry->vme_start;
15658 continue;
15659 }
15660
15661 vm_object_paging_begin(object);
15662 pager = object->pager;
15663 vm_object_unlock(object);
15664
15665 /*
15666 * The data_request() could take a long time, so let's
15667 * release the map lock to avoid blocking other threads.
15668 */
15669 vm_map_unlock_read(map);
15670
15671 /*
15672 * Get the data from the object asynchronously.
15673 *
15674 * Note that memory_object_data_request() places limits on the
15675 * amount of I/O it will do. Regardless of the len we
15676 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15677 * silently truncates the len to that size. This isn't
15678 * necessarily bad since madvise shouldn't really be used to
15679 * page in unlimited amounts of data. Other Unix variants
15680 * limit the willneed case as well. If this turns out to be an
15681 * issue for developers, then we can always adjust the policy
15682 * here and still be backwards compatible since this is all
15683 * just "advice".
15684 */
15685 kr = memory_object_data_request(
15686 pager,
15687 vm_object_trunc_page(offset) + object->paging_offset,
15688 0, /* ignored */
15689 VM_PROT_READ,
15690 (memory_object_fault_info_t)&fault_info);
15691
15692 vm_object_lock(object);
15693 vm_object_paging_end(object);
15694 vm_object_unlock(object);
15695
15696 /*
15697 * If we couldn't do the I/O for some reason, just give up on
15698 * the madvise. We still return success to the user since
15699 * madvise isn't supposed to fail when the advice can't be
15700 * taken.
15701 */
15702
15703 if (kr != KERN_SUCCESS) {
15704 return KERN_SUCCESS;
15705 }
15706 }
15707
15708 start += len;
15709 if (start >= end) {
15710 /* done */
15711 return KERN_SUCCESS;
15712 }
15713
15714 /* look up next entry */
15715 vm_map_lock_read(map);
15716 if (!vm_map_lookup_entry(map, start, &entry)) {
15717 /*
15718 * There's a new hole in the address range.
15719 */
15720 vm_map_unlock_read(map);
15721 return KERN_INVALID_ADDRESS;
15722 }
15723 }
15724
15725 vm_map_unlock_read(map);
15726 return KERN_SUCCESS;
15727 }
15728
15729 static boolean_t
15730 vm_map_entry_is_reusable(
15731 vm_map_entry_t entry)
15732 {
15733 /* Only user map entries */
15734
15735 vm_object_t object;
15736
15737 if (entry->is_sub_map) {
15738 return FALSE;
15739 }
15740
15741 switch (VME_ALIAS(entry)) {
15742 case VM_MEMORY_MALLOC:
15743 case VM_MEMORY_MALLOC_SMALL:
15744 case VM_MEMORY_MALLOC_LARGE:
15745 case VM_MEMORY_REALLOC:
15746 case VM_MEMORY_MALLOC_TINY:
15747 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15748 case VM_MEMORY_MALLOC_LARGE_REUSED:
15749 /*
15750 * This is a malloc() memory region: check if it's still
15751 * in its original state and can be re-used for more
15752 * malloc() allocations.
15753 */
15754 break;
15755 default:
15756 /*
15757 * Not a malloc() memory region: let the caller decide if
15758 * it's re-usable.
15759 */
15760 return TRUE;
15761 }
15762
15763 if (/*entry->is_shared ||*/
15764 entry->is_sub_map ||
15765 entry->in_transition ||
15766 entry->protection != VM_PROT_DEFAULT ||
15767 entry->max_protection != VM_PROT_ALL ||
15768 entry->inheritance != VM_INHERIT_DEFAULT ||
15769 entry->no_cache ||
15770 entry->permanent ||
15771 entry->superpage_size != FALSE ||
15772 entry->zero_wired_pages ||
15773 entry->wired_count != 0 ||
15774 entry->user_wired_count != 0) {
15775 return FALSE;
15776 }
15777
15778 object = VME_OBJECT(entry);
15779 if (object == VM_OBJECT_NULL) {
15780 return TRUE;
15781 }
15782 if (
15783 #if 0
15784 /*
15785 * Let's proceed even if the VM object is potentially
15786 * shared.
15787 * We check for this later when processing the actual
15788 * VM pages, so the contents will be safe if shared.
15789 *
15790 * But we can still mark this memory region as "reusable" to
15791 * acknowledge that the caller did let us know that the memory
15792 * could be re-used and should not be penalized for holding
15793 * on to it. This allows its "resident size" to not include
15794 * the reusable range.
15795 */
15796 object->ref_count == 1 &&
15797 #endif
15798 object->wired_page_count == 0 &&
15799 object->copy == VM_OBJECT_NULL &&
15800 object->shadow == VM_OBJECT_NULL &&
15801 object->internal &&
15802 object->purgable == VM_PURGABLE_DENY &&
15803 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15804 !object->true_share &&
15805 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15806 !object->code_signed) {
15807 return TRUE;
15808 }
15809 return FALSE;
15810 }
15811
15812 static kern_return_t
15813 vm_map_reuse_pages(
15814 vm_map_t map,
15815 vm_map_offset_t start,
15816 vm_map_offset_t end)
15817 {
15818 vm_map_entry_t entry;
15819 vm_object_t object;
15820 vm_object_offset_t start_offset, end_offset;
15821
15822 /*
15823 * The MADV_REUSE operation doesn't require any changes to the
15824 * vm_map_entry_t's, so the read lock is sufficient.
15825 */
15826
15827 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15828 /*
15829 * XXX TODO4K
15830 * need to figure out what reusable means for a
15831 * portion of a native page.
15832 */
15833 return KERN_SUCCESS;
15834 }
15835
15836 vm_map_lock_read(map);
15837 assert(map->pmap != kernel_pmap); /* protect alias access */
15838
15839 /*
15840 * The madvise semantics require that the address range be fully
15841 * allocated with no holes. Otherwise, we're required to return
15842 * an error.
15843 */
15844
15845 if (!vm_map_range_check(map, start, end, &entry)) {
15846 vm_map_unlock_read(map);
15847 vm_page_stats_reusable.reuse_pages_failure++;
15848 return KERN_INVALID_ADDRESS;
15849 }
15850
15851 /*
15852 * Examine each vm_map_entry_t in the range.
15853 */
15854 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15855 entry = entry->vme_next) {
15856 /*
15857 * Sanity check on the VM map entry.
15858 */
15859 if (!vm_map_entry_is_reusable(entry)) {
15860 vm_map_unlock_read(map);
15861 vm_page_stats_reusable.reuse_pages_failure++;
15862 return KERN_INVALID_ADDRESS;
15863 }
15864
15865 /*
15866 * The first time through, the start address could be anywhere
15867 * within the vm_map_entry we found. So adjust the offset to
15868 * correspond.
15869 */
15870 if (entry->vme_start < start) {
15871 start_offset = start - entry->vme_start;
15872 } else {
15873 start_offset = 0;
15874 }
15875 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15876 start_offset += VME_OFFSET(entry);
15877 end_offset += VME_OFFSET(entry);
15878
15879 assert(!entry->is_sub_map);
15880 object = VME_OBJECT(entry);
15881 if (object != VM_OBJECT_NULL) {
15882 vm_object_lock(object);
15883 vm_object_reuse_pages(object, start_offset, end_offset,
15884 TRUE);
15885 vm_object_unlock(object);
15886 }
15887
15888 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15889 /*
15890 * XXX
15891 * We do not hold the VM map exclusively here.
15892 * The "alias" field is not that critical, so it's
15893 * safe to update it here, as long as it is the only
15894 * one that can be modified while holding the VM map
15895 * "shared".
15896 */
15897 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15898 }
15899 }
15900
15901 vm_map_unlock_read(map);
15902 vm_page_stats_reusable.reuse_pages_success++;
15903 return KERN_SUCCESS;
15904 }
15905
15906
15907 static kern_return_t
15908 vm_map_reusable_pages(
15909 vm_map_t map,
15910 vm_map_offset_t start,
15911 vm_map_offset_t end)
15912 {
15913 vm_map_entry_t entry;
15914 vm_object_t object;
15915 vm_object_offset_t start_offset, end_offset;
15916 vm_map_offset_t pmap_offset;
15917
15918 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15919 /*
15920 * XXX TODO4K
15921 * need to figure out what reusable means for a portion
15922 * of a native page.
15923 */
15924 return KERN_SUCCESS;
15925 }
15926
15927 /*
15928 * The MADV_REUSABLE operation doesn't require any changes to the
15929 * vm_map_entry_t's, so the read lock is sufficient.
15930 */
15931
15932 vm_map_lock_read(map);
15933 assert(map->pmap != kernel_pmap); /* protect alias access */
15934
15935 /*
15936 * The madvise semantics require that the address range be fully
15937 * allocated with no holes. Otherwise, we're required to return
15938 * an error.
15939 */
15940
15941 if (!vm_map_range_check(map, start, end, &entry)) {
15942 vm_map_unlock_read(map);
15943 vm_page_stats_reusable.reusable_pages_failure++;
15944 return KERN_INVALID_ADDRESS;
15945 }
15946
15947 /*
15948 * Examine each vm_map_entry_t in the range.
15949 */
15950 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15951 entry = entry->vme_next) {
15952 int kill_pages = 0;
15953
15954 /*
15955 * Sanity check on the VM map entry.
15956 */
15957 if (!vm_map_entry_is_reusable(entry)) {
15958 vm_map_unlock_read(map);
15959 vm_page_stats_reusable.reusable_pages_failure++;
15960 return KERN_INVALID_ADDRESS;
15961 }
15962
15963 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15964 /* not writable: can't discard contents */
15965 vm_map_unlock_read(map);
15966 vm_page_stats_reusable.reusable_nonwritable++;
15967 vm_page_stats_reusable.reusable_pages_failure++;
15968 return KERN_PROTECTION_FAILURE;
15969 }
15970
15971 /*
15972 * The first time through, the start address could be anywhere
15973 * within the vm_map_entry we found. So adjust the offset to
15974 * correspond.
15975 */
15976 if (entry->vme_start < start) {
15977 start_offset = start - entry->vme_start;
15978 pmap_offset = start;
15979 } else {
15980 start_offset = 0;
15981 pmap_offset = entry->vme_start;
15982 }
15983 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15984 start_offset += VME_OFFSET(entry);
15985 end_offset += VME_OFFSET(entry);
15986
15987 assert(!entry->is_sub_map);
15988 object = VME_OBJECT(entry);
15989 if (object == VM_OBJECT_NULL) {
15990 continue;
15991 }
15992
15993
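/*
 * Pages can only be discarded (kill_pages == 1) when the object is not
 * copy-on-write shared (single reference, or no symmetric copy pending),
 * is not shadowed, and the entry does not use alternate (IOKit)
 * accounting; otherwise we only account for the request (kill_pages == -1).
 */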
15994 vm_object_lock(object);
15995 if (((object->ref_count == 1) ||
15996 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15997 object->copy == VM_OBJECT_NULL)) &&
15998 object->shadow == VM_OBJECT_NULL &&
15999 /*
16000 * "iokit_acct" entries are billed for their virtual size
16001 * (rather than for their resident pages only), so they
16002 * wouldn't benefit from making pages reusable, and it
16003 * would be hard to keep track of pages that are both
16004 * "iokit_acct" and "reusable" in the pmap stats and
16005 * ledgers.
16006 */
16007 !(entry->iokit_acct ||
16008 (!entry->is_sub_map && !entry->use_pmap))) {
16009 if (object->ref_count != 1) {
16010 vm_page_stats_reusable.reusable_shared++;
16011 }
16012 kill_pages = 1;
16013 } else {
16014 kill_pages = -1;
16015 }
16016 if (kill_pages != -1) {
16017 vm_object_deactivate_pages(object,
16018 start_offset,
16019 end_offset - start_offset,
16020 kill_pages,
16021 TRUE /*reusable_pages*/,
16022 map->pmap,
16023 pmap_offset);
16024 } else {
16025 vm_page_stats_reusable.reusable_pages_shared++;
16026 }
16027 vm_object_unlock(object);
16028
16029 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
16030 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
16031 /*
16032 * XXX
16033 * We do not hold the VM map exclusively here.
16034 * The "alias" field is not that critical, so it's
16035 * safe to update it here, as long as it is the only
16036 * one that can be modified while holding the VM map
16037 * "shared".
16038 */
16039 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
16040 }
16041 }
16042
16043 vm_map_unlock_read(map);
16044 vm_page_stats_reusable.reusable_pages_success++;
16045 return KERN_SUCCESS;
16046 }
16047
16048
16049 static kern_return_t
16050 vm_map_can_reuse(
16051 vm_map_t map,
16052 vm_map_offset_t start,
16053 vm_map_offset_t end)
16054 {
16055 vm_map_entry_t entry;
16056
16057 /*
16058 * The MADV_CAN_REUSE operation doesn't require any changes to the
16059 * vm_map_entry_t's, so the read lock is sufficient.
16060 */
16061
16062 vm_map_lock_read(map);
16063 assert(map->pmap != kernel_pmap); /* protect alias access */
16064
16065 /*
16066 * The madvise semantics require that the address range be fully
16067 * allocated with no holes. Otherwise, we're required to return
16068 * an error.
16069 */
16070
16071 if (!vm_map_range_check(map, start, end, &entry)) {
16072 vm_map_unlock_read(map);
16073 vm_page_stats_reusable.can_reuse_failure++;
16074 return KERN_INVALID_ADDRESS;
16075 }
16076
16077 /*
16078 * Examine each vm_map_entry_t in the range.
16079 */
16080 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16081 entry = entry->vme_next) {
16082 /*
16083 * Sanity check on the VM map entry.
16084 */
16085 if (!vm_map_entry_is_reusable(entry)) {
16086 vm_map_unlock_read(map);
16087 vm_page_stats_reusable.can_reuse_failure++;
16088 return KERN_INVALID_ADDRESS;
16089 }
16090 }
16091
16092 vm_map_unlock_read(map);
16093 vm_page_stats_reusable.can_reuse_success++;
16094 return KERN_SUCCESS;
16095 }
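/*
 * User-level illustration only (a sketch): the reusable/reuse pair that
 * drives the routines above is typically issued by the malloc library on
 * page-aligned regions it owns, roughly:
 *
 *     char *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *         MAP_ANON | MAP_PRIVATE, -1, 0);
 *     ... use the memory ...
 *     madvise(p, size, MADV_FREE_REUSABLE);  // -> vm_map_reusable_pages()
 *     ... later, before touching it again ...
 *     madvise(p, size, MADV_FREE_REUSE);     // -> vm_map_reuse_pages()
 *
 * MADV_CAN_REUSE (-> vm_map_can_reuse()) only checks that the range would
 * be eligible, without changing any page state.
 */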
16096
16097
16098 #if MACH_ASSERT
16099 static kern_return_t
16100 vm_map_pageout(
16101 vm_map_t map,
16102 vm_map_offset_t start,
16103 vm_map_offset_t end)
16104 {
16105 vm_map_entry_t entry;
16106
16107 /*
16108 * The MADV_PAGEOUT operation doesn't require any changes to the
16109 * vm_map_entry_t's, so the read lock is sufficient.
16110 */
16111
16112 vm_map_lock_read(map);
16113
16114 /*
16115 * The madvise semantics require that the address range be fully
16116 * allocated with no holes. Otherwise, we're required to return
16117 * an error.
16118 */
16119
16120 if (!vm_map_range_check(map, start, end, &entry)) {
16121 vm_map_unlock_read(map);
16122 return KERN_INVALID_ADDRESS;
16123 }
16124
16125 /*
16126 * Examine each vm_map_entry_t in the range.
16127 */
16128 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16129 entry = entry->vme_next) {
16130 vm_object_t object;
16131
16132 /*
16133 * Sanity check on the VM map entry.
16134 */
16135 if (entry->is_sub_map) {
16136 vm_map_t submap;
16137 vm_map_offset_t submap_start;
16138 vm_map_offset_t submap_end;
16139 vm_map_entry_t submap_entry;
16140
16141 submap = VME_SUBMAP(entry);
16142 submap_start = VME_OFFSET(entry);
16143 submap_end = submap_start + (entry->vme_end -
16144 entry->vme_start);
16145
16146 vm_map_lock_read(submap);
16147
16148 if (!vm_map_range_check(submap,
16149 submap_start,
16150 submap_end,
16151 &submap_entry)) {
16152 vm_map_unlock_read(submap);
16153 vm_map_unlock_read(map);
16154 return KERN_INVALID_ADDRESS;
16155 }
16156
16157 object = VME_OBJECT(submap_entry);
16158 if (submap_entry->is_sub_map ||
16159 object == VM_OBJECT_NULL ||
16160 !object->internal) {
16161 vm_map_unlock_read(submap);
16162 continue;
16163 }
16164
16165 vm_object_pageout(object);
16166
16167 vm_map_unlock_read(submap);
16168 submap = VM_MAP_NULL;
16169 submap_entry = VM_MAP_ENTRY_NULL;
16170 continue;
16171 }
16172
16173 object = VME_OBJECT(entry);
16174 if (entry->is_sub_map ||
16175 object == VM_OBJECT_NULL ||
16176 !object->internal) {
16177 continue;
16178 }
16179
16180 vm_object_pageout(object);
16181 }
16182
16183 vm_map_unlock_read(map);
16184 return KERN_SUCCESS;
16185 }
16186 #endif /* MACH_ASSERT */
16187
16188
16189 /*
16190 * Routine: vm_map_entry_insert
16191 *
16192 * Description: This routine creates and inserts a new vm_map_entry in a locked map.
16193 */
16194 vm_map_entry_t
16195 vm_map_entry_insert(
16196 vm_map_t map,
16197 vm_map_entry_t insp_entry,
16198 vm_map_offset_t start,
16199 vm_map_offset_t end,
16200 vm_object_t object,
16201 vm_object_offset_t offset,
16202 boolean_t needs_copy,
16203 boolean_t is_shared,
16204 boolean_t in_transition,
16205 vm_prot_t cur_protection,
16206 vm_prot_t max_protection,
16207 vm_behavior_t behavior,
16208 vm_inherit_t inheritance,
16209 unsigned short wired_count,
16210 boolean_t no_cache,
16211 boolean_t permanent,
16212 boolean_t no_copy_on_read,
16213 unsigned int superpage_size,
16214 boolean_t clear_map_aligned,
16215 boolean_t is_submap,
16216 boolean_t used_for_jit,
16217 int alias,
16218 boolean_t translated_allow_execute)
16219 {
16220 vm_map_entry_t new_entry;
16221
16222 assert(insp_entry != (vm_map_entry_t)0);
16223 vm_map_lock_assert_exclusive(map);
16224
16225 #if DEVELOPMENT || DEBUG
16226 vm_object_offset_t end_offset = 0;
16227 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
16228 #endif /* DEVELOPMENT || DEBUG */
16229
16230 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
16231
16232 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
16233 new_entry->map_aligned = TRUE;
16234 } else {
16235 new_entry->map_aligned = FALSE;
16236 }
16237 if (clear_map_aligned &&
16238 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
16239 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
16240 new_entry->map_aligned = FALSE;
16241 }
16242
16243 new_entry->vme_start = start;
16244 new_entry->vme_end = end;
16245 if (new_entry->map_aligned) {
16246 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
16247 VM_MAP_PAGE_MASK(map)));
16248 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
16249 VM_MAP_PAGE_MASK(map)));
16250 } else {
16251 assert(page_aligned(new_entry->vme_start));
16252 assert(page_aligned(new_entry->vme_end));
16253 }
16254 assert(new_entry->vme_start < new_entry->vme_end);
16255
16256 VME_OBJECT_SET(new_entry, object);
16257 VME_OFFSET_SET(new_entry, offset);
16258 new_entry->is_shared = is_shared;
16259 new_entry->is_sub_map = is_submap;
16260 new_entry->needs_copy = needs_copy;
16261 new_entry->in_transition = in_transition;
16262 new_entry->needs_wakeup = FALSE;
16263 new_entry->inheritance = inheritance;
16264 new_entry->protection = cur_protection;
16265 new_entry->max_protection = max_protection;
16266 new_entry->behavior = behavior;
16267 new_entry->wired_count = wired_count;
16268 new_entry->user_wired_count = 0;
16269 if (is_submap) {
16270 /*
16271 * submap: "use_pmap" means "nested".
16272 * default: false.
16273 */
16274 new_entry->use_pmap = FALSE;
16275 } else {
16276 /*
16277 * object: "use_pmap" means "use pmap accounting" for footprint.
16278 * default: true.
16279 */
16280 new_entry->use_pmap = TRUE;
16281 }
16282 VME_ALIAS_SET(new_entry, alias);
16283 new_entry->zero_wired_pages = FALSE;
16284 new_entry->no_cache = no_cache;
16285 new_entry->permanent = permanent;
16286 if (superpage_size) {
16287 new_entry->superpage_size = TRUE;
16288 } else {
16289 new_entry->superpage_size = FALSE;
16290 }
16291 if (used_for_jit) {
16292 if (!(map->jit_entry_exists) ||
16293 VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
16294 new_entry->used_for_jit = TRUE;
16295 map->jit_entry_exists = TRUE;
16296 }
16297 } else {
16298 new_entry->used_for_jit = FALSE;
16299 }
16300 if (translated_allow_execute) {
16301 new_entry->translated_allow_execute = TRUE;
16302 } else {
16303 new_entry->translated_allow_execute = FALSE;
16304 }
16305 new_entry->pmap_cs_associated = FALSE;
16306 new_entry->iokit_acct = FALSE;
16307 new_entry->vme_resilient_codesign = FALSE;
16308 new_entry->vme_resilient_media = FALSE;
16309 new_entry->vme_atomic = FALSE;
16310 new_entry->vme_no_copy_on_read = no_copy_on_read;
16311
16312 /*
16313 * Insert the new entry into the list.
16314 */
16315
16316 vm_map_store_entry_link(map, insp_entry, new_entry,
16317 VM_MAP_KERNEL_FLAGS_NONE);
16318 map->size += end - start;
16319
16320 /*
16321 * Update the free space hint and the lookup hint.
16322 */
16323
16324 SAVE_HINT_MAP_WRITE(map, new_entry);
16325 return new_entry;
16326 }
16327
16328 int vm_remap_old_path = 0;
16329 int vm_remap_new_path = 0;
16330 /*
16331 * Routine: vm_map_remap_extract
16332 *
16333 * Description: This routine extracts a list of vm_map_entry_t's from a map.
16334 */
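/*
 * User-level illustration only (a sketch): this extraction logic is what
 * ultimately backs mach_vm_remap(), e.g.
 *
 *     mach_vm_address_t target = 0;
 *     vm_prot_t cur_prot, max_prot;
 *     kern_return_t kr = mach_vm_remap(mach_task_self(), &target, size, 0,
 *         VM_FLAGS_ANYWHERE, mach_task_self(), src_addr,
 *         FALSE,                  // copy == FALSE: share the pages
 *         &cur_prot, &max_prot, VM_INHERIT_SHARE);
 */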
16335 static kern_return_t
16336 vm_map_remap_extract(
16337 vm_map_t map,
16338 vm_map_offset_t addr,
16339 vm_map_size_t size,
16340 vm_prot_t required_protection,
16341 boolean_t copy,
16342 struct vm_map_header *map_header,
16343 vm_prot_t *cur_protection,
16344 vm_prot_t *max_protection,
16345 /* What, no behavior? */
16346 vm_inherit_t inheritance,
16347 vm_map_kernel_flags_t vmk_flags)
16348 {
16349 kern_return_t result;
16350 vm_map_size_t mapped_size;
16351 vm_map_size_t tmp_size;
16352 vm_map_entry_t src_entry; /* result of last map lookup */
16353 vm_map_entry_t new_entry;
16354 vm_object_offset_t offset;
16355 vm_map_offset_t map_address;
16356 vm_map_offset_t src_start; /* start of entry to map */
16357 vm_map_offset_t src_end; /* end of region to be mapped */
16358 vm_object_t object;
16359 vm_map_version_t version;
16360 boolean_t src_needs_copy;
16361 boolean_t new_entry_needs_copy;
16362 vm_map_entry_t saved_src_entry;
16363 boolean_t src_entry_was_wired;
16364 vm_prot_t max_prot_for_prot_copy;
16365 vm_map_offset_t effective_page_mask;
16366 boolean_t pageable, same_map;
16367
16368 pageable = vmk_flags.vmkf_copy_pageable;
16369 same_map = vmk_flags.vmkf_copy_same_map;
16370
16371 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16372
16373 assert(map != VM_MAP_NULL);
16374 assert(size != 0);
16375 assert(size == vm_map_round_page(size, effective_page_mask));
16376 assert(inheritance == VM_INHERIT_NONE ||
16377 inheritance == VM_INHERIT_COPY ||
16378 inheritance == VM_INHERIT_SHARE);
16379 assert(!(required_protection & ~VM_PROT_ALL));
16380
16381 /*
16382 * Compute start and end of region.
16383 */
16384 src_start = vm_map_trunc_page(addr, effective_page_mask);
16385 src_end = vm_map_round_page(src_start + size, effective_page_mask);
16386
16387 /*
16388 * Initialize map_header.
16389 */
16390 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16391 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16392 map_header->nentries = 0;
16393 map_header->entries_pageable = pageable;
16394 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16395 map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
16396 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16397
16398 vm_map_store_init( map_header );
16399
16400 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16401 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
16402 } else {
16403 max_prot_for_prot_copy = VM_PROT_NONE;
16404 }
16405 *cur_protection = VM_PROT_ALL;
16406 *max_protection = VM_PROT_ALL;
16407
16408 map_address = 0;
16409 mapped_size = 0;
16410 result = KERN_SUCCESS;
16411
16412 /*
16413 * The specified source virtual space might correspond to
16414 * multiple map entries, so we need to loop over them.
16415 */
16416 vm_map_lock(map);
16417 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16418 /*
16419 * This address space uses sub-pages so the range might
16420 * not be re-mappable in an address space with larger
16421 * pages. Re-assemble any broken-up VM map entries to
16422 * improve our chances of making it work.
16423 */
16424 vm_map_simplify_range(map, src_start, src_end);
16425 }
16426 while (mapped_size != size) {
16427 vm_map_size_t entry_size;
16428
16429 /*
16430 * Find the beginning of the region.
16431 */
16432 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16433 result = KERN_INVALID_ADDRESS;
16434 break;
16435 }
16436
16437 if (src_start < src_entry->vme_start ||
16438 (mapped_size && src_start != src_entry->vme_start)) {
16439 result = KERN_INVALID_ADDRESS;
16440 break;
16441 }
16442
16443 tmp_size = size - mapped_size;
16444 if (src_end > src_entry->vme_end) {
16445 tmp_size -= (src_end - src_entry->vme_end);
16446 }
16447
16448 entry_size = (vm_map_size_t)(src_entry->vme_end -
16449 src_entry->vme_start);
16450
16451 if (src_entry->is_sub_map &&
16452 vmk_flags.vmkf_copy_single_object) {
16453 vm_map_t submap;
16454 vm_map_offset_t submap_start;
16455 vm_map_size_t submap_size;
16456
16457 /*
16458 * No check for "required_protection" on "src_entry"
16459 * because the protections that matter are the ones
16460 * on the submap's VM map entry, which will be checked
16461 * during the call to vm_map_remap_extract() below.
16462 */
16463 submap_size = src_entry->vme_end - src_start;
16464 if (submap_size > size) {
16465 submap_size = size;
16466 }
16467 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16468 submap = VME_SUBMAP(src_entry);
16469 vm_map_reference(submap);
16470 vm_map_unlock(map);
16471 src_entry = NULL;
16472 result = vm_map_remap_extract(submap,
16473 submap_start,
16474 submap_size,
16475 required_protection,
16476 copy,
16477 map_header,
16478 cur_protection,
16479 max_protection,
16480 inheritance,
16481 vmk_flags);
16482 vm_map_deallocate(submap);
16483 return result;
16484 }
16485
16486 if ((src_entry->protection & required_protection)
16487 != required_protection) {
16488 if (vmk_flags.vmkf_copy_single_object &&
16489 mapped_size != 0) {
16490 /*
16491 * Single object extraction.
16492 * We can't extract more with the required
16493 * protection but we've extracted some, so
16494 * stop there and declare success.
16495 * The caller should check the size of
16496 * the copy entry we've extracted.
16497 */
16498 result = KERN_SUCCESS;
16499 } else {
16500 /*
16501 * VM range extraction.
16502 * Required protection is not available
16503 * for this part of the range: fail.
16504 */
16505 result = KERN_PROTECTION_FAILURE;
16506 }
16507 break;
16508 }
16509
16510 if (src_entry->is_sub_map &&
16511 VM_MAP_PAGE_SHIFT(VME_SUBMAP(src_entry)) < PAGE_SHIFT) {
16512 vm_map_t submap;
16513 vm_map_offset_t submap_start;
16514 vm_map_size_t submap_size;
16515 vm_map_copy_t submap_copy;
16516 vm_prot_t submap_curprot, submap_maxprot;
16517
16518 vm_remap_new_path++;
16519
16520 /*
16521 * No check for "required_protection" on "src_entry"
16522 * because the protections that matter are the ones
16523 * on the submap's VM map entry, which will be checked
16524 * during the call to vm_map_copy_extract() below.
16525 */
16526 object = VM_OBJECT_NULL;
16527 submap_copy = VM_MAP_COPY_NULL;
16528
16529 /* find equivalent range in the submap */
16530 submap = VME_SUBMAP(src_entry);
16531 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16532 submap_size = tmp_size;
16533 /* extra ref to keep submap alive */
16534 vm_map_reference(submap);
16535
16536 DTRACE_VM6(remap_submap_recurse,
16537 vm_map_t, map,
16538 vm_map_offset_t, addr,
16539 vm_map_size_t, size,
16540 boolean_t, copy,
16541 vm_map_offset_t, submap_start,
16542 vm_map_size_t, submap_size);
16543
16544 /*
16545 * The map can be safely unlocked since we
16546 * already hold a reference on the submap.
16547 *
16548 * No timestamp since we don't care if the map
16549 * gets modified while we're down in the submap.
16550 * We'll resume the extraction at src_start + tmp_size
16551 * anyway.
16552 */
16553 vm_map_unlock(map);
16554 src_entry = NULL; /* not valid once map is unlocked */
16555
16556 result = vm_map_copy_extract(submap,
16557 submap_start,
16558 submap_size,
16559 required_protection,
16560 copy,
16561 &submap_copy,
16562 &submap_curprot,
16563 &submap_maxprot,
16564 inheritance,
16565 vmk_flags);
16566
16567 /* release extra ref on submap */
16568 vm_map_deallocate(submap);
16569 submap = VM_MAP_NULL;
16570
16571 if (result != KERN_SUCCESS) {
16572 vm_map_lock(map);
16573 break;
16574 }
16575
16576 /* transfer submap_copy entries to map_header */
16577 while (vm_map_copy_first_entry(submap_copy) !=
16578 vm_map_copy_to_entry(submap_copy)) {
16579 vm_map_entry_t copy_entry;
16580 vm_map_size_t copy_entry_size;
16581
16582 copy_entry = vm_map_copy_first_entry(submap_copy);
16583 assert(!copy_entry->is_sub_map);
16584 vm_map_copy_entry_unlink(submap_copy, copy_entry);
16585 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
16586 copy_entry->vme_start = map_address;
16587 copy_entry->vme_end = map_address + copy_entry_size;
16588 map_address += copy_entry_size;
16589 mapped_size += copy_entry_size;
16590 src_start += copy_entry_size;
16591 assert(src_start <= src_end);
16592 _vm_map_store_entry_link(map_header,
16593 map_header->links.prev,
16594 copy_entry);
16595 }
16596 /* done with submap_copy */
16597 vm_map_copy_discard(submap_copy);
16598
16599 *cur_protection &= submap_curprot;
16600 *max_protection &= submap_maxprot;
16601
16602 /* re-acquire the map lock and continue to next entry */
16603 vm_map_lock(map);
16604 continue;
16605 } else if (src_entry->is_sub_map) {
16606 vm_remap_old_path++;
16607 DTRACE_VM4(remap_submap,
16608 vm_map_t, map,
16609 vm_map_offset_t, addr,
16610 vm_map_size_t, size,
16611 boolean_t, copy);
16612
16613 vm_map_reference(VME_SUBMAP(src_entry));
16614 object = VM_OBJECT_NULL;
16615 } else {
16616 object = VME_OBJECT(src_entry);
16617 if (src_entry->iokit_acct) {
16618 /*
16619 * This entry uses "IOKit accounting".
16620 */
16621 } else if (object != VM_OBJECT_NULL &&
16622 (object->purgable != VM_PURGABLE_DENY ||
16623 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
16624 /*
16625 * Purgeable objects have their own accounting:
16626 * no pmap accounting for them.
16627 */
16628 assertf(!src_entry->use_pmap,
16629 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16630 map,
16631 src_entry,
16632 (uint64_t)src_entry->vme_start,
16633 (uint64_t)src_entry->vme_end,
16634 src_entry->protection,
16635 src_entry->max_protection,
16636 VME_ALIAS(src_entry));
16637 } else {
16638 /*
16639 * Not IOKit or purgeable:
16640 * must be accounted by pmap stats.
16641 */
16642 assertf(src_entry->use_pmap,
16643 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16644 map,
16645 src_entry,
16646 (uint64_t)src_entry->vme_start,
16647 (uint64_t)src_entry->vme_end,
16648 src_entry->protection,
16649 src_entry->max_protection,
16650 VME_ALIAS(src_entry));
16651 }
16652
16653 if (object == VM_OBJECT_NULL) {
16654 assert(!src_entry->needs_copy);
16655 object = vm_object_allocate(entry_size);
16656 VME_OFFSET_SET(src_entry, 0);
16657 VME_OBJECT_SET(src_entry, object);
16658 assert(src_entry->use_pmap);
16659 } else if (src_entry->wired_count ||
16660 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16661 /*
16662 * A wired memory region should not have
16663 * any pending copy-on-write and needs to
16664 * keep pointing at the VM object that
16665 * contains the wired pages.
16666 * If we're sharing this memory (copy=false),
16667 * we'll share this VM object.
16668 * If we're copying this memory (copy=true),
16669 * we'll call vm_object_copy_slowly() below
16670 * and use the new VM object for the remapping.
16671 *
16672 * Or, we are already using an asymmetric
16673 * copy, and therefore we already have
16674 * the right object.
16675 */
16676 assert(!src_entry->needs_copy);
16677 } else if (src_entry->needs_copy || object->shadowed ||
16678 (object->internal && !object->true_share &&
16679 !src_entry->is_shared &&
16680 object->vo_size > entry_size)) {
16681 VME_OBJECT_SHADOW(src_entry, entry_size);
16682 assert(src_entry->use_pmap);
16683
16684 if (!src_entry->needs_copy &&
16685 (src_entry->protection & VM_PROT_WRITE)) {
16686 vm_prot_t prot;
16687
16688 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16689
16690 prot = src_entry->protection & ~VM_PROT_WRITE;
16691
16692 if (override_nx(map,
16693 VME_ALIAS(src_entry))
16694 && prot) {
16695 prot |= VM_PROT_EXECUTE;
16696 }
16697
16698 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16699
16700 if (map->mapped_in_other_pmaps) {
16701 vm_object_pmap_protect(
16702 VME_OBJECT(src_entry),
16703 VME_OFFSET(src_entry),
16704 entry_size,
16705 PMAP_NULL,
16706 PAGE_SIZE,
16707 src_entry->vme_start,
16708 prot);
16709 #if MACH_ASSERT
16710 } else if (__improbable(map->pmap == PMAP_NULL)) {
16711 extern boolean_t vm_tests_in_progress;
16712 assert(vm_tests_in_progress);
16713 /*
16714 * Some VM tests (in vm_tests.c)
16715 * sometimes want to use a VM
16716 * map without a pmap.
16717 * Otherwise, this should never
16718 * happen.
16719 */
16720 #endif /* MACH_ASSERT */
16721 } else {
16722 pmap_protect(vm_map_pmap(map),
16723 src_entry->vme_start,
16724 src_entry->vme_end,
16725 prot);
16726 }
16727 }
16728
16729 object = VME_OBJECT(src_entry);
16730 src_entry->needs_copy = FALSE;
16731 }
16732
16733
16734 vm_object_lock(object);
16735 vm_object_reference_locked(object); /* object ref. for new entry */
16736 assert(!src_entry->needs_copy);
16737 if (object->copy_strategy ==
16738 MEMORY_OBJECT_COPY_SYMMETRIC) {
16739 /*
16740 * If we want to share this object (copy==0),
16741 * it needs to be COPY_DELAY.
16742 * If we want to copy this object (copy==1),
16743 * we can't just set "needs_copy" on our side
16744 * and expect the other side to do the same
16745 * (symmetrically), so we can't let the object
16746 * stay COPY_SYMMETRIC.
16747 * So we always switch from COPY_SYMMETRIC to
16748 * COPY_DELAY.
16749 */
16750 object->copy_strategy =
16751 MEMORY_OBJECT_COPY_DELAY;
16752 }
16753 vm_object_unlock(object);
16754 }
16755
16756 offset = (VME_OFFSET(src_entry) +
16757 (src_start - src_entry->vme_start));
16758
16759 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
16760 vm_map_entry_copy(map, new_entry, src_entry);
16761 if (new_entry->is_sub_map) {
16762 /* clr address space specifics */
16763 new_entry->use_pmap = FALSE;
16764 } else if (copy) {
16765 /*
16766 * We're dealing with a copy-on-write operation,
16767 * so the resulting mapping should not inherit the
16768 * original mapping's accounting settings.
16769 * "use_pmap" should be reset to its default (TRUE)
16770 * so that the new mapping gets accounted for in
16771 * the task's memory footprint.
16772 */
16773 new_entry->use_pmap = TRUE;
16774 }
16775 /* "iokit_acct" was cleared in vm_map_entry_copy() */
16776 assert(!new_entry->iokit_acct);
16777
16778 new_entry->map_aligned = FALSE;
16779
16780 new_entry->vme_start = map_address;
16781 new_entry->vme_end = map_address + tmp_size;
16782 assert(new_entry->vme_start < new_entry->vme_end);
16783 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16784 /*
16785 * Remapping for vm_map_protect(VM_PROT_COPY)
16786 * to convert a read-only mapping into a
16787 * copy-on-write version of itself but
16788 * with write access:
16789 * keep the original inheritance and add
16790 * VM_PROT_WRITE to the max protection.
16791 */
16792 new_entry->inheritance = src_entry->inheritance;
16793 new_entry->protection &= max_prot_for_prot_copy;
16794 new_entry->max_protection |= VM_PROT_WRITE;
16795 } else {
16796 new_entry->inheritance = inheritance;
16797 }
16798 VME_OFFSET_SET(new_entry, offset);
16799
16800 /*
16801 * The new region has to be copied now if required.
16802 */
16803 RestartCopy:
16804 if (!copy) {
16805 if (src_entry->used_for_jit == TRUE) {
16806 if (same_map) {
16807 #if __APRR_SUPPORTED__
16808 /*
16809 * Disallow re-mapping of any JIT regions on APRR devices.
16810 */
16811 result = KERN_PROTECTION_FAILURE;
16812 break;
16813 #endif /* __APRR_SUPPORTED__*/
16814 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
16815 /*
16816 * Cannot allow an entry describing a JIT
16817 * region to be shared across address spaces.
16818 */
16819 result = KERN_INVALID_ARGUMENT;
16820 break;
16821 }
16822 }
16823
16824 src_entry->is_shared = TRUE;
16825 new_entry->is_shared = TRUE;
16826 if (!(new_entry->is_sub_map)) {
16827 new_entry->needs_copy = FALSE;
16828 }
16829 } else if (src_entry->is_sub_map) {
16830 /* make this a COW sub_map if not already */
16831 assert(new_entry->wired_count == 0);
16832 new_entry->needs_copy = TRUE;
16833 object = VM_OBJECT_NULL;
16834 } else if (src_entry->wired_count == 0 &&
16835 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
16836 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
16837 VME_OFFSET(new_entry),
16838 (new_entry->vme_end -
16839 new_entry->vme_start),
16840 &src_needs_copy,
16841 &new_entry_needs_copy)) {
16842 new_entry->needs_copy = new_entry_needs_copy;
16843 new_entry->is_shared = FALSE;
16844 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16845
16846 /*
16847 * Handle copy_on_write semantics.
16848 */
16849 if (src_needs_copy && !src_entry->needs_copy) {
16850 vm_prot_t prot;
16851
16852 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16853
16854 prot = src_entry->protection & ~VM_PROT_WRITE;
16855
16856 if (override_nx(map,
16857 VME_ALIAS(src_entry))
16858 && prot) {
16859 prot |= VM_PROT_EXECUTE;
16860 }
16861
16862 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16863
16864 vm_object_pmap_protect(object,
16865 offset,
16866 entry_size,
16867 ((src_entry->is_shared
16868 || map->mapped_in_other_pmaps) ?
16869 PMAP_NULL : map->pmap),
16870 VM_MAP_PAGE_SIZE(map),
16871 src_entry->vme_start,
16872 prot);
16873
16874 assert(src_entry->wired_count == 0);
16875 src_entry->needs_copy = TRUE;
16876 }
16877 /*
16878 * Throw away the old object reference of the new entry.
16879 */
16880 vm_object_deallocate(object);
16881 } else {
16882 new_entry->is_shared = FALSE;
16883 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16884
16885 src_entry_was_wired = (src_entry->wired_count > 0);
16886 saved_src_entry = src_entry;
16887 src_entry = VM_MAP_ENTRY_NULL;
16888
16889 /*
16890 * The map can be safely unlocked since we
16891 * already hold a reference on the object.
16892 *
16893 * Record the timestamp of the map for later
16894 * verification, and unlock the map.
16895 */
16896 version.main_timestamp = map->timestamp;
16897 vm_map_unlock(map); /* Increments timestamp once! */
16898
16899 /*
16900 * Perform the copy.
16901 */
16902 if (src_entry_was_wired > 0 ||
16903 (debug4k_no_cow_copyin &&
16904 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
16905 vm_object_lock(object);
16906 result = vm_object_copy_slowly(
16907 object,
16908 offset,
16909 (new_entry->vme_end -
16910 new_entry->vme_start),
16911 THREAD_UNINT,
16912 VME_OBJECT_PTR(new_entry));
16913
16914 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
16915 new_entry->needs_copy = FALSE;
16916 } else {
16917 vm_object_offset_t new_offset;
16918
16919 new_offset = VME_OFFSET(new_entry);
16920 result = vm_object_copy_strategically(
16921 object,
16922 offset,
16923 (new_entry->vme_end -
16924 new_entry->vme_start),
16925 VME_OBJECT_PTR(new_entry),
16926 &new_offset,
16927 &new_entry_needs_copy);
16928 if (new_offset != VME_OFFSET(new_entry)) {
16929 VME_OFFSET_SET(new_entry, new_offset);
16930 }
16931
16932 new_entry->needs_copy = new_entry_needs_copy;
16933 }
16934
16935 /*
16936 * Throw away the old object reference of the new entry.
16937 */
16938 vm_object_deallocate(object);
16939
16940 if (result != KERN_SUCCESS &&
16941 result != KERN_MEMORY_RESTART_COPY) {
16942 _vm_map_entry_dispose(map_header, new_entry);
16943 vm_map_lock(map);
16944 break;
16945 }
16946
16947 /*
16948 * Verify that the map has not substantially
16949 * changed while the copy was being made.
16950 */
16951
16952 vm_map_lock(map);
16953 if (version.main_timestamp + 1 != map->timestamp) {
16954 /*
16955 * Simple version comparison failed.
16956 *
16957 * Retry the lookup and verify that the
16958 * same object/offset are still present.
16959 */
16960 saved_src_entry = VM_MAP_ENTRY_NULL;
16961 vm_object_deallocate(VME_OBJECT(new_entry));
16962 _vm_map_entry_dispose(map_header, new_entry);
16963 if (result == KERN_MEMORY_RESTART_COPY) {
16964 result = KERN_SUCCESS;
16965 }
16966 continue;
16967 }
16968 /* map hasn't changed: src_entry is still valid */
16969 src_entry = saved_src_entry;
16970 saved_src_entry = VM_MAP_ENTRY_NULL;
16971
16972 if (result == KERN_MEMORY_RESTART_COPY) {
16973 vm_object_reference(object);
16974 goto RestartCopy;
16975 }
16976 }
16977
16978 _vm_map_store_entry_link(map_header,
16979 map_header->links.prev, new_entry);
16980
16981 /* Protections for submap mapping are irrelevant here */
16982 if (!src_entry->is_sub_map) {
16983 *cur_protection &= src_entry->protection;
16984 *max_protection &= src_entry->max_protection;
16985 }
16986
16987 map_address += tmp_size;
16988 mapped_size += tmp_size;
16989 src_start += tmp_size;
16990
16991 if (vmk_flags.vmkf_copy_single_object) {
16992 if (mapped_size != size) {
16993 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
16994 if (src_entry->vme_next != vm_map_to_entry(map) &&
16995 VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
16996 /* XXX TODO4K */
16997 DEBUG4K_ERROR("could have extended copy to next entry...\n");
16998 }
16999 }
17000 break;
17001 }
17002 } /* end while */
17003
17004 vm_map_unlock(map);
17005 if (result != KERN_SUCCESS) {
17006 /*
17007 * Free all allocated elements.
17008 */
17009 for (src_entry = map_header->links.next;
17010 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17011 src_entry = new_entry) {
17012 new_entry = src_entry->vme_next;
17013 _vm_map_store_entry_unlink(map_header, src_entry);
17014 if (src_entry->is_sub_map) {
17015 vm_map_deallocate(VME_SUBMAP(src_entry));
17016 } else {
17017 vm_object_deallocate(VME_OBJECT(src_entry));
17018 }
17019 _vm_map_entry_dispose(map_header, src_entry);
17020 }
17021 }
17022 return result;
17023 }
17024
17025 bool
17026 vm_map_is_exotic(
17027 vm_map_t map)
17028 {
17029 return VM_MAP_IS_EXOTIC(map);
17030 }
17031
17032 bool
17033 vm_map_is_alien(
17034 vm_map_t map)
17035 {
17036 return VM_MAP_IS_ALIEN(map);
17037 }
17038
17039 #if XNU_TARGET_OS_OSX
17040 void
17041 vm_map_mark_alien(
17042 vm_map_t map)
17043 {
17044 vm_map_lock(map);
17045 map->is_alien = true;
17046 vm_map_unlock(map);
17047 }
17048 #endif /* XNU_TARGET_OS_OSX */
17049
17050 void vm_map_copy_to_physcopy(vm_map_copy_t copy_map, vm_map_t target_map);
17051 void
17052 vm_map_copy_to_physcopy(
17053 vm_map_copy_t copy_map,
17054 vm_map_t target_map)
17055 {
17056 vm_map_size_t size;
17057 vm_map_entry_t entry;
17058 vm_map_entry_t new_entry;
17059 vm_object_t new_object;
17060 unsigned int pmap_flags;
17061 pmap_t new_pmap;
17062 vm_map_t new_map;
17063 vm_map_address_t src_start, src_end, src_cur;
17064 vm_map_address_t dst_start, dst_end, dst_cur;
17065 kern_return_t kr;
17066 void *kbuf;
17067
17068 /*
17069 * Perform the equivalent of vm_allocate() and memcpy().
17070 * Replace the mappings in "copy_map" with the newly allocated mapping.
17071 */
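/*
 * Conceptually (a sketch of the intent, not literal code), this is the
 * kernel-internal analogue of:
 *
 *     vm_allocate(task, &dst, size, VM_FLAGS_ANYWHERE);
 *     memcpy(dst, src, size);
 *
 * except that the source uses sub-native-page mappings, so a temporary
 * 4K-page VM map is built and the bytes are moved page by page with
 * copyinmap()/copyoutmap() below.
 */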
17072 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17073
17074 assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));
17075
17076 /* allocate new VM object */
17077 size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
17078 new_object = vm_object_allocate(size);
17079 assert(new_object);
17080
17081 /* allocate new VM map entry */
17082 new_entry = vm_map_copy_entry_create(copy_map, FALSE);
17083 assert(new_entry);
17084
17085 /* finish initializing new VM map entry */
17086 new_entry->protection = VM_PROT_DEFAULT;
17087 new_entry->max_protection = VM_PROT_DEFAULT;
17088 new_entry->use_pmap = TRUE;
17089
17090 /* make new VM map entry point to new VM object */
17091 new_entry->vme_start = 0;
17092 new_entry->vme_end = size;
17093 VME_OBJECT_SET(new_entry, new_object);
17094 VME_OFFSET_SET(new_entry, 0);
17095
17096 /* create a new pmap to map "copy_map" */
17097 pmap_flags = 0;
17098 assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
17099 #if PMAP_CREATE_FORCE_4K_PAGES
17100 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
17101 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
17102 pmap_flags |= PMAP_CREATE_64BIT;
17103 new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
17104 assert(new_pmap);
17105
17106 /* create a new pageable VM map to map "copy_map" */
17107 new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
17108 assert(new_map);
17109 vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
17110
17111 /* map "copy_map" in the new VM map */
17112 src_start = 0;
17113 kr = vm_map_copyout_internal(
17114 new_map,
17115 &src_start,
17116 copy_map,
17117 copy_map->size,
17118 FALSE, /* consume_on_success */
17119 VM_PROT_DEFAULT,
17120 VM_PROT_DEFAULT,
17121 VM_INHERIT_DEFAULT);
17122 assert(kr == KERN_SUCCESS);
17123 src_end = src_start + copy_map->size;
17124
17125 /* map "new_object" in the new VM map */
17126 vm_object_reference(new_object);
17127 dst_start = 0;
17128 kr = vm_map_enter(new_map,
17129 &dst_start,
17130 size,
17131 0, /* mask */
17132 VM_FLAGS_ANYWHERE,
17133 VM_MAP_KERNEL_FLAGS_NONE,
17134 VM_KERN_MEMORY_OSFMK,
17135 new_object,
17136 0, /* offset */
17137 FALSE, /* needs copy */
17138 VM_PROT_DEFAULT,
17139 VM_PROT_DEFAULT,
17140 VM_INHERIT_DEFAULT);
17141 assert(kr == KERN_SUCCESS);
17142 dst_end = dst_start + size;
17143
17144 /* get a kernel buffer */
17145 kbuf = kheap_alloc(KHEAP_TEMP, PAGE_SIZE, Z_WAITOK);
17146 assert(kbuf);
17147
17148 /* physically copy "copy_map" mappings to new VM object */
17149 for (src_cur = src_start, dst_cur = dst_start;
17150 src_cur < src_end;
17151 src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
17152 vm_size_t bytes;
17153
17154 bytes = PAGE_SIZE;
17155 if (src_cur + PAGE_SIZE > src_end) {
17156 /* partial copy for last page */
17157 bytes = src_end - src_cur;
17158 assert(bytes > 0 && bytes < PAGE_SIZE);
17159 /* rest of dst page should be zero-filled */
17160 }
17161 /* get bytes from src mapping */
17162 kr = copyinmap(new_map, src_cur, kbuf, bytes);
17163 if (kr != KERN_SUCCESS) {
17164 DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
17165 }
17166 /* put bytes in dst mapping */
17167 assert(dst_cur < dst_end);
17168 assert(dst_cur + bytes <= dst_end);
17169 kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
17170 if (kr != KERN_SUCCESS) {
17171 DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
17172 }
17173 }
17174
17175 /* free kernel buffer */
17176 kheap_free(KHEAP_TEMP, kbuf, PAGE_SIZE);
17177 kbuf = NULL;
17178
17179 /* destroy new map */
17180 vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
17181 new_map = VM_MAP_NULL;
17182
17183 /* dispose of the old map entries in "copy_map" */
17184 while (vm_map_copy_first_entry(copy_map) !=
17185 vm_map_copy_to_entry(copy_map)) {
17186 entry = vm_map_copy_first_entry(copy_map);
17187 vm_map_copy_entry_unlink(copy_map, entry);
17188 if (entry->is_sub_map) {
17189 vm_map_deallocate(VME_SUBMAP(entry));
17190 } else {
17191 vm_object_deallocate(VME_OBJECT(entry));
17192 }
17193 vm_map_copy_entry_dispose(copy_map, entry);
17194 }
17195
17196 /* change "copy_map"'s page_size to match "target_map" */
17197 copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
17198 copy_map->offset = 0;
17199 copy_map->size = size;
17200
17201 /* insert new map entry in "copy_map" */
17202 assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
17203 vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
17204
17205 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17206 }
17207
17208 void
17209 vm_map_copy_adjust_get_target_copy_map(
17210 vm_map_copy_t copy_map,
17211 vm_map_copy_t *target_copy_map_p);
17212 void
17213 vm_map_copy_adjust_get_target_copy_map(
17214 vm_map_copy_t copy_map,
17215 vm_map_copy_t *target_copy_map_p)
17216 {
17217 vm_map_copy_t target_copy_map;
17218 vm_map_entry_t entry, target_entry;
17219
17220 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17221 /* the caller already has a "target_copy_map": use it */
17222 return;
17223 }
17224
17225 /* the caller wants us to create a new copy of "copy_map" */
17226 target_copy_map = vm_map_copy_allocate();
17227 target_copy_map->type = copy_map->type;
17228 assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17229 target_copy_map->offset = copy_map->offset;
17230 target_copy_map->size = copy_map->size;
17231 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17232 vm_map_store_init(&target_copy_map->cpy_hdr);
17233 for (entry = vm_map_copy_first_entry(copy_map);
17234 entry != vm_map_copy_to_entry(copy_map);
17235 entry = entry->vme_next) {
17236 target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
17237 vm_map_entry_copy_full(target_entry, entry);
17238 if (target_entry->is_sub_map) {
17239 vm_map_reference(VME_SUBMAP(target_entry));
17240 } else {
17241 vm_object_reference(VME_OBJECT(target_entry));
17242 }
17243 vm_map_copy_entry_link(
17244 target_copy_map,
17245 vm_map_copy_last_entry(target_copy_map),
17246 target_entry);
17247 }
17248 entry = VM_MAP_ENTRY_NULL;
17249 *target_copy_map_p = target_copy_map;
17250 }
17251
17252 void
17253 vm_map_copy_trim(
17254 vm_map_copy_t copy_map,
17255 int new_page_shift,
17256 vm_map_offset_t trim_start,
17257 vm_map_offset_t trim_end);
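/*
 * Dispose of the [trim_start, trim_end) portion of "copy_map" (offsets
 * relative to the start of the copy), clipping entries with the new
 * page shift so sub-page trim boundaries are honored.
 */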
17258 void
17259 vm_map_copy_trim(
17260 vm_map_copy_t copy_map,
17261 int new_page_shift,
17262 vm_map_offset_t trim_start,
17263 vm_map_offset_t trim_end)
17264 {
17265 int copy_page_shift;
17266 vm_map_entry_t entry, next_entry;
17267
17268 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17269 assert(copy_map->cpy_hdr.nentries > 0);
17270
17271 trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
17272 trim_end += vm_map_copy_first_entry(copy_map)->vme_start;
17273
17274 /* use the new page_shift to do the clipping */
17275 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17276 copy_map->cpy_hdr.page_shift = new_page_shift;
17277
17278 for (entry = vm_map_copy_first_entry(copy_map);
17279 entry != vm_map_copy_to_entry(copy_map);
17280 entry = next_entry) {
17281 next_entry = entry->vme_next;
17282 if (entry->vme_end <= trim_start) {
17283 /* entry fully before trim range: skip */
17284 continue;
17285 }
17286 if (entry->vme_start >= trim_end) {
17287 /* entry fully after trim range: done */
17288 break;
17289 }
17290 /* clip entry if needed */
17291 vm_map_copy_clip_start(copy_map, entry, trim_start);
17292 vm_map_copy_clip_end(copy_map, entry, trim_end);
17293 /* dispose of entry */
17294 copy_map->size -= entry->vme_end - entry->vme_start;
17295 vm_map_copy_entry_unlink(copy_map, entry);
17296 if (entry->is_sub_map) {
17297 vm_map_deallocate(VME_SUBMAP(entry));
17298 } else {
17299 vm_object_deallocate(VME_OBJECT(entry));
17300 }
17301 vm_map_copy_entry_dispose(copy_map, entry);
17302 entry = VM_MAP_ENTRY_NULL;
17303 }
17304
17305 /* restore copy_map's original page_shift */
17306 copy_map->cpy_hdr.page_shift = copy_page_shift;
17307 }
17308
17309 /*
17310 * Make any necessary adjustments to "copy_map" to allow it to be
17311 * mapped into "target_map".
17312 * If no changes were necessary, "target_copy_map" points to the
17313 * untouched "copy_map".
17314 * If changes are necessary, changes will be made to "target_copy_map".
17315 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17316 * copy the original "copy_map" to it before applying the changes.
17317 * The caller should discard "target_copy_map" if it's not the same as
17318 * the original "copy_map".
17319 */
17320 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
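/*
 * Worked example (a sketch, assuming 4K copy_map pages and a 16K-page
 * target_map): a copy covering [0x1000, 0x5000) is aligned to neither end
 * of a 16K page. Without "copy", it can still be mapped by over-mapping to
 * the enclosing 16K boundaries [0x0, 0x8000), i.e. an extra 0x1000 bytes at
 * the start and 0x3000 bytes at the end; overmap_start/overmap_end report
 * that extra coverage to the caller.
 */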
17321 kern_return_t
17322 vm_map_copy_adjust_to_target(
17323 vm_map_copy_t src_copy_map,
17324 vm_map_offset_t offset,
17325 vm_map_size_t size,
17326 vm_map_t target_map,
17327 boolean_t copy,
17328 vm_map_copy_t *target_copy_map_p,
17329 vm_map_offset_t *overmap_start_p,
17330 vm_map_offset_t *overmap_end_p,
17331 vm_map_offset_t *trimmed_start_p)
17332 {
17333 vm_map_copy_t copy_map, target_copy_map;
17334 vm_map_size_t target_size;
17335 vm_map_size_t src_copy_map_size;
17336 vm_map_size_t overmap_start, overmap_end;
17337 int misalignments;
17338 vm_map_entry_t entry, target_entry;
17339 vm_map_offset_t addr_adjustment;
17340 vm_map_offset_t new_start, new_end;
17341 int copy_page_mask, target_page_mask;
17342 int copy_page_shift, target_page_shift;
17343 vm_map_offset_t trimmed_end;
17344
17345 /*
17346 * Assert that the vm_map_copy is coming from the right
17347 * zone and hasn't been forged
17348 */
17349 vm_map_copy_require(src_copy_map);
17350 assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17351
17352 /*
17353 * Start working with "src_copy_map" but we'll switch
17354 * to "target_copy_map" as soon as we start making adjustments.
17355 */
17356 copy_map = src_copy_map;
17357 src_copy_map_size = src_copy_map->size;
17358
17359 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17360 copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
17361 target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
17362 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17363
17364 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
17365
17366 target_copy_map = *target_copy_map_p;
17367 if (target_copy_map != VM_MAP_COPY_NULL) {
17368 vm_map_copy_require(target_copy_map);
17369 }
17370
17371 if (offset + size > copy_map->size) {
17372 DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
17373 return KERN_INVALID_ARGUMENT;
17374 }
17375
17376 /* trim the end */
17377 trimmed_end = 0;
17378 new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
17379 if (new_end < copy_map->size) {
17380 trimmed_end = src_copy_map_size - new_end;
17381 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
17382 /* get "target_copy_map" if needed and adjust it */
17383 vm_map_copy_adjust_get_target_copy_map(copy_map,
17384 &target_copy_map);
17385 copy_map = target_copy_map;
17386 vm_map_copy_trim(target_copy_map, target_page_shift,
17387 new_end, copy_map->size);
17388 }
17389
17390 /* trim the start */
17391 new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
17392 if (new_start != 0) {
17393 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
17394 /* get "target_copy_map" if needed and adjust it */
17395 vm_map_copy_adjust_get_target_copy_map(copy_map,
17396 &target_copy_map);
17397 copy_map = target_copy_map;
17398 vm_map_copy_trim(target_copy_map, target_page_shift,
17399 0, new_start);
17400 }
17401 *trimmed_start_p = new_start;
17402
17403 /* target_size starts with what's left after trimming */
17404 target_size = copy_map->size;
17405 assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
17406 "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
17407 (uint64_t)target_size, (uint64_t)src_copy_map_size,
17408 (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
17409
17410 /* check for misalignments but don't adjust yet */
17411 misalignments = 0;
17412 overmap_start = 0;
17413 overmap_end = 0;
17414 if (copy_page_shift < target_page_shift) {
17415 /*
17416 * Remapping from 4K to 16K: check the VM object alignments
17417 * throughout the range.
17418 * If the start and end of the range are mis-aligned, we can
17419 * over-map to re-align, and adjust the "overmap" start/end
17420 * and "target_size" of the range accordingly.
17421 * If there is any mis-alignment within the range:
17422 * if "copy":
17423 * we can do immediate-copy instead of copy-on-write,
17424 * else:
17425 * no way to remap and share; fail.
17426 */
17427 for (entry = vm_map_copy_first_entry(copy_map);
17428 entry != vm_map_copy_to_entry(copy_map);
17429 entry = entry->vme_next) {
17430 vm_object_offset_t object_offset_start, object_offset_end;
17431
17432 object_offset_start = VME_OFFSET(entry);
17433 object_offset_end = object_offset_start;
17434 object_offset_end += entry->vme_end - entry->vme_start;
17435 if (object_offset_start & target_page_mask) {
17436 if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
17437 overmap_start++;
17438 } else {
17439 misalignments++;
17440 }
17441 }
17442 if (object_offset_end & target_page_mask) {
17443 if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
17444 overmap_end++;
17445 } else {
17446 misalignments++;
17447 }
17448 }
17449 }
17450 }
17451 entry = VM_MAP_ENTRY_NULL;
17452
17453 /* decide how to deal with misalignments */
17454 assert(overmap_start <= 1);
17455 assert(overmap_end <= 1);
17456 if (!overmap_start && !overmap_end && !misalignments) {
17457 /* copy_map is properly aligned for target_map ... */
17458 if (*trimmed_start_p) {
17459 /* ... but we trimmed it, so still need to adjust */
17460 } else {
17461 /* ... and we didn't trim anything: we're done */
17462 if (target_copy_map == VM_MAP_COPY_NULL) {
17463 target_copy_map = copy_map;
17464 }
17465 *target_copy_map_p = target_copy_map;
17466 *overmap_start_p = 0;
17467 *overmap_end_p = 0;
17468 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17469 return KERN_SUCCESS;
17470 }
17471 } else if (misalignments && !copy) {
17472 /* can't "share" if misaligned */
17473 DEBUG4K_ADJUST("unsupported sharing\n");
17474 #if MACH_ASSERT
17475 if (debug4k_panic_on_misaligned_sharing) {
17476 panic("DEBUG4k %s:%d unsupported sharing\n", __FUNCTION__, __LINE__);
17477 }
17478 #endif /* MACH_ASSERT */
17479 DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
17480 return KERN_NOT_SUPPORTED;
17481 } else {
17482 /* can't virtual-copy if misaligned (but can physical-copy) */
17483 DEBUG4K_ADJUST("mis-aligned copying\n");
17484 }
17485
17486 /* get a "target_copy_map" if needed and switch to it */
17487 vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
17488 copy_map = target_copy_map;
17489
17490 if (misalignments && copy) {
17491 vm_map_size_t target_copy_map_size;
17492
17493 /*
17494 * Can't do copy-on-write with misaligned mappings.
17495 * Replace the mappings with a physical copy of the original
17496 * mappings' contents.
17497 */
17498 target_copy_map_size = target_copy_map->size;
17499 vm_map_copy_to_physcopy(target_copy_map, target_map);
17500 *target_copy_map_p = target_copy_map;
17501 *overmap_start_p = 0;
17502 *overmap_end_p = target_copy_map->size - target_copy_map_size;
17503 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17504 return KERN_SUCCESS;
17505 }
17506
17507 /* apply the adjustments */
17508 misalignments = 0;
17509 overmap_start = 0;
17510 overmap_end = 0;
17511 /* remove copy_map->offset, so that everything starts at offset 0 */
17512 addr_adjustment = copy_map->offset;
17513 /* also remove whatever we trimmed from the start */
17514 addr_adjustment += *trimmed_start_p;
17515 for (target_entry = vm_map_copy_first_entry(target_copy_map);
17516 target_entry != vm_map_copy_to_entry(target_copy_map);
17517 target_entry = target_entry->vme_next) {
17518 vm_object_offset_t object_offset_start, object_offset_end;
17519
17520 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17521 object_offset_start = VME_OFFSET(target_entry);
17522 if (object_offset_start & target_page_mask) {
17523 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17524 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17525 /*
17526 * start of 1st entry is mis-aligned:
17527 * re-adjust by over-mapping.
17528 */
17529 overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
17530 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
17531 VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
17532 } else {
17533 misalignments++;
17534 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17535 assert(copy);
17536 }
17537 }
17538
17539 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17540 target_size += overmap_start;
17541 } else {
17542 target_entry->vme_start += overmap_start;
17543 }
17544 target_entry->vme_end += overmap_start;
17545
17546 object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
17547 if (object_offset_end & target_page_mask) {
17548 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17549 if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
17550 /*
17551 * end of last entry is mis-aligned: re-adjust by over-mapping.
17552 */
17553 overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
17554 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
17555 target_entry->vme_end += overmap_end;
17556 target_size += overmap_end;
17557 } else {
17558 misalignments++;
17559 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17560 assert(copy);
17561 }
17562 }
17563 target_entry->vme_start -= addr_adjustment;
17564 target_entry->vme_end -= addr_adjustment;
17565 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17566 }
17567
17568 target_copy_map->size = target_size;
17569 target_copy_map->offset += overmap_start;
17570 target_copy_map->offset -= addr_adjustment;
17571 target_copy_map->cpy_hdr.page_shift = target_page_shift;
17572
17573 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17574 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17575 assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
17576 assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
17577
17578 *target_copy_map_p = target_copy_map;
17579 *overmap_start_p = overmap_start;
17580 *overmap_end_p = overmap_end;
17581
17582 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17583 return KERN_SUCCESS;
17584 }
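/*
 * Worked example (added for exposition; not in the original source),
 * assuming a 4K-page copy map being adjusted for a 16K-page target map
 * (target_page_mask = 0x3fff) and a first entry whose object offset is
 * VME_OFFSET(entry) = 0x5000:
 *
 *   overmap_start = 0x5000 - trunc_page_mask_64(0x5000, 0x3fff)
 *                 = 0x5000 - 0x4000
 *                 = 0x1000
 *
 * so the entry's object offset is pulled back to 0x4000 and the mapping
 * grows by 0x1000 at the front; overmap_end is computed symmetrically at
 * the tail with round_page_mask_64().
 */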
17585
17586 kern_return_t
17587 vm_map_range_physical_size(
17588 vm_map_t map,
17589 vm_map_address_t start,
17590 mach_vm_size_t size,
17591 mach_vm_size_t * phys_size)
17592 {
17593 kern_return_t kr;
17594 vm_map_copy_t copy_map, target_copy_map;
17595 vm_map_offset_t adjusted_start, adjusted_end;
17596 vm_map_size_t adjusted_size;
17597 vm_prot_t cur_prot, max_prot;
17598 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17599 vm_map_kernel_flags_t vmk_flags;
17600
17601 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
17602 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
17603 adjusted_size = adjusted_end - adjusted_start;
17604 *phys_size = adjusted_size;
17605 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
17606 return KERN_SUCCESS;
17607 }
17608 if (start == 0) {
17609 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
17610 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
17611 adjusted_size = adjusted_end - adjusted_start;
17612 *phys_size = adjusted_size;
17613 return KERN_SUCCESS;
17614 }
17615 if (adjusted_size == 0) {
17616 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
17617 *phys_size = 0;
17618 return KERN_SUCCESS;
17619 }
17620
17621 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
17622 vmk_flags.vmkf_copy_pageable = TRUE;
17623 vmk_flags.vmkf_copy_same_map = TRUE;
17624 assert(adjusted_size != 0);
17625 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
17626 VM_PROT_NONE, /* required_protection: no check here */
17627 FALSE /* copy */,
17628 &copy_map,
17629 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
17630 vmk_flags);
17631 if (kr != KERN_SUCCESS) {
17632 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17633 //assert(0);
17634 *phys_size = 0;
17635 return kr;
17636 }
17637 assert(copy_map != VM_MAP_COPY_NULL);
17638 target_copy_map = copy_map;
17639 DEBUG4K_ADJUST("adjusting...\n");
17640 kr = vm_map_copy_adjust_to_target(
17641 copy_map,
17642 start - adjusted_start, /* offset */
17643 size, /* size */
17644 kernel_map,
17645 FALSE, /* copy */
17646 &target_copy_map,
17647 &overmap_start,
17648 &overmap_end,
17649 &trimmed_start);
17650 if (kr == KERN_SUCCESS) {
17651 if (target_copy_map->size != *phys_size) {
17652 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
17653 }
17654 *phys_size = target_copy_map->size;
17655 } else {
17656 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17657 //assert(0);
17658 *phys_size = 0;
17659 }
17660 vm_map_copy_discard(copy_map);
17661 copy_map = VM_MAP_COPY_NULL;
17662
17663 return kr;
17664 }
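/*
 * Illustrative sketch (added for exposition; not in the original source):
 * how a kernel caller might use vm_map_range_physical_size() to learn how
 * much kernel-page-sized backing a user range needs. The helper name is
 * hypothetical.
 */
#if 0 /* example only */
static mach_vm_size_t
example_phys_size(vm_map_t user_map, vm_map_address_t addr, mach_vm_size_t len)
{
        mach_vm_size_t phys_size = 0;

        if (vm_map_range_physical_size(user_map, addr, len,
            &phys_size) != KERN_SUCCESS) {
                return 0;
        }
        /* phys_size covers [addr, addr + len) rounded to kernel pages */
        return phys_size;
}
#endif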
17665
17666
17667 kern_return_t
17668 memory_entry_check_for_adjustment(
17669 vm_map_t src_map,
17670 ipc_port_t port,
17671 vm_map_offset_t *overmap_start,
17672 vm_map_offset_t *overmap_end)
17673 {
17674 kern_return_t kr = KERN_SUCCESS;
17675 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
17676
17677 assert(port);
17678 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
17679
17680 vm_named_entry_t named_entry;
17681
17682 named_entry = (vm_named_entry_t) port->ip_kobject;
17683 named_entry_lock(named_entry);
17684 copy_map = named_entry->backing.copy;
17685 target_copy_map = copy_map;
17686
17687 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
17688 vm_map_offset_t trimmed_start;
17689
17690 trimmed_start = 0;
17691 DEBUG4K_ADJUST("adjusting...\n");
17692 kr = vm_map_copy_adjust_to_target(
17693 copy_map,
17694 0, /* offset */
17695 copy_map->size, /* size */
17696 src_map,
17697 FALSE, /* copy */
17698 &target_copy_map,
17699 overmap_start,
17700 overmap_end,
17701 &trimmed_start);
17702 assert(trimmed_start == 0);
17703 }
17704 named_entry_unlock(named_entry);
17705
17706 return kr;
17707 }
17708
17709
17710 /*
17711 * Routine: vm_remap
17712 *
17713 * Map a portion of a task's address space.
17714 * Mapped region must not overlap more than
17715 * one vm memory object. Protections and
17716 * inheritance attributes remain the same
17717 * as in the original task and are out parameters.
17718 * Source and Target task can be identical.
17719 * Other attributes are identical to those for vm_map().
17720 */
17721 kern_return_t
17722 vm_map_remap(
17723 vm_map_t target_map,
17724 vm_map_address_t *address,
17725 vm_map_size_t size,
17726 vm_map_offset_t mask,
17727 int flags,
17728 vm_map_kernel_flags_t vmk_flags,
17729 vm_tag_t tag,
17730 vm_map_t src_map,
17731 vm_map_offset_t memory_address,
17732 boolean_t copy,
17733 vm_prot_t *cur_protection,
17734 vm_prot_t *max_protection,
17735 vm_inherit_t inheritance)
17736 {
17737 kern_return_t result;
17738 vm_map_entry_t entry;
17739 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
17740 vm_map_entry_t new_entry;
17741 vm_map_copy_t copy_map;
17742 vm_map_offset_t offset_in_mapping;
17743 vm_map_size_t target_size = 0;
17744 vm_map_size_t src_page_mask, target_page_mask;
17745 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17746 vm_map_offset_t initial_memory_address;
17747 vm_map_size_t initial_size;
17748
17749 if (target_map == VM_MAP_NULL) {
17750 return KERN_INVALID_ARGUMENT;
17751 }
17752
17753 initial_memory_address = memory_address;
17754 initial_size = size;
17755 src_page_mask = VM_MAP_PAGE_MASK(src_map);
17756 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17757
17758 switch (inheritance) {
17759 case VM_INHERIT_NONE:
17760 case VM_INHERIT_COPY:
17761 case VM_INHERIT_SHARE:
17762 if (size != 0 && src_map != VM_MAP_NULL) {
17763 break;
17764 }
17765 OS_FALLTHROUGH;
17766 default:
17767 return KERN_INVALID_ARGUMENT;
17768 }
17769
17770 if (src_page_mask != target_page_mask) {
17771 if (copy) {
17772 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17773 } else {
17774 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17775 }
17776 }
17777
17778 /*
17779 * If the user is requesting that we return the address of the
17780 * first byte of the data (rather than the base of the page),
17781 * then we use different rounding semantics: specifically,
17782 * we assume that (memory_address, size) describes a region
17783 * all of whose pages we must cover, rather than a base to be truncated
17784 * down and a size to be added to that base. So we figure out
17785 * the highest page that the requested region includes and make
17786 * sure that the size will cover it.
17787 *
17788 * The key example we're worried about is of the form:
17789 *
17790 * memory_address = 0x1ff0, size = 0x20
17791 *
17792 * With the old semantics, we round down the memory_address to 0x1000
17793 * and round up the size to 0x1000, resulting in our covering *only*
17794 * page 0x1000. With the new semantics, we'd realize that the region covers
17795 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
17796 * 0x1000 and page 0x2000 in the region we remap.
17797 */
17798 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
17799 vm_map_offset_t range_start, range_end;
17800
17801 range_start = vm_map_trunc_page(memory_address, src_page_mask);
17802 range_end = vm_map_round_page(memory_address + size, src_page_mask);
17803 memory_address = range_start;
17804 size = range_end - range_start;
17805 offset_in_mapping = initial_memory_address - memory_address;
17806 } else {
17807 /*
17808 * IMPORTANT:
17809 * This legacy code path is broken: for the range mentioned
17810 * above [ memory_address = 0x1ff0, size = 0x20 ], which spans
17811 * two 4k pages, it yields [ memory_address = 0x1000,
17812 * size = 0x1000 ], which covers only the first 4k page.
17813 * BUT some code unfortunately depends on this bug, so we
17814 * can't fix it without breaking something.
17815 * New code is automatically opted into the new behavior by
17816 * passing the new VM_FLAGS_RETURN_DATA_ADDR flag.
17817 */
17818 offset_in_mapping = 0;
17819 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
17820 size = vm_map_round_page(size, src_page_mask);
17821 initial_memory_address = memory_address;
17822 initial_size = size;
17823 }
17824
17825
17826 if (size == 0) {
17827 return KERN_INVALID_ARGUMENT;
17828 }
17829
17830 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
17831 /* must be copy-on-write to be "media resilient" */
17832 if (!copy) {
17833 return KERN_INVALID_ARGUMENT;
17834 }
17835 }
17836
17837 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
17838 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
17839
17840 assert(size != 0);
17841 result = vm_map_copy_extract(src_map,
17842 memory_address,
17843 size,
17844 VM_PROT_NONE, /* required_protection: no check here */
17845 copy, &copy_map,
17846 cur_protection,
17847 max_protection,
17848 inheritance,
17849 vmk_flags);
17850 if (result != KERN_SUCCESS) {
17851 return result;
17852 }
17853 assert(copy_map != VM_MAP_COPY_NULL);
17854
17855 overmap_start = 0;
17856 overmap_end = 0;
17857 trimmed_start = 0;
17858 target_size = size;
17859 if (src_page_mask != target_page_mask) {
17860 vm_map_copy_t target_copy_map;
17861
17862 target_copy_map = copy_map; /* can modify "copy_map" itself */
17863 DEBUG4K_ADJUST("adjusting...\n");
17864 result = vm_map_copy_adjust_to_target(
17865 copy_map,
17866 offset_in_mapping, /* offset */
17867 initial_size,
17868 target_map,
17869 copy,
17870 &target_copy_map,
17871 &overmap_start,
17872 &overmap_end,
17873 &trimmed_start);
17874 if (result != KERN_SUCCESS) {
17875 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
17876 vm_map_copy_discard(copy_map);
17877 return result;
17878 }
17879 if (trimmed_start == 0) {
17880 /* nothing trimmed: no adjustment needed */
17881 } else if (trimmed_start >= offset_in_mapping) {
17882 /* trimmed more than offset_in_mapping: nothing left */
17883 assert(overmap_start == 0);
17884 assert(overmap_end == 0);
17885 offset_in_mapping = 0;
17886 } else {
17887 /* trimmed some of offset_in_mapping: adjust */
17888 assert(overmap_start == 0);
17889 assert(overmap_end == 0);
17890 offset_in_mapping -= trimmed_start;
17891 }
17892 offset_in_mapping += overmap_start;
17893 target_size = target_copy_map->size;
17894 }
17895
17896 /*
17897 * Allocate/check a range of free virtual address
17898 * space for the target
17899 */
17900 *address = vm_map_trunc_page(*address, target_page_mask);
17901 vm_map_lock(target_map);
17902 target_size = vm_map_round_page(target_size, target_page_mask);
17903 result = vm_map_remap_range_allocate(target_map, address,
17904 target_size,
17905 mask, flags, vmk_flags, tag,
17906 &insp_entry);
17907
17908 for (entry = vm_map_copy_first_entry(copy_map);
17909 entry != vm_map_copy_to_entry(copy_map);
17910 entry = new_entry) {
17911 new_entry = entry->vme_next;
17912 vm_map_copy_entry_unlink(copy_map, entry);
17913 if (result == KERN_SUCCESS) {
17914 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17915 /* no codesigning -> read-only access */
17916 entry->max_protection = VM_PROT_READ;
17917 entry->protection = VM_PROT_READ;
17918 entry->vme_resilient_codesign = TRUE;
17919 }
17920 entry->vme_start += *address;
17921 entry->vme_end += *address;
17922 assert(!entry->map_aligned);
17923 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
17924 !entry->is_sub_map &&
17925 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
17926 VME_OBJECT(entry)->internal)) {
17927 entry->vme_resilient_media = TRUE;
17928 }
17929 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
17930 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
17931 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
17932 vm_map_store_entry_link(target_map, insp_entry, entry,
17933 vmk_flags);
17934 insp_entry = entry;
17935 } else {
17936 if (!entry->is_sub_map) {
17937 vm_object_deallocate(VME_OBJECT(entry));
17938 } else {
17939 vm_map_deallocate(VME_SUBMAP(entry));
17940 }
17941 vm_map_copy_entry_dispose(copy_map, entry);
17942 }
17943 }
17944
17945 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17946 *cur_protection = VM_PROT_READ;
17947 *max_protection = VM_PROT_READ;
17948 }
17949
17950 if (target_map->disable_vmentry_reuse == TRUE) {
17951 assert(!target_map->is_nested_map);
17952 if (target_map->highest_entry_end < insp_entry->vme_end) {
17953 target_map->highest_entry_end = insp_entry->vme_end;
17954 }
17955 }
17956
17957 if (result == KERN_SUCCESS) {
17958 target_map->size += target_size;
17959 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
17960
17961 }
17962 vm_map_unlock(target_map);
17963
17964 if (result == KERN_SUCCESS && target_map->wiring_required) {
17965 result = vm_map_wire_kernel(target_map, *address,
17966 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
17967 TRUE);
17968 }
17969
17970 /*
17971 * If requested, return the address of the data pointed to by the
17972 * request, rather than the base of the resulting page.
17973 */
17974 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
17975 *address += offset_in_mapping;
17976 }
17977
17978 if (src_page_mask != target_page_mask) {
17979 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
17980 }
17981 vm_map_copy_discard(copy_map);
17982 copy_map = VM_MAP_COPY_NULL;
17983
17984 return result;
17985 }
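/*
 * Illustrative sketch (added for exposition; not in the original source):
 * sharing a range of one map into another with vm_map_remap(). The helper
 * name is hypothetical; protections and inheritance are returned by the
 * call, as described in the routine's comment above.
 */
#if 0 /* example only */
static kern_return_t
example_remap_share(
        vm_map_t                target_map,
        vm_map_t                src_map,
        vm_map_offset_t         src_addr,
        vm_map_size_t           len,
        vm_map_address_t        *dst_addr)      /* OUT */
{
        vm_prot_t cur_prot, max_prot;

        *dst_addr = 0;
        return vm_map_remap(target_map,
            dst_addr,
            len,
            0,                          /* mask: no extra alignment */
            VM_FLAGS_ANYWHERE,
            VM_MAP_KERNEL_FLAGS_NONE,
            VM_KERN_MEMORY_NONE,        /* tag */
            src_map,
            src_addr,
            FALSE,                      /* copy: share, don't copy */
            &cur_prot,                  /* OUT */
            &max_prot,                  /* OUT */
            VM_INHERIT_DEFAULT);
}
#endif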
17986
17987 /*
17988 * Routine: vm_map_remap_range_allocate
17989 *
17990 * Description:
17991 * Allocate a range in the specified virtual address map.
17992 * returns the address and the map entry just before the allocated
17993 * range
17994 *
17995 * Map must be locked.
17996 */
17997
17998 static kern_return_t
17999 vm_map_remap_range_allocate(
18000 vm_map_t map,
18001 vm_map_address_t *address, /* IN/OUT */
18002 vm_map_size_t size,
18003 vm_map_offset_t mask,
18004 int flags,
18005 vm_map_kernel_flags_t vmk_flags,
18006 __unused vm_tag_t tag,
18007 vm_map_entry_t *map_entry) /* OUT */
18008 {
18009 vm_map_entry_t entry;
18010 vm_map_offset_t start;
18011 vm_map_offset_t end;
18012 vm_map_offset_t desired_empty_end;
18013 kern_return_t kr;
18014 vm_map_entry_t hole_entry;
18015
18016 StartAgain:;
18017
18018 start = *address;
18019
18020 if (flags & VM_FLAGS_ANYWHERE) {
18021 if (flags & VM_FLAGS_RANDOM_ADDR) {
18022 /*
18023 * Get a random start address.
18024 */
18025 kr = vm_map_random_address_for_size(map, address, size);
18026 if (kr != KERN_SUCCESS) {
18027 return kr;
18028 }
18029 start = *address;
18030 }
18031
18032 /*
18033 * Calculate the first possible address.
18034 */
18035
18036 if (start < map->min_offset) {
18037 start = map->min_offset;
18038 }
18039 if (start > map->max_offset) {
18040 return KERN_NO_SPACE;
18041 }
18042
18043 /*
18044 * Look for the first possible address;
18045 * if there's already something at this
18046 * address, we have to start after it.
18047 */
18048
18049 if (map->disable_vmentry_reuse == TRUE) {
18050 VM_MAP_HIGHEST_ENTRY(map, entry, start);
18051 } else {
18052 if (map->holelistenabled) {
18053 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
18054
18055 if (hole_entry == NULL) {
18056 /*
18057 * No more space in the map?
18058 */
18059 return KERN_NO_SPACE;
18060 } else {
18061 boolean_t found_hole = FALSE;
18062
18063 do {
18064 if (hole_entry->vme_start >= start) {
18065 start = hole_entry->vme_start;
18066 found_hole = TRUE;
18067 break;
18068 }
18069
18070 if (hole_entry->vme_end > start) {
18071 found_hole = TRUE;
18072 break;
18073 }
18074 hole_entry = hole_entry->vme_next;
18075 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
18076
18077 if (found_hole == FALSE) {
18078 return KERN_NO_SPACE;
18079 }
18080
18081 entry = hole_entry;
18082 }
18083 } else {
18084 assert(first_free_is_valid(map));
18085 if (start == map->min_offset) {
18086 if ((entry = map->first_free) != vm_map_to_entry(map)) {
18087 start = entry->vme_end;
18088 }
18089 } else {
18090 vm_map_entry_t tmp_entry;
18091 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
18092 start = tmp_entry->vme_end;
18093 }
18094 entry = tmp_entry;
18095 }
18096 }
18097 start = vm_map_round_page(start,
18098 VM_MAP_PAGE_MASK(map));
18099 }
18100
18101 /*
18102 * In any case, the "entry" always precedes
18103 * the proposed new region throughout the
18104 * loop:
18105 */
18106
18107 while (TRUE) {
18108 vm_map_entry_t next;
18109
18110 /*
18111 * Find the end of the proposed new region.
18112 * Be sure we didn't go beyond the end, or
18113 * wrap around the address.
18114 */
18115
18116 end = ((start + mask) & ~mask);
18117 end = vm_map_round_page(end,
18118 VM_MAP_PAGE_MASK(map));
18119 if (end < start) {
18120 return KERN_NO_SPACE;
18121 }
18122 start = end;
18123 end += size;
18124
18125 /* We want an entire page of empty space, but don't increase the allocation size. */
18126 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
18127
18128 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
18129 if (map->wait_for_space) {
18130 if (size <= (map->max_offset -
18131 map->min_offset)) {
18132 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
18133 vm_map_unlock(map);
18134 thread_block(THREAD_CONTINUE_NULL);
18135 vm_map_lock(map);
18136 goto StartAgain;
18137 }
18138 }
18139
18140 return KERN_NO_SPACE;
18141 }
18142
18143 next = entry->vme_next;
18144
18145 if (map->holelistenabled) {
18146 if (entry->vme_end >= desired_empty_end) {
18147 break;
18148 }
18149 } else {
18150 /*
18151 * If there are no more entries, we must win.
18152 *
18153 * OR
18154 *
18155 * If there is another entry, it must be
18156 * after the end of the potential new region.
18157 */
18158
18159 if (next == vm_map_to_entry(map)) {
18160 break;
18161 }
18162
18163 if (next->vme_start >= desired_empty_end) {
18164 break;
18165 }
18166 }
18167
18168 /*
18169 * Didn't fit -- move to the next entry.
18170 */
18171
18172 entry = next;
18173
18174 if (map->holelistenabled) {
18175 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
18176 /*
18177 * Wrapped around
18178 */
18179 return KERN_NO_SPACE;
18180 }
18181 start = entry->vme_start;
18182 } else {
18183 start = entry->vme_end;
18184 }
18185 }
18186
18187 if (map->holelistenabled) {
18188 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
18189 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
18190 }
18191 }
18192
18193 *address = start;
18194 } else {
18195 vm_map_entry_t temp_entry;
18196
18197 /*
18198 * Verify that:
18199 * the address doesn't itself violate
18200 * the mask requirement.
18201 */
18202
18203 if ((start & mask) != 0) {
18204 return KERN_NO_SPACE;
18205 }
18206
18207
18208 /*
18209 * ... the address is within bounds
18210 */
18211
18212 end = start + size;
18213
18214 if ((start < map->min_offset) ||
18215 (end > map->max_offset) ||
18216 (start >= end)) {
18217 return KERN_INVALID_ADDRESS;
18218 }
18219
18220 /*
18221 * If we're asked to overwrite whatever was mapped in that
18222 * range, first deallocate that range.
18223 */
18224 if (flags & VM_FLAGS_OVERWRITE) {
18225 vm_map_t zap_map;
18226 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
18227
18228 /*
18229 * We use a "zap_map" to avoid having to unlock
18230 * the "map" in vm_map_delete(), which would compromise
18231 * the atomicity of the "deallocate" and then "remap"
18232 * combination.
18233 */
18234 zap_map = vm_map_create(PMAP_NULL,
18235 start,
18236 end,
18237 map->hdr.entries_pageable);
18238 if (zap_map == VM_MAP_NULL) {
18239 return KERN_RESOURCE_SHORTAGE;
18240 }
18241 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
18242 vm_map_disable_hole_optimization(zap_map);
18243
18244 if (vmk_flags.vmkf_overwrite_immutable) {
18245 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
18246 }
18247 kr = vm_map_delete(map, start, end,
18248 remove_flags,
18249 zap_map);
18250 if (kr == KERN_SUCCESS) {
18251 vm_map_destroy(zap_map,
18252 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18253 zap_map = VM_MAP_NULL;
18254 }
18255 }
18256
18257 /*
18258 * ... the starting address isn't allocated
18259 */
18260
18261 if (vm_map_lookup_entry(map, start, &temp_entry)) {
18262 return KERN_NO_SPACE;
18263 }
18264
18265 entry = temp_entry;
18266
18267 /*
18268 * ... the next region doesn't overlap the
18269 * end point.
18270 */
18271
18272 if ((entry->vme_next != vm_map_to_entry(map)) &&
18273 (entry->vme_next->vme_start < end)) {
18274 return KERN_NO_SPACE;
18275 }
18276 }
18277 *map_entry = entry;
18278 return KERN_SUCCESS;
18279 }
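/*
 * Worked example for the VM_FLAGS_ANYWHERE alignment step above (added
 * for exposition; not in the original source): with start = 0x5000 and
 * mask = 0x3fff (16K alignment),
 *
 *   end   = ((0x5000 + 0x3fff) & ~0x3fff) = 0x8000
 *   start = 0x8000, end = 0x8000 + size
 *
 * i.e. the candidate region is pushed up to the next 16K boundary before
 * being checked against the map's existing entries or hole list.
 */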
18280
18281 /*
18282 * vm_map_switch:
18283 *
18284 * Set the address map for the current thread to the specified map
18285 */
18286
18287 vm_map_t
18288 vm_map_switch(
18289 vm_map_t map)
18290 {
18291 int mycpu;
18292 thread_t thread = current_thread();
18293 vm_map_t oldmap = thread->map;
18294
18295 mp_disable_preemption();
18296 mycpu = cpu_number();
18297
18298 /*
18299 * Deactivate the current map and activate the requested map
18300 */
18301 PMAP_SWITCH_USER(thread, map, mycpu);
18302
18303 mp_enable_preemption();
18304 return oldmap;
18305 }
18306
18307
18308 /*
18309 * Routine: vm_map_write_user
18310 *
18311 * Description:
18312 * Copy out data from a kernel space into space in the
18313 * destination map. The space must already exist in the
18314 * destination map.
18315 * NOTE: This routine should only be called by threads
18316 * which can block on a page fault. i.e. kernel mode user
18317 * threads.
18318 *
18319 */
18320 kern_return_t
18321 vm_map_write_user(
18322 vm_map_t map,
18323 void *src_p,
18324 vm_map_address_t dst_addr,
18325 vm_size_t size)
18326 {
18327 kern_return_t kr = KERN_SUCCESS;
18328
18329 if (current_map() == map) {
18330 if (copyout(src_p, dst_addr, size)) {
18331 kr = KERN_INVALID_ADDRESS;
18332 }
18333 } else {
18334 vm_map_t oldmap;
18335
18336 /* take on the identity of the target map while doing */
18337 /* the transfer */
18338
18339 vm_map_reference(map);
18340 oldmap = vm_map_switch(map);
18341 if (copyout(src_p, dst_addr, size)) {
18342 kr = KERN_INVALID_ADDRESS;
18343 }
18344 vm_map_switch(oldmap);
18345 vm_map_deallocate(map);
18346 }
18347 return kr;
18348 }
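/*
 * Illustrative sketch (added for exposition; not in the original source):
 * copying a small kernel value out to an already-mapped address in a user
 * map with vm_map_write_user(). The helper name is hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_write_user_u32(vm_map_t user_map, vm_map_address_t dst_addr, uint32_t value)
{
        /* may fault, so only call from a thread that can block */
        return vm_map_write_user(user_map, &value, dst_addr, sizeof(value));
}
#endif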
18349
18350 /*
18351 * Routine: vm_map_read_user
18352 *
18353 * Description:
18354 * Copy in data from a user space source map into the
18355 * kernel map. The space must already exist in the
18356 * kernel map.
18357 * NOTE: This routine should only be called by threads
18358 * which can block on a page fault. i.e. kernel mode user
18359 * threads.
18360 *
18361 */
18362 kern_return_t
18363 vm_map_read_user(
18364 vm_map_t map,
18365 vm_map_address_t src_addr,
18366 void *dst_p,
18367 vm_size_t size)
18368 {
18369 kern_return_t kr = KERN_SUCCESS;
18370
18371 if (current_map() == map) {
18372 if (copyin(src_addr, dst_p, size)) {
18373 kr = KERN_INVALID_ADDRESS;
18374 }
18375 } else {
18376 vm_map_t oldmap;
18377
18378 /* take on the identity of the target map while doing */
18379 /* the transfer */
18380
18381 vm_map_reference(map);
18382 oldmap = vm_map_switch(map);
18383 if (copyin(src_addr, dst_p, size)) {
18384 kr = KERN_INVALID_ADDRESS;
18385 }
18386 vm_map_switch(oldmap);
18387 vm_map_deallocate(map);
18388 }
18389 return kr;
18390 }
18391
18392
18393 /*
18394 * vm_map_check_protection:
18395 *
18396 * Assert that the target map allows the specified
18397 * privilege on the entire address region given.
18398 * The entire region must be allocated.
18399 */
18400 boolean_t
18401 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18402 vm_map_offset_t end, vm_prot_t protection)
18403 {
18404 vm_map_entry_t entry;
18405 vm_map_entry_t tmp_entry;
18406
18407 vm_map_lock(map);
18408
18409 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18410 vm_map_unlock(map);
18411 return FALSE;
18412 }
18413
18414 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18415 vm_map_unlock(map);
18416 return FALSE;
18417 }
18418
18419 entry = tmp_entry;
18420
18421 while (start < end) {
18422 if (entry == vm_map_to_entry(map)) {
18423 vm_map_unlock(map);
18424 return FALSE;
18425 }
18426
18427 /*
18428 * No holes allowed!
18429 */
18430
18431 if (start < entry->vme_start) {
18432 vm_map_unlock(map);
18433 return FALSE;
18434 }
18435
18436 /*
18437 * Check protection associated with entry.
18438 */
18439
18440 if ((entry->protection & protection) != protection) {
18441 vm_map_unlock(map);
18442 return FALSE;
18443 }
18444
18445 /* go to next entry */
18446
18447 start = entry->vme_end;
18448 entry = entry->vme_next;
18449 }
18450 vm_map_unlock(map);
18451 return TRUE;
18452 }
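/*
 * Illustrative sketch (added for exposition; not in the original source):
 * verifying that an entire user range is readable before acting on it.
 * The helper name is hypothetical.
 */
#if 0 /* example only */
static boolean_t
example_range_is_readable(vm_map_t map, vm_map_offset_t start, vm_map_size_t len)
{
        return vm_map_check_protection(map, start, start + len, VM_PROT_READ);
}
#endif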
18453
18454 kern_return_t
18455 vm_map_purgable_control(
18456 vm_map_t map,
18457 vm_map_offset_t address,
18458 vm_purgable_t control,
18459 int *state)
18460 {
18461 vm_map_entry_t entry;
18462 vm_object_t object;
18463 kern_return_t kr;
18464 boolean_t was_nonvolatile;
18465
18466 /*
18467 * Vet all the input parameters and current type and state of the
18468 * underlying object. Return with an error if anything is amiss.
18469 */
18470 if (map == VM_MAP_NULL) {
18471 return KERN_INVALID_ARGUMENT;
18472 }
18473
18474 if (control != VM_PURGABLE_SET_STATE &&
18475 control != VM_PURGABLE_GET_STATE &&
18476 control != VM_PURGABLE_PURGE_ALL &&
18477 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
18478 return KERN_INVALID_ARGUMENT;
18479 }
18480
18481 if (control == VM_PURGABLE_PURGE_ALL) {
18482 vm_purgeable_object_purge_all();
18483 return KERN_SUCCESS;
18484 }
18485
18486 if ((control == VM_PURGABLE_SET_STATE ||
18487 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
18488 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
18489 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
18490 return KERN_INVALID_ARGUMENT;
18491 }
18492
18493 vm_map_lock_read(map);
18494
18495 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
18496 /*
18497 * Must pass a valid non-submap address.
18498 */
18499 vm_map_unlock_read(map);
18500 return KERN_INVALID_ADDRESS;
18501 }
18502
18503 if ((entry->protection & VM_PROT_WRITE) == 0) {
18504 /*
18505 * Can't apply purgable controls to something you can't write.
18506 */
18507 vm_map_unlock_read(map);
18508 return KERN_PROTECTION_FAILURE;
18509 }
18510
18511 object = VME_OBJECT(entry);
18512 if (object == VM_OBJECT_NULL ||
18513 object->purgable == VM_PURGABLE_DENY) {
18514 /*
18515 * Object must already be present and be purgeable.
18516 */
18517 vm_map_unlock_read(map);
18518 return KERN_INVALID_ARGUMENT;
18519 }
18520
18521 vm_object_lock(object);
18522
18523 #if 00
18524 if (VME_OFFSET(entry) != 0 ||
18525 entry->vme_end - entry->vme_start != object->vo_size) {
18526 /*
18527 * Can only apply purgable controls to the whole (existing)
18528 * object at once.
18529 */
18530 vm_map_unlock_read(map);
18531 vm_object_unlock(object);
18532 return KERN_INVALID_ARGUMENT;
18533 }
18534 #endif
18535
18536 assert(!entry->is_sub_map);
18537 assert(!entry->use_pmap); /* purgeable has its own accounting */
18538
18539 vm_map_unlock_read(map);
18540
18541 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
18542
18543 kr = vm_object_purgable_control(object, control, state);
18544
18545 if (was_nonvolatile &&
18546 object->purgable != VM_PURGABLE_NONVOLATILE &&
18547 map->pmap == kernel_pmap) {
18548 #if DEBUG
18549 object->vo_purgeable_volatilizer = kernel_task;
18550 #endif /* DEBUG */
18551 }
18552
18553 vm_object_unlock(object);
18554
18555 return kr;
18556 }
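/*
 * Illustrative sketch (added for exposition; not in the original source):
 * marking the purgeable object backing an address volatile so the system
 * may reclaim it under memory pressure. The helper name is hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_make_volatile(vm_map_t map, vm_map_offset_t addr)
{
        int state = VM_PURGABLE_VOLATILE;

        /* the entry must be writable and backed by a purgeable object */
        return vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, &state);
}
#endif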
18557
18558 void
18559 vm_map_footprint_query_page_info(
18560 vm_map_t map,
18561 vm_map_entry_t map_entry,
18562 vm_map_offset_t curr_s_offset,
18563 int *disposition_p)
18564 {
18565 int pmap_disp;
18566 vm_object_t object;
18567 int disposition;
18568 int effective_page_size;
18569
18570 vm_map_lock_assert_held(map);
18571 assert(!map->has_corpse_footprint);
18572 assert(curr_s_offset >= map_entry->vme_start);
18573 assert(curr_s_offset < map_entry->vme_end);
18574
18575 object = VME_OBJECT(map_entry);
18576 if (object == VM_OBJECT_NULL) {
18577 *disposition_p = 0;
18578 return;
18579 }
18580
18581 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
18582
18583 pmap_disp = 0;
18584 if (object == VM_OBJECT_NULL) {
18585 /* nothing mapped here: no need to ask */
18586 *disposition_p = 0;
18587 return;
18588 } else if (map_entry->is_sub_map &&
18589 !map_entry->use_pmap) {
18590 /* nested pmap: no footprint */
18591 *disposition_p = 0;
18592 return;
18593 }
18594
18595 /*
18596 * Query the pmap.
18597 */
18598 pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
18599
18600 /*
18601 * Compute this page's disposition.
18602 */
18603 disposition = 0;
18604
18605 /* deal with "alternate accounting" first */
18606 if (!map_entry->is_sub_map &&
18607 object->vo_no_footprint) {
18608 /* does not count in footprint */
18609 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18610 } else if (!map_entry->is_sub_map &&
18611 (object->purgable == VM_PURGABLE_NONVOLATILE ||
18612 (object->purgable == VM_PURGABLE_DENY &&
18613 object->vo_ledger_tag)) &&
18614 VM_OBJECT_OWNER(object) != NULL &&
18615 VM_OBJECT_OWNER(object)->map == map) {
18616 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18617 if ((((curr_s_offset
18618 - map_entry->vme_start
18619 + VME_OFFSET(map_entry))
18620 / effective_page_size) <
18621 (object->resident_page_count +
18622 vm_compressor_pager_get_count(object->pager)))) {
18623 /*
18624 * Non-volatile purgeable object owned
18625 * by this task: report the first
18626 * "#resident + #compressed" pages as
18627 * "resident" (to show that they
18628 * contribute to the footprint) but not
18629 * "dirty" (to avoid double-counting
18630 * with the fake "non-volatile" region
18631 * we'll report at the end of the
18632 * address space to account for all
18633 * (mapped or not) non-volatile memory
18634 * owned by this task).
18635 */
18636 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18637 }
18638 } else if (!map_entry->is_sub_map &&
18639 (object->purgable == VM_PURGABLE_VOLATILE ||
18640 object->purgable == VM_PURGABLE_EMPTY) &&
18641 VM_OBJECT_OWNER(object) != NULL &&
18642 VM_OBJECT_OWNER(object)->map == map) {
18643 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18644 if ((((curr_s_offset
18645 - map_entry->vme_start
18646 + VME_OFFSET(map_entry))
18647 / effective_page_size) <
18648 object->wired_page_count)) {
18649 /*
18650 * Volatile|empty purgeable object owned
18651 * by this task: report the first
18652 * "#wired" pages as "resident" (to
18653 * show that they contribute to the
18654 * footprint) but not "dirty" (to avoid
18655 * double-counting with the fake
18656 * "non-volatile" region we'll report
18657 * at the end of the address space to
18658 * account for all (mapped or not)
18659 * non-volatile memory owned by this
18660 * task).
18661 */
18662 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18663 }
18664 } else if (!map_entry->is_sub_map &&
18665 map_entry->iokit_acct &&
18666 object->internal &&
18667 object->purgable == VM_PURGABLE_DENY) {
18668 /*
18669 * Non-purgeable IOKit memory: phys_footprint
18670 * includes the entire virtual mapping.
18671 */
18672 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18673 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18674 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18675 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
18676 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
18677 /* alternate accounting */
18678 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18679 if (map->pmap->footprint_was_suspended) {
18680 /*
18681 * The assertion below can fail if dyld
18682 * suspended footprint accounting
18683 * while doing some adjustments to
18684 * this page; the mapping would say
18685 * "use pmap accounting" but the page
18686 * would be marked "alternate
18687 * accounting".
18688 */
18689 } else
18690 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18691 {
18692 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18693 }
18694 disposition = 0;
18695 } else {
18696 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
18697 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18698 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18699 disposition |= VM_PAGE_QUERY_PAGE_REF;
18700 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
18701 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18702 } else {
18703 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
18704 }
18705 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
18706 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
18707 }
18708 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
18709 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18710 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18711 }
18712 }
18713
18714 *disposition_p = disposition;
18715 }
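/*
 * Illustrative sketch (added for exposition; not in the original source):
 * querying the footprint disposition of a single page, as
 * vm_map_page_range_info_internal() does below. The helper name is
 * hypothetical.
 */
#if 0 /* example only */
static int
example_footprint_disposition(vm_map_t map, vm_map_offset_t va)
{
        vm_map_entry_t entry;
        int disposition = 0;

        vm_map_lock_read(map);
        if (!map->has_corpse_footprint &&
            vm_map_lookup_entry(map, va, &entry)) {
                vm_map_footprint_query_page_info(map, entry, va, &disposition);
        }
        vm_map_unlock_read(map);
        return disposition;
}
#endif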
18716
18717 kern_return_t
18718 vm_map_page_query_internal(
18719 vm_map_t target_map,
18720 vm_map_offset_t offset,
18721 int *disposition,
18722 int *ref_count)
18723 {
18724 kern_return_t kr;
18725 vm_page_info_basic_data_t info;
18726 mach_msg_type_number_t count;
18727
18728 count = VM_PAGE_INFO_BASIC_COUNT;
18729 kr = vm_map_page_info(target_map,
18730 offset,
18731 VM_PAGE_INFO_BASIC,
18732 (vm_page_info_t) &info,
18733 &count);
18734 if (kr == KERN_SUCCESS) {
18735 *disposition = info.disposition;
18736 *ref_count = info.ref_count;
18737 } else {
18738 *disposition = 0;
18739 *ref_count = 0;
18740 }
18741
18742 return kr;
18743 }
18744
18745 kern_return_t
18746 vm_map_page_info(
18747 vm_map_t map,
18748 vm_map_offset_t offset,
18749 vm_page_info_flavor_t flavor,
18750 vm_page_info_t info,
18751 mach_msg_type_number_t *count)
18752 {
18753 return vm_map_page_range_info_internal(map,
18754 offset, /* start of range */
18755 (offset + 1), /* this will get rounded in the call to the page boundary */
18756 (int)-1, /* effective_page_shift: unspecified */
18757 flavor,
18758 info,
18759 count);
18760 }
18761
18762 kern_return_t
18763 vm_map_page_range_info_internal(
18764 vm_map_t map,
18765 vm_map_offset_t start_offset,
18766 vm_map_offset_t end_offset,
18767 int effective_page_shift,
18768 vm_page_info_flavor_t flavor,
18769 vm_page_info_t info,
18770 mach_msg_type_number_t *count)
18771 {
18772 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
18773 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
18774 vm_page_t m = VM_PAGE_NULL;
18775 kern_return_t retval = KERN_SUCCESS;
18776 int disposition = 0;
18777 int ref_count = 0;
18778 int depth = 0, info_idx = 0;
18779 vm_page_info_basic_t basic_info = 0;
18780 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
18781 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
18782 boolean_t do_region_footprint;
18783 ledger_amount_t ledger_resident, ledger_compressed;
18784 int effective_page_size;
18785 vm_map_offset_t effective_page_mask;
18786
18787 switch (flavor) {
18788 case VM_PAGE_INFO_BASIC:
18789 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
18790 /*
18791 * The "vm_page_info_basic_data" structure was not
18792 * properly padded, so allow the size to be off by
18793 * one to maintain backwards binary compatibility...
18794 */
18795 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
18796 return KERN_INVALID_ARGUMENT;
18797 }
18798 }
18799 break;
18800 default:
18801 return KERN_INVALID_ARGUMENT;
18802 }
18803
18804 if (effective_page_shift == -1) {
18805 effective_page_shift = vm_self_region_page_shift_safely(map);
18806 if (effective_page_shift == -1) {
18807 return KERN_INVALID_ARGUMENT;
18808 }
18809 }
18810 effective_page_size = (1 << effective_page_shift);
18811 effective_page_mask = effective_page_size - 1;
18812
18813 do_region_footprint = task_self_region_footprint();
18814 disposition = 0;
18815 ref_count = 0;
18816 depth = 0;
18817 info_idx = 0; /* Tracks the next index within the info structure to be filled. */
18818 retval = KERN_SUCCESS;
18819
18820 offset_in_page = start_offset & effective_page_mask;
18821 start = vm_map_trunc_page(start_offset, effective_page_mask);
18822 end = vm_map_round_page(end_offset, effective_page_mask);
18823
18824 if (end < start) {
18825 return KERN_INVALID_ARGUMENT;
18826 }
18827
18828 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
18829
18830 vm_map_lock_read(map);
18831
18832 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
18833
18834 for (curr_s_offset = start; curr_s_offset < end;) {
18835 /*
18836 * New lookup needs reset of these variables.
18837 */
18838 curr_object = object = VM_OBJECT_NULL;
18839 offset_in_object = 0;
18840 ref_count = 0;
18841 depth = 0;
18842
18843 if (do_region_footprint &&
18844 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
18845 /*
18846 * Request for "footprint" info about a page beyond
18847 * the end of address space: this must be for
18848 * the fake region vm_map_region_recurse_64()
18849 * reported to account for non-volatile purgeable
18850 * memory owned by this task.
18851 */
18852 disposition = 0;
18853
18854 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
18855 (unsigned) ledger_compressed) {
18856 /*
18857 * We haven't reported all the "non-volatile
18858 * compressed" pages yet, so report this fake
18859 * page as "compressed".
18860 */
18861 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18862 } else {
18863 /*
18864 * We've reported all the non-volatile
18865 * compressed pages but not all the non-volatile
18866 * pages, so report this fake page as
18867 * "resident dirty".
18868 */
18869 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18870 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18871 disposition |= VM_PAGE_QUERY_PAGE_REF;
18872 }
18873 switch (flavor) {
18874 case VM_PAGE_INFO_BASIC:
18875 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18876 basic_info->disposition = disposition;
18877 basic_info->ref_count = 1;
18878 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
18879 basic_info->offset = 0;
18880 basic_info->depth = 0;
18881
18882 info_idx++;
18883 break;
18884 }
18885 curr_s_offset += effective_page_size;
18886 continue;
18887 }
18888
18889 /*
18890 * First, find the map entry covering "curr_s_offset", going down
18891 * submaps if necessary.
18892 */
18893 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
18894 /* no entry -> no object -> no page */
18895
18896 if (curr_s_offset < vm_map_min(map)) {
18897 /*
18898 * Illegal address that falls below map min.
18899 */
18900 curr_e_offset = MIN(end, vm_map_min(map));
18901 } else if (curr_s_offset >= vm_map_max(map)) {
18902 /*
18903 * Illegal address that falls on/after map max.
18904 */
18905 curr_e_offset = end;
18906 } else if (map_entry == vm_map_to_entry(map)) {
18907 /*
18908 * Hit a hole.
18909 */
18910 if (map_entry->vme_next == vm_map_to_entry(map)) {
18911 /*
18912 * Empty map.
18913 */
18914 curr_e_offset = MIN(map->max_offset, end);
18915 } else {
18916 /*
18917 * Hole at start of the map.
18918 */
18919 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
18920 }
18921 } else {
18922 if (map_entry->vme_next == vm_map_to_entry(map)) {
18923 /*
18924 * Hole at the end of the map.
18925 */
18926 curr_e_offset = MIN(map->max_offset, end);
18927 } else {
18928 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
18929 }
18930 }
18931
18932 assert(curr_e_offset >= curr_s_offset);
18933
18934 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
18935
18936 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18937
18938 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
18939
18940 curr_s_offset = curr_e_offset;
18941
18942 info_idx += num_pages;
18943
18944 continue;
18945 }
18946
18947 /* compute offset from this map entry's start */
18948 offset_in_object = curr_s_offset - map_entry->vme_start;
18949
18950 /* compute offset into this map entry's object (or submap) */
18951 offset_in_object += VME_OFFSET(map_entry);
18952
18953 if (map_entry->is_sub_map) {
18954 vm_map_t sub_map = VM_MAP_NULL;
18955 vm_page_info_t submap_info = 0;
18956 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
18957
18958 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
18959
18960 submap_s_offset = offset_in_object;
18961 submap_e_offset = submap_s_offset + range_len;
18962
18963 sub_map = VME_SUBMAP(map_entry);
18964
18965 vm_map_reference(sub_map);
18966 vm_map_unlock_read(map);
18967
18968 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18969
18970 assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
18971 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
18972
18973 retval = vm_map_page_range_info_internal(sub_map,
18974 submap_s_offset,
18975 submap_e_offset,
18976 effective_page_shift,
18977 VM_PAGE_INFO_BASIC,
18978 (vm_page_info_t) submap_info,
18979 count);
18980
18981 assert(retval == KERN_SUCCESS);
18982
18983 vm_map_lock_read(map);
18984 vm_map_deallocate(sub_map);
18985
18986 /* Move the "info" index by the number of pages we inspected.*/
18987 info_idx += range_len >> effective_page_shift;
18988
18989 /* Move our current offset by the size of the range we inspected.*/
18990 curr_s_offset += range_len;
18991
18992 continue;
18993 }
18994
18995 object = VME_OBJECT(map_entry);
18996
18997 if (object == VM_OBJECT_NULL) {
18998 /*
18999 * We don't have an object here and, hence,
19000 * no pages to inspect. We'll fill up the
19001 * info structure appropriately.
19002 */
19003
19004 curr_e_offset = MIN(map_entry->vme_end, end);
19005
19006 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19007
19008 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19009
19010 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19011
19012 curr_s_offset = curr_e_offset;
19013
19014 info_idx += num_pages;
19015
19016 continue;
19017 }
19018
19019 if (do_region_footprint) {
19020 disposition = 0;
19021 if (map->has_corpse_footprint) {
19022 /*
19023 * Query the page info data we saved
19024 * while forking the corpse.
19025 */
19026 vm_map_corpse_footprint_query_page_info(
19027 map,
19028 curr_s_offset,
19029 &disposition);
19030 } else {
19031 /*
19032 * Query the live pmap for footprint info
19033 * about this page.
19034 */
19035 vm_map_footprint_query_page_info(
19036 map,
19037 map_entry,
19038 curr_s_offset,
19039 &disposition);
19040 }
19041 switch (flavor) {
19042 case VM_PAGE_INFO_BASIC:
19043 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19044 basic_info->disposition = disposition;
19045 basic_info->ref_count = 1;
19046 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19047 basic_info->offset = 0;
19048 basic_info->depth = 0;
19049
19050 info_idx++;
19051 break;
19052 }
19053 curr_s_offset += effective_page_size;
19054 continue;
19055 }
19056
19057 vm_object_reference(object);
19058 /*
19059 * Shared mode -- so we can allow other readers
19060 * to grab the lock too.
19061 */
19062 vm_object_lock_shared(object);
19063
19064 curr_e_offset = MIN(map_entry->vme_end, end);
19065
19066 vm_map_unlock_read(map);
19067
19068 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
19069
19070 curr_object = object;
19071
19072 for (; curr_s_offset < curr_e_offset;) {
19073 if (object == curr_object) {
19074 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19075 } else {
19076 ref_count = curr_object->ref_count;
19077 }
19078
19079 curr_offset_in_object = offset_in_object;
19080
19081 for (;;) {
19082 m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
19083
19084 if (m != VM_PAGE_NULL) {
19085 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19086 break;
19087 } else {
19088 if (curr_object->internal &&
19089 curr_object->alive &&
19090 !curr_object->terminating &&
19091 curr_object->pager_ready) {
19092 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
19093 == VM_EXTERNAL_STATE_EXISTS) {
19094 /* the pager has that page */
19095 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19096 break;
19097 }
19098 }
19099
19100 /*
19101 * Go down the VM object shadow chain until we find the page
19102 * we're looking for.
19103 */
19104
19105 if (curr_object->shadow != VM_OBJECT_NULL) {
19106 vm_object_t shadow = VM_OBJECT_NULL;
19107
19108 curr_offset_in_object += curr_object->vo_shadow_offset;
19109 shadow = curr_object->shadow;
19110
19111 vm_object_lock_shared(shadow);
19112 vm_object_unlock(curr_object);
19113
19114 curr_object = shadow;
19115 depth++;
19116 continue;
19117 } else {
19118 break;
19119 }
19120 }
19121 }
19122
19123 /* The ref_count is not strictly accurate: it measures the number */
19124 /* of entities holding a ref on the object; they may not be mapping */
19125 /* the object, or may not be mapping the section holding the */
19126 /* target page, but it's still a ballpark number and, though an */
19127 /* over-count, it picks up the copy-on-write cases. */
19128
19129 /* We could also get a picture of page sharing from pmap_attributes, */
19130 /* but this would under-count as only faulted-in mappings would */
19131 /* show up. */
19132
19133 if ((curr_object == object) && curr_object->shadow) {
19134 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
19135 }
19136
19137 if (!curr_object->internal) {
19138 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19139 }
19140
19141 if (m != VM_PAGE_NULL) {
19142 if (m->vmp_fictitious) {
19143 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
19144 } else {
19145 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
19146 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19147 }
19148
19149 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
19150 disposition |= VM_PAGE_QUERY_PAGE_REF;
19151 }
19152
19153 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
19154 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
19155 }
19156
19157 /*
19158 * XXX TODO4K:
19159 * when this routine deals with 4k
19160 * pages, check the appropriate CS bit
19161 * here.
19162 */
19163 if (m->vmp_cs_validated) {
19164 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
19165 }
19166 if (m->vmp_cs_tainted) {
19167 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
19168 }
19169 if (m->vmp_cs_nx) {
19170 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
19171 }
19172 if (m->vmp_reusable || curr_object->all_reusable) {
19173 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19174 }
19175 }
19176 }
19177
19178 switch (flavor) {
19179 case VM_PAGE_INFO_BASIC:
19180 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19181 basic_info->disposition = disposition;
19182 basic_info->ref_count = ref_count;
19183 basic_info->object_id = (vm_object_id_t) (uintptr_t)
19184 VM_KERNEL_ADDRPERM(curr_object);
19185 basic_info->offset =
19186 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
19187 basic_info->depth = depth;
19188
19189 info_idx++;
19190 break;
19191 }
19192
19193 disposition = 0;
19194 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
19195
19196 /*
19197 * Move to next offset in the range and in our object.
19198 */
19199 curr_s_offset += effective_page_size;
19200 offset_in_object += effective_page_size;
19201 curr_offset_in_object = offset_in_object;
19202
19203 if (curr_object != object) {
19204 vm_object_unlock(curr_object);
19205
19206 curr_object = object;
19207
19208 vm_object_lock_shared(curr_object);
19209 } else {
19210 vm_object_lock_yield_shared(curr_object);
19211 }
19212 }
19213
19214 vm_object_unlock(curr_object);
19215 vm_object_deallocate(curr_object);
19216
19217 vm_map_lock_read(map);
19218 }
19219
19220 vm_map_unlock_read(map);
19221 return retval;
19222 }
19223
19224 /*
19225 * vm_map_msync
19226 *
19227 * Synchronises the memory range specified with its backing store
19228 * image by either flushing or cleaning the contents to the appropriate
19229 * memory manager, engaging in a memory object synchronize dialog with
19230 * the manager. The client doesn't return until the manager issues
19231 * the m_o_s_completed message. MIG magically converts the user task
19232 * parameter to the task's address map.
19233 *
19234 * interpretation of sync_flags
19235 * VM_SYNC_INVALIDATE - discard pages, only return precious
19236 * pages to manager.
19237 *
19238 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19239 * - discard pages, write dirty or precious
19240 * pages back to memory manager.
19241 *
19242 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19243 * - write dirty or precious pages back to
19244 * the memory manager.
19245 *
19246 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19247 * is a hole in the region, and we would
19248 * have returned KERN_SUCCESS, return
19249 * KERN_INVALID_ADDRESS instead.
19250 *
19251 * NOTE
19252 * The memory object attributes have not yet been implemented; this
19253 * function will have to deal with the invalidate attribute.
19254 *
19255 * RETURNS
19256 * KERN_INVALID_TASK Bad task parameter
19257 * KERN_INVALID_ARGUMENT both sync and async were specified.
19258 * KERN_SUCCESS The usual.
19259 * KERN_INVALID_ADDRESS There was a hole in the region.
19260 */
19261
19262 kern_return_t
19263 vm_map_msync(
19264 vm_map_t map,
19265 vm_map_address_t address,
19266 vm_map_size_t size,
19267 vm_sync_t sync_flags)
19268 {
19269 vm_map_entry_t entry;
19270 vm_map_size_t amount_left;
19271 vm_object_offset_t offset;
19272 vm_object_offset_t start_offset, end_offset;
19273 boolean_t do_sync_req;
19274 boolean_t had_hole = FALSE;
19275 vm_map_offset_t pmap_offset;
19276
19277 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19278 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19279 return KERN_INVALID_ARGUMENT;
19280 }
19281
19282 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19283 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19284 }
19285
19286 /*
19287 * align address and size on page boundaries
19288 */
19289 size = (vm_map_round_page(address + size,
19290 VM_MAP_PAGE_MASK(map)) -
19291 vm_map_trunc_page(address,
19292 VM_MAP_PAGE_MASK(map)));
19293 address = vm_map_trunc_page(address,
19294 VM_MAP_PAGE_MASK(map));
19295
19296 if (map == VM_MAP_NULL) {
19297 return KERN_INVALID_TASK;
19298 }
19299
19300 if (size == 0) {
19301 return KERN_SUCCESS;
19302 }
19303
19304 amount_left = size;
19305
19306 while (amount_left > 0) {
19307 vm_object_size_t flush_size;
19308 vm_object_t object;
19309
19310 vm_map_lock(map);
19311 if (!vm_map_lookup_entry(map,
19312 address,
19313 &entry)) {
19314 vm_map_size_t skip;
19315
19316 /*
19317 * hole in the address map.
19318 */
19319 had_hole = TRUE;
19320
19321 if (sync_flags & VM_SYNC_KILLPAGES) {
19322 /*
19323 * For VM_SYNC_KILLPAGES, there should be
19324 * no holes in the range, since we couldn't
19325 * prevent someone else from allocating in
19326 * that hole and we wouldn't want to "kill"
19327 * their pages.
19328 */
19329 vm_map_unlock(map);
19330 break;
19331 }
19332
19333 /*
19334 * Check for empty map.
19335 */
19336 if (entry == vm_map_to_entry(map) &&
19337 entry->vme_next == entry) {
19338 vm_map_unlock(map);
19339 break;
19340 }
19341 /*
19342 * Check that we don't wrap and that
19343 * we have at least one real map entry.
19344 */
19345 if ((map->hdr.nentries == 0) ||
19346 (entry->vme_next->vme_start < address)) {
19347 vm_map_unlock(map);
19348 break;
19349 }
19350 /*
19351 * Move up to the next entry if needed
19352 */
19353 skip = (entry->vme_next->vme_start - address);
19354 if (skip >= amount_left) {
19355 amount_left = 0;
19356 } else {
19357 amount_left -= skip;
19358 }
19359 address = entry->vme_next->vme_start;
19360 vm_map_unlock(map);
19361 continue;
19362 }
19363
19364 offset = address - entry->vme_start;
19365 pmap_offset = address;
19366
19367 /*
19368 * do we have more to flush than is contained in this
19369 * entry ?
19370 */
19371 if (amount_left + entry->vme_start + offset > entry->vme_end) {
19372 flush_size = entry->vme_end -
19373 (entry->vme_start + offset);
19374 } else {
19375 flush_size = amount_left;
19376 }
19377 amount_left -= flush_size;
19378 address += flush_size;
19379
19380 if (entry->is_sub_map == TRUE) {
19381 vm_map_t local_map;
19382 vm_map_offset_t local_offset;
19383
19384 local_map = VME_SUBMAP(entry);
19385 local_offset = VME_OFFSET(entry);
19386 vm_map_reference(local_map);
19387 vm_map_unlock(map);
19388 if (vm_map_msync(
19389 local_map,
19390 local_offset,
19391 flush_size,
19392 sync_flags) == KERN_INVALID_ADDRESS) {
19393 had_hole = TRUE;
19394 }
19395 vm_map_deallocate(local_map);
19396 continue;
19397 }
19398 object = VME_OBJECT(entry);
19399
19400 /*
19401 * We can't sync this object if the object has not been
19402 * created yet
19403 */
19404 if (object == VM_OBJECT_NULL) {
19405 vm_map_unlock(map);
19406 continue;
19407 }
19408 offset += VME_OFFSET(entry);
19409
19410 vm_object_lock(object);
19411
19412 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19413 int kill_pages = 0;
19414 boolean_t reusable_pages = FALSE;
19415
19416 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19417 /*
19418 * This is a destructive operation and so we
19419 * err on the side of limiting the range of
19420 * the operation.
19421 */
19422 start_offset = vm_object_round_page(offset);
19423 end_offset = vm_object_trunc_page(offset + flush_size);
19424
19425 if (end_offset <= start_offset) {
19426 vm_object_unlock(object);
19427 vm_map_unlock(map);
19428 continue;
19429 }
19430
19431 pmap_offset += start_offset - offset;
19432 } else {
19433 start_offset = offset;
19434 end_offset = offset + flush_size;
19435 }
19436
19437 if (sync_flags & VM_SYNC_KILLPAGES) {
19438 if (((object->ref_count == 1) ||
19439 ((object->copy_strategy !=
19440 MEMORY_OBJECT_COPY_SYMMETRIC) &&
19441 (object->copy == VM_OBJECT_NULL))) &&
19442 (object->shadow == VM_OBJECT_NULL)) {
19443 if (object->ref_count != 1) {
19444 vm_page_stats_reusable.free_shared++;
19445 }
19446 kill_pages = 1;
19447 } else {
19448 kill_pages = -1;
19449 }
19450 }
19451 if (kill_pages != -1) {
19452 vm_object_deactivate_pages(
19453 object,
19454 start_offset,
19455 (vm_object_size_t) (end_offset - start_offset),
19456 kill_pages,
19457 reusable_pages,
19458 map->pmap,
19459 pmap_offset);
19460 }
19461 vm_object_unlock(object);
19462 vm_map_unlock(map);
19463 continue;
19464 }
19465 /*
19466 * We can't sync this object if there isn't a pager.
19467 * Don't bother to sync internal objects, since there can't
19468 * be any "permanent" storage for these objects anyway.
19469 */
19470 if ((object->pager == MEMORY_OBJECT_NULL) ||
19471 (object->internal) || (object->private)) {
19472 vm_object_unlock(object);
19473 vm_map_unlock(map);
19474 continue;
19475 }
19476 /*
19477 * keep reference on the object until syncing is done
19478 */
19479 vm_object_reference_locked(object);
19480 vm_object_unlock(object);
19481
19482 vm_map_unlock(map);
19483
19484 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19485 start_offset = vm_object_trunc_page(offset);
19486 end_offset = vm_object_round_page(offset + flush_size);
19487 } else {
19488 start_offset = offset;
19489 end_offset = offset + flush_size;
19490 }
19491
19492 do_sync_req = vm_object_sync(object,
19493 start_offset,
19494 (end_offset - start_offset),
19495 sync_flags & VM_SYNC_INVALIDATE,
19496 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19497 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19498 sync_flags & VM_SYNC_SYNCHRONOUS);
19499
19500 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19501 /*
19502 * clear out the clustering and read-ahead hints
19503 */
19504 vm_object_lock(object);
19505
19506 object->pages_created = 0;
19507 object->pages_used = 0;
19508 object->sequential = 0;
19509 object->last_alloc = 0;
19510
19511 vm_object_unlock(object);
19512 }
19513 vm_object_deallocate(object);
19514 } /* while */
19515
19516 /* for proper msync() behaviour */
19517 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19518 return KERN_INVALID_ADDRESS;
19519 }
19520
19521 return KERN_SUCCESS;
19522 }/* vm_msync */
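
/*
 * Illustrative usage sketch for vm_map_msync() -- not part of the original
 * file, compiled out. It shows how a hypothetical in-kernel caller could map
 * an msync(2)-style synchronous request onto the sync_flags documented above;
 * "example_map", "addr" and "len" are placeholder names.
 */
#if 0
static kern_return_t
example_msync_sync_contiguous(
        vm_map_t                example_map,
        vm_map_address_t        addr,
        vm_map_size_t           len)
{
        /*
         * Write dirty/precious pages back to the memory manager synchronously
         * and, thanks to VM_SYNC_CONTIGUOUS, report KERN_INVALID_ADDRESS if
         * the range contains a hole instead of silently skipping it.
         */
        return vm_map_msync(example_map, addr, len,
            VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}
#endif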
19523
19524 kern_return_t
19525 vm_named_entry_from_vm_object(
19526 vm_named_entry_t named_entry,
19527 vm_object_t object,
19528 vm_object_offset_t offset,
19529 vm_object_size_t size,
19530 vm_prot_t prot)
19531 {
19532 vm_map_copy_t copy;
19533 vm_map_entry_t copy_entry;
19534
19535 assert(!named_entry->is_sub_map);
19536 assert(!named_entry->is_copy);
19537 assert(!named_entry->is_object);
19538 assert(!named_entry->internal);
19539 assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
19540
19541 copy = vm_map_copy_allocate();
19542 copy->type = VM_MAP_COPY_ENTRY_LIST;
19543 copy->offset = offset;
19544 copy->size = size;
19545 copy->cpy_hdr.page_shift = PAGE_SHIFT;
19546 vm_map_store_init(&copy->cpy_hdr);
19547
19548 copy_entry = vm_map_copy_entry_create(copy, FALSE);
19549 copy_entry->protection = prot;
19550 copy_entry->max_protection = prot;
19551 copy_entry->use_pmap = TRUE;
19552 copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
19553 copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
19554 VME_OBJECT_SET(copy_entry, object);
19555 VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
19556 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);
19557
19558 named_entry->backing.copy = copy;
19559 named_entry->is_object = TRUE;
19560 if (object->internal) {
19561 named_entry->internal = TRUE;
19562 }
19563
19564 DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, object, offset, size, prot);
19565
19566 return KERN_SUCCESS;
19567 }
19568
19569 vm_object_t
19570 vm_named_entry_to_vm_object(
19571 vm_named_entry_t named_entry)
19572 {
19573 vm_map_copy_t copy;
19574 vm_map_entry_t copy_entry;
19575 vm_object_t object;
19576
19577 assert(!named_entry->is_sub_map);
19578 assert(!named_entry->is_copy);
19579 assert(named_entry->is_object);
19580 copy = named_entry->backing.copy;
19581 assert(copy != VM_MAP_COPY_NULL);
19582 assert(copy->cpy_hdr.nentries == 1);
19583 copy_entry = vm_map_copy_first_entry(copy);
19584 assert(!copy_entry->is_sub_map);
19585 object = VME_OBJECT(copy_entry);
19586
19587 DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);
19588
19589 return object;
19590 }
19591
19592 /*
19593 * Routine: convert_port_entry_to_map
19594 * Purpose:
19595 * Convert from a port specifying an entry or a task
19596 * to a map. Doesn't consume the port ref; produces a map ref,
19597 * which may be null. Unlike convert_port_to_map, the
19598 * port may be either task backed or named-entry backed.
19599 * Conditions:
19600 * Nothing locked.
19601 */
19602
19603
19604 vm_map_t
19605 convert_port_entry_to_map(
19606 ipc_port_t port)
19607 {
19608 vm_map_t map;
19609 vm_named_entry_t named_entry;
19610 uint32_t try_failed_count = 0;
19611
19612 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19613 while (TRUE) {
19614 ip_lock(port);
19615 if (ip_active(port) && (ip_kotype(port)
19616 == IKOT_NAMED_ENTRY)) {
19617 named_entry =
19618 (vm_named_entry_t) ip_get_kobject(port);
19619 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
19620 ip_unlock(port);
19621
19622 try_failed_count++;
19623 mutex_pause(try_failed_count);
19624 continue;
19625 }
19626 named_entry->ref_count++;
19627 lck_mtx_unlock(&(named_entry)->Lock);
19628 ip_unlock(port);
19629 if ((named_entry->is_sub_map) &&
19630 (named_entry->protection
19631 & VM_PROT_WRITE)) {
19632 map = named_entry->backing.map;
19633 if (map->pmap != PMAP_NULL) {
19634 if (map->pmap == kernel_pmap) {
19635 panic("userspace has access "
19636 "to a kernel map %p", map);
19637 }
19638 pmap_require(map->pmap);
19639 }
19640 } else {
19641 mach_destroy_memory_entry(port);
19642 return VM_MAP_NULL;
19643 }
19644 vm_map_reference_swap(map);
19645 mach_destroy_memory_entry(port);
19646 break;
19647 } else {
19648 return VM_MAP_NULL;
19649 }
19650 }
19651 } else {
19652 map = convert_port_to_map(port);
19653 }
19654
19655 return map;
19656 }
19657
19658 /*
19659 * Routine: convert_port_entry_to_object
19660 * Purpose:
19661 * Convert from a port specifying a named entry to an
19662 * object. Doesn't consume the port ref; produces an object ref,
19663 * which may be null.
19664 * Conditions:
19665 * Nothing locked.
19666 */
19667
19668
19669 vm_object_t
19670 convert_port_entry_to_object(
19671 ipc_port_t port)
19672 {
19673 vm_object_t object = VM_OBJECT_NULL;
19674 vm_named_entry_t named_entry;
19675 uint32_t try_failed_count = 0;
19676
19677 if (IP_VALID(port) &&
19678 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19679 try_again:
19680 ip_lock(port);
19681 if (ip_active(port) &&
19682 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19683 named_entry = (vm_named_entry_t) ip_get_kobject(port);
19684 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
19685 ip_unlock(port);
19686 try_failed_count++;
19687 mutex_pause(try_failed_count);
19688 goto try_again;
19689 }
19690 named_entry->ref_count++;
19691 lck_mtx_unlock(&(named_entry)->Lock);
19692 ip_unlock(port);
19693 if (!(named_entry->is_sub_map) &&
19694 !(named_entry->is_copy) &&
19695 (named_entry->is_object) &&
19696 (named_entry->protection & VM_PROT_WRITE)) {
19697 vm_map_copy_t copy;
19698 vm_map_entry_t copy_entry;
19699
19700 copy = named_entry->backing.copy;
19701 assert(copy->cpy_hdr.nentries == 1);
19702 copy_entry = vm_map_copy_first_entry(copy);
19703 assert(!copy_entry->is_sub_map);
19704 object = VME_OBJECT(copy_entry);
19705 assert(object != VM_OBJECT_NULL);
19706 vm_object_reference(object);
19707 }
19708 mach_destroy_memory_entry(port);
19709 }
19710 }
19711
19712 return object;
19713 }
19714
19715 /*
19716 * Export routines to other components for the things we access locally through
19717 * macros.
19718 */
19719 #undef current_map
19720 vm_map_t
19721 current_map(void)
19722 {
19723 return current_map_fast();
19724 }
19725
19726 /*
19727 * vm_map_reference:
19728 *
19729 * Most code internal to osfmk will go through a
19730 * macro defining this. This is always here for the
19731 * use of other kernel components.
19732 */
19733 #undef vm_map_reference
19734 void
19735 vm_map_reference(
19736 vm_map_t map)
19737 {
19738 if (map == VM_MAP_NULL) {
19739 return;
19740 }
19741
19742 lck_mtx_lock(&map->s_lock);
19743 #if TASK_SWAPPER
19744 assert(map->res_count > 0);
19745 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
19746 map->res_count++;
19747 #endif
19748 os_ref_retain_locked(&map->map_refcnt);
19749 lck_mtx_unlock(&map->s_lock);
19750 }
19751
19752 /*
19753 * vm_map_deallocate:
19754 *
19755 * Removes a reference from the specified map,
19756 * destroying it if no references remain.
19757 * The map should not be locked.
19758 */
19759 void
19760 vm_map_deallocate(
19761 vm_map_t map)
19762 {
19763 unsigned int ref;
19764
19765 if (map == VM_MAP_NULL) {
19766 return;
19767 }
19768
19769 lck_mtx_lock(&map->s_lock);
19770 ref = os_ref_release_locked(&map->map_refcnt);
19771 if (ref > 0) {
19772 vm_map_res_deallocate(map);
19773 lck_mtx_unlock(&map->s_lock);
19774 return;
19775 }
19776 assert(os_ref_get_count(&map->map_refcnt) == 0);
19777 lck_mtx_unlock(&map->s_lock);
19778
19779 #if TASK_SWAPPER
19780 /*
19781 * The map residence count isn't decremented here because
19782 * the vm_map_delete below will traverse the entire map,
19783 * deleting entries, and the residence counts on objects
19784 * and sharing maps will go away then.
19785 */
19786 #endif
19787
19788 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
19789 }
19790
19791 void
19792 vm_map_inspect_deallocate(
19793 vm_map_inspect_t map)
19794 {
19795 vm_map_deallocate((vm_map_t)map);
19796 }
19797
19798 void
19799 vm_map_read_deallocate(
19800 vm_map_read_t map)
19801 {
19802 vm_map_deallocate((vm_map_t)map);
19803 }
19804
19805
19806 void
19807 vm_map_disable_NX(vm_map_t map)
19808 {
19809 if (map == NULL) {
19810 return;
19811 }
19812 if (map->pmap == NULL) {
19813 return;
19814 }
19815
19816 pmap_disable_NX(map->pmap);
19817 }
19818
19819 void
19820 vm_map_disallow_data_exec(vm_map_t map)
19821 {
19822 if (map == NULL) {
19823 return;
19824 }
19825
19826 map->map_disallow_data_exec = TRUE;
19827 }
19828
19829 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19830 * more descriptive.
19831 */
19832 void
19833 vm_map_set_32bit(vm_map_t map)
19834 {
19835 #if defined(__arm__) || defined(__arm64__)
19836 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
19837 #else
19838 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
19839 #endif
19840 }
19841
19842
19843 void
19844 vm_map_set_64bit(vm_map_t map)
19845 {
19846 #if defined(__arm__) || defined(__arm64__)
19847 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
19848 #else
19849 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
19850 #endif
19851 }
19852
19853 /*
19854 * Expand the maximum size of an existing map to the maximum supported.
19855 */
19856 void
19857 vm_map_set_jumbo(vm_map_t map)
19858 {
19859 #if defined (__arm64__) && !defined(CONFIG_ARROW)
19860 vm_map_set_max_addr(map, ~0);
19861 #else /* arm64 */
19862 (void) map;
19863 #endif
19864 }
19865
19866 /*
19867 * This map has a JIT entitlement
19868 */
19869 void
19870 vm_map_set_jit_entitled(vm_map_t map)
19871 {
19872 #if defined (__arm64__)
19873 pmap_set_jit_entitled(map->pmap);
19874 #else /* arm64 */
19875 (void) map;
19876 #endif
19877 }
19878
19879 /*
19880 * Expand the maximum size of an existing map.
19881 */
19882 void
19883 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
19884 {
19885 #if defined(__arm64__)
19886 vm_map_offset_t max_supported_offset = 0;
19887 vm_map_offset_t old_max_offset = map->max_offset;
19888 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
19889
19890 new_max_offset = trunc_page(new_max_offset);
19891
19892 /* The address space cannot be shrunk using this routine. */
19893 if (old_max_offset >= new_max_offset) {
19894 return;
19895 }
19896
19897 if (max_supported_offset < new_max_offset) {
19898 new_max_offset = max_supported_offset;
19899 }
19900
19901 map->max_offset = new_max_offset;
19902
19903 if (map->holes_list->prev->vme_end == old_max_offset) {
19904 /*
19905 * There is already a hole at the end of the map; simply make it bigger.
19906 */
19907 map->holes_list->prev->vme_end = map->max_offset;
19908 } else {
19909 /*
19910 * There is no hole at the end, so we need to create a new hole
19911 * for the new empty space we're creating.
19912 */
19913 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
19914 new_hole->start = old_max_offset;
19915 new_hole->end = map->max_offset;
19916 new_hole->prev = map->holes_list->prev;
19917 new_hole->next = (struct vm_map_entry *)map->holes_list;
19918 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
19919 map->holes_list->prev = (struct vm_map_entry *)new_hole;
19920 }
19921 #else
19922 (void)map;
19923 (void)new_max_offset;
19924 #endif
19925 }
19926
19927 vm_map_offset_t
19928 vm_compute_max_offset(boolean_t is64)
19929 {
19930 #if defined(__arm__) || defined(__arm64__)
19931 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
19932 #else
19933 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
19934 #endif
19935 }
19936
19937 void
19938 vm_map_get_max_aslr_slide_section(
19939 vm_map_t map __unused,
19940 int64_t *max_sections,
19941 int64_t *section_size)
19942 {
19943 #if defined(__arm64__)
19944 *max_sections = 3;
19945 *section_size = ARM_TT_TWIG_SIZE;
19946 #else
19947 *max_sections = 1;
19948 *section_size = 0;
19949 #endif
19950 }
19951
19952 uint64_t
19953 vm_map_get_max_aslr_slide_pages(vm_map_t map)
19954 {
19955 #if defined(__arm64__)
19956 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
19957 * limited embedded address space; this is also meant to minimize pmap
19958 * memory usage on 16KB page systems.
19959 */
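/*
 * Worked example (illustrative assumption): on a 16KB-page map,
 * VM_MAP_PAGE_SHIFT(map) == 14, so this returns 1 << (24 - 14) = 1024
 * pages, i.e. 1024 * 16KB = 16MB of slide range.
 */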
19960 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
19961 #else
19962 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
19963 #endif
19964 }
19965
19966 uint64_t
19967 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
19968 {
19969 #if defined(__arm64__)
19970 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
19971 * of independent entropy on 16KB page systems.
19972 */
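/*
 * Worked example (illustrative assumption): on a 16KB-page map,
 * VM_MAP_PAGE_SHIFT(map) == 14, so this returns 1 << (22 - 14) = 256
 * pages, i.e. 256 * 16KB = 4MB of loader slide range.
 */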
19973 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
19974 #else
19975 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
19976 #endif
19977 }
19978
19979 #ifndef __arm__
19980 boolean_t
19981 vm_map_is_64bit(
19982 vm_map_t map)
19983 {
19984 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
19985 }
19986 #endif
19987
19988 boolean_t
19989 vm_map_has_hard_pagezero(
19990 vm_map_t map,
19991 vm_map_offset_t pagezero_size)
19992 {
19993 /*
19994 * XXX FBDP
19995 * We should lock the VM map (for read) here but we can get away
19996 * with it for now because there can't really be any race condition:
19997 * the VM map's min_offset is changed only when the VM map is created
19998 * and when the zero page is established (when the binary gets loaded),
19999 * and this routine gets called only when the task terminates and the
20000 * VM map is being torn down, and when a new map is created via
20001 * load_machfile()/execve().
20002 */
20003 return map->min_offset >= pagezero_size;
20004 }
20005
20006 /*
20007 * Raise a VM map's maximum offset.
20008 */
20009 kern_return_t
20010 vm_map_raise_max_offset(
20011 vm_map_t map,
20012 vm_map_offset_t new_max_offset)
20013 {
20014 kern_return_t ret;
20015
20016 vm_map_lock(map);
20017 ret = KERN_INVALID_ADDRESS;
20018
20019 if (new_max_offset >= map->max_offset) {
20020 if (!vm_map_is_64bit(map)) {
20021 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20022 map->max_offset = new_max_offset;
20023 ret = KERN_SUCCESS;
20024 }
20025 } else {
20026 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20027 map->max_offset = new_max_offset;
20028 ret = KERN_SUCCESS;
20029 }
20030 }
20031 }
20032
20033 vm_map_unlock(map);
20034 return ret;
20035 }
20036
20037
20038 /*
20039 * Raise a VM map's minimum offset.
20040 * To strictly enforce "page zero" reservation.
20041 */
20042 kern_return_t
20043 vm_map_raise_min_offset(
20044 vm_map_t map,
20045 vm_map_offset_t new_min_offset)
20046 {
20047 vm_map_entry_t first_entry;
20048
20049 new_min_offset = vm_map_round_page(new_min_offset,
20050 VM_MAP_PAGE_MASK(map));
20051
20052 vm_map_lock(map);
20053
20054 if (new_min_offset < map->min_offset) {
20055 /*
20056 * Can't move min_offset backwards, as that would expose
20057 * a part of the address space that was previously, and for
20058 * possibly good reasons, inaccessible.
20059 */
20060 vm_map_unlock(map);
20061 return KERN_INVALID_ADDRESS;
20062 }
20063 if (new_min_offset >= map->max_offset) {
20064 /* can't go beyond the end of the address space */
20065 vm_map_unlock(map);
20066 return KERN_INVALID_ADDRESS;
20067 }
20068
20069 first_entry = vm_map_first_entry(map);
20070 if (first_entry != vm_map_to_entry(map) &&
20071 first_entry->vme_start < new_min_offset) {
20072 /*
20073 * Some memory was already allocated below the new
20074 * minimum offset. It's too late to change it now...
20075 */
20076 vm_map_unlock(map);
20077 return KERN_NO_SPACE;
20078 }
20079
20080 map->min_offset = new_min_offset;
20081
20082 assert(map->holes_list);
20083 map->holes_list->start = new_min_offset;
20084 assert(new_min_offset < map->holes_list->end);
20085
20086 vm_map_unlock(map);
20087
20088 return KERN_SUCCESS;
20089 }
20090
20091 /*
20092 * Set the limit on the maximum amount of user wired memory allowed for this map.
20093 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
20094 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
20095 * don't have to reach over to the BSD data structures.
20096 */
20097
20098 void
20099 vm_map_set_user_wire_limit(vm_map_t map,
20100 vm_size_t limit)
20101 {
20102 map->user_wire_limit = limit;
20103 }
20104
20105
20106 void
20107 vm_map_switch_protect(vm_map_t map,
20108 boolean_t val)
20109 {
20110 vm_map_lock(map);
20111 map->switch_protect = val;
20112 vm_map_unlock(map);
20113 }
20114
20115 extern int cs_process_enforcement_enable;
20116 boolean_t
20117 vm_map_cs_enforcement(
20118 vm_map_t map)
20119 {
20120 if (cs_process_enforcement_enable) {
20121 return TRUE;
20122 }
20123 return map->cs_enforcement;
20124 }
20125
20126 void
20127 vm_map_cs_enforcement_set(
20128 vm_map_t map,
20129 boolean_t val)
20130 {
20131 vm_map_lock(map);
20132 map->cs_enforcement = val;
20133 pmap_set_vm_map_cs_enforced(map->pmap, val);
20134 vm_map_unlock(map);
20135 }
20136
20137 /*
20138 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20139 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20140 * bump both counters.
20141 */
20142 void
20143 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
20144 {
20145 pmap_t pmap = vm_map_pmap(map);
20146
20147 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20148 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20149 }
20150
20151 void
20152 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
20153 {
20154 pmap_t pmap = vm_map_pmap(map);
20155
20156 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20157 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20158 }
20159
20160 /* Add (generate) code signature for memory range */
20161 #if CONFIG_DYNAMIC_CODE_SIGNING
20162 kern_return_t
20163 vm_map_sign(vm_map_t map,
20164 vm_map_offset_t start,
20165 vm_map_offset_t end)
20166 {
20167 vm_map_entry_t entry;
20168 vm_page_t m;
20169 vm_object_t object;
20170
20171 /*
20172 * Vet all the input parameters and current type and state of the
20173 * underlying object. Return with an error if anything is amiss.
20174 */
20175 if (map == VM_MAP_NULL) {
20176 return KERN_INVALID_ARGUMENT;
20177 }
20178
20179 vm_map_lock_read(map);
20180
20181 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
20182 /*
20183 * Must pass a valid non-submap address.
20184 */
20185 vm_map_unlock_read(map);
20186 return KERN_INVALID_ADDRESS;
20187 }
20188
20189 if ((entry->vme_start > start) || (entry->vme_end < end)) {
20190 /*
20191 * Map entry doesn't cover the requested range. Not handling
20192 * this situation currently.
20193 */
20194 vm_map_unlock_read(map);
20195 return KERN_INVALID_ARGUMENT;
20196 }
20197
20198 object = VME_OBJECT(entry);
20199 if (object == VM_OBJECT_NULL) {
20200 /*
20201 * Object must already be present or we can't sign.
20202 */
20203 vm_map_unlock_read(map);
20204 return KERN_INVALID_ARGUMENT;
20205 }
20206
20207 vm_object_lock(object);
20208 vm_map_unlock_read(map);
20209
20210 while (start < end) {
20211 uint32_t refmod;
20212
20213 m = vm_page_lookup(object,
20214 start - entry->vme_start + VME_OFFSET(entry));
20215 if (m == VM_PAGE_NULL) {
20216 /* should we try to fault a page here? We can probably
20217 * demand that it exists and is locked for this request. */
20218 vm_object_unlock(object);
20219 return KERN_FAILURE;
20220 }
20221 /* deal with special page status */
20222 if (m->vmp_busy ||
20223 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
20224 vm_object_unlock(object);
20225 return KERN_FAILURE;
20226 }
20227
20228 /* Page is OK... now "validate" it */
20229 /* This is the place where we'll call out to create a code
20230 * directory, later */
20231 /* XXX TODO4K: deal with 4k subpages individually? */
20232 m->vmp_cs_validated = VMP_CS_ALL_TRUE;
20233
20234 /* The page is now "clean" for codesigning purposes. That means
20235 * we don't consider it as modified (wpmapped) anymore. But
20236 * we'll disconnect the page so we note any future modification
20237 * attempts. */
20238 m->vmp_wpmapped = FALSE;
20239 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
20240
20241 /* Pull the dirty status from the pmap, since we cleared the
20242 * wpmapped bit */
20243 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
20244 SET_PAGE_DIRTY(m, FALSE);
20245 }
20246
20247 /* On to the next page */
20248 start += PAGE_SIZE;
20249 }
20250 vm_object_unlock(object);
20251
20252 return KERN_SUCCESS;
20253 }
20254 #endif
20255
20256 kern_return_t
20257 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20258 {
20259 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
20260 vm_map_entry_t next_entry;
20261 kern_return_t kr = KERN_SUCCESS;
20262 vm_map_t zap_map;
20263
20264 vm_map_lock(map);
20265
20266 /*
20267 * We use a "zap_map" to avoid having to unlock
20268 * the "map" in vm_map_delete().
20269 */
20270 zap_map = vm_map_create(PMAP_NULL,
20271 map->min_offset,
20272 map->max_offset,
20273 map->hdr.entries_pageable);
20274
20275 if (zap_map == VM_MAP_NULL) {
20276 vm_map_unlock(map);
20277 return KERN_RESOURCE_SHORTAGE;
20278 }
20278
20279 vm_map_set_page_shift(zap_map,
20280 VM_MAP_PAGE_SHIFT(map));
20281 vm_map_disable_hole_optimization(zap_map);
20282
20283 for (entry = vm_map_first_entry(map);
20284 entry != vm_map_to_entry(map);
20285 entry = next_entry) {
20286 next_entry = entry->vme_next;
20287
20288 if (VME_OBJECT(entry) &&
20289 !entry->is_sub_map &&
20290 (VME_OBJECT(entry)->internal == TRUE) &&
20291 (VME_OBJECT(entry)->ref_count == 1)) {
20292 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20293 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20294
20295 (void)vm_map_delete(map,
20296 entry->vme_start,
20297 entry->vme_end,
20298 VM_MAP_REMOVE_SAVE_ENTRIES,
20299 zap_map);
20300 }
20301 }
20302
20303 vm_map_unlock(map);
20304
20305 /*
20306 * Get rid of the "zap_map" and all the map entries that
20307 * they may still contain.
20308 */
20309 if (zap_map != VM_MAP_NULL) {
20310 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
20311 zap_map = VM_MAP_NULL;
20312 }
20313
20314 return kr;
20315 }
20316
20317
20318 #if DEVELOPMENT || DEBUG
20319
20320 int
20321 vm_map_disconnect_page_mappings(
20322 vm_map_t map,
20323 boolean_t do_unnest)
20324 {
20325 vm_map_entry_t entry;
20326 int page_count = 0;
20327
20328 if (do_unnest == TRUE) {
20329 #ifndef NO_NESTED_PMAP
20330 vm_map_lock(map);
20331
20332 for (entry = vm_map_first_entry(map);
20333 entry != vm_map_to_entry(map);
20334 entry = entry->vme_next) {
20335 if (entry->is_sub_map && entry->use_pmap) {
20336 /*
20337 * Make sure the range between the start of this entry and
20338 * the end of this entry is no longer nested, so that
20339 * we will only remove mappings from the pmap in use by
20340 * this task.
20341 */
20342 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
20343 }
20344 }
20345 vm_map_unlock(map);
20346 #endif
20347 }
20348 vm_map_lock_read(map);
20349
20350 page_count = map->pmap->stats.resident_count;
20351
20352 for (entry = vm_map_first_entry(map);
20353 entry != vm_map_to_entry(map);
20354 entry = entry->vme_next) {
20355 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
20356 (VME_OBJECT(entry)->phys_contiguous))) {
20357 continue;
20358 }
20359 if (entry->is_sub_map) {
20360 assert(!entry->use_pmap);
20361 }
20362
20363 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
20364 }
20365 vm_map_unlock_read(map);
20366
20367 return page_count;
20368 }
20369
20370 kern_return_t
20371 vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
20372 {
20373 vm_object_t object = NULL;
20374 vm_object_offset_t offset;
20375 vm_prot_t prot;
20376 boolean_t wired;
20377 vm_map_version_t version;
20378 vm_map_t real_map;
20379 int result = KERN_FAILURE;
20380
20381 vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
20382 vm_map_lock(map);
20383
20384 result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
20385 OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
20386 NULL, &real_map, NULL);
20387 if (object == NULL) {
20388 result = KERN_MEMORY_ERROR;
20389 } else if (object->pager) {
20390 result = vm_compressor_pager_inject_error(object->pager,
20391 offset);
20392 } else {
20393 result = KERN_MEMORY_PRESENT;
20394 }
20395
20396 if (object != NULL) {
20397 vm_object_unlock(object);
20398 }
20399
20400 if (real_map != map) {
20401 vm_map_unlock(real_map);
20402 }
20403 vm_map_unlock(map);
20404
20405 return result;
20406 }
20407
20408 #endif
20409
20410
20411 #if CONFIG_FREEZE
20412
20413
20414 extern struct freezer_context freezer_context_global;
20415 AbsoluteTime c_freezer_last_yield_ts = 0;
20416
20417 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20418 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20419
20420 kern_return_t
20421 vm_map_freeze(
20422 task_t task,
20423 unsigned int *purgeable_count,
20424 unsigned int *wired_count,
20425 unsigned int *clean_count,
20426 unsigned int *dirty_count,
20427 unsigned int dirty_budget,
20428 unsigned int *shared_count,
20429 int *freezer_error_code,
20430 boolean_t eval_only)
20431 {
20432 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
20433 kern_return_t kr = KERN_SUCCESS;
20434 boolean_t evaluation_phase = TRUE;
20435 vm_object_t cur_shared_object = NULL;
20436 int cur_shared_obj_ref_cnt = 0;
20437 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
20438
20439 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
20440
20441 /*
20442 * We need the exclusive lock here so that we can
20443 * block any page faults or lookups while we are
20444 * in the middle of freezing this vm map.
20445 */
20446 vm_map_t map = task->map;
20447
20448 vm_map_lock(map);
20449
20450 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
20451
20452 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20453 if (vm_compressor_low_on_space()) {
20454 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20455 }
20456
20457 if (vm_swap_low_on_space()) {
20458 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20459 }
20460
20461 kr = KERN_NO_SPACE;
20462 goto done;
20463 }
20464
20465 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
20466 /*
20467 * In-memory compressor backing the freezer. No disk.
20468 * So no need to do the evaluation phase.
20469 */
20470 evaluation_phase = FALSE;
20471
20472 if (eval_only == TRUE) {
20473 /*
20474 * We don't support 'eval_only' mode
20475 * in this non-swap config.
20476 */
20477 *freezer_error_code = FREEZER_ERROR_GENERIC;
20478 kr = KERN_INVALID_ARGUMENT;
20479 goto done;
20480 }
20481
20482 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20483 clock_get_uptime(&c_freezer_last_yield_ts);
20484 }
20485 again:
20486
20487 for (entry2 = vm_map_first_entry(map);
20488 entry2 != vm_map_to_entry(map);
20489 entry2 = entry2->vme_next) {
20490 vm_object_t src_object = VME_OBJECT(entry2);
20491
20492 if (src_object &&
20493 !entry2->is_sub_map &&
20494 !src_object->phys_contiguous) {
20495 /* If eligible, scan the entry, moving eligible pages over to our parent object */
20496
20497 if (src_object->internal == TRUE) {
20498 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
20499 /*
20500 * We skip purgeable objects during evaluation phase only.
20501 * If we decide to freeze this process, we'll explicitly
20502 * purge these objects before we go around again with
20503 * 'evaluation_phase' set to FALSE.
20504 */
20505
20506 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
20507 /*
20508 * We want to purge objects that may not belong to this task but are mapped
20509 * in this task alone. Since we already purged this task's purgeable memory
20510 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
20511 * on this task's purgeable objects. Hence the check for only volatile objects.
20512 */
20513 if (evaluation_phase == FALSE &&
20514 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
20515 (src_object->ref_count == 1)) {
20516 vm_object_lock(src_object);
20517 vm_object_purge(src_object, 0);
20518 vm_object_unlock(src_object);
20519 }
20520 continue;
20521 }
20522
20523 /*
20524 * Pages belonging to this object could be swapped to disk.
20525 * Make sure it's not a shared object because we could end
20526 * up just bringing it back in again.
20527 *
20528 * We try to optimize somewhat by checking for objects that are mapped
20529 * more than once within our own map. But we don't do full searches;
20530 * we just look at the entries following our current entry.
20531 */
20532
20533 if (src_object->ref_count > 1) {
20534 if (src_object != cur_shared_object) {
20535 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20536 dirty_shared_count += obj_pages_snapshot;
20537
20538 cur_shared_object = src_object;
20539 cur_shared_obj_ref_cnt = 1;
20540 continue;
20541 } else {
20542 cur_shared_obj_ref_cnt++;
20543 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
20544 /*
20545 * Fall through to below and treat this object as private.
20546 * So deduct its pages from our shared total and add it to the
20547 * private total.
20548 */
20549
20550 dirty_shared_count -= obj_pages_snapshot;
20551 dirty_private_count += obj_pages_snapshot;
20552 } else {
20553 continue;
20554 }
20555 }
20556 }
20557
20558
20559 if (src_object->ref_count == 1) {
20560 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20561 }
20562
20563 if (evaluation_phase == TRUE) {
20564 continue;
20565 }
20566 }
20567
20568 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
20569 *wired_count += src_object->wired_page_count;
20570
20571 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20572 if (vm_compressor_low_on_space()) {
20573 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20574 }
20575
20576 if (vm_swap_low_on_space()) {
20577 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20578 }
20579
20580 kr = KERN_NO_SPACE;
20581 break;
20582 }
20583 if (paged_out_count >= dirty_budget) {
20584 break;
20585 }
20586 dirty_budget -= paged_out_count;
20587 }
20588 }
20589 }
20590
20591 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
20592 if (evaluation_phase) {
20593 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
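/*
 * Worked example (hypothetical numbers, for illustration only): with a
 * 600MB per-process shared cap and 16KB pages, shared_pages_threshold
 * would be (600 * 1024 * 1024) / 16384 = 38400 pages.
 */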
20594
20595 if (dirty_shared_count > shared_pages_threshold) {
20596 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
20597 kr = KERN_FAILURE;
20598 goto done;
20599 }
20600
20601 if (dirty_shared_count &&
20602 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
20603 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
20604 kr = KERN_FAILURE;
20605 goto done;
20606 }
20607
20608 evaluation_phase = FALSE;
20609 dirty_shared_count = dirty_private_count = 0;
20610
20611 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20612 clock_get_uptime(&c_freezer_last_yield_ts);
20613
20614 if (eval_only) {
20615 kr = KERN_SUCCESS;
20616 goto done;
20617 }
20618
20619 vm_purgeable_purge_task_owned(task);
20620
20621 goto again;
20622 } else {
20623 kr = KERN_SUCCESS;
20624 }
20625
20626 done:
20627 vm_map_unlock(map);
20628
20629 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
20630 vm_object_compressed_freezer_done();
20631 }
20632 return kr;
20633 }
20634
20635 #endif
20636
20637 /*
20638 * vm_map_entry_should_cow_for_true_share:
20639 *
20640 * Determines if the map entry should be clipped and set up for copy-on-write
20641 * to avoid applying "true_share" to a large VM object when only a subset is
20642 * targeted.
20643 *
20644 * For now, we target only the map entries created for the Objective C
20645 * Garbage Collector, which initially have the following properties:
20646 * - alias == VM_MEMORY_MALLOC
20647 * - wired_count == 0
20648 * - !needs_copy
20649 * and a VM object with:
20650 * - internal
20651 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20652 * - !true_share
20653 * - vo_size == ANON_CHUNK_SIZE
20654 *
20655 * Only non-kernel map entries.
20656 */
20657 boolean_t
20658 vm_map_entry_should_cow_for_true_share(
20659 vm_map_entry_t entry)
20660 {
20661 vm_object_t object;
20662
20663 if (entry->is_sub_map) {
20664 /* entry does not point at a VM object */
20665 return FALSE;
20666 }
20667
20668 if (entry->needs_copy) {
20669 /* already set for copy_on_write: done! */
20670 return FALSE;
20671 }
20672
20673 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
20674 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
20675 /* not a malloc heap or Obj-C Garbage Collector heap */
20676 return FALSE;
20677 }
20678
20679 if (entry->wired_count) {
20680 /* wired: can't change the map entry... */
20681 vm_counters.should_cow_but_wired++;
20682 return FALSE;
20683 }
20684
20685 object = VME_OBJECT(entry);
20686
20687 if (object == VM_OBJECT_NULL) {
20688 /* no object yet... */
20689 return FALSE;
20690 }
20691
20692 if (!object->internal) {
20693 /* not an internal object */
20694 return FALSE;
20695 }
20696
20697 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
20698 /* not the default copy strategy */
20699 return FALSE;
20700 }
20701
20702 if (object->true_share) {
20703 /* already true_share: too late to avoid it */
20704 return FALSE;
20705 }
20706
20707 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
20708 object->vo_size != ANON_CHUNK_SIZE) {
20709 /* ... not an object created for the ObjC Garbage Collector */
20710 return FALSE;
20711 }
20712
20713 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
20714 object->vo_size != 2048 * 4096) {
20715 /* ... not a "MALLOC_SMALL" heap */
20716 return FALSE;
20717 }
20718
20719 /*
20720 * All the criteria match: we have a large object being targeted for "true_share".
20721 * To limit the adverse side-effects linked with "true_share", tell the caller to
20722 * try and avoid setting up the entire object for "true_share" by clipping the
20723 * targeted range and setting it up for copy-on-write.
20724 */
20725 return TRUE;
20726 }
20727
20728 vm_map_offset_t
20729 vm_map_round_page_mask(
20730 vm_map_offset_t offset,
20731 vm_map_offset_t mask)
20732 {
20733 return VM_MAP_ROUND_PAGE(offset, mask);
20734 }
20735
20736 vm_map_offset_t
20737 vm_map_trunc_page_mask(
20738 vm_map_offset_t offset,
20739 vm_map_offset_t mask)
20740 {
20741 return VM_MAP_TRUNC_PAGE(offset, mask);
20742 }
20743
20744 boolean_t
20745 vm_map_page_aligned(
20746 vm_map_offset_t offset,
20747 vm_map_offset_t mask)
20748 {
20749 return ((offset) & mask) == 0;
20750 }
20751
20752 int
20753 vm_map_page_shift(
20754 vm_map_t map)
20755 {
20756 return VM_MAP_PAGE_SHIFT(map);
20757 }
20758
20759 int
20760 vm_map_page_size(
20761 vm_map_t map)
20762 {
20763 return VM_MAP_PAGE_SIZE(map);
20764 }
20765
20766 vm_map_offset_t
20767 vm_map_page_mask(
20768 vm_map_t map)
20769 {
20770 return VM_MAP_PAGE_MASK(map);
20771 }
20772
20773 kern_return_t
20774 vm_map_set_page_shift(
20775 vm_map_t map,
20776 int pageshift)
20777 {
20778 if (map->hdr.nentries != 0) {
20779 /* too late to change page size */
20780 return KERN_FAILURE;
20781 }
20782
20783 map->hdr.page_shift = pageshift;
20784
20785 return KERN_SUCCESS;
20786 }
20787
20788 kern_return_t
20789 vm_map_query_volatile(
20790 vm_map_t map,
20791 mach_vm_size_t *volatile_virtual_size_p,
20792 mach_vm_size_t *volatile_resident_size_p,
20793 mach_vm_size_t *volatile_compressed_size_p,
20794 mach_vm_size_t *volatile_pmap_size_p,
20795 mach_vm_size_t *volatile_compressed_pmap_size_p)
20796 {
20797 mach_vm_size_t volatile_virtual_size;
20798 mach_vm_size_t volatile_resident_count;
20799 mach_vm_size_t volatile_compressed_count;
20800 mach_vm_size_t volatile_pmap_count;
20801 mach_vm_size_t volatile_compressed_pmap_count;
20802 mach_vm_size_t resident_count;
20803 vm_map_entry_t entry;
20804 vm_object_t object;
20805
20806 /* map should be locked by caller */
20807
20808 volatile_virtual_size = 0;
20809 volatile_resident_count = 0;
20810 volatile_compressed_count = 0;
20811 volatile_pmap_count = 0;
20812 volatile_compressed_pmap_count = 0;
20813
20814 for (entry = vm_map_first_entry(map);
20815 entry != vm_map_to_entry(map);
20816 entry = entry->vme_next) {
20817 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
20818
20819 if (entry->is_sub_map) {
20820 continue;
20821 }
20822 if (!(entry->protection & VM_PROT_WRITE)) {
20823 continue;
20824 }
20825 object = VME_OBJECT(entry);
20826 if (object == VM_OBJECT_NULL) {
20827 continue;
20828 }
20829 if (object->purgable != VM_PURGABLE_VOLATILE &&
20830 object->purgable != VM_PURGABLE_EMPTY) {
20831 continue;
20832 }
20833 if (VME_OFFSET(entry)) {
20834 /*
20835 * If the map entry has been split and the object now
20836 * appears several times in the VM map, we don't want
20837 * to count the object's resident_page_count more than
20838 * once. We count it only for the first one, starting
20839 * at offset 0 and ignore the other VM map entries.
20840 */
20841 continue;
20842 }
20843 resident_count = object->resident_page_count;
20844 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
20845 resident_count = 0;
20846 } else {
20847 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
20848 }
20849
20850 volatile_virtual_size += entry->vme_end - entry->vme_start;
20851 volatile_resident_count += resident_count;
20852 if (object->pager) {
20853 volatile_compressed_count +=
20854 vm_compressor_pager_get_count(object->pager);
20855 }
20856 pmap_compressed_bytes = 0;
20857 pmap_resident_bytes =
20858 pmap_query_resident(map->pmap,
20859 entry->vme_start,
20860 entry->vme_end,
20861 &pmap_compressed_bytes);
20862 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
20863 volatile_compressed_pmap_count += (pmap_compressed_bytes
20864 / PAGE_SIZE);
20865 }
20866
20867 /* map is still locked on return */
20868
20869 *volatile_virtual_size_p = volatile_virtual_size;
20870 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
20871 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
20872 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
20873 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
20874
20875 return KERN_SUCCESS;
20876 }
20877
20878 void
20879 vm_map_sizes(vm_map_t map,
20880 vm_map_size_t * psize,
20881 vm_map_size_t * pfree,
20882 vm_map_size_t * plargest_free)
20883 {
20884 vm_map_entry_t entry;
20885 vm_map_offset_t prev;
20886 vm_map_size_t free, total_free, largest_free;
20887 boolean_t end;
20888
20889 if (!map) {
20890 *psize = *pfree = *plargest_free = 0;
20891 return;
20892 }
20893 total_free = largest_free = 0;
20894
20895 vm_map_lock_read(map);
20896 if (psize) {
20897 *psize = map->max_offset - map->min_offset;
20898 }
20899
20900 prev = map->min_offset;
20901 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
20902 end = (entry == vm_map_to_entry(map));
20903
20904 if (end) {
20905 free = entry->vme_end - prev;
20906 } else {
20907 free = entry->vme_start - prev;
20908 }
20909
20910 total_free += free;
20911 if (free > largest_free) {
20912 largest_free = free;
20913 }
20914
20915 if (end) {
20916 break;
20917 }
20918 prev = entry->vme_end;
20919 }
20920 vm_map_unlock_read(map);
20921 if (pfree) {
20922 *pfree = total_free;
20923 }
20924 if (plargest_free) {
20925 *plargest_free = largest_free;
20926 }
20927 }
20928
20929 #if VM_SCAN_FOR_SHADOW_CHAIN
20930 int vm_map_shadow_max(vm_map_t map);
20931 int
20932 vm_map_shadow_max(
20933 vm_map_t map)
20934 {
20935 int shadows, shadows_max;
20936 vm_map_entry_t entry;
20937 vm_object_t object, next_object;
20938
20939 if (map == NULL) {
20940 return 0;
20941 }
20942
20943 shadows_max = 0;
20944
20945 vm_map_lock_read(map);
20946
20947 for (entry = vm_map_first_entry(map);
20948 entry != vm_map_to_entry(map);
20949 entry = entry->vme_next) {
20950 if (entry->is_sub_map) {
20951 continue;
20952 }
20953 object = VME_OBJECT(entry);
20954 if (object == NULL) {
20955 continue;
20956 }
20957 vm_object_lock_shared(object);
20958 for (shadows = 0;
20959 object->shadow != NULL;
20960 shadows++, object = next_object) {
20961 next_object = object->shadow;
20962 vm_object_lock_shared(next_object);
20963 vm_object_unlock(object);
20964 }
20965 vm_object_unlock(object);
20966 if (shadows > shadows_max) {
20967 shadows_max = shadows;
20968 }
20969 }
20970
20971 vm_map_unlock_read(map);
20972
20973 return shadows_max;
20974 }
20975 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
20976
20977 void
20978 vm_commit_pagezero_status(vm_map_t lmap)
20979 {
20980 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
20981 }
20982
20983 #if XNU_TARGET_OS_OSX
20984 void
20985 vm_map_set_high_start(
20986 vm_map_t map,
20987 vm_map_offset_t high_start)
20988 {
20989 map->vmmap_high_start = high_start;
20990 }
20991 #endif /* XNU_TARGET_OS_OSX */
20992
20993
20994 /*
20995 * FORKED CORPSE FOOTPRINT
20996 *
20997 * A forked corpse gets a copy of the original VM map but its pmap is mostly
20998 * empty since it never ran and never got to fault in any pages.
20999 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21000 * a forked corpse would therefore return very little information.
21001 *
21002 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21003 * to vm_map_fork() to collect footprint information from the original VM map
21004 * and its pmap, and store it in the forked corpse's VM map. That information
21005 * is stored in place of the VM map's "hole list" since we'll never need to
21006 * lookup for holes in the corpse's map.
21007 *
21008 * The corpse's footprint info looks like this:
21009 *
21010 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21011 * as follows:
21012 * +---------------------------------------+
21013 * header-> | cf_size |
21014 * +-------------------+-------------------+
21015 * | cf_last_region | cf_last_zeroes |
21016 * +-------------------+-------------------+
21017 * region1-> | cfr_vaddr |
21018 * +-------------------+-------------------+
21019 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21020 * +---------------------------------------+
21021 * | d4 | d5 | ... |
21022 * +---------------------------------------+
21023 * | ... |
21024 * +-------------------+-------------------+
21025 * | dy | dz | na | na | cfr_vaddr... | <-region2
21026 * +-------------------+-------------------+
21027 * | cfr_vaddr (ctd) | cfr_num_pages |
21028 * +---------------------------------------+
21029 * | d0 | d1 ... |
21030 * +---------------------------------------+
21031 * ...
21032 * +---------------------------------------+
21033 * last region-> | cfr_vaddr |
21034 * +---------------------------------------+
21035 * + cfr_num_pages | d0 | d1 | d2 | d3 |
21036 * +---------------------------------------+
21037 * ...
21038 * +---------------------------------------+
21039 * | dx | dy | dz | na | na | na | na | na |
21040 * +---------------------------------------+
21041 *
21042 * where:
21043 * cf_size: total size of the buffer (rounded to page size)
21044 * cf_last_region: offset in the buffer of the last "region" sub-header
21045 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21046 * of last region
21047 * cfr_vaddr: virtual address of the start of the covered "region"
21048 * cfr_num_pages: number of pages in the covered "region"
21049 * d*: disposition of the page at that virtual address
21050 * Regions in the buffer are word-aligned.
21051 *
21052 * We estimate the size of the buffer based on the number of memory regions
21053 * and the virtual size of the address space. While copying each memory region
21054 * during vm_map_fork(), we also collect the footprint info for that region
21055 * and store it in the buffer, packing it as much as possible (coalescing
21056 * contiguous memory regions to avoid having too many region headers and
21057 * avoiding long streaks of "zero" page dispositions by splitting footprint
21058 * "regions", so the number of regions in the footprint buffer might not match
21059 * the number of memory regions in the address space.
21060 *
21061 * We also have to copy the original task's "nonvolatile" ledgers since that's
21062 * part of the footprint and will need to be reported to any tool asking for
21063 * the footprint information of the forked corpse.
21064 */
21065
21066 uint64_t vm_map_corpse_footprint_count = 0;
21067 uint64_t vm_map_corpse_footprint_size_avg = 0;
21068 uint64_t vm_map_corpse_footprint_size_max = 0;
21069 uint64_t vm_map_corpse_footprint_full = 0;
21070 uint64_t vm_map_corpse_footprint_no_buf = 0;
21071
21072 struct vm_map_corpse_footprint_header {
21073 vm_size_t cf_size; /* allocated buffer size */
21074 uint32_t cf_last_region; /* offset of last region in buffer */
21075 union {
21076 uint32_t cfu_last_zeroes; /* during creation:
21077 * number of "zero" dispositions at
21078 * end of last region */
21079 uint32_t cfu_hint_region; /* during lookup:
21080 * offset of last looked up region */
21081 #define cf_last_zeroes cfu.cfu_last_zeroes
21082 #define cf_hint_region cfu.cfu_hint_region
21083 } cfu;
21084 };
21085 typedef uint8_t cf_disp_t;
21086 struct vm_map_corpse_footprint_region {
21087 vm_map_offset_t cfr_vaddr; /* region start virtual address */
21088 uint32_t cfr_num_pages; /* number of pages in this "region" */
21089 cf_disp_t cfr_disposition[0]; /* disposition of each page */
21090 } __attribute__((packed));
21091
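/*
 * Illustrative sketch (not compiled, guarded by "#if 0"): one way to step
 * from one packed "region" to the next in a corpse footprint buffer, using
 * the header/region layout documented above.  The helper name is
 * hypothetical; the real code below (collect and query) performs the same
 * word-aligned stride inline.
 */
#if 0
static struct vm_map_corpse_footprint_region *
corpse_footprint_next_region(
	struct vm_map_corpse_footprint_header *hdr,
	struct vm_map_corpse_footprint_region *cur)
{
	uint32_t offset;

	/* offset of "cur" within the buffer */
	offset = (uint32_t)((uintptr_t)cur - (uintptr_t)hdr);
	if (offset >= hdr->cf_last_region) {
		/* "cur" was the last region */
		return NULL;
	}
	/* skip the region header and its per-page dispositions... */
	offset += sizeof(*cur);
	offset += (uint32_t)(cur->cfr_num_pages * sizeof(cf_disp_t));
	/* ...and align to the next word boundary */
	offset = roundup(offset, sizeof(int));
	return (struct vm_map_corpse_footprint_region *)((char *)hdr + offset);
}
#endif
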
21092 static cf_disp_t
21093 vm_page_disposition_to_cf_disp(
21094 int disposition)
21095 {
21096 assert(sizeof(cf_disp_t) == 1);
21097 /* relocate bits that don't fit in a "uint8_t" */
21098 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21099 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21100 }
21101 /* cast gets rid of extra bits */
21102 return (cf_disp_t) disposition;
21103 }
21104
21105 static int
21106 vm_page_cf_disp_to_disposition(
21107 cf_disp_t cf_disp)
21108 {
21109 int disposition;
21110
21111 assert(sizeof(cf_disp_t) == 1);
21112 disposition = (int) cf_disp;
21113 /* move relocated bits back in place */
21114 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21115 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21116 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21117 }
21118 return disposition;
21119 }
21120
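/*
 * Illustrative sketch (not compiled, guarded by "#if 0"): round-tripping a
 * disposition through the 8-bit corpse encoding.  VM_PAGE_QUERY_PAGE_REUSABLE
 * does not fit in a uint8_t, so it is parked in the FICTITIOUS bit, which the
 * reverse conversion clears (footprint dispositions are not expected to carry
 * FICTITIOUS themselves).
 */
#if 0
static void
cf_disp_roundtrip_example(void)
{
	int disp = VM_PAGE_QUERY_PAGE_PRESENT | VM_PAGE_QUERY_PAGE_REUSABLE;
	cf_disp_t enc = vm_page_disposition_to_cf_disp(disp);

	assert(vm_page_cf_disp_to_disposition(enc) == disp);
}
#endif
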
21121 /*
21122 * vm_map_corpse_footprint_new_region:
21123 * closes the current footprint "region" and creates a new one
21124 *
21125 * Returns NULL if there's not enough space in the buffer for a new region.
21126 */
21127 static struct vm_map_corpse_footprint_region *
21128 vm_map_corpse_footprint_new_region(
21129 struct vm_map_corpse_footprint_header *footprint_header)
21130 {
21131 uintptr_t footprint_edge;
21132 uint32_t new_region_offset;
21133 struct vm_map_corpse_footprint_region *footprint_region;
21134 struct vm_map_corpse_footprint_region *new_footprint_region;
21135
21136 footprint_edge = ((uintptr_t)footprint_header +
21137 footprint_header->cf_size);
21138 footprint_region = ((struct vm_map_corpse_footprint_region *)
21139 ((char *)footprint_header +
21140 footprint_header->cf_last_region));
21141 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
21142 footprint_edge);
21143
21144 /* get rid of trailing zeroes in the last region */
21145 assert(footprint_region->cfr_num_pages >=
21146 footprint_header->cf_last_zeroes);
21147 footprint_region->cfr_num_pages -=
21148 footprint_header->cf_last_zeroes;
21149 footprint_header->cf_last_zeroes = 0;
21150
21151 /* reuse this region if it's now empty */
21152 if (footprint_region->cfr_num_pages == 0) {
21153 return footprint_region;
21154 }
21155
21156 /* compute offset of new region */
21157 new_region_offset = footprint_header->cf_last_region;
21158 new_region_offset += sizeof(*footprint_region);
21159 new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21160 new_region_offset = roundup(new_region_offset, sizeof(int));
21161
21162 /* check if we're going over the edge */
21163 if (((uintptr_t)footprint_header +
21164 new_region_offset +
21165 sizeof(*footprint_region)) >=
21166 footprint_edge) {
21167 /* over the edge: no new region */
21168 return NULL;
21169 }
21170
21171 /* adjust offset of last region in header */
21172 footprint_header->cf_last_region = new_region_offset;
21173
21174 new_footprint_region = (struct vm_map_corpse_footprint_region *)
21175 ((char *)footprint_header +
21176 footprint_header->cf_last_region);
21177 new_footprint_region->cfr_vaddr = 0;
21178 new_footprint_region->cfr_num_pages = 0;
21179 /* caller needs to initialize new region */
21180
21181 return new_footprint_region;
21182 }
21183
21184 /*
21185 * vm_map_corpse_footprint_collect:
21186 * collects footprint information for "old_entry" in "old_map" and
21187 * stores it in "new_map"'s vmmap_corpse_footprint buffer.
21188 */
21189 kern_return_t
21190 vm_map_corpse_footprint_collect(
21191 vm_map_t old_map,
21192 vm_map_entry_t old_entry,
21193 vm_map_t new_map)
21194 {
21195 vm_map_offset_t va;
21196 kern_return_t kr;
21197 struct vm_map_corpse_footprint_header *footprint_header;
21198 struct vm_map_corpse_footprint_region *footprint_region;
21199 struct vm_map_corpse_footprint_region *new_footprint_region;
21200 cf_disp_t *next_disp_p;
21201 uintptr_t footprint_edge;
21202 uint32_t num_pages_tmp;
21203 int effective_page_size;
21204
21205 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));
21206
21207 va = old_entry->vme_start;
21208
21209 vm_map_lock_assert_exclusive(old_map);
21210 vm_map_lock_assert_exclusive(new_map);
21211
21212 assert(new_map->has_corpse_footprint);
21213 assert(!old_map->has_corpse_footprint);
21214 if (!new_map->has_corpse_footprint ||
21215 old_map->has_corpse_footprint) {
21216 /*
21217 * This can only transfer footprint info from a
21218 * map with a live pmap to a map with a corpse footprint.
21219 */
21220 return KERN_NOT_SUPPORTED;
21221 }
21222
21223 if (new_map->vmmap_corpse_footprint == NULL) {
21224 vm_offset_t buf;
21225 vm_size_t buf_size;
21226
21227 buf = 0;
21228 buf_size = (sizeof(*footprint_header) +
21229 (old_map->hdr.nentries
21230 *
21231 (sizeof(*footprint_region) +
21232 3)) /* potential alignment for each region */
21233 +
21234 ((old_map->size / effective_page_size)
21235 *
21236 sizeof(cf_disp_t))); /* disposition for each page */
21237 // printf("FBDP corpse map %p guesstimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
21238 buf_size = round_page(buf_size);
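		/*
		 * Rough worked example (illustrative numbers only): a map with
		 * 1,000 entries and 1 GB of virtual size at 16 KB pages needs
		 * sizeof(*footprint_header) + 1,000 * (sizeof(*footprint_region) + 3)
		 * bytes of header/region overhead plus 65,536 one-byte dispositions,
		 * i.e. roughly 80 KB before rounding up to whole pages.
		 */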
21239
21240 /* limit buffer to 1 page to validate overflow detection */
21241 // buf_size = PAGE_SIZE;
21242
21243 /* limit size to a somewhat sane amount */
21244 #if XNU_TARGET_OS_OSX
21245 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
21246 #else /* XNU_TARGET_OS_OSX */
21247 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
21248 #endif /* XNU_TARGET_OS_OSX */
21249 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
21250 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
21251 }
21252
21253 /*
21254 * Allocate the pageable buffer (with a trailing guard page).
21255 * It will be zero-filled on demand.
21256 */
21257 kr = kernel_memory_allocate(kernel_map,
21258 &buf,
21259 (buf_size
21260 + PAGE_SIZE), /* trailing guard page */
21261 0, /* mask */
21262 KMA_PAGEABLE | KMA_GUARD_LAST,
21263 VM_KERN_MEMORY_DIAG);
21264 if (kr != KERN_SUCCESS) {
21265 vm_map_corpse_footprint_no_buf++;
21266 return kr;
21267 }
21268
21269 /* initialize header and 1st region */
21270 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
21271 new_map->vmmap_corpse_footprint = footprint_header;
21272
21273 footprint_header->cf_size = buf_size;
21274 footprint_header->cf_last_region =
21275 sizeof(*footprint_header);
21276 footprint_header->cf_last_zeroes = 0;
21277
21278 footprint_region = (struct vm_map_corpse_footprint_region *)
21279 ((char *)footprint_header +
21280 footprint_header->cf_last_region);
21281 footprint_region->cfr_vaddr = 0;
21282 footprint_region->cfr_num_pages = 0;
21283 } else {
21284 /* retrieve header and last region */
21285 footprint_header = (struct vm_map_corpse_footprint_header *)
21286 new_map->vmmap_corpse_footprint;
21287 footprint_region = (struct vm_map_corpse_footprint_region *)
21288 ((char *)footprint_header +
21289 footprint_header->cf_last_region);
21290 }
21291 footprint_edge = ((uintptr_t)footprint_header +
21292 footprint_header->cf_size);
21293
21294 if ((footprint_region->cfr_vaddr +
21295 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
21296 effective_page_size))
21297 != old_entry->vme_start) {
21298 uint64_t num_pages_delta, num_pages_delta_size;
21299 uint32_t region_offset_delta_size;
21300
21301 /*
21302 * Not the next contiguous virtual address:
21303 * start a new region or store "zero" dispositions for
21304 * the missing pages?
21305 */
21306 /* size of gap in actual page dispositions */
21307 num_pages_delta = ((old_entry->vme_start -
21308 footprint_region->cfr_vaddr) / effective_page_size)
21309 - footprint_region->cfr_num_pages;
21310 num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
21311 /* size of gap as a new footprint region header */
21312 region_offset_delta_size =
21313 (sizeof(*footprint_region) +
21314 roundup(((footprint_region->cfr_num_pages -
21315 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
21316 sizeof(int)) -
21317 ((footprint_region->cfr_num_pages -
21318 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
21319 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
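		/*
		 * Cost comparison: filling the gap costs one "zero" disposition
		 * byte per missing page, while starting a new region costs a
		 * region header plus the padding needed to word-align the
		 * current one.
		 */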
21320 if (region_offset_delta_size < num_pages_delta_size ||
21321 os_add3_overflow(footprint_region->cfr_num_pages,
21322 (uint32_t) num_pages_delta,
21323 1,
21324 &num_pages_tmp)) {
21325 /*
21326 * Storing data for this gap would take more space
21327 * than inserting a new footprint region header:
21328 * let's start a new region and save space. If it's a
21329 * tie, let's avoid using a new region, since that
21330 * would require more region hops to find the right
21331 * range during lookups.
21332 *
21333 * If the current region's cfr_num_pages would overflow
21334 * if we added "zero" page dispositions for the gap,
21335 * no choice but to start a new region.
21336 */
21337 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
21338 new_footprint_region =
21339 vm_map_corpse_footprint_new_region(footprint_header);
21340 /* check that we're not going over the edge */
21341 if (new_footprint_region == NULL) {
21342 goto over_the_edge;
21343 }
21344 footprint_region = new_footprint_region;
21345 /* initialize new region as empty */
21346 footprint_region->cfr_vaddr = old_entry->vme_start;
21347 footprint_region->cfr_num_pages = 0;
21348 } else {
21349 /*
21350 * Store "zero" page dispositions for the missing
21351 * pages.
21352 */
21353 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
21354 for (; num_pages_delta > 0; num_pages_delta--) {
21355 next_disp_p = (cf_disp_t *)
21356 ((uintptr_t) footprint_region +
21357 sizeof(*footprint_region));
21358 next_disp_p += footprint_region->cfr_num_pages;
21359 /* check that we're not going over the edge */
21360 if ((uintptr_t)next_disp_p >= footprint_edge) {
21361 goto over_the_edge;
21362 }
21363 /* store "zero" disposition for this gap page */
21364 footprint_region->cfr_num_pages++;
21365 *next_disp_p = (cf_disp_t) 0;
21366 footprint_header->cf_last_zeroes++;
21367 }
21368 }
21369 }
21370
21371 for (va = old_entry->vme_start;
21372 va < old_entry->vme_end;
21373 va += effective_page_size) {
21374 int disposition;
21375 cf_disp_t cf_disp;
21376
21377 vm_map_footprint_query_page_info(old_map,
21378 old_entry,
21379 va,
21380 &disposition);
21381 cf_disp = vm_page_disposition_to_cf_disp(disposition);
21382
21383 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
21384
21385 if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
21386 /*
21387 * Ignore "zero" dispositions at start of
21388 * region: just move start of region.
21389 */
21390 footprint_region->cfr_vaddr += effective_page_size;
21391 continue;
21392 }
21393
21394 /* would region's cfr_num_pages overflow? */
21395 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
21396 &num_pages_tmp)) {
21397 /* overflow: create a new region */
21398 new_footprint_region =
21399 vm_map_corpse_footprint_new_region(
21400 footprint_header);
21401 if (new_footprint_region == NULL) {
21402 goto over_the_edge;
21403 }
21404 footprint_region = new_footprint_region;
21405 footprint_region->cfr_vaddr = va;
21406 footprint_region->cfr_num_pages = 0;
21407 }
21408
21409 next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
21410 sizeof(*footprint_region));
21411 next_disp_p += footprint_region->cfr_num_pages;
21412 /* check that we're not going over the edge */
21413 if ((uintptr_t)next_disp_p >= footprint_edge) {
21414 goto over_the_edge;
21415 }
21416 /* store this disposition */
21417 *next_disp_p = cf_disp;
21418 footprint_region->cfr_num_pages++;
21419
21420 if (cf_disp != 0) {
21421 /* non-zero disp: break the current zero streak */
21422 footprint_header->cf_last_zeroes = 0;
21423 /* done */
21424 continue;
21425 }
21426
21427 /* zero disp: add to the current streak of zeroes */
21428 footprint_header->cf_last_zeroes++;
21429 if ((footprint_header->cf_last_zeroes +
21430 roundup(((footprint_region->cfr_num_pages -
21431 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
21432 (sizeof(int) - 1),
21433 sizeof(int))) <
21434 (sizeof(*footprint_header))) {
21435 /*
21436 * There are not enough trailing "zero" dispositions
21437 * (+ the extra padding we would need for the previous
21438 * region); creating a new region would not save space
21439 * at this point, so let's keep this "zero" disposition
21440 * in this region and reconsider later.
21441 */
21442 continue;
21443 }
21444 /*
21445 * Create a new region to avoid having too many consecutive
21446 * "zero" dispositions.
21447 */
21448 new_footprint_region =
21449 vm_map_corpse_footprint_new_region(footprint_header);
21450 if (new_footprint_region == NULL) {
21451 goto over_the_edge;
21452 }
21453 footprint_region = new_footprint_region;
21454 /* initialize the new region as empty ... */
21455 footprint_region->cfr_num_pages = 0;
21456 /* ... and skip this "zero" disp */
21457 footprint_region->cfr_vaddr = va + effective_page_size;
21458 }
21459
21460 return KERN_SUCCESS;
21461
21462 over_the_edge:
21463 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
21464 vm_map_corpse_footprint_full++;
21465 return KERN_RESOURCE_SHORTAGE;
21466 }
21467
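/*
 * Illustrative sketch (not compiled, guarded by "#if 0"): the expected
 * calling sequence when populating a corpse footprint.  Locking and entry
 * iteration are simplified here; the real call sites live in the
 * vm_map_fork() / corpse creation path.
 */
#if 0
static kern_return_t
corpse_footprint_collect_example(
	vm_map_t old_map,
	vm_map_t new_map)
{
	vm_map_entry_t entry;
	kern_return_t kr;

	kr = KERN_SUCCESS;
	vm_map_lock(old_map);
	vm_map_lock(new_map);
	for (entry = vm_map_first_entry(old_map);
	    entry != vm_map_to_entry(old_map);
	    entry = entry->vme_next) {
		kr = vm_map_corpse_footprint_collect(old_map, entry, new_map);
		if (kr != KERN_SUCCESS) {
			/* e.g. KERN_RESOURCE_SHORTAGE: footprint buffer is full */
			break;
		}
	}
	/* trim trailing "zero" dispositions and the unused tail of the buffer */
	vm_map_corpse_footprint_collect_done(new_map);
	vm_map_unlock(new_map);
	vm_map_unlock(old_map);
	return kr;
}
#endif
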
21468 /*
21469 * vm_map_corpse_footprint_collect_done:
21470 * completes the footprint collection by getting rid of any remaining
21471 * trailing "zero" dispositions and trimming the unused part of the
21472 * kernel buffer
21473 */
21474 void
21475 vm_map_corpse_footprint_collect_done(
21476 vm_map_t new_map)
21477 {
21478 struct vm_map_corpse_footprint_header *footprint_header;
21479 struct vm_map_corpse_footprint_region *footprint_region;
21480 vm_size_t buf_size, actual_size;
21481 kern_return_t kr;
21482
21483 assert(new_map->has_corpse_footprint);
21484 if (!new_map->has_corpse_footprint ||
21485 new_map->vmmap_corpse_footprint == NULL) {
21486 return;
21487 }
21488
21489 footprint_header = (struct vm_map_corpse_footprint_header *)
21490 new_map->vmmap_corpse_footprint;
21491 buf_size = footprint_header->cf_size;
21492
21493 footprint_region = (struct vm_map_corpse_footprint_region *)
21494 ((char *)footprint_header +
21495 footprint_header->cf_last_region);
21496
21497 /* get rid of trailing zeroes in last region */
21498 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
21499 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
21500 footprint_header->cf_last_zeroes = 0;
21501
21502 actual_size = (vm_size_t)(footprint_header->cf_last_region +
21503 sizeof(*footprint_region) +
21504 (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));
21505
21506 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
21507 vm_map_corpse_footprint_size_avg =
21508 (((vm_map_corpse_footprint_size_avg *
21509 vm_map_corpse_footprint_count) +
21510 actual_size) /
21511 (vm_map_corpse_footprint_count + 1));
21512 vm_map_corpse_footprint_count++;
21513 if (actual_size > vm_map_corpse_footprint_size_max) {
21514 vm_map_corpse_footprint_size_max = actual_size;
21515 }
21516
21517 actual_size = round_page(actual_size);
21518 if (buf_size > actual_size) {
21519 kr = vm_deallocate(kernel_map,
21520 ((vm_address_t)footprint_header +
21521 actual_size +
21522 PAGE_SIZE), /* trailing guard page */
21523 (buf_size - actual_size));
21524 assertf(kr == KERN_SUCCESS,
21525 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21526 footprint_header,
21527 (uint64_t) buf_size,
21528 (uint64_t) actual_size,
21529 kr);
21530 kr = vm_protect(kernel_map,
21531 ((vm_address_t)footprint_header +
21532 actual_size),
21533 PAGE_SIZE,
21534 FALSE, /* set_maximum */
21535 VM_PROT_NONE);
21536 assertf(kr == KERN_SUCCESS,
21537 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21538 footprint_header,
21539 (uint64_t) buf_size,
21540 (uint64_t) actual_size,
21541 kr);
21542 }
21543
21544 footprint_header->cf_size = actual_size;
21545 }
21546
21547 /*
21548 * vm_map_corpse_footprint_query_page_info:
21549 * retrieves the disposition of the page at virtual address "va"
21550 * in the forked corpse's VM map
21551 *
21552 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
21553 */
21554 kern_return_t
21555 vm_map_corpse_footprint_query_page_info(
21556 vm_map_t map,
21557 vm_map_offset_t va,
21558 int *disposition_p)
21559 {
21560 struct vm_map_corpse_footprint_header *footprint_header;
21561 struct vm_map_corpse_footprint_region *footprint_region;
21562 uint32_t footprint_region_offset;
21563 vm_map_offset_t region_start, region_end;
21564 int disp_idx;
21565 kern_return_t kr;
21566 int effective_page_size;
21567 cf_disp_t cf_disp;
21568
21569 if (!map->has_corpse_footprint) {
21570 *disposition_p = 0;
21571 kr = KERN_INVALID_ARGUMENT;
21572 goto done;
21573 }
21574
21575 footprint_header = map->vmmap_corpse_footprint;
21576 if (footprint_header == NULL) {
21577 *disposition_p = 0;
21578 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21579 kr = KERN_INVALID_ARGUMENT;
21580 goto done;
21581 }
21582
21583 /* start looking at the hint ("cf_hint_region") */
21584 footprint_region_offset = footprint_header->cf_hint_region;
21585
21586 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
21587
21588 lookup_again:
21589 if (footprint_region_offset < sizeof(*footprint_header)) {
21590 /* hint too low: start from 1st region */
21591 footprint_region_offset = sizeof(*footprint_header);
21592 }
21593 if (footprint_region_offset >= footprint_header->cf_last_region) {
21594 /* hint too high: re-start from 1st region */
21595 footprint_region_offset = sizeof(*footprint_header);
21596 }
21597 footprint_region = (struct vm_map_corpse_footprint_region *)
21598 ((char *)footprint_header + footprint_region_offset);
21599 region_start = footprint_region->cfr_vaddr;
21600 region_end = (region_start +
21601 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21602 effective_page_size));
21603 if (va < region_start &&
21604 footprint_region_offset != sizeof(*footprint_header)) {
21605 /* our range starts before the hint region */
21606
21607 /* reset the hint (in a racy way...) */
21608 footprint_header->cf_hint_region = sizeof(*footprint_header);
21609 /* lookup "va" again from 1st region */
21610 footprint_region_offset = sizeof(*footprint_header);
21611 goto lookup_again;
21612 }
21613
21614 while (va >= region_end) {
21615 if (footprint_region_offset >= footprint_header->cf_last_region) {
21616 break;
21617 }
21618 /* skip the region's header */
21619 footprint_region_offset += sizeof(*footprint_region);
21620 /* skip the region's page dispositions */
21621 footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21622 /* align to next word boundary */
21623 footprint_region_offset =
21624 roundup(footprint_region_offset,
21625 sizeof(int));
21626 footprint_region = (struct vm_map_corpse_footprint_region *)
21627 ((char *)footprint_header + footprint_region_offset);
21628 region_start = footprint_region->cfr_vaddr;
21629 region_end = (region_start +
21630 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21631 effective_page_size));
21632 }
21633 if (va < region_start || va >= region_end) {
21634 /* page not found */
21635 *disposition_p = 0;
21636 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21637 kr = KERN_SUCCESS;
21638 goto done;
21639 }
21640
21641 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
21642 footprint_header->cf_hint_region = footprint_region_offset;
21643
21644 /* get page disposition for "va" in this region */
21645 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
21646 cf_disp = footprint_region->cfr_disposition[disp_idx];
21647 *disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
21648 kr = KERN_SUCCESS;
21649 done:
21650 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21651 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
21652 DTRACE_VM4(footprint_query_page_info,
21653 vm_map_t, map,
21654 vm_map_offset_t, va,
21655 int, *disposition_p,
21656 kern_return_t, kr);
21657
21658 return kr;
21659 }
21660
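/*
 * Illustrative sketch (not compiled, guarded by "#if 0"): querying the
 * recorded disposition of one page in a corpse map and testing a bit of
 * the result.  "map" and "va" are placeholders for whatever the caller
 * is inspecting.
 */
#if 0
static boolean_t
corpse_page_is_present(
	vm_map_t map,
	vm_map_offset_t va)
{
	int disp = 0;

	if (vm_map_corpse_footprint_query_page_info(map, va, &disp) !=
	    KERN_SUCCESS) {
		return FALSE;
	}
	return (disp & VM_PAGE_QUERY_PAGE_PRESENT) ? TRUE : FALSE;
}
#endif
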
21661 void
21662 vm_map_corpse_footprint_destroy(
21663 vm_map_t map)
21664 {
21665 if (map->has_corpse_footprint &&
21666 map->vmmap_corpse_footprint != 0) {
21667 struct vm_map_corpse_footprint_header *footprint_header;
21668 vm_size_t buf_size;
21669 kern_return_t kr;
21670
21671 footprint_header = map->vmmap_corpse_footprint;
21672 buf_size = footprint_header->cf_size;
21673 kr = vm_deallocate(kernel_map,
21674 (vm_offset_t) map->vmmap_corpse_footprint,
21675 ((vm_size_t) buf_size
21676 + PAGE_SIZE)); /* trailing guard page */
21677 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
21678 map->vmmap_corpse_footprint = 0;
21679 map->has_corpse_footprint = FALSE;
21680 }
21681 }
21682
21683 /*
21684 * vm_map_copy_footprint_ledgers:
21685 * copies any ledger that's relevant to the memory footprint of "old_task"
21686 * into the forked corpse's task ("new_task")
21687 */
21688 void
21689 vm_map_copy_footprint_ledgers(
21690 task_t old_task,
21691 task_t new_task)
21692 {
21693 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
21694 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
21695 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
21696 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
21697 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
21698 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
21699 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
21700 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
21701 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
21702 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
21703 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
21704 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
21705 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
21706 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
21707 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
21708 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
21709 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
21710 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
21711 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
21712 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
21713 }
21714
21715 /*
21716 * vm_map_copy_ledger:
21717 * copies a single ledger entry's balance from "old_task" to "new_task"
21718 */
21719 void
21720 vm_map_copy_ledger(
21721 task_t old_task,
21722 task_t new_task,
21723 int ledger_entry)
21724 {
21725 ledger_amount_t old_balance, new_balance, delta;
21726
21727 assert(new_task->map->has_corpse_footprint);
21728 if (!new_task->map->has_corpse_footprint) {
21729 return;
21730 }
21731
21732 /* turn off sanity checks for the ledger we're about to mess with */
21733 ledger_disable_panic_on_negative(new_task->ledger,
21734 ledger_entry);
21735
21736 /* adjust "new_task" to match "old_task" */
21737 ledger_get_balance(old_task->ledger,
21738 ledger_entry,
21739 &old_balance);
21740 ledger_get_balance(new_task->ledger,
21741 ledger_entry,
21742 &new_balance);
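	/*
	 * Example: if old_balance is 5 MB and new_balance is 2 MB, the
	 * corpse's ledger gets a 3 MB credit below so it reports the
	 * original task's footprint.
	 */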
21743 if (new_balance == old_balance) {
21744 /* new == old: done */
21745 } else if (new_balance > old_balance) {
21746 /* new > old ==> new -= new - old */
21747 delta = new_balance - old_balance;
21748 ledger_debit(new_task->ledger,
21749 ledger_entry,
21750 delta);
21751 } else {
21752 /* new < old ==> new += old - new */
21753 delta = old_balance - new_balance;
21754 ledger_credit(new_task->ledger,
21755 ledger_entry,
21756 delta);
21757 }
21758 }
21759
21760 #if MACH_ASSERT
21761
21762 extern int pmap_ledgers_panic;
21763 extern int pmap_ledgers_panic_leeway;
21764
21765 #define LEDGER_DRIFT(__LEDGER) \
21766 int __LEDGER##_over; \
21767 ledger_amount_t __LEDGER##_over_total; \
21768 ledger_amount_t __LEDGER##_over_max; \
21769 int __LEDGER##_under; \
21770 ledger_amount_t __LEDGER##_under_total; \
21771 ledger_amount_t __LEDGER##_under_max
21772
21773 struct {
21774 uint64_t num_pmaps_checked;
21775
21776 LEDGER_DRIFT(phys_footprint);
21777 LEDGER_DRIFT(internal);
21778 LEDGER_DRIFT(internal_compressed);
21779 LEDGER_DRIFT(iokit_mapped);
21780 LEDGER_DRIFT(alternate_accounting);
21781 LEDGER_DRIFT(alternate_accounting_compressed);
21782 LEDGER_DRIFT(page_table);
21783 LEDGER_DRIFT(purgeable_volatile);
21784 LEDGER_DRIFT(purgeable_nonvolatile);
21785 LEDGER_DRIFT(purgeable_volatile_compressed);
21786 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
21787 LEDGER_DRIFT(tagged_nofootprint);
21788 LEDGER_DRIFT(tagged_footprint);
21789 LEDGER_DRIFT(tagged_nofootprint_compressed);
21790 LEDGER_DRIFT(tagged_footprint_compressed);
21791 LEDGER_DRIFT(network_volatile);
21792 LEDGER_DRIFT(network_nonvolatile);
21793 LEDGER_DRIFT(network_volatile_compressed);
21794 LEDGER_DRIFT(network_nonvolatile_compressed);
21795 LEDGER_DRIFT(media_nofootprint);
21796 LEDGER_DRIFT(media_footprint);
21797 LEDGER_DRIFT(media_nofootprint_compressed);
21798 LEDGER_DRIFT(media_footprint_compressed);
21799 LEDGER_DRIFT(graphics_nofootprint);
21800 LEDGER_DRIFT(graphics_footprint);
21801 LEDGER_DRIFT(graphics_nofootprint_compressed);
21802 LEDGER_DRIFT(graphics_footprint_compressed);
21803 LEDGER_DRIFT(neural_nofootprint);
21804 LEDGER_DRIFT(neural_footprint);
21805 LEDGER_DRIFT(neural_nofootprint_compressed);
21806 LEDGER_DRIFT(neural_footprint_compressed);
21807 } pmap_ledgers_drift;
21808
21809 void
21810 vm_map_pmap_check_ledgers(
21811 pmap_t pmap,
21812 ledger_t ledger,
21813 int pid,
21814 char *procname)
21815 {
21816 ledger_amount_t bal;
21817 boolean_t do_panic;
21818
21819 do_panic = FALSE;
21820
21821 pmap_ledgers_drift.num_pmaps_checked++;
21822
21823 #define LEDGER_CHECK_BALANCE(__LEDGER) \
21824 MACRO_BEGIN \
21825 int panic_on_negative = TRUE; \
21826 ledger_get_balance(ledger, \
21827 task_ledgers.__LEDGER, \
21828 &bal); \
21829 ledger_get_panic_on_negative(ledger, \
21830 task_ledgers.__LEDGER, \
21831 &panic_on_negative); \
21832 if (bal != 0) { \
21833 if (panic_on_negative || \
21834 (pmap_ledgers_panic && \
21835 pmap_ledgers_panic_leeway > 0 && \
21836 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
21837 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
21838 do_panic = TRUE; \
21839 } \
21840 printf("LEDGER BALANCE proc %d (%s) " \
21841 "\"%s\" = %lld\n", \
21842 pid, procname, #__LEDGER, bal); \
21843 if (bal > 0) { \
21844 pmap_ledgers_drift.__LEDGER##_over++; \
21845 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
21846 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
21847 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
21848 } \
21849 } else if (bal < 0) { \
21850 pmap_ledgers_drift.__LEDGER##_under++; \
21851 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
21852 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
21853 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
21854 } \
21855 } \
21856 } \
21857 MACRO_END
21858
21859 LEDGER_CHECK_BALANCE(phys_footprint);
21860 LEDGER_CHECK_BALANCE(internal);
21861 LEDGER_CHECK_BALANCE(internal_compressed);
21862 LEDGER_CHECK_BALANCE(iokit_mapped);
21863 LEDGER_CHECK_BALANCE(alternate_accounting);
21864 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
21865 LEDGER_CHECK_BALANCE(page_table);
21866 LEDGER_CHECK_BALANCE(purgeable_volatile);
21867 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
21868 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
21869 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
21870 LEDGER_CHECK_BALANCE(tagged_nofootprint);
21871 LEDGER_CHECK_BALANCE(tagged_footprint);
21872 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
21873 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
21874 LEDGER_CHECK_BALANCE(network_volatile);
21875 LEDGER_CHECK_BALANCE(network_nonvolatile);
21876 LEDGER_CHECK_BALANCE(network_volatile_compressed);
21877 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
21878 LEDGER_CHECK_BALANCE(media_nofootprint);
21879 LEDGER_CHECK_BALANCE(media_footprint);
21880 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
21881 LEDGER_CHECK_BALANCE(media_footprint_compressed);
21882 LEDGER_CHECK_BALANCE(graphics_nofootprint);
21883 LEDGER_CHECK_BALANCE(graphics_footprint);
21884 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
21885 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
21886 LEDGER_CHECK_BALANCE(neural_nofootprint);
21887 LEDGER_CHECK_BALANCE(neural_footprint);
21888 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
21889 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
21890
21891 if (do_panic) {
21892 if (pmap_ledgers_panic) {
21893 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
21894 pmap, pid, procname);
21895 } else {
21896 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
21897 pmap, pid, procname);
21898 }
21899 }
21900 }
21901 #endif /* MACH_ASSERT */