/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * Author: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Virtual memory mapping module.
 */
#include <mach_assert.h>

#include <vm/vm_options.h>

#include <libkern/OSAtomic.h>

#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/vm_attributes.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/vm_statistics.h>
#include <mach/memory_object.h>
#include <mach/mach_vm.h>
#include <machine/cpu_capabilities.h>

#include <kern/assert.h>
#include <kern/backtrace.h>
#include <kern/counter.h>
#include <kern/exc_guard.h>
#include <kern/kalloc.h>
#include <kern/zalloc_internal.h>

#include <vm/vm_compressor.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_init.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>

#include <mach/vm_map_server.h>
#include <mach/mach_host_server.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_protos.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_map_store.h>

#include <san/kasan.h>

#include <sys/codesign.h>
#include <sys/mman.h>

#include <libkern/section_keywords.h>
#if DEVELOPMENT || DEBUG
extern int proc_selfcsflags(void);
int panic_on_unsigned_execute = 0;
#endif /* DEVELOPMENT || DEBUG */

int debug4k_filter = 0;
char debug4k_proc_name[1024] = "";
int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
int debug4k_panic_on_misaligned_sharing = 0;
const char *debug4k_category_name[] = {
#endif /* MACH_ASSERT */

int debug4k_no_cow_copyin = 0;
extern const int fourk_binary_compatibility_unsafe;
extern const int fourk_binary_compatibility_allow_wx;
#endif /* __arm64__ */

extern int proc_selfpid(void);
extern char *proc_name_address(void *p);

#if VM_MAP_DEBUG_APPLE_PROTECT
int vm_map_debug_apple_protect = 0;
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
int vm_map_debug_fourk = 0;
#endif /* VM_MAP_DEBUG_FOURK */

SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
int vm_map_executable_immutable_verbose = 0;

os_refgrp_decl(static, map_refgrp, "vm_map", NULL);

extern u_int32_t random(void);  /* from <libkern/libkern.h> */
/*
 * Internal prototypes
 */

static void vm_map_simplify_range(
    vm_map_offset_t start,
    vm_map_offset_t end);       /* forward */

static boolean_t vm_map_range_check(
    vm_map_offset_t start,
    vm_map_entry_t  *entry);

static vm_map_entry_t _vm_map_entry_create(
    struct vm_map_header *map_header, boolean_t map_locked);

static void _vm_map_entry_dispose(
    struct vm_map_header *map_header,
    vm_map_entry_t entry);

static void vm_map_pmap_enter(
    vm_map_offset_t addr,
    vm_map_offset_t end_addr,
    vm_object_offset_t offset,
    vm_prot_t protection);

static void _vm_map_clip_end(
    struct vm_map_header *map_header,
    vm_map_entry_t entry,
    vm_map_offset_t end);

static void _vm_map_clip_start(
    struct vm_map_header *map_header,
    vm_map_entry_t entry,
    vm_map_offset_t start);

static void vm_map_entry_delete(
    vm_map_entry_t entry);

static kern_return_t vm_map_delete(
    vm_map_offset_t start,

static void vm_map_copy_insert(
    vm_map_entry_t after_where,

static kern_return_t vm_map_copy_overwrite_unaligned(
    vm_map_entry_t entry,
    vm_map_address_t start,
    boolean_t discard_on_success);

static kern_return_t vm_map_copy_overwrite_aligned(
    vm_map_entry_t tmp_entry,
    vm_map_offset_t start,

static kern_return_t vm_map_copyin_kernel_buffer(
    vm_map_address_t src_addr,
    boolean_t src_destroy,
    vm_map_copy_t *copy_result);        /* OUT */

static kern_return_t vm_map_copyout_kernel_buffer(
    vm_map_address_t *addr,             /* IN/OUT */
    vm_map_size_t copy_size,
    boolean_t consume_on_success);

static void vm_map_fork_share(
    vm_map_entry_t old_entry,

static boolean_t vm_map_fork_copy(
    vm_map_entry_t *old_entry_p,
    int vm_map_copyin_flags);

static kern_return_t vm_map_wire_nested(
    vm_map_offset_t start,
    vm_prot_t caller_prot,
    vm_map_offset_t pmap_addr,
    ppnum_t *physpage_p);

static kern_return_t vm_map_unwire_nested(
    vm_map_offset_t start,
    vm_map_offset_t pmap_addr);

static kern_return_t vm_map_overwrite_submap_recurse(
    vm_map_offset_t dst_addr,
    vm_map_size_t dst_size);

static kern_return_t vm_map_copy_overwrite_nested(
    vm_map_offset_t dst_addr,
    boolean_t interruptible,
    boolean_t discard_on_success);

static kern_return_t vm_map_remap_extract(
    vm_map_offset_t addr,
    struct vm_map_header *map_header,
    vm_prot_t *cur_protection,
    vm_prot_t *max_protection,
    vm_inherit_t inheritance,
    vm_map_kernel_flags_t vmk_flags);

static kern_return_t vm_map_remap_range_allocate(
    vm_map_address_t *address,
    vm_map_offset_t mask,
    vm_map_kernel_flags_t vmk_flags,
    vm_map_entry_t *map_entry);

static void vm_map_region_look_for_page(
    vm_object_offset_t offset,
    unsigned short depth,
    vm_region_extended_info_t extended,
    mach_msg_type_number_t count);

static int vm_map_region_count_obj_refs(
    vm_map_entry_t entry,

static kern_return_t vm_map_willneed(
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_reuse_pages(
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_reusable_pages(
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_can_reuse(
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_pageout(
    vm_map_offset_t start,
    vm_map_offset_t end);
#endif /* MACH_ASSERT */

kern_return_t vm_map_corpse_footprint_collect(
    vm_map_entry_t old_entry,

void vm_map_corpse_footprint_collect_done(
void vm_map_corpse_footprint_destroy(
kern_return_t vm_map_corpse_footprint_query_page_info(
void vm_map_footprint_query_page_info(
    vm_map_entry_t map_entry,
    vm_map_offset_t curr_s_offset,
static const struct vm_map_entry vm_map_entry_template = {
    .behavior = VM_BEHAVIOR_DEFAULT,
    .inheritance = VM_INHERIT_DEFAULT,
};

pid_t find_largest_process_vm_map_entries(void);
/*
 * Macros to copy a vm_map_entry. We must be careful to correctly
 * manage the wired page count. vm_map_entry_copy() creates a new
 * map entry to the same memory - the wired count in the new entry
 * must be set to zero. vm_map_entry_copy_full() creates a new
 * entry that is identical to the old entry.  This preserves the
 * wire count; it's used for map splitting and zone changing in
 * vm_map_copyout.
 */
vm_map_entry_copy_pmap_cs_assoc(
    vm_map_t map __unused,
    vm_map_entry_t new __unused,
    vm_map_entry_t old __unused)
{
    /* when pmap_cs is not enabled, assert as a sanity check */
    assert(new->pmap_cs_associated == FALSE);
}
/*
 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 * But for security reasons on some platforms, we don't want the
 * new mapping to be "used for jit", so we reset the flag here.
 */
vm_map_entry_copy_code_signing(
    vm_map_entry_t old __unused)
{
    if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
        assert(new->used_for_jit == old->used_for_jit);
    } else {
        new->used_for_jit = FALSE;
    }
}
    boolean_t _vmec_reserved = new->from_reserved_zone;

    new->is_shared = FALSE;
    new->needs_wakeup = FALSE;
    new->in_transition = FALSE;
    new->wired_count = 0;
    new->user_wired_count = 0;
    new->permanent = FALSE;
    vm_map_entry_copy_code_signing(map, new, old);
    vm_map_entry_copy_pmap_cs_assoc(map, new, old);
    new->from_reserved_zone = _vmec_reserved;
    if (new->iokit_acct) {
        assertf(!new->use_pmap, "old %p new %p\n", old, new);
        new->iokit_acct = FALSE;
        new->use_pmap = TRUE;
    }
    new->vme_resilient_codesign = FALSE;
    new->vme_resilient_media = FALSE;
    new->vme_atomic = FALSE;
    new->vme_no_copy_on_read = FALSE;

vm_map_entry_copy_full(

    boolean_t _vmecf_reserved = new->from_reserved_zone;

    new->from_reserved_zone = _vmecf_reserved;
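/*
 * Illustrative sketch (not part of the original source): the difference
 * between the two copy helpers described above.  vm_map_entry_copy() zeroes
 * the wired counts of the new entry, while vm_map_entry_copy_full()
 * preserves them.  "src_entry" and "clone" are hypothetical locals and the
 * argument order is assumed from the surrounding declarations.
 */
#if 0
    struct vm_map_entry clone;

    vm_map_entry_copy(map, &clone, src_entry);
    assert(clone.wired_count == 0);

    vm_map_entry_copy_full(&clone, src_entry);
    assert(clone.wired_count == src_entry->wired_count);
#endif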
/*
 * Normal lock_read_to_write() returns FALSE/0 on failure.
 * These functions evaluate to zero on success and non-zero value on failure.
 */
__attribute__((always_inline))
vm_map_lock_read_to_write(vm_map_t map)
{
    if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
        DTRACE_VM(vm_map_lock_upgrade);

__attribute__((always_inline))
vm_map_try_lock(vm_map_t map)
{
    if (lck_rw_try_lock_exclusive(&(map)->lock)) {
        DTRACE_VM(vm_map_lock_w);

__attribute__((always_inline))
vm_map_try_lock_read(vm_map_t map)
{
    if (lck_rw_try_lock_shared(&(map)->lock)) {
        DTRACE_VM(vm_map_lock_r);
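/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller of vm_map_lock_read_to_write(), showing the "zero on success,
 * non-zero on failure" convention described above.  On failure the shared
 * lock has already been dropped, so the caller re-takes the lock.
 */
#if 0
    vm_map_lock_read(map);
    /* ... inspect entries under the shared lock ... */
    if (vm_map_lock_read_to_write(map)) {
        /* upgrade failed: the read lock was dropped, fall back to a full lock */
        vm_map_lock(map);
    }
    /* ... mutate entries under the exclusive lock ... */
    vm_map_unlock(map);
#endif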
/*
 * Routines to get the page size the caller should
 * use while inspecting the target address space.
 * Use the "_safely" variant if the caller is dealing with a user-provided
 * array whose size depends on the page size, to avoid any overflow or
 * underflow of a user-allocated buffer.
 */
vm_self_region_page_shift_safely(

    int effective_page_shift = 0;

    if (PAGE_SIZE == (4096)) {
        /* x86_64 and 4k watches: always use 4k */

    /* did caller provide an explicit page size for this thread to use? */
    effective_page_shift = thread_self_region_page_shift();
    if (effective_page_shift) {
        /* use the explicitly-provided page size */
        return effective_page_shift;
    }

    /* no explicit page size: use the caller's page size... */
    effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
    if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
        /* page size match: safe to use */
        return effective_page_shift;
    }

    /* page size mismatch */

vm_self_region_page_shift(

    int effective_page_shift;

    effective_page_shift = vm_self_region_page_shift_safely(target_map);
    if (effective_page_shift == -1) {
        /* no safe value but OK to guess for caller */
        effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
            VM_MAP_PAGE_SHIFT(target_map));
    }
    return effective_page_shift;
}
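/*
 * Illustrative sketch (not part of the original source): sizing a
 * user-provided, page-indexed output buffer with the "_safely" variant, as
 * recommended above.  "dispositions_count" and "len" are hypothetical names.
 */
#if 0
    int shift;

    shift = vm_self_region_page_shift_safely(target_map);
    if (shift == -1) {
        /* page size mismatch: refuse rather than risk a buffer overflow */
        return KERN_INVALID_ARGUMENT;
    }
    dispositions_count = (unsigned int)(len >> shift);
#endif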
/*
 * Decide if we want to allow processes to execute from their data or stack areas.
 * override_nx() returns true if we do.  Data/stack execution can be enabled independently
 * for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 * or allow_stack_exec to enable data execution for that type of data area for that particular
 * ABI (or both by or'ing the flags together).  These are initialized in the architecture
 * specific pmap files since the default behavior varies according to architecture.  The
 * main reason it varies is because of the need to provide binary compatibility with old
 * applications that were written before these restrictions came into being.  In the old
 * days, an app could execute anything it could read, but this has slowly been tightened
 * up over time.  The default behavior is:
 *
 *	32-bit PPC apps		may execute from both stack and data areas
 *	32-bit Intel apps	may execute from data areas but not stack
 *	64-bit PPC/Intel apps	may not execute from either data or stack
 *
 * An application on any architecture may override these defaults by explicitly
 * adding PROT_EXEC permission to the page in question with the mprotect(2)
 * system call.  This code here just determines what happens when an app tries to
 * execute from a page that lacks execute permission.
 *
 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 * default behavior for both 32 and 64 bit apps on a system-wide basis.  Furthermore,
 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 * execution from data areas for a particular binary even if the arch normally permits it.  As
 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 * to support some complicated use cases, notably browsers with out-of-process plugins that
 * are not all NX-safe.
 */

extern int allow_data_exec, allow_stack_exec;
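/*
 * Illustrative sketch (not part of the original source): enabling data-area
 * execution for both ABIs, as described above, by or'ing the ABI flags
 * together (normally done in the architecture-specific pmap init or via
 * sysctl).
 */
#if 0
    allow_data_exec = VM_ABI_32 | VM_ABI_64;
    allow_stack_exec = 0;
#endif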
override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
{
    if (map->pmap == kernel_pmap) {

    /*
     * Determine if the app is running in 32 or 64 bit mode.
     */
    if (vm_map_is_64bit(map)) {
        current_abi = VM_ABI_64;
    } else {
        current_abi = VM_ABI_32;
    }

    /*
     * Determine if we should allow the execution based on whether it's a
     * stack or data area and the current architecture.
     */
    if (user_tag == VM_MEMORY_STACK) {
        return allow_stack_exec & current_abi;
    }

    return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
}
/*
 * Virtual memory maps provide for the mapping, protection,
 * and sharing of virtual memory objects.  In addition,
 * this module provides for an efficient virtual copy of
 * memory from one map to another.
 *
 * Synchronization is required prior to most operations.
 *
 * Maps consist of an ordered doubly-linked list of simple
 * entries; a single hint is used to speed up lookups.
 *
 * Sharing maps have been deleted from this version of Mach.
 * All shared objects are now mapped directly into the respective
 * maps.  This requires a change in the copy on write strategy;
 * the asymmetric (delayed) strategy is used for shared temporary
 * objects instead of the symmetric (shadow) strategy.  All maps
 * are now "top level" maps (either task map, kernel map or submap
 * of the kernel map).
 *
 * Since portions of maps are specified by start/end addresses,
 * which may not align with existing map entries, all
 * routines merely "clip" entries to these start/end values.
 * [That is, an entry is split into two, bordering at a
 * start or end value.]  Note that these clippings may not
 * always be necessary (as the two resulting entries are then
 * not changed); however, the clipping is done for convenience.
 * No attempt is currently made to "glue back together" two
 * entries that used to be one.
 *
 * The symmetric (shadow) copy strategy implements virtual copy
 * by copying VM object references from one map to
 * another, and then marking both regions as copy-on-write.
 * It is important to note that only one writeable reference
 * to a VM object region exists in any map when this strategy
 * is used -- this means that shadow object creation can be
 * delayed until a write operation occurs.  The asymmetric (delayed)
 * strategy allows multiple maps to have writeable references to
 * the same region of a vm object, and hence cannot delay creating
 * its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 * Copying of permanent objects is completely different; see
 * vm_object_copy_strategically() in vm_object.c.
 */
static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone;                 /* zone for vm_map structures */
static SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_reserved_zone;  /* zone with reserve for non-blocking allocations */
static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone;            /* zone for vm_map_copy structures */

SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_zone;                  /* zone for vm_map_entry structures */
SECURITY_READ_ONLY_LATE(zone_t) vm_map_holes_zone;                  /* zone for vm map holes (vm_map_links) structures */

#define VM_MAP_ZONE_NAME "maps"
#define VM_MAP_ZFLAGS ( \

#define VME_RESERVED_ZONE_NAME "Reserved VM map entries"
#define VM_MAP_RESERVED_ZFLAGS ( \
	ZC_KASAN_NOQUARANTINE | \

#define VM_MAP_HOLES_ZONE_NAME "VM map holes"
#define VM_MAP_HOLES_ZFLAGS ( \
/*
 * Asserts that a vm_map_copy object is coming from the
 * vm_map_copy_zone to ensure that it isn't a fake constructed
 * anywhere else.
 */
vm_map_copy_require(struct vm_map_copy *copy)
{
    zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
}

/*
 * Ensures that the argument is memory allocated from the genuine
 * vm map zone. (See zone_id_require_allow_foreign).
 */
vm_map_require(vm_map_t map)
{
    zone_id_require_allow_foreign(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
}
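/*
 * Illustrative sketch (not part of the original source): how a routine that
 * receives a map or copy object from an untrusted source might use these
 * checks before dereferencing it.
 */
#if 0
    vm_map_require(map);        /* panics if "map" was not allocated from the vm_map zone */
    vm_map_copy_require(copy);  /* likewise for a vm_map_copy */
#endif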
static __startup_data vm_offset_t map_data;
static __startup_data vm_size_t map_data_size;
static __startup_data vm_offset_t kentry_data;
static __startup_data vm_size_t kentry_data_size;
static __startup_data vm_offset_t map_holes_data;
static __startup_data vm_size_t map_holes_data_size;

#if XNU_TARGET_OS_OSX
#define NO_COALESCE_LIMIT  ((1024 * 128) - 1)
#else /* XNU_TARGET_OS_OSX */
#define NO_COALESCE_LIMIT  0
#endif /* XNU_TARGET_OS_OSX */

/* Skip acquiring locks if we're in the midst of a kernel core dump */
unsigned int not_in_kdp = 1;

unsigned int vm_map_set_cache_attr_count = 0;
vm_map_set_cache_attr(

    vm_map_entry_t  map_entry;
    kern_return_t   kr = KERN_SUCCESS;

    vm_map_lock_read(map);

    if (!vm_map_lookup_entry(map, va, &map_entry) ||
        map_entry->is_sub_map) {
        /*
         * that memory is not properly mapped
         */
        kr = KERN_INVALID_ARGUMENT;

    object = VME_OBJECT(map_entry);

    if (object == VM_OBJECT_NULL) {
        /*
         * there should be a VM object here at this point
         */
        kr = KERN_INVALID_ARGUMENT;

    vm_object_lock(object);
    object->set_cache_attr = TRUE;
    vm_object_unlock(object);

    vm_map_set_cache_attr_count++;

    vm_map_unlock_read(map);
#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 */
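/*
 * Illustrative sketch (not part of the original source): the crypt_info
 * lifetime contract described above, from a hypothetical caller's point of
 * view.  The callback name assigned to crypt_end and the exact argument
 * list are assumptions.
 */
#if 0
    struct pager_crypt_info crypt_info = {
        .crypt_end = my_crypt_end,      /* hypothetical callback */
    };

    kr = vm_map_apple_protected(map, start, end, crypto_backing_offset,
        &crypt_info, cryptid);
    /*
     * crypt_info itself was copied, but anything it points to must stay
     * valid until crypt_info.crypt_end() is eventually called.
     */
#endif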
vm_map_apple_protected(
    vm_map_offset_t start,
    vm_object_offset_t crypto_backing_offset,
    struct pager_crypt_info *crypt_info,

    boolean_t       map_locked;
    vm_map_entry_t  map_entry;
    struct vm_map_entry tmp_entry;
    memory_object_t unprotected_mem_obj;
    vm_object_t     protected_object;
    vm_map_offset_t map_addr;
    vm_map_offset_t start_aligned, end_aligned;
    vm_object_offset_t crypto_start, crypto_end;
    vm_map_kernel_flags_t vmk_flags;
    boolean_t       cache_pager;

    vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

    unprotected_mem_obj = MEMORY_OBJECT_NULL;

    start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
    end_aligned = vm_map_round_page(end, PAGE_MASK_64);
    start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
    end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));

    /*
     * "start" and "end" might be 4K-aligned but not 16K-aligned,
     * so we might have to loop and establish up to 3 mappings:
     *
     * + the first 16K-page, which might overlap with the previous
     *   4K-aligned mapping,
     * + the last 16K-page, which might overlap with the next
     *   4K-aligned mapping.
     * Each of these mappings might be backed by a vnode pager (if
     * properly page-aligned) or a "fourk_pager", itself backed by a
     * vnode pager (if 4K-aligned but not page-aligned).
     */
#endif /* __arm64__ */

    map_addr = start_aligned;
    for (map_addr = start_aligned;
        map_addr = tmp_entry.vme_end) {
        /* lookup the protected VM object */
        if (!vm_map_lookup_entry(map,
            map_entry->is_sub_map ||
            VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
            /* that memory is not properly mapped */
            kr = KERN_INVALID_ARGUMENT;

        /* ensure mapped memory is mapped as executable, except
         * for the model decryption flow */
        if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
            !(map_entry->protection & VM_PROT_EXECUTE)) {
            kr = KERN_INVALID_ARGUMENT;

        /* get the protected object to be decrypted */
        protected_object = VME_OBJECT(map_entry);
        if (protected_object == VM_OBJECT_NULL) {
            /* there should be a VM object here at this point */
            kr = KERN_INVALID_ARGUMENT;

        /* ensure protected object stays alive while map is unlocked */
        vm_object_reference(protected_object);

        /* limit the map entry to the area we want to cover */
        vm_map_clip_start(map, map_entry, start_aligned);
        vm_map_clip_end(map, map_entry, end_aligned);

        tmp_entry = *map_entry;
        map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */

        /*
         * This map entry might be only partially encrypted
         * (if not fully "page-aligned").
         */
        crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
        if (tmp_entry.vme_start < start) {
            if (tmp_entry.vme_start != start_aligned) {
                kr = KERN_INVALID_ADDRESS;
            }
            crypto_start += (start - tmp_entry.vme_start);
        }
        if (tmp_entry.vme_end > end) {
            if (tmp_entry.vme_end != end_aligned) {
                kr = KERN_INVALID_ADDRESS;
            }
            crypto_end -= (tmp_entry.vme_end - end);
        }

        /*
         * This "extra backing offset" is needed to get the decryption
         * routine to use the right key.  It adjusts for the possibly
         * relative offset of an interposed "4K" pager...
         */
        if (crypto_backing_offset == (vm_object_offset_t) -1) {
            crypto_backing_offset = VME_OFFSET(&tmp_entry);
        }

#if XNU_TARGET_OS_OSX
        if (vm_map_is_alien(map)) {
#endif /* XNU_TARGET_OS_OSX */

        /*
         * Lookup (and create if necessary) the protected memory object
         * matching that VM object.
         * If successful, this also grabs a reference on the memory object,
         * to guarantee that it doesn't go away before we get a chance to map
         * it.
         */
        unprotected_mem_obj = apple_protect_pager_setup(
            VME_OFFSET(&tmp_entry),
            crypto_backing_offset,

        /* release extra ref on protected object */
        vm_object_deallocate(protected_object);

        if (unprotected_mem_obj == NULL) {

        vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
        /* can overwrite an immutable mapping */
        vmk_flags.vmkf_overwrite_immutable = TRUE;

        if (tmp_entry.used_for_jit &&
            (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
            PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
            fourk_binary_compatibility_unsafe &&
            fourk_binary_compatibility_allow_wx) {
            printf("** FOURK_COMPAT [%d]: "
                "allowing write+execute at 0x%llx\n",
                proc_selfpid(), tmp_entry.vme_start);
            vmk_flags.vmkf_map_jit = TRUE;
#endif /* __arm64__ */
        /* map this memory object in place of the current one */
        map_addr = tmp_entry.vme_start;
        kr = vm_map_enter_mem_object(map,
            tmp_entry.vme_start),
            (mach_vm_offset_t) 0,
            (ipc_port_t)(uintptr_t) unprotected_mem_obj,
            tmp_entry.protection,
            tmp_entry.max_protection,
            tmp_entry.inheritance);
        assertf(kr == KERN_SUCCESS,
        assertf(map_addr == tmp_entry.vme_start,
            "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
            (uint64_t) tmp_entry.vme_start,

#if VM_MAP_DEBUG_APPLE_PROTECT
        if (vm_map_debug_apple_protect) {
            printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
                " backing:[object:%p,offset:0x%llx,"
                "crypto_backing_offset:0x%llx,"
                "crypto_start:0x%llx,crypto_end:0x%llx]\n",
                (uint64_t) (map_addr + (tmp_entry.vme_end -
                tmp_entry.vme_start)),
                VME_OFFSET(&tmp_entry),
                crypto_backing_offset,
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

        /*
         * Release the reference obtained by
         * apple_protect_pager_setup().
         * The mapping (if it succeeded) is now holding a reference on
         * the memory object.
         */
        memory_object_deallocate(unprotected_mem_obj);
        unprotected_mem_obj = MEMORY_OBJECT_NULL;

        /* continue with next map entry */
        crypto_backing_offset += (tmp_entry.vme_end -
            tmp_entry.vme_start);
        crypto_backing_offset -= crypto_start;

#endif /* CONFIG_CODE_DECRYPTION */
LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);

#if XNU_TARGET_OS_OSX
int malloc_no_cow = 0;
#else /* XNU_TARGET_OS_OSX */
int malloc_no_cow = 1;
#endif /* XNU_TARGET_OS_OSX */
uint64_t vm_memory_malloc_no_cow_mask = 0ULL;

int vm_check_map_sanity = 0;
/*
 * Initialize the vm_map module.  Must be called before
 * any other vm_map routines.
 *
 * Map and entry structures are allocated from zones -- we must
 * initialize those zones.
 *
 * There are three zones of interest:
 *
 *	vm_map_zone:			used to allocate maps.
 *	vm_map_entry_zone:		used to allocate map entries.
 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
 *
 * The kernel allocates map entries from a special zone that is initially
 * "crammed" with memory.  It would be difficult (perhaps impossible) for
 * the kernel to allocate more memory to an entry zone when it became
 * empty since the very act of allocating memory implies the creation
 * of a new entry.
 */

const char *mez_name = "VM map entries";
    PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
        sizeof(debug4k_filter));
#endif /* MACH_ASSERT */

    vm_map_zone = zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
        VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);

    vm_map_entry_zone = zone_create(mez_name, sizeof(struct vm_map_entry),
        ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT);

    /*
     * Don't quarantine because we always need elements available
     * Disallow GC on this zone... to aid the GC.
     */
    vm_map_entry_reserved_zone = zone_create_ext(VME_RESERVED_ZONE_NAME,
        sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,

    vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
        ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);

    vm_map_holes_zone = zone_create(VM_MAP_HOLES_ZONE_NAME,
        sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS);

    /*
     * Add the stolen memory to zones, adjust zone size and stolen counts.
     */
    zone_cram_foreign(vm_map_zone, map_data, map_data_size);
    zone_cram_foreign(vm_map_entry_reserved_zone, kentry_data, kentry_data_size);
    zone_cram_foreign(vm_map_holes_zone, map_holes_data, map_holes_data_size);

    /*
     * Since these are covered by zones, remove them from stolen page accounting.
     */
    VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));

#if VM_MAP_DEBUG_APPLE_PROTECT
    PE_parse_boot_argn("vm_map_debug_apple_protect",
        &vm_map_debug_apple_protect,
        sizeof(vm_map_debug_apple_protect));
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
    PE_parse_boot_argn("vm_map_debug_fourk",
        &vm_map_debug_fourk,
        sizeof(vm_map_debug_fourk));
#endif /* VM_MAP_DEBUG_FOURK */

    PE_parse_boot_argn("vm_map_executable_immutable",
        &vm_map_executable_immutable,
        sizeof(vm_map_executable_immutable));
    PE_parse_boot_argn("vm_map_executable_immutable_verbose",
        &vm_map_executable_immutable_verbose,
        sizeof(vm_map_executable_immutable_verbose));

    PE_parse_boot_argn("malloc_no_cow",
        &malloc_no_cow,
        sizeof(malloc_no_cow));
    if (malloc_no_cow) {
        vm_memory_malloc_no_cow_mask = 0ULL;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
        // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
        // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
        // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
        PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
            &vm_memory_malloc_no_cow_mask,
            sizeof(vm_memory_malloc_no_cow_mask));
    }

    PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
    if (vm_check_map_sanity) {
        kprintf("VM sanity checking enabled\n");
    } else {
        kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
    }

#if DEVELOPMENT || DEBUG
    PE_parse_boot_argn("panic_on_unsigned_execute",
        &panic_on_unsigned_execute,
        sizeof(panic_on_unsigned_execute));
#endif /* DEVELOPMENT || DEBUG */
vm_map_steal_memory(void)
{
    uint16_t kentry_initial_pages;
    uint16_t zone_foreign_pages;

    map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
        sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);

    /*
     * kentry_initial_pages corresponds to the number of kernel map entries
     * required during bootstrap until the asynchronous replenishment
     * scheme is activated and/or entries are available from the general
     * map entry pool.
     */
#if defined(__LP64__)
    kentry_initial_pages = (uint16_t)atop(16 * 4096);
#else
    kentry_initial_pages = 6;
#endif

    /* If using the guard allocator, reserve more memory for the kernel
     * reserved map entry pool.
     */
    if (gzalloc_enabled()) {
        kentry_initial_pages *= 1024;
    }

    if (PE_parse_boot_argn("zone_foreign_pages", &zone_foreign_pages,
        sizeof(zone_foreign_pages))) {
        kentry_initial_pages = zone_foreign_pages;
    }

    kentry_data_size = zone_get_foreign_alloc_size(VME_RESERVED_ZONE_NAME,
        sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
        kentry_initial_pages);

    map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
        sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
        kentry_initial_pages);

    /*
     * Steal a contiguous range of memory so that a simple range check
     * can validate foreign addresses being freed/crammed to these
     * zones.
     */
    vm_size_t total_size;
    if (os_add3_overflow(map_data_size, kentry_data_size,
        map_holes_data_size, &total_size)) {
        panic("vm_map_steal_memory: overflow in amount of memory requested");
    }
    map_data = zone_foreign_mem_init(total_size);
    kentry_data = map_data + map_data_size;
    map_holes_data = kentry_data + kentry_data_size;
}
STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
boolean_t vm_map_supports_hole_optimization = FALSE;

vm_kernel_reserved_entry_init(void)
{
    zone_replenish_configure(vm_map_entry_reserved_zone);

    /*
     * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
     */
    zone_replenish_configure(vm_map_holes_zone);
    vm_map_supports_hole_optimization = TRUE;
}

vm_map_disable_hole_optimization(vm_map_t map)
{
    vm_map_entry_t  head_entry, hole_entry, next_hole_entry;

    if (map->holelistenabled) {
        head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

        while (hole_entry != NULL) {
            next_hole_entry = hole_entry->vme_next;

            hole_entry->vme_next = NULL;
            hole_entry->vme_prev = NULL;
            zfree(vm_map_holes_zone, hole_entry);

            if (next_hole_entry == head_entry) {

            hole_entry = next_hole_entry;
        }

        map->holes_list = NULL;
        map->holelistenabled = FALSE;

        map->first_free = vm_map_first_entry(map);
        SAVE_HINT_HOLE_WRITE(map, NULL);
    }
}

vm_kernel_map_is_kernel(vm_map_t map)
{
    return map->pmap == kernel_pmap;
}
/*
 * Creates and returns a new empty VM map with
 * the given physical map structure, and having
 * the given lower and upper address bounds.
 */
    vm_map_offset_t min,
    vm_map_offset_t max,

    options |= VM_MAP_CREATE_PAGEABLE;

    return vm_map_create_options(pmap, min, max, options);
vm_map_create_options(
    vm_map_offset_t min,
    vm_map_offset_t max,

    struct vm_map_links *hole_entry = NULL;

    if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
        /* unknown option */

    result = (vm_map_t) zalloc(vm_map_zone);
    if (result == VM_MAP_NULL) {
        panic("vm_map_create");
    }

    vm_map_first_entry(result) = vm_map_to_entry(result);
    vm_map_last_entry(result) = vm_map_to_entry(result);
    result->hdr.nentries = 0;
    if (options & VM_MAP_CREATE_PAGEABLE) {
        result->hdr.entries_pageable = TRUE;
    } else {
        result->hdr.entries_pageable = FALSE;
    }

    vm_map_store_init(&(result->hdr));

    result->hdr.page_shift = PAGE_SHIFT;

    result->user_wire_limit = MACH_VM_MAX_ADDRESS;      /* default limit is unlimited */
    result->user_wire_size = 0;
#if XNU_TARGET_OS_OSX
    result->vmmap_high_start = 0;
#endif

    os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
    result->pmap = pmap;
    result->min_offset = min;
    result->max_offset = max;
    result->wiring_required = FALSE;
    result->no_zero_fill = FALSE;
    result->mapped_in_other_pmaps = FALSE;
    result->wait_for_space = FALSE;
    result->switch_protect = FALSE;
    result->disable_vmentry_reuse = FALSE;
    result->map_disallow_data_exec = FALSE;
    result->is_nested_map = FALSE;
    result->map_disallow_new_exec = FALSE;
    result->terminated = FALSE;
    result->cs_enforcement = FALSE;
    result->highest_entry_end = 0;
    result->first_free = vm_map_to_entry(result);
    result->hint = vm_map_to_entry(result);
    result->jit_entry_exists = FALSE;
    result->is_alien = FALSE;
    result->reserved_regions = FALSE;
    result->single_jit = FALSE;

    /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
    if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
        result->has_corpse_footprint = TRUE;
        result->holelistenabled = FALSE;
        result->vmmap_corpse_footprint = NULL;
    } else {
        result->has_corpse_footprint = FALSE;
        if (vm_map_supports_hole_optimization) {
            hole_entry = zalloc(vm_map_holes_zone);

            hole_entry->start = min;
#if defined(__arm__) || defined(__arm64__)
            hole_entry->end = result->max_offset;
#else
            hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
            result->holes_list = result->hole_hint = hole_entry;
            hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
            result->holelistenabled = TRUE;
        } else {
            result->holelistenabled = FALSE;
        }
    }

    vm_map_lock_init(result);
    lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
vm_map_adjusted_size(vm_map_t map)
{
    struct vm_reserved_region *regions = NULL;
    size_t num_regions = 0;
    mach_vm_size_t reserved_size = 0, map_size = 0;

    if (map == NULL || (map->size == 0)) {

    map_size = map->size;

    if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
        /*
         * No special reserved regions or not an exotic map or the task
         * is terminating and these special regions might have already
         * been deallocated.
         */

    num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
    assert((num_regions == 0) || (num_regions > 0 && regions != NULL));

    while (num_regions) {
        reserved_size += regions[--num_regions].vmrr_size;
    }

    /*
     * There are a few places where the map is being switched out due to
     * 'termination' without that bit being set (e.g. exec and corpse purging).
     * In those cases, we could have the map's regions being deallocated on
     * a core while some accounting process is trying to get the map's size.
     * So this assert can't be enabled till all those places are uniform in
     * their use of the 'map->terminated' bit.
     *
     * assert(map_size >= reserved_size);
     */

    return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
}
/*
 * vm_map_entry_create:	[ internal use only ]
 *
 * Allocates a VM map entry for insertion in the
 * given map (or map copy).  No fields are filled.
 */
#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)

#define vm_map_copy_entry_create(copy, map_locked) \
	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
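/*
 * Illustrative sketch (not part of the original source): typical allocation
 * and linking of a new entry with the macro above, mirroring the pattern
 * used later in this file (e.g. vm_map_find_space()).  "insertion_point",
 * "start" and "end" are hypothetical locals.
 */
#if 0
    vm_map_entry_t new_entry;

    new_entry = vm_map_entry_create(map, FALSE /* map_locked */);
    new_entry->vme_start = start;
    new_entry->vme_end = end;
    vm_map_store_entry_link(map, insertion_point, new_entry,
        VM_MAP_KERNEL_FLAGS_NONE);
#endif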
unsigned reserved_zalloc_count, nonreserved_zalloc_count;

static vm_map_entry_t
_vm_map_entry_create(
    struct vm_map_header *map_header, boolean_t __unused map_locked)
{
    vm_map_entry_t  entry;

    zone = vm_map_entry_zone;

    assert(map_header->entries_pageable ? !map_locked : TRUE);

    if (map_header->entries_pageable) {
        entry = (vm_map_entry_t) zalloc(zone);
    } else {
        entry = (vm_map_entry_t) zalloc_noblock(zone);

        if (entry == VM_MAP_ENTRY_NULL) {
            zone = vm_map_entry_reserved_zone;
            entry = (vm_map_entry_t) zalloc(zone);
            OSAddAtomic(1, &reserved_zalloc_count);
        } else {
            OSAddAtomic(1, &nonreserved_zalloc_count);
        }
    }

    if (entry == VM_MAP_ENTRY_NULL) {
        panic("vm_map_entry_create");
    }
    *entry = vm_map_entry_template;
    entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);

    vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
#if MAP_ENTRY_CREATION_DEBUG
    entry->vme_creation_maphdr = map_header;
    backtrace(&entry->vme_creation_bt[0],
        (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
#endif
/*
 * vm_map_entry_dispose:	[ internal use only ]
 *
 * Inverse of vm_map_entry_create.
 *
 * write map lock held so no need to
 * do anything special to insure correctness
 */
#define vm_map_entry_dispose(map, entry) \
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define vm_map_copy_entry_dispose(copy, entry) \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))

_vm_map_entry_dispose(
    struct vm_map_header *map_header,
    vm_map_entry_t entry)
{
    if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
        zone = vm_map_entry_zone;
    } else {
        zone = vm_map_entry_reserved_zone;
    }

    if (!map_header->entries_pageable) {
        if (zone == vm_map_entry_zone) {
            OSAddAtomic(-1, &nonreserved_zalloc_count);
        } else {
            OSAddAtomic(-1, &reserved_zalloc_count);
        }
    }

static boolean_t first_free_check = FALSE;

first_free_is_valid(

    if (!first_free_check) {

    return first_free_is_valid_store( map );
}
#endif /* MACH_ASSERT */

#define vm_map_copy_entry_link(copy, after_where, entry) \
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry) \
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
/*
 * Actually destroy a map.
 */

    /* final cleanup: no need to unnest shared region */
    flags |= VM_MAP_REMOVE_NO_UNNESTING;
    /* final cleanup: ok to remove immutable mappings */
    flags |= VM_MAP_REMOVE_IMMUTABLE;
    /* final cleanup: allow gaps in range */
    flags |= VM_MAP_REMOVE_GAPS_OK;

    /* clean up regular map entries */
    (void) vm_map_delete(map, map->min_offset, map->max_offset,
        flags, VM_MAP_NULL);
    /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
#if !defined(__arm__)
    (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
        flags, VM_MAP_NULL);
#endif /* !__arm__ */

    vm_map_disable_hole_optimization(map);
    vm_map_corpse_footprint_destroy(map);

    assert(map->hdr.nentries == 0);

    pmap_destroy(map->pmap);

    if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
        /*
         * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
         * And this is regardless of whether the lck_mtx_ext_t is embedded in the
         * structure or kalloc'ed via lck_mtx_init.
         * An example is s_lock_ext within struct _vm_map.
         *
         * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
         * can add another tag to detect embedded vs alloc'ed indirect external
         * mutexes but that'll be additional checks in the lock path and require
         * updating dependencies for the old vs new tag.
         *
         * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
         * just when lock debugging is ON, we choose to forego explicitly destroying
         * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
         * count on vm_map_lck_grp, which has no serious side-effect.
         */
    } else {
        lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
        lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
    }

    zfree(vm_map_zone, map);
/*
 * Returns pid of the task with the largest number of VM map entries.
 * Used in the zone-map-exhaustion jetsam path.
 */
find_largest_process_vm_map_entries(void)
{
    pid_t victim_pid = -1;
    int max_vm_map_entries = 0;
    task_t task = TASK_NULL;
    queue_head_t *task_list = &tasks;

    lck_mtx_lock(&tasks_threads_lock);
    queue_iterate(task_list, task, task_t, tasks) {
        if (task == kernel_task || !task->active) {

        vm_map_t task_map = task->map;
        if (task_map != VM_MAP_NULL) {
            int task_vm_map_entries = task_map->hdr.nentries;
            if (task_vm_map_entries > max_vm_map_entries) {
                max_vm_map_entries = task_vm_map_entries;
                victim_pid = pid_from_task(task);
            }
        }
    }
    lck_mtx_unlock(&tasks_threads_lock);

    printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
/*
 * vm_map_lookup_entry:	[ internal use only ]
 *
 * Calls into the vm map store layer to find the map
 * entry containing (or immediately preceding) the
 * specified address in the given map; the entry is returned
 * in the "entry" parameter.  The boolean
 * result indicates whether the address is
 * actually contained in the map.
 */
vm_map_lookup_entry(
    vm_map_offset_t address,
    vm_map_entry_t  *entry)         /* OUT */
{
    return vm_map_store_lookup_entry( map, address, entry );
}
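/*
 * Illustrative sketch (not part of the original source): interpreting the
 * boolean result described above.
 */
#if 0
    vm_map_entry_t entry;

    if (vm_map_lookup_entry(map, addr, &entry)) {
        /* addr lies inside [entry->vme_start, entry->vme_end) */
    } else {
        /* addr is unmapped; "entry" immediately precedes the hole containing addr */
    }
#endif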
/*
 * Routine:	vm_map_find_space
 *
 * Allocate a range in the specified virtual address map,
 * returning the entry allocated for that range.
 * Used by kmem_alloc, etc.
 *
 * The map must NOT be locked. It will be returned locked
 * on KERN_SUCCESS, unlocked on failure.
 *
 * If an entry is allocated, the object/offset fields
 * are initialized to zero.
 *
 * If VM_MAP_FIND_LAST_FREE flag is set, allocate from end of map. This
 * is currently only used for allocating memory for zones backing
 * one of the kalloc heaps. (rdar://65832263)
 */
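/*
 * Illustrative sketch (not part of the original source): a kmem_alloc-style
 * caller of vm_map_find_space().  Argument order follows the parameter list
 * below; the object/offset fill-in and error handling are hypothetical.
 */
#if 0
    vm_map_offset_t addr;
    vm_map_entry_t  entry;
    kern_return_t   kr;

    kr = vm_map_find_space(kernel_map, &addr, size, 0 /* mask */,
        0 /* flags */, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE, &entry);
    if (kr == KERN_SUCCESS) {
        /* map comes back locked; fill in the entry, then unlock */
        VME_OBJECT_SET(entry, object);
        VME_OFFSET_SET(entry, offset);
        vm_map_unlock(kernel_map);
    }
#endif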
    vm_map_offset_t *address,   /* OUT */
    vm_map_offset_t mask,
    vm_map_kernel_flags_t vmk_flags,
    vm_map_entry_t *o_entry)    /* OUT */
{
    vm_map_entry_t  entry, new_entry, hole_entry;
    vm_map_offset_t start;
    vm_map_offset_t end;

        return KERN_INVALID_ARGUMENT;

    new_entry = vm_map_entry_create(map, FALSE);

    if (flags & VM_MAP_FIND_LAST_FREE) {
        assert(!map->disable_vmentry_reuse);
        /* TODO: Make backward lookup generic and support guard pages */
        assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
        assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));

        /* Allocate space from end of map */
        vm_map_store_find_last_free(map, &entry);

        if (entry == vm_map_to_entry(map)) {
            end = map->max_offset;
        } else {
            end = entry->vme_start;
        }

            vm_map_entry_t prev;

            if ((start < map->min_offset) || end < start) {

            prev = entry->vme_prev;

            if (prev == vm_map_to_entry(map)) {

            if (prev->vme_end <= start) {

            /*
             * Didn't fit -- move to the next entry.
             */
            end = entry->vme_start;

    if (vmk_flags.vmkf_guard_after) {
        /* account for the back guard page in the size */
        size += VM_MAP_PAGE_SIZE(map);
    }

    /*
     * Look for the first possible address; if there's already
     * something at this address, we have to start after it.
     */

    if (map->disable_vmentry_reuse == TRUE) {
        VM_MAP_HIGHEST_ENTRY(map, entry, start);
    } else {
        if (map->holelistenabled) {
            hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

            if (hole_entry == NULL) {
                /*
                 * No more space in the map?
                 */

            start = entry->vme_start;
        } else {
            assert(first_free_is_valid(map));
            if ((entry = map->first_free) == vm_map_to_entry(map)) {
                start = map->min_offset;
            } else {
                start = entry->vme_end;
            }
        }
    }

    /*
     * In any case, the "entry" always precedes
     * the proposed new region throughout the loop:
     */
        vm_map_entry_t  next;

        /*
         * Find the end of the proposed new region.
         * Be sure we didn't go beyond the end, or
         * wrap around the address.
         */

        if (vmk_flags.vmkf_guard_before) {
            /* reserve space for the front guard page */
            start += VM_MAP_PAGE_SIZE(map);
        }
        end = ((start + mask) & ~mask);

        assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
        assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

        if ((end > map->max_offset) || (end < start)) {

        next = entry->vme_next;

        if (map->holelistenabled) {
            if (entry->vme_end >= end) {

            /*
             * If there are no more entries, we must win.
             *
             * If there is another entry, it must be
             * after the end of the potential new region.
             */

            if (next == vm_map_to_entry(map)) {

            if (next->vme_start >= end) {

        /*
         * Didn't fit -- move to the next entry.
         */

        if (map->holelistenabled) {
            if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {

            start = entry->vme_start;
        } else {
            start = entry->vme_end;
        }

    if (vmk_flags.vmkf_guard_before) {
        /* go back for the front guard page */
        start -= VM_MAP_PAGE_SIZE(map);
    }

    if (map->holelistenabled) {
        if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
            panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
        }
    }

    /*
     * "start" and "end" should define the endpoints of the
     *     available new range, and
     * "entry" should refer to the region before the new range, and
     *
     * the map should be locked.
     */

    assert(start < end);
    new_entry->vme_start = start;
    new_entry->vme_end = end;
    assert(page_aligned(new_entry->vme_start));
    assert(page_aligned(new_entry->vme_end));
    assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
        VM_MAP_PAGE_MASK(map)));
    assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
        VM_MAP_PAGE_MASK(map)));

    new_entry->is_shared = FALSE;
    new_entry->is_sub_map = FALSE;
    new_entry->use_pmap = TRUE;
    VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
    VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);

    new_entry->needs_copy = FALSE;

    new_entry->inheritance = VM_INHERIT_DEFAULT;
    new_entry->protection = VM_PROT_DEFAULT;
    new_entry->max_protection = VM_PROT_ALL;
    new_entry->behavior = VM_BEHAVIOR_DEFAULT;
    new_entry->wired_count = 0;
    new_entry->user_wired_count = 0;

    new_entry->in_transition = FALSE;
    new_entry->needs_wakeup = FALSE;
    new_entry->no_cache = FALSE;
    new_entry->permanent = FALSE;
    new_entry->superpage_size = FALSE;
    if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
        new_entry->map_aligned = TRUE;
    } else {
        new_entry->map_aligned = FALSE;
    }

    new_entry->used_for_jit = FALSE;
    new_entry->pmap_cs_associated = FALSE;
    new_entry->zero_wired_pages = FALSE;
    new_entry->iokit_acct = FALSE;
    new_entry->vme_resilient_codesign = FALSE;
    new_entry->vme_resilient_media = FALSE;
    if (vmk_flags.vmkf_atomic_entry) {
        new_entry->vme_atomic = TRUE;
    } else {
        new_entry->vme_atomic = FALSE;
    }

    VME_ALIAS_SET(new_entry, tag);

    /*
     * Insert the new entry into the list
     */

    vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);

    /*
     * Update the lookup hint
     */
    SAVE_HINT_MAP_WRITE(map, new_entry);

    *o_entry = new_entry;
    return KERN_SUCCESS;

    vm_map_entry_dispose(map, new_entry);

    return KERN_NO_SPACE;
}
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;
/*
 * Routine:	vm_map_pmap_enter [internal only]
 *
 * Force pages from the specified object to be entered into
 * the pmap at the specified address if they are present.
 * As soon as a page is not found in the object, the scan ends.
 *
 * In/out conditions:
 *	The source map should not be locked on entry.
 */
__unused static void
vm_map_pmap_enter(
    vm_map_offset_t addr,
    vm_map_offset_t end_addr,
    vm_object_offset_t offset,
    vm_prot_t protection)
{
    struct vm_object_fault_info fault_info = {};

    if (map->pmap == 0) {

    assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);

    while (addr < end_addr) {

        /*
         * From vm_map_enter(), we come into this function without the map
         * lock held or the object lock held.
         * We haven't taken a reference on the object either.
         * We should do a proper lookup on the map to make sure
         * that things are sane before we go locking objects that
         * could have been deallocated from under us.
         */

        vm_object_lock(object);

        m = vm_page_lookup(object, offset);

        if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
            (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
            vm_object_unlock(object);

        if (vm_map_pmap_enter_print) {
            printf("vm_map_pmap_enter:");
            printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
                map, (unsigned long long)addr, object, (unsigned long long)offset);
        }
        type_of_fault = DBG_CACHE_HIT_FAULT;
        kr = vm_fault_enter(m, map->pmap,
            protection, protection,
            FALSE,                      /* change_wiring */
            VM_KERN_MEMORY_NONE,        /* tag - not wiring */
            NULL,                       /* need_retry */

        vm_object_unlock(object);

        offset += PAGE_SIZE_64;
    }
}

boolean_t vm_map_pmap_is_empty(
    vm_map_offset_t start,
    vm_map_offset_t end);

vm_map_pmap_is_empty(
    vm_map_offset_t start,
    vm_map_offset_t end)
{
#ifdef MACHINE_PMAP_IS_EMPTY
    return pmap_is_empty(map->pmap, start, end);
#else   /* MACHINE_PMAP_IS_EMPTY */
    vm_map_offset_t offset;

    if (map->pmap == NULL) {

    for (offset = start;
        offset += PAGE_SIZE) {
        phys_page = pmap_find_phys(map->pmap, offset);
            kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
                "page %d at 0x%llx\n",
                map, (long long)start, (long long)end,
                phys_page, (long long)offset);
#endif  /* MACHINE_PMAP_IS_EMPTY */
#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000

vm_map_random_address_for_size(
    vm_map_offset_t *address,

    kern_return_t   kr = KERN_SUCCESS;
    vm_map_offset_t random_addr = 0;
    vm_map_offset_t hole_end;
    vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
    vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
    vm_map_size_t   vm_hole_size = 0;
    vm_map_size_t   addr_space_size;

    addr_space_size = vm_map_max(map) - vm_map_min(map);

    assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));

    while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
        if (startup_phase < STARTUP_SUB_ZALLOC) {
            random_addr = (vm_map_offset_t)early_random();
        } else {
            random_addr = (vm_map_offset_t)random();
        }
        random_addr <<= VM_MAP_PAGE_SHIFT(map);
        random_addr = vm_map_trunc_page(
            vm_map_min(map) + (random_addr % addr_space_size),
            VM_MAP_PAGE_MASK(map));

        if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
            if (prev_entry == vm_map_to_entry(map)) {
                next_entry = vm_map_first_entry(map);
            } else {
                next_entry = prev_entry->vme_next;
            }
            if (next_entry == vm_map_to_entry(map)) {
                hole_end = vm_map_max(map);
            } else {
                hole_end = next_entry->vme_start;
            }
            vm_hole_size = hole_end - random_addr;
            if (vm_hole_size >= size) {
                *address = random_addr;

    if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {

vm_memory_malloc_no_cow(

    uint64_t alias_mask;

    alias_mask = 1ULL << alias;
    if (alias_mask & vm_memory_malloc_no_cow_mask) {
/*
 * Routine:	vm_map_enter
 *
 * Allocate a range in the specified virtual address map.
 * The resulting range will refer to memory defined by
 * the given memory object and offset into that object.
 *
 * Arguments are as defined in the vm_map call.
 */

static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;
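/*
 * Illustrative sketch (not part of the original source): an "anywhere"
 * anonymous allocation through vm_map_enter(), with a null object so the
 * backing object is created lazily.  Argument order follows the parameter
 * list below.
 */
#if 0
    vm_map_offset_t addr = 0;
    kern_return_t kr;

    kr = vm_map_enter(map, &addr, size, 0 /* mask */,
        VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
        VM_OBJECT_NULL, 0 /* offset */, FALSE /* needs_copy */,
        VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
#endif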
2143 vm_map_offset_t
*address
, /* IN/OUT */
2145 vm_map_offset_t mask
,
2147 vm_map_kernel_flags_t vmk_flags
,
2150 vm_object_offset_t offset
,
2151 boolean_t needs_copy
,
2152 vm_prot_t cur_protection
,
2153 vm_prot_t max_protection
,
2154 vm_inherit_t inheritance
)
{
    vm_map_entry_t          entry, new_entry;
    vm_map_offset_t         start, tmp_start, tmp_offset;
    vm_map_offset_t         end, tmp_end;
    vm_map_offset_t         tmp2_start, tmp2_end;
    vm_map_offset_t         desired_empty_end;
    vm_map_offset_t         step;
    kern_return_t           result = KERN_SUCCESS;
    vm_map_t                zap_old_map = VM_MAP_NULL;
    vm_map_t                zap_new_map = VM_MAP_NULL;
    boolean_t               map_locked = FALSE;
    boolean_t               pmap_empty = TRUE;
    boolean_t               new_mapping_established = FALSE;
    boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
    boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
    boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
    boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
    boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
    boolean_t               is_submap = vmk_flags.vmkf_submap;
    boolean_t               permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
    boolean_t               no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
    boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
    boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
    boolean_t               translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
    boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
    boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
    boolean_t               random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
    unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
    vm_tag_t                user_alias;
    vm_map_offset_t         effective_min_offset, effective_max_offset;
    boolean_t               clear_map_aligned = FALSE;
    vm_map_entry_t          hole_entry;
    vm_map_size_t           chunk_size = 0;
    assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);

    if (flags & VM_FLAGS_4GB_CHUNK) {
#if defined(__LP64__)
        chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
#else /* __LP64__ */
        chunk_size = ANON_CHUNK_SIZE;
#endif /* __LP64__ */
    } else {
        chunk_size = ANON_CHUNK_SIZE;
    }
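    /*
     * "chunk_size" caps how much of a large anonymous allocation is backed
     * by a single VM object: the allocation loop further down splits the
     * range into chunk_size pieces when no backing object was supplied.
     */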
    if (superpage_size) {
        switch (superpage_size) {
            /*
             * Note that the current implementation only supports
             * a single size for superpages, SUPERPAGE_SIZE, per
             * architecture. As soon as more sizes are supposed
             * to be supported, SUPERPAGE_SIZE has to be replaced
             * with a lookup of the size depending on superpage_size.
             */
        case SUPERPAGE_SIZE_ANY:
            /* handle it like 2 MB and round up to page size */
            size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
        case SUPERPAGE_SIZE_2MB:
            break;
        default:
            return KERN_INVALID_ARGUMENT;
        }
        mask = SUPERPAGE_SIZE - 1;
        if (size & (SUPERPAGE_SIZE - 1)) {
            return KERN_INVALID_ARGUMENT;
        }
        inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
    }
    if ((cur_protection & VM_PROT_WRITE) &&
        (cur_protection & VM_PROT_EXECUTE) &&
#if XNU_TARGET_OS_OSX
        map->pmap != kernel_pmap &&
        (cs_process_global_enforcement() ||
        (vmk_flags.vmkf_cs_enforcement_override
        ? vmk_flags.vmkf_cs_enforcement
        : (vm_map_cs_enforcement(map)
#if __arm64__
        || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
        ))) &&
#endif /* XNU_TARGET_OS_OSX */
        (VM_MAP_POLICY_WX_FAIL(map) ||
        VM_MAP_POLICY_WX_STRIP_X(map)) &&
        !entry_for_jit) {
        boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);

        DTRACE_VM3(cs_wx,
            uint64_t, 0,
            uint64_t, 0,
            vm_prot_t, cur_protection);
        printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
            proc_selfpid(),
            (current_task()->bsd_info
            ? proc_name_address(current_task()->bsd_info)
            : "?"),
            __FUNCTION__,
            (vm_protect_wx_fail ? "failing" : "turning off execute"));
        cur_protection &= ~VM_PROT_EXECUTE;
        if (vm_protect_wx_fail) {
            return KERN_PROTECTION_FAILURE;
        }
    }
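    /*
     * For example, a write+execute request from a code-signing-enforced
     * process either loses VM_PROT_EXECUTE here or fails outright,
     * depending on the map's W^X policy.
     */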
    /*
     * If the task has requested executable lockdown,
     * deny any new executable mapping.
     */
    if (map->map_disallow_new_exec == TRUE) {
        if (cur_protection & VM_PROT_EXECUTE) {
            return KERN_PROTECTION_FAILURE;
        }
    }
    if (resilient_codesign) {
        int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
        if ((cur_protection | max_protection) & reject_prot) {
            return KERN_PROTECTION_FAILURE;
        }
    }

    if (resilient_media) {
        // assert(!needs_copy);
        if (object != VM_OBJECT_NULL &&
            !object->internal) {
            /*
             * This mapping is directly backed by an external
             * memory manager (e.g. a vnode pager for a file):
             * we would not have any safe place to inject
             * a zero-filled page if an actual page is not
             * available, without possibly impacting the actual
             * contents of the mapped object (e.g. the file),
             * so we can't provide any media resiliency here.
             */
            return KERN_INVALID_ARGUMENT;
        }
    }
    if (is_submap) {
        if (purgable) {
            /* submaps can not be purgeable */
            return KERN_INVALID_ARGUMENT;
        }
        if (object == VM_OBJECT_NULL) {
            /* submaps can not be created lazily */
            return KERN_INVALID_ARGUMENT;
        }
    }
    if (vmk_flags.vmkf_already) {
        /*
         * VM_FLAGS_ALREADY says that it's OK if the same mapping
         * is already present.  For it to be meaningful, the requested
         * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
         * we shouldn't try and remove what was mapped there first
         * (!VM_FLAGS_OVERWRITE).
         */
        if ((flags & VM_FLAGS_ANYWHERE) ||
            (flags & VM_FLAGS_OVERWRITE)) {
            return KERN_INVALID_ARGUMENT;
        }
    }
    effective_min_offset = map->min_offset;

    if (vmk_flags.vmkf_beyond_max) {
        /*
         * Allow an insertion beyond the map's max offset.
         */
#if !defined(__arm__)
        if (vm_map_is_64bit(map)) {
            effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
        } else
#endif /* __arm__ */
        effective_max_offset = 0x00000000FFFFF000ULL;
    } else {
#if XNU_TARGET_OS_OSX
        if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
            effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
        } else {
            effective_max_offset = map->max_offset;
        }
#else /* XNU_TARGET_OS_OSX */
        effective_max_offset = map->max_offset;
#endif /* XNU_TARGET_OS_OSX */
    }
    if (size == 0 ||
        (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
        *address = 0;
        return KERN_INVALID_ARGUMENT;
    }

    if (map->pmap == kernel_pmap) {
        user_alias = VM_KERN_MEMORY_NONE;
    } else {
        user_alias = alias;
    }

    if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
        chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
    }
#define RETURN(value)   { result = value; goto BailOut; }

    assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
    assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
    if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
        assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
        assertf(page_aligned(size), "0x%llx", (uint64_t)size);
    }

    if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
        !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
        /*
         * In most cases, the caller rounds the size up to the
         * map's page size.
         * If we get a size that is explicitly not map-aligned here,
         * we'll have to respect the caller's wish and mark the
         * mapping as "not map-aligned" to avoid tripping the
         * map alignment checks later.
         */
        clear_map_aligned = TRUE;
    }
    if (!anywhere &&
        VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
        !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
        /*
         * We've been asked to map at a fixed address and that
         * address is not aligned to the map's specific alignment.
         * The caller should know what it's doing (i.e. most likely
         * mapping some fragmented copy map, transferring memory from
         * a VM map with a different alignment), so clear map_aligned
         * for this new VM map entry and proceed.
         */
        clear_map_aligned = TRUE;
    }
    /*
     * Only zero-fill objects are allowed to be purgable.
     * LP64todo - limit purgable objects to 32-bits for now
     */
    if (purgable &&
        (offset != 0 ||
        (object != VM_OBJECT_NULL &&
        (object->vo_size != size ||
        object->purgable == VM_PURGABLE_DENY))
        || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
        return KERN_INVALID_ARGUMENT;
    }

    if (!anywhere && overwrite) {
        /*
         * Create a temporary VM map to hold the old mappings in the
         * affected area while we create the new one.
         * This avoids releasing the VM map lock in
         * vm_map_entry_delete() and allows atomicity
         * when we want to replace some mappings with a new one.
         * It also allows us to restore the old VM mappings if the
         * new mapping fails.
         */
        zap_old_map = vm_map_create(PMAP_NULL,
            *address,
            *address + size,
            map->hdr.entries_pageable);
        vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
        vm_map_disable_hole_optimization(zap_old_map);
    }
    if (entry_for_jit) {
        if (map->jit_entry_exists &&
            !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
            result = KERN_INVALID_ARGUMENT;
            goto BailOut;
        }
        if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
            random_address = TRUE;
        }
    }

    if (random_address) {
        /*
         * Get a random start address.
         */
        result = vm_map_random_address_for_size(map, address, size);
        if (result != KERN_SUCCESS) {
            goto BailOut;
        }
        start = *address;
    }
#if XNU_TARGET_OS_OSX
    else if ((start == 0 || start == vm_map_min(map)) &&
        !map->disable_vmentry_reuse &&
        map->vmmap_high_start != 0) {
        start = map->vmmap_high_start;
    }
#endif /* XNU_TARGET_OS_OSX */
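    /*
     * With a start hint established, the code below scans the map (or its
     * hole list, when hole-list optimization is enabled) for a gap large
     * enough to hold "size" bytes between effective_min_offset and
     * effective_max_offset.
     */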
2470 * Calculate the first possible address.
2473 if (start
< effective_min_offset
) {
2474 start
= effective_min_offset
;
2476 if (start
> effective_max_offset
) {
2477 RETURN(KERN_NO_SPACE
);
2481 * Look for the first possible address;
2482 * if there's already something at this
2483 * address, we have to start after it.
2486 if (map
->disable_vmentry_reuse
== TRUE
) {
2487 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
2489 if (map
->holelistenabled
) {
2490 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
2492 if (hole_entry
== NULL
) {
2494 * No more space in the map?
2496 result
= KERN_NO_SPACE
;
2499 boolean_t found_hole
= FALSE
;
2502 if (hole_entry
->vme_start
>= start
) {
2503 start
= hole_entry
->vme_start
;
2508 if (hole_entry
->vme_end
> start
) {
2512 hole_entry
= hole_entry
->vme_next
;
2513 } while (hole_entry
!= CAST_TO_VM_MAP_ENTRY(map
->holes_list
));
2515 if (found_hole
== FALSE
) {
2516 result
= KERN_NO_SPACE
;
2523 start
+= PAGE_SIZE_64
;
2527 assert(first_free_is_valid(map
));
2529 entry
= map
->first_free
;
2531 if (entry
== vm_map_to_entry(map
)) {
2534 if (entry
->vme_next
== vm_map_to_entry(map
)) {
2536 * Hole at the end of the map.
2540 if (start
< (entry
->vme_next
)->vme_start
) {
2541 start
= entry
->vme_end
;
2542 start
= vm_map_round_page(start
,
2543 VM_MAP_PAGE_MASK(map
));
2546 * Need to do a lookup.
2553 if (entry
== NULL
) {
2554 vm_map_entry_t tmp_entry
;
2555 if (vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
2556 assert(!entry_for_jit
);
2557 start
= tmp_entry
->vme_end
;
2558 start
= vm_map_round_page(start
,
2559 VM_MAP_PAGE_MASK(map
));
2567 * In any case, the "entry" always precedes
2568 * the proposed new region throughout the
2573 vm_map_entry_t next
;
2576 * Find the end of the proposed new region.
2577 * Be sure we didn't go beyond the end, or
2578 * wrap around the address.
2581 end
= ((start
+ mask
) & ~mask
);
2582 end
= vm_map_round_page(end
,
2583 VM_MAP_PAGE_MASK(map
));
2585 RETURN(KERN_NO_SPACE
);
2588 assert(VM_MAP_PAGE_ALIGNED(start
,
2589 VM_MAP_PAGE_MASK(map
)));
2592 /* We want an entire page of empty space, but don't increase the allocation size. */
2593 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
2595 if ((desired_empty_end
> effective_max_offset
) || (desired_empty_end
< start
)) {
2596 if (map
->wait_for_space
) {
2597 assert(!keep_map_locked
);
2598 if (size
<= (effective_max_offset
-
2599 effective_min_offset
)) {
2600 assert_wait((event_t
)map
,
2604 thread_block(THREAD_CONTINUE_NULL
);
2608 RETURN(KERN_NO_SPACE
);
2611 next
= entry
->vme_next
;
2613 if (map
->holelistenabled
) {
2614 if (entry
->vme_end
>= desired_empty_end
) {
2619 * If there are no more entries, we must win.
2623 * If there is another entry, it must be
2624 * after the end of the potential new region.
2627 if (next
== vm_map_to_entry(map
)) {
2631 if (next
->vme_start
>= desired_empty_end
) {
2637 * Didn't fit -- move to the next entry.
2642 if (map
->holelistenabled
) {
2643 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
2647 result
= KERN_NO_SPACE
;
2650 start
= entry
->vme_start
;
2652 start
= entry
->vme_end
;
2655 start
= vm_map_round_page(start
,
2656 VM_MAP_PAGE_MASK(map
));
2659 if (map
->holelistenabled
) {
2660 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
2661 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
2666 assert(VM_MAP_PAGE_ALIGNED(*address
,
2667 VM_MAP_PAGE_MASK(map
)));
2669 if (VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
&&
2671 user_alias
== VM_MEMORY_REALLOC
) {
2673 * Force realloc() to switch to a new allocation,
2674 * to prevent 4k-fragmented virtual ranges.
2676 // DEBUG4K_ERROR("no realloc in place");
2677 return KERN_NO_SPACE
;
2682 * the address doesn't itself violate
2683 * the mask requirement.
2688 if ((start
& mask
) != 0) {
2689 RETURN(KERN_NO_SPACE
);
2693 * ... the address is within bounds
2698 if ((start
< effective_min_offset
) ||
2699 (end
> effective_max_offset
) ||
2701 RETURN(KERN_INVALID_ADDRESS
);
2704 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
2707 * Fixed mapping and "overwrite" flag: attempt to
2708 * remove all existing mappings in the specified
2709 * address range, saving them in our "zap_old_map".
2711 remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
;
2712 remove_flags
|= VM_MAP_REMOVE_NO_MAP_ALIGN
;
2713 if (vmk_flags
.vmkf_overwrite_immutable
) {
2714 /* we can overwrite immutable mappings */
2715 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
2717 (void) vm_map_delete(map
, start
, end
,
2723 * ... the starting address isn't allocated
2726 if (vm_map_lookup_entry(map
, start
, &entry
)) {
2727 if (!(vmk_flags
.vmkf_already
)) {
2728 RETURN(KERN_NO_SPACE
);
2731 * Check if what's already there is what we want.
2734 tmp_offset
= offset
;
2735 if (entry
->vme_start
< start
) {
2736 tmp_start
-= start
- entry
->vme_start
;
2737 tmp_offset
-= start
- entry
->vme_start
;
2739 for (; entry
->vme_start
< end
;
2740 entry
= entry
->vme_next
) {
2742 * Check if the mapping's attributes
2743 * match the existing map entry.
2745 if (entry
== vm_map_to_entry(map
) ||
2746 entry
->vme_start
!= tmp_start
||
2747 entry
->is_sub_map
!= is_submap
||
2748 VME_OFFSET(entry
) != tmp_offset
||
2749 entry
->needs_copy
!= needs_copy
||
2750 entry
->protection
!= cur_protection
||
2751 entry
->max_protection
!= max_protection
||
2752 entry
->inheritance
!= inheritance
||
2753 entry
->iokit_acct
!= iokit_acct
||
2754 VME_ALIAS(entry
) != alias
) {
2755 /* not the same mapping ! */
2756 RETURN(KERN_NO_SPACE
);
2759 * Check if the same object is being mapped.
2762 if (VME_SUBMAP(entry
) !=
2763 (vm_map_t
) object
) {
2764 /* not the same submap */
2765 RETURN(KERN_NO_SPACE
);
2768 if (VME_OBJECT(entry
) != object
) {
2769 /* not the same VM object... */
2772 obj2
= VME_OBJECT(entry
);
2773 if ((obj2
== VM_OBJECT_NULL
||
2775 (object
== VM_OBJECT_NULL
||
2776 object
->internal
)) {
2783 RETURN(KERN_NO_SPACE
);
2788 tmp_offset
+= entry
->vme_end
- entry
->vme_start
;
2789 tmp_start
+= entry
->vme_end
- entry
->vme_start
;
2790 if (entry
->vme_end
>= end
) {
2791 /* reached the end of our mapping */
2795 /* it all matches: let's use what's already there ! */
2796 RETURN(KERN_MEMORY_PRESENT
);
2800 * ... the next region doesn't overlap the
2804 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
2805 (entry
->vme_next
->vme_start
< end
)) {
2806 RETURN(KERN_NO_SPACE
);
2812 * "start" and "end" should define the endpoints of the
2813 * available new range, and
2814 * "entry" should refer to the region before the new
2817 * the map should be locked.
2821 * See whether we can avoid creating a new entry (and object) by
2822 * extending one of our neighbors. [So far, we only attempt to
2823 * extend from below.] Note that we can never extend/join
2824 * purgable objects because they need to remain distinct
2825 * entities in order to implement their "volatile object"
2831 vm_memory_malloc_no_cow(user_alias
)) {
2832 if (object
== VM_OBJECT_NULL
) {
2833 object
= vm_object_allocate(size
);
2834 object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
2835 object
->true_share
= FALSE
;
2838 object
->purgable
= VM_PURGABLE_NONVOLATILE
;
2839 if (map
->pmap
== kernel_pmap
) {
2841 * Purgeable mappings made in a kernel
2842 * map are "owned" by the kernel itself
2843 * rather than the current user task
2844 * because they're likely to be used by
2845 * more than this user task (see
2846 * execargs_purgeable_allocate(), for
2849 owner
= kernel_task
;
2851 owner
= current_task();
2853 assert(object
->vo_owner
== NULL
);
2854 assert(object
->resident_page_count
== 0);
2855 assert(object
->wired_page_count
== 0);
2856 vm_object_lock(object
);
2857 vm_purgeable_nonvolatile_enqueue(object
, owner
);
2858 vm_object_unlock(object
);
2860 offset
= (vm_object_offset_t
)0;
2862 } else if (VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
) {
2863 /* no coalescing if address space uses sub-pages */
2864 } else if ((is_submap
== FALSE
) &&
2865 (object
== VM_OBJECT_NULL
) &&
2866 (entry
!= vm_map_to_entry(map
)) &&
2867 (entry
->vme_end
== start
) &&
2868 (!entry
->is_shared
) &&
2869 (!entry
->is_sub_map
) &&
2870 (!entry
->in_transition
) &&
2871 (!entry
->needs_wakeup
) &&
2872 (entry
->behavior
== VM_BEHAVIOR_DEFAULT
) &&
2873 (entry
->protection
== cur_protection
) &&
2874 (entry
->max_protection
== max_protection
) &&
2875 (entry
->inheritance
== inheritance
) &&
2876 ((user_alias
== VM_MEMORY_REALLOC
) ||
2877 (VME_ALIAS(entry
) == alias
)) &&
2878 (entry
->no_cache
== no_cache
) &&
2879 (entry
->permanent
== permanent
) &&
2880 /* no coalescing for immutable executable mappings */
2881 !((entry
->protection
& VM_PROT_EXECUTE
) &&
2882 entry
->permanent
) &&
2883 (!entry
->superpage_size
&& !superpage_size
) &&
2885 * No coalescing if not map-aligned, to avoid propagating
2886 * that condition any further than needed:
2888 (!entry
->map_aligned
|| !clear_map_aligned
) &&
2889 (!entry
->zero_wired_pages
) &&
2890 (!entry
->used_for_jit
&& !entry_for_jit
) &&
2891 (!entry
->pmap_cs_associated
) &&
2892 (entry
->iokit_acct
== iokit_acct
) &&
2893 (!entry
->vme_resilient_codesign
) &&
2894 (!entry
->vme_resilient_media
) &&
2895 (!entry
->vme_atomic
) &&
2896 (entry
->vme_no_copy_on_read
== no_copy_on_read
) &&
2898 ((entry
->vme_end
- entry
->vme_start
) + size
<=
2899 (user_alias
== VM_MEMORY_REALLOC
?
2901 NO_COALESCE_LIMIT
)) &&
2903 (entry
->wired_count
== 0)) { /* implies user_wired_count == 0 */
2904 if (vm_object_coalesce(VME_OBJECT(entry
),
2907 (vm_object_offset_t
) 0,
2908 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
),
2909 (vm_map_size_t
)(end
- entry
->vme_end
))) {
2911 * Coalesced the two objects - can extend
2912 * the previous map entry to include the
2915 map
->size
+= (end
- entry
->vme_end
);
2916 assert(entry
->vme_start
< end
);
2917 assert(VM_MAP_PAGE_ALIGNED(end
,
2918 VM_MAP_PAGE_MASK(map
)));
2919 if (__improbable(vm_debug_events
)) {
2920 DTRACE_VM5(map_entry_extend
, vm_map_t
, map
, vm_map_entry_t
, entry
, vm_address_t
, entry
->vme_start
, vm_address_t
, entry
->vme_end
, vm_address_t
, end
);
2922 entry
->vme_end
= end
;
2923 if (map
->holelistenabled
) {
2924 vm_map_store_update_first_free(map
, entry
, TRUE
);
2926 vm_map_store_update_first_free(map
, map
->first_free
, TRUE
);
2928 new_mapping_established
= TRUE
;
2929 RETURN(KERN_SUCCESS
);
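            /*
             * The coalescing path above typically fires for back-to-back
             * anonymous allocations (e.g. consecutive vm_allocate() calls or
             * realloc() growth): instead of creating a new entry, the previous
             * entry and its anonymous object are extended to cover the new range.
             */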
    step = superpage_size ? SUPERPAGE_SIZE : (end - start);

    for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
        tmp2_end = tmp2_start + step;
        /*
         * Create a new entry
         */

        /*
         * The reserved "page zero" in each process's address space can
         * be arbitrarily large.  Splitting it into separate objects and
         * therefore different VM map entries serves no purpose and just
         * slows down operations on the VM map, so let's not split the
         * allocation into chunks if the max protection is NONE.  That
         * memory should never be accessible, so it will never get to the
         * default pager.
         */
        tmp_start = tmp2_start;
        if (object == VM_OBJECT_NULL &&
            size > chunk_size &&
            max_protection != VM_PROT_NONE &&
            superpage_size == 0) {
            tmp_end = tmp_start + chunk_size;
        } else {
            tmp_end = tmp2_end;
        }
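        /*
         * Each pass of the loop below inserts one map entry covering at most
         * chunk_size bytes, so a large anonymous allocation ends up backed by
         * several smaller VM objects rather than one huge one.
         */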
2960 new_entry
= vm_map_entry_insert(map
,
2961 entry
, tmp_start
, tmp_end
,
2962 object
, offset
, vmk_flags
,
2963 needs_copy
, FALSE
, FALSE
,
2964 cur_protection
, max_protection
,
2965 VM_BEHAVIOR_DEFAULT
,
2966 (entry_for_jit
&& !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map
) ?
2967 VM_INHERIT_NONE
: inheritance
),
2977 translated_allow_execute
);
2979 assert((object
!= kernel_object
) || (VM_KERN_MEMORY_NONE
!= alias
));
2981 if (resilient_codesign
) {
2982 int reject_prot
= (needs_copy
? VM_PROT_EXECUTE
: (VM_PROT_WRITE
| VM_PROT_EXECUTE
));
2983 if (!((cur_protection
| max_protection
) & reject_prot
)) {
2984 new_entry
->vme_resilient_codesign
= TRUE
;
2988 if (resilient_media
&&
2989 (object
== VM_OBJECT_NULL
||
2990 object
->internal
)) {
2991 new_entry
->vme_resilient_media
= TRUE
;
2994 assert(!new_entry
->iokit_acct
);
2996 object
!= VM_OBJECT_NULL
&&
2997 (object
->purgable
!= VM_PURGABLE_DENY
||
2998 object
->vo_ledger_tag
)) {
2999 assert(new_entry
->use_pmap
);
3000 assert(!new_entry
->iokit_acct
);
3002 * Turn off pmap accounting since
3003 * purgeable (or tagged) objects have their
3006 new_entry
->use_pmap
= FALSE
;
3007 } else if (!is_submap
&&
3009 object
!= VM_OBJECT_NULL
&&
3011 /* alternate accounting */
3012 assert(!new_entry
->iokit_acct
);
3013 assert(new_entry
->use_pmap
);
3014 new_entry
->iokit_acct
= TRUE
;
3015 new_entry
->use_pmap
= FALSE
;
3017 vm_map_iokit_mapped_region
,
3019 vm_map_offset_t
, new_entry
->vme_start
,
3020 vm_map_offset_t
, new_entry
->vme_end
,
3021 int, VME_ALIAS(new_entry
));
3022 vm_map_iokit_mapped_region(
3024 (new_entry
->vme_end
-
3025 new_entry
->vme_start
));
3026 } else if (!is_submap
) {
3027 assert(!new_entry
->iokit_acct
);
3028 assert(new_entry
->use_pmap
);
3033 boolean_t submap_is_64bit
;
3036 assert(new_entry
->is_sub_map
);
3037 assert(!new_entry
->use_pmap
);
3038 assert(!new_entry
->iokit_acct
);
3039 submap
= (vm_map_t
) object
;
3040 submap_is_64bit
= vm_map_is_64bit(submap
);
3041 use_pmap
= vmk_flags
.vmkf_nested_pmap
;
3042 #ifndef NO_NESTED_PMAP
3043 if (use_pmap
&& submap
->pmap
== NULL
) {
3044 ledger_t ledger
= map
->pmap
->ledger
;
3045 /* we need a sub pmap to nest... */
3046 submap
->pmap
= pmap_create_options(ledger
, 0,
3047 submap_is_64bit
? PMAP_CREATE_64BIT
: 0);
3048 if (submap
->pmap
== NULL
) {
3049 /* let's proceed without nesting... */
3051 #if defined(__arm__) || defined(__arm64__)
3053 pmap_set_nested(submap
->pmap
);
3057 if (use_pmap
&& submap
->pmap
!= NULL
) {
3058 if (VM_MAP_PAGE_SHIFT(map
) != VM_MAP_PAGE_SHIFT(submap
)) {
3059 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map
, VM_MAP_PAGE_SHIFT(map
), submap
, VM_MAP_PAGE_SHIFT(submap
));
3062 kr
= pmap_nest(map
->pmap
,
3065 tmp_end
- tmp_start
);
3067 if (kr
!= KERN_SUCCESS
) {
3068 printf("vm_map_enter: "
3069 "pmap_nest(0x%llx,0x%llx) "
3071 (long long)tmp_start
,
3075 /* we're now nested ! */
3076 new_entry
->use_pmap
= TRUE
;
3080 #endif /* NO_NESTED_PMAP */
3084 if (superpage_size
) {
3086 vm_object_t sp_object
;
3087 vm_object_offset_t sp_offset
;
3089 VME_OFFSET_SET(entry
, 0);
3091 /* allocate one superpage */
3092 kr
= cpm_allocate(SUPERPAGE_SIZE
, &pages
, 0, SUPERPAGE_NBASEPAGES
- 1, TRUE
, 0);
3093 if (kr
!= KERN_SUCCESS
) {
3094 /* deallocate whole range... */
3095 new_mapping_established
= TRUE
;
3096 /* ... but only up to "tmp_end" */
3097 size
-= end
- tmp_end
;
3101 /* create one vm_object per superpage */
3102 sp_object
= vm_object_allocate((vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
3103 sp_object
->phys_contiguous
= TRUE
;
3104 sp_object
->vo_shadow_offset
= (vm_object_offset_t
)VM_PAGE_GET_PHYS_PAGE(pages
) * PAGE_SIZE
;
3105 VME_OBJECT_SET(entry
, sp_object
);
3106 assert(entry
->use_pmap
);
3108 /* enter the base pages into the object */
3109 vm_object_lock(sp_object
);
3111 sp_offset
< SUPERPAGE_SIZE
;
3112 sp_offset
+= PAGE_SIZE
) {
3114 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m
));
3115 pages
= NEXT_PAGE(m
);
3116 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
3117 vm_page_insert_wired(m
, sp_object
, sp_offset
, VM_KERN_MEMORY_OSFMK
);
3119 vm_object_unlock(sp_object
);
3121 } while (tmp_end
!= tmp2_end
&&
3122 (tmp_start
= tmp_end
) &&
3123 (tmp_end
= (tmp2_end
- tmp_end
> chunk_size
) ?
3124 tmp_end
+ chunk_size
: tmp2_end
));
3127 new_mapping_established
= TRUE
;
    assert(map_locked == TRUE);

    if (result == KERN_SUCCESS) {
        vm_prot_t pager_prot;
        memory_object_t pager;

#if DEBUG
        if (pmap_empty &&
            !(vmk_flags.vmkf_no_pmap_check)) {
            assert(vm_map_pmap_is_empty(map,
                *address,
                *address + size));
        }
#endif /* DEBUG */

        /*
         * For "named" VM objects, let the pager know that the
         * memory object is being mapped.  Some pagers need to keep
         * track of this, to know when they can reclaim the memory
         * object, for example.
         * VM calls memory_object_map() for each mapping (specifying
         * the protection of each mapping) and calls
         * memory_object_last_unmap() when all the mappings are gone.
         */
        pager_prot = max_protection;
        if (needs_copy) {
            /*
             * Copy-On-Write mapping: won't modify
             * the memory object.
             */
            pager_prot &= ~VM_PROT_WRITE;
        }
        if (!is_submap &&
            object != VM_OBJECT_NULL &&
            object->named &&
            object->pager != MEMORY_OBJECT_NULL) {
            vm_object_lock(object);
            pager = object->pager;
            if (object->named &&
                pager != MEMORY_OBJECT_NULL) {
                assert(object->pager_ready);
                vm_object_mapping_wait(object, THREAD_UNINT);
                vm_object_mapping_begin(object);
                vm_object_unlock(object);

                kr = memory_object_map(pager, pager_prot);
                assert(kr == KERN_SUCCESS);

                vm_object_lock(object);
                vm_object_mapping_end(object);
            }
            vm_object_unlock(object);
        }
    }
3185 assert(map_locked
== TRUE
);
3187 if (!keep_map_locked
) {
3193 * We can't hold the map lock if we enter this block.
3196 if (result
== KERN_SUCCESS
) {
3197 /* Wire down the new entry if the user
3198 * requested all new map entries be wired.
3200 if ((map
->wiring_required
) || (superpage_size
)) {
3201 assert(!keep_map_locked
);
3202 pmap_empty
= FALSE
; /* pmap won't be empty */
3203 kr
= vm_map_wire_kernel(map
, start
, end
,
3204 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
3211 if (result
!= KERN_SUCCESS
) {
3212 if (new_mapping_established
) {
3214 * We have to get rid of the new mappings since we
3215 * won't make them available to the user.
3216 * Try and do that atomically, to minimize the risk
3217 * that someone else create new mappings that range.
3219 zap_new_map
= vm_map_create(PMAP_NULL
,
3222 map
->hdr
.entries_pageable
);
3223 vm_map_set_page_shift(zap_new_map
,
3224 VM_MAP_PAGE_SHIFT(map
));
3225 vm_map_disable_hole_optimization(zap_new_map
);
3231 (void) vm_map_delete(map
, *address
, *address
+ size
,
3232 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3233 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3236 if (zap_old_map
!= VM_MAP_NULL
&&
3237 zap_old_map
->hdr
.nentries
!= 0) {
3238 vm_map_entry_t entry1
, entry2
;
3241 * The new mapping failed. Attempt to restore
3242 * the old mappings, saved in the "zap_old_map".
3249 /* first check if the coast is still clear */
3250 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
3251 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
3252 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
3253 vm_map_lookup_entry(map
, end
, &entry2
) ||
3256 * Part of that range has already been
3257 * re-mapped: we can't restore the old
3260 vm_map_enter_restore_failures
++;
3263 * Transfer the saved map entries from
3264 * "zap_old_map" to the original "map",
3265 * inserting them all after "entry1".
3267 for (entry2
= vm_map_first_entry(zap_old_map
);
3268 entry2
!= vm_map_to_entry(zap_old_map
);
3269 entry2
= vm_map_first_entry(zap_old_map
)) {
3270 vm_map_size_t entry_size
;
3272 entry_size
= (entry2
->vme_end
-
3274 vm_map_store_entry_unlink(zap_old_map
,
3276 zap_old_map
->size
-= entry_size
;
3277 vm_map_store_entry_link(map
, entry1
, entry2
,
3278 VM_MAP_KERNEL_FLAGS_NONE
);
3279 map
->size
+= entry_size
;
            if (map->wiring_required) {
                /*
                 * XXX TODO: we should rewire the
                 * old pages here...
                 */
            }
            vm_map_enter_restore_successes++;
        }
    }

    /*
     * The caller is responsible for releasing the lock if it requested to
     * keep the map locked.
     */
    if (map_locked && !keep_map_locked) {
        vm_map_unlock(map);
    }

    /*
     * Get rid of the "zap_maps" and all the map entries that
     * they may still contain.
     */
    if (zap_old_map != VM_MAP_NULL) {
        vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
        zap_old_map = VM_MAP_NULL;
    }
    if (zap_new_map != VM_MAP_NULL) {
        vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
        zap_new_map = VM_MAP_NULL;
    }
3320 extern const struct memory_object_pager_ops fourk_pager_ops
;
3324 vm_map_offset_t
*address
, /* IN/OUT */
3326 vm_map_offset_t mask
,
3328 vm_map_kernel_flags_t vmk_flags
,
3331 vm_object_offset_t offset
,
3332 boolean_t needs_copy
,
3333 vm_prot_t cur_protection
,
3334 vm_prot_t max_protection
,
3335 vm_inherit_t inheritance
)
3337 vm_map_entry_t entry
, new_entry
;
3338 vm_map_offset_t start
, fourk_start
;
3339 vm_map_offset_t end
, fourk_end
;
3340 vm_map_size_t fourk_size
;
3341 kern_return_t result
= KERN_SUCCESS
;
3342 vm_map_t zap_old_map
= VM_MAP_NULL
;
3343 vm_map_t zap_new_map
= VM_MAP_NULL
;
3344 boolean_t map_locked
= FALSE
;
3345 boolean_t pmap_empty
= TRUE
;
3346 boolean_t new_mapping_established
= FALSE
;
3347 boolean_t keep_map_locked
= vmk_flags
.vmkf_keep_map_locked
;
3348 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
3349 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
3350 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
3351 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
3352 boolean_t is_submap
= vmk_flags
.vmkf_submap
;
3353 boolean_t permanent
= vmk_flags
.vmkf_permanent
;
3354 boolean_t no_copy_on_read
= vmk_flags
.vmkf_permanent
;
3355 boolean_t entry_for_jit
= vmk_flags
.vmkf_map_jit
;
3356 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3357 boolean_t translated_allow_execute
= vmk_flags
.vmkf_translated_allow_execute
;
3358 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
3359 vm_map_offset_t effective_min_offset
, effective_max_offset
;
3361 boolean_t clear_map_aligned
= FALSE
;
3362 memory_object_t fourk_mem_obj
;
3363 vm_object_t fourk_object
;
3364 vm_map_offset_t fourk_pager_offset
;
3365 int fourk_pager_index_start
, fourk_pager_index_num
;
3367 boolean_t fourk_copy
;
3368 vm_object_t copy_object
;
3369 vm_object_offset_t copy_offset
;
3371 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
3372 panic("%s:%d\n", __FUNCTION__
, __LINE__
);
3374 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
3375 fourk_object
= VM_OBJECT_NULL
;
3377 if (superpage_size
) {
3378 return KERN_NOT_SUPPORTED
;
3381 if ((cur_protection
& VM_PROT_WRITE
) &&
3382 (cur_protection
& VM_PROT_EXECUTE
) &&
3383 #if XNU_TARGET_OS_OSX
3384 map
->pmap
!= kernel_pmap
&&
3385 (vm_map_cs_enforcement(map
)
3387 || !VM_MAP_IS_EXOTIC(map
)
3388 #endif /* __arm64__ */
3390 #endif /* XNU_TARGET_OS_OSX */
3395 vm_prot_t
, cur_protection
);
3396 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3397 "turning off execute\n",
3399 (current_task()->bsd_info
3400 ? proc_name_address(current_task()->bsd_info
)
3403 cur_protection
&= ~VM_PROT_EXECUTE
;
3407 * If the task has requested executable lockdown,
3408 * deny any new executable mapping.
3410 if (map
->map_disallow_new_exec
== TRUE
) {
3411 if (cur_protection
& VM_PROT_EXECUTE
) {
3412 return KERN_PROTECTION_FAILURE
;
3417 return KERN_NOT_SUPPORTED
;
3419 if (vmk_flags
.vmkf_already
) {
3420 return KERN_NOT_SUPPORTED
;
3422 if (purgable
|| entry_for_jit
) {
3423 return KERN_NOT_SUPPORTED
;
3426 effective_min_offset
= map
->min_offset
;
3428 if (vmk_flags
.vmkf_beyond_max
) {
3429 return KERN_NOT_SUPPORTED
;
3431 effective_max_offset
= map
->max_offset
;
3435 (offset
& FOURK_PAGE_MASK
) != 0) {
3437 return KERN_INVALID_ARGUMENT
;
3440 #define RETURN(value) { result = value; goto BailOut; }
3442 assert(VM_MAP_PAGE_ALIGNED(*address
, FOURK_PAGE_MASK
));
3443 assert(VM_MAP_PAGE_ALIGNED(size
, FOURK_PAGE_MASK
));
3445 if (!anywhere
&& overwrite
) {
3446 return KERN_NOT_SUPPORTED
;
3448 if (!anywhere
&& overwrite
) {
3450 * Create a temporary VM map to hold the old mappings in the
3451 * affected area while we create the new one.
3452 * This avoids releasing the VM map lock in
3453 * vm_map_entry_delete() and allows atomicity
3454 * when we want to replace some mappings with a new one.
3455 * It also allows us to restore the old VM mappings if the
3456 * new mapping fails.
3458 zap_old_map
= vm_map_create(PMAP_NULL
,
3461 map
->hdr
.entries_pageable
);
3462 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
3463 vm_map_disable_hole_optimization(zap_old_map
);
3466 fourk_start
= *address
;
3468 fourk_end
= fourk_start
+ fourk_size
;
3470 start
= vm_map_trunc_page(*address
, VM_MAP_PAGE_MASK(map
));
3471 end
= vm_map_round_page(fourk_end
, VM_MAP_PAGE_MASK(map
));
3475 return KERN_NOT_SUPPORTED
;
3479 * the address doesn't itself violate
3480 * the mask requirement.
3485 if ((start
& mask
) != 0) {
3486 RETURN(KERN_NO_SPACE
);
3490 * ... the address is within bounds
3495 if ((start
< effective_min_offset
) ||
3496 (end
> effective_max_offset
) ||
3498 RETURN(KERN_INVALID_ADDRESS
);
3501 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
3503 * Fixed mapping and "overwrite" flag: attempt to
3504 * remove all existing mappings in the specified
3505 * address range, saving them in our "zap_old_map".
3507 (void) vm_map_delete(map
, start
, end
,
3508 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3509 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3514 * ... the starting address isn't allocated
3516 if (vm_map_lookup_entry(map
, start
, &entry
)) {
3517 vm_object_t cur_object
, shadow_object
;
3520 * We might already some 4K mappings
3521 * in a 16K page here.
3524 if (entry
->vme_end
- entry
->vme_start
3525 != SIXTEENK_PAGE_SIZE
) {
3526 RETURN(KERN_NO_SPACE
);
3528 if (entry
->is_sub_map
) {
3529 RETURN(KERN_NO_SPACE
);
3531 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
3532 RETURN(KERN_NO_SPACE
);
3535 /* go all the way down the shadow chain */
3536 cur_object
= VME_OBJECT(entry
);
3537 vm_object_lock(cur_object
);
3538 while (cur_object
->shadow
!= VM_OBJECT_NULL
) {
3539 shadow_object
= cur_object
->shadow
;
3540 vm_object_lock(shadow_object
);
3541 vm_object_unlock(cur_object
);
3542 cur_object
= shadow_object
;
3543 shadow_object
= VM_OBJECT_NULL
;
3545 if (cur_object
->internal
||
3546 cur_object
->pager
== NULL
) {
3547 vm_object_unlock(cur_object
);
3548 RETURN(KERN_NO_SPACE
);
3550 if (cur_object
->pager
->mo_pager_ops
3551 != &fourk_pager_ops
) {
3552 vm_object_unlock(cur_object
);
3553 RETURN(KERN_NO_SPACE
);
3555 fourk_object
= cur_object
;
3556 fourk_mem_obj
= fourk_object
->pager
;
3558 /* keep the "4K" object alive */
3559 vm_object_reference_locked(fourk_object
);
3560 memory_object_reference(fourk_mem_obj
);
3561 vm_object_unlock(fourk_object
);
3563 /* merge permissions */
3564 entry
->protection
|= cur_protection
;
3565 entry
->max_protection
|= max_protection
;
3566 if ((entry
->protection
& (VM_PROT_WRITE
|
3567 VM_PROT_EXECUTE
)) ==
3568 (VM_PROT_WRITE
| VM_PROT_EXECUTE
) &&
3569 fourk_binary_compatibility_unsafe
&&
3570 fourk_binary_compatibility_allow_wx
) {
3571 /* write+execute: need to be "jit" */
3572 entry
->used_for_jit
= TRUE
;
3574 goto map_in_fourk_pager
;
3578 * ... the next region doesn't overlap the
3582 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
3583 (entry
->vme_next
->vme_start
< end
)) {
3584 RETURN(KERN_NO_SPACE
);
3590 * "start" and "end" should define the endpoints of the
3591 * available new range, and
3592 * "entry" should refer to the region before the new
3595 * the map should be locked.
3598 /* create a new "4K" pager */
3599 fourk_mem_obj
= fourk_pager_create();
3600 fourk_object
= fourk_pager_to_vm_object(fourk_mem_obj
);
3601 assert(fourk_object
);
3603 /* keep the "4" object alive */
3604 vm_object_reference(fourk_object
);
3606 /* create a "copy" object, to map the "4K" object copy-on-write */
3608 result
= vm_object_copy_strategically(fourk_object
,
3614 assert(result
== KERN_SUCCESS
);
3615 assert(copy_object
!= VM_OBJECT_NULL
);
3616 assert(copy_offset
== 0);
3618 /* map the "4K" pager's copy object */
3620 vm_map_entry_insert(map
, entry
,
3621 vm_map_trunc_page(start
,
3622 VM_MAP_PAGE_MASK(map
)),
3623 vm_map_round_page(end
,
3624 VM_MAP_PAGE_MASK(map
)),
3628 FALSE
, /* needs_copy */
3631 cur_protection
, max_protection
,
3632 VM_BEHAVIOR_DEFAULT
,
3633 (entry_for_jit
&& !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map
) ?
3634 VM_INHERIT_NONE
: inheritance
),
3644 translated_allow_execute
);
3647 #if VM_MAP_DEBUG_FOURK
3648 if (vm_map_debug_fourk
) {
3649 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3651 (uint64_t) entry
->vme_start
,
3652 (uint64_t) entry
->vme_end
,
3655 #endif /* VM_MAP_DEBUG_FOURK */
3657 new_mapping_established
= TRUE
;
3660 /* "map" the original "object" where it belongs in the "4K" pager */
3661 fourk_pager_offset
= (fourk_start
& SIXTEENK_PAGE_MASK
);
3662 fourk_pager_index_start
= (int) (fourk_pager_offset
/ FOURK_PAGE_SIZE
);
3663 if (fourk_size
> SIXTEENK_PAGE_SIZE
) {
3664 fourk_pager_index_num
= 4;
3666 fourk_pager_index_num
= (int) (fourk_size
/ FOURK_PAGE_SIZE
);
3668 if (fourk_pager_index_start
+ fourk_pager_index_num
> 4) {
3669 fourk_pager_index_num
= 4 - fourk_pager_index_start
;
3672 cur_idx
< fourk_pager_index_num
;
3674 vm_object_t old_object
;
3675 vm_object_offset_t old_offset
;
3677 kr
= fourk_pager_populate(fourk_mem_obj
,
3678 TRUE
, /* overwrite */
3679 fourk_pager_index_start
+ cur_idx
,
3683 (cur_idx
* FOURK_PAGE_SIZE
))
3687 #if VM_MAP_DEBUG_FOURK
3688 if (vm_map_debug_fourk
) {
3689 if (old_object
== (vm_object_t
) -1 &&
3690 old_offset
== (vm_object_offset_t
) -1) {
3691 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3692 "pager [%p:0x%llx] "
3694 "[object:%p,offset:0x%llx]\n",
3696 (uint64_t) entry
->vme_start
,
3697 (uint64_t) entry
->vme_end
,
3700 fourk_pager_index_start
+ cur_idx
,
3703 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3706 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3707 "pager [%p:0x%llx] "
3708 "populate[%d] [object:%p,offset:0x%llx] "
3709 "old [%p:0x%llx]\n",
3711 (uint64_t) entry
->vme_start
,
3712 (uint64_t) entry
->vme_end
,
3715 fourk_pager_index_start
+ cur_idx
,
3718 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3724 #endif /* VM_MAP_DEBUG_FOURK */
3726 assert(kr
== KERN_SUCCESS
);
3727 if (object
!= old_object
&&
3728 object
!= VM_OBJECT_NULL
&&
3729 object
!= (vm_object_t
) -1) {
3730 vm_object_reference(object
);
3732 if (object
!= old_object
&&
3733 old_object
!= VM_OBJECT_NULL
&&
3734 old_object
!= (vm_object_t
) -1) {
3735 vm_object_deallocate(old_object
);
3740 assert(map_locked
== TRUE
);
3742 if (result
== KERN_SUCCESS
) {
3743 vm_prot_t pager_prot
;
3744 memory_object_t pager
;
3748 !(vmk_flags
.vmkf_no_pmap_check
)) {
3749 assert(vm_map_pmap_is_empty(map
,
3756 * For "named" VM objects, let the pager know that the
3757 * memory object is being mapped. Some pagers need to keep
3758 * track of this, to know when they can reclaim the memory
3759 * object, for example.
3760 * VM calls memory_object_map() for each mapping (specifying
3761 * the protection of each mapping) and calls
3762 * memory_object_last_unmap() when all the mappings are gone.
3764 pager_prot
= max_protection
;
3767 * Copy-On-Write mapping: won't modify
3768 * the memory object.
3770 pager_prot
&= ~VM_PROT_WRITE
;
3773 object
!= VM_OBJECT_NULL
&&
3775 object
->pager
!= MEMORY_OBJECT_NULL
) {
3776 vm_object_lock(object
);
3777 pager
= object
->pager
;
3778 if (object
->named
&&
3779 pager
!= MEMORY_OBJECT_NULL
) {
3780 assert(object
->pager_ready
);
3781 vm_object_mapping_wait(object
, THREAD_UNINT
);
3782 vm_object_mapping_begin(object
);
3783 vm_object_unlock(object
);
3785 kr
= memory_object_map(pager
, pager_prot
);
3786 assert(kr
== KERN_SUCCESS
);
3788 vm_object_lock(object
);
3789 vm_object_mapping_end(object
);
3791 vm_object_unlock(object
);
3794 fourk_object
!= VM_OBJECT_NULL
&&
3795 fourk_object
->named
&&
3796 fourk_object
->pager
!= MEMORY_OBJECT_NULL
) {
3797 vm_object_lock(fourk_object
);
3798 pager
= fourk_object
->pager
;
3799 if (fourk_object
->named
&&
3800 pager
!= MEMORY_OBJECT_NULL
) {
3801 assert(fourk_object
->pager_ready
);
3802 vm_object_mapping_wait(fourk_object
,
3804 vm_object_mapping_begin(fourk_object
);
3805 vm_object_unlock(fourk_object
);
3807 kr
= memory_object_map(pager
, VM_PROT_READ
);
3808 assert(kr
== KERN_SUCCESS
);
3810 vm_object_lock(fourk_object
);
3811 vm_object_mapping_end(fourk_object
);
3813 vm_object_unlock(fourk_object
);
3817 if (fourk_object
!= VM_OBJECT_NULL
) {
3818 vm_object_deallocate(fourk_object
);
3819 fourk_object
= VM_OBJECT_NULL
;
3820 memory_object_deallocate(fourk_mem_obj
);
3821 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
3824 assert(map_locked
== TRUE
);
3826 if (!keep_map_locked
) {
3832 * We can't hold the map lock if we enter this block.
3835 if (result
== KERN_SUCCESS
) {
3836 /* Wire down the new entry if the user
3837 * requested all new map entries be wired.
3839 if ((map
->wiring_required
) || (superpage_size
)) {
3840 assert(!keep_map_locked
);
3841 pmap_empty
= FALSE
; /* pmap won't be empty */
3842 kr
= vm_map_wire_kernel(map
, start
, end
,
3843 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
3850 if (result
!= KERN_SUCCESS
) {
3851 if (new_mapping_established
) {
3853 * We have to get rid of the new mappings since we
3854 * won't make them available to the user.
3855 * Try and do that atomically, to minimize the risk
3856 * that someone else create new mappings that range.
3858 zap_new_map
= vm_map_create(PMAP_NULL
,
3861 map
->hdr
.entries_pageable
);
3862 vm_map_set_page_shift(zap_new_map
,
3863 VM_MAP_PAGE_SHIFT(map
));
3864 vm_map_disable_hole_optimization(zap_new_map
);
3870 (void) vm_map_delete(map
, *address
, *address
+ size
,
3871 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3872 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3875 if (zap_old_map
!= VM_MAP_NULL
&&
3876 zap_old_map
->hdr
.nentries
!= 0) {
3877 vm_map_entry_t entry1
, entry2
;
3880 * The new mapping failed. Attempt to restore
3881 * the old mappings, saved in the "zap_old_map".
3888 /* first check if the coast is still clear */
3889 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
3890 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
3891 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
3892 vm_map_lookup_entry(map
, end
, &entry2
) ||
3895 * Part of that range has already been
3896 * re-mapped: we can't restore the old
3899 vm_map_enter_restore_failures
++;
3902 * Transfer the saved map entries from
3903 * "zap_old_map" to the original "map",
3904 * inserting them all after "entry1".
3906 for (entry2
= vm_map_first_entry(zap_old_map
);
3907 entry2
!= vm_map_to_entry(zap_old_map
);
3908 entry2
= vm_map_first_entry(zap_old_map
)) {
3909 vm_map_size_t entry_size
;
3911 entry_size
= (entry2
->vme_end
-
3913 vm_map_store_entry_unlink(zap_old_map
,
3915 zap_old_map
->size
-= entry_size
;
3916 vm_map_store_entry_link(map
, entry1
, entry2
,
3917 VM_MAP_KERNEL_FLAGS_NONE
);
3918 map
->size
+= entry_size
;
3921 if (map
->wiring_required
) {
3923 * XXX TODO: we should rewire the
3927 vm_map_enter_restore_successes
++;
3933 * The caller is responsible for releasing the lock if it requested to
3934 * keep the map locked.
3936 if (map_locked
&& !keep_map_locked
) {
3941 * Get rid of the "zap_maps" and all the map entries that
3942 * they may still contain.
3944 if (zap_old_map
!= VM_MAP_NULL
) {
3945 vm_map_destroy(zap_old_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3946 zap_old_map
= VM_MAP_NULL
;
3948 if (zap_new_map
!= VM_MAP_NULL
) {
3949 vm_map_destroy(zap_new_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3950 zap_new_map
= VM_MAP_NULL
;
3957 #endif /* __arm64__ */
/*
 * Counters for the prefault optimization.
 */
int64_t vm_prefault_nb_pages = 0;
int64_t vm_prefault_nb_bailout = 0;

static kern_return_t
vm_map_enter_mem_object_helper(
    vm_map_t                target_map,
    vm_map_offset_t         *address,
    vm_map_size_t           initial_size,
    vm_map_offset_t         mask,
    int                     flags,
    vm_map_kernel_flags_t   vmk_flags,
    vm_tag_t                tag,
    ipc_port_t              port,
    vm_object_offset_t      offset,
    boolean_t               copy,
    vm_prot_t               cur_protection,
    vm_prot_t               max_protection,
    vm_inherit_t            inheritance,
    upl_page_list_ptr_t     page_list,
    unsigned int            page_list_count)
{
    vm_map_address_t        map_addr;
    vm_map_size_t           map_size;
    vm_object_t             object;
    vm_object_size_t        size;
    kern_return_t           result;
    boolean_t               mask_cur_protection, mask_max_protection;
    boolean_t               kernel_prefault, try_prefault = (page_list_count != 0);
    vm_map_offset_t         offset_in_mapping = 0;
#if __arm64__
    boolean_t               fourk = vmk_flags.vmkf_fourk;
#endif /* __arm64__ */

    if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
        /* XXX TODO4K prefaulting depends on page size... */
        try_prefault = FALSE;
    }

    assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);

    mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
    mask_max_protection = max_protection & VM_PROT_IS_MASK;
    cur_protection &= ~VM_PROT_IS_MASK;
    max_protection &= ~VM_PROT_IS_MASK;
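    /*
     * VM_PROT_IS_MASK asks that the requested protections be intersected
     * with the named entry's own protections (see the named-entry handling
     * further down) rather than taken literally.
     */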
    /*
     * Check arguments for validity
     */
    if ((target_map == VM_MAP_NULL) ||
        (cur_protection & ~VM_PROT_ALL) ||
        (max_protection & ~VM_PROT_ALL) ||
        (inheritance > VM_INHERIT_LAST_VALID) ||
        (try_prefault && (copy || !page_list)) ||
        initial_size == 0) {
        return KERN_INVALID_ARGUMENT;
    }
4020 if (fourk
&& VM_MAP_PAGE_SHIFT(target_map
) < PAGE_SHIFT
) {
4021 /* no "fourk" if map is using a sub-page page size */
4025 map_addr
= vm_map_trunc_page(*address
, FOURK_PAGE_MASK
);
4026 map_size
= vm_map_round_page(initial_size
, FOURK_PAGE_MASK
);
4028 #endif /* __arm64__ */
4030 map_addr
= vm_map_trunc_page(*address
,
4031 VM_MAP_PAGE_MASK(target_map
));
4032 map_size
= vm_map_round_page(initial_size
,
4033 VM_MAP_PAGE_MASK(target_map
));
4035 size
= vm_object_round_page(initial_size
);
4038 * Find the vm object (if any) corresponding to this port.
4040 if (!IP_VALID(port
)) {
4041 object
= VM_OBJECT_NULL
;
4044 } else if (ip_kotype(port
) == IKOT_NAMED_ENTRY
) {
4045 vm_named_entry_t named_entry
;
4046 vm_object_offset_t data_offset
;
4048 named_entry
= (vm_named_entry_t
) ip_get_kobject(port
);
4050 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4051 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4052 data_offset
= named_entry
->data_offset
;
4053 offset
+= named_entry
->data_offset
;
4058 /* a few checks to make sure user is obeying rules */
4060 if (offset
>= named_entry
->size
) {
4061 return KERN_INVALID_RIGHT
;
4063 size
= named_entry
->size
- offset
;
4065 if (mask_max_protection
) {
4066 max_protection
&= named_entry
->protection
;
4068 if (mask_cur_protection
) {
4069 cur_protection
&= named_entry
->protection
;
4071 if ((named_entry
->protection
& max_protection
) !=
4073 return KERN_INVALID_RIGHT
;
4075 if ((named_entry
->protection
& cur_protection
) !=
4077 return KERN_INVALID_RIGHT
;
4079 if (offset
+ size
< offset
) {
4081 return KERN_INVALID_ARGUMENT
;
4083 if (named_entry
->size
< (offset
+ initial_size
)) {
4084 return KERN_INVALID_ARGUMENT
;
4087 if (named_entry
->is_copy
) {
4088 /* for a vm_map_copy, we can only map it whole */
4089 if ((size
!= named_entry
->size
) &&
4090 (vm_map_round_page(size
,
4091 VM_MAP_PAGE_MASK(target_map
)) ==
4092 named_entry
->size
)) {
4093 /* XXX FBDP use the rounded size... */
4094 size
= vm_map_round_page(
4096 VM_MAP_PAGE_MASK(target_map
));
4100 /* the callers parameter offset is defined to be the */
4101 /* offset from beginning of named entry offset in object */
4102 offset
= offset
+ named_entry
->offset
;
4104 if (!VM_MAP_PAGE_ALIGNED(size
,
4105 VM_MAP_PAGE_MASK(target_map
))) {
4107 * Let's not map more than requested;
4108 * vm_map_enter() will handle this "not map-aligned"
4114 named_entry_lock(named_entry
);
4115 if (named_entry
->is_sub_map
) {
4118 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4119 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4120 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4123 submap
= named_entry
->backing
.map
;
4124 vm_map_reference(submap
);
4125 named_entry_unlock(named_entry
);
4127 vmk_flags
.vmkf_submap
= TRUE
;
4129 result
= vm_map_enter(target_map
,
4136 (vm_object_t
)(uintptr_t) submap
,
4142 if (result
!= KERN_SUCCESS
) {
4143 vm_map_deallocate(submap
);
4146 * No need to lock "submap" just to check its
4147 * "mapped" flag: that flag is never reset
4148 * once it's been set and if we race, we'll
4149 * just end up setting it twice, which is OK.
4151 if (submap
->mapped_in_other_pmaps
== FALSE
&&
4152 vm_map_pmap(submap
) != PMAP_NULL
&&
4153 vm_map_pmap(submap
) !=
4154 vm_map_pmap(target_map
)) {
4156 * This submap is being mapped in a map
4157 * that uses a different pmap.
4158 * Set its "mapped_in_other_pmaps" flag
4159 * to indicate that we now need to
4160 * remove mappings from all pmaps rather
4161 * than just the submap's pmap.
4163 vm_map_lock(submap
);
4164 submap
->mapped_in_other_pmaps
= TRUE
;
4165 vm_map_unlock(submap
);
4167 *address
= map_addr
;
4170 } else if (named_entry
->is_copy
) {
4172 vm_map_copy_t copy_map
;
4173 vm_map_entry_t copy_entry
;
4174 vm_map_offset_t copy_addr
;
4175 vm_map_copy_t target_copy_map
;
4176 vm_map_offset_t overmap_start
, overmap_end
;
4177 vm_map_offset_t trimmed_start
;
4178 vm_map_size_t target_size
;
4180 if (flags
& ~(VM_FLAGS_FIXED
|
4182 VM_FLAGS_OVERWRITE
|
4183 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4184 VM_FLAGS_RETURN_DATA_ADDR
|
4185 VM_FLAGS_ALIAS_MASK
)) {
4186 named_entry_unlock(named_entry
);
4187 return KERN_INVALID_ARGUMENT
;
4190 copy_map
= named_entry
->backing
.copy
;
4191 assert(copy_map
->type
== VM_MAP_COPY_ENTRY_LIST
);
4192 if (copy_map
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
4193 /* unsupported type; should not happen */
4194 printf("vm_map_enter_mem_object: "
4195 "memory_entry->backing.copy "
4196 "unsupported type 0x%x\n",
4198 named_entry_unlock(named_entry
);
4199 return KERN_INVALID_ARGUMENT
;
4202 if (VM_MAP_PAGE_SHIFT(target_map
) != copy_map
->cpy_hdr
.page_shift
) {
4203 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map
, offset
, (uint64_t)map_size
, copy_map
->cpy_hdr
.page_shift
, target_map
, VM_MAP_PAGE_SHIFT(target_map
));
4206 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4207 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4208 offset_in_mapping
= offset
& VM_MAP_PAGE_MASK(target_map
);
4209 if (flags
& VM_FLAGS_RETURN_4K_DATA_ADDR
) {
4210 offset_in_mapping
&= ~((signed)(0xFFF));
4214 target_copy_map
= VM_MAP_COPY_NULL
;
4215 target_size
= copy_map
->size
;
4219 if (copy_map
->cpy_hdr
.page_shift
!= VM_MAP_PAGE_SHIFT(target_map
)) {
4220 DEBUG4K_ADJUST("adjusting...\n");
4221 kr
= vm_map_copy_adjust_to_target(
4223 offset
/* includes data_offset */,
4231 if (kr
!= KERN_SUCCESS
) {
4232 named_entry_unlock(named_entry
);
4235 target_size
= target_copy_map
->size
;
4236 if (trimmed_start
>= data_offset
) {
4237 data_offset
= offset
& VM_MAP_PAGE_MASK(target_map
);
4239 data_offset
-= trimmed_start
;
4242 target_copy_map
= copy_map
;
4245 /* reserve a contiguous range */
4246 kr
= vm_map_enter(target_map
,
4248 vm_map_round_page(target_size
, VM_MAP_PAGE_MASK(target_map
)),
4250 flags
& (VM_FLAGS_ANYWHERE
|
4251 VM_FLAGS_OVERWRITE
|
4252 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4253 VM_FLAGS_RETURN_DATA_ADDR
),
4262 if (kr
!= KERN_SUCCESS
) {
4263 DEBUG4K_ERROR("kr 0x%x\n", kr
);
4264 if (target_copy_map
!= copy_map
) {
4265 vm_map_copy_discard(target_copy_map
);
4266 target_copy_map
= VM_MAP_COPY_NULL
;
4268 named_entry_unlock(named_entry
);
4272 copy_addr
= map_addr
;
4274 for (copy_entry
= vm_map_copy_first_entry(target_copy_map
);
4275 copy_entry
!= vm_map_copy_to_entry(target_copy_map
);
4276 copy_entry
= copy_entry
->vme_next
) {
4278 vm_map_kernel_flags_t vmk_remap_flags
;
4279 vm_map_t copy_submap
;
4280 vm_object_t copy_object
;
4281 vm_map_size_t copy_size
;
4282 vm_object_offset_t copy_offset
;
4286 vmk_remap_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
4288 copy_object
= VME_OBJECT(copy_entry
);
4289 copy_offset
= VME_OFFSET(copy_entry
);
4290 copy_size
= (copy_entry
->vme_end
-
4291 copy_entry
->vme_start
);
4292 VM_GET_FLAGS_ALIAS(flags
, copy_vm_alias
);
4293 if (copy_vm_alias
== 0) {
4295 * Caller does not want a specific
4296 * alias for this new mapping: use
4297 * the alias of the original mapping.
4299 copy_vm_alias
= VME_ALIAS(copy_entry
);
4303 if ((copy_addr
+ copy_size
) >
4305 overmap_start
+ overmap_end
+
4306 named_entry
->size
/* XXX full size */)) {
4307 /* over-mapping too much !? */
4308 kr
= KERN_INVALID_ARGUMENT
;
4309 DEBUG4K_ERROR("kr 0x%x\n", kr
);
4314 /* take a reference on the object */
4315 if (copy_entry
->is_sub_map
) {
4316 vmk_remap_flags
.vmkf_submap
= TRUE
;
4317 copy_submap
= VME_SUBMAP(copy_entry
);
4318 vm_map_lock(copy_submap
);
4319 vm_map_reference(copy_submap
);
4320 vm_map_unlock(copy_submap
);
4321 copy_object
= (vm_object_t
)(uintptr_t) copy_submap
;
4323 copy_object
!= VM_OBJECT_NULL
&&
4324 (copy_entry
->needs_copy
||
4325 copy_object
->shadowed
||
4326 (!copy_object
->true_share
&&
4327 !copy_entry
->is_shared
&&
4328 copy_object
->vo_size
> copy_size
))) {
4330 * We need to resolve our side of this
4331 * "symmetric" copy-on-write now; we
4332 * need a new object to map and share,
4333 * instead of the current one which
4334 * might still be shared with the
4337 * Note: A "vm_map_copy_t" does not
4338 * have a lock but we're protected by
4339 * the named entry's lock here.
4341 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4342 VME_OBJECT_SHADOW(copy_entry
, copy_size
);
4343 if (!copy_entry
->needs_copy
&&
4344 copy_entry
->protection
& VM_PROT_WRITE
) {
4347 prot
= copy_entry
->protection
& ~VM_PROT_WRITE
;
4348 vm_object_pmap_protect(copy_object
,
4357 copy_entry
->needs_copy
= FALSE
;
4358 copy_entry
->is_shared
= TRUE
;
4359 copy_object
= VME_OBJECT(copy_entry
);
4360 copy_offset
= VME_OFFSET(copy_entry
);
4361 vm_object_lock(copy_object
);
4362 vm_object_reference_locked(copy_object
);
4363 if (copy_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
4364 /* we're about to make a shared mapping of this object */
4365 copy_object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
4366 copy_object
->true_share
= TRUE
;
4368 vm_object_unlock(copy_object
);
4371 * We already have the right object
4374 copy_object
= VME_OBJECT(copy_entry
);
4375 vm_object_reference(copy_object
);
4378 /* over-map the object into destination */
4379 remap_flags
|= flags
;
4380 remap_flags
|= VM_FLAGS_FIXED
;
4381 remap_flags
|= VM_FLAGS_OVERWRITE
;
4382 remap_flags
&= ~VM_FLAGS_ANYWHERE
;
4383 if (!copy
&& !copy_entry
->is_sub_map
) {
4385 * copy-on-write should have been
4386 * resolved at this point, or we would
4387 * end up sharing instead of copying.
4389 assert(!copy_entry
->needs_copy
);
4391 #if XNU_TARGET_OS_OSX
4392 if (copy_entry
->used_for_jit
) {
4393 vmk_remap_flags
.vmkf_map_jit
= TRUE
;
4395 #endif /* XNU_TARGET_OS_OSX */
4397 assertf((copy_vm_alias
& VME_ALIAS_MASK
) == copy_vm_alias
,
4398 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias
, (copy_vm_alias
& VME_ALIAS_MASK
));
4399 kr
= vm_map_enter(target_map
,
4402 (vm_map_offset_t
) 0,
4405 (vm_tag_t
) copy_vm_alias
, /* see comment at end of vm_fault_unwire re. cast*/
4408 ((copy_object
== NULL
)
4410 : (copy
|| copy_entry
->needs_copy
)),
4414 if (kr
!= KERN_SUCCESS
) {
4415 DEBUG4K_SHARE("failed kr 0x%x\n", kr
);
4416 if (copy_entry
->is_sub_map
) {
4417 vm_map_deallocate(copy_submap
);
4419 vm_object_deallocate(copy_object
);
4426 copy_addr
+= copy_size
;
4429 if (kr
== KERN_SUCCESS
) {
4430 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4431 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4432 *address
= map_addr
+ offset_in_mapping
;
4434 *address
= map_addr
;
4436 if (overmap_start
) {
4437 *address
+= overmap_start
;
4438 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map
, (uint64_t)map_addr
, (uint64_t) offset_in_mapping
, (uint64_t)overmap_start
, (uint64_t)*address
);
4441 named_entry_unlock(named_entry
);
4442 if (target_copy_map
!= copy_map
) {
4443 vm_map_copy_discard(target_copy_map
);
4444 target_copy_map
= VM_MAP_COPY_NULL
;
4447 if (kr
!= KERN_SUCCESS
) {
4448 if (!(flags
& VM_FLAGS_OVERWRITE
)) {
4449 /* deallocate the contiguous range */
4450 (void) vm_deallocate(target_map
,
4459 if (named_entry
->is_object
) {
4460 unsigned int access
;
4461 vm_prot_t protections
;
4462 unsigned int wimg_mode
;
4464 /* we are mapping a VM object */
4466 protections
= named_entry
->protection
& VM_PROT_ALL
;
4467 access
= GET_MAP_MEM(named_entry
->protection
);
4469 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4470 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4471 offset_in_mapping
= offset
- VM_MAP_TRUNC_PAGE(offset
, VM_MAP_PAGE_MASK(target_map
));
4472 if (flags
& VM_FLAGS_RETURN_4K_DATA_ADDR
) {
4473 offset_in_mapping
&= ~((signed)(0xFFF));
4475 offset
= VM_MAP_TRUNC_PAGE(offset
, VM_MAP_PAGE_MASK(target_map
));
4476 map_size
= VM_MAP_ROUND_PAGE((offset
+ offset_in_mapping
+ initial_size
) - offset
, VM_MAP_PAGE_MASK(target_map
));
4479 object
= vm_named_entry_to_vm_object(named_entry
);
4480 assert(object
!= VM_OBJECT_NULL
);
4481 vm_object_lock(object
);
4482 named_entry_unlock(named_entry
);
4484 vm_object_reference_locked(object
);
4486 wimg_mode
= object
->wimg_bits
;
4487 vm_prot_to_wimg(access
, &wimg_mode
);
4488 if (object
->wimg_bits
!= wimg_mode
) {
4489 vm_object_change_wimg_mode(object
, wimg_mode
);
4492 vm_object_unlock(object
);
4494 panic("invalid VM named entry %p", named_entry
);
	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
		/*
		 * JMM - This is temporary until we unify named entries
		 * and raw memory objects.
		 *
		 * Detected fake ip_kotype for a memory object.  In
		 * this case, the port isn't really a port at all, but
		 * instead is just a raw memory object.
		 */
		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
		    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
			panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
		}

		object = memory_object_to_vm_object((memory_object_t)port);
		if (object == VM_OBJECT_NULL) {
			return KERN_INVALID_OBJECT;
		}
		vm_object_reference(object);

		/* wait for object (if any) to be ready */
		if (object != VM_OBJECT_NULL) {
			if (object == kernel_object) {
				printf("Warning: Attempt to map kernel object"
				    " by a non-private kernel entity\n");
				return KERN_INVALID_OBJECT;
			}
			if (!object->pager_ready) {
				vm_object_lock(object);

				while (!object->pager_ready) {
					vm_object_wait(object,
					    VM_OBJECT_EVENT_PAGER_READY,
					    /* ... */);
					vm_object_lock(object);
				}
				vm_object_unlock(object);
			}
		}
	} else {
		return KERN_INVALID_OBJECT;
	}
	if (object != VM_OBJECT_NULL &&
	    object->named &&
	    object->pager != MEMORY_OBJECT_NULL &&
	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
		memory_object_t pager;
		vm_prot_t       pager_prot;

		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
		pager_prot = max_protection;
		if (copy) {
			/*
			 * Copy-On-Write mapping: won't modify the
			 * memory object.
			 */
			pager_prot &= ~VM_PROT_WRITE;
		}
		vm_object_lock(object);
		pager = object->pager;
		if (object->named &&
		    pager != MEMORY_OBJECT_NULL &&
		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
			assert(object->pager_ready);
			vm_object_mapping_wait(object, THREAD_UNINT);
			vm_object_mapping_begin(object);
			vm_object_unlock(object);

			kr = memory_object_map(pager, pager_prot);
			assert(kr == KERN_SUCCESS);

			vm_object_lock(object);
			vm_object_mapping_end(object);
		}
		vm_object_unlock(object);
	}
	/*
	 * Perform the copy if requested
	 */
	if (copy) {
		vm_object_t             new_object;
		vm_object_offset_t      new_offset;

		result = vm_object_copy_strategically(object, offset,
		    /* ... */
		    &new_object, &new_offset,
		    /* ... */);

		if (result == KERN_MEMORY_RESTART_COPY) {
			boolean_t success;
			boolean_t src_needs_copy;

			/*
			 * We currently ignore src_needs_copy.
			 * This really is the issue of how to make
			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
			 * non-kernel users to use. Solution forthcoming.
			 * In the meantime, since we don't allow non-kernel
			 * memory managers to specify symmetric copy,
			 * we won't run into problems here.
			 */
			new_object = object;
			new_offset = offset;
			success = vm_object_copy_quickly(&new_object,
			    /* ... */);
			result = KERN_SUCCESS;
		}
		/*
		 * Throw away the reference to the
		 * original object, as it won't be mapped.
		 */
		vm_object_deallocate(object);

		if (result != KERN_SUCCESS) {
			/* ... */
		}

		object = new_object;
		offset = new_offset;
	}
	/*
	 * If non-kernel users want to try to prefault pages, the mapping and prefault
	 * needs to be atomic.
	 */
	kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
	vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);

#if __arm64__
	if (fourk) {
		/* map this object in a "4K" pager */
		result = vm_map_enter_fourk(target_map,
		    /* ... */
		    (vm_map_offset_t) mask,
		    /* ... */);
	} else
#endif /* __arm64__ */
	{
		result = vm_map_enter(target_map,
		    &map_addr, map_size,
		    (vm_map_offset_t)mask,
		    /* ... */
		    cur_protection, max_protection,
		    /* ... */);
	}
	if (result != KERN_SUCCESS) {
		vm_object_deallocate(object);
	}

	/*
	 * Try to prefault, and do not forget to release the vm map lock.
	 */
	if (result == KERN_SUCCESS && try_prefault) {
		mach_vm_address_t va = map_addr;
		kern_return_t kr = KERN_SUCCESS;

		pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
		if (object->internal) {
			pmap_options |= PMAP_OPTIONS_INTERNAL;
		}

		for (i = 0; i < page_list_count; ++i) {
			if (!UPL_VALID_PAGE(page_list, i)) {
				if (kernel_prefault) {
					assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
					result = KERN_MEMORY_ERROR;
					break;
				}
			} else {
				/*
				 * If this function call failed, we should stop
				 * trying to optimize, other calls are likely
				 * going to fail too.
				 *
				 * We are not gonna report an error for such
				 * failure though. That's an optimization, not
				 * something critical.
				 */
				kr = pmap_enter_options(target_map->pmap,
				    va, UPL_PHYS_PAGE(page_list, i),
				    cur_protection, VM_PROT_NONE,
				    0, TRUE, pmap_options, NULL);
				if (kr != KERN_SUCCESS) {
					OSIncrementAtomic64(&vm_prefault_nb_bailout);
					if (kernel_prefault) {
						result = kr;
					}
					break;
				}
				OSIncrementAtomic64(&vm_prefault_nb_pages);
			}

			/* Next virtual address */
			va += PAGE_SIZE;
		}
		if (vmk_flags.vmkf_keep_map_locked) {
			vm_map_unlock(target_map);
		}
	}

	if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
	    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
		*address = map_addr + offset_in_mapping;
	} else {
		*address = map_addr;
	}

	return result;
}
kern_return_t
vm_map_enter_mem_object(
	vm_map_t                target_map,
	vm_map_offset_t         *address,
	vm_map_size_t           initial_size,
	vm_map_offset_t         mask,
	/* ... */
	vm_map_kernel_flags_t   vmk_flags,
	/* ... */
	vm_object_offset_t      offset,
	/* ... */
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	kern_return_t ret;

	ret = vm_map_enter_mem_object_helper(target_map,
	    /* ... */);

	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
		kasan_notify_address(*address, initial_size);
	}

	return ret;
}
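/*
 * Illustrative usage sketch (not part of the original source; local variable
 * names are hypothetical and the elided arguments are placeholders):
 *
 *	mach_vm_address_t addr = 0;
 *	kr = vm_map_enter_mem_object(target_map, &addr, size, 0,
 *	    ... remaining arguments as declared above ...);
 *
 * On KERN_SUCCESS the chosen mapping address is returned through "addr".
 */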
kern_return_t
vm_map_enter_mem_object_prefault(
	vm_map_t                target_map,
	vm_map_offset_t         *address,
	vm_map_size_t           initial_size,
	vm_map_offset_t         mask,
	/* ... */
	vm_map_kernel_flags_t   vmk_flags,
	/* ... */
	vm_object_offset_t      offset,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	upl_page_list_ptr_t     page_list,
	unsigned int            page_list_count)
{
	kern_return_t ret;

	ret = vm_map_enter_mem_object_helper(target_map,
	    /* ... */);

	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
		kasan_notify_address(*address, initial_size);
	}

	return ret;
}
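/*
 * NOTE (illustrative, not part of the original source): the _prefault variant
 * additionally passes a upl_page_list_ptr_t and a page count down to the
 * helper, which then enters the already-known physical pages into the pmap
 * while the map is still locked, so the mapping and the prefault happen
 * atomically and the first touch does not fault.
 */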
kern_return_t
vm_map_enter_mem_object_control(
	vm_map_t                target_map,
	vm_map_offset_t         *address,
	vm_map_size_t           initial_size,
	vm_map_offset_t         mask,
	/* ... */
	vm_map_kernel_flags_t   vmk_flags,
	/* ... */
	memory_object_control_t control,
	vm_object_offset_t      offset,
	/* ... */
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	vm_map_address_t        map_addr;
	vm_map_size_t           map_size;
	vm_object_t             object;
	vm_object_size_t        size;
	kern_return_t           result;
	memory_object_t         pager;
	vm_prot_t               pager_prot;
	/* ... */
#if __arm64__
	boolean_t               fourk = vmk_flags.vmkf_fourk;
#endif /* __arm64__ */
	/*
	 * Check arguments for validity
	 */
	if ((target_map == VM_MAP_NULL) ||
	    (cur_protection & ~VM_PROT_ALL) ||
	    (max_protection & ~VM_PROT_ALL) ||
	    (inheritance > VM_INHERIT_LAST_VALID) ||
	    initial_size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

#if __arm64__
	if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
		/* ... */
	}
	if (fourk) {
		map_addr = vm_map_trunc_page(*address,
		    /* ... */);
		map_size = vm_map_round_page(initial_size,
		    /* ... */);
	} else
#endif /* __arm64__ */
	{
		map_addr = vm_map_trunc_page(*address,
		    VM_MAP_PAGE_MASK(target_map));
		map_size = vm_map_round_page(initial_size,
		    VM_MAP_PAGE_MASK(target_map));
	}
	size = vm_object_round_page(initial_size);

	object = memory_object_control_to_vm_object(control);

	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_OBJECT;
	}

	if (object == kernel_object) {
		printf("Warning: Attempt to map kernel object"
		    " by a non-private kernel entity\n");
		return KERN_INVALID_OBJECT;
	}
	vm_object_lock(object);
	object->ref_count++;
	/* ... */

	/*
	 * For "named" VM objects, let the pager know that the
	 * memory object is being mapped.  Some pagers need to keep
	 * track of this, to know when they can reclaim the memory
	 * object, for example.
	 * VM calls memory_object_map() for each mapping (specifying
	 * the protection of each mapping) and calls
	 * memory_object_last_unmap() when all the mappings are gone.
	 */
	pager_prot = max_protection;
	if (copy) {
		pager_prot &= ~VM_PROT_WRITE;
	}
	pager = object->pager;
	if (object->named &&
	    pager != MEMORY_OBJECT_NULL &&
	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
		assert(object->pager_ready);
		vm_object_mapping_wait(object, THREAD_UNINT);
		vm_object_mapping_begin(object);
		vm_object_unlock(object);

		kr = memory_object_map(pager, pager_prot);
		assert(kr == KERN_SUCCESS);

		vm_object_lock(object);
		vm_object_mapping_end(object);
	}
	vm_object_unlock(object);
	/*
	 * Perform the copy if requested
	 */
	if (copy) {
		vm_object_t             new_object;
		vm_object_offset_t      new_offset;

		result = vm_object_copy_strategically(object, offset, size,
		    &new_object, &new_offset,
		    /* ... */);

		if (result == KERN_MEMORY_RESTART_COPY) {
			boolean_t success;
			boolean_t src_needs_copy;

			/*
			 * We currently ignore src_needs_copy.
			 * This really is the issue of how to make
			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
			 * non-kernel users to use. Solution forthcoming.
			 * In the meantime, since we don't allow non-kernel
			 * memory managers to specify symmetric copy,
			 * we won't run into problems here.
			 */
			new_object = object;
			new_offset = offset;
			success = vm_object_copy_quickly(&new_object,
			    /* ... */);
			result = KERN_SUCCESS;
		}
		/*
		 * Throw away the reference to the
		 * original object, as it won't be mapped.
		 */
		vm_object_deallocate(object);

		if (result != KERN_SUCCESS) {
			/* ... */
		}

		object = new_object;
		offset = new_offset;
	}
#if __arm64__
	if (fourk) {
		result = vm_map_enter_fourk(target_map,
		    /* ... */
		    (vm_map_offset_t)mask,
		    /* ... */
		    cur_protection, max_protection,
		    /* ... */);
	} else
#endif /* __arm64__ */
	{
		result = vm_map_enter(target_map,
		    &map_addr, map_size,
		    (vm_map_offset_t)mask,
		    /* ... */
		    cur_protection, max_protection,
		    /* ... */);
	}
	if (result != KERN_SUCCESS) {
		vm_object_deallocate(object);
	}
	*address = map_addr;

	return result;
}
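/*
 * NOTE (illustrative, not part of the original source): unlike the port-based
 * entry points above, this variant starts from a memory_object_control_t and
 * converts it to a VM object with memory_object_control_to_vm_object() before
 * entering it in the target map; the rest of the path (pager notification,
 * optional copy, vm_map_enter) mirrors the helper above.
 */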
#if VM_CPM

extern pmap_paddr_t     avail_start, avail_end;

/*
 *	Allocate memory in the specified map, with the caveat that
 *	the memory is physically contiguous.  This call may fail
 *	if the system can't find sufficient contiguous memory.
 *	This call may cause or lead to heart-stopping amounts of
 *	paging activity.
 *
 *	Memory obtained from this call should be freed in the
 *	normal way, viz., via vm_deallocate.
 */
kern_return_t
vm_map_enter_cpm(
	vm_map_t                map,
	vm_map_offset_t         *addr,
	vm_map_size_t           size,
	int                     flags)
{
	vm_object_t             cpm_obj;
	/* ... */
	vm_map_offset_t         va, start, end, offset;
#if MACH_ASSERT
	vm_map_offset_t         prev_addr = 0;
#endif /* MACH_ASSERT */
	boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);

	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		/* XXX TODO4K do we need to support this? */
		/* ... */
		return KERN_NOT_SUPPORTED;
	}

	VM_GET_FLAGS_ALIAS(flags, tag);

	if (size == 0) {
		/* ... */
		return KERN_SUCCESS;
	}
	if (anywhere) {
		*addr = vm_map_min(map);
	} else {
		*addr = vm_map_trunc_page(*addr,
		    VM_MAP_PAGE_MASK(map));
	}
	size = vm_map_round_page(size,
	    VM_MAP_PAGE_MASK(map));

	/*
	 * LP64todo - cpm_allocate should probably allow
	 * allocations of >4GB, but not with the current
	 * algorithm, so just cast down the size for now.
	 */
	if (size > VM_MAX_ADDRESS) {
		return KERN_RESOURCE_SHORTAGE;
	}
	if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
	    &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
		return kr;
	}

	cpm_obj = vm_object_allocate((vm_object_size_t)size);
	assert(cpm_obj != VM_OBJECT_NULL);
	assert(cpm_obj->internal);
	assert(cpm_obj->vo_size == (vm_object_size_t)size);
	assert(cpm_obj->can_persist == FALSE);
	assert(cpm_obj->pager_created == FALSE);
	assert(cpm_obj->pageout == FALSE);
	assert(cpm_obj->shadow == VM_OBJECT_NULL);
	/*
	 *	Insert pages into object.
	 */
	vm_object_lock(cpm_obj);
	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;

		assert(!m->vmp_gobbled);
		assert(!m->vmp_wanted);
		assert(!m->vmp_pageout);
		assert(!m->vmp_tabled);
		assert(VM_PAGE_WIRED(m));
		assert(m->vmp_busy);
		assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));

		m->vmp_busy = FALSE;
		vm_page_insert(m, cpm_obj, offset);
	}
	assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
	vm_object_unlock(cpm_obj);

	/*
	 *	Hang onto a reference on the object in case a
	 *	multi-threaded application for some reason decides
	 *	to deallocate the portion of the address space into
	 *	which we will insert this object.
	 *
	 *	Unfortunately, we must insert the object now before
	 *	we can talk to the pmap module about which addresses
	 *	must be wired down.  Hence, the race with a multi-
	 *	threaded app.
	 */
	vm_object_reference(cpm_obj);

	/*
	 *	Insert object into map.
	 */
	kr = vm_map_enter(
		/* ... */
		VM_MAP_KERNEL_FLAGS_NONE,
		/* ... */
		(vm_object_offset_t)0,
		/* ... */
		VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS) {
		/*
		 * A CPM object doesn't have can_persist set,
		 * so all we have to do is deallocate it to
		 * free up these pages.
		 */
		assert(cpm_obj->pager_created == FALSE);
		assert(cpm_obj->can_persist == FALSE);
		assert(cpm_obj->pageout == FALSE);
		assert(cpm_obj->shadow == VM_OBJECT_NULL);
		vm_object_deallocate(cpm_obj); /* kill acquired ref */
		vm_object_deallocate(cpm_obj); /* kill creation ref */
		return kr;
	}
	/*
	 *	Inform the physical mapping system that the
	 *	range of addresses may not fault, so that
	 *	page tables and such can be locked down as well.
	 */
	start = *addr;
	end = start + size;
	pmap = vm_map_pmap(map);
	pmap_pageable(pmap, start, end, FALSE);

	/*
	 *	Enter each page into the pmap, to avoid faults.
	 *	Note that this loop could be coded more efficiently,
	 *	if the need arose, rather than looking up each page
	 *	again.
	 */
	for (offset = 0, va = start; offset < size;
	    va += PAGE_SIZE, offset += PAGE_SIZE) {
		vm_object_lock(cpm_obj);
		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
		assert(m != VM_PAGE_NULL);

		vm_page_zero_fill(m);

		type_of_fault = DBG_ZERO_FILL_FAULT;

		vm_fault_enter(m, pmap, va,
		    /* ... */
		    VM_PROT_ALL, VM_PROT_WRITE,
		    /* ... */
		    FALSE,                      /* change_wiring */
		    VM_KERN_MEMORY_NONE,        /* tag - not wiring */
		    FALSE,                      /* no_cache */
		    FALSE,                      /* cs_bypass */
		    /* ... */
		    0,                          /* pmap_options */
		    NULL,                       /* need_retry */
		    /* ... */);

		vm_object_unlock(cpm_obj);
	}
#if MACH_ASSERT
	/*
	 *	Verify ordering in address space.
	 */
	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		vm_object_lock(cpm_obj);
		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
		vm_object_unlock(cpm_obj);
		if (m == VM_PAGE_NULL) {
			panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
			    cpm_obj, (uint64_t)offset);
		}
		assert(m->vmp_tabled);
		assert(!m->vmp_busy);
		assert(!m->vmp_wanted);
		assert(!m->vmp_fictitious);
		assert(!m->vmp_private);
		assert(!m->vmp_absent);
		assert(!m->vmp_error);
		assert(!m->vmp_cleaning);
		assert(!m->vmp_laundry);
		assert(!m->vmp_precious);
		assert(!m->vmp_clustered);

		if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
			printf("start 0x%llx end 0x%llx va 0x%llx\n",
			    (uint64_t)start, (uint64_t)end, (uint64_t)va);
			printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
			printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
			panic("vm_allocate_cpm:  pages not contig!");
		}
		prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
	}
#endif /* MACH_ASSERT */

	vm_object_deallocate(cpm_obj); /* kill extra ref */

	return kr;
}
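/*
 * Worked example (illustrative, not part of the original source): the
 * MACH_ASSERT loop above checks physical contiguity by page number, not by
 * byte address.  If the page at object offset 0 has physical page number
 * 0x20000, then the page at offset PAGE_SIZE must be 0x20001, the page at
 * offset 2 * PAGE_SIZE must be 0x20002, and so on; any gap triggers the
 * "pages not contig!" panic.
 */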
#else /* VM_CPM */

/*
 *	Interface is defined in all cases, but unless the kernel
 *	is built explicitly for this option, the interface does
 *	nothing.
 */
kern_return_t
vm_map_enter_cpm(
	__unused vm_map_t               map,
	__unused vm_map_offset_t        *addr,
	__unused vm_map_size_t          size,
	__unused int                    flags)
{
	return KERN_FAILURE;
}
#endif /* VM_CPM */
/* Not used without nested pmaps */
#ifndef NO_NESTED_PMAP
/*
 * Clip and unnest a portion of a nested submap mapping.
 */
static void
vm_map_clip_unnest(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t start_unnest,
	vm_map_offset_t end_unnest)
{
	vm_map_offset_t old_start_unnest = start_unnest;
	vm_map_offset_t old_end_unnest = end_unnest;

	assert(entry->is_sub_map);
	assert(VME_SUBMAP(entry) != NULL);
	assert(entry->use_pmap);

	/*
	 * Query the platform for the optimal unnest range.
	 * DRK: There's some duplication of effort here, since
	 * callers may have adjusted the range to some extent.  This
	 * routine was introduced to support 1GiB subtree nesting
	 * for x86 platforms, which can also nest on 2MiB boundaries
	 * depending on size/alignment.
	 */
	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
		assert(VME_SUBMAP(entry)->is_nested_map);
		assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
		log_unnest_badness(map,
		    /* ... */
		    VME_SUBMAP(entry)->is_nested_map,
		    (/* ... */
		    VME_SUBMAP(entry)->lowest_unnestable_start -
		    VME_OFFSET(entry)));
	}

	if (entry->vme_start > start_unnest ||
	    entry->vme_end < end_unnest) {
		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
		    "bad nested entry: start=0x%llx end=0x%llx\n",
		    (long long)start_unnest, (long long)end_unnest,
		    (long long)entry->vme_start, (long long)entry->vme_end);
	}

	if (start_unnest > entry->vme_start) {
		_vm_map_clip_start(&map->hdr,
		    entry, start_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
	if (entry->vme_end > end_unnest) {
		_vm_map_clip_end(&map->hdr,
		    entry, end_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}

	pmap_unnest(map->pmap,
	    (addr64_t)(entry->vme_start),
	    entry->vme_end - entry->vme_start);
	if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
		/* clean up parent map/maps */
		vm_map_submap_pmap_clean(
			map, entry->vme_start,
			/* ... */);
	}
	entry->use_pmap = FALSE;
	if ((map->pmap != kernel_pmap) &&
	    (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
		VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
	}
}
#endif  /* NO_NESTED_PMAP */
/*
 *	vm_map_clip_start:	[ internal use only ]
 *
 *	Asserts that the given entry begins at or after
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_start(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t startaddr)
{
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map &&
	    entry->use_pmap &&
	    startaddr >= entry->vme_start) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure "startaddr" is no longer in a nested range
		 * before we clip.  Unnest only the minimum range the platform
		 * can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
		end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (startaddr > entry->vme_start) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
		}

		DTRACE_VM5(vm_map_clip_start,
		    vm_map_t, map,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end,
		    vm_map_offset_t, startaddr,
		    int, VME_ALIAS(entry));

		_vm_map_clip_start(&map->hdr, entry, startaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
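/*
 * Worked example (illustrative, not part of the original source; the 32MB
 * value is only an assumption for the example): if
 * pmap_shared_region_size_min() returned 32MB (0x2000000), then for
 * startaddr = 0x181234000 the unnest computation above would give
 *	start_unnest = 0x181234000 & ~(0x2000000 - 1) = 0x180000000
 *	end_unnest   = 0x180000000 + 0x2000000        = 0x182000000
 * i.e. the entire nested chunk containing "startaddr" is unnested before the
 * entry is clipped.
 */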
#define vm_map_copy_clip_start(copy, entry, startaddr) \
	MACRO_BEGIN \
	if ((startaddr) > (entry)->vme_start) \
	        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
	MACRO_END

/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_start(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         start)
{
	vm_map_entry_t  new_entry;

	/*
	 *	Split off the front portion --
	 *	note that we must insert the new
	 *	entry BEFORE this one, so that
	 *	this entry has the specified starting
	 *	address.
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	new_entry->vme_end = start;
	assert(new_entry->vme_start < new_entry->vme_end);
	VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
	assert(start < entry->vme_end);
	entry->vme_start = start;

	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);

	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
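/*
 * Worked example (illustrative, not part of the original source): clipping an
 * entry covering [0x1000, 0x5000) with object offset 0 at start = 0x3000
 * inserts a new entry [0x1000, 0x3000) BEFORE it (keeping offset 0) and turns
 * the original entry into [0x3000, 0x5000) with its offset advanced by
 * 0x2000, so both halves still map the same portions of the object.
 */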
/*
 *	vm_map_clip_end:	[ internal use only ]
 *
 *	Asserts that the given entry ends at or before
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_end(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t endaddr)
{
	if (endaddr > entry->vme_end) {
		/*
		 * Within the scope of this clipping, limit "endaddr" to
		 * the end of this map entry...
		 */
		endaddr = entry->vme_end;
	}
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map && entry->use_pmap) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure the range between the start of this entry and
		 * the new "endaddr" is no longer nested before we clip.
		 * Unnest only the minimum range the platform can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = entry->vme_start;
		end_unnest =
		    (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
		    ~(pmap_shared_region_size_min(map->pmap) - 1);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (endaddr < entry->vme_end) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
		}

		DTRACE_VM5(vm_map_clip_end,
		    vm_map_t, map,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end,
		    vm_map_offset_t, endaddr,
		    int, VME_ALIAS(entry));

		_vm_map_clip_end(&map->hdr, entry, endaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) \
	        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
	MACRO_END

/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_end(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         end)
{
	vm_map_entry_t  new_entry;

	/*
	 *	Create a new entry and insert it
	 *	AFTER the specified entry
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(end,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	assert(entry->vme_start < end);
	new_entry->vme_start = entry->vme_end = end;
	VME_OFFSET_SET(new_entry,
	    VME_OFFSET(new_entry) + (end - entry->vme_start));
	assert(new_entry->vme_start < new_entry->vme_end);

	_vm_map_store_entry_link(map_header, entry, new_entry);

	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define VM_MAP_RANGE_CHECK(map, start, end)     \
	MACRO_BEGIN                             \
	if (start < vm_map_min(map))            \
	        start = vm_map_min(map);        \
	if (end > vm_map_max(map))              \
	        end = vm_map_max(map);          \
	MACRO_END
/*
 *	vm_map_range_check:	[ internal use only ]
 *
 *	Check that the region defined by the specified start and
 *	end addresses are wholly contained within a single map
 *	entry or set of adjacent map entries of the specified map,
 *	i.e. the specified region contains no unmapped space.
 *	If any or all of the region is unmapped, FALSE is returned.
 *	Otherwise, TRUE is returned and if the output argument 'entry'
 *	is not NULL it points to the map entry containing the start
 *	of the region.
 *
 *	The map is locked for reading on entry and is left locked.
 */
static boolean_t
vm_map_range_check(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_entry_t  *entry)
{
	vm_map_entry_t  cur;
	vm_map_offset_t prev;

	/*
	 *	Basic sanity checks first
	 */
	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
		return FALSE;
	}

	/*
	 *	Check first if the region starts within a valid
	 *	mapping for the map.
	 */
	if (!vm_map_lookup_entry(map, start, &cur)) {
		return FALSE;
	}

	/*
	 *	Optimize for the case that the region is contained
	 *	in a single map entry.
	 */
	if (entry != (vm_map_entry_t *) NULL) {
		*entry = cur;
	}

	if (end <= cur->vme_end) {
		return TRUE;
	}

	/*
	 *	If the region is not wholly contained within a
	 *	single entry, walk the entries looking for holes.
	 */
	prev = cur->vme_end;
	cur = cur->vme_next;
	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
		if (end <= cur->vme_end) {
			return TRUE;
		}
		prev = cur->vme_end;
		cur = cur->vme_next;
	}
	return FALSE;
}
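/*
 * NOTE (illustrative, not part of the original source): the walk above only
 * continues while each next entry starts exactly where the previous one ended
 * (prev == cur->vme_start).  A range [A, B) split across adjacent entries
 * [A, M) and [M, B) passes; if the second entry instead started at some
 * address above M, the loop would exit and the range would be reported as
 * containing a hole.
 */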
/*
 *	vm_map_submap:		[ kernel use only ]
 *
 *	Mark the given range as handled by a subordinate map.
 *
 *	This range must have been created with vm_map_find using
 *	the vm_submap_object, and no other operations may have been
 *	performed on this range prior to calling vm_map_submap.
 *
 *	Only a limited number of operations can be performed
 *	within this range after calling vm_map_submap:
 *		vm_fault
 *	[Don't try vm_map_copyin!]
 *
 *	To remove a submapping, one must first remove the
 *	range from the superior map, and then destroy the
 *	submap (if desired).  [Better yet, don't try it.]
 */
kern_return_t
vm_map_submap(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_t        submap,
	vm_map_offset_t offset,
#ifdef NO_NESTED_PMAP
	__unused
#endif /* NO_NESTED_PMAP */
	boolean_t       use_pmap)
{
	vm_map_entry_t  entry;
	kern_return_t   result = KERN_INVALID_ARGUMENT;
	vm_object_t     object;

	vm_map_lock(map);

	if (!vm_map_lookup_entry(map, start, &entry)) {
		entry = entry->vme_next;
	}

	if (entry == vm_map_to_entry(map) ||
	    entry->is_sub_map) {
		vm_map_unlock(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_clip_start(map, entry, start);
	vm_map_clip_end(map, entry, end);

	if ((entry->vme_start == start) && (entry->vme_end == end) &&
	    (!entry->is_sub_map) &&
	    ((object = VME_OBJECT(entry)) == vm_submap_object) &&
	    (object->resident_page_count == 0) &&
	    (object->copy == VM_OBJECT_NULL) &&
	    (object->shadow == VM_OBJECT_NULL) &&
	    (!object->pager_created)) {
		VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
		VME_OBJECT_SET(entry, VM_OBJECT_NULL);
		vm_object_deallocate(object);
		entry->is_sub_map = TRUE;
		entry->use_pmap = FALSE;
		VME_SUBMAP_SET(entry, submap);
		vm_map_reference(submap);
		if (submap->mapped_in_other_pmaps == FALSE &&
		    vm_map_pmap(submap) != PMAP_NULL &&
		    vm_map_pmap(submap) != vm_map_pmap(map)) {
			/*
			 * This submap is being mapped in a map
			 * that uses a different pmap.
			 * Set its "mapped_in_other_pmaps" flag
			 * to indicate that we now need to
			 * remove mappings from all pmaps rather
			 * than just the submap's pmap.
			 */
			submap->mapped_in_other_pmaps = TRUE;
		}

#ifndef NO_NESTED_PMAP
		if (use_pmap) {
			/* nest if platform code will allow */
			if (submap->pmap == NULL) {
				ledger_t ledger = map->pmap->ledger;
				submap->pmap = pmap_create_options(ledger,
				    (vm_map_size_t) 0, 0);
				if (submap->pmap == PMAP_NULL) {
					vm_map_unlock(map);
					return KERN_NO_SPACE;
				}
#if defined(__arm__) || defined(__arm64__)
				pmap_set_nested(submap->pmap);
#endif
			}
			result = pmap_nest(map->pmap,
			    (VME_SUBMAP(entry))->pmap,
			    (addr64_t)start,
			    (uint64_t)(end - start));
			if (result) {
				panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
			}
			entry->use_pmap = TRUE;
		}
#else /* NO_NESTED_PMAP */
		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
#endif /* NO_NESTED_PMAP */
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);

	return result;
}
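/*
 * Illustrative call sequence (not part of the original source; argument lists
 * abbreviated and names taken from the declaration above): per the header
 * comment, the expected usage is roughly
 *
 *	kr = vm_map_find( ... vm_submap_object ... );
 *	kr = vm_map_submap(map, start, end, submap, 0, use_pmap);
 *
 * i.e. the range is first created against vm_submap_object and only then
 * converted into a submap mapping; other operations on the range before the
 * conversion are not supported.
 */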
/*
 *	vm_map_protect:
 *
 *	Sets the protection of the specified address
 *	region in the target map.  If "set_max" is
 *	specified, the maximum protection is to be set;
 *	otherwise, only the current protection is affected.
 */
kern_return_t
vm_map_protect(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       new_prot,
	boolean_t       set_max)
{
	vm_map_entry_t  current;
	vm_map_offset_t prev;
	vm_map_entry_t  entry;
	vm_prot_t       new_max;
	int             pmap_options = 0;
	kern_return_t   kr;

	if (new_prot & VM_PROT_COPY) {
		vm_map_offset_t         new_start;
		vm_prot_t               cur_prot, max_prot;
		vm_map_kernel_flags_t   kflags;

		/* LP64todo - see below */
		if (start >= map->max_offset) {
			return KERN_INVALID_ADDRESS;
		}

		if ((new_prot & VM_PROT_EXECUTE) &&
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if XNU_TARGET_OS_OSX && __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* XNU_TARGET_OS_OSX && __arm64__ */
		    ) &&
		    VM_MAP_POLICY_WX_FAIL(map)) {
			DTRACE_VM3(cs_wx,
			    uint64_t, (uint64_t) start,
			    uint64_t, (uint64_t) end,
			    vm_prot_t, new_prot);
			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
			    /* ... */
			    (current_task()->bsd_info
			    ? proc_name_address(current_task()->bsd_info)
			    : /* ... */),
			    /* ... */);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * Let vm_map_remap_extract() know that it will need to:
		 * + make a copy of the mapping
		 * + add VM_PROT_WRITE to the max protections
		 * + remove any protections that are no longer allowed from the
		 *   max protections (to avoid any WRITE/EXECUTE conflict, for
		 *   example)
		 * Note that "max_prot" is an IN/OUT parameter only for this
		 * specific (VM_PROT_COPY) case.  It's usually an OUT parameter
		 * only.
		 */
		max_prot = new_prot & VM_PROT_ALL;
		cur_prot = VM_PROT_NONE;
		kflags = VM_MAP_KERNEL_FLAGS_NONE;
		kflags.vmkf_remap_prot_copy = TRUE;
		kflags.vmkf_overwrite_immutable = TRUE;
		new_start = start;
		kr = vm_map_remap(map,
		    &new_start,
		    /* ... */
		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
		    kflags,
		    /* ... */
		    TRUE, /* copy-on-write remapping! */
		    &cur_prot, /* IN/OUT */
		    &max_prot, /* IN/OUT */
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		new_prot &= ~VM_PROT_COPY;
	}

	vm_map_lock(map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (start >= map->max_offset) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	/*
	 *	Lookup the entry.  If it doesn't start in a valid
	 *	entry, return an error.
	 */
	if (!vm_map_lookup_entry(map, start, &entry)) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
		start = SUPERPAGE_ROUND_DOWN(start);
	}
	if (entry->superpage_size) {
		end = SUPERPAGE_ROUND_UP(end);
	}

	/*
	 *	Make a first pass to check for protection and address
	 *	violations.
	 */
	current = entry;
	prev = current->vme_start;
	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start < end)) {
		/*
		 * If there is a hole, return an error.
		 */
		if (current->vme_start != prev) {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		new_max = current->max_protection;
		if ((new_prot & new_max) != new_prot) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}

		if (current->used_for_jit &&
		    pmap_has_prot_policy(map->pmap, current->translated_allow_execute, current->protection)) {
			vm_map_unlock(map);
			return KERN_PROTECTION_FAILURE;
		}

		if ((new_prot & VM_PROT_WRITE) &&
		    (new_prot & VM_PROT_EXECUTE) &&
#if XNU_TARGET_OS_OSX
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
		    ) &&
#endif /* XNU_TARGET_OS_OSX */
		    !(current->used_for_jit)) {
			DTRACE_VM3(cs_wx,
			    uint64_t, (uint64_t) current->vme_start,
			    uint64_t, (uint64_t) current->vme_end,
			    vm_prot_t, new_prot);
			printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
			    /* ... */
			    (current_task()->bsd_info
			    ? proc_name_address(current_task()->bsd_info)
			    : /* ... */),
			    /* ... */);
			new_prot &= ~VM_PROT_EXECUTE;
			if (VM_MAP_POLICY_WX_FAIL(map)) {
				vm_map_unlock(map);
				return KERN_PROTECTION_FAILURE;
			}
		}

		/*
		 * If the task has requested executable lockdown,
		 * deny both:
		 * - adding executable protections OR
		 * - adding write protections to an existing executable mapping.
		 */
		if (map->map_disallow_new_exec == TRUE) {
			if ((new_prot & VM_PROT_EXECUTE) ||
			    ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
				vm_map_unlock(map);
				return KERN_PROTECTION_FAILURE;
			}
		}

		prev = current->vme_end;
		current = current->vme_next;
	}

#if __arm64__
	if (end > prev &&
	    end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
		vm_map_entry_t prev_entry;

		prev_entry = current->vme_prev;
		if (prev_entry != vm_map_to_entry(map) &&
		    !prev_entry->map_aligned &&
		    (vm_map_round_page(prev_entry->vme_end,
		    VM_MAP_PAGE_MASK(map))
		    == end)) {
			/*
			 * The last entry in our range is not "map-aligned"
			 * but it would have reached all the way to "end"
			 * if it had been map-aligned, so this is not really
			 * a hole in the range and we can proceed.
			 */
			prev = end;
		}
	}
#endif /* __arm64__ */

	if (end > prev) {
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	/*
	 *	Go back and fix up protections.
	 *	Clip to start here if the range starts within
	 *	the entry.
	 */
	current = entry;
	if (current != vm_map_to_entry(map)) {
		/* clip and unnest if necessary */
		vm_map_clip_start(map, current, start);
	}

	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start < end)) {
		vm_prot_t old_prot;

		vm_map_clip_end(map, current, end);

		if (current->is_sub_map) {
			/* clipping did unnest if needed */
			assert(!current->use_pmap);
		}

		old_prot = current->protection;

		if (set_max) {
			current->max_protection = new_prot;
			current->protection = new_prot & old_prot;
		} else {
			current->protection = new_prot;
		}

		/*
		 *	Update physical map if necessary.
		 *	If the request is to turn off write protection,
		 *	we won't do it for real (in pmap). This is because
		 *	it would cause copy-on-write to fail.  We've already
		 *	set the new protection in the map, so if a
		 *	write-protect fault occurred, it will be fixed up
		 *	properly, COW or not.
		 */
		if (current->protection != old_prot) {
			/* Look one level in we support nested pmaps */
			/* from mapped submaps which are direct entries */
			/* in our map */

			vm_prot_t prot;

			prot = current->protection;
			if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
				prot &= ~VM_PROT_WRITE;
			} else {
				assert(!VME_OBJECT(current)->code_signed);
				assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
			}

			if (override_nx(map, VME_ALIAS(current)) && prot) {
				prot |= VM_PROT_EXECUTE;
			}

#if DEVELOPMENT || DEBUG
			if (!(old_prot & VM_PROT_EXECUTE) &&
			    (prot & VM_PROT_EXECUTE) &&
			    panic_on_unsigned_execute &&
			    (proc_selfcsflags() & CS_KILL)) {
				panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
			}
#endif /* DEVELOPMENT || DEBUG */

			if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
				if (current->wired_count) {
					panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
					    map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
				}

				/* If the pmap layer cares about this
				 * protection type, force a fault for
				 * each page so that vm_fault will
				 * repopulate the page with the full
				 * set of protections.
				 *
				 * TODO: We don't seem to need this,
				 * but this is due to an internal
				 * implementation detail of
				 * pmap_protect.  Do we want to rely
				 * on this?
				 */
				prot = VM_PROT_NONE;
			}

			if (current->is_sub_map && current->use_pmap) {
				pmap_protect(VME_SUBMAP(current)->pmap,
				    /* ... */);
			} else {
				if (prot & VM_PROT_WRITE) {
					if (VME_OBJECT(current) == compressor_object) {
						/*
						 * For write requests on the
						 * compressor, we will ask the
						 * pmap layer to prevent us from
						 * taking a write fault when we
						 * attempt to access the mapping
						 * next.
						 */
						pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
					}
				}

				pmap_protect_options(map->pmap,
				    /* ... */);
			}
		}
		current = current->vme_next;
	}

	current = entry;
	while ((current != vm_map_to_entry(map)) &&
	    (current->vme_start <= end)) {
		vm_map_simplify_entry(map, current);
		current = current->vme_next;
	}

	vm_map_unlock(map);
	return KERN_SUCCESS;
}
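/*
 * NOTE (illustrative, not part of the original source): when VM_PROT_COPY is
 * included in new_prot, the code above first remaps the range copy-on-write
 * over itself (VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE), so a caller asking to
 * make a read-only region writable gets private copies rather than write
 * access to the original object; the VM_PROT_COPY bit is then stripped before
 * the normal protection update proceeds.
 */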
/*
 *	vm_map_inherit:
 *
 *	Sets the inheritance of the specified address
 *	range in the target map.  Inheritance
 *	affects how the map will be shared with
 *	child maps at the time of vm_map_fork.
 */
kern_return_t
vm_map_inherit(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_inherit_t    new_inheritance)
{
	vm_map_entry_t  entry;
	vm_map_entry_t  temp_entry;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &temp_entry)) {
		entry = temp_entry;
	} else {
		temp_entry = temp_entry->vme_next;
		entry = temp_entry;
	}

	/* first check entire range for submaps which can't support the */
	/* given inheritance. */
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		if (entry->is_sub_map) {
			if (new_inheritance == VM_INHERIT_COPY) {
				vm_map_unlock(map);
				return KERN_INVALID_ARGUMENT;
			}
		}

		entry = entry->vme_next;
	}

	entry = temp_entry;
	if (entry != vm_map_to_entry(map)) {
		/* clip and unnest if necessary */
		vm_map_clip_start(map, entry, start);
	}

	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		vm_map_clip_end(map, entry, end);
		if (entry->is_sub_map) {
			/* clip did unnest if needed */
			assert(!entry->use_pmap);
		}

		entry->inheritance = new_inheritance;

		entry = entry->vme_next;
	}

	vm_map_unlock(map);
	return KERN_SUCCESS;
}
/*
 * Update the accounting for the amount of wired memory in this map.  If the user has
 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
 */
static kern_return_t
add_wire_counts(
	vm_map_t        map,
	vm_map_entry_t  entry,
	boolean_t       user_wire)
{
	vm_map_size_t   size;

	if (user_wire) {
		unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

		/*
		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
		 * this map entry.
		 */
		if (entry->user_wired_count == 0) {
			size = entry->vme_end - entry->vme_start;

			/*
			 * Since this is the first time the user is wiring this map entry, check to see if we're
			 * exceeding the user wire limits.  There is a per map limit which is the smaller of either
			 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value.  There is also
			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
			 * limit, then we fail.
			 */
			if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
			    size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
				if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
					os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
				} else {
					os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
				}
				return KERN_RESOURCE_SHORTAGE;
			}

			/*
			 * The first time the user wires an entry, we also increment the wired_count and add this to
			 * the total that has been wired in the map.
			 */
			if (entry->wired_count >= MAX_WIRE_COUNT) {
				return KERN_FAILURE;
			}

			entry->wired_count++;
			map->user_wire_size += size;
		}

		if (entry->user_wired_count >= MAX_WIRE_COUNT) {
			return KERN_FAILURE;
		}

		entry->user_wired_count++;
	} else {
		/*
		 * The kernel's wiring the memory.  Just bump the count and continue.
		 */
		if (entry->wired_count >= MAX_WIRE_COUNT) {
			panic("vm_map_wire: too many wirings");
		}

		entry->wired_count++;
	}

	return KERN_SUCCESS;
}
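/*
 * Worked example (illustrative, not part of the original source; all numbers
 * are assumptions): suppose an 8MB entry is being user-wired for the first
 * time, map->user_wire_size is already 60MB, the per-task limit
 * MIN(map->user_wire_limit, vm_per_task_user_wire_limit) is 64MB, and the
 * global limit is not exceeded.  Then 8MB + 60MB > 64MB, so the check above
 * fails with KERN_RESOURCE_SHORTAGE and bumps
 * vm_add_wire_count_over_user_limit.
 */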
/*
 * Update the memory wiring accounting now that the given map entry is being unwired.
 */
static void
subtract_wire_counts(
	vm_map_t        map,
	vm_map_entry_t  entry,
	boolean_t       user_wire)
{
	if (user_wire) {
		/*
		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
		 */
		if (entry->user_wired_count == 1) {
			/*
			 * We're removing the last user wire reference.  Decrement the wired_count and the total
			 * user wired memory for this map.
			 */
			assert(entry->wired_count >= 1);
			entry->wired_count--;
			map->user_wire_size -= entry->vme_end - entry->vme_start;
		}

		assert(entry->user_wired_count >= 1);
		entry->user_wired_count--;
	} else {
		/*
		 * The kernel is unwiring the memory.  Just update the count.
		 */
		assert(entry->wired_count >= 1);
		entry->wired_count--;
	}
}
int cs_executable_wire = 0;

/*
 *	vm_map_wire:
 *
 *	Sets the pageability of the specified address range in the
 *	target map as wired.  Regions specified as not pageable require
 *	locked-down physical memory and physical page maps.  The
 *	access_type variable indicates types of accesses that must not
 *	generate page faults.  This is checked against protection of
 *	memory being locked-down.
 *
 *	The map must not be locked, but a reference must remain to the
 *	map throughout the call.
 */
static kern_return_t
vm_map_wire_nested(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       caller_prot,
	vm_tag_t        tag,
	boolean_t       user_wire,
	pmap_t          map_pmap,
	vm_map_offset_t pmap_addr,
	ppnum_t         *physpage_p)
{
	vm_map_entry_t          entry;
	vm_prot_t               access_type;
	struct vm_map_entry     *first_entry, tmp_entry;
	vm_map_t                real_map;
	vm_map_offset_t         s, e;
	kern_return_t           rc;
	boolean_t               need_wakeup;
	boolean_t               main_map = FALSE;
	wait_interrupt_t        interruptible_state;
	thread_t                cur_thread;
	unsigned int            last_timestamp;
	vm_map_size_t           size;
	boolean_t               wire_and_extract;
	vm_prot_t               extra_prots;

	extra_prots = VM_PROT_COPY;
	extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
#if XNU_TARGET_OS_OSX
	if (map->pmap == kernel_pmap ||
	    !vm_map_cs_enforcement(map)) {
		extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
	}
#endif /* XNU_TARGET_OS_OSX */

	access_type = (caller_prot & VM_PROT_ALL);

	wire_and_extract = FALSE;
	if (physpage_p != NULL) {
		/*
		 * The caller wants the physical page number of the
		 * wired page.  We return only one physical page number
		 * so this works for only one page at a time.
		 */
		if ((end - start) != PAGE_SIZE) {
			return KERN_INVALID_ARGUMENT;
		}
		wire_and_extract = TRUE;
	}
	vm_map_lock(map);
	if (map_pmap == NULL) {
		main_map = TRUE;
	}
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
	if (start == end) {
		/* We wired what the caller asked for, zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}

	need_wakeup = FALSE;
	cur_thread = current_thread();

	s = start;
	rc = KERN_SUCCESS;

	if (vm_map_lookup_entry(map, s, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested submaps here !
		 */
	} else {
		/* Start address is not in map */
		rc = KERN_INVALID_ADDRESS;
		goto done;
	}
	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have wired from "start" to "s".
		 * We still need to wire from "s" to "end".
		 *
		 * "entry" hasn't been clipped, so it could start before "s"
		 * and/or end after "end".
		 */

		/* "e" is how far we want to wire in this entry */
		e = entry->vme_end;
		if (e > end) {
			e = end;
		}

		/*
		 * If another thread is wiring/unwiring this entry then
		 * block after informing other thread to wake us up.
		 */
		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * We have not clipped the entry.  Make sure that
			 * the start address is in range so that the lookup
			 * below will succeed.
			 * "s" is the current starting point: we've already
			 * wired from "start" to "s" and we still have
			 * to wire from "s" to "end".
			 */

			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already wired.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}
			/*
			 * User wiring is interruptible
			 */
			wait_result = vm_map_entry_wait(map,
			    (user_wire) ? THREAD_ABORTSAFE :
			    THREAD_UNINT);
			if (user_wire && wait_result == THREAD_INTERRUPTED) {
				/*
				 * undo the wirings we have done so far
				 * We do not clear the needs_wakeup flag,
				 * because we cannot tell if we were the
				 * only one waiting.
				 */
				rc = KERN_FAILURE;
				goto done;
			}

			/*
			 * Cannot avoid a lookup here. reset timestamp.
			 */
			last_timestamp = map->timestamp;

			/*
			 * The entry could have been clipped, look it up again.
			 * Worst that can happen is, it may not exist anymore.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: undo everything up to the previous
				 * entry.  let vm_map_unwire worry about
				 * checking the validity of the range.
				 */
				rc = KERN_INVALID_ADDRESS;
				goto done;
			}
			entry = first_entry;
			continue;
		}
		if (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_start;
			vm_map_offset_t local_end;
			pmap_t          pmap;

			if (wire_and_extract) {
				/*
				 * Wiring would result in copy-on-write
				 * which would not be compatible with
				 * the sharing we have with the original
				 * provider of this memory.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end;
			sub_end += VME_OFFSET(entry) - entry->vme_start;

			local_end = entry->vme_end;
			if (map_pmap == NULL) {
				vm_object_offset_t      offset;
				/* ... */
				vm_map_entry_t          local_entry;
				vm_map_version_t        version;
				vm_map_t                lookup_map;

				if (entry->use_pmap) {
					pmap = VME_SUBMAP(entry)->pmap;
					/* ppc implementation requires that */
					/* submaps pmap address ranges line */
					/* up with parent map */
					pmap_addr = sub_start;
				} else {
					pmap = map->pmap;
					pmap_addr = s;
				}

				if (entry->wired_count) {
					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
						goto done;
					}

					/*
					 * The map was not unlocked:
					 * no need to goto re-lookup.
					 * Just go directly to next entry.
					 */
					entry = entry->vme_next;
					s = entry->vme_start;
					continue;
				}
				/* call vm_map_lookup_locked to */
				/* cause any needs copy to be */
				/* evaluated */
				local_start = entry->vme_start;
				vm_map_lock_write_to_read(map);
				rc = vm_map_lookup_locked(
					&lookup_map, local_start,
					(access_type | extra_prots),
					OBJECT_LOCK_EXCLUSIVE,
					/* ... */
					&offset, &prot, &wired,
					/* ... */);
				if (rc != KERN_SUCCESS) {
					vm_map_unlock_read(lookup_map);
					assert(map_pmap == NULL);
					vm_map_unwire(map, start,
					    /* ... */);
					return rc;
				}
				vm_object_unlock(object);
				if (real_map != lookup_map) {
					vm_map_unlock(real_map);
				}
				vm_map_unlock_read(lookup_map);
				vm_map_lock(map);

				/* we unlocked, so must re-lookup */
				if (!vm_map_lookup_entry(map,
				    local_start,
				    &local_entry)) {
					rc = KERN_FAILURE;
					goto done;
				}

				/*
				 * entry could have been "simplified",
				 * so re-clip
				 */
				entry = local_entry;
				assert(s == local_start);
				vm_map_clip_start(map, entry, s);
				vm_map_clip_end(map, entry, end);
				/* re-compute "e" */
				e = entry->vme_end;
				if (e > end) {
					e = end;
				}

				/* did we have a change of type? */
				if (!entry->is_sub_map) {
					last_timestamp = map->timestamp;
					continue;
				}
			} else {
				local_start = entry->vme_start;
				pmap = map_pmap;
			}

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
				goto done;
			}

			entry->in_transition = TRUE;

			vm_map_unlock(map);
			rc = vm_map_wire_nested(VME_SUBMAP(entry),
			    /* ... */
			    user_wire, pmap, pmap_addr,
			    /* ... */);
			vm_map_lock(map);

			/*
			 * Find the entry again.  It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, local_start,
			    &first_entry)) {
				panic("vm_map_wire: re-lookup failed");
			}
			entry = first_entry;

			assert(local_start == s);
			/* re-compute "e" */
			e = entry->vme_end;
			if (e > end) {
				e = end;
			}

			last_timestamp = map->timestamp;
			while ((entry != vm_map_to_entry(map)) &&
			    (entry->vme_start < e)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
					subtract_wire_counts(map, entry, user_wire);
				}
				entry = entry->vme_next;
			}
			if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
				goto done;
			}

			/* no need to relookup again */
			s = entry->vme_start;
			continue;
		}
		/*
		 * If this entry is already wired then increment
		 * the appropriate wire reference count.
		 */
		if (entry->wired_count) {
			if ((entry->protection & access_type) != access_type) {
				/* found a protection problem */

				/*
				 * We should always return an error
				 * in this case but since we didn't
				 * enforce it before, let's do
				 * it only for the new "wire_and_extract"
				 * code path for now...
				 */
				if (wire_and_extract) {
					rc = KERN_PROTECTION_FAILURE;
					goto done;
				}
			}

			/*
			 * entry is already wired down, get our reference
			 * after clipping to our range.
			 */
			vm_map_clip_start(map, entry, s);
			vm_map_clip_end(map, entry, end);

			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
				goto done;
			}

			if (wire_and_extract) {
				vm_object_t             object;
				vm_object_offset_t      offset;
				vm_page_t               m;

				/*
				 * We don't have to "wire" the page again
				 * but we still have to "extract" its
				 * physical page number, after some sanity
				 * checks.
				 */
				assert((entry->vme_end - entry->vme_start)
				    == PAGE_SIZE);
				assert(!entry->needs_copy);
				assert(!entry->is_sub_map);
				assert(VME_OBJECT(entry));
				if (((entry->vme_end - entry->vme_start)
				    != PAGE_SIZE) ||
				    entry->needs_copy ||
				    entry->is_sub_map ||
				    VME_OBJECT(entry) == VM_OBJECT_NULL) {
					rc = KERN_INVALID_ARGUMENT;
					goto done;
				}

				object = VME_OBJECT(entry);
				offset = VME_OFFSET(entry);
				/* need exclusive lock to update m->dirty */
				if (entry->protection & VM_PROT_WRITE) {
					vm_object_lock(object);
				} else {
					vm_object_lock_shared(object);
				}
				m = vm_page_lookup(object, offset);
				assert(m != VM_PAGE_NULL);
				assert(VM_PAGE_WIRED(m));
				if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
					*physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
					if (entry->protection & VM_PROT_WRITE) {
						vm_object_lock_assert_exclusive(
							object);
						m->vmp_dirty = TRUE;
					}
				} else {
					/* not already wired !? */
					*physpage_p = 0;
				}
				vm_object_unlock(object);
			}

			/* map was not unlocked: no need to relookup */
			entry = entry->vme_next;
			s = entry->vme_start;
			continue;
		}
		/*
		 * Unwired entry or wire request transmitted via submap
		 */

		/*
		 * Wiring would copy the pages to the shadow object.
		 * The shadow object would not be code-signed so
		 * attempting to execute code from these copied pages
		 * would trigger a code-signing violation.
		 */
		if ((entry->protection & VM_PROT_EXECUTE)
#if XNU_TARGET_OS_OSX
		    &&
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
		    )
#endif /* XNU_TARGET_OS_OSX */
		    ) {
#if MACH_ASSERT
			printf("pid %d[%s] wiring executable range from "
			    "0x%llx to 0x%llx: rejected to preserve "
			    "code-signing\n",
			    /* ... */
			    (current_task()->bsd_info
			    ? proc_name_address(current_task()->bsd_info)
			    : /* ... */),
			    (uint64_t) entry->vme_start,
			    (uint64_t) entry->vme_end);
#endif /* MACH_ASSERT */
			DTRACE_VM2(cs_executable_wire,
			    uint64_t, (uint64_t)entry->vme_start,
			    uint64_t, (uint64_t)entry->vme_end);
			cs_executable_wire++;
			rc = KERN_PROTECTION_FAILURE;
			goto done;
		}

		/*
		 * Perform actions of vm_map_lookup that need the write
		 * lock on the map: create a shadow object for a
		 * copy-on-write region, or an object for a zero-fill
		 * region.
		 */
		size = entry->vme_end - entry->vme_start;
		/*
		 * If wiring a copy-on-write page, we need to copy it now
		 * even if we're only (currently) requesting read access.
		 * This is aggressive, but once it's wired we can't move it.
		 */
		if (entry->needs_copy) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should not be "needs_copy"
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}

			VME_OBJECT_SHADOW(entry, size);
			entry->needs_copy = FALSE;
		} else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should already have an object.
				 */
				rc = KERN_INVALID_ARGUMENT;
				goto done;
			}
			VME_OBJECT_SET(entry, vm_object_allocate(size));
			VME_OFFSET_SET(entry, (vm_object_offset_t)0);
			assert(entry->use_pmap);
		}

		vm_map_clip_start(map, entry, s);
		vm_map_clip_end(map, entry, end);

		/* re-compute "e" */
		e = entry->vme_end;
		if (e > end) {
			e = end;
		}
		/*
		 * Check for holes and protection mismatch.
		 * Holes: Next entry should be contiguous unless this
		 *	  is the end of the region.
		 * Protection: Access requested must be allowed, unless
		 *	  wiring is by protection class
		 */
		if ((entry->vme_end < end) &&
		    ((entry->vme_next == vm_map_to_entry(map)) ||
		    (entry->vme_next->vme_start > entry->vme_end))) {
			/* found a hole */
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}
		if ((entry->protection & access_type) != access_type) {
			/* found a protection problem */
			rc = KERN_PROTECTION_FAILURE;
			goto done;
		}

		assert(entry->wired_count == 0 && entry->user_wired_count == 0);

		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
			goto done;
		}

		entry->in_transition = TRUE;

		/*
		 * This entry might get split once we unlock the map.
		 * In vm_fault_wire(), we need the current range as
		 * defined by this entry.  In order for this to work
		 * along with a simultaneous clip operation, we make a
		 * temporary copy of this entry and use that for the
		 * wiring.  Note that the underlying objects do not
		 * change during a clip.
		 */
		tmp_entry = *entry;

		/*
		 * The in_transition state guarantees that the entry
		 * (or entries for this range, if a split occurred) will be
		 * there when the map lock is acquired for the second time.
		 */
		vm_map_unlock(map);

		if (!user_wire && cur_thread != THREAD_NULL) {
			interruptible_state = thread_interrupt_level(THREAD_UNINT);
		} else {
			interruptible_state = THREAD_UNINT;
		}

		if (map_pmap) {
			rc = vm_fault_wire(map,
			    &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
			    /* ... */);
		} else {
			rc = vm_fault_wire(map,
			    &tmp_entry, caller_prot, tag, map->pmap,
			    tmp_entry.vme_start,
			    /* ... */);
		}

		if (!user_wire && cur_thread != THREAD_NULL) {
			thread_interrupt_level(interruptible_state);
		}
		vm_map_lock(map);

		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * Find the entry again.  It could have been clipped
			 * after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
			    &first_entry)) {
				panic("vm_map_wire: re-lookup failed");
			}

			entry = first_entry;
		}

		last_timestamp = map->timestamp;

		while ((entry != vm_map_to_entry(map)) &&
		    (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
				subtract_wire_counts(map, entry, user_wire);
			}
			entry = entry->vme_next;
		}

		if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
			goto done;
		}

		if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
		    (tmp_entry.vme_end != end) &&      /* AND, we are not at the end of the requested range */
		    (entry->vme_start != tmp_entry.vme_end)) {   /* AND, the next entry is not contiguous. */
			/* found a "new" hole */
			s = tmp_entry.vme_end;
			rc = KERN_INVALID_ADDRESS;
			goto done;
		}

		s = entry->vme_start;
	} /* end while loop through map entries */
done:
	if (rc == KERN_SUCCESS) {
		/* repair any damage we may have made to the VM map */
		vm_map_simplify_range(map, start, end);
	}

	vm_map_unlock(map);

	/*
	 * wake up anybody waiting on entries we wired.
	 */
	if (need_wakeup) {
		vm_map_entry_wakeup(map);
	}

	if (rc != KERN_SUCCESS) {
		/* undo what has been wired so far */
		vm_map_unwire_nested(map, start, s, user_wire,
		    map_pmap, pmap_addr);
		/* ... */
	}

	return rc;
}
kern_return_t
vm_map_wire_external(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       caller_prot,
	boolean_t       user_wire)
{
	kern_return_t   kret;

	kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
	    user_wire, (pmap_t)NULL, 0, NULL);
	return kret;
}

kern_return_t
vm_map_wire_kernel(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t       caller_prot,
	vm_tag_t        tag,
	boolean_t       user_wire)
{
	kern_return_t   kret;

	kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
	    user_wire, (pmap_t)NULL, 0, NULL);
	return kret;
}
kern_return_t
vm_map_wire_and_extract_external(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_prot_t       caller_prot,
	boolean_t       user_wire,
	ppnum_t         *physpage_p)
{
	kern_return_t   kret;

	kret = vm_map_wire_nested(map,
	    start,
	    start + VM_MAP_PAGE_SIZE(map),
	    caller_prot,
	    vm_tag_bt(),
	    user_wire,
	    (pmap_t)NULL,
	    0,
	    physpage_p);
	if (kret != KERN_SUCCESS &&
	    physpage_p != NULL) {
		*physpage_p = 0;
	}
	return kret;
}

kern_return_t
vm_map_wire_and_extract_kernel(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_prot_t       caller_prot,
	vm_tag_t        tag,
	boolean_t       user_wire,
	ppnum_t         *physpage_p)
{
	kern_return_t   kret;

	kret = vm_map_wire_nested(map,
	    start,
	    start + VM_MAP_PAGE_SIZE(map),
	    caller_prot,
	    tag,
	    user_wire,
	    (pmap_t)NULL,
	    0,
	    physpage_p);
	if (kret != KERN_SUCCESS &&
	    physpage_p != NULL) {
		*physpage_p = 0;
	}
	return kret;
}
 *	Sets the pageability of the specified address range in the target
 *	map as pageable.  Regions specified must have been wired previously.
 *
 *	The map must not be locked, but a reference must remain to the map
 *	throughout the call.
 *
 *	Kernel will panic on failures.  User unwire ignores holes and
 *	unwired and in-transition entries to avoid losing memory by leaving
 *	it unwired.
 */
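/*
 * A minimal usage sketch (hypothetical caller; assumes a valid map
 * reference and a range wired earlier with the same user/kernel setting):
 *
 *	kr = vm_map_wire_kernel(map, start, end, VM_PROT_READ,
 *	    VM_KERN_MEMORY_NONE, FALSE);
 *	...
 *	kr = vm_map_unwire(map, start, end, FALSE);
 */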
static kern_return_t
vm_map_unwire_nested(
	vm_map_t                map,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	boolean_t               user_wire,
	pmap_t                  map_pmap,
	vm_map_offset_t         pmap_addr)
{
7092 vm_map_entry_t entry
;
7093 struct vm_map_entry
*first_entry
, tmp_entry
;
7094 boolean_t need_wakeup
;
7095 boolean_t main_map
= FALSE
;
7096 unsigned int last_timestamp
;
7099 if (map_pmap
== NULL
) {
7102 last_timestamp
= map
->timestamp
;
7104 VM_MAP_RANGE_CHECK(map
, start
, end
);
7105 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
7106 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
7109 /* We unwired what the caller asked for: zero pages */
7111 return KERN_SUCCESS
;
7114 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
7115 entry
= first_entry
;
7117 * vm_map_clip_start will be done later.
7118 * We don't want to unnest any nested sub maps here !
7122 panic("vm_map_unwire: start not found");
7124 /* Start address is not in map. */
7126 return KERN_INVALID_ADDRESS
;
7129 if (entry
->superpage_size
) {
7130 /* superpages are always wired */
7132 return KERN_INVALID_ADDRESS
;
7135 need_wakeup
= FALSE
;
7136 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
7137 if (entry
->in_transition
) {
			/*
			 * Another thread is wiring down this entry.  Note
			 * that if it is not for the other thread we would
			 * be unwiring an unwired entry.  This is not
			 * permitted.  If we wait, we will be unwiring memory
			 * we did not wire.
			 *
			 * Another thread is unwiring this entry.  We did not
			 * have a reference to it, because if we did, this
			 * entry would not be getting unwired now.
			 */
			/*
			 * This could happen: there could be some
			 * overlapping vslock/vsunlock operations.
			 * We should probably just wait and retry,
			 * but then we have to be careful that this
			 * entry could get "simplified" after
			 * "in_transition" gets unset and before
			 * we re-lookup the entry, so we would
			 * have to re-clip the entry to avoid
			 * re-unwiring what we have already unwired...
			 * See vm_map_wire_nested().
			 *
			 * Or we could just ignore "in_transition"
			 * here and proceed to decrement the wired
			 * count(s) on this entry.  That should be fine
			 * as long as "wired_count" doesn't drop all
			 * the way to 0 (and we should panic if THAT
			 * happens).
			 */
7173 panic("vm_map_unwire: in_transition entry");
7176 entry
= entry
->vme_next
;
7180 if (entry
->is_sub_map
) {
7181 vm_map_offset_t sub_start
;
7182 vm_map_offset_t sub_end
;
7183 vm_map_offset_t local_end
;
7186 vm_map_clip_start(map
, entry
, start
);
7187 vm_map_clip_end(map
, entry
, end
);
7189 sub_start
= VME_OFFSET(entry
);
7190 sub_end
= entry
->vme_end
- entry
->vme_start
;
7191 sub_end
+= VME_OFFSET(entry
);
7192 local_end
= entry
->vme_end
;
7193 if (map_pmap
== NULL
) {
7194 if (entry
->use_pmap
) {
7195 pmap
= VME_SUBMAP(entry
)->pmap
;
7196 pmap_addr
= sub_start
;
7201 if (entry
->wired_count
== 0 ||
7202 (user_wire
&& entry
->user_wired_count
== 0)) {
7204 panic("vm_map_unwire: entry is unwired");
7206 entry
= entry
->vme_next
;
7212 * Holes: Next entry should be contiguous unless
7213 * this is the end of the region.
7215 if (((entry
->vme_end
< end
) &&
7216 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7217 (entry
->vme_next
->vme_start
7218 > entry
->vme_end
)))) {
7220 panic("vm_map_unwire: non-contiguous region");
7223 * entry = entry->vme_next;
7228 subtract_wire_counts(map
, entry
, user_wire
);
7230 if (entry
->wired_count
!= 0) {
7231 entry
= entry
->vme_next
;
7235 entry
->in_transition
= TRUE
;
7236 tmp_entry
= *entry
;/* see comment in vm_map_wire() */
			/*
			 * We can unlock the map now.  The in_transition state
			 * guarantees existence of the entry.
			 */
7243 vm_map_unwire_nested(VME_SUBMAP(entry
),
7244 sub_start
, sub_end
, user_wire
, pmap
, pmap_addr
);
7247 if (last_timestamp
+ 1 != map
->timestamp
) {
7249 * Find the entry again. It could have been
7250 * clipped or deleted after we unlocked the map.
7252 if (!vm_map_lookup_entry(map
,
7253 tmp_entry
.vme_start
,
7256 panic("vm_map_unwire: re-lookup failed");
7258 entry
= first_entry
->vme_next
;
7260 entry
= first_entry
;
7263 last_timestamp
= map
->timestamp
;
7266 * clear transition bit for all constituent entries
7267 * that were in the original entry (saved in
7268 * tmp_entry). Also check for waiters.
7270 while ((entry
!= vm_map_to_entry(map
)) &&
7271 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7272 assert(entry
->in_transition
);
7273 entry
->in_transition
= FALSE
;
7274 if (entry
->needs_wakeup
) {
7275 entry
->needs_wakeup
= FALSE
;
7278 entry
= entry
->vme_next
;
7283 vm_map_unwire_nested(VME_SUBMAP(entry
),
7284 sub_start
, sub_end
, user_wire
, map_pmap
,
7288 if (last_timestamp
+ 1 != map
->timestamp
) {
7290 * Find the entry again. It could have been
7291 * clipped or deleted after we unlocked the map.
7293 if (!vm_map_lookup_entry(map
,
7294 tmp_entry
.vme_start
,
7297 panic("vm_map_unwire: re-lookup failed");
7299 entry
= first_entry
->vme_next
;
7301 entry
= first_entry
;
7304 last_timestamp
= map
->timestamp
;
7309 if ((entry
->wired_count
== 0) ||
7310 (user_wire
&& entry
->user_wired_count
== 0)) {
7312 panic("vm_map_unwire: entry is unwired");
7315 entry
= entry
->vme_next
;
7319 assert(entry
->wired_count
> 0 &&
7320 (!user_wire
|| entry
->user_wired_count
> 0));
7322 vm_map_clip_start(map
, entry
, start
);
7323 vm_map_clip_end(map
, entry
, end
);
7327 * Holes: Next entry should be contiguous unless
7328 * this is the end of the region.
7330 if (((entry
->vme_end
< end
) &&
7331 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7332 (entry
->vme_next
->vme_start
> entry
->vme_end
)))) {
7334 panic("vm_map_unwire: non-contiguous region");
7336 entry
= entry
->vme_next
;
7340 subtract_wire_counts(map
, entry
, user_wire
);
7342 if (entry
->wired_count
!= 0) {
7343 entry
= entry
->vme_next
;
7347 if (entry
->zero_wired_pages
) {
7348 entry
->zero_wired_pages
= FALSE
;
7351 entry
->in_transition
= TRUE
;
7352 tmp_entry
= *entry
; /* see comment in vm_map_wire() */
		/*
		 * We can unlock the map now.  The in_transition state
		 * guarantees existence of the entry.
		 */
7360 vm_fault_unwire(map
,
7361 &tmp_entry
, FALSE
, map_pmap
, pmap_addr
);
7363 vm_fault_unwire(map
,
7364 &tmp_entry
, FALSE
, map
->pmap
,
7365 tmp_entry
.vme_start
);
7369 if (last_timestamp
+ 1 != map
->timestamp
) {
7371 * Find the entry again. It could have been clipped
7372 * or deleted after we unlocked the map.
7374 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
7377 panic("vm_map_unwire: re-lookup failed");
7379 entry
= first_entry
->vme_next
;
7381 entry
= first_entry
;
7384 last_timestamp
= map
->timestamp
;
7387 * clear transition bit for all constituent entries that
7388 * were in the original entry (saved in tmp_entry). Also
7389 * check for waiters.
7391 while ((entry
!= vm_map_to_entry(map
)) &&
7392 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7393 assert(entry
->in_transition
);
7394 entry
->in_transition
= FALSE
;
7395 if (entry
->needs_wakeup
) {
7396 entry
->needs_wakeup
= FALSE
;
7399 entry
= entry
->vme_next
;
	/*
	 * We might have fragmented the address space when we wired this
	 * range of addresses.  Attempt to re-coalesce these VM map entries
	 * with their neighbors now that they're no longer wired.
	 * Under some circumstances, address space fragmentation can
	 * prevent VM object shadow chain collapsing, which can cause
	 * swap space leaks.
	 */
7411 vm_map_simplify_range(map
, start
, end
);
7415 * wake up anybody waiting on entries that we have unwired.
7418 vm_map_entry_wakeup(map
);
7420 return KERN_SUCCESS
;
7426 vm_map_offset_t start
,
7427 vm_map_offset_t end
,
7428 boolean_t user_wire
)
7430 return vm_map_unwire_nested(map
, start
, end
,
7431 user_wire
, (pmap_t
)NULL
, 0);
7436 * vm_map_entry_delete: [ internal use only ]
7438 * Deallocate the given entry from the target map.
7441 vm_map_entry_delete(
7443 vm_map_entry_t entry
)
7445 vm_map_offset_t s
, e
;
7449 s
= entry
->vme_start
;
7451 assert(VM_MAP_PAGE_ALIGNED(s
, FOURK_PAGE_MASK
));
7452 assert(VM_MAP_PAGE_ALIGNED(e
, FOURK_PAGE_MASK
));
7453 if (VM_MAP_PAGE_MASK(map
) >= PAGE_MASK
) {
7454 assert(page_aligned(s
));
7455 assert(page_aligned(e
));
7457 if (entry
->map_aligned
== TRUE
) {
7458 assert(VM_MAP_PAGE_ALIGNED(s
, VM_MAP_PAGE_MASK(map
)));
7459 assert(VM_MAP_PAGE_ALIGNED(e
, VM_MAP_PAGE_MASK(map
)));
7461 assert(entry
->wired_count
== 0);
7462 assert(entry
->user_wired_count
== 0);
7463 assert(!entry
->permanent
);
7465 if (entry
->is_sub_map
) {
7467 submap
= VME_SUBMAP(entry
);
7470 object
= VME_OBJECT(entry
);
7473 vm_map_store_entry_unlink(map
, entry
);
7476 vm_map_entry_dispose(map
, entry
);
7480 * Deallocate the object only after removing all
7481 * pmap entries pointing to its pages.
7484 vm_map_deallocate(submap
);
7486 vm_object_deallocate(object
);
7491 vm_map_submap_pmap_clean(
7493 vm_map_offset_t start
,
7494 vm_map_offset_t end
,
7496 vm_map_offset_t offset
)
7498 vm_map_offset_t submap_start
;
7499 vm_map_offset_t submap_end
;
7500 vm_map_size_t remove_size
;
7501 vm_map_entry_t entry
;
7503 submap_end
= offset
+ (end
- start
);
7504 submap_start
= offset
;
7506 vm_map_lock_read(sub_map
);
7507 if (vm_map_lookup_entry(sub_map
, offset
, &entry
)) {
7508 remove_size
= (entry
->vme_end
- entry
->vme_start
);
7509 if (offset
> entry
->vme_start
) {
7510 remove_size
-= offset
- entry
->vme_start
;
7514 if (submap_end
< entry
->vme_end
) {
7516 entry
->vme_end
- submap_end
;
7518 if (entry
->is_sub_map
) {
7519 vm_map_submap_pmap_clean(
7522 start
+ remove_size
,
7526 if (map
->mapped_in_other_pmaps
&&
7527 os_ref_get_count(&map
->map_refcnt
) != 0 &&
7528 VME_OBJECT(entry
) != NULL
) {
7529 vm_object_pmap_protect_options(
7531 (VME_OFFSET(entry
) +
7539 PMAP_OPTIONS_REMOVE
);
7541 pmap_remove(map
->pmap
,
7543 (addr64_t
)(start
+ remove_size
));
7548 entry
= entry
->vme_next
;
7550 while ((entry
!= vm_map_to_entry(sub_map
))
7551 && (entry
->vme_start
< submap_end
)) {
7552 remove_size
= (entry
->vme_end
- entry
->vme_start
);
7553 if (submap_end
< entry
->vme_end
) {
7554 remove_size
-= entry
->vme_end
- submap_end
;
7556 if (entry
->is_sub_map
) {
7557 vm_map_submap_pmap_clean(
7559 (start
+ entry
->vme_start
) - offset
,
7560 ((start
+ entry
->vme_start
) - offset
) + remove_size
,
7564 if (map
->mapped_in_other_pmaps
&&
7565 os_ref_get_count(&map
->map_refcnt
) != 0 &&
7566 VME_OBJECT(entry
) != NULL
) {
7567 vm_object_pmap_protect_options(
7575 PMAP_OPTIONS_REMOVE
);
7577 pmap_remove(map
->pmap
,
7578 (addr64_t
)((start
+ entry
->vme_start
)
7580 (addr64_t
)(((start
+ entry
->vme_start
)
7581 - offset
) + remove_size
));
7584 entry
= entry
->vme_next
;
7586 vm_map_unlock_read(sub_map
);
7591 * virt_memory_guard_ast:
7593 * Handle the AST callout for a virtual memory guard.
7594 * raise an EXC_GUARD exception and terminate the task
7595 * if configured to do so.
7598 virt_memory_guard_ast(
7600 mach_exception_data_type_t code
,
7601 mach_exception_data_type_t subcode
)
7603 task_t task
= thread
->task
;
7604 assert(task
!= kernel_task
);
7605 assert(task
== current_task());
7608 behavior
= task
->task_exc_guard
;
7610 /* Is delivery enabled */
7611 if ((behavior
& TASK_EXC_GUARD_VM_DELIVER
) == 0) {
7615 /* If only once, make sure we're that once */
7616 while (behavior
& TASK_EXC_GUARD_VM_ONCE
) {
7617 uint32_t new_behavior
= behavior
& ~TASK_EXC_GUARD_VM_DELIVER
;
7619 if (OSCompareAndSwap(behavior
, new_behavior
, &task
->task_exc_guard
)) {
7622 behavior
= task
->task_exc_guard
;
7623 if ((behavior
& TASK_EXC_GUARD_VM_DELIVER
) == 0) {
7628 /* Raise exception via corpse fork or synchronously */
7629 if ((task
->task_exc_guard
& TASK_EXC_GUARD_VM_CORPSE
) &&
7630 (task
->task_exc_guard
& TASK_EXC_GUARD_VM_FATAL
) == 0) {
7631 task_violated_guard(code
, subcode
, NULL
);
7633 task_exception_notify(EXC_GUARD
, code
, subcode
);
7636 /* Terminate the task if desired */
7637 if (task
->task_exc_guard
& TASK_EXC_GUARD_VM_FATAL
) {
7638 task_bsdtask_kill(current_task());
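/*
 * Summary of the delivery modes above: if TASK_EXC_GUARD_VM_CORPSE is set
 * and the violation is not fatal, the EXC_GUARD is raised against a corpse
 * fork via task_violated_guard(); otherwise it is delivered synchronously
 * via task_exception_notify().  TASK_EXC_GUARD_VM_FATAL additionally
 * terminates the task.
 */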
7643 * vm_map_guard_exception:
7645 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7647 * Right now, we do this when we find nothing mapped, or a
7648 * gap in the mapping when a user address space deallocate
7649 * was requested. We report the address of the first gap found.
7652 vm_map_guard_exception(
7653 vm_map_offset_t gap_start
,
7656 mach_exception_code_t code
= 0;
7657 unsigned int guard_type
= GUARD_TYPE_VIRT_MEMORY
;
7658 unsigned int target
= 0; /* should we pass in pid associated with map? */
7659 mach_exception_data_type_t subcode
= (uint64_t)gap_start
;
7660 boolean_t fatal
= FALSE
;
7662 task_t task
= current_task();
7664 /* Can't deliver exceptions to kernel task */
7665 if (task
== kernel_task
) {
7669 EXC_GUARD_ENCODE_TYPE(code
, guard_type
);
7670 EXC_GUARD_ENCODE_FLAVOR(code
, reason
);
7671 EXC_GUARD_ENCODE_TARGET(code
, target
);
7673 if (task
->task_exc_guard
& TASK_EXC_GUARD_VM_FATAL
) {
7676 thread_guard_violation(current_thread(), code
, subcode
, fatal
);
7680 * vm_map_delete: [ internal use only ]
7682 * Deallocates the given address range from the target map.
7683 * Removes all user wirings. Unwires one kernel wiring if
7684 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7685 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7686 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7688 * This routine is called with map locked and leaves map locked.
7690 static kern_return_t
7693 vm_map_offset_t start
,
7694 vm_map_offset_t end
,
7698 vm_map_entry_t entry
, next
;
7699 struct vm_map_entry
*first_entry
, tmp_entry
;
7702 boolean_t need_wakeup
;
7703 unsigned int last_timestamp
= ~0; /* unlikely value */
7705 vm_map_offset_t gap_start
;
7706 __unused vm_map_offset_t save_start
= start
;
7707 __unused vm_map_offset_t save_end
= end
;
7708 const vm_map_offset_t FIND_GAP
= 1; /* a not page aligned value */
7709 const vm_map_offset_t GAPS_OK
= 2; /* a different not page aligned value */
7711 if (map
!= kernel_map
&& !(flags
& VM_MAP_REMOVE_GAPS_OK
) && !map
->terminated
) {
7712 gap_start
= FIND_GAP
;
7714 gap_start
= GAPS_OK
;
7717 interruptible
= (flags
& VM_MAP_REMOVE_INTERRUPTIBLE
) ?
7718 THREAD_ABORTSAFE
: THREAD_UNINT
;
	/*
	 * All our DMA I/O operations in IOKit are currently done by
	 * wiring through the map entries of the task requesting the I/O.
	 * Because of this, we must always wait for kernel wirings
	 * to go away on the entries before deleting them.
	 *
	 * Any caller who wants to actually remove a kernel wiring
	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
	 * properly remove one wiring instead of blasting through
	 * them all.
	 */
	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
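	/*
	 * Flags consulted by this routine (as used below):
	 * VM_MAP_REMOVE_KUNWIRE, VM_MAP_REMOVE_WAIT_FOR_KWIRE,
	 * VM_MAP_REMOVE_INTERRUPTIBLE, VM_MAP_REMOVE_NO_MAP_ALIGN,
	 * VM_MAP_REMOVE_IMMUTABLE, VM_MAP_REMOVE_NO_UNNESTING,
	 * VM_MAP_REMOVE_GAPS_OK, VM_MAP_REMOVE_SAVE_ENTRIES and
	 * VM_MAP_REMOVE_NO_PMAP_CLEANUP.
	 */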
7735 * Find the start of the region, and clip it
7737 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
7738 entry
= first_entry
;
7739 if (map
== kalloc_map
&&
7740 (entry
->vme_start
!= start
||
7741 entry
->vme_end
!= end
)) {
7742 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7743 "mismatched entry %p [0x%llx:0x%llx]\n",
7748 (uint64_t)entry
->vme_start
,
7749 (uint64_t)entry
->vme_end
);
7753 * If in a superpage, extend the range to include the start of the mapping.
7755 if (entry
->superpage_size
&& (start
& ~SUPERPAGE_MASK
)) {
7756 start
= SUPERPAGE_ROUND_DOWN(start
);
7760 if (start
== entry
->vme_start
) {
7762 * No need to clip. We don't want to cause
7763 * any unnecessary unnesting in this case...
7766 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
7767 entry
->map_aligned
&&
7768 !VM_MAP_PAGE_ALIGNED(
7770 VM_MAP_PAGE_MASK(map
))) {
7772 * The entry will no longer be
7773 * map-aligned after clipping
7774 * and the caller said it's OK.
7776 entry
->map_aligned
= FALSE
;
7778 if (map
== kalloc_map
) {
7779 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7780 " clipping %p at 0x%llx\n",
7787 vm_map_clip_start(map
, entry
, start
);
7791 * Fix the lookup hint now, rather than each
7792 * time through the loop.
7794 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
7796 if (map
->pmap
== kernel_pmap
&&
7797 os_ref_get_count(&map
->map_refcnt
) != 0) {
7798 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7799 "no map entry at 0x%llx\n",
7805 entry
= first_entry
->vme_next
;
7806 if (gap_start
== FIND_GAP
) {
7812 if (entry
->superpage_size
) {
7813 end
= SUPERPAGE_ROUND_UP(end
);
7816 need_wakeup
= FALSE
;
7818 * Step through all entries in this region
7820 s
= entry
->vme_start
;
7821 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
7823 * At this point, we have deleted all the memory entries
7824 * between "start" and "s". We still need to delete
7825 * all memory entries between "s" and "end".
7826 * While we were blocked and the map was unlocked, some
7827 * new memory entries could have been re-allocated between
7828 * "start" and "s" and we don't want to mess with those.
7829 * Some of those entries could even have been re-assembled
7830 * with an entry after "s" (in vm_map_simplify_entry()), so
7831 * we may have to vm_map_clip_start() again.
7834 if (entry
->vme_start
>= s
) {
7836 * This entry starts on or after "s"
7837 * so no need to clip its start.
7841 * This entry has been re-assembled by a
7842 * vm_map_simplify_entry(). We need to
7843 * re-clip its start.
7845 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
7846 entry
->map_aligned
&&
7847 !VM_MAP_PAGE_ALIGNED(s
,
7848 VM_MAP_PAGE_MASK(map
))) {
7850 * The entry will no longer be map-aligned
7851 * after clipping and the caller said it's OK.
7853 entry
->map_aligned
= FALSE
;
7855 if (map
== kalloc_map
) {
7856 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7857 "clipping %p at 0x%llx\n",
7864 vm_map_clip_start(map
, entry
, s
);
7866 if (entry
->vme_end
<= end
) {
7868 * This entry is going away completely, so no need
7869 * to clip and possibly cause an unnecessary unnesting.
7872 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
7873 entry
->map_aligned
&&
7874 !VM_MAP_PAGE_ALIGNED(end
,
7875 VM_MAP_PAGE_MASK(map
))) {
7877 * The entry will no longer be map-aligned
7878 * after clipping and the caller said it's OK.
7880 entry
->map_aligned
= FALSE
;
7882 if (map
== kalloc_map
) {
7883 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7884 "clipping %p at 0x%llx\n",
7891 vm_map_clip_end(map
, entry
, end
);
7894 if (entry
->permanent
) {
7895 if (map
->pmap
== kernel_pmap
) {
7896 panic("%s(%p,0x%llx,0x%llx): "
7897 "attempt to remove permanent "
7899 "%p [0x%llx:0x%llx]\n",
7905 (uint64_t) entry
->vme_start
,
7906 (uint64_t) entry
->vme_end
);
7907 } else if (flags
& VM_MAP_REMOVE_IMMUTABLE
) {
7908 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7909 entry
->permanent
= FALSE
;
7911 if (vm_map_executable_immutable_verbose
) {
7912 printf("%d[%s] %s(0x%llx,0x%llx): "
7913 "permanent entry [0x%llx:0x%llx] "
7916 (current_task()->bsd_info
7917 ? proc_name_address(current_task()->bsd_info
)
7922 (uint64_t)entry
->vme_start
,
7923 (uint64_t)entry
->vme_end
,
7925 entry
->max_protection
);
7928 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7930 DTRACE_VM5(vm_map_delete_permanent
,
7931 vm_map_offset_t
, entry
->vme_start
,
7932 vm_map_offset_t
, entry
->vme_end
,
7933 vm_prot_t
, entry
->protection
,
7934 vm_prot_t
, entry
->max_protection
,
7935 int, VME_ALIAS(entry
));
7940 if (entry
->in_transition
) {
7941 wait_result_t wait_result
;
7944 * Another thread is wiring/unwiring this entry.
7945 * Let the other thread know we are waiting.
7947 assert(s
== entry
->vme_start
);
7948 entry
->needs_wakeup
= TRUE
;
7951 * wake up anybody waiting on entries that we have
7952 * already unwired/deleted.
7955 vm_map_entry_wakeup(map
);
7956 need_wakeup
= FALSE
;
7959 wait_result
= vm_map_entry_wait(map
, interruptible
);
7961 if (interruptible
&&
7962 wait_result
== THREAD_INTERRUPTED
) {
7964 * We do not clear the needs_wakeup flag,
7965 * since we cannot tell if we were the only one.
7967 return KERN_ABORTED
;
7971 * The entry could have been clipped or it
7972 * may not exist anymore. Look it up again.
7974 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
7976 * User: use the next entry
7978 if (gap_start
== FIND_GAP
) {
7981 entry
= first_entry
->vme_next
;
7982 s
= entry
->vme_start
;
7984 entry
= first_entry
;
7985 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
7987 last_timestamp
= map
->timestamp
;
7989 } /* end in_transition */
7991 if (entry
->wired_count
) {
7992 boolean_t user_wire
;
7994 user_wire
= entry
->user_wired_count
> 0;
7997 * Remove a kernel wiring if requested
7999 if (flags
& VM_MAP_REMOVE_KUNWIRE
) {
8000 entry
->wired_count
--;
8004 * Remove all user wirings for proper accounting
8006 if (entry
->user_wired_count
> 0) {
8007 while (entry
->user_wired_count
) {
8008 subtract_wire_counts(map
, entry
, user_wire
);
8012 if (entry
->wired_count
!= 0) {
8013 assert(map
!= kernel_map
);
				/*
				 * Cannot continue.  Typical case is when
				 * a user thread has physical io pending on
				 * this page.  Either wait for the kernel
				 * wiring to go away or return an error.
				 */
8021 if (flags
& VM_MAP_REMOVE_WAIT_FOR_KWIRE
) {
8022 wait_result_t wait_result
;
8024 assert(s
== entry
->vme_start
);
8025 entry
->needs_wakeup
= TRUE
;
8026 wait_result
= vm_map_entry_wait(map
,
8029 if (interruptible
&&
8030 wait_result
== THREAD_INTERRUPTED
) {
8032 * We do not clear the
8033 * needs_wakeup flag, since we
8034 * cannot tell if we were the
8037 return KERN_ABORTED
;
8041 * The entry could have been clipped or
8042 * it may not exist anymore. Look it
8045 if (!vm_map_lookup_entry(map
, s
,
8047 assert(map
!= kernel_map
);
8049 * User: use the next entry
8051 if (gap_start
== FIND_GAP
) {
8054 entry
= first_entry
->vme_next
;
8055 s
= entry
->vme_start
;
8057 entry
= first_entry
;
8058 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8060 last_timestamp
= map
->timestamp
;
8063 return KERN_FAILURE
;
8067 entry
->in_transition
= TRUE
;
8069 * copy current entry. see comment in vm_map_wire()
8072 assert(s
== entry
->vme_start
);
8075 * We can unlock the map now. The in_transition
8076 * state guarentees existance of the entry.
8080 if (tmp_entry
.is_sub_map
) {
8082 vm_map_offset_t sub_start
, sub_end
;
8084 vm_map_offset_t pmap_addr
;
8087 sub_map
= VME_SUBMAP(&tmp_entry
);
8088 sub_start
= VME_OFFSET(&tmp_entry
);
8089 sub_end
= sub_start
+ (tmp_entry
.vme_end
-
8090 tmp_entry
.vme_start
);
8091 if (tmp_entry
.use_pmap
) {
8092 pmap
= sub_map
->pmap
;
8093 pmap_addr
= tmp_entry
.vme_start
;
8096 pmap_addr
= tmp_entry
.vme_start
;
8098 (void) vm_map_unwire_nested(sub_map
,
8103 if (VME_OBJECT(&tmp_entry
) == kernel_object
) {
8104 pmap_protect_options(
8106 tmp_entry
.vme_start
,
8109 PMAP_OPTIONS_REMOVE
,
8112 vm_fault_unwire(map
, &tmp_entry
,
8113 VME_OBJECT(&tmp_entry
) == kernel_object
,
8114 map
->pmap
, tmp_entry
.vme_start
);
8119 if (last_timestamp
+ 1 != map
->timestamp
) {
8121 * Find the entry again. It could have
8122 * been clipped after we unlocked the map.
8124 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
8125 assert((map
!= kernel_map
) &&
8126 (!entry
->is_sub_map
));
8127 if (gap_start
== FIND_GAP
) {
8130 first_entry
= first_entry
->vme_next
;
8131 s
= first_entry
->vme_start
;
8133 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8136 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8137 first_entry
= entry
;
8140 last_timestamp
= map
->timestamp
;
8142 entry
= first_entry
;
8143 while ((entry
!= vm_map_to_entry(map
)) &&
8144 (entry
->vme_start
< tmp_entry
.vme_end
)) {
8145 assert(entry
->in_transition
);
8146 entry
->in_transition
= FALSE
;
8147 if (entry
->needs_wakeup
) {
8148 entry
->needs_wakeup
= FALSE
;
8151 entry
= entry
->vme_next
;
			/*
			 * We have unwired the entry(s).  Go back and
			 * delete them.
			 */
			entry = first_entry;
8161 /* entry is unwired */
8162 assert(entry
->wired_count
== 0);
8163 assert(entry
->user_wired_count
== 0);
8165 assert(s
== entry
->vme_start
);
8167 if (flags
& VM_MAP_REMOVE_NO_PMAP_CLEANUP
) {
8169 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8170 * vm_map_delete(), some map entries might have been
8171 * transferred to a "zap_map", which doesn't have a
8172 * pmap. The original pmap has already been flushed
8173 * in the vm_map_delete() call targeting the original
8174 * map, but when we get to destroying the "zap_map",
8175 * we don't have any pmap to flush, so let's just skip
8178 } else if (entry
->is_sub_map
) {
8179 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)) >= VM_MAP_PAGE_SHIFT(map
),
8180 "map %p (%d) entry %p submap %p (%d)\n",
8181 map
, VM_MAP_PAGE_SHIFT(map
), entry
,
8183 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)));
8184 if (entry
->use_pmap
) {
8185 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)) == VM_MAP_PAGE_SHIFT(map
),
8186 "map %p (%d) entry %p submap %p (%d)\n",
8187 map
, VM_MAP_PAGE_SHIFT(map
), entry
,
8189 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)));
8190 #ifndef NO_NESTED_PMAP
8193 if (flags
& VM_MAP_REMOVE_NO_UNNESTING
) {
8195 * This is the final cleanup of the
8196 * address space being terminated.
8197 * No new mappings are expected and
8198 * we don't really need to unnest the
8199 * shared region (and lose the "global"
8200 * pmap mappings, if applicable).
8202 * Tell the pmap layer that we're
8203 * "clean" wrt nesting.
8205 pmap_flags
= PMAP_UNNEST_CLEAN
;
8208 * We're unmapping part of the nested
8209 * shared region, so we can't keep the
8214 pmap_unnest_options(
8216 (addr64_t
)entry
->vme_start
,
8217 entry
->vme_end
- entry
->vme_start
,
8219 #endif /* NO_NESTED_PMAP */
8220 if (map
->mapped_in_other_pmaps
&&
8221 os_ref_get_count(&map
->map_refcnt
) != 0) {
8222 /* clean up parent map/maps */
8223 vm_map_submap_pmap_clean(
8224 map
, entry
->vme_start
,
8230 vm_map_submap_pmap_clean(
8231 map
, entry
->vme_start
, entry
->vme_end
,
8235 } else if (VME_OBJECT(entry
) != kernel_object
&&
8236 VME_OBJECT(entry
) != compressor_object
) {
8237 object
= VME_OBJECT(entry
);
8238 if (map
->mapped_in_other_pmaps
&&
8239 os_ref_get_count(&map
->map_refcnt
) != 0) {
8240 vm_object_pmap_protect_options(
8241 object
, VME_OFFSET(entry
),
8242 entry
->vme_end
- entry
->vme_start
,
8247 PMAP_OPTIONS_REMOVE
);
8248 } else if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) ||
8249 (map
->pmap
== kernel_pmap
)) {
				/*
				 * Remove translations associated
				 * with this range unless the entry
				 * does not have an object, or
				 * it's the kernel map or a descendant
				 * since the platform could potentially
				 * create "backdoor" mappings invisible
				 * to the VM.  It is expected that
				 * objectless, non-kernel ranges
				 * do not have such VM invisible
				 * translations.
				 */
8261 pmap_remove_options(map
->pmap
,
8262 (addr64_t
)entry
->vme_start
,
8263 (addr64_t
)entry
->vme_end
,
8264 PMAP_OPTIONS_REMOVE
);
8268 if (entry
->iokit_acct
) {
8269 /* alternate accounting */
8270 DTRACE_VM4(vm_map_iokit_unmapped_region
,
8272 vm_map_offset_t
, entry
->vme_start
,
8273 vm_map_offset_t
, entry
->vme_end
,
8274 int, VME_ALIAS(entry
));
8275 vm_map_iokit_unmapped_region(map
,
8278 entry
->iokit_acct
= FALSE
;
8279 entry
->use_pmap
= FALSE
;
8283 * All pmap mappings for this map entry must have been
8287 assert(vm_map_pmap_is_empty(map
,
8292 next
= entry
->vme_next
;
8294 if (map
->pmap
== kernel_pmap
&&
8295 os_ref_get_count(&map
->map_refcnt
) != 0 &&
8296 entry
->vme_end
< end
&&
8297 (next
== vm_map_to_entry(map
) ||
8298 next
->vme_start
!= entry
->vme_end
)) {
8299 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8300 "hole after %p at 0x%llx\n",
8305 (uint64_t)entry
->vme_end
);
8309 * If the desired range didn't end with "entry", then there is a gap if
8310 * we wrapped around to the start of the map or if "entry" and "next"
8311 * aren't contiguous.
8313 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8314 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8316 if (gap_start
== FIND_GAP
&&
8317 vm_map_round_page(entry
->vme_end
, VM_MAP_PAGE_MASK(map
)) < end
&&
8318 (next
== vm_map_to_entry(map
) || entry
->vme_end
!= next
->vme_start
)) {
8319 gap_start
= entry
->vme_end
;
8321 s
= next
->vme_start
;
8322 last_timestamp
= map
->timestamp
;
8324 if (entry
->permanent
) {
8326 * A permanent entry can not be removed, so leave it
8327 * in place but remove all access permissions.
8329 entry
->protection
= VM_PROT_NONE
;
8330 entry
->max_protection
= VM_PROT_NONE
;
8331 } else if ((flags
& VM_MAP_REMOVE_SAVE_ENTRIES
) &&
8332 zap_map
!= VM_MAP_NULL
) {
8333 vm_map_size_t entry_size
;
8335 * The caller wants to save the affected VM map entries
8336 * into the "zap_map". The caller will take care of
8339 /* unlink the entry from "map" ... */
8340 vm_map_store_entry_unlink(map
, entry
);
8341 /* ... and add it to the end of the "zap_map" */
8342 vm_map_store_entry_link(zap_map
,
8343 vm_map_last_entry(zap_map
),
8345 VM_MAP_KERNEL_FLAGS_NONE
);
8346 entry_size
= entry
->vme_end
- entry
->vme_start
;
8347 map
->size
-= entry_size
;
8348 zap_map
->size
+= entry_size
;
8349 /* we didn't unlock the map, so no timestamp increase */
8352 vm_map_entry_delete(map
, entry
);
8353 /* vm_map_entry_delete unlocks the map */
8359 if (entry
== vm_map_to_entry(map
)) {
8362 if (last_timestamp
+ 1 != map
->timestamp
) {
8364 * We are responsible for deleting everything
8365 * from the given space. If someone has interfered,
8366 * we pick up where we left off. Back fills should
8367 * be all right for anyone, except map_delete, and
8368 * we have to assume that the task has been fully
8369 * disabled before we get here
8371 if (!vm_map_lookup_entry(map
, s
, &entry
)) {
8372 entry
= entry
->vme_next
;
8375 * Nothing found for s. If we weren't already done, then there is a gap.
8377 if (gap_start
== FIND_GAP
&& s
< end
) {
8380 s
= entry
->vme_start
;
8382 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8385 * others can not only allocate behind us, we can
8386 * also see coalesce while we don't have the map lock
8388 if (entry
== vm_map_to_entry(map
)) {
8392 last_timestamp
= map
->timestamp
;
8395 if (map
->wait_for_space
) {
8396 thread_wakeup((event_t
) map
);
8399 * wake up anybody waiting on entries that we have already deleted.
8402 vm_map_entry_wakeup(map
);
8405 if (gap_start
!= FIND_GAP
&& gap_start
!= GAPS_OK
) {
8406 DTRACE_VM3(kern_vm_deallocate_gap
,
8407 vm_map_offset_t
, gap_start
,
8408 vm_map_offset_t
, save_start
,
8409 vm_map_offset_t
, save_end
);
8410 if (!(flags
& VM_MAP_REMOVE_GAPS_OK
)) {
8411 vm_map_guard_exception(gap_start
, kGUARD_EXC_DEALLOC_GAP
);
8415 return KERN_SUCCESS
;
8422 * Clean out a task's map.
8429 map
->terminated
= TRUE
;
8432 return vm_map_remove(map
,
8438 * + remove immutable mappings
8439 * + allow gaps in range
8441 (VM_MAP_REMOVE_NO_UNNESTING
|
8442 VM_MAP_REMOVE_IMMUTABLE
|
8443 VM_MAP_REMOVE_GAPS_OK
));
8449 * Remove the given address range from the target map.
8450 * This is the exported form of vm_map_delete.
8455 vm_map_offset_t start
,
8456 vm_map_offset_t end
,
8459 kern_return_t result
;
8462 VM_MAP_RANGE_CHECK(map
, start
, end
);
8464 * For the zone maps, the kernel controls the allocation/freeing of memory.
8465 * Any free to the zone maps should be within the bounds of the map and
8466 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8467 * free to the zone maps into a no-op, there is a problem and we should
8470 if ((start
== end
) && zone_maps_owned(start
, 1)) {
8471 panic("Nothing being freed to a zone map. start = end = %p\n", (void *)start
);
8473 result
= vm_map_delete(map
, start
, end
, flags
, VM_MAP_NULL
);
8480 * vm_map_remove_locked:
8482 * Remove the given address range from the target locked map.
8483 * This is the exported form of vm_map_delete.
8486 vm_map_remove_locked(
8488 vm_map_offset_t start
,
8489 vm_map_offset_t end
,
8492 kern_return_t result
;
8494 VM_MAP_RANGE_CHECK(map
, start
, end
);
8495 result
= vm_map_delete(map
, start
, end
, flags
, VM_MAP_NULL
);
8501 * Routine: vm_map_copy_allocate
8504 * Allocates and initializes a map copy object.
8506 static vm_map_copy_t
8507 vm_map_copy_allocate(void)
8509 vm_map_copy_t new_copy
;
8511 new_copy
= zalloc(vm_map_copy_zone
);
8512 bzero(new_copy
, sizeof(*new_copy
));
8513 new_copy
->c_u
.hdr
.rb_head_store
.rbh_root
= (void*)(int)SKIP_RB_TREE
;
8514 vm_map_copy_first_entry(new_copy
) = vm_map_copy_to_entry(new_copy
);
8515 vm_map_copy_last_entry(new_copy
) = vm_map_copy_to_entry(new_copy
);
8520 * Routine: vm_map_copy_discard
8523 * Dispose of a map copy object (returned by
8527 vm_map_copy_discard(
8530 if (copy
== VM_MAP_COPY_NULL
) {
8534 switch (copy
->type
) {
8535 case VM_MAP_COPY_ENTRY_LIST
:
8536 while (vm_map_copy_first_entry(copy
) !=
8537 vm_map_copy_to_entry(copy
)) {
8538 vm_map_entry_t entry
= vm_map_copy_first_entry(copy
);
8540 vm_map_copy_entry_unlink(copy
, entry
);
8541 if (entry
->is_sub_map
) {
8542 vm_map_deallocate(VME_SUBMAP(entry
));
8544 vm_object_deallocate(VME_OBJECT(entry
));
8546 vm_map_copy_entry_dispose(copy
, entry
);
8549 case VM_MAP_COPY_OBJECT
:
8550 vm_object_deallocate(copy
->cpy_object
);
8552 case VM_MAP_COPY_KERNEL_BUFFER
:
8555 * The vm_map_copy_t and possibly the data buffer were
8556 * allocated by a single call to kheap_alloc(), i.e. the
8557 * vm_map_copy_t was not allocated out of the zone.
8559 if (copy
->size
> msg_ool_size_small
|| copy
->offset
) {
8560 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8561 (long long)copy
->size
, (long long)copy
->offset
);
8563 kheap_free(KHEAP_DATA_BUFFERS
, copy
->cpy_kdata
, copy
->size
);
8565 zfree(vm_map_copy_zone
, copy
);
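/*
 * Note on the switch above: ENTRY_LIST walks the copy's entry chain and
 * releases each entry's submap or object reference, OBJECT drops the single
 * object reference, and KERNEL_BUFFER frees the out-of-line data buffer
 * from KHEAP_DATA_BUFFERS before the vm_map_copy_t itself is returned to
 * vm_map_copy_zone.
 */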
8569 * Routine: vm_map_copy_copy
8572 * Move the information in a map copy object to
8573 * a new map copy object, leaving the old one
8576 * This is used by kernel routines that need
8577 * to look at out-of-line data (in copyin form)
8578 * before deciding whether to return SUCCESS.
8579 * If the routine returns FAILURE, the original
8580 * copy object will be deallocated; therefore,
8581 * these routines must make a copy of the copy
8582 * object and leave the original empty so that
8583 * deallocation will not fail.
8589 vm_map_copy_t new_copy
;
8591 if (copy
== VM_MAP_COPY_NULL
) {
8592 return VM_MAP_COPY_NULL
;
8596 * Allocate a new copy object, and copy the information
8597 * from the old one into it.
8600 new_copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
8601 memcpy((void *) new_copy
, (void *) copy
, sizeof(struct vm_map_copy
));
8602 #if __has_feature(ptrauth_calls)
8603 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
8604 new_copy
->cpy_kdata
= copy
->cpy_kdata
;
8608 if (copy
->type
== VM_MAP_COPY_ENTRY_LIST
) {
8610 * The links in the entry chain must be
8611 * changed to point to the new copy object.
8613 vm_map_copy_first_entry(copy
)->vme_prev
8614 = vm_map_copy_to_entry(new_copy
);
8615 vm_map_copy_last_entry(copy
)->vme_next
8616 = vm_map_copy_to_entry(new_copy
);
8620 * Change the old copy object into one that contains
8621 * nothing to be deallocated.
8623 copy
->type
= VM_MAP_COPY_OBJECT
;
8624 copy
->cpy_object
= VM_OBJECT_NULL
;
8627 * Return the new object.
8632 static kern_return_t
8633 vm_map_overwrite_submap_recurse(
8635 vm_map_offset_t dst_addr
,
8636 vm_map_size_t dst_size
)
8638 vm_map_offset_t dst_end
;
8639 vm_map_entry_t tmp_entry
;
8640 vm_map_entry_t entry
;
8641 kern_return_t result
;
8642 boolean_t encountered_sub_map
= FALSE
;
8647 * Verify that the destination is all writeable
8648 * initially. We have to trunc the destination
8649 * address and round the copy size or we'll end up
8650 * splitting entries in strange ways.
8653 dst_end
= vm_map_round_page(dst_addr
+ dst_size
,
8654 VM_MAP_PAGE_MASK(dst_map
));
8655 vm_map_lock(dst_map
);
8658 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
8659 vm_map_unlock(dst_map
);
8660 return KERN_INVALID_ADDRESS
;
8663 vm_map_clip_start(dst_map
,
8665 vm_map_trunc_page(dst_addr
,
8666 VM_MAP_PAGE_MASK(dst_map
)));
8667 if (tmp_entry
->is_sub_map
) {
8668 /* clipping did unnest if needed */
8669 assert(!tmp_entry
->use_pmap
);
8672 for (entry
= tmp_entry
;;) {
8673 vm_map_entry_t next
;
8675 next
= entry
->vme_next
;
8676 while (entry
->is_sub_map
) {
8677 vm_map_offset_t sub_start
;
8678 vm_map_offset_t sub_end
;
8679 vm_map_offset_t local_end
;
8681 if (entry
->in_transition
) {
8683 * Say that we are waiting, and wait for entry.
8685 entry
->needs_wakeup
= TRUE
;
8686 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8691 encountered_sub_map
= TRUE
;
8692 sub_start
= VME_OFFSET(entry
);
8694 if (entry
->vme_end
< dst_end
) {
8695 sub_end
= entry
->vme_end
;
8699 sub_end
-= entry
->vme_start
;
8700 sub_end
+= VME_OFFSET(entry
);
8701 local_end
= entry
->vme_end
;
8702 vm_map_unlock(dst_map
);
8704 result
= vm_map_overwrite_submap_recurse(
8707 sub_end
- sub_start
);
8709 if (result
!= KERN_SUCCESS
) {
8712 if (dst_end
<= entry
->vme_end
) {
8713 return KERN_SUCCESS
;
8715 vm_map_lock(dst_map
);
8716 if (!vm_map_lookup_entry(dst_map
, local_end
,
8718 vm_map_unlock(dst_map
);
8719 return KERN_INVALID_ADDRESS
;
8722 next
= entry
->vme_next
;
8725 if (!(entry
->protection
& VM_PROT_WRITE
)) {
8726 vm_map_unlock(dst_map
);
8727 return KERN_PROTECTION_FAILURE
;
8731 * If the entry is in transition, we must wait
8732 * for it to exit that state. Anything could happen
8733 * when we unlock the map, so start over.
8735 if (entry
->in_transition
) {
8737 * Say that we are waiting, and wait for entry.
8739 entry
->needs_wakeup
= TRUE
;
8740 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8746 * our range is contained completely within this map entry
8748 if (dst_end
<= entry
->vme_end
) {
8749 vm_map_unlock(dst_map
);
8750 return KERN_SUCCESS
;
8753 * check that range specified is contiguous region
8755 if ((next
== vm_map_to_entry(dst_map
)) ||
8756 (next
->vme_start
!= entry
->vme_end
)) {
8757 vm_map_unlock(dst_map
);
8758 return KERN_INVALID_ADDRESS
;
8762 * Check for permanent objects in the destination.
8764 if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) &&
8765 ((!VME_OBJECT(entry
)->internal
) ||
8766 (VME_OBJECT(entry
)->true_share
))) {
8767 if (encountered_sub_map
) {
8768 vm_map_unlock(dst_map
);
8769 return KERN_FAILURE
;
8776 vm_map_unlock(dst_map
);
8777 return KERN_SUCCESS
;
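/*
 * To summarize the checks above: the recursion verifies that every entry
 * covering [dst_addr, dst_addr + dst_size) is writeable, not in transition,
 * and contiguous with its successor, descending into submaps as needed, and
 * it fails if a permanent object is found after a submap was traversed.
 */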
8781 * Routine: vm_map_copy_overwrite
8784 * Copy the memory described by the map copy
8785 * object (copy; returned by vm_map_copyin) onto
8786 * the specified destination region (dst_map, dst_addr).
8787 * The destination must be writeable.
8789 * Unlike vm_map_copyout, this routine actually
8790 * writes over previously-mapped memory. If the
8791 * previous mapping was to a permanent (user-supplied)
8792 * memory object, it is preserved.
8794 * The attributes (protection and inheritance) of the
8795 * destination region are preserved.
8797 * If successful, consumes the copy object.
8798 * Otherwise, the caller is responsible for it.
 *	Implementation notes:
 *		To overwrite aligned temporary virtual memory, it is
 *		sufficient to remove the previous mapping and insert
 *		the new copy.  This replacement is done either on
 *		the whole region (if no permanent virtual memory
 *		objects are embedded in the destination region) or
 *		in individual map entries.
 *
 *		To overwrite permanent virtual memory, it is necessary
 *		to copy each page, as the external memory management
 *		interface currently does not provide any optimizations.
 *
 *		Unaligned memory also has to be copied.  It is possible
 *		to use 'vm_trickery' to copy the aligned data.  This is
 *		not done but not hard to implement.
 *
 *		Once a page of permanent memory has been overwritten,
 *		it is impossible to interrupt this function; otherwise,
 *		the call would be neither atomic nor location-independent.
 *		The kernel-state portion of a user thread must be
 *		interruptible.
 *
 *		It may be expensive to forward all requests that might
 *		overwrite permanent memory (vm_write, vm_copy) to
 *		uninterruptible kernel threads.  This routine may be
 *		called by interruptible threads; however, success is
 *		not guaranteed -- if the request cannot be performed
 *		atomically and interruptibly, an error indication is
 *		returned.
 */
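/*
 * A minimal, hypothetical usage sketch for the overwrite path (error
 * handling elided; the source/destination map names are placeholders):
 *
 *	vm_map_copy_t copy;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, len, FALSE);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */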
8831 static kern_return_t
8832 vm_map_copy_overwrite_nested(
8834 vm_map_address_t dst_addr
,
8836 boolean_t interruptible
,
8838 boolean_t discard_on_success
)
8840 vm_map_offset_t dst_end
;
8841 vm_map_entry_t tmp_entry
;
8842 vm_map_entry_t entry
;
8844 boolean_t aligned
= TRUE
;
8845 boolean_t contains_permanent_objects
= FALSE
;
8846 boolean_t encountered_sub_map
= FALSE
;
8847 vm_map_offset_t base_addr
;
8848 vm_map_size_t copy_size
;
8849 vm_map_size_t total_size
;
8850 int copy_page_shift
;
8854 * Check for null copy object.
8857 if (copy
== VM_MAP_COPY_NULL
) {
8858 return KERN_SUCCESS
;
8862 * Assert that the vm_map_copy is coming from the right
8863 * zone and hasn't been forged
8865 vm_map_copy_require(copy
);
8868 * Check for special kernel buffer allocated
8869 * by new_ipc_kmsg_copyin.
8872 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
8873 return vm_map_copyout_kernel_buffer(
8875 copy
, copy
->size
, TRUE
, discard_on_success
);
8879 * Only works for entry lists at the moment. Will
8880 * support page lists later.
8883 assert(copy
->type
== VM_MAP_COPY_ENTRY_LIST
);
8885 if (copy
->size
== 0) {
8886 if (discard_on_success
) {
8887 vm_map_copy_discard(copy
);
8889 return KERN_SUCCESS
;
8892 copy_page_shift
= copy
->cpy_hdr
.page_shift
;
8895 * Verify that the destination is all writeable
8896 * initially. We have to trunc the destination
8897 * address and round the copy size or we'll end up
8898 * splitting entries in strange ways.
8901 if (!VM_MAP_PAGE_ALIGNED(copy
->size
,
8902 VM_MAP_PAGE_MASK(dst_map
)) ||
8903 !VM_MAP_PAGE_ALIGNED(copy
->offset
,
8904 VM_MAP_PAGE_MASK(dst_map
)) ||
8905 !VM_MAP_PAGE_ALIGNED(dst_addr
,
8906 VM_MAP_PAGE_MASK(dst_map
)) ||
8907 copy_page_shift
!= VM_MAP_PAGE_SHIFT(dst_map
)) {
8909 dst_end
= vm_map_round_page(dst_addr
+ copy
->size
,
8910 VM_MAP_PAGE_MASK(dst_map
));
8912 dst_end
= dst_addr
+ copy
->size
;
8915 vm_map_lock(dst_map
);
8917 /* LP64todo - remove this check when vm_map_commpage64()
8918 * no longer has to stuff in a map_entry for the commpage
8919 * above the map's max_offset.
8921 if (dst_addr
>= dst_map
->max_offset
) {
8922 vm_map_unlock(dst_map
);
8923 return KERN_INVALID_ADDRESS
;
8927 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
8928 vm_map_unlock(dst_map
);
8929 return KERN_INVALID_ADDRESS
;
8931 vm_map_clip_start(dst_map
,
8933 vm_map_trunc_page(dst_addr
,
8934 VM_MAP_PAGE_MASK(dst_map
)));
8935 for (entry
= tmp_entry
;;) {
8936 vm_map_entry_t next
= entry
->vme_next
;
8938 while (entry
->is_sub_map
) {
8939 vm_map_offset_t sub_start
;
8940 vm_map_offset_t sub_end
;
8941 vm_map_offset_t local_end
;
8943 if (entry
->in_transition
) {
8945 * Say that we are waiting, and wait for entry.
8947 entry
->needs_wakeup
= TRUE
;
8948 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8953 local_end
= entry
->vme_end
;
8954 if (!(entry
->needs_copy
)) {
8955 /* if needs_copy we are a COW submap */
8956 /* in such a case we just replace so */
8957 /* there is no need for the follow- */
8959 encountered_sub_map
= TRUE
;
8960 sub_start
= VME_OFFSET(entry
);
8962 if (entry
->vme_end
< dst_end
) {
8963 sub_end
= entry
->vme_end
;
8967 sub_end
-= entry
->vme_start
;
8968 sub_end
+= VME_OFFSET(entry
);
8969 vm_map_unlock(dst_map
);
8971 kr
= vm_map_overwrite_submap_recurse(
8974 sub_end
- sub_start
);
8975 if (kr
!= KERN_SUCCESS
) {
8978 vm_map_lock(dst_map
);
8981 if (dst_end
<= entry
->vme_end
) {
8982 goto start_overwrite
;
8984 if (!vm_map_lookup_entry(dst_map
, local_end
,
8986 vm_map_unlock(dst_map
);
8987 return KERN_INVALID_ADDRESS
;
8989 next
= entry
->vme_next
;
8992 if (!(entry
->protection
& VM_PROT_WRITE
)) {
8993 vm_map_unlock(dst_map
);
8994 return KERN_PROTECTION_FAILURE
;
8998 * If the entry is in transition, we must wait
8999 * for it to exit that state. Anything could happen
9000 * when we unlock the map, so start over.
9002 if (entry
->in_transition
) {
9004 * Say that we are waiting, and wait for entry.
9006 entry
->needs_wakeup
= TRUE
;
9007 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9013 * our range is contained completely within this map entry
9015 if (dst_end
<= entry
->vme_end
) {
9019 * check that range specified is contiguous region
9021 if ((next
== vm_map_to_entry(dst_map
)) ||
9022 (next
->vme_start
!= entry
->vme_end
)) {
9023 vm_map_unlock(dst_map
);
9024 return KERN_INVALID_ADDRESS
;
9029 * Check for permanent objects in the destination.
9031 if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) &&
9032 ((!VME_OBJECT(entry
)->internal
) ||
9033 (VME_OBJECT(entry
)->true_share
))) {
9034 contains_permanent_objects
= TRUE
;
9042 * If there are permanent objects in the destination, then
9043 * the copy cannot be interrupted.
9046 if (interruptible
&& contains_permanent_objects
) {
9047 vm_map_unlock(dst_map
);
9048 return KERN_FAILURE
; /* XXX */
9053 * Make a second pass, overwriting the data
9054 * At the beginning of each loop iteration,
9055 * the next entry to be overwritten is "tmp_entry"
9056 * (initially, the value returned from the lookup above),
9057 * and the starting address expected in that entry
9061 total_size
= copy
->size
;
9062 if (encountered_sub_map
) {
9064 /* re-calculate tmp_entry since we've had the map */
9066 if (!vm_map_lookup_entry( dst_map
, dst_addr
, &tmp_entry
)) {
9067 vm_map_unlock(dst_map
);
9068 return KERN_INVALID_ADDRESS
;
9071 copy_size
= copy
->size
;
9074 base_addr
= dst_addr
;
9076 /* deconstruct the copy object and do in parts */
9077 /* only in sub_map, interruptable case */
9078 vm_map_entry_t copy_entry
;
9079 vm_map_entry_t previous_prev
= VM_MAP_ENTRY_NULL
;
9080 vm_map_entry_t next_copy
= VM_MAP_ENTRY_NULL
;
9082 int remaining_entries
= 0;
9083 vm_map_offset_t new_offset
= 0;
9085 for (entry
= tmp_entry
; copy_size
== 0;) {
9086 vm_map_entry_t next
;
9088 next
= entry
->vme_next
;
			/* tmp_entry and base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpace tmp_entry, and the copy_size */
			/* may reflect the distance between them. */
			/* If the current entry is found to be in transition, */
			/* we will start over at the beginning or the last */
			/* encounter of a submap as dictated by base_addr; */
			/* we will zero copy_size accordingly. */
9098 if (entry
->in_transition
) {
9100 * Say that we are waiting, and wait for entry.
9102 entry
->needs_wakeup
= TRUE
;
9103 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9105 if (!vm_map_lookup_entry(dst_map
, base_addr
,
9107 vm_map_unlock(dst_map
);
9108 return KERN_INVALID_ADDRESS
;
9114 if (entry
->is_sub_map
) {
9115 vm_map_offset_t sub_start
;
9116 vm_map_offset_t sub_end
;
9117 vm_map_offset_t local_end
;
9119 if (entry
->needs_copy
) {
9120 /* if this is a COW submap */
9121 /* just back the range with a */
9122 /* anonymous entry */
9123 if (entry
->vme_end
< dst_end
) {
9124 sub_end
= entry
->vme_end
;
9128 if (entry
->vme_start
< base_addr
) {
9129 sub_start
= base_addr
;
9131 sub_start
= entry
->vme_start
;
9134 dst_map
, entry
, sub_end
);
9136 dst_map
, entry
, sub_start
);
9137 assert(!entry
->use_pmap
);
9138 assert(!entry
->iokit_acct
);
9139 entry
->use_pmap
= TRUE
;
9140 entry
->is_sub_map
= FALSE
;
9143 VME_OBJECT_SET(entry
, VM_OBJECT_NULL
);
9144 VME_OFFSET_SET(entry
, 0);
9145 entry
->is_shared
= FALSE
;
9146 entry
->needs_copy
= FALSE
;
9147 entry
->protection
= VM_PROT_DEFAULT
;
9148 entry
->max_protection
= VM_PROT_ALL
;
9149 entry
->wired_count
= 0;
9150 entry
->user_wired_count
= 0;
9151 if (entry
->inheritance
9152 == VM_INHERIT_SHARE
) {
9153 entry
->inheritance
= VM_INHERIT_COPY
;
9157 /* first take care of any non-sub_map */
9158 /* entries to send */
9159 if (base_addr
< entry
->vme_start
) {
9162 entry
->vme_start
- base_addr
;
9165 sub_start
= VME_OFFSET(entry
);
9167 if (entry
->vme_end
< dst_end
) {
9168 sub_end
= entry
->vme_end
;
9172 sub_end
-= entry
->vme_start
;
9173 sub_end
+= VME_OFFSET(entry
);
9174 local_end
= entry
->vme_end
;
9175 vm_map_unlock(dst_map
);
9176 copy_size
= sub_end
- sub_start
;
9178 /* adjust the copy object */
9179 if (total_size
> copy_size
) {
9180 vm_map_size_t local_size
= 0;
9181 vm_map_size_t entry_size
;
9184 new_offset
= copy
->offset
;
9185 copy_entry
= vm_map_copy_first_entry(copy
);
9186 while (copy_entry
!=
9187 vm_map_copy_to_entry(copy
)) {
9188 entry_size
= copy_entry
->vme_end
-
9189 copy_entry
->vme_start
;
9190 if ((local_size
< copy_size
) &&
9191 ((local_size
+ entry_size
)
9193 vm_map_copy_clip_end(copy
,
9195 copy_entry
->vme_start
+
9196 (copy_size
- local_size
));
9197 entry_size
= copy_entry
->vme_end
-
9198 copy_entry
->vme_start
;
9199 local_size
+= entry_size
;
9200 new_offset
+= entry_size
;
9202 if (local_size
>= copy_size
) {
9203 next_copy
= copy_entry
->vme_next
;
9204 copy_entry
->vme_next
=
9205 vm_map_copy_to_entry(copy
);
9207 copy
->cpy_hdr
.links
.prev
;
9208 copy
->cpy_hdr
.links
.prev
= copy_entry
;
9209 copy
->size
= copy_size
;
9211 copy
->cpy_hdr
.nentries
;
9212 remaining_entries
-= nentries
;
9213 copy
->cpy_hdr
.nentries
= nentries
;
9216 local_size
+= entry_size
;
9217 new_offset
+= entry_size
;
9220 copy_entry
= copy_entry
->vme_next
;
9224 if ((entry
->use_pmap
) && (pmap
== NULL
)) {
9225 kr
= vm_map_copy_overwrite_nested(
9230 VME_SUBMAP(entry
)->pmap
,
9232 } else if (pmap
!= NULL
) {
9233 kr
= vm_map_copy_overwrite_nested(
9237 interruptible
, pmap
,
9240 kr
= vm_map_copy_overwrite_nested(
9248 if (kr
!= KERN_SUCCESS
) {
9249 if (next_copy
!= NULL
) {
9250 copy
->cpy_hdr
.nentries
+=
9252 copy
->cpy_hdr
.links
.prev
->vme_next
=
9254 copy
->cpy_hdr
.links
.prev
9256 copy
->size
= total_size
;
9260 if (dst_end
<= local_end
) {
9261 return KERN_SUCCESS
;
9263 /* otherwise copy no longer exists, it was */
9264 /* destroyed after successful copy_overwrite */
9265 copy
= vm_map_copy_allocate();
9266 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9267 copy
->offset
= new_offset
;
9268 copy
->cpy_hdr
.page_shift
= copy_page_shift
;
9272 * this does not seem to deal with
9273 * the VM map store (R&B tree)
9276 total_size
-= copy_size
;
9278 /* put back remainder of copy in container */
9279 if (next_copy
!= NULL
) {
9280 copy
->cpy_hdr
.nentries
= remaining_entries
;
9281 copy
->cpy_hdr
.links
.next
= next_copy
;
9282 copy
->cpy_hdr
.links
.prev
= previous_prev
;
9283 copy
->size
= total_size
;
9284 next_copy
->vme_prev
=
9285 vm_map_copy_to_entry(copy
);
9288 base_addr
= local_end
;
9289 vm_map_lock(dst_map
);
9290 if (!vm_map_lookup_entry(dst_map
,
9291 local_end
, &tmp_entry
)) {
9292 vm_map_unlock(dst_map
);
9293 return KERN_INVALID_ADDRESS
;
9298 if (dst_end
<= entry
->vme_end
) {
9299 copy_size
= dst_end
- base_addr
;
9303 if ((next
== vm_map_to_entry(dst_map
)) ||
9304 (next
->vme_start
!= entry
->vme_end
)) {
9305 vm_map_unlock(dst_map
);
9306 return KERN_INVALID_ADDRESS
;
9315 /* adjust the copy object */
9316 if (total_size
> copy_size
) {
9317 vm_map_size_t local_size
= 0;
9318 vm_map_size_t entry_size
;
9320 new_offset
= copy
->offset
;
9321 copy_entry
= vm_map_copy_first_entry(copy
);
9322 while (copy_entry
!= vm_map_copy_to_entry(copy
)) {
9323 entry_size
= copy_entry
->vme_end
-
9324 copy_entry
->vme_start
;
9325 if ((local_size
< copy_size
) &&
9326 ((local_size
+ entry_size
)
9328 vm_map_copy_clip_end(copy
, copy_entry
,
9329 copy_entry
->vme_start
+
9330 (copy_size
- local_size
));
9331 entry_size
= copy_entry
->vme_end
-
9332 copy_entry
->vme_start
;
9333 local_size
+= entry_size
;
9334 new_offset
+= entry_size
;
9336 if (local_size
>= copy_size
) {
9337 next_copy
= copy_entry
->vme_next
;
9338 copy_entry
->vme_next
=
9339 vm_map_copy_to_entry(copy
);
9341 copy
->cpy_hdr
.links
.prev
;
9342 copy
->cpy_hdr
.links
.prev
= copy_entry
;
9343 copy
->size
= copy_size
;
9345 copy
->cpy_hdr
.nentries
;
9346 remaining_entries
-= nentries
;
9347 copy
->cpy_hdr
.nentries
= nentries
;
9350 local_size
+= entry_size
;
9351 new_offset
+= entry_size
;
9354 copy_entry
= copy_entry
->vme_next
;
9364 local_pmap
= dst_map
->pmap
;
9367 if ((kr
= vm_map_copy_overwrite_aligned(
9368 dst_map
, tmp_entry
, copy
,
9369 base_addr
, local_pmap
)) != KERN_SUCCESS
) {
9370 if (next_copy
!= NULL
) {
9371 copy
->cpy_hdr
.nentries
+=
9373 copy
->cpy_hdr
.links
.prev
->vme_next
=
9375 copy
->cpy_hdr
.links
.prev
=
9377 copy
->size
+= copy_size
;
9381 vm_map_unlock(dst_map
);
9386 * if the copy and dst address are misaligned but the same
9387 * offset within the page we can copy_not_aligned the
9388 * misaligned parts and copy aligned the rest. If they are
9389 * aligned but len is unaligned we simply need to copy
9390 * the end bit unaligned. We'll need to split the misaligned
9391 * bits of the region in this case !
9393 /* ALWAYS UNLOCKS THE dst_map MAP */
9394 kr
= vm_map_copy_overwrite_unaligned(
9399 discard_on_success
);
9400 if (kr
!= KERN_SUCCESS
) {
9401 if (next_copy
!= NULL
) {
9402 copy
->cpy_hdr
.nentries
+=
9404 copy
->cpy_hdr
.links
.prev
->vme_next
=
9406 copy
->cpy_hdr
.links
.prev
=
9408 copy
->size
+= copy_size
;
9413 total_size
-= copy_size
;
9414 if (total_size
== 0) {
9417 base_addr
+= copy_size
;
9419 copy
->offset
= new_offset
;
9420 if (next_copy
!= NULL
) {
9421 copy
->cpy_hdr
.nentries
= remaining_entries
;
9422 copy
->cpy_hdr
.links
.next
= next_copy
;
9423 copy
->cpy_hdr
.links
.prev
= previous_prev
;
9424 next_copy
->vme_prev
= vm_map_copy_to_entry(copy
);
9425 copy
->size
= total_size
;
9427 vm_map_lock(dst_map
);
9429 if (!vm_map_lookup_entry(dst_map
,
9430 base_addr
, &tmp_entry
)) {
9431 vm_map_unlock(dst_map
);
9432 return KERN_INVALID_ADDRESS
;
9434 if (tmp_entry
->in_transition
) {
9435 entry
->needs_wakeup
= TRUE
;
9436 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9441 vm_map_clip_start(dst_map
,
9443 vm_map_trunc_page(base_addr
,
9444 VM_MAP_PAGE_MASK(dst_map
)));
9450 * Throw away the vm_map_copy object
9452 if (discard_on_success
) {
9453 vm_map_copy_discard(copy
);
9456 return KERN_SUCCESS
;
9457 }/* vm_map_copy_overwrite */
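/*
 * The exported vm_map_copy_overwrite() below tries to split a large,
 * page-misaligned overwrite into an unaligned "head", a page-aligned
 * middle, and an unaligned "tail", each handed separately to
 * vm_map_copy_overwrite_nested().  The optimization is skipped for
 * interruptible callers, non-entry-list copies, small copies, incompatible
 * misalignment, or destinations containing submaps.
 */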
9460 vm_map_copy_overwrite(
9462 vm_map_offset_t dst_addr
,
9464 vm_map_size_t copy_size
,
9465 boolean_t interruptible
)
9467 vm_map_size_t head_size
, tail_size
;
9468 vm_map_copy_t head_copy
, tail_copy
;
9469 vm_map_offset_t head_addr
, tail_addr
;
9470 vm_map_entry_t entry
;
9472 vm_map_offset_t effective_page_mask
, effective_page_size
;
9473 int copy_page_shift
;
9482 if (interruptible
||
9483 copy
== VM_MAP_COPY_NULL
||
9484 copy
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
9486 * We can't split the "copy" map if we're interruptible
9487 * or if we don't have a "copy" map...
9490 return vm_map_copy_overwrite_nested(dst_map
,
9498 copy_page_shift
= VM_MAP_COPY_PAGE_SHIFT(copy
);
9499 if (copy_page_shift
< PAGE_SHIFT
||
9500 VM_MAP_PAGE_SHIFT(dst_map
) < PAGE_SHIFT
) {
9504 if (VM_MAP_PAGE_SHIFT(dst_map
) < PAGE_SHIFT
) {
9505 effective_page_mask
= VM_MAP_PAGE_MASK(dst_map
);
9507 effective_page_mask
= MAX(VM_MAP_PAGE_MASK(dst_map
), PAGE_MASK
);
9508 effective_page_mask
= MAX(VM_MAP_COPY_PAGE_MASK(copy
),
9509 effective_page_mask
);
9511 effective_page_size
= effective_page_mask
+ 1;
9513 if (copy_size
< VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES
* effective_page_size
) {
		/*
		 * Too small to bother with optimizing...
		 */
		goto blunt_copy;
	}

	if ((dst_addr & effective_page_mask) !=
	    (copy->offset & effective_page_mask)) {
		/*
		 * Incompatible mis-alignment of source and destination...
		 */
		goto blunt_copy;
	}

	/*
	 * Proper alignment or identical mis-alignment at the beginning.
	 * Let's try and do a small unaligned copy first (if needed)
	 * and then an aligned copy for the rest.
	 */
	if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
		head_addr = dst_addr;
		head_size = (effective_page_size -
		    (copy->offset & effective_page_mask));
		head_size = MIN(head_size, copy_size);
	}
	if (!vm_map_page_aligned(copy->offset + copy_size,
	    effective_page_mask)) {
		/*
		 * Mis-alignment at the end.
		 * Do an aligned copy up to the last page and
		 * then an unaligned copy for the remaining bytes.
		 */
		tail_size = ((copy->offset + copy_size) &
		    effective_page_mask);
		tail_size = MIN(tail_size, copy_size);
		tail_addr = dst_addr + copy_size - tail_size;
		assert(tail_addr >= head_addr + head_size);
	}
	assert(head_size + tail_size <= copy_size);
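
	/*
	 * Illustrative worked example (not part of the original source),
	 * assuming a hypothetical 16KB effective page size
	 * (effective_page_mask == 0x3fff): for copy->offset == 0x104000800,
	 * copy_size == 0x9000 and an identically mis-aligned dst_addr,
	 *	head_size = 0x4000 - (0x800 & 0x3fff) = 0x3800
	 *	tail_size = (0x800 + 0x9000) & 0x3fff  = 0x1800
	 * leaving a 0x4000-byte page-aligned middle that can go through the
	 * aligned (virtual) copy path; the head and tail are copied
	 * physically by the unaligned path.
	 */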
	if (head_size + tail_size == copy_size) {
		/*
		 * It's all unaligned, no optimization possible...
		 */
		goto blunt_copy;
	}

	/*
	 * Can't optimize if there are any submaps in the
	 * destination due to the way we free the "copy" map
	 * progressively in vm_map_copy_overwrite_nested()
	 * below.
	 */
	vm_map_lock_read(dst_map);
	if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
		vm_map_unlock_read(dst_map);
		goto blunt_copy;
	}
	for (;
	    (entry != vm_map_copy_to_entry(copy) &&
	    entry->vme_start < dst_addr + copy_size);
	    entry = entry->vme_next) {
		if (entry->is_sub_map) {
			vm_map_unlock_read(dst_map);
			goto blunt_copy;
		}
	}
	vm_map_unlock_read(dst_map);
	/*
	 * Unaligned copy of the first "head_size" bytes, to reach
	 * a page boundary.
	 */
	if (head_size) {
		/*
		 * Extract "head_copy" out of "copy".
		 */
		head_copy = vm_map_copy_allocate();
		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
		head_copy->cpy_hdr.entries_pageable =
		    copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&head_copy->cpy_hdr);
		head_copy->cpy_hdr.page_shift = copy_page_shift;

		entry = vm_map_copy_first_entry(copy);
		if (entry->vme_end < copy->offset + head_size) {
			head_size = entry->vme_end - copy->offset;
		}

		head_copy->offset = copy->offset;
		head_copy->size = head_size;
		copy->offset += head_size;
		copy->size -= head_size;
		copy_size -= head_size;
		assert(copy_size > 0);

		vm_map_copy_clip_end(copy, entry, copy->offset);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(head_copy,
		    vm_map_copy_to_entry(head_copy),
		    entry);

		/*
		 * Do the unaligned copy.
		 */
		kr = vm_map_copy_overwrite_nested(dst_map,
			   head_addr,
			   head_copy,
			   interruptible,
			   (pmap_t) NULL,
			   FALSE);
		if (kr != KERN_SUCCESS) {
			goto done;
		}
	}

	if (tail_size) {
		/*
		 * Extract "tail_copy" out of "copy".
		 */
		tail_copy = vm_map_copy_allocate();
		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
		tail_copy->cpy_hdr.entries_pageable =
		    copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&tail_copy->cpy_hdr);
		tail_copy->cpy_hdr.page_shift = copy_page_shift;

		tail_copy->offset = copy->offset + copy_size - tail_size;
		tail_copy->size = tail_size;

		copy->size -= tail_size;
		copy_size -= tail_size;
		assert(copy_size > 0);

		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(tail_copy,
		    vm_map_copy_last_entry(tail_copy),
		    entry);
	}
	/*
	 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
	 * we want to avoid TOCTOU issues w.r.t copy->size but
	 * we don't need to change vm_map_copy_overwrite_nested()
	 * and all other vm_map_copy_overwrite variants.
	 *
	 * So we assign the original copy_size that was passed into
	 * this routine back to copy.
	 *
	 * This use of local 'copy_size' passed into this routine is
	 * to try and protect against TOCTOU attacks where the kernel
	 * has been exploited. We don't expect this to be an issue
	 * during normal system operation.
	 */
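
	/*
	 * Illustrative sketch (not part of the original source) of the
	 * defensive pattern applied here: snapshot the size the caller
	 * validated, then both check and re-impose it before it drives the
	 * overwrite, so a copy->size modified behind our back cannot widen
	 * the operation:
	 *
	 *	vm_map_size_t validated_size = copy->size;  // snapshot at entry
	 *	...                                         // locks may be dropped
	 *	assert(copy->size == validated_size);       // detect tampering
	 *	copy->size = validated_size;                // pin the value used below
	 */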
	assertf(copy->size == copy_size,
	    "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n",
	    (uint64_t) copy_size, (uint64_t) copy->size);
	copy->size = copy_size;
	/*
	 * Copy most (or possibly all) of the data.
	 */
	kr = vm_map_copy_overwrite_nested(dst_map,
		   dst_addr + head_size,
		   copy,
		   interruptible,
		   (pmap_t) NULL,
		   FALSE);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	if (tail_size) {
		kr = vm_map_copy_overwrite_nested(dst_map,
			   tail_addr,
			   tail_copy,
			   interruptible,
			   (pmap_t) NULL,
			   FALSE);
	}

done:
	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
	if (kr == KERN_SUCCESS) {
		/*
		 * Discard all the copy maps.
		 */
		if (head_copy) {
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		vm_map_copy_discard(copy);
		if (tail_copy) {
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	} else {
		/*
		 * Re-assemble the original copy map.
		 */
		if (head_copy) {
			entry = vm_map_copy_first_entry(head_copy);
			vm_map_copy_entry_unlink(head_copy, entry);
			vm_map_copy_entry_link(copy,
			    vm_map_copy_to_entry(copy),
			    entry);
			copy->offset -= head_size;
			copy->size += head_size;
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		if (tail_copy) {
			entry = vm_map_copy_last_entry(tail_copy);
			vm_map_copy_entry_unlink(tail_copy, entry);
			vm_map_copy_entry_link(copy,
			    vm_map_copy_last_entry(copy),
			    entry);
			copy->size += tail_size;
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	}
	return kr;
}
/*
 *	Routine:	vm_map_copy_overwrite_unaligned	[internal use only]
 *
 *	Description:
 *	Physically copy unaligned data
 *
 *	Implementation:
 *	Unaligned parts of pages have to be physically copied.  We use
 *	a modified form of vm_fault_copy (which understands non-aligned
 *	page offsets and sizes) to do the copy.  We attempt to copy as
 *	much memory in one go as possible, however vm_fault_copy copies
 *	within 1 memory object so we have to find the smaller of "amount left"
 *	"source object data size" and "target object data size".  With
 *	unaligned data we don't need to split regions, therefore the source
 *	(copy) object should be one map entry, the target range may be split
 *	over multiple map entries however.  In any event we are pessimistic
 *	about these assumptions.
 *
 *	Assumptions:
 *	dst_map is locked on entry and is returned locked on success,
 *	unlocked on error.
 */

static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t        dst_map,
	vm_map_entry_t  entry,
	vm_map_copy_t   copy,
	vm_map_offset_t start,
	boolean_t       discard_on_success)
{
	vm_map_entry_t          copy_entry;
	vm_map_entry_t          copy_entry_next;
	vm_map_version_t        version;
	vm_object_t             dst_object;
	vm_object_offset_t      dst_offset;
	vm_object_offset_t      src_offset;
	vm_object_offset_t      entry_offset;
	vm_map_offset_t         entry_end;
	vm_map_size_t           src_size,
	    dst_size,
	    copy_size,
	    amount_left;
	kern_return_t           kr = KERN_SUCCESS;
9789 copy_entry
= vm_map_copy_first_entry(copy
);
9791 vm_map_lock_write_to_read(dst_map
);
9793 src_offset
= copy
->offset
- trunc_page_mask_64(copy
->offset
, VM_MAP_COPY_PAGE_MASK(copy
));
9794 amount_left
= copy
->size
;
	/*
	 *	unaligned so we never clipped this entry, we need the offset into
	 *	the vm_object not just the data.
	 */
9799 while (amount_left
> 0) {
9800 if (entry
== vm_map_to_entry(dst_map
)) {
9801 vm_map_unlock_read(dst_map
);
9802 return KERN_INVALID_ADDRESS
;
9805 /* "start" must be within the current map entry */
9806 assert((start
>= entry
->vme_start
) && (start
< entry
->vme_end
));
9808 dst_offset
= start
- entry
->vme_start
;
9810 dst_size
= entry
->vme_end
- start
;
9812 src_size
= copy_entry
->vme_end
-
9813 (copy_entry
->vme_start
+ src_offset
);
		if (dst_size < src_size) {
			/*
			 *	we can only copy dst_size bytes before
			 *	we have to get the next destination entry
			 */
			copy_size = dst_size;
		} else {
			/*
			 *	we can only copy src_size bytes before
			 *	we have to get the next source copy entry
			 */
			copy_size = src_size;
		}
		if (copy_size > amount_left) {
			copy_size = amount_left;
		}
		/*
		 *	Entry needs copy, create a shadow object for
		 *	the copy-on-write region.
		 */
9836 if (entry
->needs_copy
&&
9837 ((entry
->protection
& VM_PROT_WRITE
) != 0)) {
9838 if (vm_map_lock_read_to_write(dst_map
)) {
9839 vm_map_lock_read(dst_map
);
9842 VME_OBJECT_SHADOW(entry
,
9843 (vm_map_size_t
)(entry
->vme_end
9844 - entry
->vme_start
));
9845 entry
->needs_copy
= FALSE
;
9846 vm_map_lock_write_to_read(dst_map
);
9848 dst_object
= VME_OBJECT(entry
);
9850 * unlike with the virtual (aligned) copy we're going
9851 * to fault on it therefore we need a target object.
9853 if (dst_object
== VM_OBJECT_NULL
) {
9854 if (vm_map_lock_read_to_write(dst_map
)) {
9855 vm_map_lock_read(dst_map
);
9858 dst_object
= vm_object_allocate((vm_map_size_t
)
9859 entry
->vme_end
- entry
->vme_start
);
9860 VME_OBJECT_SET(entry
, dst_object
);
9861 VME_OFFSET_SET(entry
, 0);
9862 assert(entry
->use_pmap
);
9863 vm_map_lock_write_to_read(dst_map
);
		/*
		 * Take an object reference and unlock map. The "entry" may
		 * disappear or change when the map is unlocked.
		 */
		vm_object_reference(dst_object);
		version.main_timestamp = dst_map->timestamp;
		entry_offset = VME_OFFSET(entry);
		entry_end = entry->vme_end;
		vm_map_unlock_read(dst_map);
		/*
		 * Copy as much as possible in one pass
		 */
		kr = vm_fault_copy(
			VME_OBJECT(copy_entry),
			VME_OFFSET(copy_entry) + src_offset,
			&copy_size,
			dst_object,
			entry_offset + dst_offset,
			dst_map,
			&version,
			THREAD_UNINT);

		start += copy_size;
		src_offset += copy_size;
		amount_left -= copy_size;
		/*
		 * Release the object reference
		 */
		vm_object_deallocate(dst_object);
		/*
		 * If a hard error occurred, return it now
		 */
		if (kr != KERN_SUCCESS) {
			return kr;
		}
9901 if ((copy_entry
->vme_start
+ src_offset
) == copy_entry
->vme_end
9902 || amount_left
== 0) {
9904 * all done with this copy entry, dispose.
9906 copy_entry_next
= copy_entry
->vme_next
;
9908 if (discard_on_success
) {
9909 vm_map_copy_entry_unlink(copy
, copy_entry
);
9910 assert(!copy_entry
->is_sub_map
);
9911 vm_object_deallocate(VME_OBJECT(copy_entry
));
9912 vm_map_copy_entry_dispose(copy
, copy_entry
);
9915 if (copy_entry_next
== vm_map_copy_to_entry(copy
) &&
9918 * not finished copying but run out of source
9920 return KERN_INVALID_ADDRESS
;
9923 copy_entry
= copy_entry_next
;
9928 if (amount_left
== 0) {
9929 return KERN_SUCCESS
;
9932 vm_map_lock_read(dst_map
);
9933 if (version
.main_timestamp
== dst_map
->timestamp
) {
9934 if (start
== entry_end
) {
9936 * destination region is split. Use the version
9937 * information to avoid a lookup in the normal
9940 entry
= entry
->vme_next
;
9942 * should be contiguous. Fail if we encounter
9943 * a hole in the destination.
9945 if (start
!= entry
->vme_start
) {
9946 vm_map_unlock_read(dst_map
);
9947 return KERN_INVALID_ADDRESS
;
9952 * Map version check failed.
9953 * we must lookup the entry because somebody
9954 * might have changed the map behind our backs.
9957 if (!vm_map_lookup_entry(dst_map
, start
, &entry
)) {
9958 vm_map_unlock_read(dst_map
);
9959 return KERN_INVALID_ADDRESS
;
9964 return KERN_SUCCESS
;
9965 }/* vm_map_copy_overwrite_unaligned */
/*
 *	Routine: vm_map_copy_overwrite_aligned	[internal use only]
 *
 *	Description:
 *	Does all the vm_trickery possible for whole pages.
 *
 *	Implementation:
 *
 *	If there are no permanent objects in the destination,
 *	and the source and destination map entry zones match,
 *	and the destination map entry is not shared,
 *	then the map entries can be deleted and replaced
 *	with those from the copy.  The following code is the
 *	basic idea of what to do, but there are lots of annoying
 *	little details about getting protection and inheritance
 *	right.  Should add protection, inheritance, and sharing checks
 *	to the above pass and make sure that no wiring is involved.
 */

int vm_map_copy_overwrite_aligned_src_not_internal = 0;
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
int vm_map_copy_overwrite_aligned_src_large = 0;
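
/*
 * Illustrative sketch (not part of the original source) of the test that
 * selects the optimized entry-replacement path below: the destination entry
 * may only be replaced wholesale when it maps temporary, unshared, anonymous
 * memory (or is still copy-on-write); anything else falls through to a
 * physical copy of the data:
 *
 *	object = VME_OBJECT(entry);
 *	if ((!entry->is_shared &&
 *	    ((object == VM_OBJECT_NULL) ||
 *	    (object->internal && !object->true_share))) ||
 *	    entry->needs_copy) {
 *		// drop the old object and take the copy entry's object
 *	} else {
 *		// slow path: vm_fault_copy() the data into the existing object
 *	}
 */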
static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t        dst_map,
	vm_map_entry_t  tmp_entry,
	vm_map_copy_t   copy,
	vm_map_offset_t start,
	__unused pmap_t pmap)
{
	vm_object_t     object;
	vm_map_entry_t  copy_entry;
	vm_map_size_t   copy_size;
	vm_map_size_t   size;
	vm_map_entry_t  entry;
10004 while ((copy_entry
= vm_map_copy_first_entry(copy
))
10005 != vm_map_copy_to_entry(copy
)) {
10006 copy_size
= (copy_entry
->vme_end
- copy_entry
->vme_start
);
10009 if (entry
->is_sub_map
) {
10010 /* unnested when clipped earlier */
10011 assert(!entry
->use_pmap
);
10013 if (entry
== vm_map_to_entry(dst_map
)) {
10014 vm_map_unlock(dst_map
);
10015 return KERN_INVALID_ADDRESS
;
10017 size
= (entry
->vme_end
- entry
->vme_start
);
10019 * Make sure that no holes popped up in the
10020 * address map, and that the protection is
10021 * still valid, in case the map was unlocked
10025 if ((entry
->vme_start
!= start
) || ((entry
->is_sub_map
)
10026 && !entry
->needs_copy
)) {
10027 vm_map_unlock(dst_map
);
10028 return KERN_INVALID_ADDRESS
;
10030 assert(entry
!= vm_map_to_entry(dst_map
));
10033 * Check protection again
10036 if (!(entry
->protection
& VM_PROT_WRITE
)) {
10037 vm_map_unlock(dst_map
);
10038 return KERN_PROTECTION_FAILURE
;
10042 * Adjust to source size first
10045 if (copy_size
< size
) {
10046 if (entry
->map_aligned
&&
10047 !VM_MAP_PAGE_ALIGNED(entry
->vme_start
+ copy_size
,
10048 VM_MAP_PAGE_MASK(dst_map
))) {
10049 /* no longer map-aligned */
10050 entry
->map_aligned
= FALSE
;
10052 vm_map_clip_end(dst_map
, entry
, entry
->vme_start
+ copy_size
);
10057 * Adjust to destination size
10060 if (size
< copy_size
) {
10061 vm_map_copy_clip_end(copy
, copy_entry
,
10062 copy_entry
->vme_start
+ size
);
10066 assert((entry
->vme_end
- entry
->vme_start
) == size
);
10067 assert((tmp_entry
->vme_end
- tmp_entry
->vme_start
) == size
);
10068 assert((copy_entry
->vme_end
- copy_entry
->vme_start
) == size
);
10071 * If the destination contains temporary unshared memory,
10072 * we can perform the copy by throwing it away and
10073 * installing the source data.
10076 object
= VME_OBJECT(entry
);
10077 if ((!entry
->is_shared
&&
10078 ((object
== VM_OBJECT_NULL
) ||
10079 (object
->internal
&& !object
->true_share
))) ||
10080 entry
->needs_copy
) {
10081 vm_object_t old_object
= VME_OBJECT(entry
);
10082 vm_object_offset_t old_offset
= VME_OFFSET(entry
);
10083 vm_object_offset_t offset
;
10086 * Ensure that the source and destination aren't
10089 if (old_object
== VME_OBJECT(copy_entry
) &&
10090 old_offset
== VME_OFFSET(copy_entry
)) {
10091 vm_map_copy_entry_unlink(copy
, copy_entry
);
10092 vm_map_copy_entry_dispose(copy
, copy_entry
);
10094 if (old_object
!= VM_OBJECT_NULL
) {
10095 vm_object_deallocate(old_object
);
10098 start
= tmp_entry
->vme_end
;
10099 tmp_entry
= tmp_entry
->vme_next
;
#if XNU_TARGET_OS_OSX
#define __TRADEOFF1_OBJ_SIZE    (64 * 1024 * 1024)      /* 64 MB */
#define __TRADEOFF1_COPY_SIZE   (128 * 1024)            /* 128 KB */
		if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
		    VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
		    copy_size <= __TRADEOFF1_COPY_SIZE) {
			/*
			 * Virtual vs. Physical copy tradeoff #1.
			 *
			 * Copying only a few pages out of a large
			 * object:  do a physical copy instead of
			 * a virtual copy, to avoid possibly keeping
			 * the entire large object alive because of
			 * those few copy-on-write pages.
			 */
			vm_map_copy_overwrite_aligned_src_large++;
			goto slow_copy;
		}
#endif /* XNU_TARGET_OS_OSX */
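
		/*
		 * Illustrative example (not part of the original source):
		 * with the thresholds above, copying 64KB out of a 512MB
		 * source object takes the physical-copy path (64KB <= 128KB
		 * and 512MB >= 64MB), so the large object is not kept alive
		 * by a handful of copy-on-write pages; a 1MB copy from the
		 * same object exceeds __TRADEOFF1_COPY_SIZE and stays on the
		 * entry-replacement (virtual copy) path.
		 */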
10123 if ((dst_map
->pmap
!= kernel_pmap
) &&
10124 (VME_ALIAS(entry
) >= VM_MEMORY_MALLOC
) &&
10125 (VME_ALIAS(entry
) <= VM_MEMORY_MALLOC_MEDIUM
)) {
10126 vm_object_t new_object
, new_shadow
;
10129 * We're about to map something over a mapping
10130 * established by malloc()...
10132 new_object
= VME_OBJECT(copy_entry
);
10133 if (new_object
!= VM_OBJECT_NULL
) {
10134 vm_object_lock_shared(new_object
);
10136 while (new_object
!= VM_OBJECT_NULL
&&
10137 #if XNU_TARGET_OS_OSX
10138 !new_object
->true_share
&&
10139 new_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
10140 #endif /* XNU_TARGET_OS_OSX */
10141 new_object
->internal
) {
10142 new_shadow
= new_object
->shadow
;
10143 if (new_shadow
== VM_OBJECT_NULL
) {
10146 vm_object_lock_shared(new_shadow
);
10147 vm_object_unlock(new_object
);
10148 new_object
= new_shadow
;
10150 if (new_object
!= VM_OBJECT_NULL
) {
10151 if (!new_object
->internal
) {
10153 * The new mapping is backed
10154 * by an external object. We
10155 * don't want malloc'ed memory
10156 * to be replaced with such a
10157 * non-anonymous mapping, so
10158 * let's go off the optimized
10161 vm_map_copy_overwrite_aligned_src_not_internal
++;
10162 vm_object_unlock(new_object
);
10165 #if XNU_TARGET_OS_OSX
10166 if (new_object
->true_share
||
10167 new_object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
10169 * Same if there's a "true_share"
10170 * object in the shadow chain, or
10171 * an object with a non-default
10172 * (SYMMETRIC) copy strategy.
10174 vm_map_copy_overwrite_aligned_src_not_symmetric
++;
10175 vm_object_unlock(new_object
);
10178 #endif /* XNU_TARGET_OS_OSX */
10179 vm_object_unlock(new_object
);
10182 * The new mapping is still backed by
10183 * anonymous (internal) memory, so it's
10184 * OK to substitute it for the original
10185 * malloc() mapping.
10189 if (old_object
!= VM_OBJECT_NULL
) {
10190 if (entry
->is_sub_map
) {
10191 if (entry
->use_pmap
) {
10192 #ifndef NO_NESTED_PMAP
10193 pmap_unnest(dst_map
->pmap
,
10194 (addr64_t
)entry
->vme_start
,
10195 entry
->vme_end
- entry
->vme_start
);
10196 #endif /* NO_NESTED_PMAP */
10197 if (dst_map
->mapped_in_other_pmaps
) {
10198 /* clean up parent */
10200 vm_map_submap_pmap_clean(
10201 dst_map
, entry
->vme_start
,
10204 VME_OFFSET(entry
));
10207 vm_map_submap_pmap_clean(
10208 dst_map
, entry
->vme_start
,
10211 VME_OFFSET(entry
));
10213 vm_map_deallocate(VME_SUBMAP(entry
));
10215 if (dst_map
->mapped_in_other_pmaps
) {
10216 vm_object_pmap_protect_options(
10220 - entry
->vme_start
,
10225 PMAP_OPTIONS_REMOVE
);
10227 pmap_remove_options(
10229 (addr64_t
)(entry
->vme_start
),
10230 (addr64_t
)(entry
->vme_end
),
10231 PMAP_OPTIONS_REMOVE
);
10233 vm_object_deallocate(old_object
);
10237 if (entry
->iokit_acct
) {
10238 /* keep using iokit accounting */
10239 entry
->use_pmap
= FALSE
;
10241 /* use pmap accounting */
10242 entry
->use_pmap
= TRUE
;
10244 entry
->is_sub_map
= FALSE
;
10245 VME_OBJECT_SET(entry
, VME_OBJECT(copy_entry
));
10246 object
= VME_OBJECT(entry
);
10247 entry
->needs_copy
= copy_entry
->needs_copy
;
10248 entry
->wired_count
= 0;
10249 entry
->user_wired_count
= 0;
10250 offset
= VME_OFFSET(copy_entry
);
10251 VME_OFFSET_SET(entry
, offset
);
10253 vm_map_copy_entry_unlink(copy
, copy_entry
);
10254 vm_map_copy_entry_dispose(copy
, copy_entry
);
			/*
			 * we could try to push pages into the pmap at this point, BUT
			 * this optimization only saved on average 2 us per page if ALL
			 * the pages in the source were currently mapped
			 * and ALL the pages in the dest were touched, if there were fewer
			 * than 2/3 of the pages touched, this optimization actually cost more cycles
			 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
			 */

			/*
			 *	Set up for the next iteration.  The map
			 *	has not been unlocked, so the next
			 *	address should be at the end of this
			 *	entry, and the next map entry should be
			 *	the one following it.
			 */

			start = tmp_entry->vme_end;
			tmp_entry = tmp_entry->vme_next;
		} else {
10276 vm_map_version_t version
;
10277 vm_object_t dst_object
;
10278 vm_object_offset_t dst_offset
;
10282 if (entry
->needs_copy
) {
10283 VME_OBJECT_SHADOW(entry
,
10285 entry
->vme_start
));
10286 entry
->needs_copy
= FALSE
;
10289 dst_object
= VME_OBJECT(entry
);
10290 dst_offset
= VME_OFFSET(entry
);
10293 * Take an object reference, and record
10294 * the map version information so that the
10295 * map can be safely unlocked.
10298 if (dst_object
== VM_OBJECT_NULL
) {
10300 * We would usually have just taken the
10301 * optimized path above if the destination
10302 * object has not been allocated yet. But we
10303 * now disable that optimization if the copy
10304 * entry's object is not backed by anonymous
10305 * memory to avoid replacing malloc'ed
10306 * (i.e. re-usable) anonymous memory with a
10307 * not-so-anonymous mapping.
10308 * So we have to handle this case here and
10309 * allocate a new VM object for this map entry.
10311 dst_object
= vm_object_allocate(
10312 entry
->vme_end
- entry
->vme_start
);
10314 VME_OBJECT_SET(entry
, dst_object
);
10315 VME_OFFSET_SET(entry
, dst_offset
);
10316 assert(entry
->use_pmap
);
10319 vm_object_reference(dst_object
);
10321 /* account for unlock bumping up timestamp */
10322 version
.main_timestamp
= dst_map
->timestamp
+ 1;
10324 vm_map_unlock(dst_map
);
10327 * Copy as much as possible in one pass
10332 VME_OBJECT(copy_entry
),
10333 VME_OFFSET(copy_entry
),
10342 * Release the object reference
10345 vm_object_deallocate(dst_object
);
10348 * If a hard error occurred, return it now
10351 if (r
!= KERN_SUCCESS
) {
10355 if (copy_size
!= 0) {
10357 * Dispose of the copied region
10360 vm_map_copy_clip_end(copy
, copy_entry
,
10361 copy_entry
->vme_start
+ copy_size
);
10362 vm_map_copy_entry_unlink(copy
, copy_entry
);
10363 vm_object_deallocate(VME_OBJECT(copy_entry
));
10364 vm_map_copy_entry_dispose(copy
, copy_entry
);
10368 * Pick up in the destination map where we left off.
10370 * Use the version information to avoid a lookup
10371 * in the normal case.
10374 start
+= copy_size
;
10375 vm_map_lock(dst_map
);
10376 if (version
.main_timestamp
== dst_map
->timestamp
&&
10378 /* We can safely use saved tmp_entry value */
10380 if (tmp_entry
->map_aligned
&&
10381 !VM_MAP_PAGE_ALIGNED(
10383 VM_MAP_PAGE_MASK(dst_map
))) {
10384 /* no longer map-aligned */
10385 tmp_entry
->map_aligned
= FALSE
;
10387 vm_map_clip_end(dst_map
, tmp_entry
, start
);
10388 tmp_entry
= tmp_entry
->vme_next
;
10390 /* Must do lookup of tmp_entry */
10392 if (!vm_map_lookup_entry(dst_map
, start
, &tmp_entry
)) {
10393 vm_map_unlock(dst_map
);
10394 return KERN_INVALID_ADDRESS
;
10396 if (tmp_entry
->map_aligned
&&
10397 !VM_MAP_PAGE_ALIGNED(
10399 VM_MAP_PAGE_MASK(dst_map
))) {
10400 /* no longer map-aligned */
10401 tmp_entry
->map_aligned
= FALSE
;
			vm_map_clip_start(dst_map, tmp_entry, start);
		}
	}/* while */

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite_aligned */
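
/*
 * Illustrative sketch (not part of the original source) of the map-version
 * technique used throughout the overwrite routines above: record the map
 * timestamp before dropping the lock and, if it is unchanged after
 * re-locking, reuse the saved entry instead of looking it up again:
 *
 *	vm_map_version_t version;
 *
 *	version.main_timestamp = dst_map->timestamp + 1; // +1: our own unlock
 *	vm_map_unlock(dst_map);
 *	// ... fault/copy with the map unlocked ...
 *	vm_map_lock(dst_map);
 *	if (version.main_timestamp == dst_map->timestamp) {
 *		// nobody else touched the map: the saved tmp_entry is valid
 *	} else {
 *		// the map changed behind our back: vm_map_lookup_entry() again
 *	}
 */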
/*
 *	Routine:	vm_map_copyin_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy in data to a kernel buffer from space in the
 *		source map.  The original space may be optionally
 *		deallocated.
 *
 *		If successful, returns a new copy object.
 */
static kern_return_t
vm_map_copyin_kernel_buffer(
	vm_map_t        src_map,
	vm_map_offset_t src_addr,
	vm_map_size_t   len,
	boolean_t       src_destroy,
	vm_map_copy_t   *copy_result)
{
	kern_return_t kr;
	vm_map_copy_t copy;

	if (len > msg_ool_size_small) {
		return KERN_INVALID_ARGUMENT;
	}

	copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
	if (copy == VM_MAP_COPY_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	copy->cpy_kdata = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
	if (copy->cpy_kdata == NULL) {
		zfree(vm_map_copy_zone, copy);
		return KERN_RESOURCE_SHORTAGE;
	}

	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
	copy->size = len;
	copy->offset = 0;

	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
	if (kr != KERN_SUCCESS) {
		kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, len);
		zfree(vm_map_copy_zone, copy);
		return kr;
	}
	if (src_destroy) {
		(void) vm_map_remove(
			src_map,
			vm_map_trunc_page(src_addr,
			    VM_MAP_PAGE_MASK(src_map)),
			vm_map_round_page(src_addr + len,
			    VM_MAP_PAGE_MASK(src_map)),
			(VM_MAP_REMOVE_INTERRUPTIBLE |
			VM_MAP_REMOVE_WAIT_FOR_KWIRE |
			((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
	}

	*copy_result = copy;
	return KERN_SUCCESS;
}
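
/*
 * Illustrative usage sketch (not part of the original source), assuming a
 * hypothetical in-file caller that snapshots a small user range and then
 * pushes it into another task's map via the kernel-buffer flavor of copy
 * object:
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin_kernel_buffer(src_map, src_addr, len,
 *	        FALSE, &copy);          // len must not exceed msg_ool_size_small
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout_kernel_buffer(dst_map, &dst, copy, len,
 *		        FALSE,          // allocate fresh space, don't overwrite
 *		        TRUE);          // consume "copy" on success
 *	}
 */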
/*
 *	Routine:	vm_map_copyout_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy out data from a kernel buffer into space in the
 *		destination map.  The space may be optionally dynamically
 *		allocated.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
static int vm_map_copyout_kernel_buffer_failures = 0;

static kern_return_t
vm_map_copyout_kernel_buffer(
	vm_map_t                map,
	vm_map_address_t        *addr,  /* IN/OUT */
	vm_map_copy_t           copy,
	vm_map_size_t           copy_size,
	boolean_t               overwrite,
	boolean_t               consume_on_success)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	assert(copy->size == copy_size);
	/*
	 * check for corrupted vm_map_copy structure
	 */
	if (copy_size > msg_ool_size_small || copy->offset) {
		panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
		    (long long)copy->size, (long long)copy->offset);
	}
10507 * Allocate space in the target map for the data
10510 kr
= vm_map_enter(map
,
10512 vm_map_round_page(copy_size
,
10513 VM_MAP_PAGE_MASK(map
)),
10514 (vm_map_offset_t
) 0,
10516 VM_MAP_KERNEL_FLAGS_NONE
,
10517 VM_KERN_MEMORY_NONE
,
10519 (vm_object_offset_t
) 0,
10523 VM_INHERIT_DEFAULT
);
10524 if (kr
!= KERN_SUCCESS
) {
10528 if (map
->pmap
== kernel_pmap
) {
10529 kasan_notify_address(*addr
, copy
->size
);
10535 * Copyout the data from the kernel buffer to the target map.
10537 if (thread
->map
== map
) {
10539 * If the target map is the current map, just do
10542 assert((vm_size_t
)copy_size
== copy_size
);
10543 if (copyout(copy
->cpy_kdata
, *addr
, (vm_size_t
)copy_size
)) {
10544 kr
= KERN_INVALID_ADDRESS
;
10550 * If the target map is another map, assume the
10551 * target's address space identity for the duration
10554 vm_map_reference(map
);
10555 oldmap
= vm_map_switch(map
);
10557 assert((vm_size_t
)copy_size
== copy_size
);
10558 if (copyout(copy
->cpy_kdata
, *addr
, (vm_size_t
)copy_size
)) {
10559 vm_map_copyout_kernel_buffer_failures
++;
10560 kr
= KERN_INVALID_ADDRESS
;
10563 (void) vm_map_switch(oldmap
);
10564 vm_map_deallocate(map
);
10567 if (kr
!= KERN_SUCCESS
) {
10568 /* the copy failed, clean up */
10571 * Deallocate the space we allocated in the target map.
10573 (void) vm_map_remove(
10575 vm_map_trunc_page(*addr
,
10576 VM_MAP_PAGE_MASK(map
)),
10577 vm_map_round_page((*addr
+
10578 vm_map_round_page(copy_size
,
10579 VM_MAP_PAGE_MASK(map
))),
10580 VM_MAP_PAGE_MASK(map
)),
10581 VM_MAP_REMOVE_NO_FLAGS
);
		/* copy was successful, discard the copy structure */
		if (consume_on_success) {
			kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy_size);
			zfree(vm_map_copy_zone, copy);
		}
	}

	return kr;
}
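
/*
 * Illustrative sketch (not part of the original source) of the address-space
 * switch used above when the destination is not the current map: the thread
 * temporarily assumes the target map's identity so a plain copyout() lands
 * in the right pmap, then switches back (buffer and address names here are
 * placeholders):
 *
 *	vm_map_t oldmap;
 *
 *	vm_map_reference(map);          // keep the target map alive
 *	oldmap = vm_map_switch(map);    // current thread now resolves "map"
 *	if (copyout(kbuf, uaddr, nbytes)) {
 *		kr = KERN_INVALID_ADDRESS;
 *	}
 *	(void) vm_map_switch(oldmap);   // restore the original address space
 *	vm_map_deallocate(map);         // drop the extra reference
 */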
/*
 *	Routine:	vm_map_copy_insert	[internal use only]
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *	Side effects:
 *		The copy chain is destroyed.
 */
static void
vm_map_copy_insert(
	vm_map_t        map,
	vm_map_entry_t  after_where,
	vm_map_copy_t   copy)
{
	vm_map_entry_t  entry;

	while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
		entry = vm_map_copy_first_entry(copy);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_store_entry_link(map, after_where, entry,
		    VM_MAP_KERNEL_FLAGS_NONE);
		after_where = entry;
	}
	zfree(vm_map_copy_zone, copy);
}
10625 vm_map_entry_t where
,
10626 vm_map_copy_t copy
,
10627 vm_map_offset_t adjustment
,
10628 vm_prot_t cur_prot
,
10629 vm_prot_t max_prot
,
10630 vm_inherit_t inheritance
)
10632 vm_map_entry_t copy_entry
, new_entry
;
10634 for (copy_entry
= vm_map_copy_first_entry(copy
);
10635 copy_entry
!= vm_map_copy_to_entry(copy
);
10636 copy_entry
= copy_entry
->vme_next
) {
10637 /* get a new VM map entry for the map */
10638 new_entry
= vm_map_entry_create(map
,
10639 !map
->hdr
.entries_pageable
);
10640 /* copy the "copy entry" to the new entry */
10641 vm_map_entry_copy(map
, new_entry
, copy_entry
);
10642 /* adjust "start" and "end" */
10643 new_entry
->vme_start
+= adjustment
;
10644 new_entry
->vme_end
+= adjustment
;
10645 /* clear some attributes */
10646 new_entry
->inheritance
= inheritance
;
10647 new_entry
->protection
= cur_prot
;
10648 new_entry
->max_protection
= max_prot
;
10649 new_entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
10650 /* take an extra reference on the entry's "object" */
10651 if (new_entry
->is_sub_map
) {
10652 assert(!new_entry
->use_pmap
); /* not nested */
10653 vm_map_lock(VME_SUBMAP(new_entry
));
10654 vm_map_reference(VME_SUBMAP(new_entry
));
10655 vm_map_unlock(VME_SUBMAP(new_entry
));
10657 vm_object_reference(VME_OBJECT(new_entry
));
10659 /* insert the new entry in the map */
10660 vm_map_store_entry_link(map
, where
, new_entry
,
10661 VM_MAP_KERNEL_FLAGS_NONE
);
10662 /* continue inserting the "copy entries" after the new entry */
/*
 * Returns true if *size matches (or is in the range of) copy->size.
 * Upon returning true, the *size field is updated with the actual size of the
 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
 */
boolean_t
vm_map_copy_validate_size(
	vm_map_t                dst_map,
	vm_map_copy_t           copy,
	vm_map_size_t           *size)
{
	if (copy == VM_MAP_COPY_NULL) {
		return FALSE;
	}
	vm_map_size_t copy_sz = copy->size;
	vm_map_size_t sz = *size;
	switch (copy->type) {
	case VM_MAP_COPY_OBJECT:
	case VM_MAP_COPY_KERNEL_BUFFER:
		if (sz == copy_sz) {
			return TRUE;
		}
		break;
	case VM_MAP_COPY_ENTRY_LIST:
		/*
		 * potential page-size rounding prevents us from exactly
		 * validating this flavor of vm_map_copy, but we can at least
		 * assert that it's within a range.
		 */
		if (copy_sz >= sz &&
		    copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
			*size = copy_sz;
			return TRUE;
		}
		break;
	default:
		break;
	}
	return FALSE;
}
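
/*
 * Illustrative example (not part of the original source): for an entry-list
 * copy on a destination map with 16KB pages, a caller expecting 0x6100 bytes
 * accepts any copy whose size falls in [0x6100, 0x8000] and *size is updated
 * to the copy's actual page-rounded size; kernel-buffer and object copies
 * must match the expected size exactly.
 */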
/*
 *	Routine:	vm_map_copyout_size
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.  Uses a prevalidated
 *		size for the copy object (vm_map_copy_validate_size).
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout_size(
	vm_map_t                dst_map,
	vm_map_address_t        *dst_addr,      /* OUT */
	vm_map_copy_t           copy,
	vm_map_size_t           copy_size)
{
	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
	           TRUE, /* consume_on_success */
	           VM_PROT_DEFAULT,
	           VM_PROT_ALL,
	           VM_INHERIT_DEFAULT);
}

/*
 *	Routine:	vm_map_copyout
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout(
	vm_map_t                dst_map,
	vm_map_address_t        *dst_addr,      /* OUT */
	vm_map_copy_t           copy)
{
	return vm_map_copyout_internal(dst_map, dst_addr, copy,
	           copy ? copy->size : 0,
	           TRUE, /* consume_on_success */
	           VM_PROT_DEFAULT,
	           VM_PROT_ALL,
	           VM_INHERIT_DEFAULT);
}
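
/*
 * Illustrative usage sketch (not part of the original source): the common
 * pairing of copyin/copyout to move a region between two maps without an
 * intermediate kernel copy of the data:
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS) {
 *			vm_map_copy_discard(copy);  // copyout did not consume it
 *		}
 *	}
 */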
10758 vm_map_copyout_internal(
10760 vm_map_address_t
*dst_addr
, /* OUT */
10761 vm_map_copy_t copy
,
10762 vm_map_size_t copy_size
,
10763 boolean_t consume_on_success
,
10764 vm_prot_t cur_protection
,
10765 vm_prot_t max_protection
,
10766 vm_inherit_t inheritance
)
10768 vm_map_size_t size
;
10769 vm_map_size_t adjustment
;
10770 vm_map_offset_t start
;
10771 vm_object_offset_t vm_copy_start
;
10772 vm_map_entry_t last
;
10773 vm_map_entry_t entry
;
10774 vm_map_entry_t hole_entry
;
10775 vm_map_copy_t original_copy
;
	/*
	 *	Check for null copy object.
	 */
	if (copy == VM_MAP_COPY_NULL) {
		*dst_addr = 0;
		return KERN_SUCCESS;
	}
	/*
	 *	Assert that the vm_map_copy is coming from the right
	 *	zone and hasn't been forged
	 */
	vm_map_copy_require(copy);

	if (copy->size != copy_size) {
		return KERN_FAILURE;
	}
10798 * Check for special copy object, created
10799 * by vm_map_copyin_object.
10802 if (copy
->type
== VM_MAP_COPY_OBJECT
) {
10803 vm_object_t object
= copy
->cpy_object
;
10805 vm_object_offset_t offset
;
10807 offset
= vm_object_trunc_page(copy
->offset
);
10808 size
= vm_map_round_page((copy_size
+
10809 (vm_map_size_t
)(copy
->offset
-
10811 VM_MAP_PAGE_MASK(dst_map
));
10813 kr
= vm_map_enter(dst_map
, dst_addr
, size
,
10814 (vm_map_offset_t
) 0, VM_FLAGS_ANYWHERE
,
10815 VM_MAP_KERNEL_FLAGS_NONE
,
10816 VM_KERN_MEMORY_NONE
,
10817 object
, offset
, FALSE
,
10818 VM_PROT_DEFAULT
, VM_PROT_ALL
,
10819 VM_INHERIT_DEFAULT
);
10820 if (kr
!= KERN_SUCCESS
) {
10823 /* Account for non-pagealigned copy object */
10824 *dst_addr
+= (vm_map_offset_t
)(copy
->offset
- offset
);
10825 if (consume_on_success
) {
10826 zfree(vm_map_copy_zone
, copy
);
10828 return KERN_SUCCESS
;
10832 * Check for special kernel buffer allocated
10833 * by new_ipc_kmsg_copyin.
10836 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
10837 return vm_map_copyout_kernel_buffer(dst_map
, dst_addr
,
10838 copy
, copy_size
, FALSE
,
10839 consume_on_success
);
10842 original_copy
= copy
;
10843 if (copy
->cpy_hdr
.page_shift
!= VM_MAP_PAGE_SHIFT(dst_map
)) {
10845 vm_map_copy_t target_copy
;
10846 vm_map_offset_t overmap_start
, overmap_end
, trimmed_start
;
10848 target_copy
= VM_MAP_COPY_NULL
;
10849 DEBUG4K_ADJUST("adjusting...\n");
10850 kr
= vm_map_copy_adjust_to_target(
10853 copy
->size
, /* size */
10860 if (kr
!= KERN_SUCCESS
) {
10861 DEBUG4K_COPY("adjust failed 0x%x\n", kr
);
10864 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy
, copy
->cpy_hdr
.page_shift
, copy
->offset
, (uint64_t)copy
->size
, dst_map
, VM_MAP_PAGE_SHIFT(dst_map
), target_copy
, target_copy
->cpy_hdr
.page_shift
, target_copy
->offset
, (uint64_t)target_copy
->size
, (uint64_t)overmap_start
, (uint64_t)overmap_end
, (uint64_t)trimmed_start
);
10865 if (target_copy
!= copy
) {
10866 copy
= target_copy
;
10868 copy_size
= copy
->size
;
10872 * Find space for the data
10875 vm_copy_start
= vm_map_trunc_page((vm_map_size_t
)copy
->offset
,
10876 VM_MAP_COPY_PAGE_MASK(copy
));
10877 size
= vm_map_round_page((vm_map_size_t
)copy
->offset
+ copy_size
,
10878 VM_MAP_COPY_PAGE_MASK(copy
))
10884 vm_map_lock(dst_map
);
10885 if (dst_map
->disable_vmentry_reuse
== TRUE
) {
10886 VM_MAP_HIGHEST_ENTRY(dst_map
, entry
, start
);
10889 if (dst_map
->holelistenabled
) {
10890 hole_entry
= CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
);
10892 if (hole_entry
== NULL
) {
10894 * No more space in the map?
10896 vm_map_unlock(dst_map
);
10897 return KERN_NO_SPACE
;
10901 start
= last
->vme_start
;
10903 assert(first_free_is_valid(dst_map
));
10904 start
= ((last
= dst_map
->first_free
) == vm_map_to_entry(dst_map
)) ?
10905 vm_map_min(dst_map
) : last
->vme_end
;
10907 start
= vm_map_round_page(start
,
10908 VM_MAP_PAGE_MASK(dst_map
));
10912 vm_map_entry_t next
= last
->vme_next
;
10913 vm_map_offset_t end
= start
+ size
;
10915 if ((end
> dst_map
->max_offset
) || (end
< start
)) {
10916 if (dst_map
->wait_for_space
) {
10917 if (size
<= (dst_map
->max_offset
- dst_map
->min_offset
)) {
10918 assert_wait((event_t
) dst_map
,
10919 THREAD_INTERRUPTIBLE
);
10920 vm_map_unlock(dst_map
);
10921 thread_block(THREAD_CONTINUE_NULL
);
10925 vm_map_unlock(dst_map
);
10926 return KERN_NO_SPACE
;
10929 if (dst_map
->holelistenabled
) {
10930 if (last
->vme_end
>= end
) {
10935 * If there are no more entries, we must win.
10939 * If there is another entry, it must be
10940 * after the end of the potential new region.
10943 if (next
== vm_map_to_entry(dst_map
)) {
10947 if (next
->vme_start
>= end
) {
10954 if (dst_map
->holelistenabled
) {
10955 if (last
== CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
)) {
10959 vm_map_unlock(dst_map
);
10960 return KERN_NO_SPACE
;
10962 start
= last
->vme_start
;
10964 start
= last
->vme_end
;
10966 start
= vm_map_round_page(start
,
10967 VM_MAP_PAGE_MASK(dst_map
));
10970 if (dst_map
->holelistenabled
) {
10971 if (vm_map_lookup_entry(dst_map
, last
->vme_start
, &last
)) {
10972 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last
, (unsigned long long)last
->vme_start
);
10977 adjustment
= start
- vm_copy_start
;
10978 if (!consume_on_success
) {
10980 * We're not allowed to consume "copy", so we'll have to
10981 * copy its map entries into the destination map below.
10982 * No need to re-allocate map entries from the correct
10983 * (pageable or not) zone, since we'll get new map entries
10984 * during the transfer.
10985 * We'll also adjust the map entries's "start" and "end"
10986 * during the transfer, to keep "copy"'s entries consistent
10987 * with its "offset".
10989 goto after_adjustments
;
10993 * Since we're going to just drop the map
10994 * entries from the copy into the destination
10995 * map, they must come from the same pool.
10998 if (copy
->cpy_hdr
.entries_pageable
!= dst_map
->hdr
.entries_pageable
) {
11000 * Mismatches occur when dealing with the default
11004 vm_map_entry_t next
, new;
11007 * Find the zone that the copies were allocated from
11010 entry
= vm_map_copy_first_entry(copy
);
11013 * Reinitialize the copy so that vm_map_copy_entry_link
11016 vm_map_store_copy_reset(copy
, entry
);
11017 copy
->cpy_hdr
.entries_pageable
= dst_map
->hdr
.entries_pageable
;
11022 while (entry
!= vm_map_copy_to_entry(copy
)) {
11023 new = vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11024 vm_map_entry_copy_full(new, entry
);
11025 new->vme_no_copy_on_read
= FALSE
;
11026 assert(!new->iokit_acct
);
11027 if (new->is_sub_map
) {
11028 /* clr address space specifics */
11029 new->use_pmap
= FALSE
;
11031 vm_map_copy_entry_link(copy
,
11032 vm_map_copy_last_entry(copy
),
11034 next
= entry
->vme_next
;
11035 old_zone
= entry
->from_reserved_zone
? vm_map_entry_reserved_zone
: vm_map_entry_zone
;
11036 zfree(old_zone
, entry
);
11042 * Adjust the addresses in the copy chain, and
11043 * reset the region attributes.
11046 for (entry
= vm_map_copy_first_entry(copy
);
11047 entry
!= vm_map_copy_to_entry(copy
);
11048 entry
= entry
->vme_next
) {
11049 if (VM_MAP_PAGE_SHIFT(dst_map
) == PAGE_SHIFT
) {
11051 * We're injecting this copy entry into a map that
11052 * has the standard page alignment, so clear
11053 * "map_aligned" (which might have been inherited
11054 * from the original map entry).
11056 entry
->map_aligned
= FALSE
;
11059 entry
->vme_start
+= adjustment
;
11060 entry
->vme_end
+= adjustment
;
11062 if (entry
->map_aligned
) {
11063 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_start
,
11064 VM_MAP_PAGE_MASK(dst_map
)));
11065 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_end
,
11066 VM_MAP_PAGE_MASK(dst_map
)));
11069 entry
->inheritance
= VM_INHERIT_DEFAULT
;
11070 entry
->protection
= VM_PROT_DEFAULT
;
11071 entry
->max_protection
= VM_PROT_ALL
;
11072 entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
11075 * If the entry is now wired,
11076 * map the pages into the destination map.
11078 if (entry
->wired_count
!= 0) {
11079 vm_map_offset_t va
;
11080 vm_object_offset_t offset
;
11081 vm_object_t object
;
11085 /* TODO4K would need to use actual page size */
11086 assert(VM_MAP_PAGE_SHIFT(dst_map
) == PAGE_SHIFT
);
11088 object
= VME_OBJECT(entry
);
11089 offset
= VME_OFFSET(entry
);
11090 va
= entry
->vme_start
;
11092 pmap_pageable(dst_map
->pmap
,
11097 while (va
< entry
->vme_end
) {
11099 struct vm_object_fault_info fault_info
= {};
11102 * Look up the page in the object.
11103 * Assert that the page will be found in the
11106 * the object was newly created by
11107 * vm_object_copy_slowly, and has
11108 * copies of all of the pages from
11109 * the source object
11111 * the object was moved from the old
11112 * map entry; because the old map
11113 * entry was wired, all of the pages
11114 * were in the top-level object.
11115 * (XXX not true if we wire pages for
11118 vm_object_lock(object
);
11120 m
= vm_page_lookup(object
, offset
);
11121 if (m
== VM_PAGE_NULL
|| !VM_PAGE_WIRED(m
) ||
11123 panic("vm_map_copyout: wiring %p", m
);
11126 prot
= entry
->protection
;
11128 if (override_nx(dst_map
, VME_ALIAS(entry
)) &&
11130 prot
|= VM_PROT_EXECUTE
;
11133 type_of_fault
= DBG_CACHE_HIT_FAULT
;
11135 fault_info
.user_tag
= VME_ALIAS(entry
);
11136 fault_info
.pmap_options
= 0;
11137 if (entry
->iokit_acct
||
11138 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
11139 fault_info
.pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
11149 FALSE
, /* change_wiring */
11150 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
11152 NULL
, /* need_retry */
11155 vm_object_unlock(object
);
11157 offset
+= PAGE_SIZE_64
;
11166 * Correct the page alignment for the result
11169 *dst_addr
= start
+ (copy
->offset
- vm_copy_start
);
11172 kasan_notify_address(*dst_addr
, size
);
11176 * Update the hints and the map size
11179 if (consume_on_success
) {
11180 SAVE_HINT_MAP_WRITE(dst_map
, vm_map_copy_last_entry(copy
));
11182 SAVE_HINT_MAP_WRITE(dst_map
, last
);
11185 dst_map
->size
+= size
;
11191 if (consume_on_success
) {
11192 vm_map_copy_insert(dst_map
, last
, copy
);
11193 if (copy
!= original_copy
) {
11194 vm_map_copy_discard(original_copy
);
11195 original_copy
= VM_MAP_COPY_NULL
;
11198 vm_map_copy_remap(dst_map
, last
, copy
, adjustment
,
11199 cur_protection
, max_protection
,
11201 if (copy
!= original_copy
&& original_copy
!= VM_MAP_COPY_NULL
) {
11202 vm_map_copy_discard(copy
);
11203 copy
= original_copy
;
11208 vm_map_unlock(dst_map
);
11211 * XXX If wiring_required, call vm_map_pageable
11214 return KERN_SUCCESS
;
/*
 *	Routine:	vm_map_copyin
 *
 *	Description:
 *		see vm_map_copyin_common. Exported via Unsupported.exports.
 */

#undef vm_map_copyin

kern_return_t
vm_map_copyin(
	vm_map_t        src_map,
	vm_map_address_t src_addr,
	vm_map_size_t   len,
	boolean_t       src_destroy,
	vm_map_copy_t   *copy_result)   /* OUT */
{
	return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
	           FALSE, copy_result, FALSE);
}
/*
 *	Routine:	vm_map_copyin_common
 *
 *	Description:
 *		Copy the specified region (src_addr, len) from the
 *		source address space (src_map), possibly removing
 *		the region from the source address space (src_destroy).
 *
 *	Returns:
 *		A vm_map_copy_t object (copy_result), suitable for
 *		insertion into another address space (using vm_map_copyout),
 *		copying over another address space region (using
 *		vm_map_copy_overwrite).  If the copy is unused, it
 *		should be destroyed (using vm_map_copy_discard).
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */
typedef struct submap_map {
	vm_map_t        parent_map;
	vm_map_offset_t base_start;
	vm_map_offset_t base_end;
	vm_map_size_t   base_len;
	struct submap_map *next;
} submap_map_t;
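
/*
 * Illustrative sketch (not part of the original source):
 * vm_map_copyin_internal uses a singly-linked list of submap_map_t records
 * as an explicit stack so that descending into nested submaps, and later
 * resuming in each parent, needs no recursion:
 *
 *	submap_map_t *ptr;
 *
 *	// push: about to descend into a submap
 *	ptr = (submap_map_t *) kalloc(sizeof(submap_map_t));
 *	ptr->next = parent_maps;
 *	ptr->parent_map = src_map;
 *	ptr->base_start = src_start;
 *	ptr->base_end = src_end;
 *	parent_maps = ptr;
 *
 *	// pop: done with the submap, resume in the parent
 *	ptr = parent_maps;
 *	parent_maps = parent_maps->next;
 *	src_map = ptr->parent_map;
 *	kfree(ptr, sizeof(submap_map_t));
 */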
kern_return_t
vm_map_copyin_common(
	vm_map_t        src_map,
	vm_map_address_t src_addr,
	vm_map_size_t   len,
	boolean_t       src_destroy,
	__unused boolean_t      src_volatile,
	vm_map_copy_t   *copy_result,   /* OUT */
	boolean_t       use_maxprot)
{
	int flags;

	flags = 0;
	if (src_destroy) {
		flags |= VM_MAP_COPYIN_SRC_DESTROY;
	}
	if (use_maxprot) {
		flags |= VM_MAP_COPYIN_USE_MAXPROT;
	}
	return vm_map_copyin_internal(src_map,
	           src_addr,
	           len,
	           flags,
	           copy_result);
}

kern_return_t
vm_map_copyin_internal(
	vm_map_t        src_map,
	vm_map_address_t src_addr,
	vm_map_size_t   len,
	int             flags,
	vm_map_copy_t   *copy_result)   /* OUT */
{
11299 vm_map_entry_t tmp_entry
; /* Result of last map lookup --
11300 * in multi-level lookup, this
11301 * entry contains the actual
11302 * vm_object/offset.
11304 vm_map_entry_t new_entry
= VM_MAP_ENTRY_NULL
; /* Map entry for copy */
11306 vm_map_offset_t src_start
; /* Start of current entry --
11307 * where copy is taking place now
11309 vm_map_offset_t src_end
; /* End of entire region to be
11311 vm_map_offset_t src_base
;
11312 vm_map_t base_map
= src_map
;
11313 boolean_t map_share
= FALSE
;
11314 submap_map_t
*parent_maps
= NULL
;
11316 vm_map_copy_t copy
; /* Resulting copy */
11317 vm_map_address_t copy_addr
;
11318 vm_map_size_t copy_size
;
11319 boolean_t src_destroy
;
11320 boolean_t use_maxprot
;
11321 boolean_t preserve_purgeable
;
11322 boolean_t entry_was_shared
;
11323 vm_map_entry_t saved_src_entry
;
11325 if (flags
& ~VM_MAP_COPYIN_ALL_FLAGS
) {
11326 return KERN_INVALID_ARGUMENT
;
11329 src_destroy
= (flags
& VM_MAP_COPYIN_SRC_DESTROY
) ? TRUE
: FALSE
;
11330 use_maxprot
= (flags
& VM_MAP_COPYIN_USE_MAXPROT
) ? TRUE
: FALSE
;
11331 preserve_purgeable
=
11332 (flags
& VM_MAP_COPYIN_PRESERVE_PURGEABLE
) ? TRUE
: FALSE
;
	/*
	 * Check for copies of zero bytes.
	 */
	if (len == 0) {
		*copy_result = VM_MAP_COPY_NULL;
		return KERN_SUCCESS;
	}

	/*
	 * Check that the end address doesn't overflow
	 */
	src_end = src_addr + len;
	if (src_end < src_addr) {
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Compute (page aligned) start and end of region
	 */
	src_start = vm_map_trunc_page(src_addr,
	    VM_MAP_PAGE_MASK(src_map));
	src_end = vm_map_round_page(src_end,
	    VM_MAP_PAGE_MASK(src_map));
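
	/*
	 * Illustrative example (not part of the original source), assuming a
	 * source map with 4KB pages: copying 0x2100 bytes at src_addr
	 * 0x7f000e50 yields src_start = 0x7f000000 and src_end = 0x7f003000,
	 * i.e. the three pages that contain the requested range.
	 */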
	/*
	 * If the copy is sufficiently small, use a kernel buffer instead
	 * of making a virtual copy.  The theory being that the cost of
	 * setting up VM (and taking C-O-W faults) dominates the copy costs
	 * for small regions.
	 */
	if ((len < msg_ool_size_small) &&
	    !use_maxprot &&
	    !preserve_purgeable &&
	    !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
	    /*
	     * Since the "msg_ool_size_small" threshold was increased and
	     * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
	     * address space limits, we revert to doing a virtual copy if the
	     * copied range goes beyond those limits.  Otherwise, mach_vm_read()
	     * of the commpage would now fail when it used to work.
	     */
	    (src_start >= vm_map_min(src_map) &&
	    src_start < vm_map_max(src_map) &&
	    src_end >= vm_map_min(src_map) &&
	    src_end < vm_map_max(src_map))) {
		return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
		           src_destroy, copy_result);
	}
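
	/*
	 * Illustrative example (not part of the original source): a 512-byte
	 * copy that satisfies the checks above is handled entirely by the
	 * kernel-buffer path (one physical copy, no map entry manipulation),
	 * while a multi-megabyte copy falls through to the entry-list path
	 * below and shares the source pages copy-on-write until either side
	 * writes to them.
	 */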
	/*
	 *	Allocate a header element for the list.
	 *
	 *	Use the start and end in the header to
	 *	remember the endpoints prior to rounding.
	 */

	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->cpy_hdr.entries_pageable = TRUE;
	copy->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(src_map);

	vm_map_store_init( &(copy->cpy_hdr));

	copy->offset = src_addr;
	copy->size = len;

	new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
#define RETURN(x)                                               \
	MACRO_BEGIN                                             \
	vm_map_unlock(src_map);                                 \
	if (src_map != base_map)                                \
	        vm_map_deallocate(src_map);                     \
	if (new_entry != VM_MAP_ENTRY_NULL)                     \
	        vm_map_copy_entry_dispose(copy, new_entry);     \
	vm_map_copy_discard(copy);                              \
	{                                                       \
	        submap_map_t    *_ptr;                          \
	                                                        \
	        for (_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
	                parent_maps = parent_maps->next;        \
	                if (_ptr->parent_map != base_map)       \
	                        vm_map_deallocate(_ptr->parent_map); \
	                kfree(_ptr, sizeof(submap_map_t));      \
	        }                                               \
	}                                                       \
	MACRO_RETURN(x);                                        \
	MACRO_END
11425 * Find the beginning of the region.
11428 vm_map_lock(src_map
);
11431 * Lookup the original "src_addr" rather than the truncated
11432 * "src_start", in case "src_start" falls in a non-map-aligned
11433 * map entry *before* the map entry that contains "src_addr"...
11435 if (!vm_map_lookup_entry(src_map
, src_addr
, &tmp_entry
)) {
11436 RETURN(KERN_INVALID_ADDRESS
);
11438 if (!tmp_entry
->is_sub_map
) {
11440 * ... but clip to the map-rounded "src_start" rather than
11441 * "src_addr" to preserve map-alignment. We'll adjust the
11442 * first copy entry at the end, if needed.
11444 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11446 if (src_start
< tmp_entry
->vme_start
) {
11448 * Move "src_start" up to the start of the
11449 * first map entry to copy.
11451 src_start
= tmp_entry
->vme_start
;
11453 /* set for later submap fix-up */
11454 copy_addr
= src_start
;
11457 * Go through entries until we get to the end.
11461 vm_map_entry_t src_entry
= tmp_entry
; /* Top-level entry */
11462 vm_map_size_t src_size
; /* Size of source
11463 * map entry (in both
11467 vm_object_t src_object
; /* Object to copy */
11468 vm_object_offset_t src_offset
;
11470 boolean_t src_needs_copy
; /* Should source map
11471 * be made read-only
11472 * for copy-on-write?
11475 boolean_t new_entry_needs_copy
; /* Will new entry be COW? */
11477 boolean_t was_wired
; /* Was source wired? */
11478 vm_map_version_t version
; /* Version before locks
11479 * dropped to make copy
11481 kern_return_t result
; /* Return value from
11482 * copy_strategically.
11484 while (tmp_entry
->is_sub_map
) {
11485 vm_map_size_t submap_len
;
11488 ptr
= (submap_map_t
*)kalloc(sizeof(submap_map_t
));
11489 ptr
->next
= parent_maps
;
11491 ptr
->parent_map
= src_map
;
11492 ptr
->base_start
= src_start
;
11493 ptr
->base_end
= src_end
;
11494 submap_len
= tmp_entry
->vme_end
- src_start
;
11495 if (submap_len
> (src_end
- src_start
)) {
11496 submap_len
= src_end
- src_start
;
11498 ptr
->base_len
= submap_len
;
11500 src_start
-= tmp_entry
->vme_start
;
11501 src_start
+= VME_OFFSET(tmp_entry
);
11502 src_end
= src_start
+ submap_len
;
11503 src_map
= VME_SUBMAP(tmp_entry
);
11504 vm_map_lock(src_map
);
11505 /* keep an outstanding reference for all maps in */
11506 /* the parents tree except the base map */
11507 vm_map_reference(src_map
);
11508 vm_map_unlock(ptr
->parent_map
);
11509 if (!vm_map_lookup_entry(
11510 src_map
, src_start
, &tmp_entry
)) {
11511 RETURN(KERN_INVALID_ADDRESS
);
11514 if (!tmp_entry
->is_sub_map
) {
11515 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11517 src_entry
= tmp_entry
;
11519 /* we are now in the lowest level submap... */
11521 if ((VME_OBJECT(tmp_entry
) != VM_OBJECT_NULL
) &&
11522 (VME_OBJECT(tmp_entry
)->phys_contiguous
)) {
11523 /* This is not, supported for now.In future */
11524 /* we will need to detect the phys_contig */
11525 /* condition and then upgrade copy_slowly */
11526 /* to do physical copy from the device mem */
11527 /* based object. We can piggy-back off of */
11528 /* the was wired boolean to set-up the */
11529 /* proper handling */
11530 RETURN(KERN_PROTECTION_FAILURE
);
11533 * Create a new address map entry to hold the result.
11534 * Fill in the fields from the appropriate source entries.
11535 * We must unlock the source map to do this if we need
11536 * to allocate a map entry.
11538 if (new_entry
== VM_MAP_ENTRY_NULL
) {
11539 version
.main_timestamp
= src_map
->timestamp
;
11540 vm_map_unlock(src_map
);
11542 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11544 vm_map_lock(src_map
);
11545 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
11546 if (!vm_map_lookup_entry(src_map
, src_start
,
11548 RETURN(KERN_INVALID_ADDRESS
);
11550 if (!tmp_entry
->is_sub_map
) {
11551 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11553 continue; /* restart w/ new tmp_entry */
11558 * Verify that the region can be read.
11560 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
11562 (src_entry
->max_protection
& VM_PROT_READ
) == 0) {
11563 RETURN(KERN_PROTECTION_FAILURE
);
11567 * Clip against the endpoints of the entire region.
11570 vm_map_clip_end(src_map
, src_entry
, src_end
);
11572 src_size
= src_entry
->vme_end
- src_start
;
11573 src_object
= VME_OBJECT(src_entry
);
11574 src_offset
= VME_OFFSET(src_entry
);
11575 was_wired
= (src_entry
->wired_count
!= 0);
11577 vm_map_entry_copy(src_map
, new_entry
, src_entry
);
11578 if (new_entry
->is_sub_map
) {
11579 /* clr address space specifics */
11580 new_entry
->use_pmap
= FALSE
;
11583 * We're dealing with a copy-on-write operation,
11584 * so the resulting mapping should not inherit the
11585 * original mapping's accounting settings.
11586 * "iokit_acct" should have been cleared in
11587 * vm_map_entry_copy().
11588 * "use_pmap" should be reset to its default (TRUE)
11589 * so that the new mapping gets accounted for in
11590 * the task's memory footprint.
11592 assert(!new_entry
->iokit_acct
);
11593 new_entry
->use_pmap
= TRUE
;
11597 * Attempt non-blocking copy-on-write optimizations.
11601 * If we are destroying the source, and the object
11602 * is internal, we could move the object reference
11603 * from the source to the copy. The copy is
11604 * copy-on-write only if the source is.
11605 * We make another reference to the object, because
11606 * destroying the source entry will deallocate it.
11608 * This memory transfer has to be atomic, (to prevent
11609 * the VM object from being shared or copied while
11610 * it's being moved here), so we could only do this
11611 * if we won't have to unlock the VM map until the
11612 * original mapping has been fully removed.
11616 if ((src_object
== VM_OBJECT_NULL
||
11617 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
11618 && !(debug4k_no_cow_copyin
&& VM_MAP_PAGE_SHIFT(src_map
) < PAGE_SHIFT
))) &&
11619 vm_object_copy_quickly(
11620 VME_OBJECT_PTR(new_entry
),
11624 &new_entry_needs_copy
)) {
11625 new_entry
->needs_copy
= new_entry_needs_copy
;
11628 * Handle copy-on-write obligations
11631 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
11634 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
11636 if (override_nx(src_map
, VME_ALIAS(src_entry
))
11638 prot
|= VM_PROT_EXECUTE
;
11641 vm_object_pmap_protect(
11645 (src_entry
->is_shared
?
11648 VM_MAP_PAGE_SIZE(src_map
),
11649 src_entry
->vme_start
,
11652 assert(tmp_entry
->wired_count
== 0);
11653 tmp_entry
->needs_copy
= TRUE
;
11657 * The map has never been unlocked, so it's safe
11658 * to move to the next entry rather than doing
11662 goto CopySuccessful
;
11665 entry_was_shared
= tmp_entry
->is_shared
;
11668 * Take an object reference, so that we may
11669 * release the map lock(s).
11672 assert(src_object
!= VM_OBJECT_NULL
);
11673 vm_object_reference(src_object
);
11676 * Record the timestamp for later verification.
11680 version
.main_timestamp
= src_map
->timestamp
;
11681 vm_map_unlock(src_map
); /* Increments timestamp once! */
11682 saved_src_entry
= src_entry
;
11683 tmp_entry
= VM_MAP_ENTRY_NULL
;
11684 src_entry
= VM_MAP_ENTRY_NULL
;
11691 (debug4k_no_cow_copyin
&&
11692 VM_MAP_PAGE_SHIFT(src_map
) < PAGE_SHIFT
)) {
11694 vm_object_lock(src_object
);
11695 result
= vm_object_copy_slowly(
11700 VME_OBJECT_PTR(new_entry
));
11701 VME_OFFSET_SET(new_entry
,
11702 src_offset
- vm_object_trunc_page(src_offset
));
11703 new_entry
->needs_copy
= FALSE
;
11704 } else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
11705 (entry_was_shared
|| map_share
)) {
11706 vm_object_t new_object
;
11708 vm_object_lock_shared(src_object
);
11709 new_object
= vm_object_copy_delayed(
11714 if (new_object
== VM_OBJECT_NULL
) {
11718 VME_OBJECT_SET(new_entry
, new_object
);
11719 assert(new_entry
->wired_count
== 0);
11720 new_entry
->needs_copy
= TRUE
;
11721 assert(!new_entry
->iokit_acct
);
11722 assert(new_object
->purgable
== VM_PURGABLE_DENY
);
11723 assertf(new_entry
->use_pmap
, "src_map %p new_entry %p\n", src_map
, new_entry
);
11724 result
= KERN_SUCCESS
;
11726 vm_object_offset_t new_offset
;
11727 new_offset
= VME_OFFSET(new_entry
);
11728 result
= vm_object_copy_strategically(src_object
,
11731 VME_OBJECT_PTR(new_entry
),
11733 &new_entry_needs_copy
);
11734 if (new_offset
!= VME_OFFSET(new_entry
)) {
11735 VME_OFFSET_SET(new_entry
, new_offset
);
11738 new_entry
->needs_copy
= new_entry_needs_copy
;
11741 if (result
== KERN_SUCCESS
&&
11742 ((preserve_purgeable
&&
11743 src_object
->purgable
!= VM_PURGABLE_DENY
) ||
11744 new_entry
->used_for_jit
)) {
11746 * Purgeable objects should be COPY_NONE, true share;
11747 * this should be propogated to the copy.
11749 * Also force mappings the pmap specially protects to
11750 * be COPY_NONE; trying to COW these mappings would
11751 * change the effective protections, which could have
11752 * side effects if the pmap layer relies on the
11753 * specified protections.
11756 vm_object_t new_object
;
11758 new_object
= VME_OBJECT(new_entry
);
11759 assert(new_object
!= src_object
);
11760 vm_object_lock(new_object
);
11761 assert(new_object
->ref_count
== 1);
11762 assert(new_object
->shadow
== VM_OBJECT_NULL
);
11763 assert(new_object
->copy
== VM_OBJECT_NULL
);
11764 assert(new_object
->vo_owner
== NULL
);
11766 new_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
11768 if (preserve_purgeable
&&
11769 src_object
->purgable
!= VM_PURGABLE_DENY
) {
11770 new_object
->true_share
= TRUE
;
11772 /* start as non-volatile with no owner... */
11773 new_object
->purgable
= VM_PURGABLE_NONVOLATILE
;
11774 vm_purgeable_nonvolatile_enqueue(new_object
, NULL
);
11775 /* ... and move to src_object's purgeable state */
11776 if (src_object
->purgable
!= VM_PURGABLE_NONVOLATILE
) {
11778 state
= src_object
->purgable
;
11779 vm_object_purgable_control(
11781 VM_PURGABLE_SET_STATE_FROM_KERNEL
,
11784 /* no pmap accounting for purgeable objects */
11785 new_entry
->use_pmap
= FALSE
;
11788 vm_object_unlock(new_object
);
11789 new_object
= VM_OBJECT_NULL
;
11792 if (result
!= KERN_SUCCESS
&&
11793 result
!= KERN_MEMORY_RESTART_COPY
) {
11794 vm_map_lock(src_map
);
11799 * Throw away the extra reference
11802 vm_object_deallocate(src_object
);
11805 * Verify that the map has not substantially
11806 * changed while the copy was being made.
11809 vm_map_lock(src_map
);
11811 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
) {
11812 /* src_map hasn't changed: src_entry is still valid */
11813 src_entry
= saved_src_entry
;
11814 goto VerificationSuccessful
;
11818 * Simple version comparison failed.
11820 * Retry the lookup and verify that the
11821 * same object/offset are still present.
11823 * [Note: a memory manager that colludes with
11824 * the calling task can detect that we have
11825 * cheated. While the map was unlocked, the
11826 * mapping could have been changed and restored.]
11829 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
11830 if (result
!= KERN_MEMORY_RESTART_COPY
) {
11831 vm_object_deallocate(VME_OBJECT(new_entry
));
11832 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
11833 /* reset accounting state */
11834 new_entry
->iokit_acct
= FALSE
;
11835 new_entry
->use_pmap
= TRUE
;
11837 RETURN(KERN_INVALID_ADDRESS
);
11840 src_entry
= tmp_entry
;
11841 vm_map_clip_start(src_map
, src_entry
, src_start
);
11843 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
11845 ((src_entry
->max_protection
& VM_PROT_READ
) == 0)) {
11846 goto VerificationFailed
;
11849 if (src_entry
->vme_end
< new_entry
->vme_end
) {
11851 * This entry might have been shortened
11852 * (vm_map_clip_end) or been replaced with
11853 * an entry that ends closer to "src_start"
11855 * Adjust "new_entry" accordingly; copying
11856 * less memory would be correct but we also
11857 * redo the copy (see below) if the new entry
11858 * no longer points at the same object/offset.
11860 assert(VM_MAP_PAGE_ALIGNED(src_entry
->vme_end
,
11861 VM_MAP_COPY_PAGE_MASK(copy
)));
11862 new_entry
->vme_end
= src_entry
->vme_end
;
11863 src_size
= new_entry
->vme_end
- src_start
;
11864 } else if (src_entry
->vme_end
> new_entry
->vme_end
) {
11866 * This entry might have been extended
11867 * (vm_map_entry_simplify() or coalesce)
11868 * or been replaced with an entry that ends farther
11869 * from "src_start" than before.
11871 * We've called vm_object_copy_*() only on
11872 * the previous <start:end> range, so we can't
11873 * just extend new_entry. We have to re-do
11874 * the copy based on the new entry as if it was
11875 * pointing at a different object/offset (see
11876 * "Verification failed" below).
11880 if ((VME_OBJECT(src_entry
) != src_object
) ||
11881 (VME_OFFSET(src_entry
) != src_offset
) ||
11882 (src_entry
->vme_end
> new_entry
->vme_end
)) {
11884 * Verification failed.
11886 * Start over with this top-level entry.
11889 VerificationFailed
: ;
11891 vm_object_deallocate(VME_OBJECT(new_entry
));
11892 tmp_entry
= src_entry
;
11897 * Verification succeeded.
11900 VerificationSuccessful
:;
11902 if (result
== KERN_MEMORY_RESTART_COPY
) {
11913 * Link in the new copy entry.
11916 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
11920 * Determine whether the entire region
11923 src_base
= src_start
;
11924 src_start
= new_entry
->vme_end
;
11925 new_entry
= VM_MAP_ENTRY_NULL
;
11926 while ((src_start
>= src_end
) && (src_end
!= 0)) {
11929 if (src_map
== base_map
) {
11930 /* back to the top */
11935 assert(ptr
!= NULL
);
11936 parent_maps
= parent_maps
->next
;
11938 /* fix up the damage we did in that submap */
11939 vm_map_simplify_range(src_map
,
11943 vm_map_unlock(src_map
);
11944 vm_map_deallocate(src_map
);
11945 vm_map_lock(ptr
->parent_map
);
11946 src_map
= ptr
->parent_map
;
11947 src_base
= ptr
->base_start
;
11948 src_start
= ptr
->base_start
+ ptr
->base_len
;
11949 src_end
= ptr
->base_end
;
11950 if (!vm_map_lookup_entry(src_map
,
11953 (src_end
> src_start
)) {
11954 RETURN(KERN_INVALID_ADDRESS
);
11956 kfree(ptr
, sizeof(submap_map_t
));
11957 if (parent_maps
== NULL
) {
11960 src_entry
= tmp_entry
->vme_prev
;
11963 if ((VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) &&
11964 (src_start
>= src_addr
+ len
) &&
11965 (src_addr
+ len
!= 0)) {
			/*
			 * Stop copying now, even though we haven't reached
			 * "src_end". We'll adjust the end of the last copy
			 * entry at the end, if needed.
			 *
			 * If src_map's alignment is different from the
			 * system's page alignment, there could be
			 * extra non-map-aligned map entries between
			 * the original (non-rounded) "src_addr + len"
			 * and the rounded "src_end".
			 * We do not want to copy those map entries since
			 * they're not part of the copied range.
			 */
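			/*
			 * Added cross-reference (not original source): the
			 * trimming mentioned here is the "adjust alignment of
			 * last copy_entry's vme_end" pass performed after the
			 * copy loop, which pulls the final copy entry back to
			 * the caller's requested range.
			 */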
11982 if ((src_start
>= src_end
) && (src_end
!= 0)) {
11987 * Verify that there are no gaps in the region
11990 tmp_entry
= src_entry
->vme_next
;
11991 if ((tmp_entry
->vme_start
!= src_start
) ||
11992 (tmp_entry
== vm_map_to_entry(src_map
))) {
11993 RETURN(KERN_INVALID_ADDRESS
);
11998 * If the source should be destroyed, do it now, since the
11999 * copy was successful.
12002 (void) vm_map_delete(
12004 vm_map_trunc_page(src_addr
,
12005 VM_MAP_PAGE_MASK(src_map
)),
12007 ((src_map
== kernel_map
) ?
12008 VM_MAP_REMOVE_KUNWIRE
:
12009 VM_MAP_REMOVE_NO_FLAGS
),
12012 /* fix up the damage we did in the base map */
12013 vm_map_simplify_range(
12015 vm_map_trunc_page(src_addr
,
12016 VM_MAP_PAGE_MASK(src_map
)),
12017 vm_map_round_page(src_end
,
12018 VM_MAP_PAGE_MASK(src_map
)));
12021 vm_map_unlock(src_map
);
12022 tmp_entry
= VM_MAP_ENTRY_NULL
;
12024 if (VM_MAP_PAGE_SHIFT(src_map
) > PAGE_SHIFT
&&
12025 VM_MAP_PAGE_SHIFT(src_map
) != VM_MAP_COPY_PAGE_SHIFT(copy
)) {
12026 vm_map_offset_t original_start
, original_offset
, original_end
;
12028 assert(VM_MAP_COPY_PAGE_MASK(copy
) == PAGE_MASK
);
12030 /* adjust alignment of first copy_entry's "vme_start" */
12031 tmp_entry
= vm_map_copy_first_entry(copy
);
12032 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
12033 vm_map_offset_t adjustment
;
12035 original_start
= tmp_entry
->vme_start
;
12036 original_offset
= VME_OFFSET(tmp_entry
);
12038 /* map-align the start of the first copy entry... */
12039 adjustment
= (tmp_entry
->vme_start
-
12041 tmp_entry
->vme_start
,
12042 VM_MAP_PAGE_MASK(src_map
)));
12043 tmp_entry
->vme_start
-= adjustment
;
12044 VME_OFFSET_SET(tmp_entry
,
12045 VME_OFFSET(tmp_entry
) - adjustment
);
12046 copy_addr
-= adjustment
;
12047 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12048 /* ... adjust for mis-aligned start of copy range */
12050 (vm_map_trunc_page(copy
->offset
,
12052 vm_map_trunc_page(copy
->offset
,
12053 VM_MAP_PAGE_MASK(src_map
)));
12055 assert(page_aligned(adjustment
));
12056 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
12057 tmp_entry
->vme_start
+= adjustment
;
12058 VME_OFFSET_SET(tmp_entry
,
12059 (VME_OFFSET(tmp_entry
) +
12061 copy_addr
+= adjustment
;
12062 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12066 * Assert that the adjustments haven't exposed
12067 * more than was originally copied...
12069 assert(tmp_entry
->vme_start
>= original_start
);
12070 assert(VME_OFFSET(tmp_entry
) >= original_offset
);
			 * ... and that it did not adjust outside of
			 * a single 16K page.
12075 assert(vm_map_trunc_page(tmp_entry
->vme_start
,
12076 VM_MAP_PAGE_MASK(src_map
)) ==
12077 vm_map_trunc_page(original_start
,
12078 VM_MAP_PAGE_MASK(src_map
)));
12081 /* adjust alignment of last copy_entry's "vme_end" */
12082 tmp_entry
= vm_map_copy_last_entry(copy
);
12083 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
12084 vm_map_offset_t adjustment
;
12086 original_end
= tmp_entry
->vme_end
;
12088 /* map-align the end of the last copy entry... */
12089 tmp_entry
->vme_end
=
12090 vm_map_round_page(tmp_entry
->vme_end
,
12091 VM_MAP_PAGE_MASK(src_map
));
12092 /* ... adjust for mis-aligned end of copy range */
12094 (vm_map_round_page((copy
->offset
+
12096 VM_MAP_PAGE_MASK(src_map
)) -
12097 vm_map_round_page((copy
->offset
+
12101 assert(page_aligned(adjustment
));
12102 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
12103 tmp_entry
->vme_end
-= adjustment
;
12104 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12108 * Assert that the adjustments haven't exposed
12109 * more than was originally copied...
12111 assert(tmp_entry
->vme_end
<= original_end
);
			 * ... and that it did not adjust outside of
			 * a single 16K page.
12116 assert(vm_map_round_page(tmp_entry
->vme_end
,
12117 VM_MAP_PAGE_MASK(src_map
)) ==
12118 vm_map_round_page(original_end
,
12119 VM_MAP_PAGE_MASK(src_map
)));
12123 /* Fix-up start and end points in copy. This is necessary */
12124 /* when the various entries in the copy object were picked */
12125 /* up from different sub-maps */
12127 tmp_entry
= vm_map_copy_first_entry(copy
);
12128 copy_size
= 0; /* compute actual size */
12129 while (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
12130 assert(VM_MAP_PAGE_ALIGNED(
12131 copy_addr
+ (tmp_entry
->vme_end
-
12132 tmp_entry
->vme_start
),
12133 MIN(VM_MAP_COPY_PAGE_MASK(copy
), PAGE_MASK
)));
12134 assert(VM_MAP_PAGE_ALIGNED(
12136 MIN(VM_MAP_COPY_PAGE_MASK(copy
), PAGE_MASK
)));
12139 * The copy_entries will be injected directly into the
12140 * destination map and might not be "map aligned" there...
12142 tmp_entry
->map_aligned
= FALSE
;
12144 tmp_entry
->vme_end
= copy_addr
+
12145 (tmp_entry
->vme_end
- tmp_entry
->vme_start
);
12146 tmp_entry
->vme_start
= copy_addr
;
12147 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12148 copy_addr
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
12149 copy_size
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
12150 tmp_entry
= (struct vm_map_entry
*)tmp_entry
->vme_next
;
12153 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
&&
12154 copy_size
< copy
->size
) {
12156 * The actual size of the VM map copy is smaller than what
12157 * was requested by the caller. This must be because some
12158 * PAGE_SIZE-sized pages are missing at the end of the last
12159 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12160 * The caller might not have been aware of those missing
12161 * pages and might not want to be aware of it, which is
12162 * fine as long as they don't try to access (and crash on)
12163 * those missing pages.
12164 * Let's adjust the size of the "copy", to avoid failing
12165 * in vm_map_copyout() or vm_map_copy_overwrite().
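		/*
		 * Added example (not original source), assuming a 16K-page
		 * src_map on a 4K-page system: if the caller asked for
		 * 0x8000 bytes but the last 16K chunk only had 0x2000 worth
		 * of 4K mappings, copy_size ends up 0x6000 while copy->size
		 * is 0x8000; both round up to 0x8000, so the assert below
		 * holds and copy->size is trimmed to what was actually
		 * copied.
		 */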
12167 assert(vm_map_round_page(copy_size
,
12168 VM_MAP_PAGE_MASK(src_map
)) ==
12169 vm_map_round_page(copy
->size
,
12170 VM_MAP_PAGE_MASK(src_map
)));
12171 copy
->size
= copy_size
;
12174 *copy_result
= copy
;
12175 return KERN_SUCCESS
;
vm_map_copy_extract(
	vm_map_address_t        src_addr,
	vm_map_copy_t           *copy_result,   /* OUT */
	vm_prot_t               *cur_prot,      /* IN/OUT */
	vm_prot_t               *max_prot,      /* IN/OUT */
	vm_inherit_t            inheritance,
	vm_map_kernel_flags_t   vmk_flags)
{
	vm_map_copy_t copy;
	vm_prot_t required_cur_prot, required_max_prot;

	/*
	 * Check for copies of zero bytes.
	 */
		*copy_result = VM_MAP_COPY_NULL;
		return KERN_SUCCESS;

	/*
	 * Check that the end address doesn't overflow
	 */
	if (src_addr + len < src_addr) {
		return KERN_INVALID_ADDRESS;
	}

	if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
		DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
	}

	required_cur_prot = *cur_prot;
	required_max_prot = *max_prot;

	/*
	 * Allocate a header element for the list.
	 *
	 * Use the start and end in the header to
	 * remember the endpoints prior to rounding.
	 */
	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;

	vm_map_store_init(&copy->cpy_hdr);

	kr = vm_map_remap_extract(src_map,
	    do_copy,            /* copy */
	    cur_prot,           /* IN/OUT */
	    max_prot,           /* IN/OUT */
	if (kr != KERN_SUCCESS) {
		vm_map_copy_discard(copy);
	}

	if (required_cur_prot != VM_PROT_NONE) {
		assert((*cur_prot & required_cur_prot) == required_cur_prot);
		assert((*max_prot & required_max_prot) == required_max_prot);
	}

	*copy_result = copy;
	return KERN_SUCCESS;
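/*
 * Added summary (not original source): vm_map_copy_extract() builds a
 * VM_MAP_COPY_ENTRY_LIST copy by delegating the per-entry work to
 * vm_map_remap_extract(); on success it asserts that the protections
 * reported back through *cur_prot / *max_prot cover whatever the caller
 * required on entry, then hands the copy back through *copy_result.
 */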
/*
 * vm_map_copyin_object:
 *
 * Create a copy object from an object.
 * Our caller donates an object reference.
 */
vm_map_copyin_object(
	vm_object_t             object,
	vm_object_offset_t      offset,         /* offset of region in object */
	vm_object_size_t        size,           /* size of region in object */
	vm_map_copy_t           *copy_result)   /* OUT */
{
	vm_map_copy_t copy;             /* Resulting copy */

	/*
	 * We drop the object into a special copy object
	 * that contains the object directly.
	 */
	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = object;
	copy->offset = offset;

	*copy_result = copy;
	return KERN_SUCCESS;
}
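/*
 * Usage sketch (illustrative only, not an actual caller in this file;
 * "object", "offset" and "size" are assumed to be set up elsewhere):
 *
 *	vm_map_copy_t copy;
 *	vm_object_reference(object);            -- donate a reference
 *	kern_return_t kr = vm_map_copyin_object(object, offset, size, &copy);
 *	assert(kr == KERN_SUCCESS);
 *	-- "copy" now wraps the object directly (VM_MAP_COPY_OBJECT)
 */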
12291 vm_map_entry_t old_entry
,
12294 vm_object_t object
;
12295 vm_map_entry_t new_entry
;
12298 * New sharing code. New map entry
12299 * references original object. Internal
12300 * objects use asynchronous copy algorithm for
12301 * future copies. First make sure we have
12302 * the right object. If we need a shadow,
12303 * or someone else already has one, then
12304 * make a new shadow and share it.
12307 object
= VME_OBJECT(old_entry
);
12308 if (old_entry
->is_sub_map
) {
12309 assert(old_entry
->wired_count
== 0);
12310 #ifndef NO_NESTED_PMAP
12311 if (old_entry
->use_pmap
) {
12312 kern_return_t result
;
12314 result
= pmap_nest(new_map
->pmap
,
12315 (VME_SUBMAP(old_entry
))->pmap
,
12316 (addr64_t
)old_entry
->vme_start
,
12317 (uint64_t)(old_entry
->vme_end
- old_entry
->vme_start
));
12319 panic("vm_map_fork_share: pmap_nest failed!");
12322 #endif /* NO_NESTED_PMAP */
12323 } else if (object
== VM_OBJECT_NULL
) {
12324 object
= vm_object_allocate((vm_map_size_t
)(old_entry
->vme_end
-
12325 old_entry
->vme_start
));
12326 VME_OFFSET_SET(old_entry
, 0);
12327 VME_OBJECT_SET(old_entry
, object
);
12328 old_entry
->use_pmap
= TRUE
;
12329 // assert(!old_entry->needs_copy);
12330 } else if (object
->copy_strategy
!=
12331 MEMORY_OBJECT_COPY_SYMMETRIC
) {
12333 * We are already using an asymmetric
12334 * copy, and therefore we already have
12335 * the right object.
12338 assert(!old_entry
->needs_copy
);
12339 } else if (old_entry
->needs_copy
|| /* case 1 */
12340 object
->shadowed
|| /* case 2 */
12341 (!object
->true_share
&& /* case 3 */
12342 !old_entry
->is_shared
&&
12344 (vm_map_size_t
)(old_entry
->vme_end
-
12345 old_entry
->vme_start
)))) {
12347 * We need to create a shadow.
12348 * There are three cases here.
12349 * In the first case, we need to
12350 * complete a deferred symmetrical
12351 * copy that we participated in.
12352 * In the second and third cases,
12353 * we need to create the shadow so
12354 * that changes that we make to the
12355 * object do not interfere with
12356 * any symmetrical copies which
	 * have occurred (case 2) or which
12358 * might occur (case 3).
12360 * The first case is when we had
12361 * deferred shadow object creation
12362 * via the entry->needs_copy mechanism.
12363 * This mechanism only works when
12364 * only one entry points to the source
12365 * object, and we are about to create
12366 * a second entry pointing to the
12367 * same object. The problem is that
12368 * there is no way of mapping from
12369 * an object to the entries pointing
	 * to it. (Deferred shadow creation
	 * works with one entry because it occurs
	 * at fault time, and we walk from the
	 * entry to the object when handling
12376 * The second case is when the object
12377 * to be shared has already been copied
12378 * with a symmetric copy, but we point
12379 * directly to the object without
12380 * needs_copy set in our entry. (This
12381 * can happen because different ranges
12382 * of an object can be pointed to by
12383 * different entries. In particular,
12384 * a single entry pointing to an object
12385 * can be split by a call to vm_inherit,
12386 * which, combined with task_create, can
12387 * result in the different entries
12388 * having different needs_copy values.)
12389 * The shadowed flag in the object allows
12390 * us to detect this case. The problem
12391 * with this case is that if this object
12392 * has or will have shadows, then we
12393 * must not perform an asymmetric copy
12394 * of this object, since such a copy
12395 * allows the object to be changed, which
12396 * will break the previous symmetrical
12397 * copies (which rely upon the object
12398 * not changing). In a sense, the shadowed
12399 * flag says "don't change this object".
12400 * We fix this by creating a shadow
12401 * object for this object, and sharing
12402 * that. This works because we are free
12403 * to change the shadow object (and thus
12404 * to use an asymmetric copy strategy);
12405 * this is also semantically correct,
12406 * since this object is temporary, and
12407 * therefore a copy of the object is
12408 * as good as the object itself. (This
12409 * is not true for permanent objects,
12410 * since the pager needs to see changes,
12411 * which won't happen if the changes
12412 * are made to a copy.)
12414 * The third case is when the object
12415 * to be shared has parts sticking
12416 * outside of the entry we're working
12417 * with, and thus may in the future
12418 * be subject to a symmetrical copy.
12419 * (This is a preemptive version of
12422 VME_OBJECT_SHADOW(old_entry
,
12423 (vm_map_size_t
) (old_entry
->vme_end
-
12424 old_entry
->vme_start
));
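		/*
		 * Added note (not original source): VME_OBJECT_SHADOW() has
		 * just replaced the entry's object with a freshly created
		 * shadow covering the entry's range; the original object
		 * becomes the shadow's backing object, so this entry can now
		 * be changed without disturbing earlier symmetric copies.
		 */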
12427 * If we're making a shadow for other than
12428 * copy on write reasons, then we have
12429 * to remove write permission.
12432 if (!old_entry
->needs_copy
&&
12433 (old_entry
->protection
& VM_PROT_WRITE
)) {
12436 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, old_entry
->protection
));
12438 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
12440 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, prot
));
12442 if (override_nx(old_map
, VME_ALIAS(old_entry
)) && prot
) {
12443 prot
|= VM_PROT_EXECUTE
;
12447 if (old_map
->mapped_in_other_pmaps
) {
12448 vm_object_pmap_protect(
12449 VME_OBJECT(old_entry
),
12450 VME_OFFSET(old_entry
),
12451 (old_entry
->vme_end
-
12452 old_entry
->vme_start
),
12455 old_entry
->vme_start
,
12458 pmap_protect(old_map
->pmap
,
12459 old_entry
->vme_start
,
12460 old_entry
->vme_end
,
12465 old_entry
->needs_copy
= FALSE
;
12466 object
= VME_OBJECT(old_entry
);
12471 * If object was using a symmetric copy strategy,
12472 * change its copy strategy to the default
12473 * asymmetric copy strategy, which is copy_delay
12474 * in the non-norma case and copy_call in the
12475 * norma case. Bump the reference count for the
12479 if (old_entry
->is_sub_map
) {
12480 vm_map_lock(VME_SUBMAP(old_entry
));
12481 vm_map_reference(VME_SUBMAP(old_entry
));
12482 vm_map_unlock(VME_SUBMAP(old_entry
));
12484 vm_object_lock(object
);
12485 vm_object_reference_locked(object
);
12486 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
12487 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
12489 vm_object_unlock(object
);
12493 * Clone the entry, using object ref from above.
12494 * Mark both entries as shared.
12497 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* Never the kernel
12498 * map or descendants */
12499 vm_map_entry_copy(old_map
, new_entry
, old_entry
);
12500 old_entry
->is_shared
= TRUE
;
12501 new_entry
->is_shared
= TRUE
;
12504 * We're dealing with a shared mapping, so the resulting mapping
12505 * should inherit some of the original mapping's accounting settings.
12506 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12507 * "use_pmap" should stay the same as before (if it hasn't been reset
12508 * to TRUE when we cleared "iokit_acct").
12510 assert(!new_entry
->iokit_acct
);
	 * If the old entry's inheritance is VM_INHERIT_NONE,
	 * the new entry is for a corpse fork; remove the
	 * write permission from the new entry.
12517 if (old_entry
->inheritance
== VM_INHERIT_NONE
) {
12518 new_entry
->protection
&= ~VM_PROT_WRITE
;
12519 new_entry
->max_protection
&= ~VM_PROT_WRITE
;
12523 * Insert the entry into the new map -- we
12524 * know we're inserting at the end of the new
12528 vm_map_store_entry_link(new_map
, vm_map_last_entry(new_map
), new_entry
,
12529 VM_MAP_KERNEL_FLAGS_NONE
);
12532 * Update the physical map
12535 if (old_entry
->is_sub_map
) {
12536 /* Bill Angell pmap support goes here */
12538 pmap_copy(new_map
->pmap
, old_map
->pmap
, new_entry
->vme_start
,
12539 old_entry
->vme_end
- old_entry
->vme_start
,
12540 old_entry
->vme_start
);
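	/*
	 * Added note (not original source): at this point both the parent's
	 * and the child's entries are marked is_shared and reference the
	 * same object (or submap); for the non-submap case, pmap_copy()
	 * above is given the chance to pre-populate the child's pmap for the
	 * shared range.
	 */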
12547 vm_map_entry_t
*old_entry_p
,
12549 int vm_map_copyin_flags
)
12551 vm_map_entry_t old_entry
= *old_entry_p
;
12552 vm_map_size_t entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
12553 vm_map_offset_t start
= old_entry
->vme_start
;
12554 vm_map_copy_t copy
;
12555 vm_map_entry_t last
= vm_map_last_entry(new_map
);
12557 vm_map_unlock(old_map
);
12559 * Use maxprot version of copyin because we
12560 * care about whether this memory can ever
12561 * be accessed, not just whether it's accessible
12564 vm_map_copyin_flags
|= VM_MAP_COPYIN_USE_MAXPROT
;
	if (vm_map_copyin_internal(old_map, start, entry_size,
	    vm_map_copyin_flags, &copy)
12569 * The map might have changed while it
12570 * was unlocked, check it again. Skip
12571 * any blank space or permanently
12572 * unreadable region.
12574 vm_map_lock(old_map
);
12575 if (!vm_map_lookup_entry(old_map
, start
, &last
) ||
12576 (last
->max_protection
& VM_PROT_READ
) == VM_PROT_NONE
) {
12577 last
= last
->vme_next
;
12579 *old_entry_p
= last
;
12582 * XXX For some error returns, want to
12583 * XXX skip to the next element. Note
12584 * that INVALID_ADDRESS and
12585 * PROTECTION_FAILURE are handled above.
12592 * Assert that the vm_map_copy is coming from the right
12593 * zone and hasn't been forged
12595 vm_map_copy_require(copy
);
12598 * Insert the copy into the new map
12600 vm_map_copy_insert(new_map
, last
, copy
);
12603 * Pick up the traversal at the end of
12604 * the copied region.
12607 vm_map_lock(old_map
);
12608 start
+= entry_size
;
12609 if (!vm_map_lookup_entry(old_map
, start
, &last
)) {
12610 last
= last
->vme_next
;
12612 if (last
->vme_start
== start
) {
12614 * No need to clip here and we don't
12615 * want to cause any unnecessary
12619 vm_map_clip_start(old_map
, last
, start
);
12622 *old_entry_p
= last
;
12630 * Create and return a new map based on the old
12631 * map, according to the inheritance values on the
12632 * regions in that map and the options.
12634 * The source map must not be locked.
12644 vm_map_entry_t old_entry
;
12645 vm_map_size_t new_size
= 0, entry_size
;
12646 vm_map_entry_t new_entry
;
12647 boolean_t src_needs_copy
;
12648 boolean_t new_entry_needs_copy
;
12649 boolean_t pmap_is64bit
;
12650 int vm_map_copyin_flags
;
12651 vm_inherit_t old_entry_inheritance
;
12652 int map_create_options
;
12653 kern_return_t footprint_collect_kr
;
12655 if (options
& ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE
|
12656 VM_MAP_FORK_PRESERVE_PURGEABLE
|
12657 VM_MAP_FORK_CORPSE_FOOTPRINT
)) {
12658 /* unsupported option */
12659 return VM_MAP_NULL
;
12663 #if defined(__i386__) || defined(__x86_64__)
12664 old_map
->pmap
->pm_task_map
!= TASK_MAP_32BIT
;
12665 #elif defined(__arm64__)
12666 old_map
->pmap
->max
== MACH_VM_MAX_ADDRESS
;
12667 #elif defined(__arm__)
12670 #error Unknown architecture.
12673 unsigned int pmap_flags
= 0;
12674 pmap_flags
|= pmap_is64bit
? PMAP_CREATE_64BIT
: 0;
12675 #if defined(HAS_APPLE_PAC)
12676 pmap_flags
|= old_map
->pmap
->disable_jop
? PMAP_CREATE_DISABLE_JOP
: 0;
12678 #if PMAP_CREATE_FORCE_4K_PAGES
12679 if (VM_MAP_PAGE_SIZE(old_map
) == FOURK_PAGE_SIZE
&&
12680 PAGE_SIZE
!= FOURK_PAGE_SIZE
) {
12681 pmap_flags
|= PMAP_CREATE_FORCE_4K_PAGES
;
12683 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12684 new_pmap
= pmap_create_options(ledger
, (vm_map_size_t
) 0, pmap_flags
);
12686 vm_map_reference(old_map
);
12687 vm_map_lock(old_map
);
12689 map_create_options
= 0;
12690 if (old_map
->hdr
.entries_pageable
) {
12691 map_create_options
|= VM_MAP_CREATE_PAGEABLE
;
12693 if (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) {
12694 map_create_options
|= VM_MAP_CREATE_CORPSE_FOOTPRINT
;
12695 footprint_collect_kr
= KERN_SUCCESS
;
12697 new_map
= vm_map_create_options(new_pmap
,
12698 old_map
->min_offset
,
12699 old_map
->max_offset
,
12700 map_create_options
);
12701 /* inherit cs_enforcement */
12702 vm_map_cs_enforcement_set(new_map
, old_map
->cs_enforcement
);
12703 vm_map_lock(new_map
);
12704 vm_commit_pagezero_status(new_map
);
12705 /* inherit the parent map's page size */
12706 vm_map_set_page_shift(new_map
, VM_MAP_PAGE_SHIFT(old_map
));
12708 old_entry
= vm_map_first_entry(old_map
);
12709 old_entry
!= vm_map_to_entry(old_map
);
12711 entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
12713 old_entry_inheritance
= old_entry
->inheritance
;
	 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
	 * share VM_INHERIT_NONE entries that are not backed by a
12719 if (old_entry_inheritance
== VM_INHERIT_NONE
&&
12720 (options
& VM_MAP_FORK_SHARE_IF_INHERIT_NONE
) &&
12721 (old_entry
->protection
& VM_PROT_READ
) &&
12722 !(!old_entry
->is_sub_map
&&
12723 VME_OBJECT(old_entry
) != NULL
&&
12724 VME_OBJECT(old_entry
)->pager
!= NULL
&&
12725 is_device_pager_ops(
12726 VME_OBJECT(old_entry
)->pager
->mo_pager_ops
))) {
12727 old_entry_inheritance
= VM_INHERIT_SHARE
;
12730 if (old_entry_inheritance
!= VM_INHERIT_NONE
&&
12731 (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) &&
12732 footprint_collect_kr
== KERN_SUCCESS
) {
12734 * The corpse won't have old_map->pmap to query
12735 * footprint information, so collect that data now
12736 * and store it in new_map->vmmap_corpse_footprint
12737 * for later autopsy.
12739 footprint_collect_kr
=
12740 vm_map_corpse_footprint_collect(old_map
,
12745 switch (old_entry_inheritance
) {
12746 case VM_INHERIT_NONE
:
12749 case VM_INHERIT_SHARE
:
12750 vm_map_fork_share(old_map
, old_entry
, new_map
);
12751 new_size
+= entry_size
;
12754 case VM_INHERIT_COPY
:
12757 * Inline the copy_quickly case;
12758 * upon failure, fall back on call
12759 * to vm_map_fork_copy.
12762 if (old_entry
->is_sub_map
) {
12765 if ((old_entry
->wired_count
!= 0) ||
12766 ((VME_OBJECT(old_entry
) != NULL
) &&
12767 (VME_OBJECT(old_entry
)->true_share
))) {
12768 goto slow_vm_map_fork_copy
;
12771 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* never the kernel map or descendants */
12772 vm_map_entry_copy(old_map
, new_entry
, old_entry
);
12774 if (new_entry
->used_for_jit
== TRUE
&& new_map
->jit_entry_exists
== FALSE
) {
12775 new_map
->jit_entry_exists
= TRUE
;
12778 if (new_entry
->is_sub_map
) {
12779 /* clear address space specifics */
12780 new_entry
->use_pmap
= FALSE
;
12783 * We're dealing with a copy-on-write operation,
12784 * so the resulting mapping should not inherit
12785 * the original mapping's accounting settings.
12786 * "iokit_acct" should have been cleared in
12787 * vm_map_entry_copy().
12788 * "use_pmap" should be reset to its default
12789 * (TRUE) so that the new mapping gets
12790 * accounted for in the task's memory footprint.
12792 assert(!new_entry
->iokit_acct
);
12793 new_entry
->use_pmap
= TRUE
;
12796 if (!vm_object_copy_quickly(
12797 VME_OBJECT_PTR(new_entry
),
12798 VME_OFFSET(old_entry
),
12799 (old_entry
->vme_end
-
12800 old_entry
->vme_start
),
12802 &new_entry_needs_copy
)) {
12803 vm_map_entry_dispose(new_map
, new_entry
);
12804 goto slow_vm_map_fork_copy
;
12808 * Handle copy-on-write obligations
12811 if (src_needs_copy
&& !old_entry
->needs_copy
) {
12814 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, old_entry
->protection
));
12816 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
12818 if (override_nx(old_map
, VME_ALIAS(old_entry
))
12820 prot
|= VM_PROT_EXECUTE
;
12823 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, prot
));
12825 vm_object_pmap_protect(
12826 VME_OBJECT(old_entry
),
12827 VME_OFFSET(old_entry
),
12828 (old_entry
->vme_end
-
12829 old_entry
->vme_start
),
12830 ((old_entry
->is_shared
12831 || old_map
->mapped_in_other_pmaps
)
12834 VM_MAP_PAGE_SIZE(old_map
),
12835 old_entry
->vme_start
,
12838 assert(old_entry
->wired_count
== 0);
12839 old_entry
->needs_copy
= TRUE
;
12841 new_entry
->needs_copy
= new_entry_needs_copy
;
12844 * Insert the entry at the end
12848 vm_map_store_entry_link(new_map
,
12849 vm_map_last_entry(new_map
),
12851 VM_MAP_KERNEL_FLAGS_NONE
);
12852 new_size
+= entry_size
;
12855 slow_vm_map_fork_copy
:
12856 vm_map_copyin_flags
= 0;
12857 if (options
& VM_MAP_FORK_PRESERVE_PURGEABLE
) {
12858 vm_map_copyin_flags
|=
12859 VM_MAP_COPYIN_PRESERVE_PURGEABLE
;
12861 if (vm_map_fork_copy(old_map
,
12864 vm_map_copyin_flags
)) {
12865 new_size
+= entry_size
;
12869 old_entry
= old_entry
->vme_next
;
12872 #if defined(__arm64__)
12873 pmap_insert_sharedpage(new_map
->pmap
);
12874 #endif /* __arm64__ */
12876 new_map
->size
= new_size
;
12878 if (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) {
12879 vm_map_corpse_footprint_collect_done(new_map
);
12882 /* Propagate JIT entitlement for the pmap layer. */
12883 if (pmap_get_jit_entitled(old_map
->pmap
)) {
12884 /* Tell the pmap that it supports JIT. */
12885 pmap_set_jit_entitled(new_map
->pmap
);
12888 vm_map_unlock(new_map
);
12889 vm_map_unlock(old_map
);
12890 vm_map_deallocate(old_map
);
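/*
 * Added summary (not original source): vm_map_fork() walks the parent's
 * entries and, per the cases above, skips VM_INHERIT_NONE entries (unless
 * VM_MAP_FORK_SHARE_IF_INHERIT_NONE promotes them to share), shares
 * VM_INHERIT_SHARE entries via vm_map_fork_share(), and handles
 * VM_INHERIT_COPY entries with an inline vm_object_copy_quickly() fast path
 * that falls back to vm_map_fork_copy() when the entry is wired or the
 * object is truly shared.
 */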
/*
 * Set up the "new_map" with the proper execution environment according
 * to the type of executable (platform, 64bit, chroot environment).
 * Map the comm page and shared region, etc...
 */
	cpu_subtype_t   cpu_subtype,

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
		(void *)VM_KERNEL_ADDRPERM(current_task()),
		(void *)VM_KERNEL_ADDRPERM(new_map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),

	(void) vm_commpage_enter(new_map, task, is64bit);

	(void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide);

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
		(void *)VM_KERNEL_ADDRPERM(current_task()),
		(void *)VM_KERNEL_ADDRPERM(new_map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
/*
 * Some devices have region(s) of memory that shouldn't get allocated by
 * user processes. The following code creates dummy vm_map_entry_t's for each
 * of the regions that need to be reserved to prevent any allocations in
 * them.
 */
	kern_return_t kr = KERN_FAILURE;
	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_permanent = TRUE;
	vmk_flags.vmkf_beyond_max = TRUE;

	struct vm_reserved_region *regions = NULL;
	size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
	assert((num_regions == 0) || (num_regions > 0 && regions != NULL));

	for (size_t i = 0; i < num_regions; ++i) {
		    &regions[i].vmrr_addr,
		    regions[i].vmrr_size,
		    (vm_map_offset_t)0,
		    VM_KERN_MEMORY_NONE,
		    (vm_object_offset_t)0,

		if (kr != KERN_SUCCESS) {
			panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
		}
	}

	new_map->reserved_regions = (num_regions ? TRUE : FALSE);

	return KERN_SUCCESS;
uint64_t vm_map_lookup_locked_copy_slowly_count = 0;
uint64_t vm_map_lookup_locked_copy_slowly_size = 0;
uint64_t vm_map_lookup_locked_copy_slowly_max = 0;
uint64_t vm_map_lookup_locked_copy_slowly_restart = 0;
uint64_t vm_map_lookup_locked_copy_slowly_error = 0;
uint64_t vm_map_lookup_locked_copy_strategically_count = 0;
uint64_t vm_map_lookup_locked_copy_strategically_size = 0;
uint64_t vm_map_lookup_locked_copy_strategically_max = 0;
uint64_t vm_map_lookup_locked_copy_strategically_restart = 0;
uint64_t vm_map_lookup_locked_copy_strategically_error = 0;
uint64_t vm_map_lookup_locked_copy_shadow_count = 0;
uint64_t vm_map_lookup_locked_copy_shadow_size = 0;
uint64_t vm_map_lookup_locked_copy_shadow_max = 0;
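/*
 * Added note (not original source): these counters instrument the three
 * copy-on-write strategies taken for submap entries in
 * vm_map_lookup_locked() below -- vm_object_copy_slowly(),
 * vm_object_copy_strategically(), and the plain shadow setup -- recording
 * how often each path ran, how many bytes it handled, the largest single
 * request, plus restarts and errors for the first two.
 */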
/*
 * vm_map_lookup_locked:
 *
 * Finds the VM object, offset, and
 * protection for a given virtual address in the
 * specified map, assuming a page fault of the
 * type specified.
 *
 * Returns the (object, offset, protection) for
 * this address, whether it is wired down, and whether
 * this map has the only reference to the data in question.
 * In order to later verify this lookup, a "version"
 * is returned.
 *
 * If contended != NULL, *contended will be set to
 * true iff the thread had to spin or block to acquire
 * an exclusive lock.
 *
 * The map MUST be locked by the caller and WILL be
 * locked on exit. In order to guarantee the
 * existence of the returned object, it is returned
 * locked.
 *
 * If a lookup is requested with "write protection"
 * specified, the map may be changed to perform virtual
 * copying operations, although the data referenced will
 * remain the same.
 */
vm_map_lookup_locked(
	vm_map_t                *var_map,       /* IN/OUT */
	vm_map_offset_t         vaddr,
	vm_prot_t               fault_type,
	int                     object_lock_type,
	vm_map_version_t        *out_version,   /* OUT */
	vm_object_t             *object,        /* OUT */
	vm_object_offset_t      *offset,        /* OUT */
	vm_prot_t               *out_prot,      /* OUT */
	boolean_t               *wired,         /* OUT */
	vm_object_fault_info_t  fault_info,     /* OUT */
	vm_map_t                *real_map,      /* OUT */
	bool                    *contended)     /* OUT */
{
	vm_map_entry_t entry;
	vm_map_t map = *var_map;
	vm_map_t old_map = *var_map;
	vm_map_t cow_sub_map_parent = VM_MAP_NULL;
	vm_map_offset_t cow_parent_vaddr = 0;
	vm_map_offset_t old_start = 0;
	vm_map_offset_t old_end = 0;
	boolean_t mask_protections;
	boolean_t force_copy;
	boolean_t no_force_copy_if_executable;
	boolean_t submap_needed_copy;
	vm_prot_t original_fault_type;
	vm_map_size_t fault_page_mask;

	/*
	 * VM_PROT_MASK means that the caller wants us to use "fault_type"
	 * as a mask against the mapping's actual protections, not as an
	 * absolute value.
	 */
	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
	force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
	no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
	fault_type &= VM_PROT_ALL;
	original_fault_type = fault_type;

	*contended = false;

	fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
	vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);

	fault_type = original_fault_type;
13067 * If the map has an interesting hint, try it before calling
13068 * full blown lookup routine.
13072 if ((entry
== vm_map_to_entry(map
)) ||
13073 (vaddr
< entry
->vme_start
) || (vaddr
>= entry
->vme_end
)) {
13074 vm_map_entry_t tmp_entry
;
13077 * Entry was either not a valid hint, or the vaddr
13078 * was not contained in the entry, so do a full lookup.
13080 if (!vm_map_lookup_entry(map
, vaddr
, &tmp_entry
)) {
13081 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13082 vm_map_unlock(cow_sub_map_parent
);
13084 if ((*real_map
!= map
)
13085 && (*real_map
!= cow_sub_map_parent
)) {
13086 vm_map_unlock(*real_map
);
13088 return KERN_INVALID_ADDRESS
;
13093 if (map
== old_map
) {
13094 old_start
= entry
->vme_start
;
13095 old_end
= entry
->vme_end
;
13099 * Handle submaps. Drop lock on upper map, submap is
13103 submap_needed_copy
= FALSE
;
13105 if (entry
->is_sub_map
) {
13106 vm_map_offset_t local_vaddr
;
13107 vm_map_offset_t end_delta
;
13108 vm_map_offset_t start_delta
;
13109 vm_map_entry_t submap_entry
, saved_submap_entry
;
13110 vm_object_offset_t submap_entry_offset
;
13111 vm_object_size_t submap_entry_size
;
13112 vm_prot_t subentry_protection
;
13113 vm_prot_t subentry_max_protection
;
13114 boolean_t subentry_no_copy_on_read
;
13115 boolean_t mapped_needs_copy
= FALSE
;
13116 vm_map_version_t version
;
13118 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)) >= VM_MAP_PAGE_SHIFT(map
),
13119 "map %p (%d) entry %p submap %p (%d)\n",
13120 map
, VM_MAP_PAGE_SHIFT(map
), entry
,
13121 VME_SUBMAP(entry
), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)));
13123 local_vaddr
= vaddr
;
13125 if ((entry
->use_pmap
&&
13126 !((fault_type
& VM_PROT_WRITE
) ||
13128 /* if real_map equals map we unlock below */
13129 if ((*real_map
!= map
) &&
13130 (*real_map
!= cow_sub_map_parent
)) {
13131 vm_map_unlock(*real_map
);
13133 *real_map
= VME_SUBMAP(entry
);
13136 if (entry
->needs_copy
&&
13137 ((fault_type
& VM_PROT_WRITE
) ||
13139 if (!mapped_needs_copy
) {
13140 if (vm_map_lock_read_to_write(map
)) {
13141 vm_map_lock_read(map
);
13145 vm_map_lock_read(VME_SUBMAP(entry
));
13146 *var_map
= VME_SUBMAP(entry
);
13147 cow_sub_map_parent
= map
;
13148 /* reset base to map before cow object */
13149 /* this is the map which will accept */
13150 /* the new cow object */
13151 old_start
= entry
->vme_start
;
13152 old_end
= entry
->vme_end
;
13153 cow_parent_vaddr
= vaddr
;
13154 mapped_needs_copy
= TRUE
;
13156 vm_map_lock_read(VME_SUBMAP(entry
));
13157 *var_map
= VME_SUBMAP(entry
);
13158 if ((cow_sub_map_parent
!= map
) &&
13159 (*real_map
!= map
)) {
13160 vm_map_unlock(map
);
13164 if (entry
->needs_copy
) {
13165 submap_needed_copy
= TRUE
;
13167 vm_map_lock_read(VME_SUBMAP(entry
));
13168 *var_map
= VME_SUBMAP(entry
);
13169 /* leave map locked if it is a target */
13170 /* cow sub_map above otherwise, just */
13171 /* follow the maps down to the object */
13172 /* here we unlock knowing we are not */
13173 /* revisiting the map. */
13174 if ((*real_map
!= map
) && (map
!= cow_sub_map_parent
)) {
13175 vm_map_unlock_read(map
);
13181 /* calculate the offset in the submap for vaddr */
13182 local_vaddr
= (local_vaddr
- entry
->vme_start
) + VME_OFFSET(entry
);
13183 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr
, fault_page_mask
),
13184 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13185 (uint64_t)local_vaddr
, (uint64_t)entry
->vme_start
, (uint64_t)fault_page_mask
);
13188 if (!vm_map_lookup_entry(map
, local_vaddr
, &submap_entry
)) {
13189 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13190 vm_map_unlock(cow_sub_map_parent
);
13192 if ((*real_map
!= map
)
13193 && (*real_map
!= cow_sub_map_parent
)) {
13194 vm_map_unlock(*real_map
);
13197 return KERN_INVALID_ADDRESS
;
13200 /* find the attenuated shadow of the underlying object */
13201 /* on our target map */
13203 /* in english the submap object may extend beyond the */
13204 /* region mapped by the entry or, may only fill a portion */
13205 /* of it. For our purposes, we only care if the object */
13206 /* doesn't fill. In this case the area which will */
13207 /* ultimately be clipped in the top map will only need */
13208 /* to be as big as the portion of the underlying entry */
13209 /* which is mapped */
13210 start_delta
= submap_entry
->vme_start
> VME_OFFSET(entry
) ?
13211 submap_entry
->vme_start
- VME_OFFSET(entry
) : 0;
13214 (VME_OFFSET(entry
) + start_delta
+ (old_end
- old_start
)) <=
13215 submap_entry
->vme_end
?
13216 0 : (VME_OFFSET(entry
) +
13217 (old_end
- old_start
))
13218 - submap_entry
->vme_end
;
13220 old_start
+= start_delta
;
13221 old_end
-= end_delta
;
13223 if (submap_entry
->is_sub_map
) {
13224 entry
= submap_entry
;
13225 vaddr
= local_vaddr
;
13226 goto submap_recurse
;
13229 if (((fault_type
& VM_PROT_WRITE
) ||
13231 && cow_sub_map_parent
) {
13232 vm_object_t sub_object
, copy_object
;
13233 vm_object_offset_t copy_offset
;
13234 vm_map_offset_t local_start
;
13235 vm_map_offset_t local_end
;
13236 boolean_t object_copied
= FALSE
;
13237 vm_object_offset_t object_copied_offset
= 0;
13238 boolean_t object_copied_needs_copy
= FALSE
;
13239 kern_return_t kr
= KERN_SUCCESS
;
13241 if (vm_map_lock_read_to_write(map
)) {
13242 vm_map_lock_read(map
);
13243 old_start
-= start_delta
;
13244 old_end
+= end_delta
;
13249 sub_object
= VME_OBJECT(submap_entry
);
13250 if (sub_object
== VM_OBJECT_NULL
) {
13252 vm_object_allocate(
13254 (submap_entry
->vme_end
-
13255 submap_entry
->vme_start
));
13256 VME_OBJECT_SET(submap_entry
, sub_object
);
13257 VME_OFFSET_SET(submap_entry
, 0);
13258 assert(!submap_entry
->is_sub_map
);
13259 assert(submap_entry
->use_pmap
);
13261 local_start
= local_vaddr
-
13262 (cow_parent_vaddr
- old_start
);
13263 local_end
= local_vaddr
+
13264 (old_end
- cow_parent_vaddr
);
13265 vm_map_clip_start(map
, submap_entry
, local_start
);
13266 vm_map_clip_end(map
, submap_entry
, local_end
);
13267 if (submap_entry
->is_sub_map
) {
13268 /* unnesting was done when clipping */
13269 assert(!submap_entry
->use_pmap
);
13272 /* This is the COW case, lets connect */
13273 /* an entry in our space to the underlying */
13274 /* object in the submap, bypassing the */
13276 submap_entry_offset
= VME_OFFSET(submap_entry
);
13277 submap_entry_size
= submap_entry
->vme_end
- submap_entry
->vme_start
;
13279 if ((submap_entry
->wired_count
!= 0 ||
13280 sub_object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) &&
13281 (submap_entry
->protection
& VM_PROT_EXECUTE
) &&
13282 no_force_copy_if_executable
) {
13283 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13284 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13285 vm_map_unlock(cow_sub_map_parent
);
13287 if ((*real_map
!= map
)
13288 && (*real_map
!= cow_sub_map_parent
)) {
13289 vm_map_unlock(*real_map
);
13292 vm_map_lock_write_to_read(map
);
13293 kr
= KERN_PROTECTION_FAILURE
;
13294 DTRACE_VM4(submap_no_copy_executable
,
13296 vm_object_offset_t
, submap_entry_offset
,
13297 vm_object_size_t
, submap_entry_size
,
13302 if (submap_entry
->wired_count
!= 0) {
13303 vm_object_reference(sub_object
);
13305 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry
), VM_MAP_PAGE_MASK(map
)),
13306 "submap_entry %p offset 0x%llx\n",
13307 submap_entry
, VME_OFFSET(submap_entry
));
13309 DTRACE_VM6(submap_copy_slowly
,
13310 vm_map_t
, cow_sub_map_parent
,
13311 vm_map_offset_t
, vaddr
,
13313 vm_object_size_t
, submap_entry_size
,
13314 int, submap_entry
->wired_count
,
13315 int, sub_object
->copy_strategy
);
13317 saved_submap_entry
= submap_entry
;
13318 version
.main_timestamp
= map
->timestamp
;
13319 vm_map_unlock(map
); /* Increments timestamp by 1 */
13320 submap_entry
= VM_MAP_ENTRY_NULL
;
13322 vm_object_lock(sub_object
);
13323 kr
= vm_object_copy_slowly(sub_object
,
13324 submap_entry_offset
,
13328 object_copied
= TRUE
;
13329 object_copied_offset
= 0;
13330 /* 4k: account for extra offset in physical page */
13331 object_copied_offset
+= submap_entry_offset
- vm_object_trunc_page(submap_entry_offset
);
13332 object_copied_needs_copy
= FALSE
;
13333 vm_object_deallocate(sub_object
);
13337 if (kr
!= KERN_SUCCESS
&&
13338 kr
!= KERN_MEMORY_RESTART_COPY
) {
13339 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13340 vm_map_unlock(cow_sub_map_parent
);
13342 if ((*real_map
!= map
)
13343 && (*real_map
!= cow_sub_map_parent
)) {
13344 vm_map_unlock(*real_map
);
13347 vm_object_deallocate(copy_object
);
13348 copy_object
= VM_OBJECT_NULL
;
13349 vm_map_lock_write_to_read(map
);
13350 DTRACE_VM4(submap_copy_error_slowly
,
13351 vm_object_t
, sub_object
,
13352 vm_object_offset_t
, submap_entry_offset
,
13353 vm_object_size_t
, submap_entry_size
,
13355 vm_map_lookup_locked_copy_slowly_error
++;
13359 if ((kr
== KERN_SUCCESS
) &&
13360 (version
.main_timestamp
+ 1) == map
->timestamp
) {
13361 submap_entry
= saved_submap_entry
;
13363 saved_submap_entry
= NULL
;
13364 old_start
-= start_delta
;
13365 old_end
+= end_delta
;
13366 vm_object_deallocate(copy_object
);
13367 copy_object
= VM_OBJECT_NULL
;
13368 vm_map_lock_write_to_read(map
);
13369 vm_map_lookup_locked_copy_slowly_restart
++;
13372 vm_map_lookup_locked_copy_slowly_count
++;
13373 vm_map_lookup_locked_copy_slowly_size
+= submap_entry_size
;
13374 if (submap_entry_size
> vm_map_lookup_locked_copy_slowly_max
) {
13375 vm_map_lookup_locked_copy_slowly_max
= submap_entry_size
;
13377 } else if (sub_object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
13378 submap_entry_offset
= VME_OFFSET(submap_entry
);
13379 copy_object
= VM_OBJECT_NULL
;
13380 object_copied_offset
= submap_entry_offset
;
13381 object_copied_needs_copy
= FALSE
;
13382 DTRACE_VM6(submap_copy_strategically
,
13383 vm_map_t
, cow_sub_map_parent
,
13384 vm_map_offset_t
, vaddr
,
13386 vm_object_size_t
, submap_entry_size
,
13387 int, submap_entry
->wired_count
,
13388 int, sub_object
->copy_strategy
);
13389 kr
= vm_object_copy_strategically(
13391 submap_entry_offset
,
13392 submap_entry
->vme_end
- submap_entry
->vme_start
,
13394 &object_copied_offset
,
13395 &object_copied_needs_copy
);
13396 if (kr
== KERN_MEMORY_RESTART_COPY
) {
13397 old_start
-= start_delta
;
13398 old_end
+= end_delta
;
13399 vm_object_deallocate(copy_object
);
13400 copy_object
= VM_OBJECT_NULL
;
13401 vm_map_lock_write_to_read(map
);
13402 vm_map_lookup_locked_copy_strategically_restart
++;
13405 if (kr
!= KERN_SUCCESS
) {
13406 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13407 vm_map_unlock(cow_sub_map_parent
);
13409 if ((*real_map
!= map
)
13410 && (*real_map
!= cow_sub_map_parent
)) {
13411 vm_map_unlock(*real_map
);
13414 vm_object_deallocate(copy_object
);
13415 copy_object
= VM_OBJECT_NULL
;
13416 vm_map_lock_write_to_read(map
);
13417 DTRACE_VM4(submap_copy_error_strategically
,
13418 vm_object_t
, sub_object
,
13419 vm_object_offset_t
, submap_entry_offset
,
13420 vm_object_size_t
, submap_entry_size
,
13422 vm_map_lookup_locked_copy_strategically_error
++;
13425 assert(copy_object
!= VM_OBJECT_NULL
);
13426 assert(copy_object
!= sub_object
);
13427 object_copied
= TRUE
;
13428 vm_map_lookup_locked_copy_strategically_count
++;
13429 vm_map_lookup_locked_copy_strategically_size
+= submap_entry_size
;
13430 if (submap_entry_size
> vm_map_lookup_locked_copy_strategically_max
) {
13431 vm_map_lookup_locked_copy_strategically_max
= submap_entry_size
;
13434 /* set up shadow object */
13435 object_copied
= FALSE
;
13436 copy_object
= sub_object
;
13437 vm_object_lock(sub_object
);
13438 vm_object_reference_locked(sub_object
);
13439 sub_object
->shadowed
= TRUE
;
13440 vm_object_unlock(sub_object
);
13442 assert(submap_entry
->wired_count
== 0);
13443 submap_entry
->needs_copy
= TRUE
;
13445 prot
= submap_entry
->protection
;
13446 assert(!pmap_has_prot_policy(map
->pmap
, submap_entry
->translated_allow_execute
, prot
));
13447 prot
= prot
& ~VM_PROT_WRITE
;
13448 assert(!pmap_has_prot_policy(map
->pmap
, submap_entry
->translated_allow_execute
, prot
));
13450 if (override_nx(old_map
,
13451 VME_ALIAS(submap_entry
))
13453 prot
|= VM_PROT_EXECUTE
;
13456 vm_object_pmap_protect(
13458 VME_OFFSET(submap_entry
),
13459 submap_entry
->vme_end
-
13460 submap_entry
->vme_start
,
13461 (submap_entry
->is_shared
13462 || map
->mapped_in_other_pmaps
) ?
13463 PMAP_NULL
: map
->pmap
,
13464 VM_MAP_PAGE_SIZE(map
),
13465 submap_entry
->vme_start
,
13467 vm_map_lookup_locked_copy_shadow_count
++;
13468 vm_map_lookup_locked_copy_shadow_size
+= submap_entry_size
;
13469 if (submap_entry_size
> vm_map_lookup_locked_copy_shadow_max
) {
13470 vm_map_lookup_locked_copy_shadow_max
= submap_entry_size
;
13475 * Adjust the fault offset to the submap entry.
13477 copy_offset
= (local_vaddr
-
13478 submap_entry
->vme_start
+
13479 VME_OFFSET(submap_entry
));
			/*
			 * This works differently from the normal submap case.
			 * We go back to the parent of the cow map and clip out
			 * the target portion of the sub_map, substituting the
			 * new copy object.
			 */
13488 subentry_protection
= submap_entry
->protection
;
13489 subentry_max_protection
= submap_entry
->max_protection
;
13490 subentry_no_copy_on_read
= submap_entry
->vme_no_copy_on_read
;
13491 vm_map_unlock(map
);
13492 submap_entry
= NULL
; /* not valid after map unlock */
13494 local_start
= old_start
;
13495 local_end
= old_end
;
13496 map
= cow_sub_map_parent
;
13497 *var_map
= cow_sub_map_parent
;
13498 vaddr
= cow_parent_vaddr
;
13499 cow_sub_map_parent
= NULL
;
13501 if (!vm_map_lookup_entry(map
,
13503 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13504 vm_map_unlock(cow_sub_map_parent
);
13506 if ((*real_map
!= map
)
13507 && (*real_map
!= cow_sub_map_parent
)) {
13508 vm_map_unlock(*real_map
);
13511 vm_object_deallocate(
13513 copy_object
= VM_OBJECT_NULL
;
13514 vm_map_lock_write_to_read(map
);
13515 DTRACE_VM4(submap_lookup_post_unlock
,
13516 uint64_t, (uint64_t)entry
->vme_start
,
13517 uint64_t, (uint64_t)entry
->vme_end
,
13518 vm_map_offset_t
, vaddr
,
13519 int, object_copied
);
13520 return KERN_INVALID_ADDRESS
;
13523 /* clip out the portion of space */
13524 /* mapped by the sub map which */
13525 /* corresponds to the underlying */
13529 * Clip (and unnest) the smallest nested chunk
13530 * possible around the faulting address...
13532 local_start
= vaddr
& ~(pmap_shared_region_size_min(map
->pmap
) - 1);
13533 local_end
= local_start
+ pmap_shared_region_size_min(map
->pmap
);
13535 * ... but don't go beyond the "old_start" to "old_end"
13536 * range, to avoid spanning over another VM region
13537 * with a possibly different VM object and/or offset.
13539 if (local_start
< old_start
) {
13540 local_start
= old_start
;
13542 if (local_end
> old_end
) {
13543 local_end
= old_end
;
13546 * Adjust copy_offset to the start of the range.
13548 copy_offset
-= (vaddr
- local_start
);
13550 vm_map_clip_start(map
, entry
, local_start
);
13551 vm_map_clip_end(map
, entry
, local_end
);
13552 if (entry
->is_sub_map
) {
13553 /* unnesting was done when clipping */
13554 assert(!entry
->use_pmap
);
13557 /* substitute copy object for */
13558 /* shared map entry */
13559 vm_map_deallocate(VME_SUBMAP(entry
));
13560 assert(!entry
->iokit_acct
);
13561 entry
->is_sub_map
= FALSE
;
13562 entry
->use_pmap
= TRUE
;
13563 VME_OBJECT_SET(entry
, copy_object
);
13565 /* propagate the submap entry's protections */
13566 if (entry
->protection
!= VM_PROT_READ
) {
13568 * Someone has already altered the top entry's
13569 * protections via vm_protect(VM_PROT_COPY).
13570 * Respect these new values and ignore the
13571 * submap entry's protections.
13575 * Regular copy-on-write: propagate the submap
13576 * entry's protections to the top map entry.
13578 entry
->protection
|= subentry_protection
;
13580 entry
->max_protection
|= subentry_max_protection
;
13581 /* propagate no_copy_on_read */
13582 entry
->vme_no_copy_on_read
= subentry_no_copy_on_read
;
13584 if ((entry
->protection
& VM_PROT_WRITE
) &&
13585 (entry
->protection
& VM_PROT_EXECUTE
) &&
13586 #if XNU_TARGET_OS_OSX
13587 map
->pmap
!= kernel_pmap
&&
13588 (vm_map_cs_enforcement(map
)
13590 || !VM_MAP_IS_EXOTIC(map
)
13591 #endif /* __arm64__ */
13593 #endif /* XNU_TARGET_OS_OSX */
13594 !(entry
->used_for_jit
) &&
13595 VM_MAP_POLICY_WX_STRIP_X(map
)) {
13597 uint64_t, (uint64_t)entry
->vme_start
,
13598 uint64_t, (uint64_t)entry
->vme_end
,
13599 vm_prot_t
, entry
->protection
);
13600 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13602 (current_task()->bsd_info
13603 ? proc_name_address(current_task()->bsd_info
)
13606 entry
->protection
&= ~VM_PROT_EXECUTE
;
13609 if (object_copied
) {
13610 VME_OFFSET_SET(entry
, local_start
- old_start
+ object_copied_offset
);
13611 entry
->needs_copy
= object_copied_needs_copy
;
13612 entry
->is_shared
= FALSE
;
13614 assert(VME_OBJECT(entry
) != VM_OBJECT_NULL
);
13615 assert(VME_OBJECT(entry
)->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
);
13616 assert(entry
->wired_count
== 0);
13617 VME_OFFSET_SET(entry
, copy_offset
);
13618 entry
->needs_copy
= TRUE
;
13619 if (map
!= old_map
) {
13620 entry
->is_shared
= TRUE
;
13623 if (entry
->inheritance
== VM_INHERIT_SHARE
) {
13624 entry
->inheritance
= VM_INHERIT_COPY
;
13627 vm_map_lock_write_to_read(map
);
13629 if ((cow_sub_map_parent
)
13630 && (cow_sub_map_parent
!= *real_map
)
13631 && (cow_sub_map_parent
!= map
)) {
13632 vm_map_unlock(cow_sub_map_parent
);
13634 entry
= submap_entry
;
13635 vaddr
= local_vaddr
;
13640 * Check whether this task is allowed to have
13644 prot
= entry
->protection
;
13646 if (override_nx(old_map
, VME_ALIAS(entry
)) && prot
) {
13648 * HACK -- if not a stack, then allow execution
13650 prot
|= VM_PROT_EXECUTE
;
13653 if (mask_protections
) {
13654 fault_type
&= prot
;
13655 if (fault_type
== VM_PROT_NONE
) {
13656 goto protection_failure
;
13659 if (((fault_type
& prot
) != fault_type
)
13661 /* prefetch abort in execute-only page */
13662 && !(prot
== VM_PROT_EXECUTE
&& fault_type
== (VM_PROT_READ
| VM_PROT_EXECUTE
))
13665 protection_failure
:
13666 if (*real_map
!= map
) {
13667 vm_map_unlock(*real_map
);
13671 if ((fault_type
& VM_PROT_EXECUTE
) && prot
) {
13672 log_stack_execution_failure((addr64_t
)vaddr
, prot
);
13675 DTRACE_VM2(prot_fault
, int, 1, (uint64_t *), NULL
);
13676 return KERN_PROTECTION_FAILURE
;
13680 * If this page is not pageable, we have to get
13681 * it for all possible accesses.
13684 *wired
= (entry
->wired_count
!= 0);
13690 * If the entry was copy-on-write, we either ...
13693 if (entry
->needs_copy
) {
13695 * If we want to write the page, we may as well
13696 * handle that now since we've got the map locked.
13698 * If we don't need to write the page, we just
13699 * demote the permissions allowed.
13702 if ((fault_type
& VM_PROT_WRITE
) || *wired
|| force_copy
) {
13704 * Make a new object, and place it in the
13705 * object chain. Note that no new references
13706 * have appeared -- one just moved from the
13707 * map to the new object.
13710 if (vm_map_lock_read_to_write(map
)) {
13711 vm_map_lock_read(map
);
13715 if (VME_OBJECT(entry
)->shadowed
== FALSE
) {
			vm_object_lock(VME_OBJECT(entry));
			VME_OBJECT(entry)->shadowed = TRUE;
			vm_object_unlock(VME_OBJECT(entry));

			VME_OBJECT_SHADOW(entry,
			    (vm_map_size_t) (entry->vme_end -
			    entry->vme_start));
			entry->needs_copy = FALSE;

			vm_map_lock_write_to_read(map);
		}
		if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
			/*
			 * We're attempting to read a copy-on-write
			 * page -- don't allow writes.
			 */
			prot &= (~VM_PROT_WRITE);
		}
	}

	if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
		/*
		 * We went through a "needs_copy" submap without triggering
		 * a copy, so granting write access to the page would bypass
		 * that submap's "needs_copy".
		 */
		assert(!(fault_type & VM_PROT_WRITE));
		assert(!force_copy);
		// printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
		prot &= ~VM_PROT_WRITE;
	}

	/*
	 * Create an object if necessary.
	 */
	if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
		if (vm_map_lock_read_to_write(map)) {
			vm_map_lock_read(map);
			goto RetryLookup;
		}

		VME_OBJECT_SET(entry,
		    vm_object_allocate(
			    (vm_map_size_t)(entry->vme_end -
			    entry->vme_start)));
		VME_OFFSET_SET(entry, 0);
		assert(entry->use_pmap);
		vm_map_lock_write_to_read(map);
	}

	/*
	 * Return the object/offset from this entry.  If the entry
	 * was copy-on-write or empty, it has been fixed up.  Also
	 * return the protection.
	 */
	*offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
	*object = VME_OBJECT(entry);

	KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);

	fault_info->interruptible = THREAD_UNINT; /* for now... */
	/* ... the caller will change "interruptible" if needed */
	fault_info->cluster_size = 0;
	fault_info->user_tag = VME_ALIAS(entry);
	fault_info->pmap_options = 0;
	if (entry->iokit_acct ||
	    (!entry->is_sub_map && !entry->use_pmap)) {
		fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
	}
	fault_info->behavior = entry->behavior;
	fault_info->lo_offset = VME_OFFSET(entry);
	fault_info->hi_offset =
	    (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
	fault_info->no_cache = entry->no_cache;
	fault_info->stealth = FALSE;
	fault_info->io_sync = FALSE;
	if (entry->used_for_jit ||
	    entry->vme_resilient_codesign) {
		fault_info->cs_bypass = TRUE;
	} else {
		fault_info->cs_bypass = FALSE;
	}
	fault_info->pmap_cs_associated = FALSE;
#if CONFIG_PMAP_CS
	if (entry->pmap_cs_associated) {
		/*
		 * The pmap layer will validate this page
		 * before allowing it to be executed from.
		 */
		fault_info->pmap_cs_associated = TRUE;
	}
#endif /* CONFIG_PMAP_CS */
	fault_info->mark_zf_absent = FALSE;
	fault_info->batch_pmap_op = FALSE;
	fault_info->resilient_media = entry->vme_resilient_media;
	fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
	if (entry->translated_allow_execute) {
		fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
	}

	/*
	 * Lock the object to prevent it from disappearing
	 */
	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
		if (contended == NULL) {
			vm_object_lock(*object);
		} else {
			*contended = vm_object_lock_check_contended(*object);
		}
	} else {
		vm_object_lock_shared(*object);
	}

	/*
	 * Save the version number
	 */
	out_version->main_timestamp = map->timestamp;

	return KERN_SUCCESS;
}
/*
 *	vm_map_verify:
 *
 *	Verifies that the map in question has not changed
 *	since the given version.  The map has to be locked
 *	("shared" mode is fine) before calling this function
 *	and it will be returned locked too.
 */
boolean_t
vm_map_verify(
	vm_map_t                map,
	vm_map_version_t        *version)       /* REF */
{
	boolean_t       result;

	vm_map_lock_assert_held(map);
	result = (map->timestamp == version->main_timestamp);

	return result;
}
/*
 *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
 *	Goes away after regular vm_region_recurse function migrates to
 *	64 bits
 *	vm_region_recurse: A form of vm_region which follows the
 *	submaps in a target map
 */
kern_return_t
vm_map_region_recurse_64(
	vm_map_t                 map,
	vm_map_offset_t         *address,               /* IN/OUT */
	vm_map_size_t           *size,                  /* OUT */
	natural_t               *nesting_depth,         /* IN/OUT */
	vm_region_submap_info_64_t submap_info,         /* IN/OUT */
	mach_msg_type_number_t  *count)                 /* IN/OUT */
{
	mach_msg_type_number_t  original_count;
	vm_region_extended_info_data_t extended;
	vm_map_entry_t          tmp_entry;
	vm_map_offset_t         user_address;
	unsigned int            user_max_depth;

	/*
	 * "curr_entry" is the VM map entry preceding or including the
	 * address we're looking for.
	 * "curr_map" is the map or sub-map containing "curr_entry".
	 * "curr_address" is the equivalent of the top map's "user_address"
	 * in the current map.
	 * "curr_offset" is the cumulated offset of "curr_map" in the
	 * target task's address space.
	 * "curr_depth" is the depth of "curr_map" in the chain of
	 * sub-maps.
	 *
	 * "curr_max_below" and "curr_max_above" limit the range (around
	 * "curr_address") we should take into account in the current (sub)map.
	 * They limit the range to what's visible through the map entries
	 * we've traversed from the top map to the current map.
	 */
	vm_map_entry_t          curr_entry;
	vm_map_address_t        curr_address;
	vm_map_offset_t         curr_offset;
	vm_map_t                curr_map;
	unsigned int            curr_depth;
	vm_map_offset_t         curr_max_below, curr_max_above;
	vm_map_offset_t         curr_skip;

	/*
	 * "next_" is the same as "curr_" but for the VM region immediately
	 * after the address we're looking for.  We need to keep track of this
	 * too because we want to return info about that region if the
	 * address we're looking for is not mapped.
	 */
	vm_map_entry_t          next_entry;
	vm_map_offset_t         next_offset;
	vm_map_offset_t         next_address;
	vm_map_t                next_map;
	unsigned int            next_depth;
	vm_map_offset_t         next_max_below, next_max_above;
	vm_map_offset_t         next_skip;

	boolean_t               look_for_pages;
	vm_region_submap_short_info_64_t short_info;
	boolean_t               do_region_footprint;
	int                     effective_page_size, effective_page_shift;
	boolean_t               submap_needed_copy;

	if (map == VM_MAP_NULL) {
		/* no address space to work on */
		return KERN_INVALID_ARGUMENT;
	}

	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);

	if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
		/*
		 * "info" structure is not big enough and
		 * would overflow
		 */
		return KERN_INVALID_ARGUMENT;
	}

	do_region_footprint = task_self_region_footprint();
	original_count = *count;

	if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
		*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
		look_for_pages = FALSE;
		short_info = (vm_region_submap_short_info_64_t) submap_info;
		submap_info = NULL;
	} else {
		look_for_pages = TRUE;
		*count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
		short_info = NULL;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			*count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
		}
		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
			*count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
		}
	}

	user_address = *address;
	user_max_depth = *nesting_depth;
	submap_needed_copy = FALSE;

	if (not_in_kdp) {
		vm_map_lock_read(map);
	}

recurse_again:
	curr_entry = NULL;
	curr_map = map;
	curr_address = user_address;
	curr_offset = 0;
	curr_skip = 0;
	curr_depth = 0;
	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
	curr_max_below = curr_address;

	next_entry = NULL;
	next_map = NULL;
	next_address = 0;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_above = (vm_map_offset_t) -1;
	next_max_below = (vm_map_offset_t) -1;

	for (;;) {
		if (vm_map_lookup_entry(curr_map,
		    curr_address,
		    &tmp_entry)) {
			/* tmp_entry contains the address we're looking for */
			curr_entry = tmp_entry;
		} else {
			vm_map_offset_t skip;
			/*
			 * The address is not mapped.  "tmp_entry" is the
			 * map entry preceding the address.  We want the next
			 * one, if it exists.
			 */
			curr_entry = tmp_entry->vme_next;

			if (curr_entry == vm_map_to_entry(curr_map) ||
			    (curr_entry->vme_start >=
			    curr_address + curr_max_above)) {
				/* no next entry at this level: stop looking */
				if (not_in_kdp) {
					vm_map_unlock_read(curr_map);
				}
				curr_entry = NULL;
				curr_map = NULL;
				curr_offset = 0;
				curr_skip = 0;
				curr_depth = 0;
				curr_max_above = 0;
				curr_max_below = 0;
				break;
			}

			/* adjust current address and offset */
			skip = curr_entry->vme_start - curr_address;
			curr_address = curr_entry->vme_start;
			curr_skip += skip;
			curr_offset += skip;
			curr_max_above -= skip;
			curr_max_below = 0;
		}

		/*
		 * Is the next entry at this level closer to the address (or
		 * deeper in the submap chain) than the one we had
		 * so far ?
		 */
		tmp_entry = curr_entry->vme_next;
		if (tmp_entry == vm_map_to_entry(curr_map)) {
			/* no next entry at this level */
		} else if (tmp_entry->vme_start >=
		    curr_address + curr_max_above) {
			/*
			 * tmp_entry is beyond the scope of what we mapped of
			 * this submap in the upper level: ignore it.
			 */
		} else if ((next_entry == NULL) ||
		    (tmp_entry->vme_start + curr_offset <=
		    next_entry->vme_start + next_offset)) {
			/*
			 * We didn't have a "next_entry" or this one is
			 * closer to the address we're looking for:
			 * use this "tmp_entry" as the new "next_entry".
			 */
			if (next_entry != NULL) {
				/* unlock the last "next_map" */
				if (next_map != curr_map && not_in_kdp) {
					vm_map_unlock_read(next_map);
				}
			}
			next_entry = tmp_entry;
			next_map = curr_map;
			next_depth = curr_depth;
			next_address = next_entry->vme_start;
			next_skip = curr_skip;
			next_skip += (next_address - curr_address);
			next_offset = curr_offset;
			next_offset += (next_address - curr_address);
			next_max_above = MIN(next_max_above, curr_max_above);
			next_max_above = MIN(next_max_above,
			    next_entry->vme_end - next_address);
			next_max_below = MIN(next_max_below, curr_max_below);
			next_max_below = MIN(next_max_below,
			    next_address - next_entry->vme_start);
		}

		/*
		 * "curr_max_{above,below}" allow us to keep track of the
		 * portion of the submap that is actually mapped at this level:
		 * the rest of that submap is irrelevant to us, since it's not
		 * mapped here.
		 * The relevant portion of the map starts at
		 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
		 */
		curr_max_above = MIN(curr_max_above,
		    curr_entry->vme_end - curr_address);
		curr_max_below = MIN(curr_max_below,
		    curr_address - curr_entry->vme_start);

		if (!curr_entry->is_sub_map ||
		    curr_depth >= user_max_depth) {
			/*
			 * We hit a leaf map or we reached the maximum depth
			 * we could, so stop looking.  Keep the current map
			 * locked.
			 */
			break;
		}

		/*
		 * Get down to the next submap level.
		 */

		if (curr_entry->needs_copy) {
			/* everything below this is effectively copy-on-write */
			submap_needed_copy = TRUE;
		}

		/*
		 * Lock the next level and unlock the current level,
		 * unless we need to keep it locked to access the "next_entry"
		 * later.
		 */
		if (not_in_kdp) {
			vm_map_lock_read(VME_SUBMAP(curr_entry));
		}
		if (curr_map == next_map) {
			/* keep "next_map" locked in case we need it */
		} else {
			/* release this map */
			if (not_in_kdp) {
				vm_map_unlock_read(curr_map);
			}
		}

		/*
		 * Adjust the offset.  "curr_entry" maps the submap
		 * at relative address "curr_entry->vme_start" in the
		 * curr_map but skips the first "VME_OFFSET(curr_entry)"
		 * bytes of the submap.
		 * "curr_offset" always represents the offset of a virtual
		 * address in the curr_map relative to the absolute address
		 * space (i.e. the top-level VM map).
		 */
		curr_offset +=
		    (VME_OFFSET(curr_entry) - curr_entry->vme_start);
		curr_address = user_address + curr_offset;
		/* switch to the submap */
		curr_map = VME_SUBMAP(curr_entry);
		curr_depth++;
	}

// LP64todo: all the current tools are 32bit, obviously never worked for 64b
// so probably should be a real 32b ID vs. ptr.
// Current users just check for equality

	if (curr_entry == NULL) {
		/* no VM region contains the address... */

		if (do_region_footprint &&      /* we want footprint numbers */
		    next_entry == NULL &&       /* & there are no more regions */
		    /* & we haven't already provided our fake region: */
		    user_address <= vm_map_last_entry(map)->vme_end) {
			ledger_amount_t ledger_resident, ledger_compressed;

			/*
			 * Add a fake memory region to account for
			 * purgeable and/or ledger-tagged memory that
			 * counts towards this task's memory footprint,
			 * i.e. the resident/compressed pages of non-volatile
			 * objects owned by that task.
			 */
			task_ledgers_footprint(map->pmap->ledger,
			    &ledger_resident,
			    &ledger_compressed);
			if (ledger_resident + ledger_compressed == 0) {
				/* no purgeable memory usage to report */
				return KERN_INVALID_ADDRESS;
			}
			/* fake region to show nonvolatile footprint */
			if (look_for_pages) {
				submap_info->protection = VM_PROT_DEFAULT;
				submap_info->max_protection = VM_PROT_DEFAULT;
				submap_info->inheritance = VM_INHERIT_DEFAULT;
				submap_info->offset = 0;
				submap_info->user_tag = -1;
				submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
				submap_info->pages_shared_now_private = 0;
				submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
				submap_info->pages_dirtied = submap_info->pages_resident;
				submap_info->ref_count = 1;
				submap_info->shadow_depth = 0;
				submap_info->external_pager = 0;
				submap_info->share_mode = SM_PRIVATE;
				if (submap_needed_copy) {
					submap_info->share_mode = SM_COW;
				}
				submap_info->is_submap = 0;
				submap_info->behavior = VM_BEHAVIOR_DEFAULT;
				submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				submap_info->user_wired_count = 0;
				submap_info->pages_reusable = 0;
			} else {
				short_info->user_tag = -1;
				short_info->offset = 0;
				short_info->protection = VM_PROT_DEFAULT;
				short_info->inheritance = VM_INHERIT_DEFAULT;
				short_info->max_protection = VM_PROT_DEFAULT;
				short_info->behavior = VM_BEHAVIOR_DEFAULT;
				short_info->user_wired_count = 0;
				short_info->is_submap = 0;
				short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				short_info->external_pager = 0;
				short_info->shadow_depth = 0;
				short_info->share_mode = SM_PRIVATE;
				if (submap_needed_copy) {
					short_info->share_mode = SM_COW;
				}
				short_info->ref_count = 1;
			}
			*nesting_depth = 0;
			*size = (vm_map_size_t) (ledger_resident + ledger_compressed);
//			*address = user_address;
			*address = vm_map_last_entry(map)->vme_end;
			return KERN_SUCCESS;
		}

		if (next_entry == NULL) {
			/* ... and no VM region follows it either */
			return KERN_INVALID_ADDRESS;
		}
		/* ... gather info about the next VM region */
		curr_entry = next_entry;
		curr_map = next_map;    /* still locked ... */
		curr_address = next_address;
		curr_skip = next_skip;
		curr_offset = next_offset;
		curr_depth = next_depth;
		curr_max_above = next_max_above;
		curr_max_below = next_max_below;
	} else {
		/* we won't need "next_entry" after all */
		if (next_entry != NULL) {
			/* release "next_map" */
			if (next_map != curr_map && not_in_kdp) {
				vm_map_unlock_read(next_map);
			}
		}
	}
	next_entry = NULL;
	next_map = NULL;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_below = -1;
	next_max_above = -1;

	if (curr_entry->is_sub_map &&
	    curr_depth < user_max_depth) {
		/*
		 * We're not as deep as we could be:  we must have
		 * gone back up after not finding anything mapped
		 * below the original top-level map entry's.
		 * Let's move "curr_address" forward and recurse again.
		 */
		user_address = curr_address;
		goto recurse_again;
	}

	*nesting_depth = curr_depth;
	*size = curr_max_above + curr_max_below;
	*address = user_address + curr_skip - curr_max_below;

	if (look_for_pages) {
		submap_info->user_tag = VME_ALIAS(curr_entry);
		submap_info->offset = VME_OFFSET(curr_entry);
		submap_info->protection = curr_entry->protection;
		submap_info->inheritance = curr_entry->inheritance;
		submap_info->max_protection = curr_entry->max_protection;
		submap_info->behavior = curr_entry->behavior;
		submap_info->user_wired_count = curr_entry->user_wired_count;
		submap_info->is_submap = curr_entry->is_sub_map;
		submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
	} else {
		short_info->user_tag = VME_ALIAS(curr_entry);
		short_info->offset = VME_OFFSET(curr_entry);
		short_info->protection = curr_entry->protection;
		short_info->inheritance = curr_entry->inheritance;
		short_info->max_protection = curr_entry->max_protection;
		short_info->behavior = curr_entry->behavior;
		short_info->user_wired_count = curr_entry->user_wired_count;
		short_info->is_submap = curr_entry->is_sub_map;
		short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
	}

	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.pages_reusable = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;
	extended.share_mode = SM_EMPTY;
	extended.ref_count = 0;

	if (not_in_kdp) {
		if (!curr_entry->is_sub_map) {
			vm_map_offset_t range_start, range_end;
			range_start = MAX((curr_address - curr_max_below),
			    curr_entry->vme_start);
			range_end = MIN((curr_address + curr_max_above),
			    curr_entry->vme_end);
			vm_map_region_walk(curr_map,
			    range_start,
			    curr_entry,
			    (VME_OFFSET(curr_entry) +
			    (range_start -
			    curr_entry->vme_start)),
			    range_end - range_start,
			    &extended,
			    look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
			if (extended.external_pager &&
			    extended.ref_count == 2 &&
			    extended.share_mode == SM_SHARED) {
				extended.share_mode = SM_PRIVATE;
			}
			if (submap_needed_copy) {
				extended.share_mode = SM_COW;
			}
		} else {
			if (curr_entry->use_pmap) {
				extended.share_mode = SM_TRUESHARED;
			} else {
				extended.share_mode = SM_PRIVATE;
			}
			extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
		}
	}

	if (look_for_pages) {
		submap_info->pages_resident = extended.pages_resident;
		submap_info->pages_swapped_out = extended.pages_swapped_out;
		submap_info->pages_shared_now_private =
		    extended.pages_shared_now_private;
		submap_info->pages_dirtied = extended.pages_dirtied;
		submap_info->external_pager = extended.external_pager;
		submap_info->shadow_depth = extended.shadow_depth;
		submap_info->share_mode = extended.share_mode;
		submap_info->ref_count = extended.ref_count;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			submap_info->pages_reusable = extended.pages_reusable;
		}
		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
			submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
		}
	} else {
		short_info->external_pager = extended.external_pager;
		short_info->shadow_depth = extended.shadow_depth;
		short_info->share_mode = extended.share_mode;
		short_info->ref_count = extended.ref_count;
	}

	if (not_in_kdp) {
		vm_map_unlock_read(curr_map);
	}

	return KERN_SUCCESS;
}
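/*
 * Illustrative user-space usage (not part of the original source): the MIG
 * routine mach_vm_region_recurse() ultimately lands in
 * vm_map_region_recurse_64() above.  A minimal sketch, assuming the caller
 * inspects its own task:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_size_t size = 0;
 *	natural_t depth = 0;                    // follow submaps from the top
 *	vm_region_submap_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *
 *	kern_return_t kr = mach_vm_region_recurse(mach_task_self(), &addr,
 *	    &size, &depth, (vm_region_recurse_info_t)&info, &count);
 *	if (kr == KERN_SUCCESS) {
 *		// addr/size describe the first region at or above 0;
 *		// depth reports how deep in the submap chain it was found.
 *	}
 */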
/*
 *	vm_map_region:
 *
 *	User call to obtain information about a region in
 *	a task's address map.  Currently, only one flavor is
 *	supported.
 *
 *	XXX The reserved and behavior fields cannot be filled
 *	in until the vm merge from the IK is completed, and
 *	vm_reserve is implemented.
 */
kern_return_t
vm_map_region(
	vm_map_t                map,
	vm_map_offset_t        *address,               /* IN/OUT */
	vm_map_size_t          *size,                  /* OUT */
	vm_region_flavor_t      flavor,                 /* IN */
	vm_region_info_t        info,                   /* OUT */
	mach_msg_type_number_t *count,                  /* IN/OUT */
	mach_port_t            *object_name)            /* OUT */
{
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;
	vm_map_offset_t         start;

	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	switch (flavor) {
	case VM_REGION_BASIC_INFO:
		/* legacy for old 32-bit objects info */
	{
		vm_region_basic_info_t  basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = (uint32_t)VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		*object_name = IP_NULL;

		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}

	case VM_REGION_BASIC_INFO_64:
	{
		vm_region_basic_info_64_t       basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_64_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT_64;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		*object_name = IP_NULL;

		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	case VM_REGION_EXTENDED_INFO:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}
		/* fall through */
	case VM_REGION_EXTENDED_INFO__legacy:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
			return KERN_INVALID_ARGUMENT;
		}

	{
		vm_region_extended_info_t       extended;
		mach_msg_type_number_t original_count;
		int effective_page_size, effective_page_shift;

		extended = (vm_region_extended_info_t) info;

		effective_page_shift = vm_self_region_page_shift(map);
		effective_page_size = (1 << effective_page_shift);

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}
		start = entry->vme_start;

		extended->protection = entry->protection;
		extended->user_tag = VME_ALIAS(entry);
		extended->pages_resident = 0;
		extended->pages_swapped_out = 0;
		extended->pages_shared_now_private = 0;
		extended->pages_dirtied = 0;
		extended->external_pager = 0;
		extended->shadow_depth = 0;

		original_count = *count;
		if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
			*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
		} else {
			extended->pages_reusable = 0;
			*count = VM_REGION_EXTENDED_INFO_COUNT;
		}

		vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);

		if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
			extended->share_mode = SM_PRIVATE;
		}

		*object_name = IP_NULL;

		*address = start;
		*size = (entry->vme_end - start);

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	case VM_REGION_TOP_INFO:
	{
		vm_region_top_info_t    top;

		if (*count < VM_REGION_TOP_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		top = (vm_region_top_info_t) info;
		*count = VM_REGION_TOP_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}
		start = entry->vme_start;

		top->private_pages_resident = 0;
		top->shared_pages_resident = 0;

		vm_map_region_top_walk(entry, top);

		*object_name = IP_NULL;

		*address = start;
		*size = (entry->vme_end - start);

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	default:
		return KERN_INVALID_ARGUMENT;
	}
}
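/*
 * Illustrative user-space usage (not part of the original source): the
 * VM_REGION_BASIC_INFO_64 flavor handled above is reached through
 * mach_vm_region().  A minimal sketch; "some_pointer" is a placeholder:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	mach_vm_address_t addr = (mach_vm_address_t)some_pointer;
 *	mach_vm_size_t size = 0;
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t object_name = MACH_PORT_NULL;
 *
 *	kern_return_t kr = mach_vm_region(mach_task_self(), &addr, &size,
 *	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info, &count,
 *	    &object_name);
 *	// on success, "addr" is rounded down to the start of the containing
 *	// entry and info.protection / info.max_protection mirror the entry
 *	// fields copied out above.
 */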
#define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
	MIN((entry_size),                                               \
	    ((obj)->all_reusable ?                                      \
	    (obj)->wired_page_count :                                   \
	    (obj)->resident_page_count - (obj)->reusable_page_count))
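/*
 * Illustrative note (not part of the original source): OBJ_RESIDENT_COUNT()
 * clamps the page count attributed to a map entry to the entry's own size.
 * For example, with an entry spanning 64 pages backed by an object that has
 * 100 resident pages, 10 of them reusable and all_reusable == FALSE, the
 * macro yields MIN(64, 100 - 10) == 64; if the same object instead had
 * all_reusable == TRUE and 5 wired pages, it would yield MIN(64, 5) == 5.
 */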
void
vm_map_region_top_walk(
	vm_map_entry_t          entry,
	vm_region_top_info_t    top)
{
	if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
		top->share_mode = SM_EMPTY;
		top->ref_count = 0;
		top->obj_id = 0;
	} else {
		struct  vm_object *obj, *tmp_obj;
		int             ref_count;
		uint32_t        entry_size;

		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);

		obj = VME_OBJECT(entry);

		vm_object_lock(obj);

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}

		assert(obj->reusable_page_count <= obj->resident_page_count);

		if (obj->shadow) {
			if (ref_count == 1) {
				top->private_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			}
			top->ref_count = ref_count;
			top->share_mode = SM_COW;

			while ((tmp_obj = obj->shadow)) {
				vm_object_lock(tmp_obj);
				vm_object_unlock(obj);
				obj = tmp_obj;

				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
					ref_count--;
				}

				assert(obj->reusable_page_count <= obj->resident_page_count);
				top->shared_pages_resident +=
				    OBJ_RESIDENT_COUNT(obj, entry_size);
				top->ref_count += ref_count - 1;
			}
		} else {
			if (entry->superpage_size) {
				top->share_mode = SM_LARGE_PAGE;
				top->shared_pages_resident = 0;
				top->private_pages_resident = entry_size;
			} else if (entry->needs_copy) {
				top->share_mode = SM_COW;
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				if (ref_count == 1 ||
				    (ref_count == 2 && obj->named)) {
					top->share_mode = SM_PRIVATE;
					top->private_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				} else {
					top->share_mode = SM_SHARED;
					top->shared_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				}
			}
			top->ref_count = ref_count;
		}
		/* XXX K64: obj_id will be truncated */
		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);

		vm_object_unlock(obj);
	}
}
void
vm_map_region_walk(
	vm_map_t                        map,
	vm_map_offset_t                 va,
	vm_map_entry_t                  entry,
	vm_object_offset_t              offset,
	vm_object_size_t                range,
	vm_region_extended_info_t       extended,
	boolean_t                       look_for_pages,
	mach_msg_type_number_t          count)
{
	struct vm_object *obj, *tmp_obj;
	vm_map_offset_t         last_offset;
	int                     i;
	int                     ref_count;
	struct vm_object        *shadow_object;
	unsigned short          shadow_depth;
	boolean_t               do_region_footprint;
	int                     effective_page_size, effective_page_shift;
	vm_map_offset_t         effective_page_mask;

	do_region_footprint = task_self_region_footprint();

	if ((VME_OBJECT(entry) == 0) ||
	    (entry->is_sub_map) ||
	    (VME_OBJECT(entry)->phys_contiguous &&
	    !entry->superpage_size)) {
		extended->share_mode = SM_EMPTY;
		extended->ref_count = 0;
		return;
	}

	if (entry->superpage_size) {
		extended->shadow_depth = 0;
		extended->share_mode = SM_LARGE_PAGE;
		extended->ref_count = 1;
		extended->external_pager = 0;

		/* TODO4K: Superpage in 4k mode? */
		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
		extended->shadow_depth = 0;
		return;
	}

	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);
	effective_page_mask = effective_page_size - 1;

	offset = vm_map_trunc_page(offset, effective_page_mask);

	obj = VME_OBJECT(entry);

	vm_object_lock(obj);

	if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
		ref_count--;
	}

	if (look_for_pages) {
		for (last_offset = offset + range;
		    offset < last_offset;
		    offset += effective_page_size, va += effective_page_size) {
			if (do_region_footprint) {
				int disp;

				disp = 0;
				if (map->has_corpse_footprint) {
					/*
					 * Query the page info data we saved
					 * while forking the corpse.
					 */
					vm_map_corpse_footprint_query_page_info(
						map,
						va,
						&disp);
				} else {
					vm_map_footprint_query_page_info(
						map,
						entry,
						va,
						&disp);
				}
				if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
					extended->pages_resident++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
					extended->pages_reusable++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
					extended->pages_dirtied++;
				}
				if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
					extended->pages_swapped_out++;
				}
				continue;
			}

			vm_map_region_look_for_page(map, va, obj,
			    vm_object_trunc_page(offset), ref_count,
			    0, extended, count);
		}

		if (do_region_footprint) {
			goto collect_object_info;
		}
	} else {
collect_object_info:
		shadow_object = obj->shadow;
		shadow_depth = 0;

		if (!(obj->internal)) {
			extended->external_pager = 1;
		}

		if (shadow_object != VM_OBJECT_NULL) {
			vm_object_lock(shadow_object);
			for (;
			    shadow_object != VM_OBJECT_NULL;
			    shadow_depth++) {
				vm_object_t     next_shadow;

				if (!(shadow_object->internal)) {
					extended->external_pager = 1;
				}

				next_shadow = shadow_object->shadow;
				if (next_shadow) {
					vm_object_lock(next_shadow);
				}
				vm_object_unlock(shadow_object);
				shadow_object = next_shadow;
			}
		}
		extended->shadow_depth = shadow_depth;
	}

	if (extended->shadow_depth || entry->needs_copy) {
		extended->share_mode = SM_COW;
	} else {
		if (ref_count == 1) {
			extended->share_mode = SM_PRIVATE;
		} else {
			if (obj->true_share) {
				extended->share_mode = SM_TRUESHARED;
			} else {
				extended->share_mode = SM_SHARED;
			}
		}
	}
	extended->ref_count = ref_count - extended->shadow_depth;

	for (i = 0; i < extended->shadow_depth; i++) {
		if ((tmp_obj = obj->shadow) == 0) {
			break;
		}
		vm_object_lock(tmp_obj);
		vm_object_unlock(obj);

		if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
			ref_count--;
		}

		extended->ref_count += ref_count;
		obj = tmp_obj;
	}
	vm_object_unlock(obj);

	if (extended->share_mode == SM_SHARED) {
		vm_map_entry_t  cur;
		vm_map_entry_t  last;
		int my_refs;

		obj = VME_OBJECT(entry);
		last = vm_map_to_entry(map);
		my_refs = 0;

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}
		for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
			my_refs += vm_map_region_count_obj_refs(cur, obj);
		}

		if (my_refs == ref_count) {
			extended->share_mode = SM_PRIVATE_ALIASED;
		} else if (my_refs > 1) {
			extended->share_mode = SM_SHARED_ALIASED;
		}
	}
}
14893 vm_map_region_look_for_page(
14894 __unused vm_map_t map
,
14895 __unused vm_map_offset_t va
,
14896 vm_object_t object
,
14897 vm_object_offset_t offset
,
14899 unsigned short depth
,
14900 vm_region_extended_info_t extended
,
14901 mach_msg_type_number_t count
)
14904 vm_object_t shadow
;
14906 vm_object_t caller_object
;
14908 shadow
= object
->shadow
;
14909 caller_object
= object
;
14913 if (!(object
->internal
)) {
14914 extended
->external_pager
= 1;
14917 if ((p
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
14918 if (shadow
&& (max_refcnt
== 1)) {
14919 extended
->pages_shared_now_private
++;
14922 if (!p
->vmp_fictitious
&&
14923 (p
->vmp_dirty
|| pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p
)))) {
14924 extended
->pages_dirtied
++;
14925 } else if (count
>= VM_REGION_EXTENDED_INFO_COUNT
) {
14926 if (p
->vmp_reusable
|| object
->all_reusable
) {
14927 extended
->pages_reusable
++;
14931 extended
->pages_resident
++;
14933 if (object
!= caller_object
) {
14934 vm_object_unlock(object
);
14939 if (object
->internal
&&
14941 !object
->terminating
&&
14942 object
->pager_ready
) {
14943 if (VM_COMPRESSOR_PAGER_STATE_GET(object
, offset
)
14944 == VM_EXTERNAL_STATE_EXISTS
) {
14945 /* the pager has that page */
14946 extended
->pages_swapped_out
++;
14947 if (object
!= caller_object
) {
14948 vm_object_unlock(object
);
14955 vm_object_lock(shadow
);
14957 if ((ref_count
= shadow
->ref_count
) > 1 && shadow
->paging_in_progress
) {
14961 if (++depth
> extended
->shadow_depth
) {
14962 extended
->shadow_depth
= depth
;
14965 if (ref_count
> max_refcnt
) {
14966 max_refcnt
= ref_count
;
14969 if (object
!= caller_object
) {
14970 vm_object_unlock(object
);
14973 offset
= offset
+ object
->vo_shadow_offset
;
14975 shadow
= object
->shadow
;
14978 if (object
!= caller_object
) {
14979 vm_object_unlock(object
);
static int
vm_map_region_count_obj_refs(
	vm_map_entry_t  entry,
	vm_object_t     object)
{
	int ref_count;
	vm_object_t chk_obj;
	vm_object_t tmp_obj;

	if (VME_OBJECT(entry) == 0) {
		return 0;
	}

	if (entry->is_sub_map) {
		return 0;
	} else {
		ref_count = 0;

		chk_obj = VME_OBJECT(entry);
		vm_object_lock(chk_obj);

		while (chk_obj) {
			if (chk_obj == object) {
				ref_count++;
			}
			tmp_obj = chk_obj->shadow;
			if (tmp_obj) {
				vm_object_lock(tmp_obj);
			}
			vm_object_unlock(chk_obj);

			chk_obj = tmp_obj;
		}
	}
	return ref_count;
}
/*
 *	Routine:	vm_map_simplify
 *
 *	Description:
 *		Attempt to simplify the map representation in
 *		the vicinity of the given starting address.
 *	Note:
 *		This routine is intended primarily to keep the
 *		kernel maps more compact -- they generally don't
 *		benefit from the "expand a map entry" technology
 *		at allocation time because the adjacent entry
 *		is often wired down.
 */
void
vm_map_simplify_entry(
	vm_map_t        map,
	vm_map_entry_t  this_entry)
{
	vm_map_entry_t  prev_entry;

	prev_entry = this_entry->vme_prev;

	if ((this_entry != vm_map_to_entry(map)) &&
	    (prev_entry != vm_map_to_entry(map)) &&

	    (prev_entry->vme_end == this_entry->vme_start) &&

	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
	    (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
	    ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
	    prev_entry->vme_start))
	    == VME_OFFSET(this_entry)) &&

	    (prev_entry->behavior == this_entry->behavior) &&
	    (prev_entry->needs_copy == this_entry->needs_copy) &&
	    (prev_entry->protection == this_entry->protection) &&
	    (prev_entry->max_protection == this_entry->max_protection) &&
	    (prev_entry->inheritance == this_entry->inheritance) &&
	    (prev_entry->use_pmap == this_entry->use_pmap) &&
	    (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
	    (prev_entry->no_cache == this_entry->no_cache) &&
	    (prev_entry->permanent == this_entry->permanent) &&
	    (prev_entry->map_aligned == this_entry->map_aligned) &&
	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
	    (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
	    /* from_reserved_zone: OK if that field doesn't match */

	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
	    (prev_entry->vme_resilient_codesign ==
	    this_entry->vme_resilient_codesign) &&
	    (prev_entry->vme_resilient_media ==
	    this_entry->vme_resilient_media) &&
	    (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&

	    (prev_entry->wired_count == this_entry->wired_count) &&
	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&

	    ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
	    (prev_entry->in_transition == FALSE) &&
	    (this_entry->in_transition == FALSE) &&
	    (prev_entry->needs_wakeup == FALSE) &&
	    (this_entry->needs_wakeup == FALSE) &&
	    (prev_entry->is_shared == this_entry->is_shared) &&
	    (prev_entry->superpage_size == FALSE) &&
	    (this_entry->superpage_size == FALSE)
	    ) {
		vm_map_store_entry_unlink(map, prev_entry);
		assert(prev_entry->vme_start < this_entry->vme_end);
		if (prev_entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
			    VM_MAP_PAGE_MASK(map)));
		}
		this_entry->vme_start = prev_entry->vme_start;
		VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));

		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, this_entry, TRUE);
		}

		if (prev_entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(prev_entry));
		} else {
			vm_object_deallocate(VME_OBJECT(prev_entry));
		}
		vm_map_entry_dispose(map, prev_entry);
		SAVE_HINT_MAP_WRITE(map, this_entry);
	}
}
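/*
 * Illustrative note (not part of the original source): when all of the
 * checks above pass, two adjacent entries such as
 *
 *	[0x1000, 0x3000) offset 0x0    and    [0x3000, 0x5000) offset 0x2000
 *
 * on the same object collapse into a single entry [0x1000, 0x5000) with
 * offset 0x0: "this_entry" absorbs "prev_entry", which is then unlinked
 * and disposed of.
 */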
void
vm_map_simplify(
	vm_map_t        map,
	vm_map_offset_t start)
{
	vm_map_entry_t  this_entry;

	vm_map_lock(map);
	if (vm_map_lookup_entry(map, start, &this_entry)) {
		vm_map_simplify_entry(map, this_entry);
		vm_map_simplify_entry(map, this_entry->vme_next);
	}
	vm_map_unlock(map);
}
static void
vm_map_simplify_range(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;

	/*
	 * The map should be locked (for "write") by the caller.
	 */

	if (start >= end) {
		/* invalid address range */
		return;
	}

	start = vm_map_trunc_page(start,
	    VM_MAP_PAGE_MASK(map));
	end = vm_map_round_page(end,
	    VM_MAP_PAGE_MASK(map));

	if (!vm_map_lookup_entry(map, start, &entry)) {
		/* "start" is not mapped and "entry" ends before "start" */
		if (entry == vm_map_to_entry(map)) {
			/* start with first entry in the map */
			entry = vm_map_first_entry(map);
		} else {
			/* start with next entry */
			entry = entry->vme_next;
		}
	}

	while (entry != vm_map_to_entry(map) &&
	    entry->vme_start <= end) {
		/* try and coalesce "entry" with its previous entry */
		vm_map_simplify_entry(map, entry);
		entry = entry->vme_next;
	}
}
/*
 *	Routine:	vm_map_machine_attribute
 *	Purpose:
 *		Provide machine-specific attributes to mappings,
 *		such as cachability etc. for machines that provide
 *		them.  NUMA architectures and machines with big/strange
 *		caches will use this.
 *	Note:
 *		Responsibilities for locking and checking are handled here,
 *		everything else in the pmap module. If any non-volatile
 *		information must be kept, the pmap module should handle
 *		it itself. [This assumes that attributes do not
 *		need to be inherited, which seems ok to me]
 */
kern_return_t
vm_map_machine_attribute(
	vm_map_t                        map,
	vm_map_offset_t                 start,
	vm_map_offset_t                 end,
	vm_machine_attribute_t          attribute,
	vm_machine_attribute_val_t*     value)          /* IN/OUT */
{
	kern_return_t   ret;
	vm_map_size_t sync_size;
	vm_map_entry_t entry;

	if (start < vm_map_min(map) || end > vm_map_max(map)) {
		return KERN_INVALID_ADDRESS;
	}

	/* Figure how much memory we need to flush (in page increments) */
	sync_size = end - start;

	vm_map_lock(map);

	if (attribute != MATTR_CACHE) {
		/* If we don't have to find physical addresses, we */
		/* don't have to do an explicit traversal here.    */
		ret = pmap_attribute(map->pmap, start, end - start,
		    attribute, value);
		vm_map_unlock(map);
		return ret;
	}

	ret = KERN_SUCCESS;                                             /* Assume it all worked */

	while (sync_size) {
		if (vm_map_lookup_entry(map, start, &entry)) {
			vm_map_size_t   sub_size;
			if ((entry->vme_end - start) > sync_size) {
				sub_size = sync_size;
				sync_size = 0;
			} else {
				sub_size = entry->vme_end - start;
				sync_size -= sub_size;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;

				sub_start = (start - entry->vme_start)
				    + VME_OFFSET(entry);
				sub_end = sub_start + sub_size;
				vm_map_machine_attribute(
					VME_SUBMAP(entry),
					sub_start,
					sub_end,
					attribute, value);
			} else {
				if (VME_OBJECT(entry)) {
					vm_page_t       m;
					vm_object_t     object;
					vm_object_t     base_object;
					vm_object_t     last_object;
					vm_object_offset_t offset;
					vm_object_offset_t base_offset;
					vm_map_size_t   range;
					range = sub_size;
					offset = (start - entry->vme_start)
					    + VME_OFFSET(entry);
					offset = vm_object_trunc_page(offset);
					base_offset = offset;
					object = VME_OBJECT(entry);
					base_object = object;
					last_object = NULL;

					vm_object_lock(object);

					while (range) {
						m = vm_page_lookup(
							object, offset);

						if (m && !m->vmp_fictitious) {
							ret =
							    pmap_attribute_cache_sync(
								VM_PAGE_GET_PHYS_PAGE(m),
								PAGE_SIZE,
								attribute, value);
						} else if (object->shadow) {
							offset = offset + object->vo_shadow_offset;
							last_object = object;
							object = object->shadow;
							vm_object_lock(last_object->shadow);
							vm_object_unlock(last_object);
							continue;
						}
						if (range < PAGE_SIZE) {
							break;
						}
						range -= PAGE_SIZE;

						if (base_object != object) {
							vm_object_unlock(object);
							vm_object_lock(base_object);
							object = base_object;
						}
						/* Bump to the next page */
						base_offset += PAGE_SIZE;
						offset = base_offset;
					}
					vm_object_unlock(object);
				}
			}
			start += sub_size;
		} else {
			vm_map_unlock(map);
			return KERN_FAILURE;
		}
	}

	vm_map_unlock(map);

	return ret;
}
/*
 *	vm_map_behavior_set:
 *
 *	Sets the paging reference behavior of the specified address
 *	range in the target map.  Paging reference behavior affects
 *	how pagein operations resulting from faults on the map will be
 *	clustered.
 */
kern_return_t
vm_map_behavior_set(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_behavior_t   new_behavior)
{
	vm_map_entry_t  entry;
	vm_map_entry_t  temp_entry;

	if (start >= end ||
	    start < vm_map_min(map) ||
	    end > vm_map_max(map)) {
		return KERN_NO_SPACE;
	}

	switch (new_behavior) {
	/*
	 * This first block of behaviors all set a persistent state on the specified
	 * memory range.  All we have to do here is to record the desired behavior
	 * in the vm_map_entry_t's.
	 */

	case VM_BEHAVIOR_DEFAULT:
	case VM_BEHAVIOR_RANDOM:
	case VM_BEHAVIOR_SEQUENTIAL:
	case VM_BEHAVIOR_RSEQNTL:
	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
		vm_map_lock(map);

		/*
		 * The entire address range must be valid for the map.
		 * Note that vm_map_range_check() does a
		 * vm_map_lookup_entry() internally and returns the
		 * entry containing the start of the address range if
		 * the entire range is valid.
		 */
		if (vm_map_range_check(map, start, end, &temp_entry)) {
			entry = temp_entry;
			vm_map_clip_start(map, entry, start);
		} else {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
			vm_map_clip_end(map, entry, end);
			if (entry->is_sub_map) {
				assert(!entry->use_pmap);
			}

			if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
				entry->zero_wired_pages = TRUE;
			} else {
				entry->behavior = new_behavior;
			}
			entry = entry->vme_next;
		}

		vm_map_unlock(map);
		break;

	/*
	 * The rest of these are different from the above in that they cause
	 * an immediate action to take place as opposed to setting a behavior that
	 * affects future actions.
	 */

	case VM_BEHAVIOR_WILLNEED:
		return vm_map_willneed(map, start, end);

	case VM_BEHAVIOR_DONTNEED:
		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_FREE:
		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_REUSABLE:
		return vm_map_reusable_pages(map, start, end);

	case VM_BEHAVIOR_REUSE:
		return vm_map_reuse_pages(map, start, end);

	case VM_BEHAVIOR_CAN_REUSE:
		return vm_map_can_reuse(map, start, end);

#if MACH_ASSERT
	case VM_BEHAVIOR_PAGEOUT:
		return vm_map_pageout(map, start, end);
#endif /* MACH_ASSERT */

	default:
		return KERN_INVALID_ARGUMENT;
	}

	return KERN_SUCCESS;
}
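/*
 * Illustrative user-space mapping (not part of the original source): the
 * BSD madvise()/posix_madvise() advice values are translated, roughly in
 * the BSD layer (kern_mman.c) rather than in this file, into the
 * vm_behavior_t values dispatched above, e.g.:
 *
 *	#include <sys/mman.h>
 *
 *	madvise(addr, len, MADV_WILLNEED);       // -> VM_BEHAVIOR_WILLNEED
 *	madvise(addr, len, MADV_FREE_REUSABLE);  // -> VM_BEHAVIOR_REUSABLE
 *	madvise(addr, len, MADV_FREE_REUSE);     // -> VM_BEHAVIOR_REUSE
 *	madvise(addr, len, MADV_CAN_REUSE);      // -> VM_BEHAVIOR_CAN_REUSE
 */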
/*
 * Internals for madvise(MADV_WILLNEED) system call.
 *
 * The implementation is to do:-
 * a) read-ahead if the mapping corresponds to a mapped regular file
 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
 */

static kern_return_t
vm_map_willneed(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end
	)
{
	vm_map_entry_t  entry;
	vm_object_t     object;
	memory_object_t pager;
	struct vm_object_fault_info fault_info = {};
	kern_return_t   kr;
	vm_object_size_t        len;
	vm_object_offset_t      offset;

	fault_info.interruptible = THREAD_UNINT;                /* ignored value */
	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
	fault_info.stealth = TRUE;

	/*
	 * The MADV_WILLNEED operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && start < end;) {
		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.  After that, the offset will always be zero to
		 * correspond to the beginning of the current vm_map_entry.
		 */
		offset = (start - entry->vme_start) + VME_OFFSET(entry);

		/*
		 * Set the length so we don't go beyond the end of the
		 * map_entry or beyond the end of the range we were given.
		 * This range could span also multiple map entries all of which
		 * map different files, so make sure we only do the right amount
		 * of I/O for each object.  Note that it's possible for there
		 * to be multiple map entries all referring to the same object
		 * but with different page permissions, but it's not worth
		 * trying to optimize that case.
		 */
		len = MIN(entry->vme_end - start, end - start);

		if ((vm_size_t) len != len) {
			/* 32-bit overflow */
			len = (vm_size_t) (0 - PAGE_SIZE);
		}
		fault_info.cluster_size = (vm_size_t) len;
		fault_info.lo_offset = offset;
		fault_info.hi_offset = offset + len;
		fault_info.user_tag = VME_ALIAS(entry);
		fault_info.pmap_options = 0;
		if (entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap)) {
			fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
		}

		/*
		 * If the entry is a submap OR there's no read permission
		 * to this mapping, then just skip it.
		 */
		if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
			entry = entry->vme_next;
			start = entry->vme_start;
			continue;
		}

		object = VME_OBJECT(entry);

		if (object == NULL ||
		    (object && object->internal)) {
			/*
			 * Memory range backed by anonymous memory.
			 */
			vm_size_t region_size = 0, effective_page_size = 0;
			vm_map_offset_t addr = 0, effective_page_mask = 0;

			region_size = len;
			addr = start;

			effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
			effective_page_size = effective_page_mask + 1;

			vm_map_unlock_read(map);

			while (region_size) {
				vm_pre_fault(
					vm_map_trunc_page(addr, effective_page_mask),
					VM_PROT_READ | VM_PROT_WRITE);

				region_size -= effective_page_size;
				addr += effective_page_size;
			}
		} else {
			/*
			 * Find the file object backing this map entry.  If there is
			 * none, then we simply ignore the "will need" advice for this
			 * entry and go on to the next one.
			 */
			if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
				entry = entry->vme_next;
				start = entry->vme_start;
				continue;
			}

			vm_object_paging_begin(object);
			pager = object->pager;
			vm_object_unlock(object);

			/*
			 * The data_request() could take a long time, so let's
			 * release the map lock to avoid blocking other threads.
			 */
			vm_map_unlock_read(map);

			/*
			 * Get the data from the object asynchronously.
			 *
			 * Note that memory_object_data_request() places limits on the
			 * amount of I/O it will do.  Regardless of the len we
			 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
			 * silently truncates the len to that size.  This isn't
			 * necessarily bad since madvise shouldn't really be used to
			 * page in unlimited amounts of data.  Other Unix variants
			 * limit the willneed case as well.  If this turns out to be an
			 * issue for developers, then we can always adjust the policy
			 * here and still be backwards compatible since this is all
			 * just "advice".
			 */
			kr = memory_object_data_request(
				pager,
				vm_object_trunc_page(offset) + object->paging_offset,
				0,      /* ignored */
				VM_PROT_READ,
				(memory_object_fault_info_t)&fault_info);

			vm_object_lock(object);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			/*
			 * If we couldn't do the I/O for some reason, just give up on
			 * the madvise.  We still return success to the user since
			 * madvise isn't supposed to fail when the advice can't be
			 * taken.
			 */

			if (kr != KERN_SUCCESS) {
				return KERN_SUCCESS;
			}
		}

		start += len;
		if (start >= end) {
			/* done */
			return KERN_SUCCESS;
		}

		/* look up next entry */
		vm_map_lock_read(map);
		if (!vm_map_lookup_entry(map, start, &entry)) {
			/*
			 * There's a new hole in the address range.
			 */
			vm_map_unlock_read(map);
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
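/*
 * Illustrative user-space usage (not part of the original source): a
 * MADV_WILLNEED hint on a file mapping is what drives the read-ahead path
 * above.  Minimal sketch; "fd" and "length" are placeholders:
 *
 *	#include <sys/mman.h>
 *
 *	void *p = mmap(NULL, length, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);
 *	if (p != MAP_FAILED) {
 *		(void)madvise(p, length, MADV_WILLNEED);   // async read-ahead
 *	}
 */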
static boolean_t
vm_map_entry_is_reusable(
	vm_map_entry_t entry)
{
	/* Only user map entries */

	vm_object_t object;

	if (entry->is_sub_map) {
		return FALSE;
	}

	switch (VME_ALIAS(entry)) {
	case VM_MEMORY_MALLOC:
	case VM_MEMORY_MALLOC_SMALL:
	case VM_MEMORY_MALLOC_LARGE:
	case VM_MEMORY_REALLOC:
	case VM_MEMORY_MALLOC_TINY:
	case VM_MEMORY_MALLOC_LARGE_REUSABLE:
	case VM_MEMORY_MALLOC_LARGE_REUSED:
		/*
		 * This is a malloc() memory region: check if it's still
		 * in its original state and can be re-used for more
		 * malloc() allocations.
		 */
		break;
	default:
		/*
		 * Not a malloc() memory region: let the caller decide if
		 * it's reusable.
		 */
		return TRUE;
	}

	if (/*entry->is_shared ||*/
		entry->is_sub_map ||
		entry->in_transition ||
		entry->protection != VM_PROT_DEFAULT ||
		entry->max_protection != VM_PROT_ALL ||
		entry->inheritance != VM_INHERIT_DEFAULT ||
		entry->no_cache ||
		entry->permanent ||
		entry->superpage_size != FALSE ||
		entry->zero_wired_pages ||
		entry->wired_count != 0 ||
		entry->user_wired_count != 0) {
		return FALSE;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		return TRUE;
	}
	if (
		/*
		 * Let's proceed even if the VM object is potentially
		 * shared.
		 * We check for this later when processing the actual
		 * VM pages, so the contents will be safe if shared.
		 *
		 * But we can still mark this memory region as "reusable" to
		 * acknowledge that the caller did let us know that the memory
		 * could be re-used and should not be penalized for holding
		 * on to it.  This allows its "resident size" to not include
		 * the reusable range.
		 */
		object->ref_count == 1 &&
		object->wired_page_count == 0 &&
		object->copy == VM_OBJECT_NULL &&
		object->shadow == VM_OBJECT_NULL &&
		object->internal &&
		object->purgable == VM_PURGABLE_DENY &&
		object->wimg_bits == VM_WIMG_USE_DEFAULT &&
		!object->code_signed) {
		return TRUE;
	}
	return FALSE;
}
static kern_return_t
vm_map_reuse_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t                  entry;
	vm_object_t                     object;
	vm_object_offset_t              start_offset, end_offset;

	/*
	 * The MADV_REUSE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a
		 * portion of a native page.
		 */
		return KERN_SUCCESS;
	}

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reuse_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reuse_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
		} else {
			start_offset = 0;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object != VM_OBJECT_NULL) {
			vm_object_lock(object);
			vm_object_reuse_pages(object, start_offset, end_offset,
			    TRUE);
			vm_object_unlock(object);
		}

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reuse_pages_success++;
	return KERN_SUCCESS;
}
static kern_return_t
vm_map_reusable_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t                  entry;
	vm_object_t                     object;
	vm_object_offset_t              start_offset, end_offset;
	vm_map_offset_t                 pmap_offset;

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a portion
		 * of a native page.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reusable_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		int kill_pages = 0;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
			/* not writable: can't discard contents */
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_nonwritable++;
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
			pmap_offset = start;
		} else {
			start_offset = 0;
			pmap_offset = entry->vme_start;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}

		vm_object_lock(object);
		if (((object->ref_count == 1) ||
		    (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
		    object->copy == VM_OBJECT_NULL)) &&
		    object->shadow == VM_OBJECT_NULL &&
		    /*
		     * "iokit_acct" entries are billed for their virtual size
		     * (rather than for their resident pages only), so they
		     * wouldn't benefit from making pages reusable, and it
		     * would be hard to keep track of pages that are both
		     * "iokit_acct" and "reusable" in the pmap stats and
		     * ledgers.
		     */
		    !(entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap))) {
			if (object->ref_count != 1) {
				vm_page_stats_reusable.reusable_shared++;
			}
			kill_pages = 1;
		} else {
			kill_pages = -1;
		}
		if (kill_pages != -1) {
			vm_object_deactivate_pages(object,
			    start_offset,
			    end_offset - start_offset,
			    kill_pages,
			    TRUE /*reusable_pages*/,
			    map->pmap,
			    pmap_offset);
		} else {
			vm_page_stats_reusable.reusable_pages_shared++;
		}
		vm_object_unlock(object);

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
		    VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reusable_pages_success++;
	return KERN_SUCCESS;
}
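/*
 * Illustrative user-space usage (not part of the original source): the
 * reusable/reuse pair above backs the "deallocate without unmapping"
 * optimization used by allocators.  A minimal sketch on an anonymous
 * mapping ("len" is a placeholder):
 *
 *	#include <sys/mman.h>
 *
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	...
 *	madvise(p, len, MADV_FREE_REUSABLE);  // contents may be discarded,
 *	                                      // pages leave the footprint
 *	...
 *	madvise(p, len, MADV_FREE_REUSE);     // about to reuse the range,
 *	                                      // count the pages again
 */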
static kern_return_t
vm_map_can_reuse(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t                  entry;

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.can_reuse_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.can_reuse_failure++;
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.can_reuse_success++;
	return KERN_SUCCESS;
}
#if MACH_ASSERT
static kern_return_t
vm_map_pageout(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t                  entry;

	/*
	 * The MADV_PAGEOUT operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		vm_object_t     object;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (entry->is_sub_map) {
			vm_map_t submap;
			vm_map_offset_t submap_start;
			vm_map_offset_t submap_end;
			vm_map_entry_t submap_entry;

			submap = VME_SUBMAP(entry);
			submap_start = VME_OFFSET(entry);
			submap_end = submap_start + (entry->vme_end -
			    entry->vme_start);

			vm_map_lock_read(submap);

			if (!vm_map_range_check(submap,
			    submap_start,
			    submap_end,
			    &submap_entry)) {
				vm_map_unlock_read(submap);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			object = VME_OBJECT(submap_entry);
			if (submap_entry->is_sub_map ||
			    object == VM_OBJECT_NULL ||
			    !object->internal) {
				vm_map_unlock_read(submap);
				continue;
			}

			vm_object_pageout(object);

			vm_map_unlock_read(submap);
			submap = VM_MAP_NULL;
			submap_entry = VM_MAP_ENTRY_NULL;
			continue;
		}

		object = VME_OBJECT(entry);
		if (entry->is_sub_map ||
		    object == VM_OBJECT_NULL ||
		    !object->internal) {
			continue;
		}

		vm_object_pageout(object);
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
/*
 *	Routine:	vm_map_entry_insert
 *
 *	Description:	This routine inserts a new vm_entry in a locked map.
 */
vm_map_entry_t
vm_map_entry_insert(
	vm_map_t map,
	vm_map_entry_t insp_entry,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_object_t object,
	vm_object_offset_t offset,
	vm_map_kernel_flags_t vmk_flags,
	boolean_t needs_copy,
	boolean_t is_shared,
	boolean_t in_transition,
	vm_prot_t cur_protection,
	vm_prot_t max_protection,
	vm_behavior_t behavior,
	vm_inherit_t inheritance,
	unsigned short wired_count,
	boolean_t no_cache,
	boolean_t permanent,
	boolean_t no_copy_on_read,
	unsigned int superpage_size,
	boolean_t clear_map_aligned,
	boolean_t is_submap,
	boolean_t used_for_jit,
	int alias,
	boolean_t translated_allow_execute)
{
	vm_map_entry_t new_entry;

	assert(insp_entry != (vm_map_entry_t)0);
	vm_map_lock_assert_exclusive(map);

#if DEVELOPMENT || DEBUG
	vm_object_offset_t end_offset = 0;
	assertf(!os_add_overflow(end - start, offset, &end_offset),
	    "size 0x%llx, offset 0x%llx caused overflow",
	    (uint64_t)(end - start), offset);
#endif /* DEVELOPMENT || DEBUG */

	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);

	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}
	if (clear_map_aligned &&
	    (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
	    !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
		new_entry->map_aligned = FALSE;
	}

	new_entry->vme_start = start;
	new_entry->vme_end = end;
	if (new_entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
		    VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
		    VM_MAP_PAGE_MASK(map)));
	} else {
		assert(page_aligned(new_entry->vme_start));
		assert(page_aligned(new_entry->vme_end));
	}
	assert(new_entry->vme_start < new_entry->vme_end);

	VME_OBJECT_SET(new_entry, object);
	VME_OFFSET_SET(new_entry, offset);
	new_entry->is_shared = is_shared;
	new_entry->is_sub_map = is_submap;
	new_entry->needs_copy = needs_copy;
	new_entry->in_transition = in_transition;
	new_entry->needs_wakeup = FALSE;
	new_entry->inheritance = inheritance;
	new_entry->protection = cur_protection;
	new_entry->max_protection = max_protection;
	new_entry->behavior = behavior;
	new_entry->wired_count = wired_count;
	new_entry->user_wired_count = 0;
	if (is_submap) {
		/*
		 * submap: "use_pmap" means "nested".
		 */
		new_entry->use_pmap = FALSE;
	} else {
		/*
		 * object: "use_pmap" means "use pmap accounting" for footprint.
		 */
		new_entry->use_pmap = TRUE;
	}
	VME_ALIAS_SET(new_entry, alias);
	new_entry->zero_wired_pages = FALSE;
	new_entry->no_cache = no_cache;
	new_entry->permanent = permanent;
	if (superpage_size) {
		new_entry->superpage_size = TRUE;
	} else {
		new_entry->superpage_size = FALSE;
	}
	if (used_for_jit) {
		if (!(map->jit_entry_exists) ||
		    VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
			new_entry->used_for_jit = TRUE;
			map->jit_entry_exists = TRUE;
		}
	} else {
		new_entry->used_for_jit = FALSE;
	}
	if (translated_allow_execute) {
		new_entry->translated_allow_execute = TRUE;
	} else {
		new_entry->translated_allow_execute = FALSE;
	}
	new_entry->pmap_cs_associated = FALSE;
	new_entry->iokit_acct = FALSE;
	new_entry->vme_resilient_codesign = FALSE;
	new_entry->vme_resilient_media = FALSE;
	new_entry->vme_atomic = FALSE;
	new_entry->vme_no_copy_on_read = no_copy_on_read;

	/*
	 *	Insert the new entry into the list.
	 */

	vm_map_store_entry_link(map, insp_entry, new_entry, vmk_flags);
	map->size += end - start;

	/*
	 *	Update the free space hint and the lookup hint.
	 */

	SAVE_HINT_MAP_WRITE(map, new_entry);
	return new_entry;
}
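
/*
 * Illustrative calling pattern (a sketch, not a caller in this file):
 * vm_map_entry_insert() assumes the caller already holds the map lock
 * exclusively and has located the entry after which the new entry
 * should be linked, roughly:
 *
 *	vm_map_lock(map);
 *	if (vm_map_lookup_entry(map, start, &insp_entry)) {
 *		// range already mapped: handle the collision first
 *	}
 *	new_entry = vm_map_entry_insert(map, insp_entry, start, end,
 *	    object, offset, ... remaining attribute arguments ...);
 *	vm_map_unlock(map);
 *
 * The argument list is abbreviated here; see the definition above for
 * the full set of attributes.
 */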
/*
 *	Routine:	vm_map_remap_extract
 *
 *	Description:	This routine returns a vm_entry list from a map.
 */
static kern_return_t
vm_map_remap_extract(
	vm_map_t map,
	vm_map_offset_t addr,
	vm_map_size_t size,
	boolean_t copy,
	struct vm_map_header *map_header,
	vm_prot_t *cur_protection,   /* IN/OUT */
	vm_prot_t *max_protection,   /* IN/OUT */
	/* What, no behavior? */
	vm_inherit_t inheritance,
	vm_map_kernel_flags_t vmk_flags)
{
	kern_return_t result;
	vm_map_size_t mapped_size;
	vm_map_size_t tmp_size;
	vm_map_entry_t src_entry;     /* result of last map lookup */
	vm_map_entry_t new_entry;
	vm_object_offset_t offset;
	vm_map_offset_t map_address;
	vm_map_offset_t src_start;     /* start of entry to map */
	vm_map_offset_t src_end;       /* end of region to be mapped */
	vm_object_t object;
	vm_map_version_t version;
	boolean_t src_needs_copy;
	boolean_t new_entry_needs_copy;
	vm_map_entry_t saved_src_entry;
	boolean_t src_entry_was_wired;
	vm_prot_t max_prot_for_prot_copy;
	vm_map_offset_t effective_page_mask;
	boolean_t pageable, same_map;
	boolean_t vm_remap_legacy;
	vm_prot_t required_cur_prot, required_max_prot;

	pageable = vmk_flags.vmkf_copy_pageable;
	same_map = vmk_flags.vmkf_copy_same_map;

	effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));

	assert(map != VM_MAP_NULL);
	assert(size != 0);
	assert(size == vm_map_round_page(size, effective_page_mask));
	assert(inheritance == VM_INHERIT_NONE ||
	    inheritance == VM_INHERIT_COPY ||
	    inheritance == VM_INHERIT_SHARE);
	assert(!(*cur_protection & ~VM_PROT_ALL));
	assert(!(*max_protection & ~VM_PROT_ALL));
	assert((*cur_protection & *max_protection) == *cur_protection);

	/*
	 *	Compute start and end of region.
	 */
	src_start = vm_map_trunc_page(addr, effective_page_mask);
	src_end = vm_map_round_page(src_start + size, effective_page_mask);

	/*
	 *	Initialize map_header.
	 */
	map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
	map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
	map_header->nentries = 0;
	map_header->entries_pageable = pageable;
//	map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
	map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
	map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;

	vm_map_store_init(map_header);

	if (copy && vmk_flags.vmkf_remap_prot_copy) {
		/*
		 * Special case for vm_map_protect(VM_PROT_COPY):
		 * we want to set the new mappings' max protection to the
		 * specified *max_protection...
		 */
		max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
		/* ... but we want to use the vm_remap() legacy mode */
		*max_protection = VM_PROT_NONE;
		*cur_protection = VM_PROT_NONE;
	} else {
		max_prot_for_prot_copy = VM_PROT_NONE;
	}

	if (*cur_protection == VM_PROT_NONE &&
	    *max_protection == VM_PROT_NONE) {
		/*
		 * vm_remap() legacy mode:
		 * Extract all memory regions in the specified range and
		 * collect the strictest set of protections allowed on the
		 * entire range, so the caller knows what they can do with
		 * the remapped range.
		 * We start with VM_PROT_ALL and we'll remove the protections
		 * missing from each memory region.
		 */
		vm_remap_legacy = TRUE;
		*cur_protection = VM_PROT_ALL;
		*max_protection = VM_PROT_ALL;
		required_cur_prot = VM_PROT_NONE;
		required_max_prot = VM_PROT_NONE;
	} else {
		/*
		 * vm_remap_new() mode:
		 * Extract all memory regions in the specified range and
		 * ensure that they have at least the protections specified
		 * by the caller via *cur_protection and *max_protection.
		 * The resulting mapping should have these protections.
		 */
		vm_remap_legacy = FALSE;
		if (copy) {
			required_cur_prot = VM_PROT_NONE;
			required_max_prot = VM_PROT_READ;
		} else {
			required_cur_prot = *cur_protection;
			required_max_prot = *max_protection;
		}
	}

	map_address = 0;
	mapped_size = 0;
	result = KERN_SUCCESS;
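
	/*
	 * Worked example of the two extraction modes above (illustrative
	 * only): suppose the range covers two entries, one mapped r-x/r-x
	 * and one mapped rw-/rwx.
	 *
	 * - legacy mode (*cur_protection == *max_protection == VM_PROT_NONE
	 *   on entry): we start from VM_PROT_ALL and intersect per entry,
	 *   so the caller gets back cur = r-- (r-x & rw-) and
	 *   max = r-x (r-x & rwx), i.e. the strictest protections valid
	 *   across the whole range.
	 *
	 * - vm_remap_new() mode: the caller passes the protections it
	 *   wants, e.g. cur = r--, max = r--; each entry is checked against
	 *   required_cur_prot/required_max_prot and the extraction fails
	 *   with KERN_PROTECTION_FAILURE if any entry doesn't grant them.
	 */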
	/*
	 *	The specified source virtual space might correspond to
	 *	multiple map entries, need to loop on them.
	 */
	vm_map_lock(map);
	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * This address space uses sub-pages so the range might
		 * not be re-mappable in an address space with larger
		 * pages. Re-assemble any broken-up VM map entries to
		 * improve our chances of making it work.
		 */
		vm_map_simplify_range(map, src_start, src_end);
	}
	while (mapped_size != size) {
		vm_map_size_t entry_size;

		/*
		 *	Find the beginning of the region.
		 */
		if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
			result = KERN_INVALID_ADDRESS;
			break;
		}

		if (src_start < src_entry->vme_start ||
		    (mapped_size && src_start != src_entry->vme_start)) {
			result = KERN_INVALID_ADDRESS;
			break;
		}

		tmp_size = size - mapped_size;
		if (src_end > src_entry->vme_end) {
			tmp_size -= (src_end - src_entry->vme_end);
		}

		entry_size = (vm_map_size_t)(src_entry->vme_end -
		    src_entry->vme_start);

		if (src_entry->is_sub_map &&
		    vmk_flags.vmkf_copy_single_object) {
			vm_map_t submap;
			vm_map_offset_t submap_start;
			vm_map_size_t submap_size;
			boolean_t submap_needs_copy;

			/*
			 * No check for "required protection" on "src_entry"
			 * because the protections that matter are the ones
			 * on the submap's VM map entry, which will be checked
			 * during the call to vm_map_remap_extract() below.
			 */
			submap_size = src_entry->vme_end - src_start;
			if (submap_size > size) {
				submap_size = size;
			}
			submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;

			submap = VME_SUBMAP(src_entry);
			if (copy) {
				/*
				 * The caller wants a copy-on-write re-mapping,
				 * so let's extract from the submap accordingly.
				 */
				submap_needs_copy = TRUE;
			} else if (src_entry->needs_copy) {
				/*
				 * The caller wants a shared re-mapping but the
				 * submap is mapped with "needs_copy", so its
				 * contents can't be shared as is. Extract the
				 * contents of the submap as "copy-on-write".
				 * The re-mapping won't be shared with the
				 * original mapping but this is equivalent to
				 * what happened with the original "remap from
				 * submap" code.
				 * The shared region is mapped "needs_copy", for
				 * example.
				 */
				submap_needs_copy = TRUE;
			} else {
				/*
				 * The caller wants a shared re-mapping and
				 * this mapping can be shared (no "needs_copy"),
				 * so let's extract from the submap accordingly.
				 * Kernel submaps are mapped without
				 * "needs_copy", for example.
				 */
				submap_needs_copy = FALSE;
			}
			vm_map_reference(submap);
			vm_map_unlock(map);

			if (vm_remap_legacy) {
				*cur_protection = VM_PROT_NONE;
				*max_protection = VM_PROT_NONE;
			}

			DTRACE_VM7(remap_submap_recurse,
			    vm_map_t, map,
			    vm_map_offset_t, addr,
			    vm_map_size_t, size,
			    boolean_t, copy,
			    vm_map_offset_t, submap_start,
			    vm_map_size_t, submap_size,
			    boolean_t, submap_needs_copy);

			result = vm_map_remap_extract(submap,
			    /* ... */);

			vm_map_deallocate(submap);
			return result;
		}

		if (src_entry->is_sub_map) {
			/* protections for submap mapping are irrelevant here */
		} else if (((src_entry->protection & required_cur_prot) !=
		    required_cur_prot) ||
		    ((src_entry->max_protection & required_max_prot) !=
		    required_max_prot)) {
			if (vmk_flags.vmkf_copy_single_object &&
			    mapped_size != 0) {
				/*
				 * Single object extraction.
				 * We can't extract more with the required
				 * protection but we've extracted some, so
				 * stop there and declare success.
				 * The caller should check the size of
				 * the copy entry we've extracted.
				 */
				result = KERN_SUCCESS;
			} else {
				/*
				 * VM range extraction.
				 * Required protection is not available
				 * for this part of the range: fail.
				 */
				result = KERN_PROTECTION_FAILURE;
			}
			break;
		}
		if (src_entry->is_sub_map) {
			vm_map_t submap;
			vm_map_offset_t submap_start;
			vm_map_size_t submap_size;
			vm_map_copy_t submap_copy;
			vm_prot_t submap_curprot, submap_maxprot;
			boolean_t submap_needs_copy;

			/*
			 * No check for "required protection" on "src_entry"
			 * because the protections that matter are the ones
			 * on the submap's VM map entry, which will be checked
			 * during the call to vm_map_copy_extract() below.
			 */
			object = VM_OBJECT_NULL;
			submap_copy = VM_MAP_COPY_NULL;

			/* find equivalent range in the submap */
			submap = VME_SUBMAP(src_entry);
			submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
			submap_size = tmp_size;
			if (copy) {
				/*
				 * The caller wants a copy-on-write re-mapping,
				 * so let's extract from the submap accordingly.
				 */
				submap_needs_copy = TRUE;
			} else if (src_entry->needs_copy) {
				/*
				 * The caller wants a shared re-mapping but the
				 * submap is mapped with "needs_copy", so its
				 * contents can't be shared as is. Extract the
				 * contents of the submap as "copy-on-write".
				 * The re-mapping won't be shared with the
				 * original mapping but this is equivalent to
				 * what happened with the original "remap from
				 * submap" code.
				 * The shared region is mapped "needs_copy", for
				 * example.
				 */
				submap_needs_copy = TRUE;
			} else {
				/*
				 * The caller wants a shared re-mapping and
				 * this mapping can be shared (no "needs_copy"),
				 * so let's extract from the submap accordingly.
				 * Kernel submaps are mapped without
				 * "needs_copy", for example.
				 */
				submap_needs_copy = FALSE;
			}
			/* extra ref to keep submap alive */
			vm_map_reference(submap);

			DTRACE_VM7(remap_submap_recurse,
			    vm_map_t, map,
			    vm_map_offset_t, addr,
			    vm_map_size_t, size,
			    boolean_t, copy,
			    vm_map_offset_t, submap_start,
			    vm_map_size_t, submap_size,
			    boolean_t, submap_needs_copy);

			/*
			 * The map can be safely unlocked since we
			 * already hold a reference on the submap.
			 *
			 * No timestamp since we don't care if the map
			 * gets modified while we're down in the submap.
			 * We'll resume the extraction at src_start + tmp_size
			 * in the parent map.
			 */
			vm_map_unlock(map);
			src_entry = NULL; /* not valid once map is unlocked */

			if (vm_remap_legacy) {
				submap_curprot = VM_PROT_NONE;
				submap_maxprot = VM_PROT_NONE;
				if (max_prot_for_prot_copy) {
					submap_maxprot = max_prot_for_prot_copy;
				}
			} else {
				assert(!max_prot_for_prot_copy);
				submap_curprot = *cur_protection;
				submap_maxprot = *max_protection;
			}
			result = vm_map_copy_extract(submap,
			    /* ... */);

			/* release extra ref on submap */
			vm_map_deallocate(submap);
			submap = VM_MAP_NULL;

			if (result != KERN_SUCCESS) {
				vm_map_lock(map);
				break;
			}

			/* transfer submap_copy entries to map_header */
			while (vm_map_copy_first_entry(submap_copy) !=
			    vm_map_copy_to_entry(submap_copy)) {
				vm_map_entry_t copy_entry;
				vm_map_size_t copy_entry_size;

				copy_entry = vm_map_copy_first_entry(submap_copy);
				assert(!copy_entry->is_sub_map);
				object = VME_OBJECT(copy_entry);

				/*
				 * Prevent kernel_object from being exposed to
				 * user tasks.
				 */
				if (__improbable(object == kernel_object)) {
					printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
					    /* ... */
					    (current_task()->bsd_info
					    ? proc_name_address(current_task()->bsd_info)
					    : /* ... */));
					DTRACE_VM(extract_kernel_only);
					result = KERN_INVALID_RIGHT;
					vm_map_copy_discard(submap_copy);
					submap_copy = VM_MAP_COPY_NULL;
					vm_map_lock(map);
					break;
				}

				vm_map_copy_entry_unlink(submap_copy, copy_entry);
				copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
				copy_entry->vme_start = map_address;
				copy_entry->vme_end = map_address + copy_entry_size;
				map_address += copy_entry_size;
				mapped_size += copy_entry_size;
				src_start += copy_entry_size;
				assert(src_start <= src_end);
				_vm_map_store_entry_link(map_header,
				    map_header->links.prev,
				    copy_entry);
			}
			/* done with submap_copy */
			vm_map_copy_discard(submap_copy);

			if (vm_remap_legacy) {
				*cur_protection &= submap_curprot;
				*max_protection &= submap_maxprot;
			}

			/* re-acquire the map lock and continue to next entry */
			vm_map_lock(map);
			continue;
		}
		object = VME_OBJECT(src_entry);

		/*
		 * Prevent kernel_object from being exposed to
		 * user tasks.
		 */
		if (__improbable(object == kernel_object)) {
			printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
			    /* ... */
			    (current_task()->bsd_info
			    ? proc_name_address(current_task()->bsd_info)
			    : /* ... */));
			DTRACE_VM(extract_kernel_only);
			result = KERN_INVALID_RIGHT;
			break;
		}

		if (src_entry->iokit_acct) {
			/*
			 * This entry uses "IOKit accounting".
			 */
		} else if (object != VM_OBJECT_NULL &&
		    (object->purgable != VM_PURGABLE_DENY ||
		    object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
			/*
			 * Purgeable objects have their own accounting:
			 * no pmap accounting for them.
			 */
			assertf(!src_entry->use_pmap,
			    "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
			    map, src_entry,
			    (uint64_t)src_entry->vme_start,
			    (uint64_t)src_entry->vme_end,
			    src_entry->protection,
			    src_entry->max_protection,
			    VME_ALIAS(src_entry));
		} else {
			/*
			 * Not IOKit or purgeable:
			 * must be accounted by pmap stats.
			 */
			assertf(src_entry->use_pmap,
			    "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
			    map, src_entry,
			    (uint64_t)src_entry->vme_start,
			    (uint64_t)src_entry->vme_end,
			    src_entry->protection,
			    src_entry->max_protection,
			    VME_ALIAS(src_entry));
		}

		if (object == VM_OBJECT_NULL) {
			assert(!src_entry->needs_copy);
			object = vm_object_allocate(entry_size);
			VME_OFFSET_SET(src_entry, 0);
			VME_OBJECT_SET(src_entry, object);
			assert(src_entry->use_pmap);
			assert(!map->mapped_in_other_pmaps);
		} else if (src_entry->wired_count ||
		    object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
			/*
			 * A wired memory region should not have
			 * any pending copy-on-write and needs to
			 * keep pointing at the VM object that
			 * contains the wired pages.
			 * If we're sharing this memory (copy=false),
			 * we'll share this VM object.
			 * If we're copying this memory (copy=true),
			 * we'll call vm_object_copy_slowly() below
			 * and use the new VM object for the remapping.
			 *
			 * Or, we are already using an asymmetric
			 * copy, and therefore we already have
			 * the right object.
			 */
			assert(!src_entry->needs_copy);
		} else if (src_entry->needs_copy || object->shadowed ||
		    (object->internal && !object->true_share &&
		    !src_entry->is_shared &&
		    object->vo_size > entry_size)) {
			VME_OBJECT_SHADOW(src_entry, entry_size);
			assert(src_entry->use_pmap);

			if (!src_entry->needs_copy &&
			    (src_entry->protection & VM_PROT_WRITE)) {
				vm_prot_t prot;

				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));

				prot = src_entry->protection & ~VM_PROT_WRITE;

				if (override_nx(map,
				    VME_ALIAS(src_entry))
				    && prot) {
					prot |= VM_PROT_EXECUTE;
				}

				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));

				if (map->mapped_in_other_pmaps) {
					vm_object_pmap_protect(
						VME_OBJECT(src_entry),
						VME_OFFSET(src_entry),
						/* ... */
						src_entry->vme_start,
						prot);
#if MACH_ASSERT
				} else if (__improbable(map->pmap == PMAP_NULL)) {
					extern boolean_t vm_tests_in_progress;
					assert(vm_tests_in_progress);
					/*
					 * Some VM tests (in vm_tests.c)
					 * sometimes want to use a VM
					 * map without a pmap.
					 * Otherwise, this should never
					 * happen.
					 */
#endif /* MACH_ASSERT */
				} else {
					pmap_protect(vm_map_pmap(map),
					    src_entry->vme_start,
					    src_entry->vme_end,
					    prot);
				}
			}

			object = VME_OBJECT(src_entry);
			src_entry->needs_copy = FALSE;
		}
		vm_object_lock(object);
		vm_object_reference_locked(object); /* object ref. for new entry */
		assert(!src_entry->needs_copy);
		if (object->copy_strategy ==
		    MEMORY_OBJECT_COPY_SYMMETRIC) {
			/*
			 * If we want to share this object (copy==0),
			 * it needs to be COPY_DELAY.
			 * If we want to copy this object (copy==1),
			 * we can't just set "needs_copy" on our side
			 * and expect the other side to do the same
			 * (symmetrically), so we can't let the object
			 * stay COPY_SYMMETRIC.
			 * So we always switch from COPY_SYMMETRIC to
			 * COPY_DELAY.
			 */
			object->copy_strategy =
			    MEMORY_OBJECT_COPY_DELAY;
			object->true_share = TRUE;
		}
		vm_object_unlock(object);

		offset = (VME_OFFSET(src_entry) +
		    (src_start - src_entry->vme_start));

		new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
		vm_map_entry_copy(map, new_entry, src_entry);
		if (new_entry->is_sub_map) {
			/* clr address space specifics */
			new_entry->use_pmap = FALSE;
		} else {
			/*
			 * We're dealing with a copy-on-write operation,
			 * so the resulting mapping should not inherit the
			 * original mapping's accounting settings.
			 * "use_pmap" should be reset to its default (TRUE)
			 * so that the new mapping gets accounted for in
			 * the task's memory footprint.
			 */
			new_entry->use_pmap = TRUE;
		}
		/* "iokit_acct" was cleared in vm_map_entry_copy() */
		assert(!new_entry->iokit_acct);

		new_entry->map_aligned = FALSE;

		new_entry->vme_start = map_address;
		new_entry->vme_end = map_address + tmp_size;
		assert(new_entry->vme_start < new_entry->vme_end);
		if (copy && vmk_flags.vmkf_remap_prot_copy) {
			/*
			 * Remapping for vm_map_protect(VM_PROT_COPY)
			 * to convert a read-only mapping into a
			 * copy-on-write version of itself but
			 * with write access:
			 * keep the original inheritance and add
			 * VM_PROT_WRITE to the max protection.
			 */
			new_entry->inheritance = src_entry->inheritance;
			new_entry->protection &= max_prot_for_prot_copy;
			new_entry->max_protection |= VM_PROT_WRITE;
		} else {
			new_entry->inheritance = inheritance;
			if (!vm_remap_legacy) {
				new_entry->protection = *cur_protection;
				new_entry->max_protection = *max_protection;
			}
		}
		VME_OFFSET_SET(new_entry, offset);
		/*
		 * The new region has to be copied now if required.
		 */
		if (!copy) {
			if (src_entry->used_for_jit == TRUE) {
				if (same_map) {
				} else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
					/*
					 * Cannot allow an entry describing a JIT
					 * region to be shared across address spaces.
					 */
					result = KERN_INVALID_ARGUMENT;
					break;
				}
			}

			src_entry->is_shared = TRUE;
			new_entry->is_shared = TRUE;
			if (!(new_entry->is_sub_map)) {
				new_entry->needs_copy = FALSE;
			}
		} else if (src_entry->is_sub_map) {
			/* make this a COW sub_map if not already */
			assert(new_entry->wired_count == 0);
			new_entry->needs_copy = TRUE;
			object = VM_OBJECT_NULL;
		} else if (src_entry->wired_count == 0 &&
		    !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
		    vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
		    VME_OFFSET(new_entry),
		    (new_entry->vme_end -
		    new_entry->vme_start),
		    &src_needs_copy,
		    &new_entry_needs_copy)) {
			new_entry->needs_copy = new_entry_needs_copy;
			new_entry->is_shared = FALSE;
			assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);

			/*
			 * Handle copy_on_write semantics.
			 */
			if (src_needs_copy && !src_entry->needs_copy) {
				vm_prot_t prot;

				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));

				prot = src_entry->protection & ~VM_PROT_WRITE;

				if (override_nx(map,
				    VME_ALIAS(src_entry))
				    && prot) {
					prot |= VM_PROT_EXECUTE;
				}

				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));

				vm_object_pmap_protect(object,
				    /* ... */
				    ((src_entry->is_shared
				    || map->mapped_in_other_pmaps) ?
				    PMAP_NULL : map->pmap),
				    VM_MAP_PAGE_SIZE(map),
				    src_entry->vme_start,
				    prot);

				assert(src_entry->wired_count == 0);
				src_entry->needs_copy = TRUE;
			}
			/*
			 * Throw away the old object reference of the new entry.
			 */
			vm_object_deallocate(object);
		} else {
			new_entry->is_shared = FALSE;
			assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);

			src_entry_was_wired = (src_entry->wired_count > 0);
			saved_src_entry = src_entry;
			src_entry = VM_MAP_ENTRY_NULL;
			/*
			 * The map can be safely unlocked since we
			 * already hold a reference on the object.
			 *
			 * Record the timestamp of the map for later
			 * verification, and unlock the map.
			 */
			version.main_timestamp = map->timestamp;
			vm_map_unlock(map);     /* Increments timestamp once! */

			/*
			 * Perform the copy.
			 */
			if (src_entry_was_wired > 0 ||
			    (debug4k_no_cow_copyin &&
			    VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
				vm_object_lock(object);
				result = vm_object_copy_slowly(
					object,
					offset,
					(new_entry->vme_end -
					new_entry->vme_start),
					/* ... */
					VME_OBJECT_PTR(new_entry));

				VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
				new_entry->needs_copy = FALSE;
			} else {
				vm_object_offset_t new_offset;

				new_offset = VME_OFFSET(new_entry);
				result = vm_object_copy_strategically(
					object,
					offset,
					(new_entry->vme_end -
					new_entry->vme_start),
					VME_OBJECT_PTR(new_entry),
					&new_offset,
					&new_entry_needs_copy);
				if (new_offset != VME_OFFSET(new_entry)) {
					VME_OFFSET_SET(new_entry, new_offset);
				}

				new_entry->needs_copy = new_entry_needs_copy;
			}

			/*
			 * Throw away the old object reference of the new entry.
			 */
			vm_object_deallocate(object);

			if (result != KERN_SUCCESS &&
			    result != KERN_MEMORY_RESTART_COPY) {
				_vm_map_entry_dispose(map_header, new_entry);
				vm_map_lock(map);
				break;
			}

			/*
			 * Verify that the map has not substantially
			 * changed while the copy was being made.
			 */

			vm_map_lock(map);
			if (version.main_timestamp + 1 != map->timestamp) {
				/*
				 * Simple version comparison failed.
				 *
				 * Retry the lookup and verify that the
				 * same object/offset are still present.
				 */
				saved_src_entry = VM_MAP_ENTRY_NULL;
				vm_object_deallocate(VME_OBJECT(new_entry));
				_vm_map_entry_dispose(map_header, new_entry);
				if (result == KERN_MEMORY_RESTART_COPY) {
					result = KERN_SUCCESS;
				}
				continue;
			}

			/* map hasn't changed: src_entry is still valid */
			src_entry = saved_src_entry;
			saved_src_entry = VM_MAP_ENTRY_NULL;

			if (result == KERN_MEMORY_RESTART_COPY) {
				vm_object_reference(object);
				/* ... */
			}
		}

		_vm_map_store_entry_link(map_header,
		    map_header->links.prev, new_entry);
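
		/*
		 * The timestamp dance above is a small optimistic-concurrency
		 * pattern: record map->timestamp, drop the map lock for the
		 * long copy, then re-take the lock and only trust the saved
		 * entry pointer if exactly one unlock/lock cycle (ours)
		 * happened in between.  A generic sketch of the same idea,
		 * with hypothetical names:
		 *
		 *	unsigned int saved = map->timestamp;
		 *	vm_map_unlock(map);     // unlock bumps the timestamp once
		 *	do_long_operation();
		 *	vm_map_lock(map);
		 *	if (saved + 1 != map->timestamp) {
		 *		// somebody else modified the map: re-lookup
		 *		// instead of reusing cached entry pointers
		 *	}
		 */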
		/* protections for submap mapping are irrelevant here */
		if (vm_remap_legacy && !src_entry->is_sub_map) {
			*cur_protection &= src_entry->protection;
			*max_protection &= src_entry->max_protection;
		}

		map_address += tmp_size;
		mapped_size += tmp_size;
		src_start += tmp_size;

		if (vmk_flags.vmkf_copy_single_object) {
			if (mapped_size != size) {
				DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
				if (src_entry->vme_next != vm_map_to_entry(map) &&
				    VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
					DEBUG4K_ERROR("could have extended copy to next entry...\n");
				}
			}
			break;
		}
	} /* end while */

	vm_map_unlock(map);
	if (result != KERN_SUCCESS) {
		/*
		 * Free all allocated elements.
		 */
		for (src_entry = map_header->links.next;
		    src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
		    src_entry = new_entry) {
			new_entry = src_entry->vme_next;
			_vm_map_store_entry_unlink(map_header, src_entry);
			if (src_entry->is_sub_map) {
				vm_map_deallocate(VME_SUBMAP(src_entry));
			} else {
				vm_object_deallocate(VME_OBJECT(src_entry));
			}
			_vm_map_entry_dispose(map_header, src_entry);
		}
	}
	return result;
}

boolean_t
vm_map_is_exotic(
	vm_map_t map)
{
	return VM_MAP_IS_EXOTIC(map);
}

boolean_t
vm_map_is_alien(
	vm_map_t map)
{
	return VM_MAP_IS_ALIEN(map);
}
#if XNU_TARGET_OS_OSX
void
vm_map_mark_alien(
	vm_map_t map)
{
	vm_map_lock(map);
	map->is_alien = true;
	vm_map_unlock(map);
}

void
vm_map_single_jit(
	vm_map_t map)
{
	vm_map_lock(map);
	map->single_jit = true;
	vm_map_unlock(map);
}
#endif /* XNU_TARGET_OS_OSX */
void vm_map_copy_to_physcopy(vm_map_copy_t copy_map, vm_map_t target_map);
void
vm_map_copy_to_physcopy(
	vm_map_copy_t copy_map,
	vm_map_t target_map)
{
	vm_map_size_t size;
	vm_map_entry_t entry;
	vm_map_entry_t new_entry;
	vm_object_t new_object;
	unsigned int pmap_flags;
	pmap_t new_pmap;
	vm_map_t new_map;
	vm_map_address_t src_start, src_end, src_cur;
	vm_map_address_t dst_start, dst_end, dst_cur;
	kern_return_t kr;
	void *kbuf;

	/*
	 * Perform the equivalent of vm_allocate() and memcpy().
	 * Replace the mappings in "copy_map" with the newly allocated mapping.
	 */
	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);

	assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));

	/* allocate new VM object */
	size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
	new_object = vm_object_allocate(size);
	assert(new_object);

	/* allocate new VM map entry */
	new_entry = vm_map_copy_entry_create(copy_map, FALSE);
	assert(new_entry);

	/* finish initializing new VM map entry */
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_DEFAULT;
	new_entry->use_pmap = TRUE;

	/* make new VM map entry point to new VM object */
	new_entry->vme_start = 0;
	new_entry->vme_end = size;
	VME_OBJECT_SET(new_entry, new_object);
	VME_OFFSET_SET(new_entry, 0);

	/* create a new pmap to map "copy_map" */
	pmap_flags = 0;
	assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
#if PMAP_CREATE_FORCE_4K_PAGES
	pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
#endif /* PMAP_CREATE_FORCE_4K_PAGES */
	pmap_flags |= PMAP_CREATE_64BIT;
	new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);

	/* create a new pageable VM map to map "copy_map" */
	new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
	vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);

	/* map "copy_map" in the new VM map */
	kr = vm_map_copyout_internal(
		/* ... */
		FALSE, /* consume_on_success */
		/* ... */
		VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	src_end = src_start + copy_map->size;

	/* map "new_object" in the new VM map */
	vm_object_reference(new_object);
	kr = vm_map_enter(new_map,
	    /* ... */
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_OSFMK,
	    /* ... */
	    FALSE, /* needs copy */
	    /* ... */
	    VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	dst_end = dst_start + size;

	/* get a kernel buffer */
	kbuf = kheap_alloc(KHEAP_TEMP, PAGE_SIZE, Z_WAITOK);

	/* physically copy "copy_map" mappings to new VM object */
	for (src_cur = src_start, dst_cur = dst_start;
	    src_cur < src_end;
	    src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
		vm_size_t bytes;

		bytes = PAGE_SIZE;
		if (src_cur + PAGE_SIZE > src_end) {
			/* partial copy for last page */
			bytes = src_end - src_cur;
			assert(bytes > 0 && bytes < PAGE_SIZE);
			/* rest of dst page should be zero-filled */
		}
		/* get bytes from src mapping */
		kr = copyinmap(new_map, src_cur, kbuf, bytes);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
		}
		/* put bytes in dst mapping */
		assert(dst_cur < dst_end);
		assert(dst_cur + bytes <= dst_end);
		kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
		}
	}

	/* free kernel buffer */
	kheap_free(KHEAP_TEMP, kbuf, PAGE_SIZE);
	kbuf = NULL;

	/* destroy new map */
	vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
	new_map = VM_MAP_NULL;

	/* dispose of the old map entries in "copy_map" */
	while (vm_map_copy_first_entry(copy_map) !=
	    vm_map_copy_to_entry(copy_map)) {
		entry = vm_map_copy_first_entry(copy_map);
		vm_map_copy_entry_unlink(copy_map, entry);
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(copy_map, entry);
	}

	/* change "copy_map"'s page_size to match "target_map" */
	copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
	copy_map->offset = 0;
	copy_map->size = size;

	/* insert new map entry in "copy_map" */
	assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
	vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);

	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
}
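
/*
 * Note on the physical-copy fallback above (illustrative): when the
 * copy's page shift (e.g. 4K, FOURK_PAGE_SHIFT == 12) differs from the
 * target map's, the individual mappings may not be shareable or
 * COW-able at the target page size, so the contents are simply copied
 * into one fresh VM object whose size is rounded up to the kernel page
 * size:
 *
 *	size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
 *	// e.g. a 0x5000-byte 4K copy becomes a 0x8000-byte object
 *	// when PAGE_SIZE is 16K
 *
 * after which "copy_map" describes a single entry backed by that object.
 */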
static void
vm_map_copy_adjust_get_target_copy_map(
	vm_map_copy_t copy_map,
	vm_map_copy_t *target_copy_map_p);
static void
vm_map_copy_adjust_get_target_copy_map(
	vm_map_copy_t copy_map,
	vm_map_copy_t *target_copy_map_p)
{
	vm_map_copy_t target_copy_map;
	vm_map_entry_t entry, target_entry;

	if (*target_copy_map_p != VM_MAP_COPY_NULL) {
		/* the caller already has a "target_copy_map": use it */
		return;
	}

	/* the caller wants us to create a new copy of "copy_map" */
	target_copy_map = vm_map_copy_allocate();
	target_copy_map->type = copy_map->type;
	assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
	target_copy_map->offset = copy_map->offset;
	target_copy_map->size = copy_map->size;
	target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
	vm_map_store_init(&target_copy_map->cpy_hdr);
	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = entry->vme_next) {
		target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
		vm_map_entry_copy_full(target_entry, entry);
		if (target_entry->is_sub_map) {
			vm_map_reference(VME_SUBMAP(target_entry));
		} else {
			vm_object_reference(VME_OBJECT(target_entry));
		}
		vm_map_copy_entry_link(
			target_copy_map,
			vm_map_copy_last_entry(target_copy_map),
			target_entry);
	}
	entry = VM_MAP_ENTRY_NULL;
	*target_copy_map_p = target_copy_map;
}
static void
vm_map_copy_trim(
	vm_map_copy_t copy_map,
	int new_page_shift,
	vm_map_offset_t trim_start,
	vm_map_offset_t trim_end);
static void
vm_map_copy_trim(
	vm_map_copy_t copy_map,
	int new_page_shift,
	vm_map_offset_t trim_start,
	vm_map_offset_t trim_end)
{
	int copy_page_shift;
	vm_map_entry_t entry, next_entry;

	assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
	assert(copy_map->cpy_hdr.nentries > 0);

	trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
	trim_end += vm_map_copy_first_entry(copy_map)->vme_start;

	/* use the new page_shift to do the clipping */
	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_map->cpy_hdr.page_shift = new_page_shift;

	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = next_entry) {
		next_entry = entry->vme_next;
		if (entry->vme_end <= trim_start) {
			/* entry fully before trim range: skip */
			continue;
		}
		if (entry->vme_start >= trim_end) {
			/* entry fully after trim range: done */
			break;
		}
		/* clip entry if needed */
		vm_map_copy_clip_start(copy_map, entry, trim_start);
		vm_map_copy_clip_end(copy_map, entry, trim_end);
		/* dispose of entry */
		copy_map->size -= entry->vme_end - entry->vme_start;
		vm_map_copy_entry_unlink(copy_map, entry);
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(copy_map, entry);
		entry = VM_MAP_ENTRY_NULL;
	}

	/* restore copy_map's original page_shift */
	copy_map->cpy_hdr.page_shift = copy_page_shift;
}
/*
 * Make any necessary adjustments to "copy_map" to allow it to be
 * mapped into "target_map".
 * If no changes were necessary, "target_copy_map" points to the
 * untouched "copy_map".
 * If changes are necessary, changes will be made to "target_copy_map".
 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
 * copy the original "copy_map" to it before applying the changes.
 * The caller should discard "target_copy_map" if it's not the same as
 * the original "copy_map".
 */
/* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
kern_return_t
vm_map_copy_adjust_to_target(
	vm_map_copy_t src_copy_map,
	vm_map_offset_t offset,
	vm_map_size_t size,
	vm_map_t target_map,
	boolean_t copy,
	vm_map_copy_t *target_copy_map_p,
	vm_map_offset_t *overmap_start_p,
	vm_map_offset_t *overmap_end_p,
	vm_map_offset_t *trimmed_start_p)
{
	vm_map_copy_t copy_map, target_copy_map;
	vm_map_size_t target_size;
	vm_map_size_t src_copy_map_size;
	vm_map_size_t overmap_start, overmap_end;
	int misalignments;
	vm_map_entry_t entry, target_entry;
	vm_map_offset_t addr_adjustment;
	vm_map_offset_t new_start, new_end;
	int copy_page_mask, target_page_mask;
	int copy_page_shift, target_page_shift;
	vm_map_offset_t trimmed_end;

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(src_copy_map);
	assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);

	/*
	 * Start working with "src_copy_map" but we'll switch
	 * to "target_copy_map" as soon as we start making adjustments.
	 */
	copy_map = src_copy_map;
	src_copy_map_size = src_copy_map->size;

	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
	target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
	target_page_mask = VM_MAP_PAGE_MASK(target_map);

	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);

	target_copy_map = *target_copy_map_p;
	if (target_copy_map != VM_MAP_COPY_NULL) {
		vm_map_copy_require(target_copy_map);
	}

	if (offset + size > copy_map->size) {
		DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
		return KERN_INVALID_ARGUMENT;
	}

	/* trim the end */
	trimmed_end = 0;
	new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
	if (new_end < copy_map->size) {
		trimmed_end = src_copy_map_size - new_end;
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    new_end, copy_map->size);
	}

	/* trim the start */
	new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
	if (new_start != 0) {
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    0, new_start);
	}
	*trimmed_start_p = new_start;

	/* target_size starts with what's left after trimming */
	target_size = copy_map->size;
	assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
	    "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
	    (uint64_t)target_size, (uint64_t)src_copy_map_size,
	    (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);

	/* check for misalignments but don't adjust yet */
	misalignments = 0;
	overmap_start = 0;
	overmap_end = 0;
	if (copy_page_shift < target_page_shift) {
		/*
		 * Remapping from 4K to 16K: check the VM object alignments
		 * throughout the range.
		 * If the start and end of the range are mis-aligned, we can
		 * over-map to re-align, and adjust the "overmap" start/end
		 * and "target_size" of the range accordingly.
		 * If there is any mis-alignment within the range:
		 * if copy:
		 *	we can do immediate-copy instead of copy-on-write,
		 * else:
		 *	no way to remap and share; fail.
		 */
		for (entry = vm_map_copy_first_entry(copy_map);
		    entry != vm_map_copy_to_entry(copy_map);
		    entry = entry->vme_next) {
			vm_object_offset_t object_offset_start, object_offset_end;

			object_offset_start = VME_OFFSET(entry);
			object_offset_end = object_offset_start;
			object_offset_end += entry->vme_end - entry->vme_start;
			if (object_offset_start & target_page_mask) {
				if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
					overmap_start++;
				} else {
					misalignments++;
				}
			}
			if (object_offset_end & target_page_mask) {
				if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
					overmap_end++;
				} else {
					misalignments++;
				}
			}
		}
	}
	entry = VM_MAP_ENTRY_NULL;

	/* decide how to deal with misalignments */
	assert(overmap_start <= 1);
	assert(overmap_end <= 1);
	if (!overmap_start && !overmap_end && !misalignments) {
		/* copy_map is properly aligned for target_map ... */
		if (*trimmed_start_p) {
			/* ... but we trimmed it, so still need to adjust */
		} else {
			/* ... and we didn't trim anything: we're done */
			if (target_copy_map == VM_MAP_COPY_NULL) {
				target_copy_map = copy_map;
			}
			*target_copy_map_p = target_copy_map;
			*overmap_start_p = 0;
			*overmap_end_p = 0;
			DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
			return KERN_SUCCESS;
		}
	} else if (misalignments && !copy) {
		/* can't "share" if misaligned */
		DEBUG4K_ADJUST("unsupported sharing\n");
#if MACH_ASSERT
		if (debug4k_panic_on_misaligned_sharing) {
			panic("DEBUG4k %s:%d unsupported sharing\n", __FUNCTION__, __LINE__);
		}
#endif /* MACH_ASSERT */
		DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
		return KERN_NOT_SUPPORTED;
	} else {
		/* can't virtual-copy if misaligned (but can physical-copy) */
		DEBUG4K_ADJUST("mis-aligned copying\n");
	}

	/* get a "target_copy_map" if needed and switch to it */
	vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
	copy_map = target_copy_map;

	if (misalignments && copy) {
		vm_map_size_t target_copy_map_size;

		/*
		 * Can't do copy-on-write with misaligned mappings.
		 * Replace the mappings with a physical copy of the original
		 * mappings' contents.
		 */
		target_copy_map_size = target_copy_map->size;
		vm_map_copy_to_physcopy(target_copy_map, target_map);
		*target_copy_map_p = target_copy_map;
		*overmap_start_p = 0;
		*overmap_end_p = target_copy_map->size - target_copy_map_size;
		DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
		return KERN_SUCCESS;
	}

	/* apply the adjustments */
	misalignments = 0;
	overmap_start = 0;
	overmap_end = 0;
	/* remove copy_map->offset, so that everything starts at offset 0 */
	addr_adjustment = copy_map->offset;
	/* also remove whatever we trimmed from the start */
	addr_adjustment += *trimmed_start_p;
	for (target_entry = vm_map_copy_first_entry(target_copy_map);
	    target_entry != vm_map_copy_to_entry(target_copy_map);
	    target_entry = target_entry->vme_next) {
		vm_object_offset_t object_offset_start, object_offset_end;

		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
		object_offset_start = VME_OFFSET(target_entry);
		if (object_offset_start & target_page_mask) {
			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
			if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
				/*
				 * start of 1st entry is mis-aligned:
				 * re-adjust by over-mapping.
				 */
				overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
				VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
			} else {
				misalignments++;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
			}
		}

		if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
			target_size += overmap_start;
		} else {
			target_entry->vme_start += overmap_start;
		}
		target_entry->vme_end += overmap_start;

		object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
		if (object_offset_end & target_page_mask) {
			DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
			if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
				/*
				 * end of last entry is mis-aligned: re-adjust by over-mapping.
				 */
				overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
				target_entry->vme_end += overmap_end;
				target_size += overmap_end;
			} else {
				misalignments++;
				DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
			}
		}

		target_entry->vme_start -= addr_adjustment;
		target_entry->vme_end -= addr_adjustment;
		DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
	}

	target_copy_map->size = target_size;
	target_copy_map->offset += overmap_start;
	target_copy_map->offset -= addr_adjustment;
	target_copy_map->cpy_hdr.page_shift = target_page_shift;

//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
//	assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
	assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
	assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));

	*target_copy_map_p = target_copy_map;
	*overmap_start_p = overmap_start;
	*overmap_end_p = overmap_end;

	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
	return KERN_SUCCESS;
}
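
/*
 * Worked example for the over-mapping arithmetic above (illustrative):
 * remapping a 4K copy into a 16K map, with target_page_mask == 0x3fff.
 * If the first entry's object offset is 0x7000:
 *
 *	overmap_start = 0x7000 - trunc_page_mask_64(0x7000, 0x3fff)
 *	              = 0x7000 - 0x4000 = 0x3000
 *
 * so the entry is pulled back to a 16K-aligned object offset and grows
 * by 0x3000 at the front.  Likewise, if the last entry ends at object
 * offset 0x9000:
 *
 *	overmap_end = round_page_mask_64(0x9000, 0x3fff) - 0x9000
 *	            = 0xc000 - 0x9000 = 0x3000
 *
 * and the mapping grows by 0x3000 at the back.  The caller is told how
 * much was over-mapped via *overmap_start_p and *overmap_end_p.
 */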
kern_return_t
vm_map_range_physical_size(
	vm_map_t map,
	vm_map_address_t start,
	mach_vm_size_t size,
	mach_vm_size_t * phys_size)
{
	kern_return_t kr;
	vm_map_copy_t copy_map, target_copy_map;
	vm_map_offset_t adjusted_start, adjusted_end;
	vm_map_size_t adjusted_size;
	vm_prot_t cur_prot, max_prot;
	vm_map_offset_t overmap_start, overmap_end, trimmed_start;
	vm_map_kernel_flags_t vmk_flags;

	adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
	adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
	adjusted_size = adjusted_end - adjusted_start;
	*phys_size = adjusted_size;
	if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
		return KERN_SUCCESS;
	}
	if (/* ... */) {
		adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
		adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
		adjusted_size = adjusted_end - adjusted_start;
		*phys_size = adjusted_size;
		return KERN_SUCCESS;
	}
	if (adjusted_size == 0) {
		DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
		*phys_size = 0;
		return KERN_SUCCESS;
	}

	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_copy_pageable = TRUE;
	vmk_flags.vmkf_copy_same_map = TRUE;
	assert(adjusted_size != 0);
	cur_prot = VM_PROT_NONE; /* legacy mode */
	max_prot = VM_PROT_NONE; /* legacy mode */
	kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
	    FALSE /* copy */,
	    &copy_map,
	    &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
	    vmk_flags);
	if (kr != KERN_SUCCESS) {
		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
		*phys_size = 0;
		return kr;
	}
	assert(copy_map != VM_MAP_COPY_NULL);
	target_copy_map = copy_map;
	DEBUG4K_ADJUST("adjusting...\n");
	kr = vm_map_copy_adjust_to_target(
		copy_map,
		start - adjusted_start, /* offset */
		size, /* size */
		map, /* target_map */
		FALSE, /* copy */
		&target_copy_map,
		&overmap_start,
		&overmap_end,
		&trimmed_start);
	if (kr == KERN_SUCCESS) {
		if (target_copy_map->size != *phys_size) {
			DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
		}
		*phys_size = target_copy_map->size;
	} else {
		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
		*phys_size = 0;
	}
	vm_map_copy_discard(copy_map);
	copy_map = VM_MAP_COPY_NULL;

	return kr;
}
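
/*
 * Example of what vm_map_range_physical_size() reports (illustrative):
 * for a 4K-page map queried about [0x1f000, 0x21000) on a 16K-page
 * kernel, the 4K-aligned range is 0x2000 bytes long, but once the copy
 * is adjusted to the 16K target page size the mapping spans two 16K
 * pages, so *phys_size comes back as 0x8000.  When the map already uses
 * the kernel page size, the function just returns the page-rounded size
 * of the requested range.
 */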
static kern_return_t
memory_entry_check_for_adjustment(
	vm_map_t src_map,
	ipc_port_t port,
	vm_map_offset_t *overmap_start,
	vm_map_offset_t *overmap_end)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;

	assert(port);
	assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));

	vm_named_entry_t named_entry;

	named_entry = (vm_named_entry_t) ipc_kobject_get(port);
	named_entry_lock(named_entry);
	copy_map = named_entry->backing.copy;
	target_copy_map = copy_map;

	if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
		vm_map_offset_t trimmed_start;

		trimmed_start = 0;
		DEBUG4K_ADJUST("adjusting...\n");
		kr = vm_map_copy_adjust_to_target(
			copy_map,
			0, /* offset */
			copy_map->size, /* size */
			src_map,
			FALSE, /* copy */
			&target_copy_map,
			overmap_start,
			overmap_end,
			&trimmed_start);
		assert(trimmed_start == 0);
	}
	named_entry_unlock(named_entry);

	return kr;
}
17757 * Routine: vm_remap
17759 * Map portion of a task's address space.
17760 * Mapped region must not overlap more than
17761 * one vm memory object. Protections and
17762 * inheritance attributes remain the same
17763 * as in the original task and are out parameters.
17764 * Source and Target task can be identical
17765 * Other attributes are identical as for vm_map()
17769 vm_map_t target_map
,
17770 vm_map_address_t
*address
,
17771 vm_map_size_t size
,
17772 vm_map_offset_t mask
,
17774 vm_map_kernel_flags_t vmk_flags
,
17777 vm_map_offset_t memory_address
,
17779 vm_prot_t
*cur_protection
, /* IN/OUT */
17780 vm_prot_t
*max_protection
, /* IN/OUT */
17781 vm_inherit_t inheritance
)
{
	kern_return_t           result;
	vm_map_entry_t          entry;
	vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t          new_entry;
	vm_map_copy_t           copy_map;
	vm_map_offset_t         offset_in_mapping;
	vm_map_size_t           target_size = 0;
	vm_map_size_t           src_page_mask, target_page_mask;
	vm_map_offset_t         overmap_start, overmap_end, trimmed_start;
	vm_map_offset_t         initial_memory_address;
	vm_map_size_t           initial_size;

	if (target_map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	initial_memory_address = memory_address;
	initial_size = size;
	src_page_mask = VM_MAP_PAGE_MASK(src_map);
	target_page_mask = VM_MAP_PAGE_MASK(target_map);

	switch (inheritance) {
	case VM_INHERIT_NONE:
	case VM_INHERIT_COPY:
	case VM_INHERIT_SHARE:
		if (size != 0 && src_map != VM_MAP_NULL) {
			break;
		}
		OS_FALLTHROUGH;
	default:
		return KERN_INVALID_ARGUMENT;
	}

	if (src_page_mask != target_page_mask) {
		if (copy) {
			DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
		} else {
			DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
		}
	}

	/*
	 * If the user is requesting that we return the address of the
	 * first byte of the data (rather than the base of the page),
	 * then we use different rounding semantics: specifically,
	 * we assume that (memory_address, size) describes a region
	 * all of whose pages we must cover, rather than a base to be truncated
	 * down and a size to be added to that base.  So we figure out
	 * the highest page that the requested region includes and make
	 * sure that the size will cover it.
	 *
	 * The key example we're worried about is of the form:
	 *
	 *		memory_address = 0x1ff0, size = 0x20
	 *
	 * With the old semantics, we round down the memory_address to 0x1000
	 * and round up the size to 0x1000, resulting in our covering *only*
	 * page 0x1000.  With the new semantics, we'd realize that the region covers
	 * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
	 * 0x1000 and page 0x2000 in the region we remap.
	 */
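	/*
	 * Worked through with the example above (a sketch, assuming 4K source
	 * pages, i.e. src_page_mask == 0xfff):
	 *
	 *	range_start       = vm_map_trunc_page(0x1ff0, 0xfff)        = 0x1000
	 *	range_end         = vm_map_round_page(0x1ff0 + 0x20, 0xfff) = 0x3000
	 *	size              = range_end - range_start                 = 0x2000
	 *	offset_in_mapping = 0x1ff0 - 0x1000                         = 0x0ff0
	 *
	 * so both pages 0x1000 and 0x2000 are covered, and the caller gets back
	 * an address whose low bits still point at the first byte of its data.
	 */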
	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
		vm_map_offset_t range_start, range_end;

		range_start = vm_map_trunc_page(memory_address, src_page_mask);
		range_end = vm_map_round_page(memory_address + size, src_page_mask);
		memory_address = range_start;
		size = range_end - range_start;
		offset_in_mapping = initial_memory_address - memory_address;
	} else {
		/*
		 * This legacy code path is broken: for the range mentioned
		 * above [ memory_address = 0x1ff0, size = 0x20 ], which spans
		 * two 4k pages, it yields [ memory_address = 0x1000,
		 * size = 0x1000 ], which covers only the first 4k page.
		 * BUT some code unfortunately depends on this bug, so we
		 * can't fix it without breaking something.
		 * New code should get automatically opted into the new
		 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flags.
		 */
		offset_in_mapping = 0;
		memory_address = vm_map_trunc_page(memory_address, src_page_mask);
		size = vm_map_round_page(size, src_page_mask);
		initial_memory_address = memory_address;
		initial_size = size;
	}

	if (size == 0) {
		return KERN_INVALID_ARGUMENT;
	}
	if (flags & VM_FLAGS_RESILIENT_MEDIA) {
		/* must be copy-on-write to be "media resilient" */
		if (!copy) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
	vmk_flags.vmkf_copy_same_map = (src_map == target_map);

	result = vm_map_copy_extract(src_map,
	    memory_address,
	    size,
	    copy, &copy_map,
	    cur_protection, /* IN/OUT */
	    max_protection, /* IN/OUT */
	    inheritance,
	    vmk_flags);
	if (result != KERN_SUCCESS) {
		return result;
	}
	assert(copy_map != VM_MAP_COPY_NULL);

	target_size = size;
	if (src_page_mask != target_page_mask) {
		vm_map_copy_t target_copy_map;

		target_copy_map = copy_map; /* can modify "copy_map" itself */
		DEBUG4K_ADJUST("adjusting...\n");
		result = vm_map_copy_adjust_to_target(
			copy_map,
			offset_in_mapping, /* offset */
			size, /* size */
			target_map,
			copy,
			&target_copy_map,
			&overmap_start,
			&overmap_end,
			&trimmed_start);
		if (result != KERN_SUCCESS) {
			DEBUG4K_COPY("failed to adjust 0x%x\n", result);
			vm_map_copy_discard(copy_map);
			return result;
		}
		if (trimmed_start == 0) {
			/* nothing trimmed: no adjustment needed */
		} else if (trimmed_start >= offset_in_mapping) {
			/* trimmed more than offset_in_mapping: nothing left */
			assert(overmap_start == 0);
			assert(overmap_end == 0);
			offset_in_mapping = 0;
		} else {
			/* trimmed some of offset_in_mapping: adjust */
			assert(overmap_start == 0);
			assert(overmap_end == 0);
			offset_in_mapping -= trimmed_start;
		}
		offset_in_mapping += overmap_start;
		target_size = target_copy_map->size;
	}
	/*
	 * Allocate/check a range of free virtual address
	 * space for the target
	 */
	*address = vm_map_trunc_page(*address, target_page_mask);
	vm_map_lock(target_map);
	target_size = vm_map_round_page(target_size, target_page_mask);
	result = vm_map_remap_range_allocate(target_map, address,
	    target_size,
	    mask, flags, vmk_flags, tag,
	    &insp_entry);

	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = new_entry) {
		new_entry = entry->vme_next;
		vm_map_copy_entry_unlink(copy_map, entry);
		if (result == KERN_SUCCESS) {
			if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
				/* no codesigning -> read-only access */
				entry->max_protection = VM_PROT_READ;
				entry->protection = VM_PROT_READ;
				entry->vme_resilient_codesign = TRUE;
			}
			entry->vme_start += *address;
			entry->vme_end += *address;
			assert(!entry->map_aligned);
			if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
			    !entry->is_sub_map &&
			    (VME_OBJECT(entry) == VM_OBJECT_NULL ||
			    VME_OBJECT(entry)->internal)) {
				entry->vme_resilient_media = TRUE;
			}
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
			assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
			vm_map_store_entry_link(target_map, insp_entry, entry,
			    vmk_flags);
			insp_entry = entry;
		} else {
			if (!entry->is_sub_map) {
				vm_object_deallocate(VME_OBJECT(entry));
			} else {
				vm_map_deallocate(VME_SUBMAP(entry));
			}
			vm_map_copy_entry_dispose(copy_map, entry);
		}
	}

	if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
		*cur_protection = VM_PROT_READ;
		*max_protection = VM_PROT_READ;
	}

	if (target_map->disable_vmentry_reuse == TRUE) {
		assert(!target_map->is_nested_map);
		if (target_map->highest_entry_end < insp_entry->vme_end) {
			target_map->highest_entry_end = insp_entry->vme_end;
		}
	}

	if (result == KERN_SUCCESS) {
		target_map->size += target_size;
		SAVE_HINT_MAP_WRITE(target_map, insp_entry);
	}
	vm_map_unlock(target_map);

	if (result == KERN_SUCCESS && target_map->wiring_required) {
		result = vm_map_wire_kernel(target_map, *address,
		    *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
		    TRUE);
	}

	/*
	 * If requested, return the address of the data pointed to by the
	 * request, rather than the base of the resulting page.
	 */
	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
		*address += offset_in_mapping;
	}

	if (src_page_mask != target_page_mask) {
		DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
	}
	vm_map_copy_discard(copy_map);
	copy_map = VM_MAP_COPY_NULL;

	return result;
}
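/*
 * A minimal usage sketch (not from this file): how a kernel-internal caller
 * might mirror part of one task's address space into another with
 * vm_map_remap().  The maps, addresses and lengths below are hypothetical
 * placeholders.
 *
 *	vm_map_address_t dst_addr = 0;
 *	vm_prot_t cur_prot = VM_PROT_READ;      // IN/OUT
 *	vm_prot_t max_prot = VM_PROT_READ;      // IN/OUT
 *	kern_return_t kr;
 *
 *	kr = vm_map_remap(dst_map, &dst_addr, len, 0,
 *	    VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
 *	    VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
 *	    src_map, src_addr, TRUE,            // TRUE: copy (COW) the source range
 *	    &cur_prot, &max_prot, VM_INHERIT_NONE);
 *	if (kr == KERN_SUCCESS) {
 *		// dst_addr now points at the first byte of the remapped data
 *	}
 */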
/*
 *	Routine:	vm_map_remap_range_allocate
 *
 *	Description:
 *		Allocate a range in the specified virtual address map.
 *		returns the address and the map entry just before the allocated
 *		range
 *
 *	Map must be locked.
 */
static kern_return_t
vm_map_remap_range_allocate(
	vm_map_t                map,
	vm_map_address_t        *address,      /* IN/OUT */
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	__unused vm_tag_t       tag,
	vm_map_entry_t          *map_entry)    /* OUT */
{
18054 vm_map_entry_t entry
;
18055 vm_map_offset_t start
;
18056 vm_map_offset_t end
;
18057 vm_map_offset_t desired_empty_end
;
18059 vm_map_entry_t hole_entry
;
18065 if (flags
& VM_FLAGS_ANYWHERE
) {
18066 if (flags
& VM_FLAGS_RANDOM_ADDR
) {
18068 * Get a random start address.
18070 kr
= vm_map_random_address_for_size(map
, address
, size
);
18071 if (kr
!= KERN_SUCCESS
) {
18078 * Calculate the first possible address.
18081 if (start
< map
->min_offset
) {
18082 start
= map
->min_offset
;
18084 if (start
> map
->max_offset
) {
18085 return KERN_NO_SPACE
;
18089 * Look for the first possible address;
18090 * if there's already something at this
18091 * address, we have to start after it.
18094 if (map
->disable_vmentry_reuse
== TRUE
) {
18095 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
18097 if (map
->holelistenabled
) {
18098 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
18100 if (hole_entry
== NULL
) {
18102 * No more space in the map?
18104 return KERN_NO_SPACE
;
18106 boolean_t found_hole
= FALSE
;
18109 if (hole_entry
->vme_start
>= start
) {
18110 start
= hole_entry
->vme_start
;
18115 if (hole_entry
->vme_end
> start
) {
18119 hole_entry
= hole_entry
->vme_next
;
18120 } while (hole_entry
!= CAST_TO_VM_MAP_ENTRY(map
->holes_list
));
18122 if (found_hole
== FALSE
) {
18123 return KERN_NO_SPACE
;
18126 entry
= hole_entry
;
18129 assert(first_free_is_valid(map
));
18130 if (start
== map
->min_offset
) {
18131 if ((entry
= map
->first_free
) != vm_map_to_entry(map
)) {
18132 start
= entry
->vme_end
;
18135 vm_map_entry_t tmp_entry
;
18136 if (vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
18137 start
= tmp_entry
->vme_end
;
18142 start
= vm_map_round_page(start
,
18143 VM_MAP_PAGE_MASK(map
));
18147 * In any case, the "entry" always precedes
18148 * the proposed new region throughout the
18153 vm_map_entry_t next
;
18156 * Find the end of the proposed new region.
18157 * Be sure we didn't go beyond the end, or
18158 * wrap around the address.
18161 end
= ((start
+ mask
) & ~mask
);
18162 end
= vm_map_round_page(end
,
18163 VM_MAP_PAGE_MASK(map
));
18165 return KERN_NO_SPACE
;
18170 /* We want an entire page of empty space, but don't increase the allocation size. */
18171 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
18173 if ((desired_empty_end
> map
->max_offset
) || (desired_empty_end
< start
)) {
18174 if (map
->wait_for_space
) {
18175 if (size
<= (map
->max_offset
-
18176 map
->min_offset
)) {
18177 assert_wait((event_t
) map
, THREAD_INTERRUPTIBLE
);
18178 vm_map_unlock(map
);
18179 thread_block(THREAD_CONTINUE_NULL
);
18185 return KERN_NO_SPACE
;
18188 next
= entry
->vme_next
;
18190 if (map
->holelistenabled
) {
18191 if (entry
->vme_end
>= desired_empty_end
) {
18196 * If there are no more entries, we must win.
18200 * If there is another entry, it must be
18201 * after the end of the potential new region.
18204 if (next
== vm_map_to_entry(map
)) {
18208 if (next
->vme_start
>= desired_empty_end
) {
18214 * Didn't fit -- move to the next entry.
18219 if (map
->holelistenabled
) {
18220 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
18224 return KERN_NO_SPACE
;
18226 start
= entry
->vme_start
;
18228 start
= entry
->vme_end
;
18232 if (map
->holelistenabled
) {
18233 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
18234 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
18240 vm_map_entry_t temp_entry
;
18244 * the address doesn't itself violate
18245 * the mask requirement.
18248 if ((start
& mask
) != 0) {
18249 return KERN_NO_SPACE
;
18254 * ... the address is within bounds
18257 end
= start
+ size
;
18259 if ((start
< map
->min_offset
) ||
18260 (end
> map
->max_offset
) ||
18262 return KERN_INVALID_ADDRESS
;
18266 * If we're asked to overwrite whatever was mapped in that
18267 * range, first deallocate that range.
18269 if (flags
& VM_FLAGS_OVERWRITE
) {
18271 int remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
| VM_MAP_REMOVE_NO_MAP_ALIGN
;
18274 * We use a "zap_map" to avoid having to unlock
18275 * the "map" in vm_map_delete(), which would compromise
18276 * the atomicity of the "deallocate" and then "remap"
18279 zap_map
= vm_map_create(PMAP_NULL
,
18282 map
->hdr
.entries_pageable
);
18283 if (zap_map
== VM_MAP_NULL
) {
18284 return KERN_RESOURCE_SHORTAGE
;
18286 vm_map_set_page_shift(zap_map
, VM_MAP_PAGE_SHIFT(map
));
18287 vm_map_disable_hole_optimization(zap_map
);
18289 if (vmk_flags
.vmkf_overwrite_immutable
) {
18290 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
18292 kr
= vm_map_delete(map
, start
, end
,
18295 if (kr
== KERN_SUCCESS
) {
18296 vm_map_destroy(zap_map
,
18297 VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
18298 zap_map
= VM_MAP_NULL
;
18303 * ... the starting address isn't allocated
18306 if (vm_map_lookup_entry(map
, start
, &temp_entry
)) {
18307 return KERN_NO_SPACE
;
18310 entry
= temp_entry
;
18313 * ... the next region doesn't overlap the
18317 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
18318 (entry
->vme_next
->vme_start
< end
)) {
18319 return KERN_NO_SPACE
;
18322 *map_entry
= entry
;
18323 return KERN_SUCCESS
;
/*
 * Set the address map for the current thread to the specified map
 */
vm_map_t
vm_map_switch(
	vm_map_t        map)
{
	int             mycpu;
	thread_t        thread = current_thread();
	vm_map_t        oldmap = thread->map;

	mp_disable_preemption();
	mycpu = cpu_number();

	/*
	 * Deactivate the current map and activate the requested map
	 */
	PMAP_SWITCH_USER(thread, map, mycpu);

	mp_enable_preemption();
	return oldmap;
}
/*
 *	Routine:	vm_map_write_user
 *
 *	Description:
 *		Copy out data from a kernel space into space in the
 *		destination map.  The space must already exist in the
 *		destination map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault, i.e. kernel mode user
 *		threads.
 */
kern_return_t
vm_map_write_user(
	vm_map_t                map,
	void                    *src_p,
	vm_map_address_t        dst_addr,
	vm_size_t               size)
{
	kern_return_t   kr = KERN_SUCCESS;

	if (current_map() == map) {
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t        oldmap;

		/* take on the identity of the target map while doing */
		/* the copy */

		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
/*
 *	Routine:	vm_map_read_user
 *
 *	Description:
 *		Copy in data from a user space source map into the
 *		kernel map.  The space must already exist in the
 *		kernel map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault, i.e. kernel mode user
 *		threads.
 */
kern_return_t
vm_map_read_user(
	vm_map_t                map,
	vm_map_address_t        src_addr,
	void                    *dst_p,
	vm_size_t               size)
{
	kern_return_t   kr = KERN_SUCCESS;

	if (current_map() == map) {
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t        oldmap;

		/* take on the identity of the target map while doing */
		/* the copy */

		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
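/*
 * A minimal sketch (hypothetical caller, not from this file) of the two
 * helpers above: copy a kernel buffer out to a user map and read it back,
 * switching to the target map only when it isn't the current one.
 *
 *	struct foo_args args;            // hypothetical kernel-side buffer
 *	kern_return_t kr;
 *
 *	kr = vm_map_write_user(user_map, &args, user_addr, sizeof(args));
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_read_user(user_map, user_addr, &args, sizeof(args));
 *	}
 *	// KERN_INVALID_ADDRESS means copyout()/copyin() faulted on user_addr.
 */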
/*
 *	vm_map_check_protection:
 *
 *	Assert that the target map allows the specified
 *	privilege on the entire address region given.
 *	The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
    vm_map_offset_t end, vm_prot_t protection)
{
	vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	vm_map_lock(map);

	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
		vm_map_unlock(map);
		return FALSE;
	}

	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		vm_map_unlock(map);
		return FALSE;
	}

	entry = tmp_entry;

	while (start < end) {
		if (entry == vm_map_to_entry(map)) {
			vm_map_unlock(map);
			return FALSE;
		}

		/*
		 * No holes allowed!
		 */
		if (start < entry->vme_start) {
			vm_map_unlock(map);
			return FALSE;
		}

		/*
		 * Check protection associated with entry.
		 */
		if ((entry->protection & protection) != protection) {
			vm_map_unlock(map);
			return FALSE;
		}

		/* go to next entry */
		start = entry->vme_end;
		entry = entry->vme_next;
	}
	vm_map_unlock(map);
	return TRUE;
}
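/*
 * Usage sketch (hypothetical values): verify that an entire candidate
 * buffer is mapped read/write before committing to use it.
 *
 *	if (!vm_map_check_protection(map, vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
 *	    vm_map_round_page(addr + len, VM_MAP_PAGE_MASK(map)),
 *	    VM_PROT_READ | VM_PROT_WRITE)) {
 *		return KERN_PROTECTION_FAILURE;
 *	}
 */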
kern_return_t
vm_map_purgable_control(
	vm_map_t                map,
	vm_map_offset_t         address,
	vm_purgable_t           control,
	int                     *state)
{
	vm_map_entry_t          entry;
	vm_object_t             object;
	kern_return_t           kr;
	boolean_t               was_nonvolatile;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control != VM_PURGABLE_SET_STATE &&
	    control != VM_PURGABLE_GET_STATE &&
	    control != VM_PURGABLE_PURGE_ALL &&
	    control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control == VM_PURGABLE_PURGE_ALL) {
		vm_purgeable_object_purge_all();
		return KERN_SUCCESS;
	}

	if ((control == VM_PURGABLE_SET_STATE ||
	    control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
	    ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->protection & VM_PROT_WRITE) == 0) {
		/*
		 * Can't apply purgable controls to something you can't write.
		 */
		vm_map_unlock_read(map);
		return KERN_PROTECTION_FAILURE;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL ||
	    object->purgable == VM_PURGABLE_DENY) {
		/*
		 * Object must already be present and be purgeable.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

	if (VME_OFFSET(entry) != 0 ||
	    entry->vme_end - entry->vme_start != object->vo_size) {
		/*
		 * Can only apply purgable controls to the whole (existing)
		 * object at once.
		 */
		vm_map_unlock_read(map);
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	assert(!entry->is_sub_map);
	assert(!entry->use_pmap); /* purgeable has its own accounting */

	vm_map_unlock_read(map);

	was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);

	kr = vm_object_purgable_control(object, control, state);

	if (was_nonvolatile &&
	    object->purgable != VM_PURGABLE_NONVOLATILE &&
	    map->pmap == kernel_pmap) {
		object->vo_purgeable_volatilizer = kernel_task;
	}

	vm_object_unlock(object);

	return kr;
}
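/*
 * Usage sketch (hypothetical caller): marking a whole purgeable mapping
 * volatile so the VM may reclaim it under memory pressure, then checking
 * later whether it was purged.
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *	kr = vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, &state);
 *	...
 *	state = 0;
 *	kr = vm_map_purgable_control(map, addr, VM_PURGABLE_GET_STATE, &state);
 *	if (state == VM_PURGABLE_EMPTY) {
 *		// contents were reclaimed; regenerate them
 *	}
 */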
18604 vm_map_footprint_query_page_info(
18606 vm_map_entry_t map_entry
,
18607 vm_map_offset_t curr_s_offset
,
18608 int *disposition_p
)
18611 vm_object_t object
;
18613 int effective_page_size
;
18615 vm_map_lock_assert_held(map
);
18616 assert(!map
->has_corpse_footprint
);
18617 assert(curr_s_offset
>= map_entry
->vme_start
);
18618 assert(curr_s_offset
< map_entry
->vme_end
);
18620 object
= VME_OBJECT(map_entry
);
18621 if (object
== VM_OBJECT_NULL
) {
18622 *disposition_p
= 0;
18626 effective_page_size
= MIN(PAGE_SIZE
, VM_MAP_PAGE_SIZE(map
));
18629 if (object
== VM_OBJECT_NULL
) {
18630 /* nothing mapped here: no need to ask */
18631 *disposition_p
= 0;
18633 } else if (map_entry
->is_sub_map
&&
18634 !map_entry
->use_pmap
) {
18635 /* nested pmap: no footprint */
18636 *disposition_p
= 0;
18643 pmap_query_page_info(map
->pmap
, curr_s_offset
, &pmap_disp
);
18646 * Compute this page's disposition.
18650 /* deal with "alternate accounting" first */
18651 if (!map_entry
->is_sub_map
&&
18652 object
->vo_no_footprint
) {
18653 /* does not count in footprint */
18654 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18655 } else if (!map_entry
->is_sub_map
&&
18656 (object
->purgable
== VM_PURGABLE_NONVOLATILE
||
18657 (object
->purgable
== VM_PURGABLE_DENY
&&
18658 object
->vo_ledger_tag
)) &&
18659 VM_OBJECT_OWNER(object
) != NULL
&&
18660 VM_OBJECT_OWNER(object
)->map
== map
) {
18661 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18662 if ((((curr_s_offset
18663 - map_entry
->vme_start
18664 + VME_OFFSET(map_entry
))
18665 / effective_page_size
) <
18666 (object
->resident_page_count
+
18667 vm_compressor_pager_get_count(object
->pager
)))) {
18669 * Non-volatile purgeable object owned
18670 * by this task: report the first
18671 * "#resident + #compressed" pages as
18672 * "resident" (to show that they
18673 * contribute to the footprint) but not
18674 * "dirty" (to avoid double-counting
18675 * with the fake "non-volatile" region
18676 * we'll report at the end of the
18677 * address space to account for all
18678 * (mapped or not) non-volatile memory
18679 * owned by this task.
18681 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18683 } else if (!map_entry
->is_sub_map
&&
18684 (object
->purgable
== VM_PURGABLE_VOLATILE
||
18685 object
->purgable
== VM_PURGABLE_EMPTY
) &&
18686 VM_OBJECT_OWNER(object
) != NULL
&&
18687 VM_OBJECT_OWNER(object
)->map
== map
) {
18688 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18689 if ((((curr_s_offset
18690 - map_entry
->vme_start
18691 + VME_OFFSET(map_entry
))
18692 / effective_page_size
) <
18693 object
->wired_page_count
)) {
18695 * Volatile|empty purgeable object owned
18696 * by this task: report the first
18697 * "#wired" pages as "resident" (to
18698 * show that they contribute to the
18699 * footprint) but not "dirty" (to avoid
18700 * double-counting with the fake
18701 * "non-volatile" region we'll report
18702 * at the end of the address space to
18703 * account for all (mapped or not)
18704 * non-volatile memory owned by this
18707 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18709 } else if (!map_entry
->is_sub_map
&&
18710 map_entry
->iokit_acct
&&
18711 object
->internal
&&
18712 object
->purgable
== VM_PURGABLE_DENY
) {
18714 * Non-purgeable IOKit memory: phys_footprint
18715 * includes the entire virtual mapping.
18717 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18718 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18719 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
18720 } else if (pmap_disp
& (PMAP_QUERY_PAGE_ALTACCT
|
18721 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
)) {
18722 /* alternate accounting */
18723 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18724 if (map
->pmap
->footprint_was_suspended
) {
18726 * The assertion below can fail if dyld
18727 * suspended footprint accounting
18728 * while doing some adjustments to
18729 * this page; the mapping would say
18730 * "use pmap accounting" but the page
18731 * would be marked "alternate
18735 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18737 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18741 if (pmap_disp
& PMAP_QUERY_PAGE_PRESENT
) {
18742 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18743 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18744 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
18745 if (pmap_disp
& PMAP_QUERY_PAGE_INTERNAL
) {
18746 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
18748 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
18750 if (pmap_disp
& PMAP_QUERY_PAGE_REUSABLE
) {
18751 disposition
|= VM_PAGE_QUERY_PAGE_REUSABLE
;
18753 } else if (pmap_disp
& PMAP_QUERY_PAGE_COMPRESSED
) {
18754 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18755 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
18759 *disposition_p
= disposition
;
18763 vm_map_page_query_internal(
18764 vm_map_t target_map
,
18765 vm_map_offset_t offset
,
18770 vm_page_info_basic_data_t info
;
18771 mach_msg_type_number_t count
;
18773 count
= VM_PAGE_INFO_BASIC_COUNT
;
18774 kr
= vm_map_page_info(target_map
,
18776 VM_PAGE_INFO_BASIC
,
18777 (vm_page_info_t
) &info
,
18779 if (kr
== KERN_SUCCESS
) {
18780 *disposition
= info
.disposition
;
18781 *ref_count
= info
.ref_count
;
18793 vm_map_offset_t offset
,
18794 vm_page_info_flavor_t flavor
,
18795 vm_page_info_t info
,
18796 mach_msg_type_number_t
*count
)
18798 return vm_map_page_range_info_internal(map
,
18799 offset
, /* start of range */
18800 (offset
+ 1), /* this will get rounded in the call to the page boundary */
18801 (int)-1, /* effective_page_shift: unspecified */
18808 vm_map_page_range_info_internal(
18810 vm_map_offset_t start_offset
,
18811 vm_map_offset_t end_offset
,
18812 int effective_page_shift
,
18813 vm_page_info_flavor_t flavor
,
18814 vm_page_info_t info
,
18815 mach_msg_type_number_t
*count
)
18817 vm_map_entry_t map_entry
= VM_MAP_ENTRY_NULL
;
18818 vm_object_t object
= VM_OBJECT_NULL
, curr_object
= VM_OBJECT_NULL
;
18819 vm_page_t m
= VM_PAGE_NULL
;
18820 kern_return_t retval
= KERN_SUCCESS
;
18821 int disposition
= 0;
18823 int depth
= 0, info_idx
= 0;
18824 vm_page_info_basic_t basic_info
= 0;
18825 vm_map_offset_t offset_in_page
= 0, offset_in_object
= 0, curr_offset_in_object
= 0;
18826 vm_map_offset_t start
= 0, end
= 0, curr_s_offset
= 0, curr_e_offset
= 0;
18827 boolean_t do_region_footprint
;
18828 ledger_amount_t ledger_resident
, ledger_compressed
;
18829 int effective_page_size
;
18830 vm_map_offset_t effective_page_mask
;
18833 case VM_PAGE_INFO_BASIC
:
18834 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
) {
18836 * The "vm_page_info_basic_data" structure was not
18837 * properly padded, so allow the size to be off by
18838 * one to maintain backwards binary compatibility...
18840 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
- 1) {
18841 return KERN_INVALID_ARGUMENT
;
18846 return KERN_INVALID_ARGUMENT
;
18849 if (effective_page_shift
== -1) {
18850 effective_page_shift
= vm_self_region_page_shift_safely(map
);
18851 if (effective_page_shift
== -1) {
18852 return KERN_INVALID_ARGUMENT
;
18855 effective_page_size
= (1 << effective_page_shift
);
18856 effective_page_mask
= effective_page_size
- 1;
18858 do_region_footprint
= task_self_region_footprint();
18862 info_idx
= 0; /* Tracks the next index within the info structure to be filled.*/
18863 retval
= KERN_SUCCESS
;
18865 offset_in_page
= start_offset
& effective_page_mask
;
18866 start
= vm_map_trunc_page(start_offset
, effective_page_mask
);
18867 end
= vm_map_round_page(end_offset
, effective_page_mask
);
18870 return KERN_INVALID_ARGUMENT
;
18873 assert((end
- start
) <= MAX_PAGE_RANGE_QUERY
);
18875 vm_map_lock_read(map
);
18877 task_ledgers_footprint(map
->pmap
->ledger
, &ledger_resident
, &ledger_compressed
);
18879 for (curr_s_offset
= start
; curr_s_offset
< end
;) {
18881 * New lookup needs reset of these variables.
18883 curr_object
= object
= VM_OBJECT_NULL
;
18884 offset_in_object
= 0;
18888 if (do_region_footprint
&&
18889 curr_s_offset
>= vm_map_last_entry(map
)->vme_end
) {
18891 * Request for "footprint" info about a page beyond
18892 * the end of address space: this must be for
18893 * the fake region vm_map_region_recurse_64()
18894 * reported to account for non-volatile purgeable
18895 * memory owned by this task.
18899 if (curr_s_offset
- vm_map_last_entry(map
)->vme_end
<=
18900 (unsigned) ledger_compressed
) {
18902 * We haven't reported all the "non-volatile
18903 * compressed" pages yet, so report this fake
18904 * page as "compressed".
18906 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
18909 * We've reported all the non-volatile
18910 * compressed page but not all the non-volatile
18911 * pages , so report this fake page as
18912 * "resident dirty".
18914 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18915 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
18916 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
18919 case VM_PAGE_INFO_BASIC
:
18920 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
18921 basic_info
->disposition
= disposition
;
18922 basic_info
->ref_count
= 1;
18923 basic_info
->object_id
= VM_OBJECT_ID_FAKE(map
, task_ledgers
.purgeable_nonvolatile
);
18924 basic_info
->offset
= 0;
18925 basic_info
->depth
= 0;
18930 curr_s_offset
+= effective_page_size
;
18935 * First, find the map entry covering "curr_s_offset", going down
18936 * submaps if necessary.
18938 if (!vm_map_lookup_entry(map
, curr_s_offset
, &map_entry
)) {
18939 /* no entry -> no object -> no page */
18941 if (curr_s_offset
< vm_map_min(map
)) {
18943 * Illegal address that falls below map min.
18945 curr_e_offset
= MIN(end
, vm_map_min(map
));
18946 } else if (curr_s_offset
>= vm_map_max(map
)) {
18948 * Illegal address that falls on/after map max.
18950 curr_e_offset
= end
;
18951 } else if (map_entry
== vm_map_to_entry(map
)) {
18955 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
18959 curr_e_offset
= MIN(map
->max_offset
, end
);
18962 * Hole at start of the map.
18964 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
18967 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
18969 * Hole at the end of the map.
18971 curr_e_offset
= MIN(map
->max_offset
, end
);
18973 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
18977 assert(curr_e_offset
>= curr_s_offset
);
18979 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> effective_page_shift
;
18981 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
18983 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
18985 curr_s_offset
= curr_e_offset
;
18987 info_idx
+= num_pages
;
18992 /* compute offset from this map entry's start */
18993 offset_in_object
= curr_s_offset
- map_entry
->vme_start
;
18995 /* compute offset into this map entry's object (or submap) */
18996 offset_in_object
+= VME_OFFSET(map_entry
);
18998 if (map_entry
->is_sub_map
) {
18999 vm_map_t sub_map
= VM_MAP_NULL
;
19000 vm_page_info_t submap_info
= 0;
19001 vm_map_offset_t submap_s_offset
= 0, submap_e_offset
= 0, range_len
= 0;
19003 range_len
= MIN(map_entry
->vme_end
, end
) - curr_s_offset
;
19005 submap_s_offset
= offset_in_object
;
19006 submap_e_offset
= submap_s_offset
+ range_len
;
19008 sub_map
= VME_SUBMAP(map_entry
);
19010 vm_map_reference(sub_map
);
19011 vm_map_unlock_read(map
);
19013 submap_info
= (vm_page_info_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19015 assertf(VM_MAP_PAGE_SHIFT(sub_map
) >= VM_MAP_PAGE_SHIFT(map
),
19016 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map
), VM_MAP_PAGE_SIZE(map
));
19018 retval
= vm_map_page_range_info_internal(sub_map
,
19021 effective_page_shift
,
19022 VM_PAGE_INFO_BASIC
,
19023 (vm_page_info_t
) submap_info
,
19026 assert(retval
== KERN_SUCCESS
);
19028 vm_map_lock_read(map
);
19029 vm_map_deallocate(sub_map
);
19031 /* Move the "info" index by the number of pages we inspected.*/
19032 info_idx
+= range_len
>> effective_page_shift
;
19034 /* Move our current offset by the size of the range we inspected.*/
19035 curr_s_offset
+= range_len
;
19040 object
= VME_OBJECT(map_entry
);
19042 if (object
== VM_OBJECT_NULL
) {
19044 * We don't have an object here and, hence,
19045 * no pages to inspect. We'll fill up the
19046 * info structure appropriately.
19049 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
19051 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> effective_page_shift
;
19053 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19055 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
19057 curr_s_offset
= curr_e_offset
;
19059 info_idx
+= num_pages
;
19064 if (do_region_footprint
) {
19066 if (map
->has_corpse_footprint
) {
19068 * Query the page info data we saved
19069 * while forking the corpse.
19071 vm_map_corpse_footprint_query_page_info(
19077 * Query the live pmap for footprint info
19080 vm_map_footprint_query_page_info(
19087 case VM_PAGE_INFO_BASIC
:
19088 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19089 basic_info
->disposition
= disposition
;
19090 basic_info
->ref_count
= 1;
19091 basic_info
->object_id
= VM_OBJECT_ID_FAKE(map
, task_ledgers
.purgeable_nonvolatile
);
19092 basic_info
->offset
= 0;
19093 basic_info
->depth
= 0;
19098 curr_s_offset
+= effective_page_size
;
19102 vm_object_reference(object
);
19104 * Shared mode -- so we can allow other readers
19105 * to grab the lock too.
19107 vm_object_lock_shared(object
);
19109 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
19111 vm_map_unlock_read(map
);
19113 map_entry
= NULL
; /* map is unlocked, the entry is no longer valid. */
19115 curr_object
= object
;
19117 for (; curr_s_offset
< curr_e_offset
;) {
19118 if (object
== curr_object
) {
19119 ref_count
= curr_object
->ref_count
- 1; /* account for our object reference above. */
19121 ref_count
= curr_object
->ref_count
;
19124 curr_offset_in_object
= offset_in_object
;
19127 m
= vm_page_lookup(curr_object
, vm_object_trunc_page(curr_offset_in_object
));
19129 if (m
!= VM_PAGE_NULL
) {
19130 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
19133 if (curr_object
->internal
&&
19134 curr_object
->alive
&&
19135 !curr_object
->terminating
&&
19136 curr_object
->pager_ready
) {
19137 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object
, vm_object_trunc_page(curr_offset_in_object
))
19138 == VM_EXTERNAL_STATE_EXISTS
) {
19139 /* the pager has that page */
19140 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
19146 * Go down the VM object shadow chain until we find the page
19147 * we're looking for.
19150 if (curr_object
->shadow
!= VM_OBJECT_NULL
) {
19151 vm_object_t shadow
= VM_OBJECT_NULL
;
19153 curr_offset_in_object
+= curr_object
->vo_shadow_offset
;
19154 shadow
= curr_object
->shadow
;
19156 vm_object_lock_shared(shadow
);
19157 vm_object_unlock(curr_object
);
19159 curr_object
= shadow
;
19168 /* The ref_count is not strictly accurate, it measures the number */
19169 /* of entities holding a ref on the object, they may not be mapping */
19170 /* the object or may not be mapping the section holding the */
19171 /* target page but its still a ball park number and though an over- */
19172 /* count, it picks up the copy-on-write cases */
19174 /* We could also get a picture of page sharing from pmap_attributes */
19175 /* but this would under count as only faulted-in mappings would */
19178 if ((curr_object
== object
) && curr_object
->shadow
) {
19179 disposition
|= VM_PAGE_QUERY_PAGE_COPIED
;
19182 if (!curr_object
->internal
) {
19183 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
19186 if (m
!= VM_PAGE_NULL
) {
19187 if (m
->vmp_fictitious
) {
19188 disposition
|= VM_PAGE_QUERY_PAGE_FICTITIOUS
;
19190 if (m
->vmp_dirty
|| pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m
))) {
19191 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
19194 if (m
->vmp_reference
|| pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m
))) {
19195 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
19198 if (m
->vmp_q_state
== VM_PAGE_ON_SPECULATIVE_Q
) {
19199 disposition
|= VM_PAGE_QUERY_PAGE_SPECULATIVE
;
19204 * when this routine deals with 4k
19205 * pages, check the appropriate CS bit
19208 if (m
->vmp_cs_validated
) {
19209 disposition
|= VM_PAGE_QUERY_PAGE_CS_VALIDATED
;
19211 if (m
->vmp_cs_tainted
) {
19212 disposition
|= VM_PAGE_QUERY_PAGE_CS_TAINTED
;
19214 if (m
->vmp_cs_nx
) {
19215 disposition
|= VM_PAGE_QUERY_PAGE_CS_NX
;
19217 if (m
->vmp_reusable
|| curr_object
->all_reusable
) {
19218 disposition
|= VM_PAGE_QUERY_PAGE_REUSABLE
;
19224 case VM_PAGE_INFO_BASIC
:
19225 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19226 basic_info
->disposition
= disposition
;
19227 basic_info
->ref_count
= ref_count
;
19228 basic_info
->object_id
= (vm_object_id_t
) (uintptr_t)
19229 VM_KERNEL_ADDRPERM(curr_object
);
19230 basic_info
->offset
=
19231 (memory_object_offset_t
) curr_offset_in_object
+ offset_in_page
;
19232 basic_info
->depth
= depth
;
19239 offset_in_page
= 0; // This doesn't really make sense for any offset other than the starting offset.
19242 * Move to next offset in the range and in our object.
19244 curr_s_offset
+= effective_page_size
;
19245 offset_in_object
+= effective_page_size
;
19246 curr_offset_in_object
= offset_in_object
;
19248 if (curr_object
!= object
) {
19249 vm_object_unlock(curr_object
);
19251 curr_object
= object
;
19253 vm_object_lock_shared(curr_object
);
19255 vm_object_lock_yield_shared(curr_object
);
19259 vm_object_unlock(curr_object
);
19260 vm_object_deallocate(curr_object
);
19262 vm_map_lock_read(map
);
19265 vm_map_unlock_read(map
);
/*
 *	Routine:	vm_map_msync
 *
 *	Synchronises the memory range specified with its backing store
 *	image by either flushing or cleaning the contents to the appropriate
 *	memory manager engaging in a memory object synchronize dialog with
 *	the manager.  The client doesn't return until the manager issues
 *	m_o_s_completed message.  MIG Magically converts user task parameter
 *	to the task's address map.
 *
 *	interpretation of sync_flags
 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
 *				  pages to manager.
 *
 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
 *				- discard pages, write dirty or precious
 *				  pages back to memory manager.
 *
 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
 *				- write dirty or precious pages back to
 *				  the memory manager.
 *
 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
 *				  is a hole in the region, and we would
 *				  have returned KERN_SUCCESS, return
 *				  KERN_INVALID_ADDRESS instead.
 *
 *	NOTE
 *	The memory object attributes have not yet been implemented, this
 *	function will have to deal with the invalidate attribute
 *
 *	RETURNS
 *	KERN_INVALID_TASK		Bad task parameter
 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
 *	KERN_SUCCESS			The usual.
 *	KERN_INVALID_ADDRESS		There was a hole in the region.
 */
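/*
 * A minimal sketch of the flag combinations described above (hypothetical
 * caller and range; this is roughly what the msync(2) path ends up
 * requesting):
 *
 *	// flush dirty pages synchronously, then discard them
 *	kr = vm_map_msync(map, addr, len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE);
 *
 *	// same flush, but report KERN_INVALID_ADDRESS if the range has a hole
 *	kr = vm_map_msync(map, addr, len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 */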
kern_return_t
vm_map_msync(
	vm_map_t                map,
	vm_map_address_t        address,
	vm_map_size_t           size,
	vm_sync_t               sync_flags)
{
19314 vm_map_entry_t entry
;
19315 vm_map_size_t amount_left
;
19316 vm_object_offset_t offset
;
19317 vm_object_offset_t start_offset
, end_offset
;
19318 boolean_t do_sync_req
;
19319 boolean_t had_hole
= FALSE
;
19320 vm_map_offset_t pmap_offset
;
19322 if ((sync_flags
& VM_SYNC_ASYNCHRONOUS
) &&
19323 (sync_flags
& VM_SYNC_SYNCHRONOUS
)) {
19324 return KERN_INVALID_ARGUMENT
;
19327 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
19328 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map
, (uint64_t)address
, (uint64_t)size
, sync_flags
);
19332 * align address and size on page boundaries
19334 size
= (vm_map_round_page(address
+ size
,
19335 VM_MAP_PAGE_MASK(map
)) -
19336 vm_map_trunc_page(address
,
19337 VM_MAP_PAGE_MASK(map
)));
19338 address
= vm_map_trunc_page(address
,
19339 VM_MAP_PAGE_MASK(map
));
19341 if (map
== VM_MAP_NULL
) {
19342 return KERN_INVALID_TASK
;
19346 return KERN_SUCCESS
;
19349 amount_left
= size
;
19351 while (amount_left
> 0) {
19352 vm_object_size_t flush_size
;
19353 vm_object_t object
;
19356 if (!vm_map_lookup_entry(map
,
19359 vm_map_size_t skip
;
19362 * hole in the address map.
19366 if (sync_flags
& VM_SYNC_KILLPAGES
) {
19368 * For VM_SYNC_KILLPAGES, there should be
19369 * no holes in the range, since we couldn't
19370 * prevent someone else from allocating in
19371 * that hole and we wouldn't want to "kill"
19374 vm_map_unlock(map
);
19379 * Check for empty map.
19381 if (entry
== vm_map_to_entry(map
) &&
19382 entry
->vme_next
== entry
) {
19383 vm_map_unlock(map
);
19387 * Check that we don't wrap and that
19388 * we have at least one real map entry.
19390 if ((map
->hdr
.nentries
== 0) ||
19391 (entry
->vme_next
->vme_start
< address
)) {
19392 vm_map_unlock(map
);
19396 * Move up to the next entry if needed
19398 skip
= (entry
->vme_next
->vme_start
- address
);
19399 if (skip
>= amount_left
) {
19402 amount_left
-= skip
;
19404 address
= entry
->vme_next
->vme_start
;
19405 vm_map_unlock(map
);
19409 offset
= address
- entry
->vme_start
;
19410 pmap_offset
= address
;
19413 * do we have more to flush than is contained in this
19416 if (amount_left
+ entry
->vme_start
+ offset
> entry
->vme_end
) {
19417 flush_size
= entry
->vme_end
-
19418 (entry
->vme_start
+ offset
);
19420 flush_size
= amount_left
;
19422 amount_left
-= flush_size
;
19423 address
+= flush_size
;
19425 if (entry
->is_sub_map
== TRUE
) {
19426 vm_map_t local_map
;
19427 vm_map_offset_t local_offset
;
19429 local_map
= VME_SUBMAP(entry
);
19430 local_offset
= VME_OFFSET(entry
);
19431 vm_map_reference(local_map
);
19432 vm_map_unlock(map
);
19437 sync_flags
) == KERN_INVALID_ADDRESS
) {
19440 vm_map_deallocate(local_map
);
19443 object
= VME_OBJECT(entry
);
19446 * We can't sync this object if the object has not been
19449 if (object
== VM_OBJECT_NULL
) {
19450 vm_map_unlock(map
);
19453 offset
+= VME_OFFSET(entry
);
19455 vm_object_lock(object
);
19457 if (sync_flags
& (VM_SYNC_KILLPAGES
| VM_SYNC_DEACTIVATE
)) {
19458 int kill_pages
= 0;
19459 boolean_t reusable_pages
= FALSE
;
19461 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
19463 * This is a destructive operation and so we
19464 * err on the side of limiting the range of
19467 start_offset
= vm_object_round_page(offset
);
19468 end_offset
= vm_object_trunc_page(offset
+ flush_size
);
19470 if (end_offset
<= start_offset
) {
19471 vm_object_unlock(object
);
19472 vm_map_unlock(map
);
19476 pmap_offset
+= start_offset
- offset
;;
19478 start_offset
= offset
;
19479 end_offset
= offset
+ flush_size
;
19482 if (sync_flags
& VM_SYNC_KILLPAGES
) {
19483 if (((object
->ref_count
== 1) ||
19484 ((object
->copy_strategy
!=
19485 MEMORY_OBJECT_COPY_SYMMETRIC
) &&
19486 (object
->copy
== VM_OBJECT_NULL
))) &&
19487 (object
->shadow
== VM_OBJECT_NULL
)) {
19488 if (object
->ref_count
!= 1) {
19489 vm_page_stats_reusable
.free_shared
++;
19496 if (kill_pages
!= -1) {
19497 vm_object_deactivate_pages(
19500 (vm_object_size_t
) (end_offset
- start_offset
),
19506 vm_object_unlock(object
);
19507 vm_map_unlock(map
);
19511 * We can't sync this object if there isn't a pager.
19512 * Don't bother to sync internal objects, since there can't
19513 * be any "permanent" storage for these objects anyway.
19515 if ((object
->pager
== MEMORY_OBJECT_NULL
) ||
19516 (object
->internal
) || (object
->private)) {
19517 vm_object_unlock(object
);
19518 vm_map_unlock(map
);
19522 * keep reference on the object until syncing is done
19524 vm_object_reference_locked(object
);
19525 vm_object_unlock(object
);
19527 vm_map_unlock(map
);
19529 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
19530 start_offset
= vm_object_trunc_page(offset
);
19531 end_offset
= vm_object_round_page(offset
+ flush_size
);
19533 start_offset
= offset
;
19534 end_offset
= offset
+ flush_size
;
19537 do_sync_req
= vm_object_sync(object
,
19539 (end_offset
- start_offset
),
19540 sync_flags
& VM_SYNC_INVALIDATE
,
19541 ((sync_flags
& VM_SYNC_SYNCHRONOUS
) ||
19542 (sync_flags
& VM_SYNC_ASYNCHRONOUS
)),
19543 sync_flags
& VM_SYNC_SYNCHRONOUS
);
19545 if ((sync_flags
& VM_SYNC_INVALIDATE
) && object
->resident_page_count
== 0) {
19547 * clear out the clustering and read-ahead hints
19549 vm_object_lock(object
);
19551 object
->pages_created
= 0;
19552 object
->pages_used
= 0;
19553 object
->sequential
= 0;
19554 object
->last_alloc
= 0;
19556 vm_object_unlock(object
);
19558 vm_object_deallocate(object
);
19561 /* for proper msync() behaviour */
19562 if (had_hole
== TRUE
&& (sync_flags
& VM_SYNC_CONTIGUOUS
)) {
19563 return KERN_INVALID_ADDRESS
;
19566 return KERN_SUCCESS
;
kern_return_t
vm_named_entry_from_vm_object(
	vm_named_entry_t        named_entry,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	vm_prot_t               prot)
{
	vm_map_copy_t copy;
	vm_map_entry_t copy_entry;

	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(!named_entry->is_object);
	assert(!named_entry->internal);
	assert(named_entry->backing.copy == VM_MAP_COPY_NULL);

	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->offset = offset;
	copy->cpy_hdr.page_shift = PAGE_SHIFT;
	vm_map_store_init(&copy->cpy_hdr);

	copy_entry = vm_map_copy_entry_create(copy, FALSE);
	copy_entry->protection = prot;
	copy_entry->max_protection = prot;
	copy_entry->use_pmap = TRUE;
	copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
	copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
	VME_OBJECT_SET(copy_entry, object);
	VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
	vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);

	named_entry->backing.copy = copy;
	named_entry->is_object = TRUE;
	if (object->internal) {
		named_entry->internal = TRUE;
	}

	DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, object, offset, size, prot);

	return KERN_SUCCESS;
}
vm_object_t
vm_named_entry_to_vm_object(
	vm_named_entry_t        named_entry)
{
	vm_map_copy_t copy;
	vm_map_entry_t copy_entry;
	vm_object_t object;

	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(named_entry->is_object);
	copy = named_entry->backing.copy;
	assert(copy != VM_MAP_COPY_NULL);
	assert(copy->cpy_hdr.nentries == 1);
	copy_entry = vm_map_copy_first_entry(copy);
	assert(!copy_entry->is_sub_map);
	object = VME_OBJECT(copy_entry);

	DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);

	return object;
}
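/*
 * Sketch of how the two helpers above pair up (hypothetical caller): a
 * freshly allocated named entry is pointed at an object, and the object can
 * later be recovered from the entry's single-entry copy list.
 *
 *	kr = vm_named_entry_from_vm_object(named_entry, object,
 *	    offset, size, VM_PROT_READ | VM_PROT_WRITE);
 *	...
 *	vm_object_t obj = vm_named_entry_to_vm_object(named_entry);
 *	assert(obj == object);
 */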
19638 * Routine: convert_port_entry_to_map
19640 * Convert from a port specifying an entry or a task
19641 * to a map. Doesn't consume the port ref; produces a map ref,
19642 * which may be null. Unlike convert_port_to_map, the
19643 * port may be task or a named entry backed.
19650 convert_port_entry_to_map(
19654 vm_named_entry_t named_entry
;
19655 uint32_t try_failed_count
= 0;
19657 if (IP_VALID(port
) && (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
19660 if (ip_active(port
) && (ip_kotype(port
)
19661 == IKOT_NAMED_ENTRY
)) {
19663 (vm_named_entry_t
) ip_get_kobject(port
);
19664 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
19667 try_failed_count
++;
19668 mutex_pause(try_failed_count
);
19671 named_entry
->ref_count
++;
19672 lck_mtx_unlock(&(named_entry
)->Lock
);
19674 if ((named_entry
->is_sub_map
) &&
19675 (named_entry
->protection
19676 & VM_PROT_WRITE
)) {
19677 map
= named_entry
->backing
.map
;
19678 if (map
->pmap
!= PMAP_NULL
) {
19679 if (map
->pmap
== kernel_pmap
) {
19680 panic("userspace has access "
19681 "to a kernel map %p", map
);
19683 pmap_require(map
->pmap
);
19686 mach_destroy_memory_entry(port
);
19687 return VM_MAP_NULL
;
19689 vm_map_reference(map
);
19690 mach_destroy_memory_entry(port
);
19693 return VM_MAP_NULL
;
19697 map
= convert_port_to_map(port
);
19704 * Routine: convert_port_entry_to_object
19706 * Convert from a port specifying a named entry to an
19707 * object. Doesn't consume the port ref; produces a map ref,
19708 * which may be null.
19715 convert_port_entry_to_object(
19718 vm_object_t object
= VM_OBJECT_NULL
;
19719 vm_named_entry_t named_entry
;
19720 uint32_t try_failed_count
= 0;
19722 if (IP_VALID(port
) &&
19723 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
19726 if (ip_active(port
) &&
19727 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
19728 named_entry
= (vm_named_entry_t
) ip_get_kobject(port
);
19729 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
19731 try_failed_count
++;
19732 mutex_pause(try_failed_count
);
19735 named_entry
->ref_count
++;
19736 lck_mtx_unlock(&(named_entry
)->Lock
);
19738 if (!(named_entry
->is_sub_map
) &&
19739 !(named_entry
->is_copy
) &&
19740 (named_entry
->is_object
) &&
19741 (named_entry
->protection
& VM_PROT_WRITE
)) {
19742 vm_map_copy_t copy
;
19743 vm_map_entry_t copy_entry
;
19745 copy
= named_entry
->backing
.copy
;
19746 assert(copy
->cpy_hdr
.nentries
== 1);
19747 copy_entry
= vm_map_copy_first_entry(copy
);
19748 assert(!copy_entry
->is_sub_map
);
19749 object
= VME_OBJECT(copy_entry
);
19750 assert(object
!= VM_OBJECT_NULL
);
19751 vm_object_reference(object
);
19753 mach_destroy_memory_entry(port
);
19761 * Export routines to other components for the things we access locally through
19768 return current_map_fast();
/*
 *	vm_map_reference:
 *
 *	Takes a reference on the specified map.
 */
void
vm_map_reference(
	vm_map_t        map)
{
	if (__probable(map != VM_MAP_NULL)) {
		vm_map_require(map);
		os_ref_retain(&map->map_refcnt);
	}
}

/*
 *	vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(
	vm_map_t        map)
{
	if (__probable(map != VM_MAP_NULL)) {
		vm_map_require(map);
		if (os_ref_release(&map->map_refcnt) == 0) {
			vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
		}
	}
}
void
vm_map_inspect_deallocate(
	vm_map_inspect_t        map)
{
	vm_map_deallocate((vm_map_t)map);
}

void
vm_map_read_deallocate(
	vm_map_read_t   map)
{
	vm_map_deallocate((vm_map_t)map);
}

void
vm_map_disable_NX(vm_map_t map)
{
	if (map == NULL) {
		return;
	}
	if (map->pmap == NULL) {
		return;
	}

	pmap_disable_NX(map->pmap);
}

void
vm_map_disallow_data_exec(vm_map_t map)
{
	if (map == NULL) {
		return;
	}

	map->map_disallow_data_exec = TRUE;
}
/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
 * more descriptive.
 */
void
vm_map_set_32bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}

void
vm_map_set_64bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
}

/*
 * Expand the maximum size of an existing map to the maximum supported.
 */
void
vm_map_set_jumbo(vm_map_t map)
{
#if defined (__arm64__) && !defined(CONFIG_ARROW)
	vm_map_set_max_addr(map, ~0);
#else
	(void) map;
#endif
}

/*
 * This map has a JIT entitlement
 */
void
vm_map_set_jit_entitled(vm_map_t map)
{
#if defined (__arm64__)
	pmap_set_jit_entitled(map->pmap);
#else
	(void) map;
#endif
}
/*
 * Expand the maximum size of an existing map.
 */
void
vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
{
#if defined(__arm64__)
	vm_map_offset_t max_supported_offset = 0;
	vm_map_offset_t old_max_offset = map->max_offset;
	max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);

	new_max_offset = trunc_page(new_max_offset);

	/* The address space cannot be shrunk using this routine. */
	if (old_max_offset >= new_max_offset) {
		return;
	}

	if (max_supported_offset < new_max_offset) {
		new_max_offset = max_supported_offset;
	}

	map->max_offset = new_max_offset;

	if (map->holes_list->prev->vme_end == old_max_offset) {
		/*
		 * There is already a hole at the end of the map; simply make it bigger.
		 */
		map->holes_list->prev->vme_end = map->max_offset;
	} else {
		/*
		 * There is no hole at the end, so we need to create a new hole
		 * for the new empty space we're creating.
		 */
		struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
		new_hole->start = old_max_offset;
		new_hole->end = map->max_offset;
		new_hole->prev = map->holes_list->prev;
		new_hole->next = (struct vm_map_entry *)map->holes_list;
		map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
		map->holes_list->prev = (struct vm_map_entry *)new_hole;
	}
#else
	(void)new_max_offset;
#endif
}
vm_map_offset_t
vm_compute_max_offset(boolean_t is64)
{
#if defined(__arm__) || defined(__arm64__)
	return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}

void
vm_map_get_max_aslr_slide_section(
	vm_map_t        map __unused,
	int64_t         *max_sections,
	int64_t         *section_size)
{
#if defined(__arm64__)
	*section_size = ARM_TT_TWIG_SIZE;
#endif
}

vm_map_size_t
vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
	 * limited embedded address space; this is also meant to minimize pmap
	 * memory usage on 16KB page systems.
	 */
	return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}

vm_map_size_t
vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* We limit the loader slide to 4MB, in order to ensure at least 8 bits
	 * of independent entropy on 16KB page systems.
	 */
	return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
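/*
 * Worked numbers for the two arm64 limits above (a sketch; page shifts are
 * the usual 12 for 4K pages and 14 for 16K pages):
 *
 *	main slide:    1 << (24 - 14) = 1024 pages * 16K = 16MB
 *	               1 << (24 - 12) = 4096 pages *  4K = 16MB
 *	loader slide:  1 << (22 - 14) =  256 pages * 16K =  4MB  (8 bits of entropy)
 *	               1 << (22 - 12) = 1024 pages *  4K =  4MB
 */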
boolean_t
vm_map_is_64bit(
	vm_map_t map)
{
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}

boolean_t
vm_map_has_hard_pagezero(
	vm_map_t        map,
	vm_map_offset_t pagezero_size)
{
	/*
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	return map->min_offset >= pagezero_size;
}
/*
 * Raise a VM map's maximum offset.
 */
kern_return_t
vm_map_raise_max_offset(
	vm_map_t        map,
	vm_map_offset_t new_max_offset)
{
	kern_return_t   ret;

	vm_map_lock(map);
	ret = KERN_INVALID_ADDRESS;

	if (new_max_offset >= map->max_offset) {
		if (!vm_map_is_64bit(map)) {
			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		} else {
			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		}
	}

	vm_map_unlock(map);
	return ret;
}
/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t        map,
	vm_map_offset_t new_min_offset)
{
	vm_map_entry_t  first_entry;

	new_min_offset = vm_map_round_page(new_min_offset,
	    VM_MAP_PAGE_MASK(map));

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	if (new_min_offset >= map->max_offset) {
		/* can't go beyond the end of the address space */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	assert(map->holes_list);
	map->holes_list->start = new_min_offset;
	assert(new_min_offset < map->holes_list->end);

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
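/*
 * Usage sketch (hypothetical caller): reserving a 4GB "page zero" for a
 * 64-bit process by raising the map's minimum offset, as the Mach-O load
 * path does for the __PAGEZERO segment.
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)0x100000000ULL);
 *	// fails with KERN_NO_SPACE if something is already mapped below 4GB
 */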
/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
 * don't have to reach over to the BSD data structures.
 */
void
vm_map_set_user_wire_limit(vm_map_t map,
    vm_size_t limit)
{
	map->user_wire_limit = limit;
}

void
vm_map_switch_protect(vm_map_t map,
    boolean_t val)
{
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}

extern int cs_process_enforcement_enable;

boolean_t
vm_map_cs_enforcement(
	vm_map_t map)
{
	if (cs_process_enforcement_enable) {
		return TRUE;
	}
	return map->cs_enforcement;
}

kern_return_t
vm_map_cs_wx_enable(
	vm_map_t map)
{
	return pmap_cs_allow_invalid(vm_map_pmap(map));
}

void
vm_map_cs_enforcement_set(
	vm_map_t map,
	boolean_t val)
{
	vm_map_lock(map);
	map->cs_enforcement = val;
	pmap_set_vm_map_cs_enforced(map->pmap, val);
	vm_map_unlock(map);
}
/*
 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
 * bump both counters.
 */
void
vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}

void
vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
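/*
 * Sketch of the intended pairing (hypothetical driver-side caller): every
 * size credited when an IOKit mapping is created must be debited with the
 * same size when it is torn down, or phys_footprint drifts.
 *
 *	vm_map_iokit_mapped_region(map, bytes);     // at map time
 *	...
 *	vm_map_iokit_unmapped_region(map, bytes);   // at unmap time
 */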
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t
vm_map_sign(vm_map_t map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object,
		    start - entry->vme_start + VME_OFFSET(entry));
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->vmp_busy ||
		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		/* XXX TODO4K: deal with 4k subpages individually? */
		m->vmp_cs_validated = VMP_CS_ALL_TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->vmp_wpmapped = FALSE;
		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif /* CONFIG_DYNAMIC_CODE_SIGNING */
kern_return_t
vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
{
	vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t next_entry;
	kern_return_t kr = KERN_SUCCESS;
	vm_map_t zap_map;

	vm_map_lock(map);

	/*
	 * We use a "zap_map" to avoid having to unlock
	 * the "map" in vm_map_delete().
	 */
	zap_map = vm_map_create(PMAP_NULL,
	    map->min_offset,
	    map->max_offset,
	    map->hdr.entries_pageable);

	if (zap_map == VM_MAP_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	vm_map_set_page_shift(zap_map,
	    VM_MAP_PAGE_SHIFT(map));
	vm_map_disable_hole_optimization(zap_map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = next_entry) {
		next_entry = entry->vme_next;

		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    (VME_OBJECT(entry)->internal == TRUE) &&
		    (VME_OBJECT(entry)->ref_count == 1)) {
			*reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
			*reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);

			(void)vm_map_delete(map,
			    entry->vme_start,
			    entry->vme_end,
			    VM_MAP_REMOVE_SAVE_ENTRIES,
			    zap_map);
		}
	}

	vm_map_unlock(map);

	/*
	 * Get rid of the "zap_maps" and all the map entries that
	 * they may still contain.
	 */
	if (zap_map != VM_MAP_NULL) {
		vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_map = VM_MAP_NULL;
	}

	return kr;
}
#if DEVELOPMENT || DEBUG

int
vm_map_disconnect_page_mappings(
	vm_map_t map,
	boolean_t do_unnest)
{
	vm_map_entry_t entry;
	int page_count = 0;

	if (do_unnest == TRUE) {
#ifndef NO_NESTED_PMAP
		vm_map_lock(map);

		for (entry = vm_map_first_entry(map);
		    entry != vm_map_to_entry(map);
		    entry = entry->vme_next) {
			if (entry->is_sub_map && entry->use_pmap) {
				/*
				 * Make sure the range between the start of this entry and
				 * the end of this entry is no longer nested, so that
				 * we will only remove mappings from the pmap in use by this
				 * task
				 */
				vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
			}
		}
		vm_map_unlock(map);
#endif
	}
	vm_map_lock_read(map);

	page_count = map->pmap->stats.resident_count;

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
		    (VME_OBJECT(entry)->phys_contiguous))) {
			continue;
		}
		if (entry->is_sub_map) {
			assert(!entry->use_pmap);
		}

		pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
	}
	vm_map_unlock_read(map);

	return page_count;
}
kern_return_t
vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_object_t object = NULL;
	vm_object_offset_t offset;
	vm_prot_t prot;
	boolean_t wired;
	vm_map_version_t version;
	vm_map_t real_map;
	int result = KERN_FAILURE;

	vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
	vm_map_lock(map);

	result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
	    NULL, &real_map, NULL);
	if (object == NULL) {
		result = KERN_MEMORY_ERROR;
	} else if (object->pager) {
		result = vm_compressor_pager_inject_error(object->pager,
		    offset);
	} else {
		result = KERN_MEMORY_PRESENT;
	}

	if (object != NULL) {
		vm_object_unlock(object);
	}

	if (real_map != map) {
		vm_map_unlock(real_map);
	}
	vm_map_unlock(map);

	return result;
}
#endif /* DEVELOPMENT || DEBUG */
#if CONFIG_FREEZE

extern struct freezer_context freezer_context_global;
AbsoluteTime c_freezer_last_yield_ts = 0;

extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
extern unsigned int memorystatus_freeze_shared_mb_per_process_max;

kern_return_t
vm_map_freeze(
	task_t       task,
	unsigned int *purgeable_count,
	unsigned int *wired_count,
	unsigned int *clean_count,
	unsigned int *dirty_count,
	unsigned int dirty_budget,
	unsigned int *shared_count,
	int          *freezer_error_code,
	boolean_t    eval_only)
{
	vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
	kern_return_t kr = KERN_SUCCESS;
	boolean_t evaluation_phase = TRUE;
	vm_object_t cur_shared_object = NULL;
	int cur_shared_obj_ref_cnt = 0;
	unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;

	*purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;

	/*
	 * We need the exclusive lock here so that we can
	 * block any page faults or lookups while we are
	 * in the middle of freezing this vm map.
	 */
	vm_map_t map = task->map;

	vm_map_lock(map);

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
		if (vm_compressor_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
		}
		if (vm_swap_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
		}
		kr = KERN_NO_SPACE;
		goto done;
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
		/*
		 * In-memory compressor backing the freezer. No disk.
		 * So no need to do the evaluation phase.
		 */
		evaluation_phase = FALSE;

		if (eval_only == TRUE) {
			/*
			 * We don't support 'eval_only' mode
			 * in this non-swap config.
			 */
			*freezer_error_code = FREEZER_ERROR_GENERIC;
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);
	}
again:

	for (entry2 = vm_map_first_entry(map);
	    entry2 != vm_map_to_entry(map);
	    entry2 = entry2->vme_next) {
		vm_object_t src_object = VME_OBJECT(entry2);

		if (src_object &&
		    !entry2->is_sub_map &&
		    !src_object->phys_contiguous) {
			/* If eligible, scan the entry, moving eligible pages over to our parent object */

			if (src_object->internal == TRUE) {
				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					/*
					 * We skip purgeable objects during evaluation phase only.
					 * If we decide to freeze this process, we'll explicitly
					 * purge these objects before we go around again with
					 * 'evaluation_phase' set to FALSE.
					 */

					if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
						/*
						 * We want to purge objects that may not belong to this task but are mapped
						 * in this task alone. Since we already purged this task's purgeable memory
						 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
						 * on this task's purgeable objects. Hence the check for only volatile objects.
						 */
						if (evaluation_phase == FALSE &&
						    (src_object->purgable == VM_PURGABLE_VOLATILE) &&
						    (src_object->ref_count == 1)) {
							vm_object_lock(src_object);
							vm_object_purge(src_object, 0);
							vm_object_unlock(src_object);
						}
						continue;
					}

					/*
					 * Pages belonging to this object could be swapped to disk.
					 * Make sure it's not a shared object because we could end
					 * up just bringing it back in again.
					 *
					 * We try to optimize somewhat by checking for objects that are mapped
					 * more than once within our own map. But we don't do full searches,
					 * we just look at the entries following our current entry.
					 */

					if (src_object->ref_count > 1) {
						if (src_object != cur_shared_object) {
							obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
							dirty_shared_count += obj_pages_snapshot;

							cur_shared_object = src_object;
							cur_shared_obj_ref_cnt = 1;
							continue;
						} else {
							cur_shared_obj_ref_cnt++;
							if (src_object->ref_count == cur_shared_obj_ref_cnt) {
								/*
								 * Fall through to below and treat this object as private.
								 * So deduct its pages from our shared total and add it to the
								 * private total.
								 */
								dirty_shared_count -= obj_pages_snapshot;
								dirty_private_count += obj_pages_snapshot;
							} else {
								continue;
							}
						}
					}

					if (src_object->ref_count == 1) {
						dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
					}

					if (evaluation_phase == TRUE) {
						continue;
					}
				}

				uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
				*wired_count += src_object->wired_page_count;

				if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
					if (vm_compressor_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
					}
					if (vm_swap_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
					}
					kr = KERN_NO_SPACE;
					break;
				}
				if (paged_out_count >= dirty_budget) {
					break;
				}
				dirty_budget -= paged_out_count;
			}
		}
	}

	*shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
	if (evaluation_phase) {
		unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;

		if (dirty_shared_count > shared_pages_threshold) {
			*freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
			kr = KERN_FAILURE;
			goto done;
		}

		if (dirty_shared_count &&
		    ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
			*freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
			kr = KERN_FAILURE;
			goto done;
		}

		evaluation_phase = FALSE;
		dirty_shared_count = dirty_private_count = 0;

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);

		if (eval_only) {
			kr = KERN_SUCCESS;
			goto done;
		}

		vm_purgeable_purge_task_owned(task);

		goto again;
	} else {
		kr = KERN_SUCCESS;
	}

done:
	vm_map_unlock(map);

	if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
		vm_object_compressed_freezer_done();
	}
	return kr;
}
#endif /* CONFIG_FREEZE */
/*
 * vm_map_entry_should_cow_for_true_share:
 *
 * Determines if the map entry should be clipped and setup for copy-on-write
 * to avoid applying "true_share" to a large VM object when only a subset is
 * targeted.
 *
 * For now, we target only the map entries created for the Objective C
 * Garbage Collector, which initially have the following properties:
 *	- alias == VM_MEMORY_MALLOC
 *	- wired_count == 0
 *	- !needs_copy
 * and a VM object with:
 *	- internal
 *	- copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
 *	- !true_share
 *	- vo_size == ANON_CHUNK_SIZE
 *
 * Only non-kernel map entries.
 */
boolean_t
vm_map_entry_should_cow_for_true_share(
	vm_map_entry_t entry)
{
	vm_object_t object;

	if (entry->is_sub_map) {
		/* entry does not point at a VM object */
		return FALSE;
	}

	if (entry->needs_copy) {
		/* already set for copy_on_write: done! */
		return FALSE;
	}

	if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
	    VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
		/* not a malloc heap or Obj-C Garbage Collector heap */
		return FALSE;
	}

	if (entry->wired_count) {
		/* wired: can't change the map entry... */
		vm_counters.should_cow_but_wired++;
		return FALSE;
	}

	object = VME_OBJECT(entry);

	if (object == VM_OBJECT_NULL) {
		/* no object yet... */
		return FALSE;
	}

	if (!object->internal) {
		/* not an internal object */
		return FALSE;
	}

	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
		/* not the default copy strategy */
		return FALSE;
	}

	if (object->true_share) {
		/* already true_share: too late to avoid it */
		return FALSE;
	}

	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
	    object->vo_size != ANON_CHUNK_SIZE) {
		/* ... not an object created for the ObjC Garbage Collector */
		return FALSE;
	}

	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
	    object->vo_size != 2048 * 4096) {
		/* ... not a "MALLOC_SMALL" heap */
		return FALSE;
	}

	/*
	 * All the criteria match: we have a large object being targeted for "true_share".
	 * To limit the adverse side-effects linked with "true_share", tell the caller to
	 * try and avoid setting up the entire object for "true_share" by clipping the
	 * targeted range and setting it up for copy-on-write.
	 */
	return TRUE;
}
vm_map_offset_t
vm_map_round_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_ROUND_PAGE(offset, mask);
}

vm_map_offset_t
vm_map_trunc_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_TRUNC_PAGE(offset, mask);
}

boolean_t
vm_map_page_aligned(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return ((offset) & mask) == 0;
}

int
vm_map_page_shift(
	vm_map_t map)
{
	return VM_MAP_PAGE_SHIFT(map);
}

int
vm_map_page_size(
	vm_map_t map)
{
	return VM_MAP_PAGE_SIZE(map);
}

vm_map_offset_t
vm_map_page_mask(
	vm_map_t map)
{
	return VM_MAP_PAGE_MASK(map);
}
kern_return_t
vm_map_set_page_shift(
	vm_map_t map,
	int pageshift)
{
	if (map->hdr.nentries != 0) {
		/* too late to change page size */
		return KERN_FAILURE;
	}

	map->hdr.page_shift = pageshift;

	return KERN_SUCCESS;
}
kern_return_t
vm_map_query_volatile(
	vm_map_t map,
	mach_vm_size_t *volatile_virtual_size_p,
	mach_vm_size_t *volatile_resident_size_p,
	mach_vm_size_t *volatile_compressed_size_p,
	mach_vm_size_t *volatile_pmap_size_p,
	mach_vm_size_t *volatile_compressed_pmap_size_p)
{
	mach_vm_size_t volatile_virtual_size;
	mach_vm_size_t volatile_resident_count;
	mach_vm_size_t volatile_compressed_count;
	mach_vm_size_t volatile_pmap_count;
	mach_vm_size_t volatile_compressed_pmap_count;
	mach_vm_size_t resident_count;
	vm_map_entry_t entry;
	vm_object_t object;

	/* map should be locked by caller */

	volatile_virtual_size = 0;
	volatile_resident_count = 0;
	volatile_compressed_count = 0;
	volatile_pmap_count = 0;
	volatile_compressed_pmap_count = 0;

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;

		if (entry->is_sub_map) {
			continue;
		}
		if (!(entry->protection & VM_PROT_WRITE)) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}
		if (object->purgable != VM_PURGABLE_VOLATILE &&
		    object->purgable != VM_PURGABLE_EMPTY) {
			continue;
		}
		if (VME_OFFSET(entry)) {
			/*
			 * If the map entry has been split and the object now
			 * appears several times in the VM map, we don't want
			 * to count the object's resident_page_count more than
			 * once.  We count it only for the first one, starting
			 * at offset 0 and ignore the other VM map entries.
			 */
			continue;
		}
		resident_count = object->resident_page_count;
		if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
			resident_count = 0;
		} else {
			resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
		}

		volatile_virtual_size += entry->vme_end - entry->vme_start;
		volatile_resident_count += resident_count;
		if (object->pager) {
			volatile_compressed_count +=
			    vm_compressor_pager_get_count(object->pager);
		}
		pmap_compressed_bytes = 0;
		pmap_resident_bytes =
		    pmap_query_resident(map->pmap,
		    entry->vme_start,
		    entry->vme_end,
		    &pmap_compressed_bytes);
		volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
		volatile_compressed_pmap_count += (pmap_compressed_bytes
		    / PAGE_SIZE);
	}

	/* map is still locked on return */

	*volatile_virtual_size_p = volatile_virtual_size;
	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
	*volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
	*volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;

	return KERN_SUCCESS;
}
void
vm_map_sizes(vm_map_t map,
    vm_map_size_t *psize,
    vm_map_size_t *pfree,
    vm_map_size_t *plargest_free)
{
	vm_map_entry_t entry;
	vm_map_offset_t prev;
	vm_map_size_t free, total_free, largest_free;
	boolean_t end;

	if (!map) {
		*psize = *pfree = *plargest_free = 0;
		return;
	}
	total_free = largest_free = 0;

	vm_map_lock_read(map);
	if (psize) {
		*psize = map->max_offset - map->min_offset;
	}

	prev = map->min_offset;
	for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
		end = (entry == vm_map_to_entry(map));

		if (end) {
			free = entry->vme_end - prev;
		} else {
			free = entry->vme_start - prev;
		}

		total_free += free;
		if (free > largest_free) {
			largest_free = free;
		}

		if (end) {
			break;
		}
		prev = entry->vme_end;
	}
	vm_map_unlock_read(map);
	if (pfree) {
		*pfree = total_free;
	}
	if (plargest_free) {
		*plargest_free = largest_free;
	}
}
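
/*
 * Illustrative sketch (not part of the build): how a caller might use
 * vm_map_sizes() to report address-space usage.  "report_map_usage" is a
 * hypothetical helper shown only to demonstrate the out-parameters.
 */
#if 0
static void
report_map_usage(vm_map_t map)
{
	vm_map_size_t size, free, largest;

	vm_map_sizes(map, &size, &free, &largest);
	printf("map %p: va size 0x%llx free 0x%llx largest free 0x%llx\n",
	    map, (uint64_t)size, (uint64_t)free, (uint64_t)largest);
}
#endif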
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);
int
vm_map_shadow_max(
	vm_map_t map)
{
	int shadows, shadows_max;
	vm_map_entry_t entry;
	vm_object_t object, next_object;

	if (map == NULL) {
		return 0;
	}

	shadows_max = 0;

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (entry->is_sub_map) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}
		vm_object_lock_shared(object);
		for (shadows = 0;
		    object->shadow != NULL;
		    shadows++, object = next_object) {
			next_object = object->shadow;
			vm_object_lock_shared(next_object);
			vm_object_unlock(object);
		}
		vm_object_unlock(object);
		if (shadows > shadows_max) {
			shadows_max = shadows;
		}
	}

	vm_map_unlock_read(map);

	return shadows_max;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
void
vm_commit_pagezero_status(vm_map_t lmap)
{
	pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
}

#if XNU_TARGET_OS_OSX
void
vm_map_set_high_start(
	vm_map_t map,
	vm_map_offset_t high_start)
{
	map->vmmap_high_start = high_start;
}
#endif /* XNU_TARGET_OS_OSX */
/*
 * FORKED CORPSE FOOTPRINT
 *
 * A forked corpse gets a copy of the original VM map but its pmap is mostly
 * empty since it never ran and never got to fault in any pages.
 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
 * a forked corpse would therefore return very little information.
 *
 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
 * to vm_map_fork() to collect footprint information from the original VM map
 * and its pmap, and store it in the forked corpse's VM map.  That information
 * is stored in place of the VM map's "hole list" since we'll never need to
 * lookup for holes in the corpse's map.
 *
 * The corpse's footprint info looks like this:
 *
 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
 * as follows:
 *                     +---------------------------------------+
 *            header-> | cf_size                               |
 *                     +-------------------+-------------------+
 *                     | cf_last_region    | cf_last_zeroes    |
 *                     +-------------------+-------------------+
 *           region1-> | cfr_vaddr                             |
 *                     +-------------------+-------------------+
 *                     | cfr_num_pages     | d0 | d1 | d2 | d3 |
 *                     +---------------------------------------+
 *                     | d4 | d5 | ...                         |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +-------------------+-------------------+
 *                     | dy | dz | na | na | cfr_vaddr...      | <-region2
 *                     +-------------------+-------------------+
 *                     | cfr_vaddr (ctd)   | cfr_num_pages     |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +---------------------------------------+
 *       last region-> | cfr_vaddr                             |
 *                     +---------------------------------------+
 *                     + cfr_num_pages     | d0 | d1 | d2 | d3 |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +---------------------------------------+
 *                     | dx | dy | dz | na | na | na | na | na |
 *                     +---------------------------------------+
 *
 * where:
 *	cf_size:	total size of the buffer (rounded to page size)
 *	cf_last_region:	offset in the buffer of the last "region" sub-header
 *	cf_last_zeroes:	number of trailing "zero" dispositions at the end
 *			of the last region
 *	cfr_vaddr:	virtual address of the start of the covered "region"
 *	cfr_num_pages:	number of pages in the covered "region"
 *	d*:		disposition of the page at that virtual address
 * Regions in the buffer are word-aligned.
 *
 * We estimate the size of the buffer based on the number of memory regions
 * and the virtual size of the address space.  While copying each memory region
 * during vm_map_fork(), we also collect the footprint info for that region
 * and store it in the buffer, packing it as much as possible (coalescing
 * contiguous memory regions to avoid having too many region headers and
 * avoiding long streaks of "zero" page dispositions by splitting footprint
 * "regions", so the number of regions in the footprint buffer might not match
 * the number of memory regions in the address space.
 *
 * We also have to copy the original task's "nonvolatile" ledgers since that's
 * part of the footprint and will need to be reported to any tool asking for
 * the footprint information of the forked corpse.
 */
uint64_t vm_map_corpse_footprint_count = 0;
uint64_t vm_map_corpse_footprint_size_avg = 0;
uint64_t vm_map_corpse_footprint_size_max = 0;
uint64_t vm_map_corpse_footprint_full = 0;
uint64_t vm_map_corpse_footprint_no_buf = 0;

struct vm_map_corpse_footprint_header {
	vm_size_t cf_size;              /* allocated buffer size */
	uint32_t  cf_last_region;       /* offset of last region in buffer */
	union {
		uint32_t cfu_last_zeroes; /* during creation:
		                           * number of "zero" dispositions at
		                           * end of last region */
		uint32_t cfu_hint_region; /* during lookup:
		                           * offset of last looked up region */
	} cfu;
};
#define cf_last_zeroes cfu.cfu_last_zeroes
#define cf_hint_region cfu.cfu_hint_region

typedef uint8_t cf_disp_t;
struct vm_map_corpse_footprint_region {
	vm_map_offset_t cfr_vaddr;      /* region start virtual address */
	uint32_t        cfr_num_pages;  /* number of pages in this "region" */
	cf_disp_t       cfr_disposition[0]; /* disposition of each page */
} __attribute__((packed));
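
/*
 * Illustrative sketch (not part of the build): given one footprint region,
 * the disposition of the page covering "va" lives at index
 * (va - cfr_vaddr) / effective_page_size in the trailing cfr_disposition[]
 * array.  "example_region_disp" is hypothetical; it assumes "va" falls inside
 * the region and only demonstrates the layout described above.
 */
#if 0
static cf_disp_t
example_region_disp(struct vm_map_corpse_footprint_region *region,
    vm_map_offset_t va, int effective_page_size)
{
	int disp_idx;

	disp_idx = (int)((va - region->cfr_vaddr) / effective_page_size);
	assert(disp_idx >= 0 && (uint32_t)disp_idx < region->cfr_num_pages);
	return region->cfr_disposition[disp_idx];
}
#endif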
static cf_disp_t
vm_page_disposition_to_cf_disp(
	int disposition)
{
	assert(sizeof(cf_disp_t) == 1);
	/* relocate bits that don't fit in a "uint8_t" */
	if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
		disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
	}
	/* cast gets rid of extra bits */
	return (cf_disp_t) disposition;
}

static int
vm_page_cf_disp_to_disposition(
	cf_disp_t cf_disp)
{
	int disposition;

	assert(sizeof(cf_disp_t) == 1);
	disposition = (int) cf_disp;
	/* move relocated bits back in place */
	if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
		disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
		disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
	}
	return disposition;
}
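
/*
 * Illustrative round-trip sketch (not part of the build), assuming the
 * standard mach VM_PAGE_QUERY_* flag values: REUSABLE does not fit in the
 * low 8 bits, so the packing above parks it in the FICTITIOUS bit and the
 * lookup path moves it back.
 */
#if 0
static void
example_cf_disp_round_trip(void)
{
	int disp = VM_PAGE_QUERY_PAGE_PRESENT | VM_PAGE_QUERY_PAGE_REUSABLE;
	cf_disp_t packed = vm_page_disposition_to_cf_disp(disp);
	int unpacked = vm_page_cf_disp_to_disposition(packed);

	/* REUSABLE was parked in the FICTITIOUS bit while packed ... */
	assert(packed & VM_PAGE_QUERY_PAGE_FICTITIOUS);
	/* ... and is restored (and FICTITIOUS cleared) on the way back. */
	assert(unpacked & VM_PAGE_QUERY_PAGE_REUSABLE);
	assert(!(unpacked & VM_PAGE_QUERY_PAGE_FICTITIOUS));
}
#endif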
/*
 * vm_map_corpse_footprint_new_region:
 *	closes the current footprint "region" and creates a new one
 *
 * Returns NULL if there's not enough space in the buffer for a new region.
 */
static struct vm_map_corpse_footprint_region *
vm_map_corpse_footprint_new_region(
	struct vm_map_corpse_footprint_header *footprint_header)
{
	uintptr_t footprint_edge;
	uint32_t new_region_offset;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;

	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);
	footprint_region = ((struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region));
	assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
	    footprint_edge);

	/* get rid of trailing zeroes in the last region */
	assert(footprint_region->cfr_num_pages >=
	    footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -=
	    footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* reuse this region if it's now empty */
	if (footprint_region->cfr_num_pages == 0) {
		return footprint_region;
	}

	/* compute offset of new region */
	new_region_offset = footprint_header->cf_last_region;
	new_region_offset += sizeof(*footprint_region);
	new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
	new_region_offset = roundup(new_region_offset, sizeof(int));

	/* check if we're going over the edge */
	if (((uintptr_t)footprint_header +
	    new_region_offset +
	    sizeof(*footprint_region)) >=
	    footprint_edge) {
		/* over the edge: no new region */
		return NULL;
	}

	/* adjust offset of last region in header */
	footprint_header->cf_last_region = new_region_offset;

	new_footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);
	new_footprint_region->cfr_vaddr = 0;
	new_footprint_region->cfr_num_pages = 0;
	/* caller needs to initialize new region */

	return new_footprint_region;
}
/*
 * vm_map_corpse_footprint_collect:
 *	collect footprint information for "old_entry" in "old_map" and
 *	stores it in "new_map"'s vmmap_footprint_info.
 */
kern_return_t
vm_map_corpse_footprint_collect(
	vm_map_t old_map,
	vm_map_entry_t old_entry,
	vm_map_t new_map)
{
	vm_map_offset_t va;
	kern_return_t kr;
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;
	cf_disp_t *next_disp_p;
	uintptr_t footprint_edge;
	uint32_t num_pages_tmp;
	int effective_page_size;

	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));

	va = old_entry->vme_start;

	vm_map_lock_assert_exclusive(old_map);
	vm_map_lock_assert_exclusive(new_map);

	assert(new_map->has_corpse_footprint);
	assert(!old_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    old_map->has_corpse_footprint) {
		/*
		 * This can only transfer footprint info from a
		 * map with a live pmap to a map with a corpse footprint.
		 */
		return KERN_NOT_SUPPORTED;
	}

	if (new_map->vmmap_corpse_footprint == NULL) {
		vm_offset_t buf;
		vm_size_t buf_size;

		buf = 0;
		buf_size = (sizeof(*footprint_header) +
		    (old_map->hdr.nentries
		    *
		    (sizeof(*footprint_region) +
		    +3))                        /* potential alignment for each region */
		    +
		    ((old_map->size / effective_page_size)
		    *
		    sizeof(cf_disp_t)));        /* disposition for each page */
		// printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
		buf_size = round_page(buf_size);

		/* limit buffer to 1 page to validate overflow detection */
		// buf_size = PAGE_SIZE;

		/* limit size to a somewhat sane amount */
#if XNU_TARGET_OS_OSX
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
#else /* XNU_TARGET_OS_OSX */
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
#endif /* XNU_TARGET_OS_OSX */
		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
		}

		/*
		 * Allocate the pageable buffer (with a trailing guard page).
		 * It will be zero-filled on demand.
		 */
		kr = kernel_memory_allocate(kernel_map,
		    &buf,
		    (buf_size
		    + PAGE_SIZE),               /* trailing guard page */
		    0,                          /* mask */
		    KMA_PAGEABLE | KMA_GUARD_LAST,
		    VM_KERN_MEMORY_DIAG);
		if (kr != KERN_SUCCESS) {
			vm_map_corpse_footprint_no_buf++;
			return kr;
		}

		/* initialize header and 1st region */
		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
		new_map->vmmap_corpse_footprint = footprint_header;

		footprint_header->cf_size = buf_size;
		footprint_header->cf_last_region =
		    sizeof(*footprint_header);
		footprint_header->cf_last_zeroes = 0;

		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
		footprint_region->cfr_vaddr = 0;
		footprint_region->cfr_num_pages = 0;
	} else {
		/* retrieve header and last region */
		footprint_header = (struct vm_map_corpse_footprint_header *)
		    new_map->vmmap_corpse_footprint;
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
	}
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);

	if ((footprint_region->cfr_vaddr +
	    (((vm_map_offset_t)footprint_region->cfr_num_pages) *
	    effective_page_size))
	    != old_entry->vme_start) {
		uint64_t num_pages_delta, num_pages_delta_size;
		uint32_t region_offset_delta_size;

		/*
		 * Not the next contiguous virtual address:
		 * start a new region or store "zero" dispositions for
		 * the missing pages?
		 */
		/* size of gap in actual page dispositions */
		num_pages_delta = ((old_entry->vme_start -
		    footprint_region->cfr_vaddr) / effective_page_size)
		    - footprint_region->cfr_num_pages;
		num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
		/* size of gap as a new footprint region header */
		region_offset_delta_size =
		    (sizeof(*footprint_region) +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
		    sizeof(int)) -
		    ((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
		// printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
		if (region_offset_delta_size < num_pages_delta_size ||
		    os_add3_overflow(footprint_region->cfr_num_pages,
		    (uint32_t) num_pages_delta,
		    1,
		    &num_pages_tmp)) {
			/*
			 * Storing data for this gap would take more space
			 * than inserting a new footprint region header:
			 * let's start a new region and save space. If it's a
			 * tie, let's avoid using a new region, since that
			 * would require more region hops to find the right
			 * range during lookups.
			 *
			 * If the current region's cfr_num_pages would overflow
			 * if we added "zero" page dispositions for the gap,
			 * no choice but to start a new region.
			 */
			// printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(footprint_header);
			/* check that we're not going over the edge */
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			/* initialize new region as empty */
			footprint_region->cfr_vaddr = old_entry->vme_start;
			footprint_region->cfr_num_pages = 0;
		} else {
			/*
			 * Store "zero" page dispositions for the missing
			 * pages.
			 */
			// printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
			for (; num_pages_delta > 0; num_pages_delta--) {
				next_disp_p = (cf_disp_t *)
				    ((uintptr_t) footprint_region +
				    sizeof(*footprint_region));
				next_disp_p += footprint_region->cfr_num_pages;
				/* check that we're not going over the edge */
				if ((uintptr_t)next_disp_p >= footprint_edge) {
					goto over_the_edge;
				}
				/* store "zero" disposition for this gap page */
				footprint_region->cfr_num_pages++;
				*next_disp_p = (cf_disp_t) 0;
				footprint_header->cf_last_zeroes++;
			}
		}
	}

	for (va = old_entry->vme_start;
	    va < old_entry->vme_end;
	    va += effective_page_size) {
		int disposition;
		cf_disp_t cf_disp;

		vm_map_footprint_query_page_info(old_map,
		    old_entry,
		    va,
		    &disposition);
		cf_disp = vm_page_disposition_to_cf_disp(disposition);

		// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);

		if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
			/*
			 * Ignore "zero" dispositions at start of
			 * region: just move start of region.
			 */
			footprint_region->cfr_vaddr += effective_page_size;
			continue;
		}

		/* would region's cfr_num_pages overflow? */
		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
		    &num_pages_tmp)) {
			/* overflow: create a new region */
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(
				footprint_header);
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			footprint_region->cfr_vaddr = va;
			footprint_region->cfr_num_pages = 0;
		}

		next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
		    sizeof(*footprint_region));
		next_disp_p += footprint_region->cfr_num_pages;
		/* check that we're not going over the edge */
		if ((uintptr_t)next_disp_p >= footprint_edge) {
			goto over_the_edge;
		}
		/* store this dispostion */
		*next_disp_p = cf_disp;
		footprint_region->cfr_num_pages++;

		if (cf_disp != 0) {
			/* non-zero disp: break the current zero streak */
			footprint_header->cf_last_zeroes = 0;
			continue;
		}

		/* zero disp: add to the current streak of zeroes */
		footprint_header->cf_last_zeroes++;
		if ((footprint_header->cf_last_zeroes +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
		    (sizeof(int) - 1),
		    sizeof(int))) <
		    (sizeof(*footprint_header))) {
			/*
			 * There are not enough trailing "zero" dispositions
			 * (+ the extra padding we would need for the previous
			 * region); creating a new region would not save space
			 * at this point, so let's keep this "zero" disposition
			 * in this region and reconsider later.
			 */
			continue;
		}
		/*
		 * Create a new region to avoid having too many consecutive
		 * "zero" dispositions.
		 */
		new_footprint_region =
		    vm_map_corpse_footprint_new_region(footprint_header);
		if (new_footprint_region == NULL) {
			goto over_the_edge;
		}
		footprint_region = new_footprint_region;
		/* initialize the new region as empty ... */
		footprint_region->cfr_num_pages = 0;
		/* ... and skip this "zero" disp */
		footprint_region->cfr_vaddr = va + effective_page_size;
	}

	return KERN_SUCCESS;

over_the_edge:
	// printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
	vm_map_corpse_footprint_full++;
	return KERN_RESOURCE_SHORTAGE;
}
/*
 * vm_map_corpse_footprint_collect_done:
 *	completes the footprint collection by getting rid of any remaining
 *	trailing "zero" dispositions and trimming the unused part of the
 *	kernel buffer
 */
void
vm_map_corpse_footprint_collect_done(
	vm_map_t new_map)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	vm_size_t buf_size, actual_size;
	kern_return_t kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
	    new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	actual_size = (vm_size_t)(footprint_header->cf_last_region +
	    sizeof(*footprint_region) +
	    (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));

	// printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	vm_map_corpse_footprint_size_avg =
	    (((vm_map_corpse_footprint_size_avg *
	    vm_map_corpse_footprint_count) +
	    actual_size) /
	    (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		kr = vm_deallocate(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size +
		    PAGE_SIZE),         /* trailing guard page */
		    (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
		    "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
		kr = vm_protect(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size),
		    PAGE_SIZE,
		    FALSE,              /* set_maximum */
		    VM_PROT_NONE);
		assertf(kr == KERN_SUCCESS,
		    "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
	}

	footprint_header->cf_size = actual_size;
}
/*
 * vm_map_corpse_footprint_query_page_info:
 *	retrieves the disposition of the page at virtual address "vaddr"
 *	in the forked corpse's VM map
 *
 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
 */
void
vm_map_corpse_footprint_query_page_info(
	vm_map_t map,
	vm_map_offset_t va,
	int *disposition_p)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t footprint_region_offset;
	vm_map_offset_t region_start, region_end;
	int disp_idx;
	kern_return_t kr;
	int effective_page_size;
	cf_disp_t cf_disp;

	if (!map->has_corpse_footprint) {
		*disposition_p = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		*disposition_p = 0;
		// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));

	if (footprint_region_offset < sizeof(*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
	    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
	    effective_page_size));
	if (va < region_start &&
	    footprint_region_offset != sizeof(*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof(*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    effective_page_size));
	}

	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof(*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
		/* align to next word boundary */
		footprint_region_offset =
		    roundup(footprint_region_offset,
		    sizeof(int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    effective_page_size));
	}
	if (va < region_start || va >= region_end) {
		/* page not found */
		*disposition_p = 0;
		// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
	cf_disp = footprint_region->cfr_disposition[disp_idx];
	*disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
	kr = KERN_SUCCESS;
done:
	// if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
	    vm_map_t, map,
	    vm_map_offset_t, va,
	    int, *disposition_p,
	    kern_return_t, kr);
}
void
vm_map_corpse_footprint_destroy(
	vm_map_t map)
{
	if (map->has_corpse_footprint &&
	    map->vmmap_corpse_footprint != 0) {
		struct vm_map_corpse_footprint_header *footprint_header;
		vm_size_t buf_size;
		kern_return_t kr;

		footprint_header = map->vmmap_corpse_footprint;
		buf_size = footprint_header->cf_size;
		kr = vm_deallocate(kernel_map,
		    (vm_offset_t) map->vmmap_corpse_footprint,
		    ((vm_size_t) buf_size
		    + PAGE_SIZE));      /* trailing guard page */
		assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
		map->vmmap_corpse_footprint = 0;
		map->has_corpse_footprint = FALSE;
	}
}
/*
 * vm_map_copy_footprint_ledgers:
 *	copies any ledger that's relevant to the memory footprint of "old_task"
 *	into the forked corpse's task ("new_task")
 */
void
vm_map_copy_footprint_ledgers(
	task_t old_task,
	task_t new_task)
{
	vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
}
/*
 * vm_map_copy_ledger:
 *	copy a single ledger from "old_task" to "new_task"
 */
void
vm_map_copy_ledger(
	task_t old_task,
	task_t new_task,
	int ledger_entry)
{
	ledger_amount_t old_balance, new_balance, delta;

	assert(new_task->map->has_corpse_footprint);
	if (!new_task->map->has_corpse_footprint) {
		return;
	}

	/* turn off sanity checks for the ledger we're about to mess with */
	ledger_disable_panic_on_negative(new_task->ledger,
	    ledger_entry);

	/* adjust "new_task" to match "old_task" */
	ledger_get_balance(old_task->ledger,
	    ledger_entry,
	    &old_balance);
	ledger_get_balance(new_task->ledger,
	    ledger_entry,
	    &new_balance);
	if (new_balance == old_balance) {
		/* new == old: done */
	} else if (new_balance > old_balance) {
		/* new > old ==> new -= new - old */
		delta = new_balance - old_balance;
		ledger_debit(new_task->ledger,
		    ledger_entry,
		    delta);
	} else {
		/* new < old ==> new += old - new */
		delta = old_balance - new_balance;
		ledger_credit(new_task->ledger,
		    ledger_entry,
		    delta);
	}
}
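
/*
 * Worked example of the adjustment above (illustration only): if the corpse
 * task's ledger reads 5 pages and the original task's reads 3, the corpse is
 * debited delta = 5 - 3 = 2; if the corpse reads 1, it is credited
 * 3 - 1 = 2.  Either way the corpse ends up reporting exactly the original
 * task's balance.
 */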
#if MACH_ASSERT

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

#define LEDGER_DRIFT(__LEDGER)                  \
	int             __LEDGER##_over;        \
	ledger_amount_t __LEDGER##_over_total;  \
	ledger_amount_t __LEDGER##_over_max;    \
	int             __LEDGER##_under;       \
	ledger_amount_t __LEDGER##_under_total; \
	ledger_amount_t __LEDGER##_under_max

struct {
	uint64_t num_pmaps_checked;

	LEDGER_DRIFT(phys_footprint);
	LEDGER_DRIFT(internal);
	LEDGER_DRIFT(internal_compressed);
	LEDGER_DRIFT(iokit_mapped);
	LEDGER_DRIFT(alternate_accounting);
	LEDGER_DRIFT(alternate_accounting_compressed);
	LEDGER_DRIFT(page_table);
	LEDGER_DRIFT(purgeable_volatile);
	LEDGER_DRIFT(purgeable_nonvolatile);
	LEDGER_DRIFT(purgeable_volatile_compressed);
	LEDGER_DRIFT(purgeable_nonvolatile_compressed);
	LEDGER_DRIFT(tagged_nofootprint);
	LEDGER_DRIFT(tagged_footprint);
	LEDGER_DRIFT(tagged_nofootprint_compressed);
	LEDGER_DRIFT(tagged_footprint_compressed);
	LEDGER_DRIFT(network_volatile);
	LEDGER_DRIFT(network_nonvolatile);
	LEDGER_DRIFT(network_volatile_compressed);
	LEDGER_DRIFT(network_nonvolatile_compressed);
	LEDGER_DRIFT(media_nofootprint);
	LEDGER_DRIFT(media_footprint);
	LEDGER_DRIFT(media_nofootprint_compressed);
	LEDGER_DRIFT(media_footprint_compressed);
	LEDGER_DRIFT(graphics_nofootprint);
	LEDGER_DRIFT(graphics_footprint);
	LEDGER_DRIFT(graphics_nofootprint_compressed);
	LEDGER_DRIFT(graphics_footprint_compressed);
	LEDGER_DRIFT(neural_nofootprint);
	LEDGER_DRIFT(neural_footprint);
	LEDGER_DRIFT(neural_nofootprint_compressed);
	LEDGER_DRIFT(neural_footprint_compressed);
} pmap_ledgers_drift;

void
vm_map_pmap_check_ledgers(
	pmap_t pmap,
	ledger_t ledger,
	int pid,
	char *procname)
{
	ledger_amount_t bal;
	boolean_t do_panic;

	do_panic = FALSE;

	pmap_ledgers_drift.num_pmaps_checked++;

#define LEDGER_CHECK_BALANCE(__LEDGER)                                  \
MACRO_BEGIN                                                             \
	int panic_on_negative = TRUE;                                   \
	ledger_get_balance(ledger,                                      \
	    task_ledgers.__LEDGER,                                      \
	    &bal);                                                      \
	ledger_get_panic_on_negative(ledger,                            \
	    task_ledgers.__LEDGER,                                      \
	    &panic_on_negative);                                        \
	if (bal != 0) {                                                 \
	        if (panic_on_negative ||                                \
	            (pmap_ledgers_panic &&                              \
	            pmap_ledgers_panic_leeway > 0 &&                    \
	            (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||   \
	            bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
	                do_panic = TRUE;                                \
	        }                                                       \
	        printf("LEDGER BALANCE proc %d (%s) "                   \
	            "\"%s\" = %lld\n",                                  \
	            pid, procname, #__LEDGER, bal);                     \
	        if (bal > 0) {                                          \
	                pmap_ledgers_drift.__LEDGER##_over++;           \
	                pmap_ledgers_drift.__LEDGER##_over_total += bal; \
	                if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
	                        pmap_ledgers_drift.__LEDGER##_over_max = bal; \
	                }                                               \
	        } else if (bal < 0) {                                   \
	                pmap_ledgers_drift.__LEDGER##_under++;          \
	                pmap_ledgers_drift.__LEDGER##_under_total += bal; \
	                if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
	                        pmap_ledgers_drift.__LEDGER##_under_max = bal; \
	                }                                               \
	        }                                                       \
	}                                                               \
MACRO_END

	LEDGER_CHECK_BALANCE(phys_footprint);
	LEDGER_CHECK_BALANCE(internal);
	LEDGER_CHECK_BALANCE(internal_compressed);
	LEDGER_CHECK_BALANCE(iokit_mapped);
	LEDGER_CHECK_BALANCE(alternate_accounting);
	LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
	LEDGER_CHECK_BALANCE(page_table);
	LEDGER_CHECK_BALANCE(purgeable_volatile);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
	LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(tagged_nofootprint);
	LEDGER_CHECK_BALANCE(tagged_footprint);
	LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
	LEDGER_CHECK_BALANCE(network_volatile);
	LEDGER_CHECK_BALANCE(network_nonvolatile);
	LEDGER_CHECK_BALANCE(network_volatile_compressed);
	LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(media_nofootprint);
	LEDGER_CHECK_BALANCE(media_footprint);
	LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(media_footprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_nofootprint);
	LEDGER_CHECK_BALANCE(graphics_footprint);
	LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
	LEDGER_CHECK_BALANCE(neural_nofootprint);
	LEDGER_CHECK_BALANCE(neural_footprint);
	LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(neural_footprint_compressed);

	if (do_panic) {
		if (pmap_ledgers_panic) {
			panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		} else {
			printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		}
	}
}
#endif /* MACH_ASSERT */