2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
63 * Virtual memory mapping module.
66 #include <task_swapper.h>
67 #include <mach_assert.h>
69 #include <vm/vm_options.h>
71 #include <libkern/OSAtomic.h>
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc_internal.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
111 #include <vm/vm_protos.h>
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
115 #include <san/kasan.h>
117 #include <sys/codesign.h>
118 #include <sys/mman.h>
120 #include <libkern/section_keywords.h>
121 #if DEVELOPMENT || DEBUG
122 extern int proc_selfcsflags(void);
123 int panic_on_unsigned_execute
= 0;
124 #endif /* DEVELOPMENT || DEBUG */
127 int debug4k_filter
= 0;
128 char debug4k_proc_name
[1024] = "";
129 int debug4k_proc_filter
= (int)-1 & ~(1 << __DEBUG4K_FAULT
);
130 int debug4k_panic_on_misaligned_sharing
= 0;
131 const char *debug4k_category_name
[] = {
146 #endif /* MACH_ASSERT */
147 int debug4k_no_cow_copyin
= 0;
151 extern const int fourk_binary_compatibility_unsafe
;
152 extern const int fourk_binary_compatibility_allow_wx
;
153 #endif /* __arm64__ */
154 extern int proc_selfpid(void);
155 extern char *proc_name_address(void *p
);
157 #if VM_MAP_DEBUG_APPLE_PROTECT
158 int vm_map_debug_apple_protect
= 0;
159 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
160 #if VM_MAP_DEBUG_FOURK
161 int vm_map_debug_fourk
= 0;
162 #endif /* VM_MAP_DEBUG_FOURK */
164 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable
= 1;
165 int vm_map_executable_immutable_verbose
= 0;
167 os_refgrp_decl(static, map_refgrp
, "vm_map", NULL
);
169 extern u_int32_t
random(void); /* from <libkern/libkern.h> */
170 /* Internal prototypes
173 static void vm_map_simplify_range(
175 vm_map_offset_t start
,
176 vm_map_offset_t end
); /* forward */
178 static boolean_t
vm_map_range_check(
180 vm_map_offset_t start
,
182 vm_map_entry_t
*entry
);
184 static vm_map_entry_t
_vm_map_entry_create(
185 struct vm_map_header
*map_header
, boolean_t map_locked
);
187 static void _vm_map_entry_dispose(
188 struct vm_map_header
*map_header
,
189 vm_map_entry_t entry
);
191 static void vm_map_pmap_enter(
193 vm_map_offset_t addr
,
194 vm_map_offset_t end_addr
,
196 vm_object_offset_t offset
,
197 vm_prot_t protection
);
199 static void _vm_map_clip_end(
200 struct vm_map_header
*map_header
,
201 vm_map_entry_t entry
,
202 vm_map_offset_t end
);
204 static void _vm_map_clip_start(
205 struct vm_map_header
*map_header
,
206 vm_map_entry_t entry
,
207 vm_map_offset_t start
);
209 static void vm_map_entry_delete(
211 vm_map_entry_t entry
);
213 static kern_return_t
vm_map_delete(
215 vm_map_offset_t start
,
220 static void vm_map_copy_insert(
222 vm_map_entry_t after_where
,
225 static kern_return_t
vm_map_copy_overwrite_unaligned(
227 vm_map_entry_t entry
,
229 vm_map_address_t start
,
230 boolean_t discard_on_success
);
232 static kern_return_t
vm_map_copy_overwrite_aligned(
234 vm_map_entry_t tmp_entry
,
236 vm_map_offset_t start
,
239 static kern_return_t
vm_map_copyin_kernel_buffer(
241 vm_map_address_t src_addr
,
243 boolean_t src_destroy
,
244 vm_map_copy_t
*copy_result
); /* OUT */
246 static kern_return_t
vm_map_copyout_kernel_buffer(
248 vm_map_address_t
*addr
, /* IN/OUT */
250 vm_map_size_t copy_size
,
252 boolean_t consume_on_success
);
254 static void vm_map_fork_share(
256 vm_map_entry_t old_entry
,
259 static boolean_t
vm_map_fork_copy(
261 vm_map_entry_t
*old_entry_p
,
263 int vm_map_copyin_flags
);
265 static kern_return_t
vm_map_wire_nested(
267 vm_map_offset_t start
,
269 vm_prot_t caller_prot
,
273 vm_map_offset_t pmap_addr
,
274 ppnum_t
*physpage_p
);
276 static kern_return_t
vm_map_unwire_nested(
278 vm_map_offset_t start
,
282 vm_map_offset_t pmap_addr
);
284 static kern_return_t
vm_map_overwrite_submap_recurse(
286 vm_map_offset_t dst_addr
,
287 vm_map_size_t dst_size
);
289 static kern_return_t
vm_map_copy_overwrite_nested(
291 vm_map_offset_t dst_addr
,
293 boolean_t interruptible
,
295 boolean_t discard_on_success
);
297 static kern_return_t
vm_map_remap_extract(
299 vm_map_offset_t addr
,
301 vm_prot_t required_protection
,
303 struct vm_map_header
*map_header
,
304 vm_prot_t
*cur_protection
,
305 vm_prot_t
*max_protection
,
306 vm_inherit_t inheritance
,
307 vm_map_kernel_flags_t vmk_flags
);
309 static kern_return_t
vm_map_remap_range_allocate(
311 vm_map_address_t
*address
,
313 vm_map_offset_t mask
,
315 vm_map_kernel_flags_t vmk_flags
,
317 vm_map_entry_t
*map_entry
);
319 static void vm_map_region_look_for_page(
323 vm_object_offset_t offset
,
325 unsigned short depth
,
326 vm_region_extended_info_t extended
,
327 mach_msg_type_number_t count
);
329 static int vm_map_region_count_obj_refs(
330 vm_map_entry_t entry
,
334 static kern_return_t
vm_map_willneed(
336 vm_map_offset_t start
,
337 vm_map_offset_t end
);
339 static kern_return_t
vm_map_reuse_pages(
341 vm_map_offset_t start
,
342 vm_map_offset_t end
);
344 static kern_return_t
vm_map_reusable_pages(
346 vm_map_offset_t start
,
347 vm_map_offset_t end
);
349 static kern_return_t
vm_map_can_reuse(
351 vm_map_offset_t start
,
352 vm_map_offset_t end
);
355 static kern_return_t
vm_map_pageout(
357 vm_map_offset_t start
,
358 vm_map_offset_t end
);
359 #endif /* MACH_ASSERT */
361 kern_return_t
vm_map_corpse_footprint_collect(
363 vm_map_entry_t old_entry
,
365 void vm_map_corpse_footprint_collect_done(
367 void vm_map_corpse_footprint_destroy(
369 kern_return_t
vm_map_corpse_footprint_query_page_info(
373 void vm_map_footprint_query_page_info(
375 vm_map_entry_t map_entry
,
376 vm_map_offset_t curr_s_offset
,
379 static const struct vm_map_entry vm_map_entry_template
= {
380 .behavior
= VM_BEHAVIOR_DEFAULT
,
381 .inheritance
= VM_INHERIT_DEFAULT
,
384 pid_t
find_largest_process_vm_map_entries(void);
387 * Macros to copy a vm_map_entry. We must be careful to correctly
388 * manage the wired page count. vm_map_entry_copy() creates a new
389 * map entry to the same memory - the wired count in the new entry
390 * must be set to zero. vm_map_entry_copy_full() creates a new
391 * entry that is identical to the old entry. This preserves the
392 * wire count; it's used for map splitting and zone changing in
397 vm_map_entry_copy_pmap_cs_assoc(
398 vm_map_t map __unused
,
399 vm_map_entry_t
new __unused
,
400 vm_map_entry_t old __unused
)
403 /* when pmap_cs is enabled, we want to reset on copy */
404 new->pmap_cs_associated
= FALSE
;
406 /* when pmap_cs is not enabled, assert as a sanity check */
407 assert(new->pmap_cs_associated
== FALSE
);
412 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
413 * But for security reasons on some platforms, we don't want the
414 * new mapping to be "used for jit", so we reset the flag here.
417 vm_map_entry_copy_code_signing(
420 vm_map_entry_t old __unused
)
422 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map
)) {
423 assert(new->used_for_jit
== old
->used_for_jit
);
425 new->used_for_jit
= FALSE
;
435 boolean_t _vmec_reserved
= new->from_reserved_zone
;
437 new->is_shared
= FALSE
;
438 new->needs_wakeup
= FALSE
;
439 new->in_transition
= FALSE
;
440 new->wired_count
= 0;
441 new->user_wired_count
= 0;
442 new->permanent
= FALSE
;
443 vm_map_entry_copy_code_signing(map
, new, old
);
444 vm_map_entry_copy_pmap_cs_assoc(map
, new, old
);
445 new->from_reserved_zone
= _vmec_reserved
;
446 if (new->iokit_acct
) {
447 assertf(!new->use_pmap
, "old %p new %p\n", old
, new);
448 new->iokit_acct
= FALSE
;
449 new->use_pmap
= TRUE
;
451 new->vme_resilient_codesign
= FALSE
;
452 new->vme_resilient_media
= FALSE
;
453 new->vme_atomic
= FALSE
;
454 new->vme_no_copy_on_read
= FALSE
;
458 vm_map_entry_copy_full(
462 boolean_t _vmecf_reserved
= new->from_reserved_zone
;
464 new->from_reserved_zone
= _vmecf_reserved
;
468 * Normal lock_read_to_write() returns FALSE/0 on failure.
469 * These functions evaluate to zero on success and non-zero value on failure.
471 __attribute__((always_inline
))
473 vm_map_lock_read_to_write(vm_map_t map
)
475 if (lck_rw_lock_shared_to_exclusive(&(map
)->lock
)) {
476 DTRACE_VM(vm_map_lock_upgrade
);
482 __attribute__((always_inline
))
484 vm_map_try_lock(vm_map_t map
)
486 if (lck_rw_try_lock_exclusive(&(map
)->lock
)) {
487 DTRACE_VM(vm_map_lock_w
);
493 __attribute__((always_inline
))
495 vm_map_try_lock_read(vm_map_t map
)
497 if (lck_rw_try_lock_shared(&(map
)->lock
)) {
498 DTRACE_VM(vm_map_lock_r
);
505 * Routines to get the page size the caller should
506 * use while inspecting the target address space.
507 * Use the "_safely" variant if the caller is dealing with a user-provided
508 * array whose size depends on the page size, to avoid any overflow or
509 * underflow of a user-allocated buffer.
512 vm_self_region_page_shift_safely(
515 int effective_page_shift
= 0;
517 if (PAGE_SIZE
== (4096)) {
518 /* x86_64 and 4k watches: always use 4k */
521 /* did caller provide an explicit page size for this thread to use? */
522 effective_page_shift
= thread_self_region_page_shift();
523 if (effective_page_shift
) {
524 /* use the explicitly-provided page size */
525 return effective_page_shift
;
527 /* no explicit page size: use the caller's page size... */
528 effective_page_shift
= VM_MAP_PAGE_SHIFT(current_map());
529 if (effective_page_shift
== VM_MAP_PAGE_SHIFT(target_map
)) {
530 /* page size match: safe to use */
531 return effective_page_shift
;
533 /* page size mismatch */
537 vm_self_region_page_shift(
540 int effective_page_shift
;
542 effective_page_shift
= vm_self_region_page_shift_safely(target_map
);
543 if (effective_page_shift
== -1) {
544 /* no safe value but OK to guess for caller */
545 effective_page_shift
= MIN(VM_MAP_PAGE_SHIFT(current_map()),
546 VM_MAP_PAGE_SHIFT(target_map
));
548 return effective_page_shift
;
553 * Decide if we want to allow processes to execute from their data or stack areas.
554 * override_nx() returns true if we do. Data/stack execution can be enabled independently
555 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
556 * or allow_stack_exec to enable data execution for that type of data area for that particular
557 * ABI (or both by or'ing the flags together). These are initialized in the architecture
558 * specific pmap files since the default behavior varies according to architecture. The
559 * main reason it varies is because of the need to provide binary compatibility with old
560 * applications that were written before these restrictions came into being. In the old
561 * days, an app could execute anything it could read, but this has slowly been tightened
562 * up over time. The default behavior is:
564 * 32-bit PPC apps may execute from both stack and data areas
565 * 32-bit Intel apps may execute from data areas but not stack
566 * 64-bit PPC/Intel apps may not execute from either data or stack
568 * An application on any architecture may override these defaults by explicitly
569 * adding PROT_EXEC permission to the page in question with the mprotect(2)
570 * system call. This code here just determines what happens when an app tries to
571 * execute from a page that lacks execute permission.
573 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
574 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
575 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
576 * execution from data areas for a particular binary even if the arch normally permits it. As
577 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
578 * to support some complicated use cases, notably browsers with out-of-process plugins that
579 * are not all NX-safe.
582 extern int allow_data_exec
, allow_stack_exec
;
585 override_nx(vm_map_t map
, uint32_t user_tag
) /* map unused on arm */
589 if (map
->pmap
== kernel_pmap
) {
594 * Determine if the app is running in 32 or 64 bit mode.
597 if (vm_map_is_64bit(map
)) {
598 current_abi
= VM_ABI_64
;
600 current_abi
= VM_ABI_32
;
604 * Determine if we should allow the execution based on whether it's a
605 * stack or data area and the current architecture.
608 if (user_tag
== VM_MEMORY_STACK
) {
609 return allow_stack_exec
& current_abi
;
612 return (allow_data_exec
& current_abi
) && (map
->map_disallow_data_exec
== FALSE
);
617 * Virtual memory maps provide for the mapping, protection,
618 * and sharing of virtual memory objects. In addition,
619 * this module provides for an efficient virtual copy of
620 * memory from one map to another.
622 * Synchronization is required prior to most operations.
624 * Maps consist of an ordered doubly-linked list of simple
625 * entries; a single hint is used to speed up lookups.
627 * Sharing maps have been deleted from this version of Mach.
628 * All shared objects are now mapped directly into the respective
629 * maps. This requires a change in the copy on write strategy;
630 * the asymmetric (delayed) strategy is used for shared temporary
631 * objects instead of the symmetric (shadow) strategy. All maps
632 * are now "top level" maps (either task map, kernel map or submap
633 * of the kernel map).
635 * Since portions of maps are specified by start/end addresses,
636 * which may not align with existing map entries, all
637 * routines merely "clip" entries to these start/end values.
638 * [That is, an entry is split into two, bordering at a
639 * start or end value.] Note that these clippings may not
640 * always be necessary (as the two resulting entries are then
641 * not changed); however, the clipping is done for convenience.
642 * No attempt is currently made to "glue back together" two
645 * The symmetric (shadow) copy strategy implements virtual copy
646 * by copying VM object references from one map to
647 * another, and then marking both regions as copy-on-write.
648 * It is important to note that only one writeable reference
649 * to a VM object region exists in any map when this strategy
650 * is used -- this means that shadow object creation can be
651 * delayed until a write operation occurs. The asymmetric (delayed)
652 * strategy allows multiple maps to have writeable references to
653 * the same region of a vm object, and hence cannot delay creating
654 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
655 * Copying of permanent objects is completely different; see
656 * vm_object_copy_strategically() in vm_object.c.
659 static SECURITY_READ_ONLY_LATE(zone_t
) vm_map_zone
; /* zone for vm_map structures */
660 static SECURITY_READ_ONLY_LATE(zone_t
) vm_map_entry_reserved_zone
; /* zone with reserve for non-blocking allocations */
661 static SECURITY_READ_ONLY_LATE(zone_t
) vm_map_copy_zone
; /* zone for vm_map_copy structures */
663 SECURITY_READ_ONLY_LATE(zone_t
) vm_map_entry_zone
; /* zone for vm_map_entry structures */
664 SECURITY_READ_ONLY_LATE(zone_t
) vm_map_holes_zone
; /* zone for vm map holes (vm_map_links) structures */
666 #define VM_MAP_ZONE_NAME "maps"
667 #define VM_MAP_ZFLAGS ( \
673 #define VME_RESERVED_ZONE_NAME "Reserved VM map entries"
674 #define VM_MAP_RESERVED_ZFLAGS ( \
679 ZC_KASAN_NOQUARANTINE | \
682 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
683 #define VM_MAP_HOLES_ZFLAGS ( \
690 * Asserts that a vm_map_copy object is coming from the
691 * vm_map_copy_zone to ensure that it isn't a fake constructed
695 vm_map_copy_require(struct vm_map_copy
*copy
)
697 zone_id_require(ZONE_ID_VM_MAP_COPY
, sizeof(struct vm_map_copy
), copy
);
701 * Placeholder object for submap operations. This object is dropped
702 * into the range by a call to vm_map_find, and removed when
703 * vm_map_submap creates the submap.
706 vm_object_t vm_submap_object
;
708 static __startup_data vm_offset_t map_data
;
709 static __startup_data vm_size_t map_data_size
;
710 static __startup_data vm_offset_t kentry_data
;
711 static __startup_data vm_size_t kentry_data_size
;
712 static __startup_data vm_offset_t map_holes_data
;
713 static __startup_data vm_size_t map_holes_data_size
;
715 #if XNU_TARGET_OS_OSX
716 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
717 #else /* XNU_TARGET_OS_OSX */
718 #define NO_COALESCE_LIMIT 0
719 #endif /* XNU_TARGET_OS_OSX */
721 /* Skip acquiring locks if we're in the midst of a kernel core dump */
722 unsigned int not_in_kdp
= 1;
724 unsigned int vm_map_set_cache_attr_count
= 0;
727 vm_map_set_cache_attr(
731 vm_map_entry_t map_entry
;
733 kern_return_t kr
= KERN_SUCCESS
;
735 vm_map_lock_read(map
);
737 if (!vm_map_lookup_entry(map
, va
, &map_entry
) ||
738 map_entry
->is_sub_map
) {
740 * that memory is not properly mapped
742 kr
= KERN_INVALID_ARGUMENT
;
745 object
= VME_OBJECT(map_entry
);
747 if (object
== VM_OBJECT_NULL
) {
749 * there should be a VM object here at this point
751 kr
= KERN_INVALID_ARGUMENT
;
754 vm_object_lock(object
);
755 object
->set_cache_attr
= TRUE
;
756 vm_object_unlock(object
);
758 vm_map_set_cache_attr_count
++;
760 vm_map_unlock_read(map
);
766 #if CONFIG_CODE_DECRYPTION
768 * vm_map_apple_protected:
769 * This remaps the requested part of the object with an object backed by
770 * the decrypting pager.
771 * crypt_info contains entry points and session data for the crypt module.
772 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
773 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
776 vm_map_apple_protected(
778 vm_map_offset_t start
,
780 vm_object_offset_t crypto_backing_offset
,
781 struct pager_crypt_info
*crypt_info
,
784 boolean_t map_locked
;
786 vm_map_entry_t map_entry
;
787 struct vm_map_entry tmp_entry
;
788 memory_object_t unprotected_mem_obj
;
789 vm_object_t protected_object
;
790 vm_map_offset_t map_addr
;
791 vm_map_offset_t start_aligned
, end_aligned
;
792 vm_object_offset_t crypto_start
, crypto_end
;
794 vm_map_kernel_flags_t vmk_flags
;
797 vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
800 unprotected_mem_obj
= MEMORY_OBJECT_NULL
;
802 start_aligned
= vm_map_trunc_page(start
, PAGE_MASK_64
);
803 end_aligned
= vm_map_round_page(end
, PAGE_MASK_64
);
804 start_aligned
= vm_map_trunc_page(start_aligned
, VM_MAP_PAGE_MASK(map
));
805 end_aligned
= vm_map_round_page(end_aligned
, VM_MAP_PAGE_MASK(map
));
809 * "start" and "end" might be 4K-aligned but not 16K-aligned,
810 * so we might have to loop and establish up to 3 mappings:
812 * + the first 16K-page, which might overlap with the previous
813 * 4K-aligned mapping,
815 * + the last 16K-page, which might overlap with the next
816 * 4K-aligned mapping.
817 * Each of these mappings might be backed by a vnode pager (if
818 * properly page-aligned) or a "fourk_pager", itself backed by a
819 * vnode pager (if 4K-aligned but not page-aligned).
821 #endif /* __arm64__ */
823 map_addr
= start_aligned
;
824 for (map_addr
= start_aligned
;
826 map_addr
= tmp_entry
.vme_end
) {
830 /* lookup the protected VM object */
831 if (!vm_map_lookup_entry(map
,
834 map_entry
->is_sub_map
||
835 VME_OBJECT(map_entry
) == VM_OBJECT_NULL
) {
836 /* that memory is not properly mapped */
837 kr
= KERN_INVALID_ARGUMENT
;
841 /* ensure mapped memory is mapped as executable,
842 * except for model decryption flow */
843 if ((cryptid
!= CRYPTID_MODEL_ENCRYPTION
) &&
844 !(map_entry
->protection
& VM_PROT_EXECUTE
)) {
845 kr
= KERN_INVALID_ARGUMENT
;
849 /* get the protected object to be decrypted */
850 protected_object
= VME_OBJECT(map_entry
);
851 if (protected_object
== VM_OBJECT_NULL
) {
852 /* there should be a VM object here at this point */
853 kr
= KERN_INVALID_ARGUMENT
;
856 /* ensure protected object stays alive while map is unlocked */
857 vm_object_reference(protected_object
);
859 /* limit the map entry to the area we want to cover */
860 vm_map_clip_start(map
, map_entry
, start_aligned
);
861 vm_map_clip_end(map
, map_entry
, end_aligned
);
863 tmp_entry
= *map_entry
;
864 map_entry
= VM_MAP_ENTRY_NULL
; /* not valid after unlocking map */
869 * This map entry might be only partially encrypted
870 * (if not fully "page-aligned").
873 crypto_end
= tmp_entry
.vme_end
- tmp_entry
.vme_start
;
874 if (tmp_entry
.vme_start
< start
) {
875 if (tmp_entry
.vme_start
!= start_aligned
) {
876 kr
= KERN_INVALID_ADDRESS
;
878 crypto_start
+= (start
- tmp_entry
.vme_start
);
880 if (tmp_entry
.vme_end
> end
) {
881 if (tmp_entry
.vme_end
!= end_aligned
) {
882 kr
= KERN_INVALID_ADDRESS
;
884 crypto_end
-= (tmp_entry
.vme_end
- end
);
888 * This "extra backing offset" is needed to get the decryption
889 * routine to use the right key. It adjusts for the possibly
890 * relative offset of an interposed "4K" pager...
892 if (crypto_backing_offset
== (vm_object_offset_t
) -1) {
893 crypto_backing_offset
= VME_OFFSET(&tmp_entry
);
897 * Lookup (and create if necessary) the protected memory object
898 * matching that VM object.
899 * If successful, this also grabs a reference on the memory object,
900 * to guarantee that it doesn't go away before we get a chance to map
903 unprotected_mem_obj
= apple_protect_pager_setup(
905 VME_OFFSET(&tmp_entry
),
906 crypto_backing_offset
,
911 /* release extra ref on protected object */
912 vm_object_deallocate(protected_object
);
914 if (unprotected_mem_obj
== NULL
) {
919 vm_flags
= VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
;
920 /* can overwrite an immutable mapping */
921 vmk_flags
.vmkf_overwrite_immutable
= TRUE
;
923 if (tmp_entry
.used_for_jit
&&
924 (VM_MAP_PAGE_SHIFT(map
) != FOURK_PAGE_SHIFT
||
925 PAGE_SHIFT
!= FOURK_PAGE_SHIFT
) &&
926 fourk_binary_compatibility_unsafe
&&
927 fourk_binary_compatibility_allow_wx
) {
928 printf("** FOURK_COMPAT [%d]: "
929 "allowing write+execute at 0x%llx\n",
930 proc_selfpid(), tmp_entry
.vme_start
);
931 vmk_flags
.vmkf_map_jit
= TRUE
;
933 #endif /* __arm64__ */
935 /* map this memory object in place of the current one */
936 map_addr
= tmp_entry
.vme_start
;
937 kr
= vm_map_enter_mem_object(map
,
940 tmp_entry
.vme_start
),
941 (mach_vm_offset_t
) 0,
945 (ipc_port_t
)(uintptr_t) unprotected_mem_obj
,
948 tmp_entry
.protection
,
949 tmp_entry
.max_protection
,
950 tmp_entry
.inheritance
);
951 assertf(kr
== KERN_SUCCESS
,
953 assertf(map_addr
== tmp_entry
.vme_start
,
954 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
956 (uint64_t) tmp_entry
.vme_start
,
959 #if VM_MAP_DEBUG_APPLE_PROTECT
960 if (vm_map_debug_apple_protect
) {
961 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
962 " backing:[object:%p,offset:0x%llx,"
963 "crypto_backing_offset:0x%llx,"
964 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
967 (uint64_t) (map_addr
+ (tmp_entry
.vme_end
-
968 tmp_entry
.vme_start
)),
971 VME_OFFSET(&tmp_entry
),
972 crypto_backing_offset
,
976 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
979 * Release the reference obtained by
980 * apple_protect_pager_setup().
981 * The mapping (if it succeeded) is now holding a reference on
984 memory_object_deallocate(unprotected_mem_obj
);
985 unprotected_mem_obj
= MEMORY_OBJECT_NULL
;
987 /* continue with next map entry */
988 crypto_backing_offset
+= (tmp_entry
.vme_end
-
989 tmp_entry
.vme_start
);
990 crypto_backing_offset
-= crypto_start
;
1000 #endif /* CONFIG_CODE_DECRYPTION */
1003 LCK_GRP_DECLARE(vm_map_lck_grp
, "vm_map");
1004 LCK_ATTR_DECLARE(vm_map_lck_attr
, 0, 0);
1005 LCK_ATTR_DECLARE(vm_map_lck_rw_attr
, 0, LCK_ATTR_DEBUG
);
1007 #if XNU_TARGET_OS_OSX
1008 int malloc_no_cow
= 0;
1009 #else /* XNU_TARGET_OS_OSX */
1010 int malloc_no_cow
= 1;
1011 #endif /* XNU_TARGET_OS_OSX */
1012 uint64_t vm_memory_malloc_no_cow_mask
= 0ULL;
1014 int vm_check_map_sanity
= 0;
1020 * Initialize the vm_map module. Must be called before
1021 * any other vm_map routines.
1023 * Map and entry structures are allocated from zones -- we must
1024 * initialize those zones.
1026 * There are three zones of interest:
1028 * vm_map_zone: used to allocate maps.
1029 * vm_map_entry_zone: used to allocate map entries.
1030 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1032 * The kernel allocates map entries from a special zone that is initially
1033 * "crammed" with memory. It would be difficult (perhaps impossible) for
1034 * the kernel to allocate more memory to an entry zone when it became
1035 * empty since the very act of allocating memory implies the creation
1042 const char *mez_name
= "VM map entries";
1046 PE_parse_boot_argn("debug4k_filter", &debug4k_filter
,
1047 sizeof(debug4k_filter
));
1048 #endif /* MACH_ASSERT */
1050 vm_map_zone
= zone_create(VM_MAP_ZONE_NAME
, sizeof(struct _vm_map
),
1053 vm_map_entry_zone
= zone_create(mez_name
, sizeof(struct vm_map_entry
),
1054 ZC_NOENCRYPT
| ZC_NOGZALLOC
| ZC_NOCALLOUT
);
1057 * Don't quarantine because we always need elements available
1058 * Disallow GC on this zone... to aid the GC.
1060 vm_map_entry_reserved_zone
= zone_create_ext(VME_RESERVED_ZONE_NAME
,
1061 sizeof(struct vm_map_entry
), VM_MAP_RESERVED_ZFLAGS
,
1062 ZONE_ID_ANY
, ^(zone_t z
) {
1063 zone_set_noexpand(z
, 64 * kentry_data_size
);
1066 vm_map_copy_zone
= zone_create_ext("VM map copies", sizeof(struct vm_map_copy
),
1067 ZC_NOENCRYPT
| ZC_CACHING
, ZONE_ID_VM_MAP_COPY
, NULL
);
1069 vm_map_holes_zone
= zone_create(VM_MAP_HOLES_ZONE_NAME
,
1070 sizeof(struct vm_map_links
), VM_MAP_HOLES_ZFLAGS
);
1073 * Add the stolen memory to zones, adjust zone size and stolen counts.
1075 zcram(vm_map_zone
, map_data
, map_data_size
);
1076 zcram(vm_map_entry_reserved_zone
, kentry_data
, kentry_data_size
);
1077 zcram(vm_map_holes_zone
, map_holes_data
, map_holes_data_size
);
1080 * Since these are covered by zones, remove them from stolen page accounting.
1082 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size
) + atop_64(kentry_data_size
) + atop_64(map_holes_data_size
));
1084 #if VM_MAP_DEBUG_APPLE_PROTECT
1085 PE_parse_boot_argn("vm_map_debug_apple_protect",
1086 &vm_map_debug_apple_protect
,
1087 sizeof(vm_map_debug_apple_protect
));
1088 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
#if VM_MAP_DEBUG_FOURK
	/*
	 * Read the "vm_map_debug_fourk" boot-arg into vm_map_debug_fourk.
	 * The guard must be VM_MAP_DEBUG_FOURK: that is the conditional under
	 * which vm_map_debug_fourk is defined, so any other macro name either
	 * skips this parsing or references a variable that does not exist.
	 */
	PE_parse_boot_argn("vm_map_debug_fourk",
	    &vm_map_debug_fourk,
	    sizeof(vm_map_debug_fourk));
#endif /* VM_MAP_DEBUG_FOURK */
1094 PE_parse_boot_argn("vm_map_executable_immutable",
1095 &vm_map_executable_immutable
,
1096 sizeof(vm_map_executable_immutable
));
1097 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
1098 &vm_map_executable_immutable_verbose
,
1099 sizeof(vm_map_executable_immutable_verbose
));
1101 PE_parse_boot_argn("malloc_no_cow",
1103 sizeof(malloc_no_cow
));
1104 if (malloc_no_cow
) {
1105 vm_memory_malloc_no_cow_mask
= 0ULL;
1106 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC
;
1107 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_SMALL
;
1108 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_MEDIUM
;
1109 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_LARGE
;
1110 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1111 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1112 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_TINY
;
1113 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE
;
1114 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED
;
1115 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_NANO
;
1116 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1117 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1118 &vm_memory_malloc_no_cow_mask
,
1119 sizeof(vm_memory_malloc_no_cow_mask
));
1123 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity
, sizeof(vm_check_map_sanity
));
1124 if (vm_check_map_sanity
) {
1125 kprintf("VM sanity checking enabled\n");
1127 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1131 #if DEVELOPMENT || DEBUG
1132 PE_parse_boot_argn("panic_on_unsigned_execute",
1133 &panic_on_unsigned_execute
,
1134 sizeof(panic_on_unsigned_execute
));
1135 #endif /* DEVELOPMENT || DEBUG */
1140 vm_map_steal_memory(void)
1142 uint16_t kentry_initial_pages
;
1144 map_data_size
= zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME
,
1145 sizeof(struct _vm_map
), VM_MAP_ZFLAGS
, 1);
1148 * kentry_initial_pages corresponds to the number of kernel map entries
1149 * required during bootstrap until the asynchronous replenishment
1150 * scheme is activated and/or entries are available from the general
1153 #if defined(__LP64__)
1154 kentry_initial_pages
= 10;
1156 kentry_initial_pages
= 6;
1160 /* If using the guard allocator, reserve more memory for the kernel
1161 * reserved map entry pool.
1163 if (gzalloc_enabled()) {
1164 kentry_initial_pages
*= 1024;
1168 kentry_data_size
= zone_get_foreign_alloc_size(VME_RESERVED_ZONE_NAME
,
1169 sizeof(struct vm_map_entry
), VM_MAP_RESERVED_ZFLAGS
,
1170 kentry_initial_pages
);
1172 map_holes_data_size
= zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME
,
1173 sizeof(struct vm_map_links
), VM_MAP_HOLES_ZFLAGS
,
1174 kentry_initial_pages
);
1177 * Steal a contiguous range of memory so that a simple range check
1178 * can validate foreign addresses being freed/crammed to these
1181 vm_size_t total_size
;
1182 if (os_add3_overflow(map_data_size
, kentry_data_size
,
1183 map_holes_data_size
, &total_size
)) {
1184 panic("vm_map_steal_memory: overflow in amount of memory requested");
1186 map_data
= zone_foreign_mem_init(total_size
);
1187 kentry_data
= map_data
+ map_data_size
;
1188 map_holes_data
= kentry_data
+ kentry_data_size
;
1190 STARTUP(PMAP_STEAL
, STARTUP_RANK_FIRST
, vm_map_steal_memory
);
1192 boolean_t vm_map_supports_hole_optimization
= FALSE
;
1195 vm_kernel_reserved_entry_init(void)
1197 zone_prio_refill_configure(vm_map_entry_reserved_zone
);
1200 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1202 zone_prio_refill_configure(vm_map_holes_zone
);
1203 vm_map_supports_hole_optimization
= TRUE
;
1207 vm_map_disable_hole_optimization(vm_map_t map
)
1209 vm_map_entry_t head_entry
, hole_entry
, next_hole_entry
;
1211 if (map
->holelistenabled
) {
1212 head_entry
= hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
1214 while (hole_entry
!= NULL
) {
1215 next_hole_entry
= hole_entry
->vme_next
;
1217 hole_entry
->vme_next
= NULL
;
1218 hole_entry
->vme_prev
= NULL
;
1219 zfree(vm_map_holes_zone
, hole_entry
);
1221 if (next_hole_entry
== head_entry
) {
1224 hole_entry
= next_hole_entry
;
1228 map
->holes_list
= NULL
;
1229 map
->holelistenabled
= FALSE
;
1231 map
->first_free
= vm_map_first_entry(map
);
1232 SAVE_HINT_HOLE_WRITE(map
, NULL
);
1237 vm_kernel_map_is_kernel(vm_map_t map
)
1239 return map
->pmap
== kernel_pmap
;
1245 * Creates and returns a new empty VM map with
1246 * the given physical map structure, and having
1247 * the given lower and upper address bounds.
1253 vm_map_offset_t min
,
1254 vm_map_offset_t max
,
1261 options
|= VM_MAP_CREATE_PAGEABLE
;
1263 return vm_map_create_options(pmap
, min
, max
, options
);
1267 vm_map_create_options(
1269 vm_map_offset_t min
,
1270 vm_map_offset_t max
,
1274 struct vm_map_links
*hole_entry
= NULL
;
1276 if (options
& ~(VM_MAP_CREATE_ALL_OPTIONS
)) {
1277 /* unknown option */
1281 result
= (vm_map_t
) zalloc(vm_map_zone
);
1282 if (result
== VM_MAP_NULL
) {
1283 panic("vm_map_create");
1286 vm_map_first_entry(result
) = vm_map_to_entry(result
);
1287 vm_map_last_entry(result
) = vm_map_to_entry(result
);
1288 result
->hdr
.nentries
= 0;
1289 if (options
& VM_MAP_CREATE_PAGEABLE
) {
1290 result
->hdr
.entries_pageable
= TRUE
;
1292 result
->hdr
.entries_pageable
= FALSE
;
1295 vm_map_store_init( &(result
->hdr
));
1297 result
->hdr
.page_shift
= PAGE_SHIFT
;
1300 result
->user_wire_limit
= MACH_VM_MAX_ADDRESS
; /* default limit is unlimited */
1301 result
->user_wire_size
= 0;
1302 #if XNU_TARGET_OS_OSX
1303 result
->vmmap_high_start
= 0;
1305 os_ref_init_count(&result
->map_refcnt
, &map_refgrp
, 1);
1307 result
->res_count
= 1;
1308 result
->sw_state
= MAP_SW_IN
;
1309 #endif /* TASK_SWAPPER */
1310 result
->pmap
= pmap
;
1311 result
->min_offset
= min
;
1312 result
->max_offset
= max
;
1313 result
->wiring_required
= FALSE
;
1314 result
->no_zero_fill
= FALSE
;
1315 result
->mapped_in_other_pmaps
= FALSE
;
1316 result
->wait_for_space
= FALSE
;
1317 result
->switch_protect
= FALSE
;
1318 result
->disable_vmentry_reuse
= FALSE
;
1319 result
->map_disallow_data_exec
= FALSE
;
1320 result
->is_nested_map
= FALSE
;
1321 result
->map_disallow_new_exec
= FALSE
;
1322 result
->terminated
= FALSE
;
1323 result
->cs_enforcement
= FALSE
;
1324 result
->highest_entry_end
= 0;
1325 result
->first_free
= vm_map_to_entry(result
);
1326 result
->hint
= vm_map_to_entry(result
);
1327 result
->jit_entry_exists
= FALSE
;
1328 result
->is_alien
= FALSE
;
1329 result
->reserved_regions
= FALSE
;
1331 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1332 if (options
& VM_MAP_CREATE_CORPSE_FOOTPRINT
) {
1333 result
->has_corpse_footprint
= TRUE
;
1334 result
->holelistenabled
= FALSE
;
1335 result
->vmmap_corpse_footprint
= NULL
;
1337 result
->has_corpse_footprint
= FALSE
;
1338 if (vm_map_supports_hole_optimization
) {
1339 hole_entry
= zalloc(vm_map_holes_zone
);
1341 hole_entry
->start
= min
;
1342 #if defined(__arm__) || defined(__arm64__)
1343 hole_entry
->end
= result
->max_offset
;
1345 hole_entry
->end
= (max
> (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
) ? max
: (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
;
1347 result
->holes_list
= result
->hole_hint
= hole_entry
;
1348 hole_entry
->prev
= hole_entry
->next
= CAST_TO_VM_MAP_ENTRY(hole_entry
);
1349 result
->holelistenabled
= TRUE
;
1351 result
->holelistenabled
= FALSE
;
1355 vm_map_lock_init(result
);
1356 lck_mtx_init_ext(&result
->s_lock
, &result
->s_lock_ext
, &vm_map_lck_grp
, &vm_map_lck_attr
);
1362 vm_map_adjusted_size(vm_map_t map
)
1364 struct vm_reserved_region
*regions
= NULL
;
1365 size_t num_regions
= 0;
1366 mach_vm_size_t reserved_size
= 0, map_size
= 0;
1368 if (map
== NULL
|| (map
->size
== 0)) {
1372 map_size
= map
->size
;
1374 if (map
->reserved_regions
== FALSE
|| !vm_map_is_exotic(map
) || map
->terminated
) {
1376 * No special reserved regions or not an exotic map or the task
1377 * is terminating and these special regions might have already
/* 'regions' is filled in by the callee, so its address is passed; it is indexed below to sum the reserved sizes. */
1383 num_regions
= ml_get_vm_reserved_regions(vm_map_is_64bit(map
), &regions
);
1384 assert((num_regions
== 0) || (num_regions
> 0 && regions
!= NULL
));
1386 while (num_regions
) {
1387 reserved_size
+= regions
[--num_regions
].vmrr_size
;
1391 * There are a few places where the map is being switched out due to
1392 * 'termination' without that bit being set (e.g. exec and corpse purging).
1393 * In those cases, we could have the map's regions being deallocated on
1394 * a core while some accounting process is trying to get the map's size.
1395 * So this assert can't be enabled till all those places are uniform in
1396 * their use of the 'map->terminated' bit.
1398 * assert(map_size >= reserved_size);
1401 return (map_size
>= reserved_size
) ? (map_size
- reserved_size
) : map_size
;
1405 * vm_map_entry_create: [ internal use only ]
1407 * Allocates a VM map entry for insertion in the
1408 * given map (or map copy). No fields are filled.
1410 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1412 #define vm_map_copy_entry_create(copy, map_locked) \
1413 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1414 unsigned reserved_zalloc_count
, nonreserved_zalloc_count
;
1416 static vm_map_entry_t
1417 _vm_map_entry_create(
1418 struct vm_map_header
*map_header
, boolean_t __unused map_locked
)
1421 vm_map_entry_t entry
;
1423 zone
= vm_map_entry_zone
;
1425 assert(map_header
->entries_pageable
? !map_locked
: TRUE
);
1427 if (map_header
->entries_pageable
) {
1428 entry
= (vm_map_entry_t
) zalloc(zone
);
1430 entry
= (vm_map_entry_t
) zalloc_noblock(zone
);
1432 if (entry
== VM_MAP_ENTRY_NULL
) {
1433 zone
= vm_map_entry_reserved_zone
;
1434 entry
= (vm_map_entry_t
) zalloc(zone
);
1435 OSAddAtomic(1, &reserved_zalloc_count
);
1437 OSAddAtomic(1, &nonreserved_zalloc_count
);
1441 if (entry
== VM_MAP_ENTRY_NULL
) {
1442 panic("vm_map_entry_create");
1444 *entry
= vm_map_entry_template
;
1445 entry
->from_reserved_zone
= (zone
== vm_map_entry_reserved_zone
);
1447 vm_map_store_update((vm_map_t
) NULL
, entry
, VM_MAP_ENTRY_CREATE
);
1448 #if MAP_ENTRY_CREATION_DEBUG
1449 entry
->vme_creation_maphdr
= map_header
;
1450 backtrace(&entry
->vme_creation_bt
[0],
1451 (sizeof(entry
->vme_creation_bt
) / sizeof(uintptr_t)), NULL
);
1457 * vm_map_entry_dispose: [ internal use only ]
1459 * Inverse of vm_map_entry_create.
1461 * write map lock held so no need to
1462 * do anything special to ensure correctness
1465 #define vm_map_entry_dispose(map, entry) \
1466 _vm_map_entry_dispose(&(map)->hdr, (entry))
1468 #define vm_map_copy_entry_dispose(copy, entry) \
1469 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1472 _vm_map_entry_dispose(
1473 struct vm_map_header
*map_header
,
1474 vm_map_entry_t entry
)
1478 if (map_header
->entries_pageable
|| !(entry
->from_reserved_zone
)) {
1479 zone
= vm_map_entry_zone
;
1481 zone
= vm_map_entry_reserved_zone
;
1484 if (!map_header
->entries_pageable
) {
1485 if (zone
== vm_map_entry_zone
) {
1486 OSAddAtomic(-1, &nonreserved_zalloc_count
);
1488 OSAddAtomic(-1, &reserved_zalloc_count
);
1496 static boolean_t first_free_check
= FALSE
;
1498 first_free_is_valid(
1501 if (!first_free_check
) {
1505 return first_free_is_valid_store( map
);
1507 #endif /* MACH_ASSERT */
1510 #define vm_map_copy_entry_link(copy, after_where, entry) \
1511 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1513 #define vm_map_copy_entry_unlink(copy, entry) \
1514 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1516 #if MACH_ASSERT && TASK_SWAPPER
1518 * vm_map_res_reference:
1520 * Adds another valid residence count to the given map.
1522 * Map is locked so this function can be called from
1527 vm_map_res_reference(vm_map_t map
)
1529 /* assert map is locked */
1530 assert(map
->res_count
>= 0);
1531 assert(os_ref_get_count(&map
->map_refcnt
) >= map
->res_count
);
1532 if (map
->res_count
== 0) {
1533 lck_mtx_unlock(&map
->s_lock
);
1536 lck_mtx_lock(&map
->s_lock
);
1545 * vm_map_reference_swap:
1547 * Adds valid reference and residence counts to the given map.
1549 * The map may not be in memory (i.e. zero residence count).
1553 vm_map_reference_swap(vm_map_t map
)
1555 assert(map
!= VM_MAP_NULL
);
1556 lck_mtx_lock(&map
->s_lock
);
1557 assert(map
->res_count
>= 0);
1558 assert(os_ref_get_count(&map
->map_refcnt
) >= map
->res_count
);
1559 os_ref_retain_locked(&map
->map_refcnt
);
1560 vm_map_res_reference(map
);
1561 lck_mtx_unlock(&map
->s_lock
);
1565 * vm_map_res_deallocate:
1567 * Decrement residence count on a map; possibly causing swapout.
1569 * The map must be in memory (i.e. non-zero residence count).
1571 * The map is locked, so this function is callable from vm_map_deallocate.
1575 vm_map_res_deallocate(vm_map_t map
)
1577 assert(map
->res_count
> 0);
1578 if (--map
->res_count
== 0) {
1579 lck_mtx_unlock(&map
->s_lock
);
1581 vm_map_swapout(map
);
1583 lck_mtx_lock(&map
->s_lock
);
1585 assert(os_ref_get_count(&map
->map_refcnt
) >= map
->res_count
);
1587 #endif /* MACH_ASSERT && TASK_SWAPPER */
1592 * Actually destroy a map.
1601 /* final cleanup: no need to unnest shared region */
1602 flags
|= VM_MAP_REMOVE_NO_UNNESTING
;
1603 /* final cleanup: ok to remove immutable mappings */
1604 flags
|= VM_MAP_REMOVE_IMMUTABLE
;
1605 /* final cleanup: allow gaps in range */
1606 flags
|= VM_MAP_REMOVE_GAPS_OK
;
1608 /* clean up regular map entries */
1609 (void) vm_map_delete(map
, map
->min_offset
, map
->max_offset
,
1610 flags
, VM_MAP_NULL
);
1611 /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1612 #if !defined(__arm__)
1613 (void) vm_map_delete(map
, 0x0, 0xFFFFFFFFFFFFF000ULL
,
1614 flags
, VM_MAP_NULL
);
1615 #endif /* !__arm__ */
1617 vm_map_disable_hole_optimization(map
);
1618 vm_map_corpse_footprint_destroy(map
);
1622 assert(map
->hdr
.nentries
== 0);
1625 pmap_destroy(map
->pmap
);
1628 if (vm_map_lck_attr
.lck_attr_val
& LCK_ATTR_DEBUG
) {
1630 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1631 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1632 * structure or kalloc'ed via lck_mtx_init.
1633 * An example is s_lock_ext within struct _vm_map.
1635 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1636 * can add another tag to detect embedded vs alloc'ed indirect external
1637 * mutexes but that'll be additional checks in the lock path and require
1638 * updating dependencies for the old vs new tag.
1640 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1641 * just when lock debugging is ON, we choose to forego explicitly destroying
1642 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1643 * count on vm_map_lck_grp, which has no serious side-effect.
1646 lck_rw_destroy(&(map
)->lock
, &vm_map_lck_grp
);
1647 lck_mtx_destroy(&(map
)->s_lock
, &vm_map_lck_grp
);
1650 zfree(vm_map_zone
, map
);
1654 * Returns pid of the task with the largest number of VM map entries.
1655 * Used in the zone-map-exhaustion jetsam path.
1658 find_largest_process_vm_map_entries(void)
1660 pid_t victim_pid
= -1;
1661 int max_vm_map_entries
= 0;
1662 task_t task
= TASK_NULL
;
1663 queue_head_t
*task_list
= &tasks
;
1665 lck_mtx_lock(&tasks_threads_lock
);
1666 queue_iterate(task_list
, task
, task_t
, tasks
) {
1667 if (task
== kernel_task
|| !task
->active
) {
1671 vm_map_t task_map
= task
->map
;
1672 if (task_map
!= VM_MAP_NULL
) {
1673 int task_vm_map_entries
= task_map
->hdr
.nentries
;
1674 if (task_vm_map_entries
> max_vm_map_entries
) {
1675 max_vm_map_entries
= task_vm_map_entries
;
1676 victim_pid
= pid_from_task(task
);
1680 lck_mtx_unlock(&tasks_threads_lock
);
1682 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid
, max_vm_map_entries
);
1688 * vm_map_swapin/vm_map_swapout
1690 * Swap a map in and out, either referencing or releasing its resources.
1691 * These functions are internal use only; however, they must be exported
1692 * because they may be called from macros, which are exported.
1694 * In the case of swapout, there could be races on the residence count,
1695 * so if the residence count is up, we return, assuming that a
1696 * vm_map_deallocate() call in the near future will bring us back.
1699 * -- We use the map write lock for synchronization among races.
1700 * -- The map write lock, and not the simple s_lock, protects the
1701 * swap state of the map.
1702 * -- If a map entry is a share map, then we hold both locks, in
1703 * hierarchical order.
1705 * Synchronization Notes:
1706 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1707 * will block on the map lock and proceed when swapout is through.
1708 * 2) A vm_map_reference() call at this time is illegal, and will
1709 * cause a panic. vm_map_reference() is only allowed on resident
1710 * maps, since it refuses to block.
1711 * 3) A vm_map_swapin() call during a swapin will block, and
1712 * proceed when the first swapin is done, turning into a nop.
1713 * This is the reason the res_count is not incremented until
1714 * after the swapin is complete.
1715 * 4) There is a timing hole after the checks of the res_count, before
1716 * the map lock is taken, during which a swapin may get the lock
1717 * before a swapout about to happen. If this happens, the swapin
1718 * will detect the state and increment the reference count, causing
1719 * the swapout to be a nop, thereby delaying it until a later
1720 * vm_map_deallocate. If the swapout gets the lock first, then
1721 * the swapin will simply block until the swapout is done, and
1724 * Because vm_map_swapin() is potentially an expensive operation, it
1725 * should be used with caution.
1728 * 1) A map with a residence count of zero is either swapped, or
1730 * 2) A map with a non-zero residence count is either resident,
1731 * or being swapped in.
1734 int vm_map_swap_enable
= 1;
1737 vm_map_swapin(vm_map_t map
)
1739 vm_map_entry_t entry
;
1741 if (!vm_map_swap_enable
) { /* debug */
1747 * First deal with various races.
1749 if (map
->sw_state
== MAP_SW_IN
) {
1751 * we raced with swapout and won. Returning will incr.
1752 * the res_count, turning the swapout into a nop.
1758 * The residence count must be zero. If we raced with another
1759 * swapin, the state would have been IN; if we raced with a
1760 * swapout (after another competing swapin), we must have lost
1761 * the race to get here (see above comment), in which case
1762 * res_count is still 0.
1764 assert(map
->res_count
== 0);
1767 * There are no intermediate states of a map going out or
1768 * coming in, since the map is locked during the transition.
1770 assert(map
->sw_state
== MAP_SW_OUT
);
1773 * We now operate upon each map entry. If the entry is a sub-
1774 * or share-map, we call vm_map_res_reference upon it.
1775 * If the entry is an object, we call vm_object_res_reference
1776 * (this may iterate through the shadow chain).
1777 * Note that we hold the map locked the entire time,
1778 * even if we get back here via a recursive call in
1779 * vm_map_res_reference.
1781 entry
= vm_map_first_entry(map
);
1783 while (entry
!= vm_map_to_entry(map
)) {
1784 if (VME_OBJECT(entry
) != VM_OBJECT_NULL
) {
1785 if (entry
->is_sub_map
) {
1786 vm_map_t lmap
= VME_SUBMAP(entry
);
1787 lck_mtx_lock(&lmap
->s_lock
);
1788 vm_map_res_reference(lmap
);
1789 lck_mtx_unlock(&lmap
->s_lock
);
/* Fetch the entry's VM object with the same accessor used for the VM_OBJECT_NULL check above. */
1791 vm_object_t object
= VME_OBJECT(entry
);
1792 vm_object_lock(object
);
1794 * This call may iterate through the
1797 vm_object_res_reference(object
);
1798 vm_object_unlock(object
);
1801 entry
= entry
->vme_next
;
1803 assert(map
->sw_state
== MAP_SW_OUT
);
1804 map
->sw_state
= MAP_SW_IN
;
1808 vm_map_swapout(vm_map_t map
)
1810 vm_map_entry_t entry
;
1814 * First deal with various races.
1815 * If we raced with a swapin and lost, the residence count
1816 * will have been incremented to 1, and we simply return.
1818 lck_mtx_lock(&map
->s_lock
);
1819 if (map
->res_count
!= 0) {
1820 lck_mtx_unlock(&map
->s_lock
);
1823 lck_mtx_unlock(&map
->s_lock
);
1826 * There are no intermediate states of a map going out or
1827 * coming in, since the map is locked during the transition.
1829 assert(map
->sw_state
== MAP_SW_IN
);
1831 if (!vm_map_swap_enable
) {
1836 * We now operate upon each map entry. If the entry is a sub-
1837 * or share-map, we call vm_map_res_deallocate upon it.
1838 * If the entry is an object, we call vm_object_res_deallocate
1839 * (this may iterate through the shadow chain).
1840 * Note that we hold the map locked the entire time,
1841 * even if we get back here via a recursive call in
1842 * vm_map_res_deallocate.
1844 entry
= vm_map_first_entry(map
);
1846 while (entry
!= vm_map_to_entry(map
)) {
1847 if (VME_OBJECT(entry
) != VM_OBJECT_NULL
) {
1848 if (entry
->is_sub_map
) {
1849 vm_map_t lmap
= VME_SUBMAP(entry
);
1850 lck_mtx_lock(&lmap
->s_lock
);
1851 vm_map_res_deallocate(lmap
);
1852 lck_mtx_unlock(&lmap
->s_lock
);
1854 vm_object_t object
= VME_OBJECT(entry
);
1855 vm_object_lock(object
);
1857 * This call may take a long time,
1858 * since it could actively push
1859 * out pages (if we implement it
1862 vm_object_res_deallocate(object
);
1863 vm_object_unlock(object
);
1866 entry
= entry
->vme_next
;
1868 assert(map
->sw_state
== MAP_SW_IN
);
1869 map
->sw_state
= MAP_SW_OUT
;
1872 #endif /* TASK_SWAPPER */
1875 * vm_map_lookup_entry: [ internal use only ]
1877 * Calls into the vm map store layer to find the map
1878 * entry containing (or immediately preceding) the
1879 * specified address in the given map; the entry is returned
1880 * in the "entry" parameter. The boolean
1881 * result indicates whether the address is
1882 * actually contained in the map.
1885 vm_map_lookup_entry(
1887 vm_map_offset_t address
,
1888 vm_map_entry_t
*entry
) /* OUT */
1890 return vm_map_store_lookup_entry( map
, address
, entry
);
1894 * Routine: vm_map_find_space
1896 * Allocate a range in the specified virtual address map,
1897 * returning the entry allocated for that range.
1898 * Used by kmem_alloc, etc.
1900 * The map must NOT be locked. It will be returned locked
1901 * on KERN_SUCCESS, unlocked on failure.
1903 * If an entry is allocated, the object/offset fields
1904 * are initialized to zero.
1906 * If VM_MAP_FIND_LAST_FREE flag is set, allocate from end of map. This
1907 * is currently only used for allocating memory for zones backing
1908 * one of the kalloc heaps (rdar://65832263).
1913 vm_map_offset_t
*address
, /* OUT */
1915 vm_map_offset_t mask
,
1917 vm_map_kernel_flags_t vmk_flags
,
1919 vm_map_entry_t
*o_entry
) /* OUT */
1921 vm_map_entry_t entry
, new_entry
, hole_entry
;
1922 vm_map_offset_t start
;
1923 vm_map_offset_t end
;
1927 return KERN_INVALID_ARGUMENT
;
1930 new_entry
= vm_map_entry_create(map
, FALSE
);
1933 if (flags
& VM_MAP_FIND_LAST_FREE
) {
1934 assert(!map
->disable_vmentry_reuse
);
1935 /* TODO: Make backward lookup generic and support guard pages */
1936 assert(!vmk_flags
.vmkf_guard_after
&& !vmk_flags
.vmkf_guard_before
);
1937 assert(VM_MAP_PAGE_ALIGNED(size
, VM_MAP_PAGE_MASK(map
)));
1939 /* Allocate space from end of map */
1940 vm_map_store_find_last_free(map
, &entry
);
1946 if (entry
== vm_map_to_entry(map
)) {
1947 end
= map
->max_offset
;
1949 end
= entry
->vme_start
;
1953 vm_map_entry_t prev
;
1957 if ((start
< map
->min_offset
) || end
< start
) {
1961 prev
= entry
->vme_prev
;
1964 if (prev
== vm_map_to_entry(map
)) {
1968 if (prev
->vme_end
<= start
) {
1973 * Didn't fit -- move to the next entry.
1976 end
= entry
->vme_start
;
1979 if (vmk_flags
.vmkf_guard_after
) {
1980 /* account for the back guard page in the size */
1981 size
+= VM_MAP_PAGE_SIZE(map
);
1985 * Look for the first possible address; if there's already
1986 * something at this address, we have to start after it.
1989 if (map
->disable_vmentry_reuse
== TRUE
) {
1990 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
1992 if (map
->holelistenabled
) {
1993 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
1995 if (hole_entry
== NULL
) {
1997 * No more space in the map?
2003 start
= entry
->vme_start
;
2005 assert(first_free_is_valid(map
));
2006 if ((entry
= map
->first_free
) == vm_map_to_entry(map
)) {
2007 start
= map
->min_offset
;
2009 start
= entry
->vme_end
;
2015 * In any case, the "entry" always precedes
2016 * the proposed new region throughout the loop:
2020 vm_map_entry_t next
;
2023 * Find the end of the proposed new region.
2024 * Be sure we didn't go beyond the end, or
2025 * wrap around the address.
2028 if (vmk_flags
.vmkf_guard_before
) {
2029 /* reserve space for the front guard page */
2030 start
+= VM_MAP_PAGE_SIZE(map
);
2032 end
= ((start
+ mask
) & ~mask
);
2038 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
2040 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
2042 if ((end
> map
->max_offset
) || (end
< start
)) {
2046 next
= entry
->vme_next
;
2048 if (map
->holelistenabled
) {
2049 if (entry
->vme_end
>= end
) {
2054 * If there are no more entries, we must win.
2058 * If there is another entry, it must be
2059 * after the end of the potential new region.
2062 if (next
== vm_map_to_entry(map
)) {
2066 if (next
->vme_start
>= end
) {
2072 * Didn't fit -- move to the next entry.
2077 if (map
->holelistenabled
) {
2078 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
2084 start
= entry
->vme_start
;
2086 start
= entry
->vme_end
;
2090 if (vmk_flags
.vmkf_guard_before
) {
2091 /* go back for the front guard page */
2092 start
-= VM_MAP_PAGE_SIZE(map
);
2096 if (map
->holelistenabled
) {
2097 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
2098 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
2104 * "start" and "end" should define the endpoints of the
2105 * available new range, and
2106 * "entry" should refer to the region before the new
2109 * the map should be locked.
2114 assert(start
< end
);
2115 new_entry
->vme_start
= start
;
2116 new_entry
->vme_end
= end
;
2117 assert(page_aligned(new_entry
->vme_start
));
2118 assert(page_aligned(new_entry
->vme_end
));
2119 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_start
,
2120 VM_MAP_PAGE_MASK(map
)));
2121 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_end
,
2122 VM_MAP_PAGE_MASK(map
)));
2124 new_entry
->is_shared
= FALSE
;
2125 new_entry
->is_sub_map
= FALSE
;
2126 new_entry
->use_pmap
= TRUE
;
2127 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
2128 VME_OFFSET_SET(new_entry
, (vm_object_offset_t
) 0);
2130 new_entry
->needs_copy
= FALSE
;
2132 new_entry
->inheritance
= VM_INHERIT_DEFAULT
;
2133 new_entry
->protection
= VM_PROT_DEFAULT
;
2134 new_entry
->max_protection
= VM_PROT_ALL
;
2135 new_entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
2136 new_entry
->wired_count
= 0;
2137 new_entry
->user_wired_count
= 0;
2139 new_entry
->in_transition
= FALSE
;
2140 new_entry
->needs_wakeup
= FALSE
;
2141 new_entry
->no_cache
= FALSE
;
2142 new_entry
->permanent
= FALSE
;
2143 new_entry
->superpage_size
= FALSE
;
2144 if (VM_MAP_PAGE_SHIFT(map
) != PAGE_SHIFT
) {
2145 new_entry
->map_aligned
= TRUE
;
2147 new_entry
->map_aligned
= FALSE
;
2150 new_entry
->used_for_jit
= FALSE
;
2151 new_entry
->pmap_cs_associated
= FALSE
;
2152 new_entry
->zero_wired_pages
= FALSE
;
2153 new_entry
->iokit_acct
= FALSE
;
2154 new_entry
->vme_resilient_codesign
= FALSE
;
2155 new_entry
->vme_resilient_media
= FALSE
;
2156 if (vmk_flags
.vmkf_atomic_entry
) {
2157 new_entry
->vme_atomic
= TRUE
;
2159 new_entry
->vme_atomic
= FALSE
;
2162 VME_ALIAS_SET(new_entry
, tag
);
2165 * Insert the new entry into the list
2168 vm_map_store_entry_link(map
, entry
, new_entry
, VM_MAP_KERNEL_FLAGS_NONE
);
2173 * Update the lookup hint
2175 SAVE_HINT_MAP_WRITE(map
, new_entry
);
2177 *o_entry
= new_entry
;
2178 return KERN_SUCCESS
;
2182 vm_map_entry_dispose(map
, new_entry
);
2184 return KERN_NO_SPACE
;
2187 int vm_map_pmap_enter_print
= FALSE
;
2188 int vm_map_pmap_enter_enable
= FALSE
;
2191 * Routine: vm_map_pmap_enter [internal only]
2194 * Force pages from the specified object to be entered into
2195 * the pmap at the specified address if they are present.
2196 * As soon as a page is not found in the object, the scan ends.
2201 * In/out conditions:
2202 * The source map should not be locked on entry.
2204 __unused
static void
2207 vm_map_offset_t addr
,
2208 vm_map_offset_t end_addr
,
2210 vm_object_offset_t offset
,
2211 vm_prot_t protection
)
2215 struct vm_object_fault_info fault_info
= {};
2217 if (map
->pmap
== 0) {
2221 assert(VM_MAP_PAGE_SHIFT(map
) == PAGE_SHIFT
);
2223 while (addr
< end_addr
) {
2229 * From vm_map_enter(), we come into this function without the map
2230 * lock held or the object lock held.
2231 * We haven't taken a reference on the object either.
2232 * We should do a proper lookup on the map to make sure
2233 * that things are sane before we go locking objects that
2234 * could have been deallocated from under us.
2237 vm_object_lock(object
);
2239 m
= vm_page_lookup(object
, offset
);
2241 if (m
== VM_PAGE_NULL
|| m
->vmp_busy
|| m
->vmp_fictitious
||
2242 (m
->vmp_unusual
&& (m
->vmp_error
|| m
->vmp_restart
|| m
->vmp_absent
))) {
2243 vm_object_unlock(object
);
2247 if (vm_map_pmap_enter_print
) {
2248 printf("vm_map_pmap_enter:");
2249 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2250 map
, (unsigned long long)addr
, object
, (unsigned long long)offset
);
2252 type_of_fault
= DBG_CACHE_HIT_FAULT
;
2253 kr
= vm_fault_enter(m
, map
->pmap
,
2256 protection
, protection
,
2258 FALSE
, /* change_wiring */
2259 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
2261 NULL
, /* need_retry */
2264 vm_object_unlock(object
);
2266 offset
+= PAGE_SIZE_64
;
2271 boolean_t
vm_map_pmap_is_empty(
2273 vm_map_offset_t start
,
2274 vm_map_offset_t end
);
2276 vm_map_pmap_is_empty(
2278 vm_map_offset_t start
,
2279 vm_map_offset_t end
)
2281 #ifdef MACHINE_PMAP_IS_EMPTY
2282 return pmap_is_empty(map
->pmap
, start
, end
);
2283 #else /* MACHINE_PMAP_IS_EMPTY */
2284 vm_map_offset_t offset
;
2287 if (map
->pmap
== NULL
) {
2291 for (offset
= start
;
2293 offset
+= PAGE_SIZE
) {
2294 phys_page
= pmap_find_phys(map
->pmap
, offset
);
2296 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2297 "page %d at 0x%llx\n",
2298 map
, (long long)start
, (long long)end
,
2299 phys_page
, (long long)offset
);
2304 #endif /* MACHINE_PMAP_IS_EMPTY */
2307 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2309 vm_map_random_address_for_size(
2311 vm_map_offset_t
*address
,
2314 kern_return_t kr
= KERN_SUCCESS
;
2316 vm_map_offset_t random_addr
= 0;
2317 vm_map_offset_t hole_end
;
2319 vm_map_entry_t next_entry
= VM_MAP_ENTRY_NULL
;
2320 vm_map_entry_t prev_entry
= VM_MAP_ENTRY_NULL
;
2321 vm_map_size_t vm_hole_size
= 0;
2322 vm_map_size_t addr_space_size
;
2324 addr_space_size
= vm_map_max(map
) - vm_map_min(map
);
2326 assert(VM_MAP_PAGE_ALIGNED(size
, VM_MAP_PAGE_MASK(map
)));
2328 while (tries
< MAX_TRIES_TO_GET_RANDOM_ADDRESS
) {
2329 random_addr
= ((vm_map_offset_t
)random()) << VM_MAP_PAGE_SHIFT(map
);
2330 random_addr
= vm_map_trunc_page(
2331 vm_map_min(map
) + (random_addr
% addr_space_size
),
2332 VM_MAP_PAGE_MASK(map
));
2334 if (vm_map_lookup_entry(map
, random_addr
, &prev_entry
) == FALSE
) {
2335 if (prev_entry
== vm_map_to_entry(map
)) {
2336 next_entry
= vm_map_first_entry(map
);
2338 next_entry
= prev_entry
->vme_next
;
2340 if (next_entry
== vm_map_to_entry(map
)) {
2341 hole_end
= vm_map_max(map
);
2343 hole_end
= next_entry
->vme_start
;
2345 vm_hole_size
= hole_end
- random_addr
;
2346 if (vm_hole_size
>= size
) {
2347 *address
= random_addr
;
2354 if (tries
== MAX_TRIES_TO_GET_RANDOM_ADDRESS
) {
2361 vm_memory_malloc_no_cow(
2364 uint64_t alias_mask
;
2370 alias_mask
= 1ULL << alias
;
2371 if (alias_mask
& vm_memory_malloc_no_cow_mask
) {
2378 * Routine: vm_map_enter
2381 * Allocate a range in the specified virtual address map.
2382 * The resulting range will refer to memory defined by
2383 * the given memory object and offset into that object.
2385 * Arguments are as defined in the vm_map call.
2387 static unsigned int vm_map_enter_restore_successes
= 0;
2388 static unsigned int vm_map_enter_restore_failures
= 0;
2392 vm_map_offset_t
*address
, /* IN/OUT */
2394 vm_map_offset_t mask
,
2396 vm_map_kernel_flags_t vmk_flags
,
2399 vm_object_offset_t offset
,
2400 boolean_t needs_copy
,
2401 vm_prot_t cur_protection
,
2402 vm_prot_t max_protection
,
2403 vm_inherit_t inheritance
)
2405 vm_map_entry_t entry
, new_entry
;
2406 vm_map_offset_t start
, tmp_start
, tmp_offset
;
2407 vm_map_offset_t end
, tmp_end
;
2408 vm_map_offset_t tmp2_start
, tmp2_end
;
2409 vm_map_offset_t desired_empty_end
;
2410 vm_map_offset_t step
;
2411 kern_return_t result
= KERN_SUCCESS
;
2412 vm_map_t zap_old_map
= VM_MAP_NULL
;
2413 vm_map_t zap_new_map
= VM_MAP_NULL
;
2414 boolean_t map_locked
= FALSE
;
2415 boolean_t pmap_empty
= TRUE
;
2416 boolean_t new_mapping_established
= FALSE
;
2417 boolean_t keep_map_locked
= vmk_flags
.vmkf_keep_map_locked
;
2418 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
2419 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
2420 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
2421 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
2422 boolean_t is_submap
= vmk_flags
.vmkf_submap
;
2423 boolean_t permanent
= vmk_flags
.vmkf_permanent
;
2424 boolean_t no_copy_on_read
= vmk_flags
.vmkf_no_copy_on_read
;
2425 boolean_t entry_for_jit
= vmk_flags
.vmkf_map_jit
;
2426 boolean_t iokit_acct
= vmk_flags
.vmkf_iokit_acct
;
2427 boolean_t translated_allow_execute
= vmk_flags
.vmkf_translated_allow_execute
;
2428 boolean_t resilient_codesign
= ((flags
& VM_FLAGS_RESILIENT_CODESIGN
) != 0);
2429 boolean_t resilient_media
= ((flags
& VM_FLAGS_RESILIENT_MEDIA
) != 0);
2430 boolean_t random_address
= ((flags
& VM_FLAGS_RANDOM_ADDR
) != 0);
2431 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
2432 vm_tag_t user_alias
;
2433 vm_map_offset_t effective_min_offset
, effective_max_offset
;
2435 boolean_t clear_map_aligned
= FALSE
;
2436 vm_map_entry_t hole_entry
;
2437 vm_map_size_t chunk_size
= 0;
2439 assertf(vmk_flags
.__vmkf_unused
== 0, "vmk_flags unused=0x%x\n", vmk_flags
.__vmkf_unused
);
2441 if (flags
& VM_FLAGS_4GB_CHUNK
) {
2442 #if defined(__LP64__)
2443 chunk_size
= (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2444 #else /* __LP64__ */
2445 chunk_size
= ANON_CHUNK_SIZE
;
2446 #endif /* __LP64__ */
2448 chunk_size
= ANON_CHUNK_SIZE
;
2451 if (superpage_size
) {
2452 switch (superpage_size
) {
2454 * Note that the current implementation only supports
2455 * a single size for superpages, SUPERPAGE_SIZE, per
2456 * architecture. As soon as more sizes are supposed
2457 * to be supported, SUPERPAGE_SIZE has to be replaced
2458 * with a lookup of the size depending on superpage_size.
2461 case SUPERPAGE_SIZE_ANY
:
2462 /* handle it like 2 MB and round up to page size */
2463 size
= (size
+ 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2465 case SUPERPAGE_SIZE_2MB
:
2469 return KERN_INVALID_ARGUMENT
;
2471 mask
= SUPERPAGE_SIZE
- 1;
2472 if (size
& (SUPERPAGE_SIZE
- 1)) {
2473 return KERN_INVALID_ARGUMENT
;
2475 inheritance
= VM_INHERIT_NONE
; /* fork() children won't inherit superpages */
2479 if ((cur_protection
& VM_PROT_WRITE
) &&
2480 (cur_protection
& VM_PROT_EXECUTE
) &&
2481 #if XNU_TARGET_OS_OSX
2482 map
->pmap
!= kernel_pmap
&&
2483 (cs_process_global_enforcement() ||
2484 (vmk_flags
.vmkf_cs_enforcement_override
2485 ? vmk_flags
.vmkf_cs_enforcement
2486 : (vm_map_cs_enforcement(map
)
2488 || !VM_MAP_IS_EXOTIC(map
)
2489 #endif /* __arm64__ */
2491 #endif /* XNU_TARGET_OS_OSX */
2493 !pmap_cs_exempt(map
->pmap
) &&
2495 (VM_MAP_POLICY_WX_FAIL(map
) ||
2496 VM_MAP_POLICY_WX_STRIP_X(map
)) &&
2498 boolean_t vm_protect_wx_fail
= VM_MAP_POLICY_WX_FAIL(map
);
2503 vm_prot_t
, cur_protection
);
2504 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2506 (current_task()->bsd_info
2507 ? proc_name_address(current_task()->bsd_info
)
2510 (vm_protect_wx_fail
? "failing" : "turning off execute"));
2511 cur_protection
&= ~VM_PROT_EXECUTE
;
2512 if (vm_protect_wx_fail
) {
2513 return KERN_PROTECTION_FAILURE
;
2518 * If the task has requested executable lockdown,
2519 * deny any new executable mapping.
2521 if (map
->map_disallow_new_exec
== TRUE
) {
2522 if (cur_protection
& VM_PROT_EXECUTE
) {
2523 return KERN_PROTECTION_FAILURE
;
2527 if (resilient_codesign
) {
2529 int reject_prot
= (needs_copy
? VM_PROT_EXECUTE
: (VM_PROT_WRITE
| VM_PROT_EXECUTE
));
2530 if ((cur_protection
| max_protection
) & reject_prot
) {
2531 return KERN_PROTECTION_FAILURE
;
2535 if (resilient_media
) {
2537 // assert(!needs_copy);
2538 if (object
!= VM_OBJECT_NULL
&&
2539 !object
->internal
) {
2541 * This mapping is directly backed by an external
2542 * memory manager (e.g. a vnode pager for a file):
2543 * we would not have any safe place to inject
2544 * a zero-filled page if an actual page is not
2545 * available, without possibly impacting the actual
2546 * contents of the mapped object (e.g. the file),
2547 * so we can't provide any media resiliency here.
2549 return KERN_INVALID_ARGUMENT
;
2555 /* submaps can not be purgeable */
2556 return KERN_INVALID_ARGUMENT
;
2558 if (object
== VM_OBJECT_NULL
) {
2559 /* submaps can not be created lazily */
2560 return KERN_INVALID_ARGUMENT
;
2563 if (vmk_flags
.vmkf_already
) {
2565 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2566 * is already present. For it to be meaningful, the requested
2567 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2568 * we shouldn't try and remove what was mapped there first
2569 * (!VM_FLAGS_OVERWRITE).
2571 if ((flags
& VM_FLAGS_ANYWHERE
) ||
2572 (flags
& VM_FLAGS_OVERWRITE
)) {
2573 return KERN_INVALID_ARGUMENT
;
2577 effective_min_offset
= map
->min_offset
;
2579 if (vmk_flags
.vmkf_beyond_max
) {
2581 * Allow an insertion beyond the map's max offset.
2583 #if !defined(__arm__)
2584 if (vm_map_is_64bit(map
)) {
2585 effective_max_offset
= 0xFFFFFFFFFFFFF000ULL
;
2587 #endif /* __arm__ */
2588 effective_max_offset
= 0x00000000FFFFF000ULL
;
2590 #if XNU_TARGET_OS_OSX
2591 if (__improbable(vmk_flags
.vmkf_32bit_map_va
)) {
2592 effective_max_offset
= MIN(map
->max_offset
, 0x00000000FFFFF000ULL
);
2594 effective_max_offset
= map
->max_offset
;
2596 #else /* XNU_TARGET_OS_OSX */
2597 effective_max_offset
= map
->max_offset
;
2598 #endif /* XNU_TARGET_OS_OSX */
2602 (offset
& MIN(VM_MAP_PAGE_MASK(map
), PAGE_MASK_64
)) != 0) {
2604 return KERN_INVALID_ARGUMENT
;
2607 if (map
->pmap
== kernel_pmap
) {
2608 user_alias
= VM_KERN_MEMORY_NONE
;
2613 if (user_alias
== VM_MEMORY_MALLOC_MEDIUM
) {
2614 chunk_size
= MALLOC_MEDIUM_CHUNK_SIZE
;
2617 #define RETURN(value) { result = value; goto BailOut; }
2619 assertf(VM_MAP_PAGE_ALIGNED(*address
, FOURK_PAGE_MASK
), "0x%llx", (uint64_t)*address
);
2620 assertf(VM_MAP_PAGE_ALIGNED(size
, FOURK_PAGE_MASK
), "0x%llx", (uint64_t)size
);
2621 if (VM_MAP_PAGE_MASK(map
) >= PAGE_MASK
) {
2622 assertf(page_aligned(*address
), "0x%llx", (uint64_t)*address
);
2623 assertf(page_aligned(size
), "0x%llx", (uint64_t)size
);
2626 if (VM_MAP_PAGE_MASK(map
) >= PAGE_MASK
&&
2627 !VM_MAP_PAGE_ALIGNED(size
, VM_MAP_PAGE_MASK(map
))) {
2629 * In most cases, the caller rounds the size up to the
2631 * If we get a size that is explicitly not map-aligned here,
2632 * we'll have to respect the caller's wish and mark the
2633 * mapping as "not map-aligned" to avoid tripping the
2634 * map alignment checks later.
2636 clear_map_aligned
= TRUE
;
2639 VM_MAP_PAGE_MASK(map
) >= PAGE_MASK
&&
2640 !VM_MAP_PAGE_ALIGNED(*address
, VM_MAP_PAGE_MASK(map
))) {
2642 * We've been asked to map at a fixed address and that
2643 * address is not aligned to the map's specific alignment.
2644 * The caller should know what it's doing (i.e. most likely
2645 * mapping some fragmented copy map, transferring memory from
2646 * a VM map with a different alignment), so clear map_aligned
2647 * for this new VM map entry and proceed.
2649 clear_map_aligned
= TRUE
;
2653 * Only zero-fill objects are allowed to be purgable.
2654 * LP64todo - limit purgable objects to 32-bits for now
2658 (object
!= VM_OBJECT_NULL
&&
2659 (object
->vo_size
!= size
||
2660 object
->purgable
== VM_PURGABLE_DENY
))
2661 || size
> ANON_MAX_SIZE
)) { /* LP64todo: remove when dp capable */
2662 return KERN_INVALID_ARGUMENT
;
2665 if (!anywhere
&& overwrite
) {
2667 * Create a temporary VM map to hold the old mappings in the
2668 * affected area while we create the new one.
2669 * This avoids releasing the VM map lock in
2670 * vm_map_entry_delete() and allows atomicity
2671 * when we want to replace some mappings with a new one.
2672 * It also allows us to restore the old VM mappings if the
2673 * new mapping fails.
2675 zap_old_map
= vm_map_create(PMAP_NULL
,
2678 map
->hdr
.entries_pageable
);
2679 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
2680 vm_map_disable_hole_optimization(zap_old_map
);
2691 if (entry_for_jit
) {
2692 if (map
->jit_entry_exists
&&
2693 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map
)) {
2694 result
= KERN_INVALID_ARGUMENT
;
2697 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map
)) {
2698 random_address
= TRUE
;
2702 if (random_address
) {
2704 * Get a random start address.
2706 result
= vm_map_random_address_for_size(map
, address
, size
);
2707 if (result
!= KERN_SUCCESS
) {
2712 #if XNU_TARGET_OS_OSX
2713 else if ((start
== 0 || start
== vm_map_min(map
)) &&
2714 !map
->disable_vmentry_reuse
&&
2715 map
->vmmap_high_start
!= 0) {
2716 start
= map
->vmmap_high_start
;
2718 #endif /* XNU_TARGET_OS_OSX */
2722 * Calculate the first possible address.
2725 if (start
< effective_min_offset
) {
2726 start
= effective_min_offset
;
2728 if (start
> effective_max_offset
) {
2729 RETURN(KERN_NO_SPACE
);
2733 * Look for the first possible address;
2734 * if there's already something at this
2735 * address, we have to start after it.
2738 if (map
->disable_vmentry_reuse
== TRUE
) {
2739 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
2741 if (map
->holelistenabled
) {
2742 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
2744 if (hole_entry
== NULL
) {
2746 * No more space in the map?
2748 result
= KERN_NO_SPACE
;
2751 boolean_t found_hole
= FALSE
;
2754 if (hole_entry
->vme_start
>= start
) {
2755 start
= hole_entry
->vme_start
;
2760 if (hole_entry
->vme_end
> start
) {
2764 hole_entry
= hole_entry
->vme_next
;
2765 } while (hole_entry
!= CAST_TO_VM_MAP_ENTRY(map
->holes_list
));
2767 if (found_hole
== FALSE
) {
2768 result
= KERN_NO_SPACE
;
2775 start
+= PAGE_SIZE_64
;
2779 assert(first_free_is_valid(map
));
2781 entry
= map
->first_free
;
2783 if (entry
== vm_map_to_entry(map
)) {
2786 if (entry
->vme_next
== vm_map_to_entry(map
)) {
2788 * Hole at the end of the map.
2792 if (start
< (entry
->vme_next
)->vme_start
) {
2793 start
= entry
->vme_end
;
2794 start
= vm_map_round_page(start
,
2795 VM_MAP_PAGE_MASK(map
));
2798 * Need to do a lookup.
2805 if (entry
== NULL
) {
2806 vm_map_entry_t tmp_entry
;
2807 if (vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
2808 assert(!entry_for_jit
);
2809 start
= tmp_entry
->vme_end
;
2810 start
= vm_map_round_page(start
,
2811 VM_MAP_PAGE_MASK(map
));
2819 * In any case, the "entry" always precedes
2820 * the proposed new region throughout the
2825 vm_map_entry_t next
;
2828 * Find the end of the proposed new region.
2829 * Be sure we didn't go beyond the end, or
2830 * wrap around the address.
2833 end
= ((start
+ mask
) & ~mask
);
2834 end
= vm_map_round_page(end
,
2835 VM_MAP_PAGE_MASK(map
));
2837 RETURN(KERN_NO_SPACE
);
2840 assert(VM_MAP_PAGE_ALIGNED(start
,
2841 VM_MAP_PAGE_MASK(map
)));
2844 /* We want an entire page of empty space, but don't increase the allocation size. */
2845 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
2847 if ((desired_empty_end
> effective_max_offset
) || (desired_empty_end
< start
)) {
2848 if (map
->wait_for_space
) {
2849 assert(!keep_map_locked
);
2850 if (size
<= (effective_max_offset
-
2851 effective_min_offset
)) {
2852 assert_wait((event_t
)map
,
2856 thread_block(THREAD_CONTINUE_NULL
);
2860 RETURN(KERN_NO_SPACE
);
2863 next
= entry
->vme_next
;
2865 if (map
->holelistenabled
) {
2866 if (entry
->vme_end
>= desired_empty_end
) {
2871 * If there are no more entries, we must win.
2875 * If there is another entry, it must be
2876 * after the end of the potential new region.
2879 if (next
== vm_map_to_entry(map
)) {
2883 if (next
->vme_start
>= desired_empty_end
) {
2889 * Didn't fit -- move to the next entry.
2894 if (map
->holelistenabled
) {
2895 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
2899 result
= KERN_NO_SPACE
;
2902 start
= entry
->vme_start
;
2904 start
= entry
->vme_end
;
2907 start
= vm_map_round_page(start
,
2908 VM_MAP_PAGE_MASK(map
));
2911 if (map
->holelistenabled
) {
2912 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
2913 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
2918 assert(VM_MAP_PAGE_ALIGNED(*address
,
2919 VM_MAP_PAGE_MASK(map
)));
2921 if (VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
&&
2923 user_alias
== VM_MEMORY_REALLOC
) {
2925 * Force realloc() to switch to a new allocation,
2926 * to prevent 4k-fragmented virtual ranges.
2928 // DEBUG4K_ERROR("no realloc in place");
2929 return KERN_NO_SPACE
;
2934 * the address doesn't itself violate
2935 * the mask requirement.
2940 if ((start
& mask
) != 0) {
2941 RETURN(KERN_NO_SPACE
);
2945 * ... the address is within bounds
2950 if ((start
< effective_min_offset
) ||
2951 (end
> effective_max_offset
) ||
2953 RETURN(KERN_INVALID_ADDRESS
);
2956 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
2959 * Fixed mapping and "overwrite" flag: attempt to
2960 * remove all existing mappings in the specified
2961 * address range, saving them in our "zap_old_map".
2963 remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
;
2964 remove_flags
|= VM_MAP_REMOVE_NO_MAP_ALIGN
;
2965 if (vmk_flags
.vmkf_overwrite_immutable
) {
2966 /* we can overwrite immutable mappings */
2967 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
2969 (void) vm_map_delete(map
, start
, end
,
2975 * ... the starting address isn't allocated
2978 if (vm_map_lookup_entry(map
, start
, &entry
)) {
2979 if (!(vmk_flags
.vmkf_already
)) {
2980 RETURN(KERN_NO_SPACE
);
2983 * Check if what's already there is what we want.
2986 tmp_offset
= offset
;
2987 if (entry
->vme_start
< start
) {
2988 tmp_start
-= start
- entry
->vme_start
;
2989 tmp_offset
-= start
- entry
->vme_start
;
2991 for (; entry
->vme_start
< end
;
2992 entry
= entry
->vme_next
) {
2994 * Check if the mapping's attributes
2995 * match the existing map entry.
2997 if (entry
== vm_map_to_entry(map
) ||
2998 entry
->vme_start
!= tmp_start
||
2999 entry
->is_sub_map
!= is_submap
||
3000 VME_OFFSET(entry
) != tmp_offset
||
3001 entry
->needs_copy
!= needs_copy
||
3002 entry
->protection
!= cur_protection
||
3003 entry
->max_protection
!= max_protection
||
3004 entry
->inheritance
!= inheritance
||
3005 entry
->iokit_acct
!= iokit_acct
||
3006 VME_ALIAS(entry
) != alias
) {
3007 /* not the same mapping ! */
3008 RETURN(KERN_NO_SPACE
);
3011 * Check if the same object is being mapped.
3014 if (VME_SUBMAP(entry
) !=
3015 (vm_map_t
) object
) {
3016 /* not the same submap */
3017 RETURN(KERN_NO_SPACE
);
3020 if (VME_OBJECT(entry
) != object
) {
3021 /* not the same VM object... */
3024 obj2
= VME_OBJECT(entry
);
3025 if ((obj2
== VM_OBJECT_NULL
||
3027 (object
== VM_OBJECT_NULL
||
3028 object
->internal
)) {
3035 RETURN(KERN_NO_SPACE
);
3040 tmp_offset
+= entry
->vme_end
- entry
->vme_start
;
3041 tmp_start
+= entry
->vme_end
- entry
->vme_start
;
3042 if (entry
->vme_end
>= end
) {
3043 /* reached the end of our mapping */
3047 /* it all matches: let's use what's already there ! */
3048 RETURN(KERN_MEMORY_PRESENT
);
3052 * ... the next region doesn't overlap the
3056 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
3057 (entry
->vme_next
->vme_start
< end
)) {
3058 RETURN(KERN_NO_SPACE
);
3064 * "start" and "end" should define the endpoints of the
3065 * available new range, and
3066 * "entry" should refer to the region before the new
3069 * the map should be locked.
3073 * See whether we can avoid creating a new entry (and object) by
3074 * extending one of our neighbors. [So far, we only attempt to
3075 * extend from below.] Note that we can never extend/join
3076 * purgable objects because they need to remain distinct
3077 * entities in order to implement their "volatile object"
3083 vm_memory_malloc_no_cow(user_alias
)) {
3084 if (object
== VM_OBJECT_NULL
) {
3085 object
= vm_object_allocate(size
);
3086 object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
3087 object
->true_share
= FALSE
;
3090 object
->purgable
= VM_PURGABLE_NONVOLATILE
;
3091 if (map
->pmap
== kernel_pmap
) {
3093 * Purgeable mappings made in a kernel
3094 * map are "owned" by the kernel itself
3095 * rather than the current user task
3096 * because they're likely to be used by
3097 * more than this user task (see
3098 * execargs_purgeable_allocate(), for
3101 owner
= kernel_task
;
3103 owner
= current_task();
3105 assert(object
->vo_owner
== NULL
);
3106 assert(object
->resident_page_count
== 0);
3107 assert(object
->wired_page_count
== 0);
3108 vm_object_lock(object
);
3109 vm_purgeable_nonvolatile_enqueue(object
, owner
);
3110 vm_object_unlock(object
);
3112 offset
= (vm_object_offset_t
)0;
3114 } else if (VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
) {
3115 /* no coalescing if address space uses sub-pages */
3116 } else if ((is_submap
== FALSE
) &&
3117 (object
== VM_OBJECT_NULL
) &&
3118 (entry
!= vm_map_to_entry(map
)) &&
3119 (entry
->vme_end
== start
) &&
3120 (!entry
->is_shared
) &&
3121 (!entry
->is_sub_map
) &&
3122 (!entry
->in_transition
) &&
3123 (!entry
->needs_wakeup
) &&
3124 (entry
->behavior
== VM_BEHAVIOR_DEFAULT
) &&
3125 (entry
->protection
== cur_protection
) &&
3126 (entry
->max_protection
== max_protection
) &&
3127 (entry
->inheritance
== inheritance
) &&
3128 ((user_alias
== VM_MEMORY_REALLOC
) ||
3129 (VME_ALIAS(entry
) == alias
)) &&
3130 (entry
->no_cache
== no_cache
) &&
3131 (entry
->permanent
== permanent
) &&
3132 /* no coalescing for immutable executable mappings */
3133 !((entry
->protection
& VM_PROT_EXECUTE
) &&
3134 entry
->permanent
) &&
3135 (!entry
->superpage_size
&& !superpage_size
) &&
3137 * No coalescing if not map-aligned, to avoid propagating
3138 * that condition any further than needed:
3140 (!entry
->map_aligned
|| !clear_map_aligned
) &&
3141 (!entry
->zero_wired_pages
) &&
3142 (!entry
->used_for_jit
&& !entry_for_jit
) &&
3143 (!entry
->pmap_cs_associated
) &&
3144 (entry
->iokit_acct
== iokit_acct
) &&
3145 (!entry
->vme_resilient_codesign
) &&
3146 (!entry
->vme_resilient_media
) &&
3147 (!entry
->vme_atomic
) &&
3148 (entry
->vme_no_copy_on_read
== no_copy_on_read
) &&
3150 ((entry
->vme_end
- entry
->vme_start
) + size
<=
3151 (user_alias
== VM_MEMORY_REALLOC
?
3153 NO_COALESCE_LIMIT
)) &&
3155 (entry
->wired_count
== 0)) { /* implies user_wired_count == 0 */
3156 if (vm_object_coalesce(VME_OBJECT(entry
),
3159 (vm_object_offset_t
) 0,
3160 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
),
3161 (vm_map_size_t
)(end
- entry
->vme_end
))) {
3163 * Coalesced the two objects - can extend
3164 * the previous map entry to include the
3167 map
->size
+= (end
- entry
->vme_end
);
3168 assert(entry
->vme_start
< end
);
3169 assert(VM_MAP_PAGE_ALIGNED(end
,
3170 VM_MAP_PAGE_MASK(map
)));
3171 if (__improbable(vm_debug_events
)) {
3172 DTRACE_VM5(map_entry_extend
, vm_map_t
, map
, vm_map_entry_t
, entry
, vm_address_t
, entry
->vme_start
, vm_address_t
, entry
->vme_end
, vm_address_t
, end
);
3174 entry
->vme_end
= end
;
3175 if (map
->holelistenabled
) {
3176 vm_map_store_update_first_free(map
, entry
, TRUE
);
3178 vm_map_store_update_first_free(map
, map
->first_free
, TRUE
);
3180 new_mapping_established
= TRUE
;
3181 RETURN(KERN_SUCCESS
);
3185 step
= superpage_size
? SUPERPAGE_SIZE
: (end
- start
);
3188 for (tmp2_start
= start
; tmp2_start
< end
; tmp2_start
+= step
) {
3189 tmp2_end
= tmp2_start
+ step
;
3191 * Create a new entry
3194 * The reserved "page zero" in each process's address space can
3195 * be arbitrarily large. Splitting it into separate objects and
3196 * therefore different VM map entries serves no purpose and just
3197 * slows down operations on the VM map, so let's not split the
3198 * allocation into chunks if the max protection is NONE. That
3199 * memory should never be accessible, so it will never get to the
3202 tmp_start
= tmp2_start
;
3203 if (object
== VM_OBJECT_NULL
&&
3204 size
> chunk_size
&&
3205 max_protection
!= VM_PROT_NONE
&&
3206 superpage_size
== 0) {
3207 tmp_end
= tmp_start
+ chunk_size
;
3212 new_entry
= vm_map_entry_insert(map
,
3213 entry
, tmp_start
, tmp_end
,
3214 object
, offset
, needs_copy
,
3216 cur_protection
, max_protection
,
3217 VM_BEHAVIOR_DEFAULT
,
3218 (entry_for_jit
&& !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map
) ?
3219 VM_INHERIT_NONE
: inheritance
),
3229 translated_allow_execute
);
3231 assert((object
!= kernel_object
) || (VM_KERN_MEMORY_NONE
!= alias
));
3233 if (resilient_codesign
) {
3234 int reject_prot
= (needs_copy
? VM_PROT_EXECUTE
: (VM_PROT_WRITE
| VM_PROT_EXECUTE
));
3235 if (!((cur_protection
| max_protection
) & reject_prot
)) {
3236 new_entry
->vme_resilient_codesign
= TRUE
;
3240 if (resilient_media
&&
3241 (object
== VM_OBJECT_NULL
||
3242 object
->internal
)) {
3243 new_entry
->vme_resilient_media
= TRUE
;
3246 assert(!new_entry
->iokit_acct
);
3248 object
!= VM_OBJECT_NULL
&&
3249 (object
->purgable
!= VM_PURGABLE_DENY
||
3250 object
->vo_ledger_tag
)) {
3251 assert(new_entry
->use_pmap
);
3252 assert(!new_entry
->iokit_acct
);
3254 * Turn off pmap accounting since
3255 * purgeable (or tagged) objects have their
3258 new_entry
->use_pmap
= FALSE
;
3259 } else if (!is_submap
&&
3261 object
!= VM_OBJECT_NULL
&&
3263 /* alternate accounting */
3264 assert(!new_entry
->iokit_acct
);
3265 assert(new_entry
->use_pmap
);
3266 new_entry
->iokit_acct
= TRUE
;
3267 new_entry
->use_pmap
= FALSE
;
3269 vm_map_iokit_mapped_region
,
3271 vm_map_offset_t
, new_entry
->vme_start
,
3272 vm_map_offset_t
, new_entry
->vme_end
,
3273 int, VME_ALIAS(new_entry
));
3274 vm_map_iokit_mapped_region(
3276 (new_entry
->vme_end
-
3277 new_entry
->vme_start
));
3278 } else if (!is_submap
) {
3279 assert(!new_entry
->iokit_acct
);
3280 assert(new_entry
->use_pmap
);
3285 boolean_t submap_is_64bit
;
3288 assert(new_entry
->is_sub_map
);
3289 assert(!new_entry
->use_pmap
);
3290 assert(!new_entry
->iokit_acct
);
3291 submap
= (vm_map_t
) object
;
3292 submap_is_64bit
= vm_map_is_64bit(submap
);
3293 use_pmap
= vmk_flags
.vmkf_nested_pmap
;
3294 #ifndef NO_NESTED_PMAP
3295 if (use_pmap
&& submap
->pmap
== NULL
) {
3296 ledger_t ledger
= map
->pmap
->ledger
;
3297 /* we need a sub pmap to nest... */
3298 submap
->pmap
= pmap_create_options(ledger
, 0,
3299 submap_is_64bit
? PMAP_CREATE_64BIT
: 0);
3300 if (submap
->pmap
== NULL
) {
3301 /* let's proceed without nesting... */
3303 #if defined(__arm__) || defined(__arm64__)
3305 pmap_set_nested(submap
->pmap
);
3309 if (use_pmap
&& submap
->pmap
!= NULL
) {
3310 if (VM_MAP_PAGE_SHIFT(map
) != VM_MAP_PAGE_SHIFT(submap
)) {
3311 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map
, VM_MAP_PAGE_SHIFT(map
), submap
, VM_MAP_PAGE_SHIFT(submap
));
3314 kr
= pmap_nest(map
->pmap
,
3317 tmp_end
- tmp_start
);
3319 if (kr
!= KERN_SUCCESS
) {
3320 printf("vm_map_enter: "
3321 "pmap_nest(0x%llx,0x%llx) "
3323 (long long)tmp_start
,
3327 /* we're now nested ! */
3328 new_entry
->use_pmap
= TRUE
;
3332 #endif /* NO_NESTED_PMAP */
3336 if (superpage_size
) {
3338 vm_object_t sp_object
;
3339 vm_object_offset_t sp_offset
;
3341 VME_OFFSET_SET(entry
, 0);
3343 /* allocate one superpage */
3344 kr
= cpm_allocate(SUPERPAGE_SIZE
, &pages
, 0, SUPERPAGE_NBASEPAGES
- 1, TRUE
, 0);
3345 if (kr
!= KERN_SUCCESS
) {
3346 /* deallocate whole range... */
3347 new_mapping_established
= TRUE
;
3348 /* ... but only up to "tmp_end" */
3349 size
-= end
- tmp_end
;
3353 /* create one vm_object per superpage */
3354 sp_object
= vm_object_allocate((vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
3355 sp_object
->phys_contiguous
= TRUE
;
3356 sp_object
->vo_shadow_offset
= (vm_object_offset_t
)VM_PAGE_GET_PHYS_PAGE(pages
) * PAGE_SIZE
;
3357 VME_OBJECT_SET(entry
, sp_object
);
3358 assert(entry
->use_pmap
);
3360 /* enter the base pages into the object */
3361 vm_object_lock(sp_object
);
3363 sp_offset
< SUPERPAGE_SIZE
;
3364 sp_offset
+= PAGE_SIZE
) {
3366 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m
));
3367 pages
= NEXT_PAGE(m
);
3368 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
3369 vm_page_insert_wired(m
, sp_object
, sp_offset
, VM_KERN_MEMORY_OSFMK
);
3371 vm_object_unlock(sp_object
);
3373 } while (tmp_end
!= tmp2_end
&&
3374 (tmp_start
= tmp_end
) &&
3375 (tmp_end
= (tmp2_end
- tmp_end
> chunk_size
) ?
3376 tmp_end
+ chunk_size
: tmp2_end
));
3379 new_mapping_established
= TRUE
;
3382 assert(map_locked
== TRUE
);
3384 if (result
== KERN_SUCCESS
) {
3385 vm_prot_t pager_prot
;
3386 memory_object_t pager
;
3390 !(vmk_flags
.vmkf_no_pmap_check
)) {
3391 assert(vm_map_pmap_is_empty(map
,
3398 * For "named" VM objects, let the pager know that the
3399 * memory object is being mapped. Some pagers need to keep
3400 * track of this, to know when they can reclaim the memory
3401 * object, for example.
3402 * VM calls memory_object_map() for each mapping (specifying
3403 * the protection of each mapping) and calls
3404 * memory_object_last_unmap() when all the mappings are gone.
3406 pager_prot
= max_protection
;
3409 * Copy-On-Write mapping: won't modify
3410 * the memory object.
3412 pager_prot
&= ~VM_PROT_WRITE
;
3415 object
!= VM_OBJECT_NULL
&&
3417 object
->pager
!= MEMORY_OBJECT_NULL
) {
3418 vm_object_lock(object
);
3419 pager
= object
->pager
;
3420 if (object
->named
&&
3421 pager
!= MEMORY_OBJECT_NULL
) {
3422 assert(object
->pager_ready
);
3423 vm_object_mapping_wait(object
, THREAD_UNINT
);
3424 vm_object_mapping_begin(object
);
3425 vm_object_unlock(object
);
3427 kr
= memory_object_map(pager
, pager_prot
);
3428 assert(kr
== KERN_SUCCESS
);
3430 vm_object_lock(object
);
3431 vm_object_mapping_end(object
);
3433 vm_object_unlock(object
);
3437 assert(map_locked
== TRUE
);
3439 if (!keep_map_locked
) {
3445 * We can't hold the map lock if we enter this block.
3448 if (result
== KERN_SUCCESS
) {
3449 /* Wire down the new entry if the user
3450 * requested all new map entries be wired.
3452 if ((map
->wiring_required
) || (superpage_size
)) {
3453 assert(!keep_map_locked
);
3454 pmap_empty
= FALSE
; /* pmap won't be empty */
3455 kr
= vm_map_wire_kernel(map
, start
, end
,
3456 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
3463 if (result
!= KERN_SUCCESS
) {
3464 if (new_mapping_established
) {
3466 * We have to get rid of the new mappings since we
3467 * won't make them available to the user.
3468 * Try and do that atomically, to minimize the risk
3469 * that someone else creates new mappings in that range.
3471 zap_new_map
= vm_map_create(PMAP_NULL
,
3474 map
->hdr
.entries_pageable
);
3475 vm_map_set_page_shift(zap_new_map
,
3476 VM_MAP_PAGE_SHIFT(map
));
3477 vm_map_disable_hole_optimization(zap_new_map
);
3483 (void) vm_map_delete(map
, *address
, *address
+ size
,
3484 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3485 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3488 if (zap_old_map
!= VM_MAP_NULL
&&
3489 zap_old_map
->hdr
.nentries
!= 0) {
3490 vm_map_entry_t entry1
, entry2
;
3493 * The new mapping failed. Attempt to restore
3494 * the old mappings, saved in the "zap_old_map".
3501 /* first check if the coast is still clear */
3502 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
3503 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
3504 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
3505 vm_map_lookup_entry(map
, end
, &entry2
) ||
3508 * Part of that range has already been
3509 * re-mapped: we can't restore the old
3512 vm_map_enter_restore_failures
++;
3515 * Transfer the saved map entries from
3516 * "zap_old_map" to the original "map",
3517 * inserting them all after "entry1".
3519 for (entry2
= vm_map_first_entry(zap_old_map
);
3520 entry2
!= vm_map_to_entry(zap_old_map
);
3521 entry2
= vm_map_first_entry(zap_old_map
)) {
3522 vm_map_size_t entry_size
;
3524 entry_size
= (entry2
->vme_end
-
3526 vm_map_store_entry_unlink(zap_old_map
,
3528 zap_old_map
->size
-= entry_size
;
3529 vm_map_store_entry_link(map
, entry1
, entry2
,
3530 VM_MAP_KERNEL_FLAGS_NONE
);
3531 map
->size
+= entry_size
;
3534 if (map
->wiring_required
) {
3536 * XXX TODO: we should rewire the
3540 vm_map_enter_restore_successes
++;
3546 * The caller is responsible for releasing the lock if it requested to
3547 * keep the map locked.
3549 if (map_locked
&& !keep_map_locked
) {
3554 * Get rid of the "zap_maps" and all the map entries that
3555 * they may still contain.
3557 if (zap_old_map
!= VM_MAP_NULL
) {
3558 vm_map_destroy(zap_old_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3559 zap_old_map
= VM_MAP_NULL
;
3561 if (zap_new_map
!= VM_MAP_NULL
) {
3562 vm_map_destroy(zap_new_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3563 zap_new_map
= VM_MAP_NULL
;
/* External declaration of the "4K" pager's operations table; compared against an object's pager->mo_pager_ops below to recognize objects backed by that pager. */
3572 extern const struct memory_object_pager_ops fourk_pager_ops
;
3576 vm_map_offset_t
*address
, /* IN/OUT */
3578 vm_map_offset_t mask
,
3580 vm_map_kernel_flags_t vmk_flags
,
3583 vm_object_offset_t offset
,
3584 boolean_t needs_copy
,
3585 vm_prot_t cur_protection
,
3586 vm_prot_t max_protection
,
3587 vm_inherit_t inheritance
)
3589 vm_map_entry_t entry
, new_entry
;
3590 vm_map_offset_t start
, fourk_start
;
3591 vm_map_offset_t end
, fourk_end
;
3592 vm_map_size_t fourk_size
;
3593 kern_return_t result
= KERN_SUCCESS
;
3594 vm_map_t zap_old_map
= VM_MAP_NULL
;
3595 vm_map_t zap_new_map
= VM_MAP_NULL
;
3596 boolean_t map_locked
= FALSE
;
3597 boolean_t pmap_empty
= TRUE
;
3598 boolean_t new_mapping_established
= FALSE
;
3599 boolean_t keep_map_locked
= vmk_flags
.vmkf_keep_map_locked
;
3600 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
3601 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
3602 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
3603 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
3604 boolean_t is_submap
= vmk_flags
.vmkf_submap
;
3605 boolean_t permanent
= vmk_flags
.vmkf_permanent
;
3606 boolean_t no_copy_on_read
= vmk_flags
.vmkf_permanent
;
3607 boolean_t entry_for_jit
= vmk_flags
.vmkf_map_jit
;
3608 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3609 boolean_t translated_allow_execute
= vmk_flags
.vmkf_translated_allow_execute
;
3610 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
3611 vm_map_offset_t effective_min_offset
, effective_max_offset
;
3613 boolean_t clear_map_aligned
= FALSE
;
3614 memory_object_t fourk_mem_obj
;
3615 vm_object_t fourk_object
;
3616 vm_map_offset_t fourk_pager_offset
;
3617 int fourk_pager_index_start
, fourk_pager_index_num
;
3619 boolean_t fourk_copy
;
3620 vm_object_t copy_object
;
3621 vm_object_offset_t copy_offset
;
3623 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
3624 panic("%s:%d\n", __FUNCTION__
, __LINE__
);
3626 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
3627 fourk_object
= VM_OBJECT_NULL
;
3629 if (superpage_size
) {
3630 return KERN_NOT_SUPPORTED
;
3633 if ((cur_protection
& VM_PROT_WRITE
) &&
3634 (cur_protection
& VM_PROT_EXECUTE
) &&
3635 #if XNU_TARGET_OS_OSX
3636 map
->pmap
!= kernel_pmap
&&
3637 (vm_map_cs_enforcement(map
)
3639 || !VM_MAP_IS_EXOTIC(map
)
3640 #endif /* __arm64__ */
3642 #endif /* XNU_TARGET_OS_OSX */
3644 !pmap_cs_exempt(map
->pmap
) &&
3650 vm_prot_t
, cur_protection
);
3651 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3652 "turning off execute\n",
3654 (current_task()->bsd_info
3655 ? proc_name_address(current_task()->bsd_info
)
3658 cur_protection
&= ~VM_PROT_EXECUTE
;
3662 * If the task has requested executable lockdown,
3663 * deny any new executable mapping.
3665 if (map
->map_disallow_new_exec
== TRUE
) {
3666 if (cur_protection
& VM_PROT_EXECUTE
) {
3667 return KERN_PROTECTION_FAILURE
;
3672 return KERN_NOT_SUPPORTED
;
3674 if (vmk_flags
.vmkf_already
) {
3675 return KERN_NOT_SUPPORTED
;
3677 if (purgable
|| entry_for_jit
) {
3678 return KERN_NOT_SUPPORTED
;
3681 effective_min_offset
= map
->min_offset
;
3683 if (vmk_flags
.vmkf_beyond_max
) {
3684 return KERN_NOT_SUPPORTED
;
3686 effective_max_offset
= map
->max_offset
;
3690 (offset
& FOURK_PAGE_MASK
) != 0) {
3692 return KERN_INVALID_ARGUMENT
;
3695 #define RETURN(value) { result = value; goto BailOut; }
3697 assert(VM_MAP_PAGE_ALIGNED(*address
, FOURK_PAGE_MASK
));
3698 assert(VM_MAP_PAGE_ALIGNED(size
, FOURK_PAGE_MASK
));
3700 if (!anywhere
&& overwrite
) {
3701 return KERN_NOT_SUPPORTED
;
3703 if (!anywhere
&& overwrite
) {
3705 * Create a temporary VM map to hold the old mappings in the
3706 * affected area while we create the new one.
3707 * This avoids releasing the VM map lock in
3708 * vm_map_entry_delete() and allows atomicity
3709 * when we want to replace some mappings with a new one.
3710 * It also allows us to restore the old VM mappings if the
3711 * new mapping fails.
3713 zap_old_map
= vm_map_create(PMAP_NULL
,
3716 map
->hdr
.entries_pageable
);
3717 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
3718 vm_map_disable_hole_optimization(zap_old_map
);
3721 fourk_start
= *address
;
3723 fourk_end
= fourk_start
+ fourk_size
;
3725 start
= vm_map_trunc_page(*address
, VM_MAP_PAGE_MASK(map
));
3726 end
= vm_map_round_page(fourk_end
, VM_MAP_PAGE_MASK(map
));
3730 return KERN_NOT_SUPPORTED
;
3734 * the address doesn't itself violate
3735 * the mask requirement.
3740 if ((start
& mask
) != 0) {
3741 RETURN(KERN_NO_SPACE
);
3745 * ... the address is within bounds
3750 if ((start
< effective_min_offset
) ||
3751 (end
> effective_max_offset
) ||
3753 RETURN(KERN_INVALID_ADDRESS
);
3756 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
3758 * Fixed mapping and "overwrite" flag: attempt to
3759 * remove all existing mappings in the specified
3760 * address range, saving them in our "zap_old_map".
3762 (void) vm_map_delete(map
, start
, end
,
3763 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3764 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3769 * ... the starting address isn't allocated
3771 if (vm_map_lookup_entry(map
, start
, &entry
)) {
3772 vm_object_t cur_object
, shadow_object
;
3775 * We might already have some 4K mappings
3776 * in a 16K page here.
3779 if (entry
->vme_end
- entry
->vme_start
3780 != SIXTEENK_PAGE_SIZE
) {
3781 RETURN(KERN_NO_SPACE
);
3783 if (entry
->is_sub_map
) {
3784 RETURN(KERN_NO_SPACE
);
3786 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
3787 RETURN(KERN_NO_SPACE
);
3790 /* go all the way down the shadow chain */
3791 cur_object
= VME_OBJECT(entry
);
3792 vm_object_lock(cur_object
);
3793 while (cur_object
->shadow
!= VM_OBJECT_NULL
) {
3794 shadow_object
= cur_object
->shadow
;
3795 vm_object_lock(shadow_object
);
3796 vm_object_unlock(cur_object
);
3797 cur_object
= shadow_object
;
3798 shadow_object
= VM_OBJECT_NULL
;
3800 if (cur_object
->internal
||
3801 cur_object
->pager
== NULL
) {
3802 vm_object_unlock(cur_object
);
3803 RETURN(KERN_NO_SPACE
);
3805 if (cur_object
->pager
->mo_pager_ops
3806 != &fourk_pager_ops
) {
3807 vm_object_unlock(cur_object
);
3808 RETURN(KERN_NO_SPACE
);
3810 fourk_object
= cur_object
;
3811 fourk_mem_obj
= fourk_object
->pager
;
3813 /* keep the "4K" object alive */
3814 vm_object_reference_locked(fourk_object
);
3815 memory_object_reference(fourk_mem_obj
);
3816 vm_object_unlock(fourk_object
);
3818 /* merge permissions */
3819 entry
->protection
|= cur_protection
;
3820 entry
->max_protection
|= max_protection
;
3821 if ((entry
->protection
& (VM_PROT_WRITE
|
3822 VM_PROT_EXECUTE
)) ==
3823 (VM_PROT_WRITE
| VM_PROT_EXECUTE
) &&
3824 fourk_binary_compatibility_unsafe
&&
3825 fourk_binary_compatibility_allow_wx
) {
3826 /* write+execute: need to be "jit" */
3827 entry
->used_for_jit
= TRUE
;
3829 goto map_in_fourk_pager
;
3833 * ... the next region doesn't overlap the
3837 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
3838 (entry
->vme_next
->vme_start
< end
)) {
3839 RETURN(KERN_NO_SPACE
);
3845 * "start" and "end" should define the endpoints of the
3846 * available new range, and
3847 * "entry" should refer to the region before the new
3850 * the map should be locked.
3853 /* create a new "4K" pager */
3854 fourk_mem_obj
= fourk_pager_create();
3855 fourk_object
= fourk_pager_to_vm_object(fourk_mem_obj
);
3856 assert(fourk_object
);
3858 /* keep the "4K" object alive */
3859 vm_object_reference(fourk_object
);
3861 /* create a "copy" object, to map the "4K" object copy-on-write */
3863 result
= vm_object_copy_strategically(fourk_object
,
3869 assert(result
== KERN_SUCCESS
);
3870 assert(copy_object
!= VM_OBJECT_NULL
);
3871 assert(copy_offset
== 0);
3873 /* map the "4K" pager's copy object */
3875 vm_map_entry_insert(map
, entry
,
3876 vm_map_trunc_page(start
,
3877 VM_MAP_PAGE_MASK(map
)),
3878 vm_map_round_page(end
,
3879 VM_MAP_PAGE_MASK(map
)),
3882 FALSE
, /* needs_copy */
3885 cur_protection
, max_protection
,
3886 VM_BEHAVIOR_DEFAULT
,
3887 (entry_for_jit
&& !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map
) ?
3888 VM_INHERIT_NONE
: inheritance
),
3898 translated_allow_execute
);
3901 #if VM_MAP_DEBUG_FOURK
3902 if (vm_map_debug_fourk
) {
3903 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3905 (uint64_t) entry
->vme_start
,
3906 (uint64_t) entry
->vme_end
,
3909 #endif /* VM_MAP_DEBUG_FOURK */
3911 new_mapping_established
= TRUE
;
3914 /* "map" the original "object" where it belongs in the "4K" pager */
3915 fourk_pager_offset
= (fourk_start
& SIXTEENK_PAGE_MASK
);
3916 fourk_pager_index_start
= (int) (fourk_pager_offset
/ FOURK_PAGE_SIZE
);
3917 if (fourk_size
> SIXTEENK_PAGE_SIZE
) {
3918 fourk_pager_index_num
= 4;
3920 fourk_pager_index_num
= (int) (fourk_size
/ FOURK_PAGE_SIZE
);
3922 if (fourk_pager_index_start
+ fourk_pager_index_num
> 4) {
3923 fourk_pager_index_num
= 4 - fourk_pager_index_start
;
3926 cur_idx
< fourk_pager_index_num
;
3928 vm_object_t old_object
;
3929 vm_object_offset_t old_offset
;
3931 kr
= fourk_pager_populate(fourk_mem_obj
,
3932 TRUE
, /* overwrite */
3933 fourk_pager_index_start
+ cur_idx
,
3937 (cur_idx
* FOURK_PAGE_SIZE
))
3941 #if VM_MAP_DEBUG_FOURK
3942 if (vm_map_debug_fourk
) {
3943 if (old_object
== (vm_object_t
) -1 &&
3944 old_offset
== (vm_object_offset_t
) -1) {
3945 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3946 "pager [%p:0x%llx] "
3948 "[object:%p,offset:0x%llx]\n",
3950 (uint64_t) entry
->vme_start
,
3951 (uint64_t) entry
->vme_end
,
3954 fourk_pager_index_start
+ cur_idx
,
3957 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3960 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3961 "pager [%p:0x%llx] "
3962 "populate[%d] [object:%p,offset:0x%llx] "
3963 "old [%p:0x%llx]\n",
3965 (uint64_t) entry
->vme_start
,
3966 (uint64_t) entry
->vme_end
,
3969 fourk_pager_index_start
+ cur_idx
,
3972 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3978 #endif /* VM_MAP_DEBUG_FOURK */
3980 assert(kr
== KERN_SUCCESS
);
3981 if (object
!= old_object
&&
3982 object
!= VM_OBJECT_NULL
&&
3983 object
!= (vm_object_t
) -1) {
3984 vm_object_reference(object
);
3986 if (object
!= old_object
&&
3987 old_object
!= VM_OBJECT_NULL
&&
3988 old_object
!= (vm_object_t
) -1) {
3989 vm_object_deallocate(old_object
);
3994 assert(map_locked
== TRUE
);
3996 if (result
== KERN_SUCCESS
) {
3997 vm_prot_t pager_prot
;
3998 memory_object_t pager
;
4002 !(vmk_flags
.vmkf_no_pmap_check
)) {
4003 assert(vm_map_pmap_is_empty(map
,
4010 * For "named" VM objects, let the pager know that the
4011 * memory object is being mapped. Some pagers need to keep
4012 * track of this, to know when they can reclaim the memory
4013 * object, for example.
4014 * VM calls memory_object_map() for each mapping (specifying
4015 * the protection of each mapping) and calls
4016 * memory_object_last_unmap() when all the mappings are gone.
4018 pager_prot
= max_protection
;
4021 * Copy-On-Write mapping: won't modify
4022 * the memory object.
4024 pager_prot
&= ~VM_PROT_WRITE
;
4027 object
!= VM_OBJECT_NULL
&&
4029 object
->pager
!= MEMORY_OBJECT_NULL
) {
4030 vm_object_lock(object
);
4031 pager
= object
->pager
;
4032 if (object
->named
&&
4033 pager
!= MEMORY_OBJECT_NULL
) {
4034 assert(object
->pager_ready
);
4035 vm_object_mapping_wait(object
, THREAD_UNINT
);
4036 vm_object_mapping_begin(object
);
4037 vm_object_unlock(object
);
4039 kr
= memory_object_map(pager
, pager_prot
);
4040 assert(kr
== KERN_SUCCESS
);
4042 vm_object_lock(object
);
4043 vm_object_mapping_end(object
);
4045 vm_object_unlock(object
);
4048 fourk_object
!= VM_OBJECT_NULL
&&
4049 fourk_object
->named
&&
4050 fourk_object
->pager
!= MEMORY_OBJECT_NULL
) {
4051 vm_object_lock(fourk_object
);
4052 pager
= fourk_object
->pager
;
4053 if (fourk_object
->named
&&
4054 pager
!= MEMORY_OBJECT_NULL
) {
4055 assert(fourk_object
->pager_ready
);
4056 vm_object_mapping_wait(fourk_object
,
4058 vm_object_mapping_begin(fourk_object
);
4059 vm_object_unlock(fourk_object
);
4061 kr
= memory_object_map(pager
, VM_PROT_READ
);
4062 assert(kr
== KERN_SUCCESS
);
4064 vm_object_lock(fourk_object
);
4065 vm_object_mapping_end(fourk_object
);
4067 vm_object_unlock(fourk_object
);
4071 if (fourk_object
!= VM_OBJECT_NULL
) {
4072 vm_object_deallocate(fourk_object
);
4073 fourk_object
= VM_OBJECT_NULL
;
4074 memory_object_deallocate(fourk_mem_obj
);
4075 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
4078 assert(map_locked
== TRUE
);
4080 if (!keep_map_locked
) {
4086 * We can't hold the map lock if we enter this block.
4089 if (result
== KERN_SUCCESS
) {
4090 /* Wire down the new entry if the user
4091 * requested all new map entries be wired.
4093 if ((map
->wiring_required
) || (superpage_size
)) {
4094 assert(!keep_map_locked
);
4095 pmap_empty
= FALSE
; /* pmap won't be empty */
4096 kr
= vm_map_wire_kernel(map
, start
, end
,
4097 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
4104 if (result
!= KERN_SUCCESS
) {
4105 if (new_mapping_established
) {
4107 * We have to get rid of the new mappings since we
4108 * won't make them available to the user.
4109 * Try and do that atomically, to minimize the risk
4110 * that someone else creates new mappings in that range.
4112 zap_new_map
= vm_map_create(PMAP_NULL
,
4115 map
->hdr
.entries_pageable
);
4116 vm_map_set_page_shift(zap_new_map
,
4117 VM_MAP_PAGE_SHIFT(map
));
4118 vm_map_disable_hole_optimization(zap_new_map
);
4124 (void) vm_map_delete(map
, *address
, *address
+ size
,
4125 (VM_MAP_REMOVE_SAVE_ENTRIES
|
4126 VM_MAP_REMOVE_NO_MAP_ALIGN
),
4129 if (zap_old_map
!= VM_MAP_NULL
&&
4130 zap_old_map
->hdr
.nentries
!= 0) {
4131 vm_map_entry_t entry1
, entry2
;
4134 * The new mapping failed. Attempt to restore
4135 * the old mappings, saved in the "zap_old_map".
4142 /* first check if the coast is still clear */
4143 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
4144 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
4145 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
4146 vm_map_lookup_entry(map
, end
, &entry2
) ||
4149 * Part of that range has already been
4150 * re-mapped: we can't restore the old
4153 vm_map_enter_restore_failures
++;
4156 * Transfer the saved map entries from
4157 * "zap_old_map" to the original "map",
4158 * inserting them all after "entry1".
4160 for (entry2
= vm_map_first_entry(zap_old_map
);
4161 entry2
!= vm_map_to_entry(zap_old_map
);
4162 entry2
= vm_map_first_entry(zap_old_map
)) {
4163 vm_map_size_t entry_size
;
4165 entry_size
= (entry2
->vme_end
-
4167 vm_map_store_entry_unlink(zap_old_map
,
4169 zap_old_map
->size
-= entry_size
;
4170 vm_map_store_entry_link(map
, entry1
, entry2
,
4171 VM_MAP_KERNEL_FLAGS_NONE
);
4172 map
->size
+= entry_size
;
4175 if (map
->wiring_required
) {
4177 * XXX TODO: we should rewire the
4181 vm_map_enter_restore_successes
++;
4187 * The caller is responsible for releasing the lock if it requested to
4188 * keep the map locked.
4190 if (map_locked
&& !keep_map_locked
) {
4195 * Get rid of the "zap_maps" and all the map entries that
4196 * they may still contain.
4198 if (zap_old_map
!= VM_MAP_NULL
) {
4199 vm_map_destroy(zap_old_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
4200 zap_old_map
= VM_MAP_NULL
;
4202 if (zap_new_map
!= VM_MAP_NULL
) {
4203 vm_map_destroy(zap_new_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
4204 zap_new_map
= VM_MAP_NULL
;
4211 #endif /* __arm64__ */
4214 * Counters for the prefault optimization.
/*
 * Bumped with OSIncrementAtomic64() inside the prefault loop of
 * vm_map_enter_mem_object_helper().
 * NOTE(review): presumably counts pages entered successfully; the branch
 * structure around the increment is not fully visible -- confirm.
 */
4216 int64_t vm_prefault_nb_pages
= 0;
/*
 * Bumped with OSIncrementAtomic64() in the same loop when
 * pmap_enter_options() does not return KERN_SUCCESS.
 */
4217 int64_t vm_prefault_nb_bailout
= 0;
/*
 * vm_map_enter_mem_object_helper:
 *
 * Shared implementation called by vm_map_enter_mem_object() and
 * vm_map_enter_mem_object_prefault().
 *
 * NOTE(review): this listing is a lossy extraction; a number of source
 * lines (braces, "else" arms, some parameters and call arguments) are not
 * visible.  The notes below describe only what the visible lines show.
 *
 * Visible flow:
 *  - try_prefault starts as (page_list_count != 0).
 *  - VM_PROT_IS_MASK is recorded from cur_protection / max_protection
 *    into mask_cur_protection / mask_max_protection and then cleared.
 *  - KERN_INVALID_ARGUMENT is returned for a null target_map, protection
 *    bits outside VM_PROT_ALL, an inheritance above
 *    VM_INHERIT_LAST_VALID, a zero initial_size, or a prefault request
 *    combined with "copy" or a null page_list.
 *  - "port" selects what gets mapped: an invalid port gives
 *    VM_OBJECT_NULL; an IKOT_NAMED_ENTRY port is handled according to
 *    the named entry (is_sub_map, is_copy, is_object, otherwise panic);
 *    an IKOT_MEMORY_OBJECT port goes through
 *    memory_object_to_vm_object().
 *  - The result is entered with vm_map_enter(), or with
 *    vm_map_enter_fourk() (presumably when "fourk" is set -- the
 *    selecting condition is not visible).
 *  - If the mapping succeeded and try_prefault is set, page_list entries
 *    are entered with pmap_enter_options(), and the map is unlocked when
 *    vmkf_keep_map_locked was set.
 *  - At the visible tail, *address is set to map_addr, plus
 *    offset_in_mapping when VM_FLAGS_RETURN_DATA_ADDR or
 *    VM_FLAGS_RETURN_4K_DATA_ADDR is present in flags.
 */
4219 static kern_return_t
4220 vm_map_enter_mem_object_helper(
4221 vm_map_t target_map
,
4222 vm_map_offset_t
*address
,
4223 vm_map_size_t initial_size
,
4224 vm_map_offset_t mask
,
4226 vm_map_kernel_flags_t vmk_flags
,
4229 vm_object_offset_t offset
,
4231 vm_prot_t cur_protection
,
4232 vm_prot_t max_protection
,
4233 vm_inherit_t inheritance
,
4234 upl_page_list_ptr_t page_list
,
4235 unsigned int page_list_count
)
4237 vm_map_address_t map_addr
;
4238 vm_map_size_t map_size
;
4240 vm_object_size_t size
;
4241 kern_return_t result
;
4242 boolean_t mask_cur_protection
, mask_max_protection
;
4243 boolean_t kernel_prefault
, try_prefault
= (page_list_count
!= 0);
4244 vm_map_offset_t offset_in_mapping
= 0;
4246 boolean_t fourk
= vmk_flags
.vmkf_fourk
;
4247 #endif /* __arm64__ */
/* Prefaulting is switched off when the target map uses a page shift below PAGE_SHIFT. */
4249 if (VM_MAP_PAGE_SHIFT(target_map
) < PAGE_SHIFT
) {
4250 /* XXX TODO4K prefaulting depends on page size... */
4251 try_prefault
= FALSE
;
4254 assertf(vmk_flags
.__vmkf_unused
== 0, "vmk_flags unused=0x%x\n", vmk_flags
.__vmkf_unused
);
/* Remember whether VM_PROT_IS_MASK was set on each protection, then strip it. */
4256 mask_cur_protection
= cur_protection
& VM_PROT_IS_MASK
;
4257 mask_max_protection
= max_protection
& VM_PROT_IS_MASK
;
4258 cur_protection
&= ~VM_PROT_IS_MASK
;
4259 max_protection
&= ~VM_PROT_IS_MASK
;
4262 * Check arguments for validity
4264 if ((target_map
== VM_MAP_NULL
) ||
4265 (cur_protection
& ~VM_PROT_ALL
) ||
4266 (max_protection
& ~VM_PROT_ALL
) ||
4267 (inheritance
> VM_INHERIT_LAST_VALID
) ||
4268 (try_prefault
&& (copy
|| !page_list
)) ||
4269 initial_size
== 0) {
4270 return KERN_INVALID_ARGUMENT
;
4274 if (fourk
&& VM_MAP_PAGE_SHIFT(target_map
) < PAGE_SHIFT
) {
4275 /* no "fourk" if map is using a sub-page page size */
4279 map_addr
= vm_map_trunc_page(*address
, FOURK_PAGE_MASK
);
4280 map_size
= vm_map_round_page(initial_size
, FOURK_PAGE_MASK
);
4282 #endif /* __arm64__ */
4284 map_addr
= vm_map_trunc_page(*address
,
4285 VM_MAP_PAGE_MASK(target_map
));
4286 map_size
= vm_map_round_page(initial_size
,
4287 VM_MAP_PAGE_MASK(target_map
));
/* "size" starts as the object-page-rounded initial_size; the named-entry path below recomputes it. */
4289 size
= vm_object_round_page(initial_size
);
4292 * Find the vm object (if any) corresponding to this port.
4294 if (!IP_VALID(port
)) {
4295 object
= VM_OBJECT_NULL
;
4298 } else if (ip_kotype(port
) == IKOT_NAMED_ENTRY
) {
4299 vm_named_entry_t named_entry
;
4300 vm_object_offset_t data_offset
;
4302 named_entry
= (vm_named_entry_t
) ip_get_kobject(port
);
4304 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4305 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4306 data_offset
= named_entry
->data_offset
;
4307 offset
+= named_entry
->data_offset
;
4312 /* a few checks to make sure user is obeying rules */
/* An offset at or beyond the named entry size is rejected. */
4314 if (offset
>= named_entry
->size
) {
4315 return KERN_INVALID_RIGHT
;
4317 size
= named_entry
->size
- offset
;
/* When the IS_MASK flag was given, narrow the requested protections to those of the named entry. */
4319 if (mask_max_protection
) {
4320 max_protection
&= named_entry
->protection
;
4322 if (mask_cur_protection
) {
4323 cur_protection
&= named_entry
->protection
;
4325 if ((named_entry
->protection
& max_protection
) !=
4327 return KERN_INVALID_RIGHT
;
4329 if ((named_entry
->protection
& cur_protection
) !=
4331 return KERN_INVALID_RIGHT
;
/* offset + size wrapped around. */
4333 if (offset
+ size
< offset
) {
4335 return KERN_INVALID_ARGUMENT
;
4337 if (named_entry
->size
< (offset
+ initial_size
)) {
4338 return KERN_INVALID_ARGUMENT
;
4341 if (named_entry
->is_copy
) {
4342 /* for a vm_map_copy, we can only map it whole */
4343 if ((size
!= named_entry
->size
) &&
4344 (vm_map_round_page(size
,
4345 VM_MAP_PAGE_MASK(target_map
)) ==
4346 named_entry
->size
)) {
4347 /* XXX FBDP use the rounded size... */
4348 size
= vm_map_round_page(
4350 VM_MAP_PAGE_MASK(target_map
));
4354 /* the callers parameter offset is defined to be the */
4355 /* offset from beginning of named entry offset in object */
4356 offset
= offset
+ named_entry
->offset
;
4358 if (!VM_MAP_PAGE_ALIGNED(size
,
4359 VM_MAP_PAGE_MASK(target_map
))) {
4361 * Let's not map more than requested;
4362 * vm_map_enter() will handle this "not map-aligned"
4368 named_entry_lock(named_entry
);
4369 if (named_entry
->is_sub_map
) {
4372 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4373 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4374 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4377 submap
= named_entry
->backing
.map
;
4378 vm_map_reference(submap
);
4379 named_entry_unlock(named_entry
);
4381 vmk_flags
.vmkf_submap
= TRUE
;
4383 result
= vm_map_enter(target_map
,
4390 (vm_object_t
)(uintptr_t) submap
,
4396 if (result
!= KERN_SUCCESS
) {
4397 vm_map_deallocate(submap
);
4400 * No need to lock "submap" just to check its
4401 * "mapped" flag: that flag is never reset
4402 * once it's been set and if we race, we'll
4403 * just end up setting it twice, which is OK.
4405 if (submap
->mapped_in_other_pmaps
== FALSE
&&
4406 vm_map_pmap(submap
) != PMAP_NULL
&&
4407 vm_map_pmap(submap
) !=
4408 vm_map_pmap(target_map
)) {
4410 * This submap is being mapped in a map
4411 * that uses a different pmap.
4412 * Set its "mapped_in_other_pmaps" flag
4413 * to indicate that we now need to
4414 * remove mappings from all pmaps rather
4415 * than just the submap's pmap.
4417 vm_map_lock(submap
);
4418 submap
->mapped_in_other_pmaps
= TRUE
;
4419 vm_map_unlock(submap
);
4421 *address
= map_addr
;
4424 } else if (named_entry
->is_copy
) {
4426 vm_map_copy_t copy_map
;
4427 vm_map_entry_t copy_entry
;
4428 vm_map_offset_t copy_addr
;
4429 vm_map_copy_t target_copy_map
;
4430 vm_map_offset_t overmap_start
, overmap_end
;
4431 vm_map_offset_t trimmed_start
;
4432 vm_map_size_t target_size
;
4434 if (flags
& ~(VM_FLAGS_FIXED
|
4436 VM_FLAGS_OVERWRITE
|
4437 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4438 VM_FLAGS_RETURN_DATA_ADDR
|
4439 VM_FLAGS_ALIAS_MASK
)) {
4440 named_entry_unlock(named_entry
);
4441 return KERN_INVALID_ARGUMENT
;
4444 copy_map
= named_entry
->backing
.copy
;
4445 assert(copy_map
->type
== VM_MAP_COPY_ENTRY_LIST
);
4446 if (copy_map
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
4447 /* unsupported type; should not happen */
4448 printf("vm_map_enter_mem_object: "
4449 "memory_entry->backing.copy "
4450 "unsupported type 0x%x\n",
4452 named_entry_unlock(named_entry
);
4453 return KERN_INVALID_ARGUMENT
;
4456 if (VM_MAP_PAGE_SHIFT(target_map
) != copy_map
->cpy_hdr
.page_shift
) {
4457 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map
, offset
, (uint64_t)map_size
, copy_map
->cpy_hdr
.page_shift
, target_map
, VM_MAP_PAGE_SHIFT(target_map
));
4460 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4461 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4462 offset_in_mapping
= offset
& VM_MAP_PAGE_MASK(target_map
);
4463 if (flags
& VM_FLAGS_RETURN_4K_DATA_ADDR
) {
4464 offset_in_mapping
&= ~((signed)(0xFFF));
4468 target_copy_map
= VM_MAP_COPY_NULL
;
4469 target_size
= copy_map
->size
;
4473 if (copy_map
->cpy_hdr
.page_shift
!= VM_MAP_PAGE_SHIFT(target_map
)) {
4474 DEBUG4K_ADJUST("adjusting...\n");
4475 kr
= vm_map_copy_adjust_to_target(
4477 offset
/* includes data_offset */,
4485 if (kr
!= KERN_SUCCESS
) {
4486 named_entry_unlock(named_entry
);
4489 target_size
= target_copy_map
->size
;
4490 if (trimmed_start
>= data_offset
) {
4491 data_offset
= offset
& VM_MAP_PAGE_MASK(target_map
);
4493 data_offset
-= trimmed_start
;
4496 target_copy_map
= copy_map
;
4499 /* reserve a contiguous range */
4500 kr
= vm_map_enter(target_map
,
4502 vm_map_round_page(target_size
, VM_MAP_PAGE_MASK(target_map
)),
4504 flags
& (VM_FLAGS_ANYWHERE
|
4505 VM_FLAGS_OVERWRITE
|
4506 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4507 VM_FLAGS_RETURN_DATA_ADDR
),
4516 if (kr
!= KERN_SUCCESS
) {
4517 DEBUG4K_ERROR("kr 0x%x\n", kr
);
4518 if (target_copy_map
!= copy_map
) {
4519 vm_map_copy_discard(target_copy_map
);
4520 target_copy_map
= VM_MAP_COPY_NULL
;
4522 named_entry_unlock(named_entry
);
4526 copy_addr
= map_addr
;
4528 for (copy_entry
= vm_map_copy_first_entry(target_copy_map
);
4529 copy_entry
!= vm_map_copy_to_entry(target_copy_map
);
4530 copy_entry
= copy_entry
->vme_next
) {
4532 vm_map_kernel_flags_t vmk_remap_flags
;
4533 vm_map_t copy_submap
;
4534 vm_object_t copy_object
;
4535 vm_map_size_t copy_size
;
4536 vm_object_offset_t copy_offset
;
4540 vmk_remap_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
4542 copy_object
= VME_OBJECT(copy_entry
);
4543 copy_offset
= VME_OFFSET(copy_entry
);
4544 copy_size
= (copy_entry
->vme_end
-
4545 copy_entry
->vme_start
);
4546 VM_GET_FLAGS_ALIAS(flags
, copy_vm_alias
);
4547 if (copy_vm_alias
== 0) {
4549 * Caller does not want a specific
4550 * alias for this new mapping: use
4551 * the alias of the original mapping.
4553 copy_vm_alias
= VME_ALIAS(copy_entry
);
4557 if ((copy_addr
+ copy_size
) >
4559 overmap_start
+ overmap_end
+
4560 named_entry
->size
/* XXX full size */)) {
4561 /* over-mapping too much !? */
4562 kr
= KERN_INVALID_ARGUMENT
;
4563 DEBUG4K_ERROR("kr 0x%x\n", kr
);
4568 /* take a reference on the object */
4569 if (copy_entry
->is_sub_map
) {
4570 vmk_remap_flags
.vmkf_submap
= TRUE
;
4571 copy_submap
= VME_SUBMAP(copy_entry
);
4572 vm_map_lock(copy_submap
);
4573 vm_map_reference(copy_submap
);
4574 vm_map_unlock(copy_submap
);
4575 copy_object
= (vm_object_t
)(uintptr_t) copy_submap
;
4577 copy_object
!= VM_OBJECT_NULL
&&
4578 (copy_entry
->needs_copy
||
4579 copy_object
->shadowed
||
4580 (!copy_object
->true_share
&&
4581 !copy_entry
->is_shared
&&
4582 copy_object
->vo_size
> copy_size
))) {
4584 * We need to resolve our side of this
4585 * "symmetric" copy-on-write now; we
4586 * need a new object to map and share,
4587 * instead of the current one which
4588 * might still be shared with the
4591 * Note: A "vm_map_copy_t" does not
4592 * have a lock but we're protected by
4593 * the named entry's lock here.
4595 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4596 VME_OBJECT_SHADOW(copy_entry
, copy_size
);
4597 if (!copy_entry
->needs_copy
&&
4598 copy_entry
->protection
& VM_PROT_WRITE
) {
4601 prot
= copy_entry
->protection
& ~VM_PROT_WRITE
;
4602 vm_object_pmap_protect(copy_object
,
4611 copy_entry
->needs_copy
= FALSE
;
4612 copy_entry
->is_shared
= TRUE
;
4613 copy_object
= VME_OBJECT(copy_entry
);
4614 copy_offset
= VME_OFFSET(copy_entry
);
4615 vm_object_lock(copy_object
);
4616 vm_object_reference_locked(copy_object
);
4617 if (copy_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
4618 /* we're about to make a shared mapping of this object */
4619 copy_object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
4620 copy_object
->true_share
= TRUE
;
4622 vm_object_unlock(copy_object
);
4625 * We already have the right object
4628 copy_object
= VME_OBJECT(copy_entry
);
4629 vm_object_reference(copy_object
);
4632 /* over-map the object into destination */
4633 remap_flags
|= flags
;
4634 remap_flags
|= VM_FLAGS_FIXED
;
4635 remap_flags
|= VM_FLAGS_OVERWRITE
;
4636 remap_flags
&= ~VM_FLAGS_ANYWHERE
;
4637 if (!copy
&& !copy_entry
->is_sub_map
) {
4639 * copy-on-write should have been
4640 * resolved at this point, or we would
4641 * end up sharing instead of copying.
4643 assert(!copy_entry
->needs_copy
);
4645 #if XNU_TARGET_OS_OSX
4646 if (copy_entry
->used_for_jit
) {
4647 vmk_remap_flags
.vmkf_map_jit
= TRUE
;
4649 #endif /* XNU_TARGET_OS_OSX */
4651 assertf((copy_vm_alias
& VME_ALIAS_MASK
) == copy_vm_alias
,
4652 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias
, (copy_vm_alias
& VME_ALIAS_MASK
));
4653 kr
= vm_map_enter(target_map
,
4656 (vm_map_offset_t
) 0,
4659 (vm_tag_t
) copy_vm_alias
, /* see comment at end of vm_fault_unwire re. cast*/
4662 ((copy_object
== NULL
) ? FALSE
: copy
),
4666 if (kr
!= KERN_SUCCESS
) {
4667 DEBUG4K_SHARE("failed kr 0x%x\n", kr
);
4668 if (copy_entry
->is_sub_map
) {
4669 vm_map_deallocate(copy_submap
);
4671 vm_object_deallocate(copy_object
);
4678 copy_addr
+= copy_size
;
4681 if (kr
== KERN_SUCCESS
) {
4682 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4683 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4684 *address
= map_addr
+ offset_in_mapping
;
4686 *address
= map_addr
;
4688 if (overmap_start
) {
4689 *address
+= overmap_start
;
4690 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map
, (uint64_t)map_addr
, (uint64_t) offset_in_mapping
, (uint64_t)overmap_start
, (uint64_t)*address
);
4693 named_entry_unlock(named_entry
);
4694 if (target_copy_map
!= copy_map
) {
4695 vm_map_copy_discard(target_copy_map
);
4696 target_copy_map
= VM_MAP_COPY_NULL
;
4699 if (kr
!= KERN_SUCCESS
) {
4700 if (!(flags
& VM_FLAGS_OVERWRITE
)) {
4701 /* deallocate the contiguous range */
4702 (void) vm_deallocate(target_map
,
4711 if (named_entry
->is_object
) {
4712 unsigned int access
;
4713 vm_prot_t protections
;
4714 unsigned int wimg_mode
;
4716 /* we are mapping a VM object */
4718 protections
= named_entry
->protection
& VM_PROT_ALL
;
4719 access
= GET_MAP_MEM(named_entry
->protection
);
4721 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4722 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4723 offset_in_mapping
= offset
- VM_MAP_TRUNC_PAGE(offset
, VM_MAP_PAGE_MASK(target_map
));
4724 if (flags
& VM_FLAGS_RETURN_4K_DATA_ADDR
) {
4725 offset_in_mapping
&= ~((signed)(0xFFF));
4727 offset
= VM_MAP_TRUNC_PAGE(offset
, VM_MAP_PAGE_MASK(target_map
));
4728 map_size
= VM_MAP_ROUND_PAGE((offset
+ offset_in_mapping
+ initial_size
) - offset
, VM_MAP_PAGE_MASK(target_map
));
4731 object
= vm_named_entry_to_vm_object(named_entry
);
4732 assert(object
!= VM_OBJECT_NULL
);
4733 vm_object_lock(object
);
4734 named_entry_unlock(named_entry
);
4736 vm_object_reference_locked(object
);
4738 wimg_mode
= object
->wimg_bits
;
4739 vm_prot_to_wimg(access
, &wimg_mode
);
4740 if (object
->wimg_bits
!= wimg_mode
) {
4741 vm_object_change_wimg_mode(object
, wimg_mode
);
4744 vm_object_unlock(object
);
4746 panic("invalid VM named entry %p", named_entry
);
4748 } else if (ip_kotype(port
) == IKOT_MEMORY_OBJECT
) {
4750 * JMM - This is temporary until we unify named entries
4751 * and raw memory objects.
4753 * Detected fake ip_kotype for a memory object. In
4754 * this case, the port isn't really a port at all, but
4755 * instead is just a raw memory object.
4757 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4758 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4759 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4762 object
= memory_object_to_vm_object((memory_object_t
)port
);
4763 if (object
== VM_OBJECT_NULL
) {
4764 return KERN_INVALID_OBJECT
;
4766 vm_object_reference(object
);
4768 /* wait for object (if any) to be ready */
4769 if (object
!= VM_OBJECT_NULL
) {
4770 if (object
== kernel_object
) {
4771 printf("Warning: Attempt to map kernel object"
4772 " by a non-private kernel entity\n");
4773 return KERN_INVALID_OBJECT
;
4775 if (!object
->pager_ready
) {
4776 vm_object_lock(object
);
4778 while (!object
->pager_ready
) {
4779 vm_object_wait(object
,
4780 VM_OBJECT_EVENT_PAGER_READY
,
4782 vm_object_lock(object
);
4784 vm_object_unlock(object
);
4788 return KERN_INVALID_OBJECT
;
4791 if (object
!= VM_OBJECT_NULL
&&
4793 object
->pager
!= MEMORY_OBJECT_NULL
&&
4794 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
4795 memory_object_t pager
;
4796 vm_prot_t pager_prot
;
4800 * For "named" VM objects, let the pager know that the
4801 * memory object is being mapped. Some pagers need to keep
4802 * track of this, to know when they can reclaim the memory
4803 * object, for example.
4804 * VM calls memory_object_map() for each mapping (specifying
4805 * the protection of each mapping) and calls
4806 * memory_object_last_unmap() when all the mappings are gone.
4808 pager_prot
= max_protection
;
4811 * Copy-On-Write mapping: won't modify the
4814 pager_prot
&= ~VM_PROT_WRITE
;
4816 vm_object_lock(object
);
4817 pager
= object
->pager
;
4818 if (object
->named
&&
4819 pager
!= MEMORY_OBJECT_NULL
&&
4820 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
4821 assert(object
->pager_ready
);
4822 vm_object_mapping_wait(object
, THREAD_UNINT
);
4823 vm_object_mapping_begin(object
);
4824 vm_object_unlock(object
);
4826 kr
= memory_object_map(pager
, pager_prot
);
4827 assert(kr
== KERN_SUCCESS
);
4829 vm_object_lock(object
);
4830 vm_object_mapping_end(object
);
4832 vm_object_unlock(object
);
4836 * Perform the copy if requested
4840 vm_object_t new_object
;
4841 vm_object_offset_t new_offset
;
4843 result
= vm_object_copy_strategically(object
, offset
,
4845 &new_object
, &new_offset
,
4849 if (result
== KERN_MEMORY_RESTART_COPY
) {
4851 boolean_t src_needs_copy
;
4855 * We currently ignore src_needs_copy.
4856 * This really is the issue of how to make
4857 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4858 * non-kernel users to use. Solution forthcoming.
4859 * In the meantime, since we don't allow non-kernel
4860 * memory managers to specify symmetric copy,
4861 * we won't run into problems here.
4863 new_object
= object
;
4864 new_offset
= offset
;
4865 success
= vm_object_copy_quickly(&new_object
,
4871 result
= KERN_SUCCESS
;
4874 * Throw away the reference to the
4875 * original object, as it won't be mapped.
4878 vm_object_deallocate(object
);
4880 if (result
!= KERN_SUCCESS
) {
4884 object
= new_object
;
4885 offset
= new_offset
;
4889 * If non-kernel users want to try to prefault pages, the mapping and prefault
4890 * needs to be atomic.
/* kernel_prefault: prefault on a map accepted by vm_kernel_map_is_kernel(); any other prefault keeps the map locked. */
4892 kernel_prefault
= (try_prefault
&& vm_kernel_map_is_kernel(target_map
));
4893 vmk_flags
.vmkf_keep_map_locked
= (try_prefault
&& !kernel_prefault
);
4897 /* map this object in a "4K" pager */
4898 result
= vm_map_enter_fourk(target_map
,
4901 (vm_map_offset_t
) mask
,
4912 #endif /* __arm64__ */
4914 result
= vm_map_enter(target_map
,
4915 &map_addr
, map_size
,
4916 (vm_map_offset_t
)mask
,
4922 cur_protection
, max_protection
,
4925 if (result
!= KERN_SUCCESS
) {
4926 vm_object_deallocate(object
);
4930 * Try to prefault, and do not forget to release the vm map lock.
4932 if (result
== KERN_SUCCESS
&& try_prefault
) {
4933 mach_vm_address_t va
= map_addr
;
4934 kern_return_t kr
= KERN_SUCCESS
;
/* Only the non-kernel prefault case passes PMAP_OPTIONS_NOWAIT. */
4938 pmap_options
= kernel_prefault
? 0 : PMAP_OPTIONS_NOWAIT
;
4939 if (object
->internal
) {
4940 pmap_options
|= PMAP_OPTIONS_INTERNAL
;
4943 for (i
= 0; i
< page_list_count
; ++i
) {
4944 if (!UPL_VALID_PAGE(page_list
, i
)) {
4945 if (kernel_prefault
) {
4946 assertf(FALSE
, "kernel_prefault && !UPL_VALID_PAGE");
4947 result
= KERN_MEMORY_ERROR
;
4952 * If this function call failed, we should stop
4953 * trying to optimize, other calls are likely
4954 * going to fail too.
4956 * We are not gonna report an error for such
4957 * failure though. That's an optimization, not
4958 * something critical.
4960 kr
= pmap_enter_options(target_map
->pmap
,
4961 va
, UPL_PHYS_PAGE(page_list
, i
),
4962 cur_protection
, VM_PROT_NONE
,
4963 0, TRUE
, pmap_options
, NULL
);
4964 if (kr
!= KERN_SUCCESS
) {
4965 OSIncrementAtomic64(&vm_prefault_nb_bailout
);
4966 if (kernel_prefault
) {
4971 OSIncrementAtomic64(&vm_prefault_nb_pages
);
4974 /* Next virtual address */
4977 if (vmk_flags
.vmkf_keep_map_locked
) {
4978 vm_map_unlock(target_map
);
4982 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4983 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4984 *address
= map_addr
+ offset_in_mapping
;
4986 *address
= map_addr
;
/*
 * vm_map_enter_mem_object:
 *
 * Wrapper around vm_map_enter_mem_object_helper(): the helper's return
 * value is stored in "ret".  When "ret" is KERN_SUCCESS, "address" is
 * non-NULL and target_map->pmap is kernel_pmap, the range is reported
 * via kasan_notify_address(*address, initial_size).
 *
 * NOTE(review): in this listing the helper's remaining call arguments,
 * some parameters, any preprocessor guard around the KASAN call and the
 * return statement are not visible -- confirm against the full file.
 */
4992 vm_map_enter_mem_object(
4993 vm_map_t target_map
,
4994 vm_map_offset_t
*address
,
4995 vm_map_size_t initial_size
,
4996 vm_map_offset_t mask
,
4998 vm_map_kernel_flags_t vmk_flags
,
5001 vm_object_offset_t offset
,
5003 vm_prot_t cur_protection
,
5004 vm_prot_t max_protection
,
5005 vm_inherit_t inheritance
)
5009 ret
= vm_map_enter_mem_object_helper(target_map
,
/* Only mappings made in the kernel pmap are reported to KASAN. */
5026 if (ret
== KERN_SUCCESS
&& address
&& target_map
->pmap
== kernel_pmap
) {
5027 kasan_notify_address(*address
, initial_size
);
/*
 * vm_map_enter_mem_object_prefault:
 *
 * Wrapper around vm_map_enter_mem_object_helper() whose visible
 * parameter list ends with "page_list" and "page_list_count" where
 * vm_map_enter_mem_object() has "inheritance".  When the helper returns
 * KERN_SUCCESS, "address" is non-NULL and target_map->pmap is
 * kernel_pmap, kasan_notify_address(*address, initial_size) is called.
 *
 * NOTE(review): presumably page_list / page_list_count are forwarded to
 * the helper to drive its prefault loop; the call arguments and the
 * return statement are not visible in this listing -- confirm.
 */
5035 vm_map_enter_mem_object_prefault(
5036 vm_map_t target_map
,
5037 vm_map_offset_t
*address
,
5038 vm_map_size_t initial_size
,
5039 vm_map_offset_t mask
,
5041 vm_map_kernel_flags_t vmk_flags
,
5044 vm_object_offset_t offset
,
5045 vm_prot_t cur_protection
,
5046 vm_prot_t max_protection
,
5047 upl_page_list_ptr_t page_list
,
5048 unsigned int page_list_count
)
5052 ret
= vm_map_enter_mem_object_helper(target_map
,
/* Only mappings made in the kernel pmap are reported to KASAN. */
5069 if (ret
== KERN_SUCCESS
&& address
&& target_map
->pmap
== kernel_pmap
) {
5070 kasan_notify_address(*address
, initial_size
);
/*
 * vm_map_enter_mem_object_control:
 *
 * Map the VM object that stands behind a memory_object_control_t into
 * "target_map".
 *
 * Steps visible in this chunk, in order:
 *  - argument validation: a null map, protection bits outside
 *    VM_PROT_ALL, an inheritance above VM_INHERIT_LAST_VALID or a zero
 *    initial_size yield KERN_INVALID_ARGUMENT;
 *  - *address is truncated and initial_size rounded with the map's page
 *    mask (a separate "fourk" variant is present under __arm64__);
 *    "size" is the object-page-rounded initial_size;
 *  - "control" is converted with memory_object_control_to_vm_object();
 *    a null object, or kernel_object, yields KERN_INVALID_OBJECT;
 *  - under the object lock the ref_count is bumped and
 *    vm_object_res_reference() is called;
 *  - for a named object with a pager and a copy strategy other than
 *    MEMORY_OBJECT_COPY_NONE, memory_object_map(pager, pager_prot) is
 *    called with the object unlocked, bracketed by
 *    vm_object_mapping_begin()/vm_object_mapping_end();
 *  - a copy path uses vm_object_copy_strategically(), with
 *    vm_object_copy_quickly() used when the former answers
 *    KERN_MEMORY_RESTART_COPY; the reference on the original object is
 *    dropped and object/offset are replaced by the copy;
 *  - the mapping is entered through vm_map_enter() (or
 *    vm_map_enter_fourk()); a failing result drops the object reference;
 *  - map_addr is written back through "address".
 *
 * NOTE(review): many lines of this definition are missing from the
 * chunk (return type, some parameters and locals, the conditions
 * guarding the pager_prot masking and the copy path, else-arms, the
 * final return).  In particular, whether *address is updated only on
 * success cannot be confirmed from here.
 */
5079 vm_map_enter_mem_object_control(
5080 vm_map_t target_map
,
5081 vm_map_offset_t
*address
,
5082 vm_map_size_t initial_size
,
5083 vm_map_offset_t mask
,
5085 vm_map_kernel_flags_t vmk_flags
,
5087 memory_object_control_t control
,
5088 vm_object_offset_t offset
,
5090 vm_prot_t cur_protection
,
5091 vm_prot_t max_protection
,
5092 vm_inherit_t inheritance
)
5094 vm_map_address_t map_addr
;
5095 vm_map_size_t map_size
;
5097 vm_object_size_t size
;
5098 kern_return_t result
;
5099 memory_object_t pager
;
5100 vm_prot_t pager_prot
;
5103 boolean_t fourk
= vmk_flags
.vmkf_fourk
;
5104 #endif /* __arm64__ */
5107 * Check arguments for validity
5109 if ((target_map
== VM_MAP_NULL
) ||
5110 (cur_protection
& ~VM_PROT_ALL
) ||
5111 (max_protection
& ~VM_PROT_ALL
) ||
5112 (inheritance
> VM_INHERIT_LAST_VALID
) ||
5113 initial_size
== 0) {
5114 return KERN_INVALID_ARGUMENT
;
5118 if (fourk
&& VM_MAP_PAGE_MASK(target_map
) < PAGE_MASK
) {
5123 map_addr
= vm_map_trunc_page(*address
,
5125 map_size
= vm_map_round_page(initial_size
,
5128 #endif /* __arm64__ */
5130 map_addr
= vm_map_trunc_page(*address
,
5131 VM_MAP_PAGE_MASK(target_map
));
5132 map_size
= vm_map_round_page(initial_size
,
5133 VM_MAP_PAGE_MASK(target_map
));
5135 size
= vm_object_round_page(initial_size
);
5137 object
= memory_object_control_to_vm_object(control
);
5139 if (object
== VM_OBJECT_NULL
) {
5140 return KERN_INVALID_OBJECT
;
5143 if (object
== kernel_object
) {
5144 printf("Warning: Attempt to map kernel object"
5145 " by a non-private kernel entity\n");
5146 return KERN_INVALID_OBJECT
;
5149 vm_object_lock(object
);
5150 object
->ref_count
++;
5151 vm_object_res_reference(object
);
5154 * For "named" VM objects, let the pager know that the
5155 * memory object is being mapped. Some pagers need to keep
5156 * track of this, to know when they can reclaim the memory
5157 * object, for example.
5158 * VM calls memory_object_map() for each mapping (specifying
5159 * the protection of each mapping) and calls
5160 * memory_object_last_unmap() when all the mappings are gone.
5162 pager_prot
= max_protection
;
5164 pager_prot
&= ~VM_PROT_WRITE
;
5166 pager
= object
->pager
;
5167 if (object
->named
&&
5168 pager
!= MEMORY_OBJECT_NULL
&&
5169 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
5170 assert(object
->pager_ready
);
5171 vm_object_mapping_wait(object
, THREAD_UNINT
);
5172 vm_object_mapping_begin(object
);
5173 vm_object_unlock(object
);
5175 kr
= memory_object_map(pager
, pager_prot
);
5176 assert(kr
== KERN_SUCCESS
);
5178 vm_object_lock(object
);
5179 vm_object_mapping_end(object
);
5181 vm_object_unlock(object
);
5184 * Perform the copy if requested
5188 vm_object_t new_object
;
5189 vm_object_offset_t new_offset
;
5191 result
= vm_object_copy_strategically(object
, offset
, size
,
5192 &new_object
, &new_offset
,
5196 if (result
== KERN_MEMORY_RESTART_COPY
) {
5198 boolean_t src_needs_copy
;
5202 * We currently ignore src_needs_copy.
5203 * This really is the issue of how to make
5204 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5205 * non-kernel users to use. Solution forthcoming.
5206 * In the meantime, since we don't allow non-kernel
5207 * memory managers to specify symmetric copy,
5208 * we won't run into problems here.
5210 new_object
= object
;
5211 new_offset
= offset
;
5212 success
= vm_object_copy_quickly(&new_object
,
5217 result
= KERN_SUCCESS
;
5220 * Throw away the reference to the
5221 * original object, as it won't be mapped.
5224 vm_object_deallocate(object
);
5226 if (result
!= KERN_SUCCESS
) {
5230 object
= new_object
;
5231 offset
= new_offset
;
5236 result
= vm_map_enter_fourk(target_map
,
5239 (vm_map_offset_t
)mask
,
5245 cur_protection
, max_protection
,
5248 #endif /* __arm64__ */
5250 result
= vm_map_enter(target_map
,
5251 &map_addr
, map_size
,
5252 (vm_map_offset_t
)mask
,
5258 cur_protection
, max_protection
,
5261 if (result
!= KERN_SUCCESS
) {
5262 vm_object_deallocate(object
);
5264 *address
= map_addr
;
/*
 * Physical address bounds defined elsewhere.  In this chunk they are only
 * read by an assertion that each contiguous page's physical page number
 * lies between (avail_start >> PAGE_SHIFT) and (avail_end >> PAGE_SHIFT).
 */
5273 extern pmap_paddr_t avail_start
, avail_end
;
5277 * Allocate memory in the specified map, with the caveat that
5278 * the memory is physically contiguous. This call may fail
5279 * if the system can't find sufficient contiguous memory.
5280 * This call may cause or lead to heart-stopping amounts of
5283 * Memory obtained from this call should be freed in the
5284 * normal way, viz., via vm_deallocate.
/*
 * Contiguous-physical-memory allocation into a map (the panic strings
 * below call it "vm_allocate_cpm").
 *
 * Steps visible in this chunk, in order:
 *  - "anywhere" is derived from VM_FLAGS_ANYWHERE in "flags";
 *  - maps whose page shift differs from PAGE_SHIFT are refused with
 *    KERN_NOT_SUPPORTED;
 *  - *addr is either set to vm_map_min(map) or truncated to the map page
 *    mask, and "size" is rounded up to the map page mask;
 *  - a size above VM_MAX_ADDRESS yields KERN_RESOURCE_SHORTAGE;
 *  - cpm_allocate() supplies a chain of pages ("pages");
 *  - a fresh object of that size is allocated, and each page is unchained,
 *    marked not busy and inserted at successive PAGE_SIZE offsets under
 *    the object lock;
 *  - an extra object reference is taken before the object is inserted
 *    into the map; if the insertion fails both references are dropped,
 *    which releases the pages;
 *  - pmap_pageable(pmap, start, end, FALSE) is called for the range, then
 *    every page is looked up, zero-filled and entered via vm_fault_enter();
 *  - under MACH_ASSERT the pages are re-walked to check their state and
 *    that physical page numbers are consecutive (panic otherwise);
 *  - finally the extra object reference is dropped.
 *
 * NOTE(review): the name line, several parameters and locals, the
 * conditions around the early "return KERN_SUCCESS" and the
 * anywhere/fixed address choice, the map-insertion call itself and the
 * final return are missing from this chunk -- confirm against the full
 * file.
 */
5289 vm_map_offset_t
*addr
,
5293 vm_object_t cpm_obj
;
5297 vm_map_offset_t va
, start
, end
, offset
;
5299 vm_map_offset_t prev_addr
= 0;
5300 #endif /* MACH_ASSERT */
5302 boolean_t anywhere
= ((VM_FLAGS_ANYWHERE
& flags
) != 0);
5305 if (VM_MAP_PAGE_SHIFT(map
) != PAGE_SHIFT
) {
5306 /* XXX TODO4K do we need to support this? */
5308 return KERN_NOT_SUPPORTED
;
5311 VM_GET_FLAGS_ALIAS(flags
, tag
);
5315 return KERN_SUCCESS
;
5318 *addr
= vm_map_min(map
);
5320 *addr
= vm_map_trunc_page(*addr
,
5321 VM_MAP_PAGE_MASK(map
));
5323 size
= vm_map_round_page(size
,
5324 VM_MAP_PAGE_MASK(map
));
5327 * LP64todo - cpm_allocate should probably allow
5328 * allocations of >4GB, but not with the current
5329 * algorithm, so just cast down the size for now.
5331 if (size
> VM_MAX_ADDRESS
) {
5332 return KERN_RESOURCE_SHORTAGE
;
5334 if ((kr
= cpm_allocate(CAST_DOWN(vm_size_t
, size
),
5335 &pages
, 0, 0, TRUE
, flags
)) != KERN_SUCCESS
) {
5339 cpm_obj
= vm_object_allocate((vm_object_size_t
)size
);
5340 assert(cpm_obj
!= VM_OBJECT_NULL
);
5341 assert(cpm_obj
->internal
);
5342 assert(cpm_obj
->vo_size
== (vm_object_size_t
)size
);
5343 assert(cpm_obj
->can_persist
== FALSE
);
5344 assert(cpm_obj
->pager_created
== FALSE
);
5345 assert(cpm_obj
->pageout
== FALSE
);
5346 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
5349 * Insert pages into object.
5352 vm_object_lock(cpm_obj
);
5353 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
5355 pages
= NEXT_PAGE(m
);
5356 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
5358 assert(!m
->vmp_gobbled
);
5359 assert(!m
->vmp_wanted
);
5360 assert(!m
->vmp_pageout
);
5361 assert(!m
->vmp_tabled
);
5362 assert(VM_PAGE_WIRED(m
));
5363 assert(m
->vmp_busy
);
5364 assert(VM_PAGE_GET_PHYS_PAGE(m
) >= (avail_start
>> PAGE_SHIFT
) && VM_PAGE_GET_PHYS_PAGE(m
) <= (avail_end
>> PAGE_SHIFT
));
5366 m
->vmp_busy
= FALSE
;
5367 vm_page_insert(m
, cpm_obj
, offset
);
5369 assert(cpm_obj
->resident_page_count
== size
/ PAGE_SIZE
);
5370 vm_object_unlock(cpm_obj
);
5373 * Hang onto a reference on the object in case a
5374 * multi-threaded application for some reason decides
5375 * to deallocate the portion of the address space into
5376 * which we will insert this object.
5378 * Unfortunately, we must insert the object now before
5379 * we can talk to the pmap module about which addresses
5380 * must be wired down. Hence, the race with a multi-
5383 vm_object_reference(cpm_obj
);
5386 * Insert object into map.
5395 VM_MAP_KERNEL_FLAGS_NONE
,
5397 (vm_object_offset_t
)0,
5401 VM_INHERIT_DEFAULT
);
5403 if (kr
!= KERN_SUCCESS
) {
5405 * A CPM object doesn't have can_persist set,
5406 * so all we have to do is deallocate it to
5407 * free up these pages.
5409 assert(cpm_obj
->pager_created
== FALSE
);
5410 assert(cpm_obj
->can_persist
== FALSE
);
5411 assert(cpm_obj
->pageout
== FALSE
);
5412 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
5413 vm_object_deallocate(cpm_obj
); /* kill acquired ref */
5414 vm_object_deallocate(cpm_obj
); /* kill creation ref */
5418 * Inform the physical mapping system that the
5419 * range of addresses may not fault, so that
5420 * page tables and such can be locked down as well.
5424 pmap
= vm_map_pmap(map
);
5425 pmap_pageable(pmap
, start
, end
, FALSE
);
5428 * Enter each page into the pmap, to avoid faults.
5429 * Note that this loop could be coded more efficiently,
5430 * if the need arose, rather than looking up each page
5433 for (offset
= 0, va
= start
; offset
< size
;
5434 va
+= PAGE_SIZE
, offset
+= PAGE_SIZE
) {
5437 vm_object_lock(cpm_obj
);
5438 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
5439 assert(m
!= VM_PAGE_NULL
);
5441 vm_page_zero_fill(m
);
5443 type_of_fault
= DBG_ZERO_FILL_FAULT
;
5445 vm_fault_enter(m
, pmap
, va
,
5447 VM_PROT_ALL
, VM_PROT_WRITE
,
5449 FALSE
, /* change_wiring */
5450 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
5451 FALSE
, /* no_cache */
5452 FALSE
, /* cs_bypass */
5454 0, /* pmap_options */
5455 NULL
, /* need_retry */
5458 vm_object_unlock(cpm_obj
);
5463 * Verify ordering in address space.
5465 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
5466 vm_object_lock(cpm_obj
);
5467 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
5468 vm_object_unlock(cpm_obj
);
5469 if (m
== VM_PAGE_NULL
) {
5470 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5471 cpm_obj
, (uint64_t)offset
);
5473 assert(m
->vmp_tabled
);
5474 assert(!m
->vmp_busy
);
5475 assert(!m
->vmp_wanted
);
5476 assert(!m
->vmp_fictitious
);
5477 assert(!m
->vmp_private
);
5478 assert(!m
->vmp_absent
);
5479 assert(!m
->vmp_error
);
5480 assert(!m
->vmp_cleaning
);
5481 assert(!m
->vmp_laundry
);
5482 assert(!m
->vmp_precious
);
5483 assert(!m
->vmp_clustered
);
5485 if (VM_PAGE_GET_PHYS_PAGE(m
) != prev_addr
+ 1) {
5486 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5487 (uint64_t)start
, (uint64_t)end
, (uint64_t)va
);
5488 printf("obj %p off 0x%llx\n", cpm_obj
, (uint64_t)offset
);
5489 printf("m %p prev_address 0x%llx\n", m
, (uint64_t)prev_addr
);
5490 panic("vm_allocate_cpm: pages not contig!");
5493 prev_addr
= VM_PAGE_GET_PHYS_PAGE(m
);
5495 #endif /* MACH_ASSERT */
5497 vm_object_deallocate(cpm_obj
); /* kill extra ref */
5506 * Interface is defined in all cases, but unless the kernel
5507 * is built explicitly for this option, the interface does
/*
 * Stub variant: every visible parameter is marked __unused and the body
 * unconditionally returns KERN_FAILURE.
 * NOTE(review): the name line and the preprocessor condition selecting
 * this variant are not visible in this chunk.
 */
5513 __unused vm_map_t map
,
5514 __unused vm_map_offset_t
*addr
,
5515 __unused vm_map_size_t size
,
5518 return KERN_FAILURE
;
5522 /* Not used without nested pmaps */
5523 #ifndef NO_NESTED_PMAP
5525 * Clip and unnest a portion of a nested submap mapping.
/*
 * vm_map_clip_unnest:
 *
 * Clip "entry" (a submap entry that currently uses a nested pmap) to
 * [start_unnest, end_unnest) and undo the pmap nesting for that piece.
 *
 * Steps visible in this chunk, in order:
 *  - assert that the entry is a submap entry with use_pmap set;
 *  - let pmap_adjust_unnest_parameters() widen/adjust the range; when it
 *    reports an adjustment, log it through log_unnest_badness();
 *  - panic if the (possibly adjusted) range is not fully inside the entry;
 *  - split off the part before start_unnest and the part after end_unnest
 *    with _vm_map_clip_start()/_vm_map_clip_end(), refreshing the map's
 *    first-free hint after each split;
 *  - pmap_unnest() the remaining entry's range in map->pmap;
 *  - if the map is mapped in other pmaps and still referenced, clean the
 *    parent pmaps with vm_map_submap_pmap_clean();
 *  - clear entry->use_pmap, and for non-kernel maps retag a
 *    VM_MEMORY_SHARED_PMAP alias as VM_MEMORY_UNSHARED_PMAP.
 *
 * NOTE(review): the name line, the "map" parameter and several call
 * arguments are missing from this chunk.
 */
5532 vm_map_entry_t entry
,
5533 vm_map_offset_t start_unnest
,
5534 vm_map_offset_t end_unnest
)
5536 vm_map_offset_t old_start_unnest
= start_unnest
;
5537 vm_map_offset_t old_end_unnest
= end_unnest
;
5539 assert(entry
->is_sub_map
);
5540 assert(VME_SUBMAP(entry
) != NULL
);
5541 assert(entry
->use_pmap
);
5544 * Query the platform for the optimal unnest range.
5545 * DRK: There's some duplication of effort here, since
5546 * callers may have adjusted the range to some extent. This
5547 * routine was introduced to support 1GiB subtree nesting
5548 * for x86 platforms, which can also nest on 2MiB boundaries
5549 * depending on size/alignment.
5551 if (pmap_adjust_unnest_parameters(map
->pmap
, &start_unnest
, &end_unnest
)) {
5552 assert(VME_SUBMAP(entry
)->is_nested_map
);
5553 assert(!VME_SUBMAP(entry
)->disable_vmentry_reuse
);
5554 log_unnest_badness(map
,
5557 VME_SUBMAP(entry
)->is_nested_map
,
5559 VME_SUBMAP(entry
)->lowest_unnestable_start
-
5560 VME_OFFSET(entry
)));
5563 if (entry
->vme_start
> start_unnest
||
5564 entry
->vme_end
< end_unnest
) {
5565 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5566 "bad nested entry: start=0x%llx end=0x%llx\n",
5567 (long long)start_unnest
, (long long)end_unnest
,
5568 (long long)entry
->vme_start
, (long long)entry
->vme_end
);
5571 if (start_unnest
> entry
->vme_start
) {
5572 _vm_map_clip_start(&map
->hdr
,
5575 if (map
->holelistenabled
) {
5576 vm_map_store_update_first_free(map
, NULL
, FALSE
);
5578 vm_map_store_update_first_free(map
, map
->first_free
, FALSE
);
5581 if (entry
->vme_end
> end_unnest
) {
5582 _vm_map_clip_end(&map
->hdr
,
5585 if (map
->holelistenabled
) {
5586 vm_map_store_update_first_free(map
, NULL
, FALSE
);
5588 vm_map_store_update_first_free(map
, map
->first_free
, FALSE
);
5592 pmap_unnest(map
->pmap
,
5594 entry
->vme_end
- entry
->vme_start
);
5595 if ((map
->mapped_in_other_pmaps
) && os_ref_get_count(&map
->map_refcnt
) != 0) {
5596 /* clean up parent map/maps */
5597 vm_map_submap_pmap_clean(
5598 map
, entry
->vme_start
,
5603 entry
->use_pmap
= FALSE
;
5604 if ((map
->pmap
!= kernel_pmap
) &&
5605 (VME_ALIAS(entry
) == VM_MEMORY_SHARED_PMAP
)) {
5606 VME_ALIAS_SET(entry
, VM_MEMORY_UNSHARED_PMAP
);
5609 #endif /* NO_NESTED_PMAP */
5612 * vm_map_clip_start: [ internal use only ]
5614 * Asserts that the given entry begins at or after
5615 * the specified address; if necessary,
5616 * it splits the entry into two.
/*
 * vm_map_clip_start:
 *
 * Make "entry" begin no earlier than "startaddr", splitting it if needed.
 *
 * Steps visible in this chunk, in order:
 *  - with nested pmaps compiled in, a submap entry containing startaddr is
 *    first unnested over the pmap_shared_region_size_min()-aligned window
 *    that contains startaddr (via vm_map_clip_unnest());
 *  - if startaddr lies past the entry start: physically contiguous
 *    non-submap objects get their whole range removed from the pmap, an
 *    atomic entry causes a panic, and the split itself is done by
 *    _vm_map_clip_start(), followed by a first-free hint refresh.
 *
 * NOTE(review): the name line, the "map" parameter, part of the submap
 * condition and the tracing call that owns the argument list just before
 * the split are missing from this chunk.
 */
5621 vm_map_entry_t entry
,
5622 vm_map_offset_t startaddr
)
5624 #ifndef NO_NESTED_PMAP
5625 if (entry
->is_sub_map
&&
5627 startaddr
>= entry
->vme_start
) {
5628 vm_map_offset_t start_unnest
, end_unnest
;
5631 * Make sure "startaddr" is no longer in a nested range
5632 * before we clip. Unnest only the minimum range the platform
5634 * vm_map_clip_unnest may perform additional adjustments to
5637 start_unnest
= startaddr
& ~(pmap_shared_region_size_min(map
->pmap
) - 1);
5638 end_unnest
= start_unnest
+ pmap_shared_region_size_min(map
->pmap
);
5639 vm_map_clip_unnest(map
, entry
, start_unnest
, end_unnest
);
5641 #endif /* NO_NESTED_PMAP */
5642 if (startaddr
> entry
->vme_start
) {
5643 if (VME_OBJECT(entry
) &&
5644 !entry
->is_sub_map
&&
5645 VME_OBJECT(entry
)->phys_contiguous
) {
5646 pmap_remove(map
->pmap
,
5647 (addr64_t
)(entry
->vme_start
),
5648 (addr64_t
)(entry
->vme_end
));
5650 if (entry
->vme_atomic
) {
5651 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map
, entry
);
5657 vm_map_offset_t
, entry
->vme_start
,
5658 vm_map_offset_t
, entry
->vme_end
,
5659 vm_map_offset_t
, startaddr
,
5660 int, VME_ALIAS(entry
));
5662 _vm_map_clip_start(&map
->hdr
, entry
, startaddr
);
5663 if (map
->holelistenabled
) {
5664 vm_map_store_update_first_free(map
, NULL
, FALSE
);
5666 vm_map_store_update_first_free(map
, map
->first_free
, FALSE
);
/*
 * vm_map_copy_clip_start(copy, entry, startaddr):
 * split "entry" at "startaddr" inside the copy's own header (cpy_hdr), but
 * only when startaddr lies past the entry's start.  It calls
 * _vm_map_clip_start() directly, so none of the unnesting / pmap work of
 * vm_map_clip_start() is performed.
 * NOTE(review): the macro's opening/closing wrapper lines are not visible
 * in this chunk.
 */
5672 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5674 if ((startaddr) > (entry)->vme_start) \
5675 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5679 * This routine is called only when it is known that
5680 * the entry must be split.
/*
 * _vm_map_clip_start:
 *
 * Unconditionally split "entry" at "start".  A new entry is created as a
 * full copy, trimmed to end at "start", and linked in front of the
 * original; the original is advanced to begin at "start".
 *
 *   map_header  header of the map (or map copy) that owns the entry
 *   entry       entry to split; on return it begins at "start"
 *   start       split address; asserted to be map-page aligned when the
 *               entry is map_aligned, and to lie strictly inside the entry
 *
 * NOTE(review): the name line, braces and the else keyword of the final
 * reference step are not visible in this chunk.
 */
5684 struct vm_map_header
*map_header
,
5685 vm_map_entry_t entry
,
5686 vm_map_offset_t start
)
5688 vm_map_entry_t new_entry
;
5691 * Split off the front portion --
5692 * note that we must insert the new
5693 * entry BEFORE this one, so that
5694 * this entry has the specified starting
5698 if (entry
->map_aligned
) {
5699 assert(VM_MAP_PAGE_ALIGNED(start
,
5700 VM_MAP_HDR_PAGE_MASK(map_header
)));
/* The second argument is the negation of the header's entries_pageable flag. */
5703 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
5704 vm_map_entry_copy_full(new_entry
, entry
);
/* new_entry becomes the front piece: same start, ends at the split point. */
5706 new_entry
->vme_end
= start
;
5707 assert(new_entry
->vme_start
< new_entry
->vme_end
);
/* The original keeps the back piece, so its offset moves forward by the clipped length. */
5708 VME_OFFSET_SET(entry
, VME_OFFSET(entry
) + (start
- entry
->vme_start
));
5709 assert(start
< entry
->vme_end
);
5710 entry
->vme_start
= start
;
/* Link the front piece immediately before the original entry. */
5712 _vm_map_store_entry_link(map_header
, entry
->vme_prev
, new_entry
);
/* Both pieces now point at the same submap or object: take a reference for the new one. */
5714 if (entry
->is_sub_map
) {
5715 vm_map_reference(VME_SUBMAP(new_entry
));
5717 vm_object_reference(VME_OBJECT(new_entry
));
5723 * vm_map_clip_end: [ internal use only ]
5725 * Asserts that the given entry ends at or before
5726 * the specified address; if necessary,
5727 * it splits the entry into two.
/*
 * vm_map_clip_end:
 *
 * Make "entry" end no later than "endaddr", splitting it if needed.
 *
 * Steps visible in this chunk, in order:
 *  - an endaddr beyond the entry is first clamped to the entry's end;
 *  - with nested pmaps compiled in, a submap entry that still uses a
 *    nested pmap is unnested from the entry start up to endaddr rounded up
 *    to pmap_shared_region_size_min() (via vm_map_clip_unnest());
 *  - if endaddr lies before the entry end: physically contiguous
 *    non-submap objects get their whole range removed from the pmap, an
 *    atomic entry causes a panic, and the split itself is done by
 *    _vm_map_clip_end(), followed by a first-free hint refresh.
 *
 * NOTE(review): the name line, the "map" parameter, the left-hand side of
 * the end_unnest assignment and the tracing call that owns the argument
 * list just before the split are missing from this chunk.
 */
5732 vm_map_entry_t entry
,
5733 vm_map_offset_t endaddr
)
5735 if (endaddr
> entry
->vme_end
) {
5737 * Within the scope of this clipping, limit "endaddr" to
5738 * the end of this map entry...
5740 endaddr
= entry
->vme_end
;
5742 #ifndef NO_NESTED_PMAP
5743 if (entry
->is_sub_map
&& entry
->use_pmap
) {
5744 vm_map_offset_t start_unnest
, end_unnest
;
5747 * Make sure the range between the start of this entry and
5748 * the new "endaddr" is no longer nested before we clip.
5749 * Unnest only the minimum range the platform can handle.
5750 * vm_map_clip_unnest may perform additional adjustments to
5753 start_unnest
= entry
->vme_start
;
5755 (endaddr
+ pmap_shared_region_size_min(map
->pmap
) - 1) &
5756 ~(pmap_shared_region_size_min(map
->pmap
) - 1);
5757 vm_map_clip_unnest(map
, entry
, start_unnest
, end_unnest
);
5759 #endif /* NO_NESTED_PMAP */
5760 if (endaddr
< entry
->vme_end
) {
5761 if (VME_OBJECT(entry
) &&
5762 !entry
->is_sub_map
&&
5763 VME_OBJECT(entry
)->phys_contiguous
) {
5764 pmap_remove(map
->pmap
,
5765 (addr64_t
)(entry
->vme_start
),
5766 (addr64_t
)(entry
->vme_end
));
5768 if (entry
->vme_atomic
) {
5769 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map
, entry
);
5774 vm_map_offset_t
, entry
->vme_start
,
5775 vm_map_offset_t
, entry
->vme_end
,
5776 vm_map_offset_t
, endaddr
,
5777 int, VME_ALIAS(entry
));
5779 _vm_map_clip_end(&map
->hdr
, entry
, endaddr
);
5780 if (map
->holelistenabled
) {
5781 vm_map_store_update_first_free(map
, NULL
, FALSE
);
5783 vm_map_store_update_first_free(map
, map
->first_free
, FALSE
);
/*
 * vm_map_copy_clip_end(copy, entry, endaddr):
 * split "entry" at "endaddr" inside the copy's own header (cpy_hdr), but
 * only when endaddr lies before the entry's end.  It calls
 * _vm_map_clip_end() directly, so none of the unnesting / pmap work of
 * vm_map_clip_end() is performed.
 * NOTE(review): the macro's opening/closing wrapper lines are not visible
 * in this chunk.
 */
5789 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5791 if ((endaddr) < (entry)->vme_end) \
5792 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5796 * This routine is called only when it is known that
5797 * the entry must be split.
/*
 * _vm_map_clip_end:
 *
 * Unconditionally split "entry" at "end".  A new entry is created as a
 * full copy, moved to begin at "end", and linked after the original; the
 * original is shortened to finish at "end".
 *
 *   map_header  header of the map (or map copy) that owns the entry
 *   entry       entry to split; on return it ends at "end"
 *   end         split address; asserted to be map-page aligned when the
 *               entry is map_aligned, and to lie strictly inside the entry
 *
 * NOTE(review): the name line, braces and the else keyword of the final
 * reference step are not visible in this chunk.
 */
5801 struct vm_map_header
*map_header
,
5802 vm_map_entry_t entry
,
5803 vm_map_offset_t end
)
5805 vm_map_entry_t new_entry
;
5808 * Create a new entry and insert it
5809 * AFTER the specified entry
5812 if (entry
->map_aligned
) {
5813 assert(VM_MAP_PAGE_ALIGNED(end
,
5814 VM_MAP_HDR_PAGE_MASK(map_header
)));
/* The second argument is the negation of the header's entries_pageable flag. */
5817 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
5818 vm_map_entry_copy_full(new_entry
, entry
);
5820 assert(entry
->vme_start
< end
);
/* One assignment both shortens the original and positions the new back piece. */
5821 new_entry
->vme_start
= entry
->vme_end
= end
;
/* The back piece starts (end - original start) further into the object or submap. */
5822 VME_OFFSET_SET(new_entry
,
5823 VME_OFFSET(new_entry
) + (end
- entry
->vme_start
));
5824 assert(new_entry
->vme_start
< new_entry
->vme_end
);
/* Link the back piece immediately after the original entry. */
5826 _vm_map_store_entry_link(map_header
, entry
, new_entry
);
/* Both pieces now point at the same submap or object: take a reference for the new one. */
5828 if (entry
->is_sub_map
) {
5829 vm_map_reference(VME_SUBMAP(new_entry
));
5831 vm_object_reference(VME_OBJECT(new_entry
));
5837 * VM_MAP_RANGE_CHECK: [ internal use only ]
5839 * Asserts that the starting and ending region
5840 * addresses fall within the valid range of the map.
/*
 * VM_MAP_RANGE_CHECK(map, start, end):
 * clamps "start" up to vm_map_min(map) and "end" down to vm_map_max(map)
 * in place.  Both arguments are assigned to and evaluated more than once,
 * so callers must pass plain modifiable variables.
 * NOTE(review): the macro's opening/closing wrapper lines are not visible
 * in this chunk.
 */
5842 #define VM_MAP_RANGE_CHECK(map, start, end) \
5844 if (start < vm_map_min(map)) \
5845 start = vm_map_min(map); \
5846 if (end > vm_map_max(map)) \
5847 end = vm_map_max(map); \
5853 * vm_map_range_check: [ internal use only ]
5855 * Check that the region defined by the specified start and
5856 * end addresses is wholly contained within a single map
5857 * entry or set of adjacent map entries of the specified map,
5858 * i.e. the specified region contains no unmapped space.
5859 * If any or all of the region is unmapped, FALSE is returned.
5860 * Otherwise, TRUE is returned and if the output argument 'entry'
5861 * is not NULL it points to the map entry containing the start
5864 * The map is locked for reading on entry and is left locked.
/*
 * vm_map_range_check:
 *
 * Decide whether [start, end) is covered by map entries without a gap.
 *
 * Checks visible in this chunk, in order:
 *  - the range must lie inside [vm_map_min(map), vm_map_max(map)] and
 *    must not be inverted;
 *  - "start" must fall inside an existing entry (vm_map_lookup_entry);
 *  - when the caller supplied a non-NULL "entry" pointer it is handled
 *    right after the lookup;
 *  - if "end" is within that first entry the walk is unnecessary;
 *  - otherwise successive entries are walked for as long as each one
 *    begins exactly where the previous one ended, stopping once an entry
 *    reaches "end".
 *
 * NOTE(review): the name line, the "map" parameter, the declaration of
 * "cur", the store through "entry" and every return statement are missing
 * from this chunk, so the exact return values cannot be confirmed here.
 */
5869 vm_map_offset_t start
,
5870 vm_map_offset_t end
,
5871 vm_map_entry_t
*entry
)
5874 vm_map_offset_t prev
;
5877 * Basic sanity checks first
5879 if (start
< vm_map_min(map
) || end
> vm_map_max(map
) || start
> end
) {
5884 * Check first if the region starts within a valid
5885 * mapping for the map.
5887 if (!vm_map_lookup_entry(map
, start
, &cur
)) {
5892 * Optimize for the case that the region is contained
5893 * in a single map entry.
5895 if (entry
!= (vm_map_entry_t
*) NULL
) {
5898 if (end
<= cur
->vme_end
) {
5903 * If the region is not wholly contained within a
5904 * single entry, walk the entries looking for holes.
/* prev tracks the end of the last entry seen; a gap shows up as prev != next start. */
5906 prev
= cur
->vme_end
;
5907 cur
= cur
->vme_next
;
5908 while ((cur
!= vm_map_to_entry(map
)) && (prev
== cur
->vme_start
)) {
5909 if (end
<= cur
->vme_end
) {
5912 prev
= cur
->vme_end
;
5913 cur
= cur
->vme_next
;
5919 * vm_map_submap: [ kernel use only ]
5921 * Mark the given range as handled by a subordinate map.
5923 * This range must have been created with vm_map_find using
5924 * the vm_submap_object, and no other operations may have been
5925 * performed on this range prior to calling vm_map_submap.
5927 * Only a limited number of operations can be performed
5928 * within this range after calling vm_map_submap:
5930 * [Don't try vm_map_copyin!]
5932 * To remove a submapping, one must first remove the
5933 * range from the superior map, and then destroy the
5934 * submap (if desired). [Better yet, don't try it.]
/*
 * vm_map_submap:
 *
 * Turn the map entry covering [start, end) into a submap entry that
 * refers to "submap" at "offset".
 *
 * Steps visible in this chunk, in order:
 *  - "result" starts out as KERN_INVALID_ARGUMENT;
 *  - the entry at "start" is looked up (falling through to the next entry
 *    when the lookup misses); hitting the end of the map or an entry that
 *    is already a submap returns KERN_INVALID_ARGUMENT;
 *  - the entry is clipped to the requested range;
 *  - the conversion happens only if the clipped entry matches the range
 *    exactly, is not a submap, and is backed by vm_submap_object with no
 *    resident pages, no copy, no shadow and no pager created;
 *  - the entry's offset is set, its object reference is released, it is
 *    flagged is_sub_map with use_pmap cleared, and a reference is taken
 *    on the submap;
 *  - if the submap has a pmap that differs from this map's pmap, its
 *    mapped_in_other_pmaps flag is set;
 *  - with nested pmaps compiled in, a pmap is created for the submap if it
 *    has none (KERN_NO_SPACE on failure), pmap_nest() is called (a failure
 *    panics) and entry->use_pmap is set; without nested pmaps the range is
 *    simply removed from the map's pmap;
 *  - result is set to KERN_SUCCESS.
 *
 * NOTE(review): the name line, the "map", "submap" and pmap-selection
 * parameters, the locking calls and the final return are missing from
 * this chunk.
 */
5939 vm_map_offset_t start
,
5940 vm_map_offset_t end
,
5942 vm_map_offset_t offset
,
5943 #ifdef NO_NESTED_PMAP
5945 #endif /* NO_NESTED_PMAP */
5948 vm_map_entry_t entry
;
5949 kern_return_t result
= KERN_INVALID_ARGUMENT
;
5954 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
5955 entry
= entry
->vme_next
;
5958 if (entry
== vm_map_to_entry(map
) ||
5959 entry
->is_sub_map
) {
5961 return KERN_INVALID_ARGUMENT
;
5964 vm_map_clip_start(map
, entry
, start
);
5965 vm_map_clip_end(map
, entry
, end
);
5967 if ((entry
->vme_start
== start
) && (entry
->vme_end
== end
) &&
5968 (!entry
->is_sub_map
) &&
5969 ((object
= VME_OBJECT(entry
)) == vm_submap_object
) &&
5970 (object
->resident_page_count
== 0) &&
5971 (object
->copy
== VM_OBJECT_NULL
) &&
5972 (object
->shadow
== VM_OBJECT_NULL
) &&
5973 (!object
->pager_created
)) {
5974 VME_OFFSET_SET(entry
, (vm_object_offset_t
)offset
);
5975 VME_OBJECT_SET(entry
, VM_OBJECT_NULL
);
5976 vm_object_deallocate(object
);
5977 entry
->is_sub_map
= TRUE
;
5978 entry
->use_pmap
= FALSE
;
5979 VME_SUBMAP_SET(entry
, submap
);
5980 vm_map_reference(submap
);
5981 if (submap
->mapped_in_other_pmaps
== FALSE
&&
5982 vm_map_pmap(submap
) != PMAP_NULL
&&
5983 vm_map_pmap(submap
) != vm_map_pmap(map
)) {
5985 * This submap is being mapped in a map
5986 * that uses a different pmap.
5987 * Set its "mapped_in_other_pmaps" flag
5988 * to indicate that we now need to
5989 * remove mappings from all pmaps rather
5990 * than just the submap's pmap.
5992 submap
->mapped_in_other_pmaps
= TRUE
;
5995 #ifndef NO_NESTED_PMAP
5997 /* nest if platform code will allow */
5998 if (submap
->pmap
== NULL
) {
5999 ledger_t ledger
= map
->pmap
->ledger
;
6000 submap
->pmap
= pmap_create_options(ledger
,
6001 (vm_map_size_t
) 0, 0);
6002 if (submap
->pmap
== PMAP_NULL
) {
6004 return KERN_NO_SPACE
;
6006 #if defined(__arm__) || defined(__arm64__)
6007 pmap_set_nested(submap
->pmap
);
6010 result
= pmap_nest(map
->pmap
,
6011 (VME_SUBMAP(entry
))->pmap
,
6013 (uint64_t)(end
- start
));
6015 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result
);
6017 entry
->use_pmap
= TRUE
;
6019 #else /* NO_NESTED_PMAP */
6020 pmap_remove(map
->pmap
, (addr64_t
)start
, (addr64_t
)end
);
6021 #endif /* NO_NESTED_PMAP */
6022 result
= KERN_SUCCESS
;
6032 * Sets the protection of the specified address
6033 * region in the target map. If "set_max" is
6034 * specified, the maximum protection is to be set;
6035 * otherwise, only the current protection is affected.
/*
 * vm_map_protect:
 *
 * Change the protection of [start, end) in "map".
 *
 * Phases visible in this chunk, in order:
 *  1. VM_PROT_COPY handling: a start at or beyond map->max_offset is
 *     KERN_INVALID_ADDRESS; an executable request on a non-kernel map
 *     subject to the W^X policy logs a CODE SIGNING message and returns
 *     KERN_PROTECTION_FAILURE; otherwise the range is remapped onto itself
 *     copy-on-write through vm_map_remap() (VM_FLAGS_FIXED |
 *     VM_FLAGS_OVERWRITE, with vmkf_remap_prot_copy and
 *     vmkf_overwrite_immutable set) and VM_PROT_COPY is cleared from
 *     new_prot.
 *  2. Lookup: start must be below map->max_offset and inside an entry,
 *     else KERN_INVALID_ADDRESS.  For superpage entries the request is
 *     widened to superpage boundaries.
 *  3. Validation pass over every entry in the range: a hole returns
 *     KERN_INVALID_ADDRESS; a request exceeding the entry's maximum
 *     protection returns KERN_PROTECTION_FAILURE; write+execute on a
 *     non-JIT entry is logged and has VM_PROT_EXECUTE stripped (or fails
 *     when VM_MAP_POLICY_WX_FAIL applies); maps with
 *     map_disallow_new_exec refuse adding execute, or adding write to an
 *     executable entry.
 *  4. Update pass: entries are clipped to the range, their protection (and
 *     max_protection) fields are rewritten, and when the effective
 *     protection changed the pmap is updated through pmap_protect() or
 *     pmap_protect_options().
 *  5. A final walk calls vm_map_simplify_entry() on the affected entries
 *     and KERN_SUCCESS is returned.
 *
 * NOTE(review): the name line, the "map", "new_prot" and "set_max"
 * parameters, locking calls, several conditions and else-arms, and most
 * call arguments are missing from this chunk; treat this summary as a
 * guide to the visible lines only.
 */
6040 vm_map_offset_t start
,
6041 vm_map_offset_t end
,
6045 vm_map_entry_t current
;
6046 vm_map_offset_t prev
;
6047 vm_map_entry_t entry
;
6049 int pmap_options
= 0;
6052 if (new_prot
& VM_PROT_COPY
) {
6053 vm_map_offset_t new_start
;
6054 vm_prot_t cur_prot
, max_prot
;
6055 vm_map_kernel_flags_t kflags
;
6057 /* LP64todo - see below */
6058 if (start
>= map
->max_offset
) {
6059 return KERN_INVALID_ADDRESS
;
6062 if ((new_prot
& VM_PROT_EXECUTE
) &&
6063 map
->pmap
!= kernel_pmap
&&
6064 (vm_map_cs_enforcement(map
)
6065 #if XNU_TARGET_OS_OSX && __arm64__
6066 || !VM_MAP_IS_EXOTIC(map
)
6067 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
6069 VM_MAP_POLICY_WX_FAIL(map
)) {
6071 uint64_t, (uint64_t) start
,
6072 uint64_t, (uint64_t) end
,
6073 vm_prot_t
, new_prot
);
6074 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6076 (current_task()->bsd_info
6077 ? proc_name_address(current_task()->bsd_info
)
6080 return KERN_PROTECTION_FAILURE
;
6084 * Let vm_map_remap_extract() know that it will need to:
6085 * + make a copy of the mapping
6086 * + add VM_PROT_WRITE to the max protections
6087 * + remove any protections that are no longer allowed from the
6088 * max protections (to avoid any WRITE/EXECUTE conflict, for
6090 * Note that "max_prot" is an IN/OUT parameter only for this
6091 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
6094 max_prot
= new_prot
& VM_PROT_ALL
;
6095 kflags
= VM_MAP_KERNEL_FLAGS_NONE
;
6096 kflags
.vmkf_remap_prot_copy
= TRUE
;
6097 kflags
.vmkf_overwrite_immutable
= TRUE
;
6099 kr
= vm_map_remap(map
,
6103 VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
,
6108 TRUE
, /* copy-on-write remapping! */
6111 VM_INHERIT_DEFAULT
);
6112 if (kr
!= KERN_SUCCESS
) {
6115 new_prot
&= ~VM_PROT_COPY
;
6120 /* LP64todo - remove this check when vm_map_commpage64()
6121 * no longer has to stuff in a map_entry for the commpage
6122 * above the map's max_offset.
6124 if (start
>= map
->max_offset
) {
6126 return KERN_INVALID_ADDRESS
;
6131 * Lookup the entry. If it doesn't start in a valid
6132 * entry, return an error.
6134 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
6136 return KERN_INVALID_ADDRESS
;
6139 if (entry
->superpage_size
&& (start
& (SUPERPAGE_SIZE
- 1))) { /* extend request to whole entry */
6140 start
= SUPERPAGE_ROUND_DOWN(start
);
6145 if (entry
->superpage_size
) {
6146 end
= SUPERPAGE_ROUND_UP(end
);
6150 * Make a first pass to check for protection and address
6155 prev
= current
->vme_start
;
6156 while ((current
!= vm_map_to_entry(map
)) &&
6157 (current
->vme_start
< end
)) {
6159 * If there is a hole, return an error.
6161 if (current
->vme_start
!= prev
) {
6163 return KERN_INVALID_ADDRESS
;
6166 new_max
= current
->max_protection
;
6168 if (set_max
&& (new_prot
& VM_PROT_EXECUTE
) && pmap_cs_exempt(map
->pmap
)) {
6169 new_max
|= VM_PROT_EXECUTE
;
6172 if ((new_prot
& new_max
) != new_prot
) {
6174 return KERN_PROTECTION_FAILURE
;
6177 if ((new_prot
& VM_PROT_WRITE
) &&
6178 (new_prot
& VM_PROT_EXECUTE
) &&
6179 #if XNU_TARGET_OS_OSX
6180 map
->pmap
!= kernel_pmap
&&
6181 (vm_map_cs_enforcement(map
)
6183 || !VM_MAP_IS_EXOTIC(map
)
6184 #endif /* __arm64__ */
6186 #endif /* XNU_TARGET_OS_OSX */
6188 !pmap_cs_exempt(map
->pmap
) &&
6190 !(current
->used_for_jit
)) {
6192 uint64_t, (uint64_t) current
->vme_start
,
6193 uint64_t, (uint64_t) current
->vme_end
,
6194 vm_prot_t
, new_prot
);
6195 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6197 (current_task()->bsd_info
6198 ? proc_name_address(current_task()->bsd_info
)
6201 new_prot
&= ~VM_PROT_EXECUTE
;
6202 if (VM_MAP_POLICY_WX_FAIL(map
)) {
6204 return KERN_PROTECTION_FAILURE
;
6209 * If the task has requested executable lockdown,
6211 * - adding executable protections OR
6212 * - adding write protections to an existing executable mapping.
6214 if (map
->map_disallow_new_exec
== TRUE
) {
6215 if ((new_prot
& VM_PROT_EXECUTE
) ||
6216 ((current
->protection
& VM_PROT_EXECUTE
) && (new_prot
& VM_PROT_WRITE
))) {
6218 return KERN_PROTECTION_FAILURE
;
6222 prev
= current
->vme_end
;
6223 current
= current
->vme_next
;
6228 end
== vm_map_round_page(prev
, VM_MAP_PAGE_MASK(map
))) {
6229 vm_map_entry_t prev_entry
;
6231 prev_entry
= current
->vme_prev
;
6232 if (prev_entry
!= vm_map_to_entry(map
) &&
6233 !prev_entry
->map_aligned
&&
6234 (vm_map_round_page(prev_entry
->vme_end
,
6235 VM_MAP_PAGE_MASK(map
))
6238 * The last entry in our range is not "map-aligned"
6239 * but it would have reached all the way to "end"
6240 * if it had been map-aligned, so this is not really
6241 * a hole in the range and we can proceed.
6246 #endif /* __arm64__ */
6250 return KERN_INVALID_ADDRESS
;
6254 * Go back and fix up protections.
6255 * Clip to start here if the range starts within
6260 if (current
!= vm_map_to_entry(map
)) {
6261 /* clip and unnest if necessary */
6262 vm_map_clip_start(map
, current
, start
);
6265 while ((current
!= vm_map_to_entry(map
)) &&
6266 (current
->vme_start
< end
)) {
6269 vm_map_clip_end(map
, current
, end
);
6271 if (current
->is_sub_map
) {
6272 /* clipping did unnest if needed */
6273 assert(!current
->use_pmap
);
6276 old_prot
= current
->protection
;
6279 current
->max_protection
= new_prot
;
6280 current
->protection
= new_prot
& old_prot
;
6282 current
->protection
= new_prot
;
6286 * Update physical map if necessary.
6287 * If the request is to turn off write protection,
6288 * we won't do it for real (in pmap). This is because
6289 * it would cause copy-on-write to fail. We've already
6290 * set, the new protection in the map, so if a
6291 * write-protect fault occurred, it will be fixed up
6292 * properly, COW or not.
6294 if (current
->protection
!= old_prot
) {
6295 /* Look one level in we support nested pmaps */
6296 /* from mapped submaps which are direct entries */
6301 prot
= current
->protection
;
6302 if (current
->is_sub_map
|| (VME_OBJECT(current
) == NULL
) || (VME_OBJECT(current
) != compressor_object
)) {
6303 prot
&= ~VM_PROT_WRITE
;
6305 assert(!VME_OBJECT(current
)->code_signed
);
6306 assert(VME_OBJECT(current
)->copy_strategy
== MEMORY_OBJECT_COPY_NONE
);
6309 if (override_nx(map
, VME_ALIAS(current
)) && prot
) {
6310 prot
|= VM_PROT_EXECUTE
;
6313 #if DEVELOPMENT || DEBUG
6314 if (!(old_prot
& VM_PROT_EXECUTE
) &&
6315 (prot
& VM_PROT_EXECUTE
) &&
6316 panic_on_unsigned_execute
&&
6317 (proc_selfcsflags() & CS_KILL
)) {
6318 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, old_prot
, prot
);
6320 #endif /* DEVELOPMENT || DEBUG */
6322 if (pmap_has_prot_policy(map
->pmap
, current
->translated_allow_execute
, prot
)) {
6323 if (current
->wired_count
) {
6324 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
6325 map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, prot
, current
->wired_count
);
6328 /* If the pmap layer cares about this
6329 * protection type, force a fault for
6330 * each page so that vm_fault will
6331 * repopulate the page with the full
6332 * set of protections.
6335 * TODO: We don't seem to need this,
6336 * but this is due to an internal
6337 * implementation detail of
6338 * pmap_protect. Do we want to rely
6341 prot
= VM_PROT_NONE
;
6344 if (current
->is_sub_map
&& current
->use_pmap
) {
6345 pmap_protect(VME_SUBMAP(current
)->pmap
,
6350 if (prot
& VM_PROT_WRITE
) {
6351 if (VME_OBJECT(current
) == compressor_object
) {
6353 * For write requests on the
6354 * compressor, we wil ask the
6355 * pmap layer to prevent us from
6356 * taking a write fault when we
6357 * attempt to access the mapping
6360 pmap_options
|= PMAP_OPTIONS_PROTECT_IMMEDIATE
;
6364 pmap_protect_options(map
->pmap
,
6372 current
= current
->vme_next
;
6376 while ((current
!= vm_map_to_entry(map
)) &&
6377 (current
->vme_start
<= end
)) {
6378 vm_map_simplify_entry(map
, current
);
6379 current
= current
->vme_next
;
6383 return KERN_SUCCESS
;
6389 * Sets the inheritance of the specified address
6390 * range in the target map. Inheritance
6391 * affects how the map will be shared with
6392 * child maps at the time of vm_map_fork.
/*
 * Set the inheritance attribute of every entry in [start, end).
 *
 * Steps visible in this chunk, in order:
 *  - start/end are clamped to the map bounds by VM_MAP_RANGE_CHECK;
 *  - the entry at "start" is looked up (the next entry is used when the
 *    lookup misses);
 *  - a first walk rejects the request with KERN_INVALID_ARGUMENT if any
 *    submap entry is in range and VM_INHERIT_COPY was asked for;
 *  - the first entry is clipped at "start"; then each entry in range is
 *    clipped at "end" and has its inheritance field overwritten;
 *  - KERN_SUCCESS is returned.
 *
 * NOTE(review): the name line, the "map" parameter, locking calls and the
 * assignments that seed "entry" before each walk are missing from this
 * chunk.
 */
6397 vm_map_offset_t start
,
6398 vm_map_offset_t end
,
6399 vm_inherit_t new_inheritance
)
6401 vm_map_entry_t entry
;
6402 vm_map_entry_t temp_entry
;
6406 VM_MAP_RANGE_CHECK(map
, start
, end
);
6408 if (vm_map_lookup_entry(map
, start
, &temp_entry
)) {
6411 temp_entry
= temp_entry
->vme_next
;
6415 /* first check entire range for submaps which can't support the */
6416 /* given inheritance. */
6417 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
6418 if (entry
->is_sub_map
) {
6419 if (new_inheritance
== VM_INHERIT_COPY
) {
6421 return KERN_INVALID_ARGUMENT
;
6425 entry
= entry
->vme_next
;
6429 if (entry
!= vm_map_to_entry(map
)) {
6430 /* clip and unnest if necessary */
6431 vm_map_clip_start(map
, entry
, start
);
/* Second walk: every entry in range is trimmed at end and then updated. */
6434 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
6435 vm_map_clip_end(map
, entry
, end
);
6436 if (entry
->is_sub_map
) {
6437 /* clip did unnest if needed */
6438 assert(!entry
->use_pmap
);
6441 entry
->inheritance
= new_inheritance
;
6443 entry
= entry
->vme_next
;
6447 return KERN_SUCCESS
;
6451 * Update the accounting for the amount of wired memory in this map. If the user has
6452 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
/*
 * Wiring-side accounting for one map entry.
 *
 * User wiring (visible branch on entry->user_wired_count == 0):
 *  - on the first user wire of an entry, the entry size is checked against
 *    MIN(map->user_wire_limit, vm_per_task_user_wire_limit) and, together
 *    with the current system wired total, against
 *    vm_global_user_wire_limit; exceeding either bumps the matching
 *    over-limit counter and returns KERN_RESOURCE_SHORTAGE;
 *  - wired_count at MAX_WIRE_COUNT returns KERN_FAILURE; otherwise
 *    wired_count is incremented and the size is added to
 *    map->user_wire_size;
 *  - user_wired_count at MAX_WIRE_COUNT returns KERN_FAILURE; otherwise it
 *    is incremented.
 * Kernel wiring: wired_count at MAX_WIRE_COUNT panics; otherwise it is
 * incremented.
 * Returns KERN_SUCCESS when the counts were updated.
 *
 * NOTE(review): the name line, the "map" parameter, the declaration of
 * "size" and the if/else structure selecting the user and kernel paths are
 * missing from this chunk.
 */
6455 static kern_return_t
6458 vm_map_entry_t entry
,
6459 boolean_t user_wire
)
6464 unsigned int total_wire_count
= vm_page_wire_count
+ vm_lopage_free_count
;
6467 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6471 if (entry
->user_wired_count
== 0) {
6472 size
= entry
->vme_end
- entry
->vme_start
;
6475 * Since this is the first time the user is wiring this map entry, check to see if we're
6476 * exceeding the user wire limits. There is a per map limit which is the smaller of either
6477 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value. There is also
6478 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6479 * limit, then we fail.
6482 if (size
+ map
->user_wire_size
> MIN(map
->user_wire_limit
, vm_per_task_user_wire_limit
) ||
6483 size
+ ptoa_64(total_wire_count
) > vm_global_user_wire_limit
) {
6484 if (size
+ ptoa_64(total_wire_count
) > vm_global_user_wire_limit
) {
6485 os_atomic_inc(&vm_add_wire_count_over_global_limit
, relaxed
);
6487 os_atomic_inc(&vm_add_wire_count_over_user_limit
, relaxed
);
6489 return KERN_RESOURCE_SHORTAGE
;
6493 * The first time the user wires an entry, we also increment the wired_count and add this to
6494 * the total that has been wired in the map.
6497 if (entry
->wired_count
>= MAX_WIRE_COUNT
) {
6498 return KERN_FAILURE
;
6501 entry
->wired_count
++;
6502 map
->user_wire_size
+= size
;
6505 if (entry
->user_wired_count
>= MAX_WIRE_COUNT
) {
6506 return KERN_FAILURE
;
6509 entry
->user_wired_count
++;
6512 * The kernel's wiring the memory. Just bump the count and continue.
6515 if (entry
->wired_count
>= MAX_WIRE_COUNT
) {
6516 panic("vm_map_wire: too many wirings");
6519 entry
->wired_count
++;
6522 return KERN_SUCCESS
;
6526 * Update the memory wiring accounting now that the given map entry is being unwired.
6530 subtract_wire_counts(
6532 vm_map_entry_t entry
,
6533 boolean_t user_wire
)
6537 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6540 if (entry
->user_wired_count
== 1) {
6542 * We're removing the last user wire reference. Decrement the wired_count and the total
6543 * user wired memory for this map.
6546 assert(entry
->wired_count
>= 1);
6547 entry
->wired_count
--;
6548 map
->user_wire_size
-= entry
->vme_end
- entry
->vme_start
;
6551 assert(entry
->user_wired_count
>= 1);
6552 entry
->user_wired_count
--;
6555 * The kernel is unwiring the memory. Just update the count.
6558 assert(entry
->wired_count
>= 1);
6559 entry
->wired_count
--;
/*
 * Counter incremented by the wiring code in this file each time it
 * rejects a request to wire an executable range with
 * KERN_PROTECTION_FAILURE.
 */
6563 int cs_executable_wire
= 0;
6568 * Sets the pageability of the specified address range in the
6569 * target map as wired. Regions specified as not pageable require
6570 * locked-down physical memory and physical page maps. The
6571 * access_type variable indicates types of accesses that must not
6572 * generate page faults. This is checked against protection of
6573 * memory being locked-down.
6575 * The map must not be locked, but a reference must remain to the
6576 * map throughout the call.
/*
 * NOTE(review): the line carrying this routine's name is not visible
 * here; from the nine-argument calls made by the vm_map_wire_*()
 * wrappers further down this is presumably vm_map_wire_nested().
 *
 * Walks the map entries covering [start, end) and wires them, keeping
 * "s" as the address up to which wiring has been done.  Submap entries
 * are handled by calling vm_map_wire_nested() on VME_SUBMAP(entry);
 * other entries go through vm_fault_wire() on a temporary entry
 * (tmp_entry) while the real entry is marked in_transition.
 * When the final rc is not KERN_SUCCESS, the wirings done so far are
 * undone with vm_map_unwire_nested() over [start, s).
 */
6578 static kern_return_t
6581 vm_map_offset_t start
,
6582 vm_map_offset_t end
,
6583 vm_prot_t caller_prot
,
6585 boolean_t user_wire
,
6587 vm_map_offset_t pmap_addr
,
6588 ppnum_t
*physpage_p
)
6590 vm_map_entry_t entry
;
6591 vm_prot_t access_type
;
6592 struct vm_map_entry
*first_entry
, tmp_entry
;
6594 vm_map_offset_t s
, e
;
6596 boolean_t need_wakeup
;
6597 boolean_t main_map
= FALSE
;
6598 wait_interrupt_t interruptible_state
;
6599 thread_t cur_thread
;
6600 unsigned int last_timestamp
;
6602 boolean_t wire_and_extract
;
6603 vm_prot_t extra_prots
;
/*
 * extra_prots is OR-ed into access_type for the lookup done on submap
 * entries.  It starts as VM_PROT_COPY | VM_PROT_COPY_FAIL_IF_EXECUTABLE;
 * the FAIL_IF_EXECUTABLE bit is cleared below for the kernel pmap or a
 * map without code-signing enforcement (XNU_TARGET_OS_OSX builds) and
 * for pmaps that pmap_cs_exempt() reports as exempt.
 */
6605 extra_prots
= VM_PROT_COPY
;
6606 extra_prots
|= VM_PROT_COPY_FAIL_IF_EXECUTABLE
;
6607 #if XNU_TARGET_OS_OSX
6608 if (map
->pmap
== kernel_pmap
||
6609 !vm_map_cs_enforcement(map
)) {
6610 extra_prots
&= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE
;
6612 #endif /* XNU_TARGET_OS_OSX */
6614 if (pmap_cs_exempt(map
->pmap
)) {
6615 extra_prots
&= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE
;
6617 #endif /* PMAP_CS */
6619 access_type
= (caller_prot
& VM_PROT_ALL
);
/*
 * A non-NULL physpage_p selects "wire_and_extract" mode, which is
 * rejected with KERN_INVALID_ARGUMENT unless the range is exactly
 * PAGE_SIZE long.
 */
6621 wire_and_extract
= FALSE
;
6622 if (physpage_p
!= NULL
) {
6624 * The caller wants the physical page number of the
6625 * wired page. We return only one physical page number
6626 * so this works for only one page at a time.
6628 if ((end
- start
) != PAGE_SIZE
) {
6629 return KERN_INVALID_ARGUMENT
;
6631 wire_and_extract
= TRUE
;
6636 if (map_pmap
== NULL
) {
6639 last_timestamp
= map
->timestamp
;
6641 VM_MAP_RANGE_CHECK(map
, start
, end
);
6642 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
6643 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
6646 /* We wired what the caller asked for, zero pages */
6648 return KERN_SUCCESS
;
6651 need_wakeup
= FALSE
;
6652 cur_thread
= current_thread();
6657 if (vm_map_lookup_entry(map
, s
, &first_entry
)) {
6658 entry
= first_entry
;
6660 * vm_map_clip_start will be done later.
6661 * We don't want to unnest any nested submaps here !
6664 /* Start address is not in map */
6665 rc
= KERN_INVALID_ADDRESS
;
6669 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
6671 * At this point, we have wired from "start" to "s".
6672 * We still need to wire from "s" to "end".
6674 * "entry" hasn't been clipped, so it could start before "s"
6675 * and/or end after "end".
6678 /* "e" is how far we want to wire in this entry */
6685 * If another thread is wiring/unwiring this entry then
6686 * block after informing other thread to wake us up.
6688 if (entry
->in_transition
) {
6689 wait_result_t wait_result
;
6692 * We have not clipped the entry. Make sure that
6693 * the start address is in range so that the lookup
6694 * below will succeed.
6695 * "s" is the current starting point: we've already
6696 * wired from "start" to "s" and we still have
6697 * to wire from "s" to "end".
6700 entry
->needs_wakeup
= TRUE
;
6703 * wake up anybody waiting on entries that we have
6707 vm_map_entry_wakeup(map
);
6708 need_wakeup
= FALSE
;
6711 * User wiring is interruptible
6713 wait_result
= vm_map_entry_wait(map
,
6714 (user_wire
) ? THREAD_ABORTSAFE
:
6716 if (user_wire
&& wait_result
== THREAD_INTERRUPTED
) {
6718 * undo the wirings we have done so far
6719 * We do not clear the needs_wakeup flag,
6720 * because we cannot tell if we were the
6728 * Cannot avoid a lookup here. reset timestamp.
6730 last_timestamp
= map
->timestamp
;
6733 * The entry could have been clipped, look it up again.
6734 * Worse that can happen is, it may not exist anymore.
6736 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
6738 * User: undo everything upto the previous
6739 * entry. let vm_map_unwire worry about
6740 * checking the validity of the range.
6745 entry
= first_entry
;
6749 if (entry
->is_sub_map
) {
6750 vm_map_offset_t sub_start
;
6751 vm_map_offset_t sub_end
;
6752 vm_map_offset_t local_start
;
6753 vm_map_offset_t local_end
;
6756 if (wire_and_extract
) {
6758 * Wiring would result in copy-on-write
6759 * which would not be compatible with
6760 * the sharing we have with the original
6761 * provider of this memory.
6763 rc
= KERN_INVALID_ARGUMENT
;
6767 vm_map_clip_start(map
, entry
, s
);
6768 vm_map_clip_end(map
, entry
, end
);
6770 sub_start
= VME_OFFSET(entry
);
6771 sub_end
= entry
->vme_end
;
6772 sub_end
+= VME_OFFSET(entry
) - entry
->vme_start
;
6774 local_end
= entry
->vme_end
;
6775 if (map_pmap
== NULL
) {
6777 vm_object_offset_t offset
;
6780 vm_map_entry_t local_entry
;
6781 vm_map_version_t version
;
6782 vm_map_t lookup_map
;
6784 if (entry
->use_pmap
) {
6785 pmap
= VME_SUBMAP(entry
)->pmap
;
6786 /* ppc implementation requires that */
6787 /* submaps pmap address ranges line */
6788 /* up with parent map */
6790 pmap_addr
= sub_start
;
6798 if (entry
->wired_count
) {
6799 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6804 * The map was not unlocked:
6805 * no need to goto re-lookup.
6806 * Just go directly to next entry.
6808 entry
= entry
->vme_next
;
6809 s
= entry
->vme_start
;
6813 /* call vm_map_lookup_locked to */
6814 /* cause any needs copy to be */
6816 local_start
= entry
->vme_start
;
6818 vm_map_lock_write_to_read(map
);
6819 rc
= vm_map_lookup_locked(
6820 &lookup_map
, local_start
,
6821 (access_type
| extra_prots
),
6822 OBJECT_LOCK_EXCLUSIVE
,
6824 &offset
, &prot
, &wired
,
6827 if (rc
!= KERN_SUCCESS
) {
6828 vm_map_unlock_read(lookup_map
);
6829 assert(map_pmap
== NULL
);
6830 vm_map_unwire(map
, start
,
6834 vm_object_unlock(object
);
6835 if (real_map
!= lookup_map
) {
6836 vm_map_unlock(real_map
);
6838 vm_map_unlock_read(lookup_map
);
6841 /* we unlocked, so must re-lookup */
6842 if (!vm_map_lookup_entry(map
,
6850 * entry could have been "simplified",
6853 entry
= local_entry
;
6854 assert(s
== local_start
);
6855 vm_map_clip_start(map
, entry
, s
);
6856 vm_map_clip_end(map
, entry
, end
);
6857 /* re-compute "e" */
6863 /* did we have a change of type? */
6864 if (!entry
->is_sub_map
) {
6865 last_timestamp
= map
->timestamp
;
6869 local_start
= entry
->vme_start
;
6873 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6877 entry
->in_transition
= TRUE
;
6880 rc
= vm_map_wire_nested(VME_SUBMAP(entry
),
6883 user_wire
, pmap
, pmap_addr
,
6888 * Find the entry again. It could have been clipped
6889 * after we unlocked the map.
6891 if (!vm_map_lookup_entry(map
, local_start
,
6893 panic("vm_map_wire: re-lookup failed");
6895 entry
= first_entry
;
6897 assert(local_start
== s
);
6898 /* re-compute "e" */
6904 last_timestamp
= map
->timestamp
;
6905 while ((entry
!= vm_map_to_entry(map
)) &&
6906 (entry
->vme_start
< e
)) {
6907 assert(entry
->in_transition
);
6908 entry
->in_transition
= FALSE
;
6909 if (entry
->needs_wakeup
) {
6910 entry
->needs_wakeup
= FALSE
;
6913 if (rc
!= KERN_SUCCESS
) {/* from vm_*_wire */
6914 subtract_wire_counts(map
, entry
, user_wire
);
6916 entry
= entry
->vme_next
;
6918 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6922 /* no need to relookup again */
6923 s
= entry
->vme_start
;
6928 * If this entry is already wired then increment
6929 * the appropriate wire reference count.
6931 if (entry
->wired_count
) {
6932 if ((entry
->protection
& access_type
) != access_type
) {
6933 /* found a protection problem */
6937 * We should always return an error
6938 * in this case but since we didn't
6939 * enforce it before, let's do
6940 * it only for the new "wire_and_extract"
6941 * code path for now...
6943 if (wire_and_extract
) {
6944 rc
= KERN_PROTECTION_FAILURE
;
6950 * entry is already wired down, get our reference
6951 * after clipping to our range.
6953 vm_map_clip_start(map
, entry
, s
);
6954 vm_map_clip_end(map
, entry
, end
);
6956 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6960 if (wire_and_extract
) {
6962 vm_object_offset_t offset
;
6966 * We don't have to "wire" the page again
6967 * bit we still have to "extract" its
6968 * physical page number, after some sanity
6971 assert((entry
->vme_end
- entry
->vme_start
)
6973 assert(!entry
->needs_copy
);
6974 assert(!entry
->is_sub_map
);
6975 assert(VME_OBJECT(entry
));
6976 if (((entry
->vme_end
- entry
->vme_start
)
6978 entry
->needs_copy
||
6979 entry
->is_sub_map
||
6980 VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
6981 rc
= KERN_INVALID_ARGUMENT
;
6985 object
= VME_OBJECT(entry
);
6986 offset
= VME_OFFSET(entry
);
6987 /* need exclusive lock to update m->dirty */
6988 if (entry
->protection
& VM_PROT_WRITE
) {
6989 vm_object_lock(object
);
6991 vm_object_lock_shared(object
);
6993 m
= vm_page_lookup(object
, offset
);
6994 assert(m
!= VM_PAGE_NULL
);
6995 assert(VM_PAGE_WIRED(m
));
6996 if (m
!= VM_PAGE_NULL
&& VM_PAGE_WIRED(m
)) {
6997 *physpage_p
= VM_PAGE_GET_PHYS_PAGE(m
);
6998 if (entry
->protection
& VM_PROT_WRITE
) {
6999 vm_object_lock_assert_exclusive(
7001 m
->vmp_dirty
= TRUE
;
7004 /* not already wired !? */
7007 vm_object_unlock(object
);
7010 /* map was not unlocked: no need to relookup */
7011 entry
= entry
->vme_next
;
7012 s
= entry
->vme_start
;
7017 * Unwired entry or wire request transmitted via submap
7021 * Wiring would copy the pages to the shadow object.
7022 * The shadow object would not be code-signed so
7023 * attempting to execute code from these copied pages
7024 * would trigger a code-signing violation.
7027 if ((entry
->protection
& VM_PROT_EXECUTE
)
7028 #if XNU_TARGET_OS_OSX
7030 map
->pmap
!= kernel_pmap
&&
7031 (vm_map_cs_enforcement(map
)
7033 || !VM_MAP_IS_EXOTIC(map
)
7034 #endif /* __arm64__ */
7036 #endif /* XNU_TARGET_OS_OSX */
7039 !pmap_cs_exempt(map
->pmap
)
7043 printf("pid %d[%s] wiring executable range from "
7044 "0x%llx to 0x%llx: rejected to preserve "
7047 (current_task()->bsd_info
7048 ? proc_name_address(current_task()->bsd_info
)
7050 (uint64_t) entry
->vme_start
,
7051 (uint64_t) entry
->vme_end
);
7052 #endif /* MACH_ASSERT */
7053 DTRACE_VM2(cs_executable_wire
,
7054 uint64_t, (uint64_t)entry
->vme_start
,
7055 uint64_t, (uint64_t)entry
->vme_end
);
7056 cs_executable_wire
++;
7057 rc
= KERN_PROTECTION_FAILURE
;
7062 * Perform actions of vm_map_lookup that need the write
7063 * lock on the map: create a shadow object for a
7064 * copy-on-write region, or an object for a zero-fill
7067 size
= entry
->vme_end
- entry
->vme_start
;
7069 * If wiring a copy-on-write page, we need to copy it now
7070 * even if we're only (currently) requesting read access.
7071 * This is aggressive, but once it's wired we can't move it.
7073 if (entry
->needs_copy
) {
7074 if (wire_and_extract
) {
7076 * We're supposed to share with the original
7077 * provider so should not be "needs_copy"
7079 rc
= KERN_INVALID_ARGUMENT
;
7083 VME_OBJECT_SHADOW(entry
, size
);
7084 entry
->needs_copy
= FALSE
;
7085 } else if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
7086 if (wire_and_extract
) {
7088 * We're supposed to share with the original
7089 * provider so should already have an object.
7091 rc
= KERN_INVALID_ARGUMENT
;
7094 VME_OBJECT_SET(entry
, vm_object_allocate(size
));
7095 VME_OFFSET_SET(entry
, (vm_object_offset_t
)0);
7096 assert(entry
->use_pmap
);
7099 vm_map_clip_start(map
, entry
, s
);
7100 vm_map_clip_end(map
, entry
, end
);
7102 /* re-compute "e" */
7109 * Check for holes and protection mismatch.
7110 * Holes: Next entry should be contiguous unless this
7111 * is the end of the region.
7112 * Protection: Access requested must be allowed, unless
7113 * wiring is by protection class
7115 if ((entry
->vme_end
< end
) &&
7116 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7117 (entry
->vme_next
->vme_start
> entry
->vme_end
))) {
7119 rc
= KERN_INVALID_ADDRESS
;
7122 if ((entry
->protection
& access_type
) != access_type
) {
7123 /* found a protection problem */
7124 rc
= KERN_PROTECTION_FAILURE
;
7128 assert(entry
->wired_count
== 0 && entry
->user_wired_count
== 0);
7130 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
7134 entry
->in_transition
= TRUE
;
7137 * This entry might get split once we unlock the map.
7138 * In vm_fault_wire(), we need the current range as
7139 * defined by this entry. In order for this to work
7140 * along with a simultaneous clip operation, we make a
7141 * temporary copy of this entry and use that for the
7142 * wiring. Note that the underlying objects do not
7143 * change during a clip.
7148 * The in_transition state guarentees that the entry
7149 * (or entries for this range, if split occured) will be
7150 * there when the map lock is acquired for the second time.
/*
 * For a kernel (non-user) wiring with a valid current thread, the
 * thread's interrupt level is switched to THREAD_UNINT around the
 * vm_fault_wire() call and the saved level is restored afterwards.
 */
7154 if (!user_wire
&& cur_thread
!= THREAD_NULL
) {
7155 interruptible_state
= thread_interrupt_level(THREAD_UNINT
);
7157 interruptible_state
= THREAD_UNINT
;
7161 rc
= vm_fault_wire(map
,
7162 &tmp_entry
, caller_prot
, tag
, map_pmap
, pmap_addr
,
7165 rc
= vm_fault_wire(map
,
7166 &tmp_entry
, caller_prot
, tag
, map
->pmap
,
7167 tmp_entry
.vme_start
,
7171 if (!user_wire
&& cur_thread
!= THREAD_NULL
) {
7172 thread_interrupt_level(interruptible_state
);
7177 if (last_timestamp
+ 1 != map
->timestamp
) {
7179 * Find the entry again. It could have been clipped
7180 * after we unlocked the map.
7182 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
7184 panic("vm_map_wire: re-lookup failed");
7187 entry
= first_entry
;
7190 last_timestamp
= map
->timestamp
;
7192 while ((entry
!= vm_map_to_entry(map
)) &&
7193 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7194 assert(entry
->in_transition
);
7195 entry
->in_transition
= FALSE
;
7196 if (entry
->needs_wakeup
) {
7197 entry
->needs_wakeup
= FALSE
;
7200 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
7201 subtract_wire_counts(map
, entry
, user_wire
);
7203 entry
= entry
->vme_next
;
7206 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
7210 if ((entry
!= vm_map_to_entry(map
)) && /* we still have entries in the map */
7211 (tmp_entry
.vme_end
!= end
) && /* AND, we are not at the end of the requested range */
7212 (entry
->vme_start
!= tmp_entry
.vme_end
)) { /* AND, the next entry is not contiguous. */
7213 /* found a "new" hole */
7214 s
= tmp_entry
.vme_end
;
7215 rc
= KERN_INVALID_ADDRESS
;
7219 s
= entry
->vme_start
;
7220 } /* end while loop through map entries */
/*
 * Common exit: on success the range is re-simplified; on failure the
 * part wired so far, [start, s), is unwired again.
 */
7223 if (rc
== KERN_SUCCESS
) {
7224 /* repair any damage we may have made to the VM map */
7225 vm_map_simplify_range(map
, start
, end
);
7231 * wake up anybody waiting on entries we wired.
7234 vm_map_entry_wakeup(map
);
7237 if (rc
!= KERN_SUCCESS
) {
7238 /* undo what has been wired so far */
7239 vm_map_unwire_nested(map
, start
, s
, user_wire
,
7240 map_pmap
, pmap_addr
);
7250 vm_map_wire_external(
7252 vm_map_offset_t start
,
7253 vm_map_offset_t end
,
7254 vm_prot_t caller_prot
,
7255 boolean_t user_wire
)
7259 kret
= vm_map_wire_nested(map
, start
, end
, caller_prot
, vm_tag_bt(),
7260 user_wire
, (pmap_t
)NULL
, 0, NULL
);
7267 vm_map_offset_t start
,
7268 vm_map_offset_t end
,
7269 vm_prot_t caller_prot
,
7271 boolean_t user_wire
)
7275 kret
= vm_map_wire_nested(map
, start
, end
, caller_prot
, tag
,
7276 user_wire
, (pmap_t
)NULL
, 0, NULL
);
7281 vm_map_wire_and_extract_external(
7283 vm_map_offset_t start
,
7284 vm_prot_t caller_prot
,
7285 boolean_t user_wire
,
7286 ppnum_t
*physpage_p
)
7290 kret
= vm_map_wire_nested(map
,
7292 start
+ VM_MAP_PAGE_SIZE(map
),
7299 if (kret
!= KERN_SUCCESS
&&
7300 physpage_p
!= NULL
) {
7307 vm_map_wire_and_extract_kernel(
7309 vm_map_offset_t start
,
7310 vm_prot_t caller_prot
,
7312 boolean_t user_wire
,
7313 ppnum_t
*physpage_p
)
7317 kret
= vm_map_wire_nested(map
,
7319 start
+ VM_MAP_PAGE_SIZE(map
),
7326 if (kret
!= KERN_SUCCESS
&&
7327 physpage_p
!= NULL
) {
7336 * Sets the pageability of the specified address range in the target
7337 * as pageable. Regions specified must have been wired previously.
7339 * The map must not be locked, but a reference must remain to the map
7340 * throughout the call.
7342 * Kernel will panic on failures. User unwire ignores holes and
7343 * unwired and intransition entries to avoid losing memory by leaving
/*
 * Worker behind vm_map_unwire(), which calls it with a NULL pmap and a
 * pmap address of 0.  It is also invoked on VME_SUBMAP(entry) for
 * submap entries and by the wiring path to undo a partially completed
 * wire.
 *
 * Walks the entries overlapping [start, end), drops one wiring from
 * each via subtract_wire_counts() and, once an entry's wired_count
 * reaches 0, marks it in_transition and unwires it through
 * vm_fault_unwire() on a temporary copy (tmp_entry).  All visible
 * return statements yield KERN_SUCCESS or KERN_INVALID_ADDRESS.
 */
7346 static kern_return_t
7347 vm_map_unwire_nested(
7349 vm_map_offset_t start
,
7350 vm_map_offset_t end
,
7351 boolean_t user_wire
,
7353 vm_map_offset_t pmap_addr
)
7355 vm_map_entry_t entry
;
7356 struct vm_map_entry
*first_entry
, tmp_entry
;
7357 boolean_t need_wakeup
;
7358 boolean_t main_map
= FALSE
;
7359 unsigned int last_timestamp
;
7362 if (map_pmap
== NULL
) {
7365 last_timestamp
= map
->timestamp
;
7367 VM_MAP_RANGE_CHECK(map
, start
, end
);
7368 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
7369 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
7372 /* We unwired what the caller asked for: zero pages */
7374 return KERN_SUCCESS
;
7377 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
7378 entry
= first_entry
;
7380 * vm_map_clip_start will be done later.
7381 * We don't want to unnest any nested sub maps here !
7385 panic("vm_map_unwire: start not found");
7387 /* Start address is not in map. */
7389 return KERN_INVALID_ADDRESS
;
/*
 * A range that starts in a superpage entry is refused with
 * KERN_INVALID_ADDRESS.
 */
7392 if (entry
->superpage_size
) {
7393 /* superpages are always wired */
7395 return KERN_INVALID_ADDRESS
;
7398 need_wakeup
= FALSE
;
7399 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
7400 if (entry
->in_transition
) {
7403 * Another thread is wiring down this entry. Note
7404 * that if it is not for the other thread we would
7405 * be unwiring an unwired entry. This is not
7406 * permitted. If we wait, we will be unwiring memory
7410 * Another thread is unwiring this entry. We did not
7411 * have a reference to it, because if we did, this
7412 * entry will not be getting unwired now.
7417 * This could happen: there could be some
7418 * overlapping vslock/vsunlock operations
7420 * We should probably just wait and retry,
7421 * but then we have to be careful that this
7422 * entry could get "simplified" after
7423 * "in_transition" gets unset and before
7424 * we re-lookup the entry, so we would
7425 * have to re-clip the entry to avoid
7426 * re-unwiring what we have already unwired...
7427 * See vm_map_wire_nested().
7429 * Or we could just ignore "in_transition"
7430 * here and proceed to decement the wired
7431 * count(s) on this entry. That should be fine
7432 * as long as "wired_count" doesn't drop all
7433 * the way to 0 (and we should panic if THAT
7436 panic("vm_map_unwire: in_transition entry");
7439 entry
= entry
->vme_next
;
7443 if (entry
->is_sub_map
) {
7444 vm_map_offset_t sub_start
;
7445 vm_map_offset_t sub_end
;
7446 vm_map_offset_t local_end
;
7449 vm_map_clip_start(map
, entry
, start
);
7450 vm_map_clip_end(map
, entry
, end
);
7452 sub_start
= VME_OFFSET(entry
);
7453 sub_end
= entry
->vme_end
- entry
->vme_start
;
7454 sub_end
+= VME_OFFSET(entry
);
7455 local_end
= entry
->vme_end
;
7456 if (map_pmap
== NULL
) {
7457 if (entry
->use_pmap
) {
7458 pmap
= VME_SUBMAP(entry
)->pmap
;
7459 pmap_addr
= sub_start
;
7464 if (entry
->wired_count
== 0 ||
7465 (user_wire
&& entry
->user_wired_count
== 0)) {
7467 panic("vm_map_unwire: entry is unwired");
7469 entry
= entry
->vme_next
;
7475 * Holes: Next entry should be contiguous unless
7476 * this is the end of the region.
7478 if (((entry
->vme_end
< end
) &&
7479 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7480 (entry
->vme_next
->vme_start
7481 > entry
->vme_end
)))) {
7483 panic("vm_map_unwire: non-contiguous region");
7486 * entry = entry->vme_next;
/*
 * Drop one wiring.  If the entry is still wired afterwards
 * (wired_count != 0) there is nothing more to do for it and the walk
 * moves on to the next entry.
 */
7491 subtract_wire_counts(map
, entry
, user_wire
);
7493 if (entry
->wired_count
!= 0) {
7494 entry
= entry
->vme_next
;
7498 entry
->in_transition
= TRUE
;
7499 tmp_entry
= *entry
;/* see comment in vm_map_wire() */
7502 * We can unlock the map now. The in_transition state
7503 * guarantees existance of the entry.
7506 vm_map_unwire_nested(VME_SUBMAP(entry
),
7507 sub_start
, sub_end
, user_wire
, pmap
, pmap_addr
);
7510 if (last_timestamp
+ 1 != map
->timestamp
) {
7512 * Find the entry again. It could have been
7513 * clipped or deleted after we unlocked the map.
7515 if (!vm_map_lookup_entry(map
,
7516 tmp_entry
.vme_start
,
7519 panic("vm_map_unwire: re-lookup failed");
7521 entry
= first_entry
->vme_next
;
7523 entry
= first_entry
;
7526 last_timestamp
= map
->timestamp
;
7529 * clear transition bit for all constituent entries
7530 * that were in the original entry (saved in
7531 * tmp_entry). Also check for waiters.
7533 while ((entry
!= vm_map_to_entry(map
)) &&
7534 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7535 assert(entry
->in_transition
);
7536 entry
->in_transition
= FALSE
;
7537 if (entry
->needs_wakeup
) {
7538 entry
->needs_wakeup
= FALSE
;
7541 entry
= entry
->vme_next
;
7546 vm_map_unwire_nested(VME_SUBMAP(entry
),
7547 sub_start
, sub_end
, user_wire
, map_pmap
,
7551 if (last_timestamp
+ 1 != map
->timestamp
) {
7553 * Find the entry again. It could have been
7554 * clipped or deleted after we unlocked the map.
7556 if (!vm_map_lookup_entry(map
,
7557 tmp_entry
.vme_start
,
7560 panic("vm_map_unwire: re-lookup failed");
7562 entry
= first_entry
->vme_next
;
7564 entry
= first_entry
;
7567 last_timestamp
= map
->timestamp
;
7572 if ((entry
->wired_count
== 0) ||
7573 (user_wire
&& entry
->user_wired_count
== 0)) {
7575 panic("vm_map_unwire: entry is unwired");
7578 entry
= entry
->vme_next
;
7582 assert(entry
->wired_count
> 0 &&
7583 (!user_wire
|| entry
->user_wired_count
> 0));
7585 vm_map_clip_start(map
, entry
, start
);
7586 vm_map_clip_end(map
, entry
, end
);
7590 * Holes: Next entry should be contiguous unless
7591 * this is the end of the region.
7593 if (((entry
->vme_end
< end
) &&
7594 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7595 (entry
->vme_next
->vme_start
> entry
->vme_end
)))) {
7597 panic("vm_map_unwire: non-contiguous region");
7599 entry
= entry
->vme_next
;
7603 subtract_wire_counts(map
, entry
, user_wire
);
7605 if (entry
->wired_count
!= 0) {
7606 entry
= entry
->vme_next
;
/*
 * Last wiring dropped: the zero_wired_pages flag is reset, then the
 * entry is marked in_transition and a copy of it is handed to
 * vm_fault_unwire().
 */
7610 if (entry
->zero_wired_pages
) {
7611 entry
->zero_wired_pages
= FALSE
;
7614 entry
->in_transition
= TRUE
;
7615 tmp_entry
= *entry
; /* see comment in vm_map_wire() */
7618 * We can unlock the map now. The in_transition state
7619 * guarantees existance of the entry.
7623 vm_fault_unwire(map
,
7624 &tmp_entry
, FALSE
, map_pmap
, pmap_addr
);
7626 vm_fault_unwire(map
,
7627 &tmp_entry
, FALSE
, map
->pmap
,
7628 tmp_entry
.vme_start
);
7632 if (last_timestamp
+ 1 != map
->timestamp
) {
7634 * Find the entry again. It could have been clipped
7635 * or deleted after we unlocked the map.
7637 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
7640 panic("vm_map_unwire: re-lookup failed");
7642 entry
= first_entry
->vme_next
;
7644 entry
= first_entry
;
7647 last_timestamp
= map
->timestamp
;
7650 * clear transition bit for all constituent entries that
7651 * were in the original entry (saved in tmp_entry). Also
7652 * check for waiters.
7654 while ((entry
!= vm_map_to_entry(map
)) &&
7655 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7656 assert(entry
->in_transition
);
7657 entry
->in_transition
= FALSE
;
7658 if (entry
->needs_wakeup
) {
7659 entry
->needs_wakeup
= FALSE
;
7662 entry
= entry
->vme_next
;
7667 * We might have fragmented the address space when we wired this
7668 * range of addresses. Attempt to re-coalesce these VM map entries
7669 * with their neighbors now that they're no longer wired.
7670 * Under some circumstances, address space fragmentation can
7671 * prevent VM object shadow chain collapsing, which can cause
7674 vm_map_simplify_range(map
, start
, end
);
7678 * wake up anybody waiting on entries that we have unwired.
7681 vm_map_entry_wakeup(map
);
7683 return KERN_SUCCESS
;
7689 vm_map_offset_t start
,
7690 vm_map_offset_t end
,
7691 boolean_t user_wire
)
7693 return vm_map_unwire_nested(map
, start
, end
,
7694 user_wire
, (pmap_t
)NULL
, 0);
7699 * vm_map_entry_delete: [ internal use only ]
7701 * Deallocate the given entry from the target map.
7704 vm_map_entry_delete(
7706 vm_map_entry_t entry
)
7708 vm_map_offset_t s
, e
;
7712 s
= entry
->vme_start
;
7714 assert(VM_MAP_PAGE_ALIGNED(s
, FOURK_PAGE_MASK
));
7715 assert(VM_MAP_PAGE_ALIGNED(e
, FOURK_PAGE_MASK
));
7716 if (VM_MAP_PAGE_MASK(map
) >= PAGE_MASK
) {
7717 assert(page_aligned(s
));
7718 assert(page_aligned(e
));
7720 if (entry
->map_aligned
== TRUE
) {
7721 assert(VM_MAP_PAGE_ALIGNED(s
, VM_MAP_PAGE_MASK(map
)));
7722 assert(VM_MAP_PAGE_ALIGNED(e
, VM_MAP_PAGE_MASK(map
)));
7724 assert(entry
->wired_count
== 0);
7725 assert(entry
->user_wired_count
== 0);
7726 assert(!entry
->permanent
);
7728 if (entry
->is_sub_map
) {
7730 submap
= VME_SUBMAP(entry
);
7733 object
= VME_OBJECT(entry
);
7736 vm_map_store_entry_unlink(map
, entry
);
7739 vm_map_entry_dispose(map
, entry
);
7743 * Deallocate the object only after removing all
7744 * pmap entries pointing to its pages.
7747 vm_map_deallocate(submap
);
7749 vm_object_deallocate(object
);
7754 vm_map_submap_pmap_clean(
7756 vm_map_offset_t start
,
7757 vm_map_offset_t end
,
7759 vm_map_offset_t offset
)
7761 vm_map_offset_t submap_start
;
7762 vm_map_offset_t submap_end
;
7763 vm_map_size_t remove_size
;
7764 vm_map_entry_t entry
;
7766 submap_end
= offset
+ (end
- start
);
7767 submap_start
= offset
;
7769 vm_map_lock_read(sub_map
);
7770 if (vm_map_lookup_entry(sub_map
, offset
, &entry
)) {
7771 remove_size
= (entry
->vme_end
- entry
->vme_start
);
7772 if (offset
> entry
->vme_start
) {
7773 remove_size
-= offset
- entry
->vme_start
;
7777 if (submap_end
< entry
->vme_end
) {
7779 entry
->vme_end
- submap_end
;
7781 if (entry
->is_sub_map
) {
7782 vm_map_submap_pmap_clean(
7785 start
+ remove_size
,
7789 if (map
->mapped_in_other_pmaps
&&
7790 os_ref_get_count(&map
->map_refcnt
) != 0 &&
7791 VME_OBJECT(entry
) != NULL
) {
7792 vm_object_pmap_protect_options(
7794 (VME_OFFSET(entry
) +
7802 PMAP_OPTIONS_REMOVE
);
7804 pmap_remove(map
->pmap
,
7806 (addr64_t
)(start
+ remove_size
));
7811 entry
= entry
->vme_next
;
7813 while ((entry
!= vm_map_to_entry(sub_map
))
7814 && (entry
->vme_start
< submap_end
)) {
7815 remove_size
= (entry
->vme_end
- entry
->vme_start
);
7816 if (submap_end
< entry
->vme_end
) {
7817 remove_size
-= entry
->vme_end
- submap_end
;
7819 if (entry
->is_sub_map
) {
7820 vm_map_submap_pmap_clean(
7822 (start
+ entry
->vme_start
) - offset
,
7823 ((start
+ entry
->vme_start
) - offset
) + remove_size
,
7827 if (map
->mapped_in_other_pmaps
&&
7828 os_ref_get_count(&map
->map_refcnt
) != 0 &&
7829 VME_OBJECT(entry
) != NULL
) {
7830 vm_object_pmap_protect_options(
7838 PMAP_OPTIONS_REMOVE
);
7840 pmap_remove(map
->pmap
,
7841 (addr64_t
)((start
+ entry
->vme_start
)
7843 (addr64_t
)(((start
+ entry
->vme_start
)
7844 - offset
) + remove_size
));
7847 entry
= entry
->vme_next
;
7849 vm_map_unlock_read(sub_map
);
7854 * virt_memory_guard_ast:
7856 * Handle the AST callout for a virtual memory guard.
7857 * raise an EXC_GUARD exception and terminate the task
7858 * if configured to do so.
7861 virt_memory_guard_ast(
7863 mach_exception_data_type_t code
,
7864 mach_exception_data_type_t subcode
)
7866 task_t task
= thread
->task
;
7867 assert(task
!= kernel_task
);
7868 assert(task
== current_task());
7871 behavior
= task
->task_exc_guard
;
7873 /* Is delivery enabled */
7874 if ((behavior
& TASK_EXC_GUARD_VM_DELIVER
) == 0) {
7878 /* If only once, make sure we're that once */
7879 while (behavior
& TASK_EXC_GUARD_VM_ONCE
) {
7880 uint32_t new_behavior
= behavior
& ~TASK_EXC_GUARD_VM_DELIVER
;
7882 if (OSCompareAndSwap(behavior
, new_behavior
, &task
->task_exc_guard
)) {
7885 behavior
= task
->task_exc_guard
;
7886 if ((behavior
& TASK_EXC_GUARD_VM_DELIVER
) == 0) {
7891 /* Raise exception via corpse fork or synchronously */
7892 if ((task
->task_exc_guard
& TASK_EXC_GUARD_VM_CORPSE
) &&
7893 (task
->task_exc_guard
& TASK_EXC_GUARD_VM_FATAL
) == 0) {
7894 task_violated_guard(code
, subcode
, NULL
);
7896 task_exception_notify(EXC_GUARD
, code
, subcode
);
7899 /* Terminate the task if desired */
7900 if (task
->task_exc_guard
& TASK_EXC_GUARD_VM_FATAL
) {
7901 task_bsdtask_kill(current_task());
7906 * vm_map_guard_exception:
7908 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7910 * Right now, we do this when we find nothing mapped, or a
7911 * gap in the mapping when a user address space deallocate
7912 * was requested. We report the address of the first gap found.
7915 vm_map_guard_exception(
7916 vm_map_offset_t gap_start
,
7919 mach_exception_code_t code
= 0;
7920 unsigned int guard_type
= GUARD_TYPE_VIRT_MEMORY
;
7921 unsigned int target
= 0; /* should we pass in pid associated with map? */
7922 mach_exception_data_type_t subcode
= (uint64_t)gap_start
;
7923 boolean_t fatal
= FALSE
;
7925 task_t task
= current_task();
7927 /* Can't deliver exceptions to kernel task */
7928 if (task
== kernel_task
) {
7932 EXC_GUARD_ENCODE_TYPE(code
, guard_type
);
7933 EXC_GUARD_ENCODE_FLAVOR(code
, reason
);
7934 EXC_GUARD_ENCODE_TARGET(code
, target
);
7936 if (task
->task_exc_guard
& TASK_EXC_GUARD_VM_FATAL
) {
7939 thread_guard_violation(current_thread(), code
, subcode
, fatal
);
7943 * vm_map_delete: [ internal use only ]
7945 * Deallocates the given address range from the target map.
7946 * Removes all user wirings. Unwires one kernel wiring if
7947 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7948 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7949 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7951 * This routine is called with map locked and leaves map locked.
7953 static kern_return_t
7956 vm_map_offset_t start
,
7957 vm_map_offset_t end
,
7961 vm_map_entry_t entry
, next
;
7962 struct vm_map_entry
*first_entry
, tmp_entry
;
7965 boolean_t need_wakeup
;
7966 unsigned int last_timestamp
= ~0; /* unlikely value */
7968 vm_map_offset_t gap_start
;
7969 __unused vm_map_offset_t save_start
= start
;
7970 __unused vm_map_offset_t save_end
= end
;
7971 const vm_map_offset_t FIND_GAP
= 1; /* a not page aligned value */
7972 const vm_map_offset_t GAPS_OK
= 2; /* a different not page aligned value */
7974 if (map
!= kernel_map
&& !(flags
& VM_MAP_REMOVE_GAPS_OK
) && !map
->terminated
) {
7975 gap_start
= FIND_GAP
;
7977 gap_start
= GAPS_OK
;
7980 interruptible
= (flags
& VM_MAP_REMOVE_INTERRUPTIBLE
) ?
7981 THREAD_ABORTSAFE
: THREAD_UNINT
;
7984 * All our DMA I/O operations in IOKit are currently done by
7985 * wiring through the map entries of the task requesting the I/O.
7986 * Because of this, we must always wait for kernel wirings
7987 * to go away on the entries before deleting them.
7989 * Any caller who wants to actually remove a kernel wiring
7990 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7991 * properly remove one wiring instead of blasting through
7994 flags
|= VM_MAP_REMOVE_WAIT_FOR_KWIRE
;
7998 * Find the start of the region, and clip it
8000 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
8001 entry
= first_entry
;
8002 if (map
== kalloc_map
&&
8003 (entry
->vme_start
!= start
||
8004 entry
->vme_end
!= end
)) {
8005 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8006 "mismatched entry %p [0x%llx:0x%llx]\n",
8011 (uint64_t)entry
->vme_start
,
8012 (uint64_t)entry
->vme_end
);
8016 * If in a superpage, extend the range to include the start of the mapping.
8018 if (entry
->superpage_size
&& (start
& ~SUPERPAGE_MASK
)) {
8019 start
= SUPERPAGE_ROUND_DOWN(start
);
8023 if (start
== entry
->vme_start
) {
8025 * No need to clip. We don't want to cause
8026 * any unnecessary unnesting in this case...
8029 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
8030 entry
->map_aligned
&&
8031 !VM_MAP_PAGE_ALIGNED(
8033 VM_MAP_PAGE_MASK(map
))) {
8035 * The entry will no longer be
8036 * map-aligned after clipping
8037 * and the caller said it's OK.
8039 entry
->map_aligned
= FALSE
;
8041 if (map
== kalloc_map
) {
8042 panic("vm_map_delete(%p,0x%llx,0x%llx):"
8043 " clipping %p at 0x%llx\n",
8050 vm_map_clip_start(map
, entry
, start
);
8054 * Fix the lookup hint now, rather than each
8055 * time through the loop.
8057 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8059 if (map
->pmap
== kernel_pmap
&&
8060 os_ref_get_count(&map
->map_refcnt
) != 0) {
8061 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8062 "no map entry at 0x%llx\n",
8068 entry
= first_entry
->vme_next
;
8069 if (gap_start
== FIND_GAP
) {
8075 if (entry
->superpage_size
) {
8076 end
= SUPERPAGE_ROUND_UP(end
);
8079 need_wakeup
= FALSE
;
8081 * Step through all entries in this region
8083 s
= entry
->vme_start
;
8084 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
8086 * At this point, we have deleted all the memory entries
8087 * between "start" and "s". We still need to delete
8088 * all memory entries between "s" and "end".
8089 * While we were blocked and the map was unlocked, some
8090 * new memory entries could have been re-allocated between
8091 * "start" and "s" and we don't want to mess with those.
8092 * Some of those entries could even have been re-assembled
8093 * with an entry after "s" (in vm_map_simplify_entry()), so
8094 * we may have to vm_map_clip_start() again.
8097 if (entry
->vme_start
>= s
) {
8099 * This entry starts on or after "s"
8100 * so no need to clip its start.
8104 * This entry has been re-assembled by a
8105 * vm_map_simplify_entry(). We need to
8106 * re-clip its start.
8108 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
8109 entry
->map_aligned
&&
8110 !VM_MAP_PAGE_ALIGNED(s
,
8111 VM_MAP_PAGE_MASK(map
))) {
8113 * The entry will no longer be map-aligned
8114 * after clipping and the caller said it's OK.
8116 entry
->map_aligned
= FALSE
;
8118 if (map
== kalloc_map
) {
8119 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8120 "clipping %p at 0x%llx\n",
8127 vm_map_clip_start(map
, entry
, s
);
8129 if (entry
->vme_end
<= end
) {
8131 * This entry is going away completely, so no need
8132 * to clip and possibly cause an unnecessary unnesting.
8135 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
8136 entry
->map_aligned
&&
8137 !VM_MAP_PAGE_ALIGNED(end
,
8138 VM_MAP_PAGE_MASK(map
))) {
8140 * The entry will no longer be map-aligned
8141 * after clipping and the caller said it's OK.
8143 entry
->map_aligned
= FALSE
;
8145 if (map
== kalloc_map
) {
8146 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8147 "clipping %p at 0x%llx\n",
8154 vm_map_clip_end(map
, entry
, end
);
8157 if (entry
->permanent
) {
8158 if (map
->pmap
== kernel_pmap
) {
8159 panic("%s(%p,0x%llx,0x%llx): "
8160 "attempt to remove permanent "
8162 "%p [0x%llx:0x%llx]\n",
8168 (uint64_t) entry
->vme_start
,
8169 (uint64_t) entry
->vme_end
);
8170 } else if (flags
& VM_MAP_REMOVE_IMMUTABLE
) {
8171 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
8172 entry
->permanent
= FALSE
;
8174 } else if ((entry
->protection
& VM_PROT_EXECUTE
) && !pmap_cs_enforced(map
->pmap
)) {
8175 entry
->permanent
= FALSE
;
8177 printf("%d[%s] %s(0x%llx,0x%llx): "
8178 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
8181 (current_task()->bsd_info
8182 ? proc_name_address(current_task()->bsd_info
)
8187 (uint64_t)entry
->vme_start
,
8188 (uint64_t)entry
->vme_end
,
8190 entry
->max_protection
);
8193 if (vm_map_executable_immutable_verbose
) {
8194 printf("%d[%s] %s(0x%llx,0x%llx): "
8195 "permanent entry [0x%llx:0x%llx] "
8198 (current_task()->bsd_info
8199 ? proc_name_address(current_task()->bsd_info
)
8204 (uint64_t)entry
->vme_start
,
8205 (uint64_t)entry
->vme_end
,
8207 entry
->max_protection
);
8210 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
8212 DTRACE_VM5(vm_map_delete_permanent
,
8213 vm_map_offset_t
, entry
->vme_start
,
8214 vm_map_offset_t
, entry
->vme_end
,
8215 vm_prot_t
, entry
->protection
,
8216 vm_prot_t
, entry
->max_protection
,
8217 int, VME_ALIAS(entry
));
8222 if (entry
->in_transition
) {
8223 wait_result_t wait_result
;
8226 * Another thread is wiring/unwiring this entry.
8227 * Let the other thread know we are waiting.
8229 assert(s
== entry
->vme_start
);
8230 entry
->needs_wakeup
= TRUE
;
8233 * wake up anybody waiting on entries that we have
8234 * already unwired/deleted.
8237 vm_map_entry_wakeup(map
);
8238 need_wakeup
= FALSE
;
8241 wait_result
= vm_map_entry_wait(map
, interruptible
);
8243 if (interruptible
&&
8244 wait_result
== THREAD_INTERRUPTED
) {
8246 * We do not clear the needs_wakeup flag,
8247 * since we cannot tell if we were the only one.
8249 return KERN_ABORTED
;
8253 * The entry could have been clipped or it
8254 * may not exist anymore. Look it up again.
8256 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
8258 * User: use the next entry
8260 if (gap_start
== FIND_GAP
) {
8263 entry
= first_entry
->vme_next
;
8264 s
= entry
->vme_start
;
8266 entry
= first_entry
;
8267 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8269 last_timestamp
= map
->timestamp
;
8271 } /* end in_transition */
8273 if (entry
->wired_count
) {
8274 boolean_t user_wire
;
8276 user_wire
= entry
->user_wired_count
> 0;
8279 * Remove a kernel wiring if requested
8281 if (flags
& VM_MAP_REMOVE_KUNWIRE
) {
8282 entry
->wired_count
--;
8286 * Remove all user wirings for proper accounting
8288 if (entry
->user_wired_count
> 0) {
8289 while (entry
->user_wired_count
) {
8290 subtract_wire_counts(map
, entry
, user_wire
);
8294 if (entry
->wired_count
!= 0) {
8295 assert(map
!= kernel_map
);
8297 * Cannot continue. Typical case is when
8298 * a user thread has physical io pending on
8299 * this page. Either wait for the
8300 * kernel wiring to go away or return an
8303 if (flags
& VM_MAP_REMOVE_WAIT_FOR_KWIRE
) {
8304 wait_result_t wait_result
;
8306 assert(s
== entry
->vme_start
);
8307 entry
->needs_wakeup
= TRUE
;
8308 wait_result
= vm_map_entry_wait(map
,
8311 if (interruptible
&&
8312 wait_result
== THREAD_INTERRUPTED
) {
8314 * We do not clear the
8315 * needs_wakeup flag, since we
8316 * cannot tell if we were the
8319 return KERN_ABORTED
;
8323 * The entry could have been clipped or
8324 * it may not exist anymore. Look it
8327 if (!vm_map_lookup_entry(map
, s
,
8329 assert(map
!= kernel_map
);
8331 * User: use the next entry
8333 if (gap_start
== FIND_GAP
) {
8336 entry
= first_entry
->vme_next
;
8337 s
= entry
->vme_start
;
8339 entry
= first_entry
;
8340 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8342 last_timestamp
= map
->timestamp
;
8345 return KERN_FAILURE
;
8349 entry
->in_transition
= TRUE
;
8351 * copy current entry. see comment in vm_map_wire()
8354 assert(s
== entry
->vme_start
);
8357 * We can unlock the map now. The in_transition
8358 * state guarantees existence of the entry.
8362 if (tmp_entry
.is_sub_map
) {
8364 vm_map_offset_t sub_start
, sub_end
;
8366 vm_map_offset_t pmap_addr
;
8369 sub_map
= VME_SUBMAP(&tmp_entry
);
8370 sub_start
= VME_OFFSET(&tmp_entry
);
8371 sub_end
= sub_start
+ (tmp_entry
.vme_end
-
8372 tmp_entry
.vme_start
);
8373 if (tmp_entry
.use_pmap
) {
8374 pmap
= sub_map
->pmap
;
8375 pmap_addr
= tmp_entry
.vme_start
;
8378 pmap_addr
= tmp_entry
.vme_start
;
8380 (void) vm_map_unwire_nested(sub_map
,
8385 if (VME_OBJECT(&tmp_entry
) == kernel_object
) {
8386 pmap_protect_options(
8388 tmp_entry
.vme_start
,
8391 PMAP_OPTIONS_REMOVE
,
8394 vm_fault_unwire(map
, &tmp_entry
,
8395 VME_OBJECT(&tmp_entry
) == kernel_object
,
8396 map
->pmap
, tmp_entry
.vme_start
);
8401 if (last_timestamp
+ 1 != map
->timestamp
) {
8403 * Find the entry again. It could have
8404 * been clipped after we unlocked the map.
8406 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
8407 assert((map
!= kernel_map
) &&
8408 (!entry
->is_sub_map
));
8409 if (gap_start
== FIND_GAP
) {
8412 first_entry
= first_entry
->vme_next
;
8413 s
= first_entry
->vme_start
;
8415 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8418 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8419 first_entry
= entry
;
8422 last_timestamp
= map
->timestamp
;
8424 entry
= first_entry
;
8425 while ((entry
!= vm_map_to_entry(map
)) &&
8426 (entry
->vme_start
< tmp_entry
.vme_end
)) {
8427 assert(entry
->in_transition
);
8428 entry
->in_transition
= FALSE
;
8429 if (entry
->needs_wakeup
) {
8430 entry
->needs_wakeup
= FALSE
;
8433 entry
= entry
->vme_next
;
8436 * We have unwired the entry(s). Go back and
8439 entry
= first_entry
;
8443 /* entry is unwired */
8444 assert(entry
->wired_count
== 0);
8445 assert(entry
->user_wired_count
== 0);
8447 assert(s
== entry
->vme_start
);
8449 if (flags
& VM_MAP_REMOVE_NO_PMAP_CLEANUP
) {
8451 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8452 * vm_map_delete(), some map entries might have been
8453 * transferred to a "zap_map", which doesn't have a
8454 * pmap. The original pmap has already been flushed
8455 * in the vm_map_delete() call targeting the original
8456 * map, but when we get to destroying the "zap_map",
8457 * we don't have any pmap to flush, so let's just skip
8460 } else if (entry
->is_sub_map
) {
8461 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)) >= VM_MAP_PAGE_SHIFT(map
),
8462 "map %p (%d) entry %p submap %p (%d)\n",
8463 map
, VM_MAP_PAGE_SHIFT(map
), entry
,
8465 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)));
8466 if (entry
->use_pmap
) {
8467 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)) == VM_MAP_PAGE_SHIFT(map
),
8468 "map %p (%d) entry %p submap %p (%d)\n",
8469 map
, VM_MAP_PAGE_SHIFT(map
), entry
,
8471 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)));
8472 #ifndef NO_NESTED_PMAP
8475 if (flags
& VM_MAP_REMOVE_NO_UNNESTING
) {
8477 * This is the final cleanup of the
8478 * address space being terminated.
8479 * No new mappings are expected and
8480 * we don't really need to unnest the
8481 * shared region (and lose the "global"
8482 * pmap mappings, if applicable).
8484 * Tell the pmap layer that we're
8485 * "clean" wrt nesting.
8487 pmap_flags
= PMAP_UNNEST_CLEAN
;
8490 * We're unmapping part of the nested
8491 * shared region, so we can't keep the
8496 pmap_unnest_options(
8498 (addr64_t
)entry
->vme_start
,
8499 entry
->vme_end
- entry
->vme_start
,
8501 #endif /* NO_NESTED_PMAP */
8502 if (map
->mapped_in_other_pmaps
&&
8503 os_ref_get_count(&map
->map_refcnt
) != 0) {
8504 /* clean up parent map/maps */
8505 vm_map_submap_pmap_clean(
8506 map
, entry
->vme_start
,
8512 vm_map_submap_pmap_clean(
8513 map
, entry
->vme_start
, entry
->vme_end
,
8517 } else if (VME_OBJECT(entry
) != kernel_object
&&
8518 VME_OBJECT(entry
) != compressor_object
) {
8519 object
= VME_OBJECT(entry
);
8520 if (map
->mapped_in_other_pmaps
&&
8521 os_ref_get_count(&map
->map_refcnt
) != 0) {
8522 vm_object_pmap_protect_options(
8523 object
, VME_OFFSET(entry
),
8524 entry
->vme_end
- entry
->vme_start
,
8529 PMAP_OPTIONS_REMOVE
);
8530 } else if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) ||
8531 (map
->pmap
== kernel_pmap
)) {
8532 /* Remove translations associated
8533 * with this range unless the entry
8534 * does not have an object, or
8535 * it's the kernel map or a descendant
8536 * since the platform could potentially
8537 * create "backdoor" mappings invisible
8538 * to the VM. It is expected that
8539 * objectless, non-kernel ranges
8540 * do not have such VM invisible
8543 pmap_remove_options(map
->pmap
,
8544 (addr64_t
)entry
->vme_start
,
8545 (addr64_t
)entry
->vme_end
,
8546 PMAP_OPTIONS_REMOVE
);
8550 if (entry
->iokit_acct
) {
8551 /* alternate accounting */
8552 DTRACE_VM4(vm_map_iokit_unmapped_region
,
8554 vm_map_offset_t
, entry
->vme_start
,
8555 vm_map_offset_t
, entry
->vme_end
,
8556 int, VME_ALIAS(entry
));
8557 vm_map_iokit_unmapped_region(map
,
8560 entry
->iokit_acct
= FALSE
;
8561 entry
->use_pmap
= FALSE
;
8565 * All pmap mappings for this map entry must have been
8569 assert(vm_map_pmap_is_empty(map
,
8574 next
= entry
->vme_next
;
8576 if (map
->pmap
== kernel_pmap
&&
8577 os_ref_get_count(&map
->map_refcnt
) != 0 &&
8578 entry
->vme_end
< end
&&
8579 (next
== vm_map_to_entry(map
) ||
8580 next
->vme_start
!= entry
->vme_end
)) {
8581 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8582 "hole after %p at 0x%llx\n",
8587 (uint64_t)entry
->vme_end
);
8591 * If the desired range didn't end with "entry", then there is a gap if
8592 * we wrapped around to the start of the map or if "entry" and "next"
8593 * aren't contiguous.
8595 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8596 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8598 if (gap_start
== FIND_GAP
&&
8599 vm_map_round_page(entry
->vme_end
, VM_MAP_PAGE_MASK(map
)) < end
&&
8600 (next
== vm_map_to_entry(map
) || entry
->vme_end
!= next
->vme_start
)) {
8601 gap_start
= entry
->vme_end
;
8603 s
= next
->vme_start
;
8604 last_timestamp
= map
->timestamp
;
8606 if (entry
->permanent
) {
8608 * A permanent entry can not be removed, so leave it
8609 * in place but remove all access permissions.
8611 entry
->protection
= VM_PROT_NONE
;
8612 entry
->max_protection
= VM_PROT_NONE
;
8613 } else if ((flags
& VM_MAP_REMOVE_SAVE_ENTRIES
) &&
8614 zap_map
!= VM_MAP_NULL
) {
8615 vm_map_size_t entry_size
;
8617 * The caller wants to save the affected VM map entries
8618 * into the "zap_map". The caller will take care of
8621 /* unlink the entry from "map" ... */
8622 vm_map_store_entry_unlink(map
, entry
);
8623 /* ... and add it to the end of the "zap_map" */
8624 vm_map_store_entry_link(zap_map
,
8625 vm_map_last_entry(zap_map
),
8627 VM_MAP_KERNEL_FLAGS_NONE
);
8628 entry_size
= entry
->vme_end
- entry
->vme_start
;
8629 map
->size
-= entry_size
;
8630 zap_map
->size
+= entry_size
;
8631 /* we didn't unlock the map, so no timestamp increase */
8634 vm_map_entry_delete(map
, entry
);
8635 /* vm_map_entry_delete unlocks the map */
8641 if (entry
== vm_map_to_entry(map
)) {
8644 if (last_timestamp
+ 1 != map
->timestamp
) {
8646 * We are responsible for deleting everything
8647 * from the given space. If someone has interfered,
8648 * we pick up where we left off. Back fills should
8649 * be all right for anyone, except map_delete, and
8650 * we have to assume that the task has been fully
8651 * disabled before we get here
8653 if (!vm_map_lookup_entry(map
, s
, &entry
)) {
8654 entry
= entry
->vme_next
;
8657 * Nothing found for s. If we weren't already done, then there is a gap.
8659 if (gap_start
== FIND_GAP
&& s
< end
) {
8662 s
= entry
->vme_start
;
8664 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8667 * others can not only allocate behind us, we can
8668 * also see coalesce while we don't have the map lock
8670 if (entry
== vm_map_to_entry(map
)) {
8674 last_timestamp
= map
->timestamp
;
8677 if (map
->wait_for_space
) {
8678 thread_wakeup((event_t
) map
);
8681 * wake up anybody waiting on entries that we have already deleted.
8684 vm_map_entry_wakeup(map
);
8687 if (gap_start
!= FIND_GAP
&& gap_start
!= GAPS_OK
) {
8688 DTRACE_VM3(kern_vm_deallocate_gap
,
8689 vm_map_offset_t
, gap_start
,
8690 vm_map_offset_t
, save_start
,
8691 vm_map_offset_t
, save_end
);
8692 if (!(flags
& VM_MAP_REMOVE_GAPS_OK
)) {
8693 vm_map_guard_exception(gap_start
, kGUARD_EXC_DEALLOC_GAP
);
8697 return KERN_SUCCESS
;
8704 * Clean out a task's map.
8711 map
->terminated
= TRUE
;
8714 return vm_map_remove(map
,
8720 * + remove immutable mappings
8721 * + allow gaps in range
8723 (VM_MAP_REMOVE_NO_UNNESTING
|
8724 VM_MAP_REMOVE_IMMUTABLE
|
8725 VM_MAP_REMOVE_GAPS_OK
));
8731 * Remove the given address range from the target map.
8732 * This is the exported form of vm_map_delete.
8737 vm_map_offset_t start
,
8738 vm_map_offset_t end
,
8741 kern_return_t result
;
8744 VM_MAP_RANGE_CHECK(map
, start
, end
);
8746 * For the zone maps, the kernel controls the allocation/freeing of memory.
8747 * Any free to the zone maps should be within the bounds of the map and
8748 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8749 * free to the zone maps into a no-op, there is a problem and we should
8752 if ((start
== end
) && zone_maps_owned(start
, 1)) {
8753 panic("Nothing being freed to a zone map. start = end = %p\n", (void *)start
);
8755 result
= vm_map_delete(map
, start
, end
, flags
, VM_MAP_NULL
);
8762 * vm_map_remove_locked:
8764 * Remove the given address range from the target locked map.
8765 * This is the exported form of vm_map_delete.
8768 vm_map_remove_locked(
8770 vm_map_offset_t start
,
8771 vm_map_offset_t end
,
8774 kern_return_t result
;
8776 VM_MAP_RANGE_CHECK(map
, start
, end
);
8777 result
= vm_map_delete(map
, start
, end
, flags
, VM_MAP_NULL
);
8783 * Routine: vm_map_copy_allocate
8786 * Allocates and initializes a map copy object.
8788 static vm_map_copy_t
8789 vm_map_copy_allocate(void)
8791 vm_map_copy_t new_copy
;
8793 new_copy
= zalloc(vm_map_copy_zone
);
8794 bzero(new_copy
, sizeof(*new_copy
));
8795 new_copy
->c_u
.hdr
.rb_head_store
.rbh_root
= (void*)(int)SKIP_RB_TREE
;
8796 vm_map_copy_first_entry(new_copy
) = vm_map_copy_to_entry(new_copy
);
8797 vm_map_copy_last_entry(new_copy
) = vm_map_copy_to_entry(new_copy
);
8802 * Routine: vm_map_copy_discard
8805 * Dispose of a map copy object (returned by
8809 vm_map_copy_discard(
8812 if (copy
== VM_MAP_COPY_NULL
) {
8816 switch (copy
->type
) {
8817 case VM_MAP_COPY_ENTRY_LIST
:
8818 while (vm_map_copy_first_entry(copy
) !=
8819 vm_map_copy_to_entry(copy
)) {
8820 vm_map_entry_t entry
= vm_map_copy_first_entry(copy
);
8822 vm_map_copy_entry_unlink(copy
, entry
);
8823 if (entry
->is_sub_map
) {
8824 vm_map_deallocate(VME_SUBMAP(entry
));
8826 vm_object_deallocate(VME_OBJECT(entry
));
8828 vm_map_copy_entry_dispose(copy
, entry
);
8831 case VM_MAP_COPY_OBJECT
:
8832 vm_object_deallocate(copy
->cpy_object
);
8834 case VM_MAP_COPY_KERNEL_BUFFER
:
8837 * The vm_map_copy_t and possibly the data buffer were
8838 * allocated by a single call to kheap_alloc(), i.e. the
8839 * vm_map_copy_t was not allocated out of the zone.
8841 if (copy
->size
> msg_ool_size_small
|| copy
->offset
) {
8842 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8843 (long long)copy
->size
, (long long)copy
->offset
);
8845 kheap_free(KHEAP_DATA_BUFFERS
, copy
->cpy_kdata
, copy
->size
);
8847 zfree(vm_map_copy_zone
, copy
);
8851 * Routine: vm_map_copy_copy
8854 * Move the information in a map copy object to
8855 * a new map copy object, leaving the old one
8858 * This is used by kernel routines that need
8859 * to look at out-of-line data (in copyin form)
8860 * before deciding whether to return SUCCESS.
8861 * If the routine returns FAILURE, the original
8862 * copy object will be deallocated; therefore,
8863 * these routines must make a copy of the copy
8864 * object and leave the original empty so that
8865 * deallocation will not fail.
8871 vm_map_copy_t new_copy
;
8873 if (copy
== VM_MAP_COPY_NULL
) {
8874 return VM_MAP_COPY_NULL
;
8878 * Allocate a new copy object, and copy the information
8879 * from the old one into it.
8882 new_copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
8883 memcpy((void *) new_copy
, (void *) copy
, sizeof(struct vm_map_copy
));
8884 #if __has_feature(ptrauth_calls)
8885 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
8886 new_copy
->cpy_kdata
= copy
->cpy_kdata
;
8890 if (copy
->type
== VM_MAP_COPY_ENTRY_LIST
) {
8892 * The links in the entry chain must be
8893 * changed to point to the new copy object.
8895 vm_map_copy_first_entry(copy
)->vme_prev
8896 = vm_map_copy_to_entry(new_copy
);
8897 vm_map_copy_last_entry(copy
)->vme_next
8898 = vm_map_copy_to_entry(new_copy
);
8902 * Change the old copy object into one that contains
8903 * nothing to be deallocated.
8905 copy
->type
= VM_MAP_COPY_OBJECT
;
8906 copy
->cpy_object
= VM_OBJECT_NULL
;
8909 * Return the new object.
8914 static kern_return_t
8915 vm_map_overwrite_submap_recurse(
8917 vm_map_offset_t dst_addr
,
8918 vm_map_size_t dst_size
)
8920 vm_map_offset_t dst_end
;
8921 vm_map_entry_t tmp_entry
;
8922 vm_map_entry_t entry
;
8923 kern_return_t result
;
8924 boolean_t encountered_sub_map
= FALSE
;
8929 * Verify that the destination is all writeable
8930 * initially. We have to trunc the destination
8931 * address and round the copy size or we'll end up
8932 * splitting entries in strange ways.
8935 dst_end
= vm_map_round_page(dst_addr
+ dst_size
,
8936 VM_MAP_PAGE_MASK(dst_map
));
8937 vm_map_lock(dst_map
);
8940 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
8941 vm_map_unlock(dst_map
);
8942 return KERN_INVALID_ADDRESS
;
8945 vm_map_clip_start(dst_map
,
8947 vm_map_trunc_page(dst_addr
,
8948 VM_MAP_PAGE_MASK(dst_map
)));
8949 if (tmp_entry
->is_sub_map
) {
8950 /* clipping did unnest if needed */
8951 assert(!tmp_entry
->use_pmap
);
8954 for (entry
= tmp_entry
;;) {
8955 vm_map_entry_t next
;
8957 next
= entry
->vme_next
;
8958 while (entry
->is_sub_map
) {
8959 vm_map_offset_t sub_start
;
8960 vm_map_offset_t sub_end
;
8961 vm_map_offset_t local_end
;
8963 if (entry
->in_transition
) {
8965 * Say that we are waiting, and wait for entry.
8967 entry
->needs_wakeup
= TRUE
;
8968 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8973 encountered_sub_map
= TRUE
;
8974 sub_start
= VME_OFFSET(entry
);
8976 if (entry
->vme_end
< dst_end
) {
8977 sub_end
= entry
->vme_end
;
8981 sub_end
-= entry
->vme_start
;
8982 sub_end
+= VME_OFFSET(entry
);
8983 local_end
= entry
->vme_end
;
8984 vm_map_unlock(dst_map
);
8986 result
= vm_map_overwrite_submap_recurse(
8989 sub_end
- sub_start
);
8991 if (result
!= KERN_SUCCESS
) {
8994 if (dst_end
<= entry
->vme_end
) {
8995 return KERN_SUCCESS
;
8997 vm_map_lock(dst_map
);
8998 if (!vm_map_lookup_entry(dst_map
, local_end
,
9000 vm_map_unlock(dst_map
);
9001 return KERN_INVALID_ADDRESS
;
9004 next
= entry
->vme_next
;
9007 if (!(entry
->protection
& VM_PROT_WRITE
)) {
9008 vm_map_unlock(dst_map
);
9009 return KERN_PROTECTION_FAILURE
;
9013 * If the entry is in transition, we must wait
9014 * for it to exit that state. Anything could happen
9015 * when we unlock the map, so start over.
9017 if (entry
->in_transition
) {
9019 * Say that we are waiting, and wait for entry.
9021 entry
->needs_wakeup
= TRUE
;
9022 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9028 * our range is contained completely within this map entry
9030 if (dst_end
<= entry
->vme_end
) {
9031 vm_map_unlock(dst_map
);
9032 return KERN_SUCCESS
;
9035 * check that range specified is contiguous region
9037 if ((next
== vm_map_to_entry(dst_map
)) ||
9038 (next
->vme_start
!= entry
->vme_end
)) {
9039 vm_map_unlock(dst_map
);
9040 return KERN_INVALID_ADDRESS
;
9044 * Check for permanent objects in the destination.
9046 if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) &&
9047 ((!VME_OBJECT(entry
)->internal
) ||
9048 (VME_OBJECT(entry
)->true_share
))) {
9049 if (encountered_sub_map
) {
9050 vm_map_unlock(dst_map
);
9051 return KERN_FAILURE
;
9058 vm_map_unlock(dst_map
);
9059 return KERN_SUCCESS
;
9063 * Routine: vm_map_copy_overwrite
9066 * Copy the memory described by the map copy
9067 * object (copy; returned by vm_map_copyin) onto
9068 * the specified destination region (dst_map, dst_addr).
9069 * The destination must be writeable.
9071 * Unlike vm_map_copyout, this routine actually
9072 * writes over previously-mapped memory. If the
9073 * previous mapping was to a permanent (user-supplied)
9074 * memory object, it is preserved.
9076 * The attributes (protection and inheritance) of the
9077 * destination region are preserved.
9079 * If successful, consumes the copy object.
9080 * Otherwise, the caller is responsible for it.
9082 * Implementation notes:
9083 * To overwrite aligned temporary virtual memory, it is
9084 * sufficient to remove the previous mapping and insert
9085 * the new copy. This replacement is done either on
9086 * the whole region (if no permanent virtual memory
9087 * objects are embedded in the destination region) or
9088 * in individual map entries.
9090 * To overwrite permanent virtual memory, it is necessary
9091 * to copy each page, as the external memory management
9092 * interface currently does not provide any optimizations.
9094 * Unaligned memory also has to be copied. It is possible
9095 * to use 'vm_trickery' to copy the aligned data. This is
9096 * not done but not hard to implement.
9098 * Once a page of permanent memory has been overwritten,
9099 * it is impossible to interrupt this function; otherwise,
9100 * the call would be neither atomic nor location-independent.
9101 * The kernel-state portion of a user thread must be
9104 * It may be expensive to forward all requests that might
9105 * overwrite permanent memory (vm_write, vm_copy) to
9106 * uninterruptible kernel threads. This routine may be
9107 * called by interruptible threads; however, success is
9108 * not guaranteed -- if the request cannot be performed
9109 * atomically and interruptibly, an error indication is
9113 static kern_return_t
9114 vm_map_copy_overwrite_nested(
9116 vm_map_address_t dst_addr
,
9118 boolean_t interruptible
,
9120 boolean_t discard_on_success
)
9122 vm_map_offset_t dst_end
;
9123 vm_map_entry_t tmp_entry
;
9124 vm_map_entry_t entry
;
9126 boolean_t aligned
= TRUE
;
9127 boolean_t contains_permanent_objects
= FALSE
;
9128 boolean_t encountered_sub_map
= FALSE
;
9129 vm_map_offset_t base_addr
;
9130 vm_map_size_t copy_size
;
9131 vm_map_size_t total_size
;
9132 int copy_page_shift
;
9136 * Check for null copy object.
9139 if (copy
== VM_MAP_COPY_NULL
) {
9140 return KERN_SUCCESS
;
9144 * Assert that the vm_map_copy is coming from the right
9145 * zone and hasn't been forged
9147 vm_map_copy_require(copy
);
9150 * Check for special kernel buffer allocated
9151 * by new_ipc_kmsg_copyin.
9154 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
9155 return vm_map_copyout_kernel_buffer(
9157 copy
, copy
->size
, TRUE
, discard_on_success
);
9161 * Only works for entry lists at the moment. Will
9162 * support page lists later.
9165 assert(copy
->type
== VM_MAP_COPY_ENTRY_LIST
);
9167 if (copy
->size
== 0) {
9168 if (discard_on_success
) {
9169 vm_map_copy_discard(copy
);
9171 return KERN_SUCCESS
;
9174 copy_page_shift
= copy
->cpy_hdr
.page_shift
;
9177 * Verify that the destination is all writeable
9178 * initially. We have to trunc the destination
9179 * address and round the copy size or we'll end up
9180 * splitting entries in strange ways.
9183 if (!VM_MAP_PAGE_ALIGNED(copy
->size
,
9184 VM_MAP_PAGE_MASK(dst_map
)) ||
9185 !VM_MAP_PAGE_ALIGNED(copy
->offset
,
9186 VM_MAP_PAGE_MASK(dst_map
)) ||
9187 !VM_MAP_PAGE_ALIGNED(dst_addr
,
9188 VM_MAP_PAGE_MASK(dst_map
)) ||
9189 copy_page_shift
!= VM_MAP_PAGE_SHIFT(dst_map
)) {
9191 dst_end
= vm_map_round_page(dst_addr
+ copy
->size
,
9192 VM_MAP_PAGE_MASK(dst_map
));
9194 dst_end
= dst_addr
+ copy
->size
;
9197 vm_map_lock(dst_map
);
9199 /* LP64todo - remove this check when vm_map_commpage64()
9200 * no longer has to stuff in a map_entry for the commpage
9201 * above the map's max_offset.
9203 if (dst_addr
>= dst_map
->max_offset
) {
9204 vm_map_unlock(dst_map
);
9205 return KERN_INVALID_ADDRESS
;
9209 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
9210 vm_map_unlock(dst_map
);
9211 return KERN_INVALID_ADDRESS
;
9213 vm_map_clip_start(dst_map
,
9215 vm_map_trunc_page(dst_addr
,
9216 VM_MAP_PAGE_MASK(dst_map
)));
9217 for (entry
= tmp_entry
;;) {
9218 vm_map_entry_t next
= entry
->vme_next
;
9220 while (entry
->is_sub_map
) {
9221 vm_map_offset_t sub_start
;
9222 vm_map_offset_t sub_end
;
9223 vm_map_offset_t local_end
;
9225 if (entry
->in_transition
) {
9227 * Say that we are waiting, and wait for entry.
9229 entry
->needs_wakeup
= TRUE
;
9230 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9235 local_end
= entry
->vme_end
;
9236 if (!(entry
->needs_copy
)) {
9237 /* if needs_copy we are a COW submap */
9238 /* in such a case we just replace so */
9239 /* there is no need for the follow- */
9241 encountered_sub_map
= TRUE
;
9242 sub_start
= VME_OFFSET(entry
);
9244 if (entry
->vme_end
< dst_end
) {
9245 sub_end
= entry
->vme_end
;
9249 sub_end
-= entry
->vme_start
;
9250 sub_end
+= VME_OFFSET(entry
);
9251 vm_map_unlock(dst_map
);
9253 kr
= vm_map_overwrite_submap_recurse(
9256 sub_end
- sub_start
);
9257 if (kr
!= KERN_SUCCESS
) {
9260 vm_map_lock(dst_map
);
9263 if (dst_end
<= entry
->vme_end
) {
9264 goto start_overwrite
;
9266 if (!vm_map_lookup_entry(dst_map
, local_end
,
9268 vm_map_unlock(dst_map
);
9269 return KERN_INVALID_ADDRESS
;
9271 next
= entry
->vme_next
;
9274 if (!(entry
->protection
& VM_PROT_WRITE
)) {
9275 vm_map_unlock(dst_map
);
9276 return KERN_PROTECTION_FAILURE
;
9280 * If the entry is in transition, we must wait
9281 * for it to exit that state. Anything could happen
9282 * when we unlock the map, so start over.
9284 if (entry
->in_transition
) {
9286 * Say that we are waiting, and wait for entry.
9288 entry
->needs_wakeup
= TRUE
;
9289 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9295 * our range is contained completely within this map entry
9297 if (dst_end
<= entry
->vme_end
) {
9301 * check that range specified is contiguous region
9303 if ((next
== vm_map_to_entry(dst_map
)) ||
9304 (next
->vme_start
!= entry
->vme_end
)) {
9305 vm_map_unlock(dst_map
);
9306 return KERN_INVALID_ADDRESS
;
9311 * Check for permanent objects in the destination.
9313 if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) &&
9314 ((!VME_OBJECT(entry
)->internal
) ||
9315 (VME_OBJECT(entry
)->true_share
))) {
9316 contains_permanent_objects
= TRUE
;
9324 * If there are permanent objects in the destination, then
9325 * the copy cannot be interrupted.
9328 if (interruptible
&& contains_permanent_objects
) {
9329 vm_map_unlock(dst_map
);
9330 return KERN_FAILURE
; /* XXX */
9335 * Make a second pass, overwriting the data
9336 * At the beginning of each loop iteration,
9337 * the next entry to be overwritten is "tmp_entry"
9338 * (initially, the value returned from the lookup above),
9339 * and the starting address expected in that entry
9343 total_size
= copy
->size
;
9344 if (encountered_sub_map
) {
9346 /* re-calculate tmp_entry since we've had the map */
9348 if (!vm_map_lookup_entry( dst_map
, dst_addr
, &tmp_entry
)) {
9349 vm_map_unlock(dst_map
);
9350 return KERN_INVALID_ADDRESS
;
9353 copy_size
= copy
->size
;
9356 base_addr
= dst_addr
;
9358 /* deconstruct the copy object and do in parts */
9359 /* only in sub_map, interruptable case */
9360 vm_map_entry_t copy_entry
;
9361 vm_map_entry_t previous_prev
= VM_MAP_ENTRY_NULL
;
9362 vm_map_entry_t next_copy
= VM_MAP_ENTRY_NULL
;
9364 int remaining_entries
= 0;
9365 vm_map_offset_t new_offset
= 0;
9367 for (entry
= tmp_entry
; copy_size
== 0;) {
9368 vm_map_entry_t next
;
9370 next
= entry
->vme_next
;
9372 /* tmp_entry and base address are moved along */
9373 /* each time we encounter a sub-map. Otherwise */
9374 /* entry can outpase tmp_entry, and the copy_size */
9375 /* may reflect the distance between them */
9376 /* if the current entry is found to be in transition */
9377 /* we will start over at the beginning or the last */
9378 /* encounter of a submap as dictated by base_addr */
9379 /* we will zero copy_size accordingly. */
9380 if (entry
->in_transition
) {
9382 * Say that we are waiting, and wait for entry.
9384 entry
->needs_wakeup
= TRUE
;
9385 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9387 if (!vm_map_lookup_entry(dst_map
, base_addr
,
9389 vm_map_unlock(dst_map
);
9390 return KERN_INVALID_ADDRESS
;
9396 if (entry
->is_sub_map
) {
9397 vm_map_offset_t sub_start
;
9398 vm_map_offset_t sub_end
;
9399 vm_map_offset_t local_end
;
9401 if (entry
->needs_copy
) {
9402 /* if this is a COW submap */
9403 /* just back the range with a */
9404 /* anonymous entry */
9405 if (entry
->vme_end
< dst_end
) {
9406 sub_end
= entry
->vme_end
;
9410 if (entry
->vme_start
< base_addr
) {
9411 sub_start
= base_addr
;
9413 sub_start
= entry
->vme_start
;
9416 dst_map
, entry
, sub_end
);
9418 dst_map
, entry
, sub_start
);
9419 assert(!entry
->use_pmap
);
9420 assert(!entry
->iokit_acct
);
9421 entry
->use_pmap
= TRUE
;
9422 entry
->is_sub_map
= FALSE
;
9425 VME_OBJECT_SET(entry
, VM_OBJECT_NULL
);
9426 VME_OFFSET_SET(entry
, 0);
9427 entry
->is_shared
= FALSE
;
9428 entry
->needs_copy
= FALSE
;
9429 entry
->protection
= VM_PROT_DEFAULT
;
9430 entry
->max_protection
= VM_PROT_ALL
;
9431 entry
->wired_count
= 0;
9432 entry
->user_wired_count
= 0;
9433 if (entry
->inheritance
9434 == VM_INHERIT_SHARE
) {
9435 entry
->inheritance
= VM_INHERIT_COPY
;
9439 /* first take care of any non-sub_map */
9440 /* entries to send */
9441 if (base_addr
< entry
->vme_start
) {
9444 entry
->vme_start
- base_addr
;
9447 sub_start
= VME_OFFSET(entry
);
9449 if (entry
->vme_end
< dst_end
) {
9450 sub_end
= entry
->vme_end
;
9454 sub_end
-= entry
->vme_start
;
9455 sub_end
+= VME_OFFSET(entry
);
9456 local_end
= entry
->vme_end
;
9457 vm_map_unlock(dst_map
);
9458 copy_size
= sub_end
- sub_start
;
9460 /* adjust the copy object */
9461 if (total_size
> copy_size
) {
9462 vm_map_size_t local_size
= 0;
9463 vm_map_size_t entry_size
;
9466 new_offset
= copy
->offset
;
9467 copy_entry
= vm_map_copy_first_entry(copy
);
9468 while (copy_entry
!=
9469 vm_map_copy_to_entry(copy
)) {
9470 entry_size
= copy_entry
->vme_end
-
9471 copy_entry
->vme_start
;
9472 if ((local_size
< copy_size
) &&
9473 ((local_size
+ entry_size
)
9475 vm_map_copy_clip_end(copy
,
9477 copy_entry
->vme_start
+
9478 (copy_size
- local_size
));
9479 entry_size
= copy_entry
->vme_end
-
9480 copy_entry
->vme_start
;
9481 local_size
+= entry_size
;
9482 new_offset
+= entry_size
;
9484 if (local_size
>= copy_size
) {
9485 next_copy
= copy_entry
->vme_next
;
9486 copy_entry
->vme_next
=
9487 vm_map_copy_to_entry(copy
);
9489 copy
->cpy_hdr
.links
.prev
;
9490 copy
->cpy_hdr
.links
.prev
= copy_entry
;
9491 copy
->size
= copy_size
;
9493 copy
->cpy_hdr
.nentries
;
9494 remaining_entries
-= nentries
;
9495 copy
->cpy_hdr
.nentries
= nentries
;
9498 local_size
+= entry_size
;
9499 new_offset
+= entry_size
;
9502 copy_entry
= copy_entry
->vme_next
;
9506 if ((entry
->use_pmap
) && (pmap
== NULL
)) {
9507 kr
= vm_map_copy_overwrite_nested(
9512 VME_SUBMAP(entry
)->pmap
,
9514 } else if (pmap
!= NULL
) {
9515 kr
= vm_map_copy_overwrite_nested(
9519 interruptible
, pmap
,
9522 kr
= vm_map_copy_overwrite_nested(
9530 if (kr
!= KERN_SUCCESS
) {
9531 if (next_copy
!= NULL
) {
9532 copy
->cpy_hdr
.nentries
+=
9534 copy
->cpy_hdr
.links
.prev
->vme_next
=
9536 copy
->cpy_hdr
.links
.prev
9538 copy
->size
= total_size
;
9542 if (dst_end
<= local_end
) {
9543 return KERN_SUCCESS
;
9545 /* otherwise copy no longer exists, it was */
9546 /* destroyed after successful copy_overwrite */
9547 copy
= vm_map_copy_allocate();
9548 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9549 copy
->offset
= new_offset
;
9550 copy
->cpy_hdr
.page_shift
= copy_page_shift
;
9554 * this does not seem to deal with
9555 * the VM map store (R&B tree)
9558 total_size
-= copy_size
;
9560 /* put back remainder of copy in container */
9561 if (next_copy
!= NULL
) {
9562 copy
->cpy_hdr
.nentries
= remaining_entries
;
9563 copy
->cpy_hdr
.links
.next
= next_copy
;
9564 copy
->cpy_hdr
.links
.prev
= previous_prev
;
9565 copy
->size
= total_size
;
9566 next_copy
->vme_prev
=
9567 vm_map_copy_to_entry(copy
);
9570 base_addr
= local_end
;
9571 vm_map_lock(dst_map
);
9572 if (!vm_map_lookup_entry(dst_map
,
9573 local_end
, &tmp_entry
)) {
9574 vm_map_unlock(dst_map
);
9575 return KERN_INVALID_ADDRESS
;
9580 if (dst_end
<= entry
->vme_end
) {
9581 copy_size
= dst_end
- base_addr
;
9585 if ((next
== vm_map_to_entry(dst_map
)) ||
9586 (next
->vme_start
!= entry
->vme_end
)) {
9587 vm_map_unlock(dst_map
);
9588 return KERN_INVALID_ADDRESS
;
9597 /* adjust the copy object */
9598 if (total_size
> copy_size
) {
9599 vm_map_size_t local_size
= 0;
9600 vm_map_size_t entry_size
;
9602 new_offset
= copy
->offset
;
9603 copy_entry
= vm_map_copy_first_entry(copy
);
9604 while (copy_entry
!= vm_map_copy_to_entry(copy
)) {
9605 entry_size
= copy_entry
->vme_end
-
9606 copy_entry
->vme_start
;
9607 if ((local_size
< copy_size
) &&
9608 ((local_size
+ entry_size
)
9610 vm_map_copy_clip_end(copy
, copy_entry
,
9611 copy_entry
->vme_start
+
9612 (copy_size
- local_size
));
9613 entry_size
= copy_entry
->vme_end
-
9614 copy_entry
->vme_start
;
9615 local_size
+= entry_size
;
9616 new_offset
+= entry_size
;
9618 if (local_size
>= copy_size
) {
9619 next_copy
= copy_entry
->vme_next
;
9620 copy_entry
->vme_next
=
9621 vm_map_copy_to_entry(copy
);
9623 copy
->cpy_hdr
.links
.prev
;
9624 copy
->cpy_hdr
.links
.prev
= copy_entry
;
9625 copy
->size
= copy_size
;
9627 copy
->cpy_hdr
.nentries
;
9628 remaining_entries
-= nentries
;
9629 copy
->cpy_hdr
.nentries
= nentries
;
9632 local_size
+= entry_size
;
9633 new_offset
+= entry_size
;
9636 copy_entry
= copy_entry
->vme_next
;
9646 local_pmap
= dst_map
->pmap
;
9649 if ((kr
= vm_map_copy_overwrite_aligned(
9650 dst_map
, tmp_entry
, copy
,
9651 base_addr
, local_pmap
)) != KERN_SUCCESS
) {
9652 if (next_copy
!= NULL
) {
9653 copy
->cpy_hdr
.nentries
+=
9655 copy
->cpy_hdr
.links
.prev
->vme_next
=
9657 copy
->cpy_hdr
.links
.prev
=
9659 copy
->size
+= copy_size
;
9663 vm_map_unlock(dst_map
);
9668 * if the copy and dst address are misaligned but the same
9669 * offset within the page we can copy_not_aligned the
9670 * misaligned parts and copy aligned the rest. If they are
9671 * aligned but len is unaligned we simply need to copy
9672 * the end bit unaligned. We'll need to split the misaligned
9673 * bits of the region in this case !
9675 /* ALWAYS UNLOCKS THE dst_map MAP */
9676 kr
= vm_map_copy_overwrite_unaligned(
9681 discard_on_success
);
9682 if (kr
!= KERN_SUCCESS
) {
9683 if (next_copy
!= NULL
) {
9684 copy
->cpy_hdr
.nentries
+=
9686 copy
->cpy_hdr
.links
.prev
->vme_next
=
9688 copy
->cpy_hdr
.links
.prev
=
9690 copy
->size
+= copy_size
;
9695 total_size
-= copy_size
;
9696 if (total_size
== 0) {
9699 base_addr
+= copy_size
;
9701 copy
->offset
= new_offset
;
9702 if (next_copy
!= NULL
) {
9703 copy
->cpy_hdr
.nentries
= remaining_entries
;
9704 copy
->cpy_hdr
.links
.next
= next_copy
;
9705 copy
->cpy_hdr
.links
.prev
= previous_prev
;
9706 next_copy
->vme_prev
= vm_map_copy_to_entry(copy
);
9707 copy
->size
= total_size
;
9709 vm_map_lock(dst_map
);
9711 if (!vm_map_lookup_entry(dst_map
,
9712 base_addr
, &tmp_entry
)) {
9713 vm_map_unlock(dst_map
);
9714 return KERN_INVALID_ADDRESS
;
9716 if (tmp_entry
->in_transition
) {
9717 entry
->needs_wakeup
= TRUE
;
9718 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9723 vm_map_clip_start(dst_map
,
9725 vm_map_trunc_page(base_addr
,
9726 VM_MAP_PAGE_MASK(dst_map
)));
9732 * Throw away the vm_map_copy object
9734 if (discard_on_success
) {
9735 vm_map_copy_discard(copy
);
9738 return KERN_SUCCESS
;
9739 }/* vm_map_copy_overwrite */
9742 vm_map_copy_overwrite(
9744 vm_map_offset_t dst_addr
,
9746 vm_map_size_t copy_size
,
9747 boolean_t interruptible
)
9749 vm_map_size_t head_size
, tail_size
;
9750 vm_map_copy_t head_copy
, tail_copy
;
9751 vm_map_offset_t head_addr
, tail_addr
;
9752 vm_map_entry_t entry
;
9754 vm_map_offset_t effective_page_mask
, effective_page_size
;
9755 int copy_page_shift
;
9764 if (interruptible
||
9765 copy
== VM_MAP_COPY_NULL
||
9766 copy
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
9768 * We can't split the "copy" map if we're interruptible
9769 * or if we don't have a "copy" map...
9772 return vm_map_copy_overwrite_nested(dst_map
,
9780 copy_page_shift
= VM_MAP_COPY_PAGE_SHIFT(copy
);
9781 if (copy_page_shift
< PAGE_SHIFT
||
9782 VM_MAP_PAGE_SHIFT(dst_map
) < PAGE_SHIFT
) {
9786 if (VM_MAP_PAGE_SHIFT(dst_map
) < PAGE_SHIFT
) {
9787 effective_page_mask
= VM_MAP_PAGE_MASK(dst_map
);
9789 effective_page_mask
= MAX(VM_MAP_PAGE_MASK(dst_map
), PAGE_MASK
);
9790 effective_page_mask
= MAX(VM_MAP_COPY_PAGE_MASK(copy
),
9791 effective_page_mask
);
9793 effective_page_size
= effective_page_mask
+ 1;
9795 if (copy_size
< VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES
* effective_page_size
) {
9797 * Too small to bother with optimizing...
9802 if ((dst_addr
& effective_page_mask
) !=
9803 (copy
->offset
& effective_page_mask
)) {
9805 * Incompatible mis-alignment of source and destination...
9811 * Proper alignment or identical mis-alignment at the beginning.
9812 * Let's try and do a small unaligned copy first (if needed)
9813 * and then an aligned copy for the rest.
9815 if (!vm_map_page_aligned(dst_addr
, effective_page_mask
)) {
9816 head_addr
= dst_addr
;
9817 head_size
= (effective_page_size
-
9818 (copy
->offset
& effective_page_mask
));
9819 head_size
= MIN(head_size
, copy_size
);
9821 if (!vm_map_page_aligned(copy
->offset
+ copy_size
,
9822 effective_page_mask
)) {
9824 * Mis-alignment at the end.
9825 * Do an aligned copy up to the last page and
9826 * then an unaligned copy for the remaining bytes.
9828 tail_size
= ((copy
->offset
+ copy_size
) &
9829 effective_page_mask
);
9830 tail_size
= MIN(tail_size
, copy_size
);
9831 tail_addr
= dst_addr
+ copy_size
- tail_size
;
9832 assert(tail_addr
>= head_addr
+ head_size
);
9834 assert(head_size
+ tail_size
<= copy_size
);
9836 if (head_size
+ tail_size
== copy_size
) {
9838 * It's all unaligned, no optimization possible...
9844 * Can't optimize if there are any submaps in the
9845 * destination due to the way we free the "copy" map
9846 * progressively in vm_map_copy_overwrite_nested()
9849 vm_map_lock_read(dst_map
);
9850 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &entry
)) {
9851 vm_map_unlock_read(dst_map
);
9855 (entry
!= vm_map_copy_to_entry(copy
) &&
9856 entry
->vme_start
< dst_addr
+ copy_size
);
9857 entry
= entry
->vme_next
) {
9858 if (entry
->is_sub_map
) {
9859 vm_map_unlock_read(dst_map
);
9863 vm_map_unlock_read(dst_map
);
9867 * Unaligned copy of the first "head_size" bytes, to reach
9872 * Extract "head_copy" out of "copy".
9874 head_copy
= vm_map_copy_allocate();
9875 head_copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9876 head_copy
->cpy_hdr
.entries_pageable
=
9877 copy
->cpy_hdr
.entries_pageable
;
9878 vm_map_store_init(&head_copy
->cpy_hdr
);
9879 head_copy
->cpy_hdr
.page_shift
= copy_page_shift
;
9881 entry
= vm_map_copy_first_entry(copy
);
9882 if (entry
->vme_end
< copy
->offset
+ head_size
) {
9883 head_size
= entry
->vme_end
- copy
->offset
;
9886 head_copy
->offset
= copy
->offset
;
9887 head_copy
->size
= head_size
;
9888 copy
->offset
+= head_size
;
9889 copy
->size
-= head_size
;
9890 copy_size
-= head_size
;
9891 assert(copy_size
> 0);
9893 vm_map_copy_clip_end(copy
, entry
, copy
->offset
);
9894 vm_map_copy_entry_unlink(copy
, entry
);
9895 vm_map_copy_entry_link(head_copy
,
9896 vm_map_copy_to_entry(head_copy
),
9900 * Do the unaligned copy.
9902 kr
= vm_map_copy_overwrite_nested(dst_map
,
9908 if (kr
!= KERN_SUCCESS
) {
9915 * Extract "tail_copy" out of "copy".
9917 tail_copy
= vm_map_copy_allocate();
9918 tail_copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9919 tail_copy
->cpy_hdr
.entries_pageable
=
9920 copy
->cpy_hdr
.entries_pageable
;
9921 vm_map_store_init(&tail_copy
->cpy_hdr
);
9922 tail_copy
->cpy_hdr
.page_shift
= copy_page_shift
;
9924 tail_copy
->offset
= copy
->offset
+ copy_size
- tail_size
;
9925 tail_copy
->size
= tail_size
;
9927 copy
->size
-= tail_size
;
9928 copy_size
-= tail_size
;
9929 assert(copy_size
> 0);
9931 entry
= vm_map_copy_last_entry(copy
);
9932 vm_map_copy_clip_start(copy
, entry
, tail_copy
->offset
);
9933 entry
= vm_map_copy_last_entry(copy
);
9934 vm_map_copy_entry_unlink(copy
, entry
);
9935 vm_map_copy_entry_link(tail_copy
,
9936 vm_map_copy_last_entry(tail_copy
),
9941 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9942 * we want to avoid TOCTOU issues w.r.t copy->size but
9943 * we don't need to change vm_map_copy_overwrite_nested()
9944 * and all other vm_map_copy_overwrite variants.
9946 * So we assign the original copy_size that was passed into
9947 * this routine back to copy.
9949 * This use of local 'copy_size' passed into this routine is
9950 * to try and protect against TOCTOU attacks where the kernel
9951 * has been exploited. We don't expect this to be an issue
9952 * during normal system operation.
9954 assertf(copy
->size
== copy_size
,
9955 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size
, (uint64_t) copy
->size
);
9956 copy
->size
= copy_size
;
9959 * Copy most (or possibly all) of the data.
9961 kr
= vm_map_copy_overwrite_nested(dst_map
,
9962 dst_addr
+ head_size
,
9967 if (kr
!= KERN_SUCCESS
) {
9972 kr
= vm_map_copy_overwrite_nested(dst_map
,
9981 assert(copy
->type
== VM_MAP_COPY_ENTRY_LIST
);
9982 if (kr
== KERN_SUCCESS
) {
9984 * Discard all the copy maps.
9987 vm_map_copy_discard(head_copy
);
9990 vm_map_copy_discard(copy
);
9992 vm_map_copy_discard(tail_copy
);
9997 * Re-assemble the original copy map.
10000 entry
= vm_map_copy_first_entry(head_copy
);
10001 vm_map_copy_entry_unlink(head_copy
, entry
);
10002 vm_map_copy_entry_link(copy
,
10003 vm_map_copy_to_entry(copy
),
10005 copy
->offset
-= head_size
;
10006 copy
->size
+= head_size
;
10007 vm_map_copy_discard(head_copy
);
10011 entry
= vm_map_copy_last_entry(tail_copy
);
10012 vm_map_copy_entry_unlink(tail_copy
, entry
);
10013 vm_map_copy_entry_link(copy
,
10014 vm_map_copy_last_entry(copy
),
10016 copy
->size
+= tail_size
;
10017 vm_map_copy_discard(tail_copy
);
10026 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
10029 * Physically copy unaligned data
10032 * Unaligned parts of pages have to be physically copied. We use
10033 * a modified form of vm_fault_copy (which understands none-aligned
10034 * page offsets and sizes) to do the copy. We attempt to copy as
10035 * much memory in one go as possibly, however vm_fault_copy copies
10036 * within 1 memory object so we have to find the smaller of "amount left"
10037 * "source object data size" and "target object data size". With
10038 * unaligned data we don't need to split regions, therefore the source
10039 * (copy) object should be one map entry, the target range may be split
10040 * over multiple map entries however. In any event we are pessimistic
10041 * about these assumptions.
10044 * dst_map is locked on entry and is return locked on success,
10045 * unlocked on error.
10048 static kern_return_t
10049 vm_map_copy_overwrite_unaligned(
10051 vm_map_entry_t entry
,
10052 vm_map_copy_t copy
,
10053 vm_map_offset_t start
,
10054 boolean_t discard_on_success
)
10056 vm_map_entry_t copy_entry
;
10057 vm_map_entry_t copy_entry_next
;
10058 vm_map_version_t version
;
10059 vm_object_t dst_object
;
10060 vm_object_offset_t dst_offset
;
10061 vm_object_offset_t src_offset
;
10062 vm_object_offset_t entry_offset
;
10063 vm_map_offset_t entry_end
;
10064 vm_map_size_t src_size
,
10068 kern_return_t kr
= KERN_SUCCESS
;
10071 copy_entry
= vm_map_copy_first_entry(copy
);
10073 vm_map_lock_write_to_read(dst_map
);
10075 src_offset
= copy
->offset
- trunc_page_mask_64(copy
->offset
, VM_MAP_COPY_PAGE_MASK(copy
));
10076 amount_left
= copy
->size
;
10078 * unaligned so we never clipped this entry, we need the offset into
10079 * the vm_object not just the data.
10081 while (amount_left
> 0) {
10082 if (entry
== vm_map_to_entry(dst_map
)) {
10083 vm_map_unlock_read(dst_map
);
10084 return KERN_INVALID_ADDRESS
;
10087 /* "start" must be within the current map entry */
10088 assert((start
>= entry
->vme_start
) && (start
< entry
->vme_end
));
10090 dst_offset
= start
- entry
->vme_start
;
10092 dst_size
= entry
->vme_end
- start
;
10094 src_size
= copy_entry
->vme_end
-
10095 (copy_entry
->vme_start
+ src_offset
);
10097 if (dst_size
< src_size
) {
10099 * we can only copy dst_size bytes before
10100 * we have to get the next destination entry
10102 copy_size
= dst_size
;
10105 * we can only copy src_size bytes before
10106 * we have to get the next source copy entry
10108 copy_size
= src_size
;
10111 if (copy_size
> amount_left
) {
10112 copy_size
= amount_left
;
10115 * Entry needs copy, create a shadow shadow object for
10116 * Copy on write region.
10118 if (entry
->needs_copy
&&
10119 ((entry
->protection
& VM_PROT_WRITE
) != 0)) {
10120 if (vm_map_lock_read_to_write(dst_map
)) {
10121 vm_map_lock_read(dst_map
);
10124 VME_OBJECT_SHADOW(entry
,
10125 (vm_map_size_t
)(entry
->vme_end
10126 - entry
->vme_start
));
10127 entry
->needs_copy
= FALSE
;
10128 vm_map_lock_write_to_read(dst_map
);
10130 dst_object
= VME_OBJECT(entry
);
10132 * unlike with the virtual (aligned) copy we're going
10133 * to fault on it therefore we need a target object.
10135 if (dst_object
== VM_OBJECT_NULL
) {
10136 if (vm_map_lock_read_to_write(dst_map
)) {
10137 vm_map_lock_read(dst_map
);
10140 dst_object
= vm_object_allocate((vm_map_size_t
)
10141 entry
->vme_end
- entry
->vme_start
);
10142 VME_OBJECT_SET(entry
, dst_object
);
10143 VME_OFFSET_SET(entry
, 0);
10144 assert(entry
->use_pmap
);
10145 vm_map_lock_write_to_read(dst_map
);
10148 * Take an object reference and unlock map. The "entry" may
10149 * disappear or change when the map is unlocked.
10151 vm_object_reference(dst_object
);
10152 version
.main_timestamp
= dst_map
->timestamp
;
10153 entry_offset
= VME_OFFSET(entry
);
10154 entry_end
= entry
->vme_end
;
10155 vm_map_unlock_read(dst_map
);
10157 * Copy as much as possible in one pass
10159 kr
= vm_fault_copy(
10160 VME_OBJECT(copy_entry
),
10161 VME_OFFSET(copy_entry
) + src_offset
,
10164 entry_offset
+ dst_offset
,
10169 start
+= copy_size
;
10170 src_offset
+= copy_size
;
10171 amount_left
-= copy_size
;
10173 * Release the object reference
10175 vm_object_deallocate(dst_object
);
10177 * If a hard error occurred, return it now
10179 if (kr
!= KERN_SUCCESS
) {
10183 if ((copy_entry
->vme_start
+ src_offset
) == copy_entry
->vme_end
10184 || amount_left
== 0) {
10186 * all done with this copy entry, dispose.
10188 copy_entry_next
= copy_entry
->vme_next
;
10190 if (discard_on_success
) {
10191 vm_map_copy_entry_unlink(copy
, copy_entry
);
10192 assert(!copy_entry
->is_sub_map
);
10193 vm_object_deallocate(VME_OBJECT(copy_entry
));
10194 vm_map_copy_entry_dispose(copy
, copy_entry
);
10197 if (copy_entry_next
== vm_map_copy_to_entry(copy
) &&
10200 * not finished copying but run out of source
10202 return KERN_INVALID_ADDRESS
;
10205 copy_entry
= copy_entry_next
;
10210 if (amount_left
== 0) {
10211 return KERN_SUCCESS
;
10214 vm_map_lock_read(dst_map
);
10215 if (version
.main_timestamp
== dst_map
->timestamp
) {
10216 if (start
== entry_end
) {
10218 * destination region is split. Use the version
10219 * information to avoid a lookup in the normal
10222 entry
= entry
->vme_next
;
10224 * should be contiguous. Fail if we encounter
10225 * a hole in the destination.
10227 if (start
!= entry
->vme_start
) {
10228 vm_map_unlock_read(dst_map
);
10229 return KERN_INVALID_ADDRESS
;
10234 * Map version check failed.
10235 * we must lookup the entry because somebody
10236 * might have changed the map behind our backs.
10239 if (!vm_map_lookup_entry(dst_map
, start
, &entry
)) {
10240 vm_map_unlock_read(dst_map
);
10241 return KERN_INVALID_ADDRESS
;
10246 return KERN_SUCCESS
;
10247 }/* vm_map_copy_overwrite_unaligned */
10250 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10253 * Does all the vm_trickery possible for whole pages.
10257 * If there are no permanent objects in the destination,
10258 * and the source and destination map entry zones match,
10259 * and the destination map entry is not shared,
10260 * then the map entries can be deleted and replaced
10261 * with those from the copy. The following code is the
10262 * basic idea of what to do, but there are lots of annoying
10263 * little details about getting protection and inheritance
10264 * right. Should add protection, inheritance, and sharing checks
10265 * to the above pass and make sure that no wiring is involved.
10268 int vm_map_copy_overwrite_aligned_src_not_internal
= 0;
10269 int vm_map_copy_overwrite_aligned_src_not_symmetric
= 0;
10270 int vm_map_copy_overwrite_aligned_src_large
= 0;
10272 static kern_return_t
10273 vm_map_copy_overwrite_aligned(
10275 vm_map_entry_t tmp_entry
,
10276 vm_map_copy_t copy
,
10277 vm_map_offset_t start
,
10278 __unused pmap_t pmap
)
10280 vm_object_t object
;
10281 vm_map_entry_t copy_entry
;
10282 vm_map_size_t copy_size
;
10283 vm_map_size_t size
;
10284 vm_map_entry_t entry
;
10286 while ((copy_entry
= vm_map_copy_first_entry(copy
))
10287 != vm_map_copy_to_entry(copy
)) {
10288 copy_size
= (copy_entry
->vme_end
- copy_entry
->vme_start
);
10291 if (entry
->is_sub_map
) {
10292 /* unnested when clipped earlier */
10293 assert(!entry
->use_pmap
);
10295 if (entry
== vm_map_to_entry(dst_map
)) {
10296 vm_map_unlock(dst_map
);
10297 return KERN_INVALID_ADDRESS
;
10299 size
= (entry
->vme_end
- entry
->vme_start
);
10301 * Make sure that no holes popped up in the
10302 * address map, and that the protection is
10303 * still valid, in case the map was unlocked
10307 if ((entry
->vme_start
!= start
) || ((entry
->is_sub_map
)
10308 && !entry
->needs_copy
)) {
10309 vm_map_unlock(dst_map
);
10310 return KERN_INVALID_ADDRESS
;
10312 assert(entry
!= vm_map_to_entry(dst_map
));
10315 * Check protection again
10318 if (!(entry
->protection
& VM_PROT_WRITE
)) {
10319 vm_map_unlock(dst_map
);
10320 return KERN_PROTECTION_FAILURE
;
10324 * Adjust to source size first
10327 if (copy_size
< size
) {
10328 if (entry
->map_aligned
&&
10329 !VM_MAP_PAGE_ALIGNED(entry
->vme_start
+ copy_size
,
10330 VM_MAP_PAGE_MASK(dst_map
))) {
10331 /* no longer map-aligned */
10332 entry
->map_aligned
= FALSE
;
10334 vm_map_clip_end(dst_map
, entry
, entry
->vme_start
+ copy_size
);
10339 * Adjust to destination size
10342 if (size
< copy_size
) {
10343 vm_map_copy_clip_end(copy
, copy_entry
,
10344 copy_entry
->vme_start
+ size
);
10348 assert((entry
->vme_end
- entry
->vme_start
) == size
);
10349 assert((tmp_entry
->vme_end
- tmp_entry
->vme_start
) == size
);
10350 assert((copy_entry
->vme_end
- copy_entry
->vme_start
) == size
);
10353 * If the destination contains temporary unshared memory,
10354 * we can perform the copy by throwing it away and
10355 * installing the source data.
10358 object
= VME_OBJECT(entry
);
10359 if ((!entry
->is_shared
&&
10360 ((object
== VM_OBJECT_NULL
) ||
10361 (object
->internal
&& !object
->true_share
))) ||
10362 entry
->needs_copy
) {
10363 vm_object_t old_object
= VME_OBJECT(entry
);
10364 vm_object_offset_t old_offset
= VME_OFFSET(entry
);
10365 vm_object_offset_t offset
;
10368 * Ensure that the source and destination aren't
10371 if (old_object
== VME_OBJECT(copy_entry
) &&
10372 old_offset
== VME_OFFSET(copy_entry
)) {
10373 vm_map_copy_entry_unlink(copy
, copy_entry
);
10374 vm_map_copy_entry_dispose(copy
, copy_entry
);
10376 if (old_object
!= VM_OBJECT_NULL
) {
10377 vm_object_deallocate(old_object
);
10380 start
= tmp_entry
->vme_end
;
10381 tmp_entry
= tmp_entry
->vme_next
;
10385 #if XNU_TARGET_OS_OSX
10386 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10387 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10388 if (VME_OBJECT(copy_entry
) != VM_OBJECT_NULL
&&
10389 VME_OBJECT(copy_entry
)->vo_size
>= __TRADEOFF1_OBJ_SIZE
&&
10390 copy_size
<= __TRADEOFF1_COPY_SIZE
) {
10392 * Virtual vs. Physical copy tradeoff #1.
10394 * Copying only a few pages out of a large
10395 * object: do a physical copy instead of
10396 * a virtual copy, to avoid possibly keeping
10397 * the entire large object alive because of
10398 * those few copy-on-write pages.
10400 vm_map_copy_overwrite_aligned_src_large
++;
10403 #endif /* XNU_TARGET_OS_OSX */
10405 if ((dst_map
->pmap
!= kernel_pmap
) &&
10406 (VME_ALIAS(entry
) >= VM_MEMORY_MALLOC
) &&
10407 (VME_ALIAS(entry
) <= VM_MEMORY_MALLOC_MEDIUM
)) {
10408 vm_object_t new_object
, new_shadow
;
10411 * We're about to map something over a mapping
10412 * established by malloc()...
10414 new_object
= VME_OBJECT(copy_entry
);
10415 if (new_object
!= VM_OBJECT_NULL
) {
10416 vm_object_lock_shared(new_object
);
10418 while (new_object
!= VM_OBJECT_NULL
&&
10419 #if XNU_TARGET_OS_OSX
10420 !new_object
->true_share
&&
10421 new_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
10422 #endif /* XNU_TARGET_OS_OSX */
10423 new_object
->internal
) {
10424 new_shadow
= new_object
->shadow
;
10425 if (new_shadow
== VM_OBJECT_NULL
) {
10428 vm_object_lock_shared(new_shadow
);
10429 vm_object_unlock(new_object
);
10430 new_object
= new_shadow
;
10432 if (new_object
!= VM_OBJECT_NULL
) {
10433 if (!new_object
->internal
) {
10435 * The new mapping is backed
10436 * by an external object. We
10437 * don't want malloc'ed memory
10438 * to be replaced with such a
10439 * non-anonymous mapping, so
10440 * let's go off the optimized
10443 vm_map_copy_overwrite_aligned_src_not_internal
++;
10444 vm_object_unlock(new_object
);
10447 #if XNU_TARGET_OS_OSX
10448 if (new_object
->true_share
||
10449 new_object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
10451 * Same if there's a "true_share"
10452 * object in the shadow chain, or
10453 * an object with a non-default
10454 * (SYMMETRIC) copy strategy.
10456 vm_map_copy_overwrite_aligned_src_not_symmetric
++;
10457 vm_object_unlock(new_object
);
10460 #endif /* XNU_TARGET_OS_OSX */
10461 vm_object_unlock(new_object
);
10464 * The new mapping is still backed by
10465 * anonymous (internal) memory, so it's
10466 * OK to substitute it for the original
10467 * malloc() mapping.
10471 if (old_object
!= VM_OBJECT_NULL
) {
10472 if (entry
->is_sub_map
) {
10473 if (entry
->use_pmap
) {
10474 #ifndef NO_NESTED_PMAP
10475 pmap_unnest(dst_map
->pmap
,
10476 (addr64_t
)entry
->vme_start
,
10477 entry
->vme_end
- entry
->vme_start
);
10478 #endif /* NO_NESTED_PMAP */
10479 if (dst_map
->mapped_in_other_pmaps
) {
10480 /* clean up parent */
10482 vm_map_submap_pmap_clean(
10483 dst_map
, entry
->vme_start
,
10486 VME_OFFSET(entry
));
10489 vm_map_submap_pmap_clean(
10490 dst_map
, entry
->vme_start
,
10493 VME_OFFSET(entry
));
10495 vm_map_deallocate(VME_SUBMAP(entry
));
10497 if (dst_map
->mapped_in_other_pmaps
) {
10498 vm_object_pmap_protect_options(
10502 - entry
->vme_start
,
10507 PMAP_OPTIONS_REMOVE
);
10509 pmap_remove_options(
10511 (addr64_t
)(entry
->vme_start
),
10512 (addr64_t
)(entry
->vme_end
),
10513 PMAP_OPTIONS_REMOVE
);
10515 vm_object_deallocate(old_object
);
10519 if (entry
->iokit_acct
) {
10520 /* keep using iokit accounting */
10521 entry
->use_pmap
= FALSE
;
10523 /* use pmap accounting */
10524 entry
->use_pmap
= TRUE
;
10526 entry
->is_sub_map
= FALSE
;
10527 VME_OBJECT_SET(entry
, VME_OBJECT(copy_entry
));
10528 object
= VME_OBJECT(entry
);
10529 entry
->needs_copy
= copy_entry
->needs_copy
;
10530 entry
->wired_count
= 0;
10531 entry
->user_wired_count
= 0;
10532 offset
= VME_OFFSET(copy_entry
);
10533 VME_OFFSET_SET(entry
, offset
);
10535 vm_map_copy_entry_unlink(copy
, copy_entry
);
10536 vm_map_copy_entry_dispose(copy
, copy_entry
);
10539 * we could try to push pages into the pmap at this point, BUT
10540 * this optimization only saved on average 2 us per page if ALL
10541 * the pages in the source were currently mapped
10542 * and ALL the pages in the dest were touched, if there were fewer
10543 * than 2/3 of the pages touched, this optimization actually cost more cycles
10544 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
10548 * Set up for the next iteration. The map
10549 * has not been unlocked, so the next
10550 * address should be at the end of this
10551 * entry, and the next map entry should be
10552 * the one following it.
10555 start
= tmp_entry
->vme_end
;
10556 tmp_entry
= tmp_entry
->vme_next
;
10558 vm_map_version_t version
;
10559 vm_object_t dst_object
;
10560 vm_object_offset_t dst_offset
;
10564 if (entry
->needs_copy
) {
10565 VME_OBJECT_SHADOW(entry
,
10567 entry
->vme_start
));
10568 entry
->needs_copy
= FALSE
;
10571 dst_object
= VME_OBJECT(entry
);
10572 dst_offset
= VME_OFFSET(entry
);
10575 * Take an object reference, and record
10576 * the map version information so that the
10577 * map can be safely unlocked.
10580 if (dst_object
== VM_OBJECT_NULL
) {
10582 * We would usually have just taken the
10583 * optimized path above if the destination
10584 * object has not been allocated yet. But we
10585 * now disable that optimization if the copy
10586 * entry's object is not backed by anonymous
10587 * memory to avoid replacing malloc'ed
10588 * (i.e. re-usable) anonymous memory with a
10589 * not-so-anonymous mapping.
10590 * So we have to handle this case here and
10591 * allocate a new VM object for this map entry.
10593 dst_object
= vm_object_allocate(
10594 entry
->vme_end
- entry
->vme_start
);
10596 VME_OBJECT_SET(entry
, dst_object
);
10597 VME_OFFSET_SET(entry
, dst_offset
);
10598 assert(entry
->use_pmap
);
10601 vm_object_reference(dst_object
);
10603 /* account for unlock bumping up timestamp */
10604 version
.main_timestamp
= dst_map
->timestamp
+ 1;
10606 vm_map_unlock(dst_map
);
10609 * Copy as much as possible in one pass
10614 VME_OBJECT(copy_entry
),
10615 VME_OFFSET(copy_entry
),
10624 * Release the object reference
10627 vm_object_deallocate(dst_object
);
10630 * If a hard error occurred, return it now
10633 if (r
!= KERN_SUCCESS
) {
10637 if (copy_size
!= 0) {
10639 * Dispose of the copied region
10642 vm_map_copy_clip_end(copy
, copy_entry
,
10643 copy_entry
->vme_start
+ copy_size
);
10644 vm_map_copy_entry_unlink(copy
, copy_entry
);
10645 vm_object_deallocate(VME_OBJECT(copy_entry
));
10646 vm_map_copy_entry_dispose(copy
, copy_entry
);
10650 * Pick up in the destination map where we left off.
10652 * Use the version information to avoid a lookup
10653 * in the normal case.
10656 start
+= copy_size
;
10657 vm_map_lock(dst_map
);
10658 if (version
.main_timestamp
== dst_map
->timestamp
&&
10660 /* We can safely use saved tmp_entry value */
10662 if (tmp_entry
->map_aligned
&&
10663 !VM_MAP_PAGE_ALIGNED(
10665 VM_MAP_PAGE_MASK(dst_map
))) {
10666 /* no longer map-aligned */
10667 tmp_entry
->map_aligned
= FALSE
;
10669 vm_map_clip_end(dst_map
, tmp_entry
, start
);
10670 tmp_entry
= tmp_entry
->vme_next
;
10672 /* Must do lookup of tmp_entry */
10674 if (!vm_map_lookup_entry(dst_map
, start
, &tmp_entry
)) {
10675 vm_map_unlock(dst_map
);
10676 return KERN_INVALID_ADDRESS
;
10678 if (tmp_entry
->map_aligned
&&
10679 !VM_MAP_PAGE_ALIGNED(
10681 VM_MAP_PAGE_MASK(dst_map
))) {
10682 /* no longer map-aligned */
10683 tmp_entry
->map_aligned
= FALSE
;
10685 vm_map_clip_start(dst_map
, tmp_entry
, start
);
10690 return KERN_SUCCESS
;
10691 }/* vm_map_copy_overwrite_aligned */
10694 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10697 * Copy in data to a kernel buffer from space in the
10698 * source map. The original space may be optionally
10701 * If successful, returns a new copy object.
10703 static kern_return_t
10704 vm_map_copyin_kernel_buffer(
10706 vm_map_offset_t src_addr
,
10708 boolean_t src_destroy
,
10709 vm_map_copy_t
*copy_result
)
10712 vm_map_copy_t copy
;
10714 if (len
> msg_ool_size_small
) {
10715 return KERN_INVALID_ARGUMENT
;
10718 copy
= zalloc_flags(vm_map_copy_zone
, Z_WAITOK
| Z_ZERO
);
10719 if (copy
== VM_MAP_COPY_NULL
) {
10720 return KERN_RESOURCE_SHORTAGE
;
10722 copy
->cpy_kdata
= kheap_alloc(KHEAP_DATA_BUFFERS
, len
, Z_WAITOK
);
10723 if (copy
->cpy_kdata
== NULL
) {
10724 zfree(vm_map_copy_zone
, copy
);
10725 return KERN_RESOURCE_SHORTAGE
;
10728 copy
->type
= VM_MAP_COPY_KERNEL_BUFFER
;
10732 kr
= copyinmap(src_map
, src_addr
, copy
->cpy_kdata
, (vm_size_t
)len
);
10733 if (kr
!= KERN_SUCCESS
) {
10734 kheap_free(KHEAP_DATA_BUFFERS
, copy
->cpy_kdata
, len
);
10735 zfree(vm_map_copy_zone
, copy
);
10739 (void) vm_map_remove(
10741 vm_map_trunc_page(src_addr
,
10742 VM_MAP_PAGE_MASK(src_map
)),
10743 vm_map_round_page(src_addr
+ len
,
10744 VM_MAP_PAGE_MASK(src_map
)),
10745 (VM_MAP_REMOVE_INTERRUPTIBLE
|
10746 VM_MAP_REMOVE_WAIT_FOR_KWIRE
|
10747 ((src_map
== kernel_map
) ? VM_MAP_REMOVE_KUNWIRE
: VM_MAP_REMOVE_NO_FLAGS
)));
10749 *copy_result
= copy
;
10750 return KERN_SUCCESS
;
10754 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10757 * Copy out data from a kernel buffer into space in the
10758 * destination map. The space may be otpionally dynamically
10761 * If successful, consumes the copy object.
10762 * Otherwise, the caller is responsible for it.
10764 static int vm_map_copyout_kernel_buffer_failures
= 0;
10765 static kern_return_t
10766 vm_map_copyout_kernel_buffer(
10768 vm_map_address_t
*addr
, /* IN/OUT */
10769 vm_map_copy_t copy
,
10770 vm_map_size_t copy_size
,
10771 boolean_t overwrite
,
10772 boolean_t consume_on_success
)
10774 kern_return_t kr
= KERN_SUCCESS
;
10775 thread_t thread
= current_thread();
10777 assert(copy
->size
== copy_size
);
10780 * check for corrupted vm_map_copy structure
10782 if (copy_size
> msg_ool_size_small
|| copy
->offset
) {
10783 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10784 (long long)copy
->size
, (long long)copy
->offset
);
10789 * Allocate space in the target map for the data
10792 kr
= vm_map_enter(map
,
10794 vm_map_round_page(copy_size
,
10795 VM_MAP_PAGE_MASK(map
)),
10796 (vm_map_offset_t
) 0,
10798 VM_MAP_KERNEL_FLAGS_NONE
,
10799 VM_KERN_MEMORY_NONE
,
10801 (vm_object_offset_t
) 0,
10805 VM_INHERIT_DEFAULT
);
10806 if (kr
!= KERN_SUCCESS
) {
10810 if (map
->pmap
== kernel_pmap
) {
10811 kasan_notify_address(*addr
, copy
->size
);
10817 * Copyout the data from the kernel buffer to the target map.
10819 if (thread
->map
== map
) {
10821 * If the target map is the current map, just do
10824 assert((vm_size_t
)copy_size
== copy_size
);
10825 if (copyout(copy
->cpy_kdata
, *addr
, (vm_size_t
)copy_size
)) {
10826 kr
= KERN_INVALID_ADDRESS
;
10832 * If the target map is another map, assume the
10833 * target's address space identity for the duration
10836 vm_map_reference(map
);
10837 oldmap
= vm_map_switch(map
);
10839 assert((vm_size_t
)copy_size
== copy_size
);
10840 if (copyout(copy
->cpy_kdata
, *addr
, (vm_size_t
)copy_size
)) {
10841 vm_map_copyout_kernel_buffer_failures
++;
10842 kr
= KERN_INVALID_ADDRESS
;
10845 (void) vm_map_switch(oldmap
);
10846 vm_map_deallocate(map
);
10849 if (kr
!= KERN_SUCCESS
) {
10850 /* the copy failed, clean up */
10853 * Deallocate the space we allocated in the target map.
10855 (void) vm_map_remove(
10857 vm_map_trunc_page(*addr
,
10858 VM_MAP_PAGE_MASK(map
)),
10859 vm_map_round_page((*addr
+
10860 vm_map_round_page(copy_size
,
10861 VM_MAP_PAGE_MASK(map
))),
10862 VM_MAP_PAGE_MASK(map
)),
10863 VM_MAP_REMOVE_NO_FLAGS
);
10867 /* copy was successful, dicard the copy structure */
10868 if (consume_on_success
) {
10869 kheap_free(KHEAP_DATA_BUFFERS
, copy
->cpy_kdata
, copy_size
);
10870 zfree(vm_map_copy_zone
, copy
);
10878 * Routine: vm_map_copy_insert [internal use only]
10881 * Link a copy chain ("copy") into a map at the
10882 * specified location (after "where").
10884 * The copy chain is destroyed.
10887 vm_map_copy_insert(
10889 vm_map_entry_t after_where
,
10890 vm_map_copy_t copy
)
10892 vm_map_entry_t entry
;
10894 while (vm_map_copy_first_entry(copy
) != vm_map_copy_to_entry(copy
)) {
10895 entry
= vm_map_copy_first_entry(copy
);
10896 vm_map_copy_entry_unlink(copy
, entry
);
10897 vm_map_store_entry_link(map
, after_where
, entry
,
10898 VM_MAP_KERNEL_FLAGS_NONE
);
10899 after_where
= entry
;
10901 zfree(vm_map_copy_zone
, copy
);
10907 vm_map_entry_t where
,
10908 vm_map_copy_t copy
,
10909 vm_map_offset_t adjustment
,
10910 vm_prot_t cur_prot
,
10911 vm_prot_t max_prot
,
10912 vm_inherit_t inheritance
)
10914 vm_map_entry_t copy_entry
, new_entry
;
10916 for (copy_entry
= vm_map_copy_first_entry(copy
);
10917 copy_entry
!= vm_map_copy_to_entry(copy
);
10918 copy_entry
= copy_entry
->vme_next
) {
10919 /* get a new VM map entry for the map */
10920 new_entry
= vm_map_entry_create(map
,
10921 !map
->hdr
.entries_pageable
);
10922 /* copy the "copy entry" to the new entry */
10923 vm_map_entry_copy(map
, new_entry
, copy_entry
);
10924 /* adjust "start" and "end" */
10925 new_entry
->vme_start
+= adjustment
;
10926 new_entry
->vme_end
+= adjustment
;
10927 /* clear some attributes */
10928 new_entry
->inheritance
= inheritance
;
10929 new_entry
->protection
= cur_prot
;
10930 new_entry
->max_protection
= max_prot
;
10931 new_entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
10932 /* take an extra reference on the entry's "object" */
10933 if (new_entry
->is_sub_map
) {
10934 assert(!new_entry
->use_pmap
); /* not nested */
10935 vm_map_lock(VME_SUBMAP(new_entry
));
10936 vm_map_reference(VME_SUBMAP(new_entry
));
10937 vm_map_unlock(VME_SUBMAP(new_entry
));
10939 vm_object_reference(VME_OBJECT(new_entry
));
10941 /* insert the new entry in the map */
10942 vm_map_store_entry_link(map
, where
, new_entry
,
10943 VM_MAP_KERNEL_FLAGS_NONE
);
10944 /* continue inserting the "copy entries" after the new entry */
10951 * Returns true if *size matches (or is in the range of) copy->size.
10952 * Upon returning true, the *size field is updated with the actual size of the
10953 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10956 vm_map_copy_validate_size(
10958 vm_map_copy_t copy
,
10959 vm_map_size_t
*size
)
10961 if (copy
== VM_MAP_COPY_NULL
) {
10964 vm_map_size_t copy_sz
= copy
->size
;
10965 vm_map_size_t sz
= *size
;
10966 switch (copy
->type
) {
10967 case VM_MAP_COPY_OBJECT
:
10968 case VM_MAP_COPY_KERNEL_BUFFER
:
10969 if (sz
== copy_sz
) {
10973 case VM_MAP_COPY_ENTRY_LIST
:
10975 * potential page-size rounding prevents us from exactly
10976 * validating this flavor of vm_map_copy, but we can at least
10977 * assert that it's within a range.
10979 if (copy_sz
>= sz
&&
10980 copy_sz
<= vm_map_round_page(sz
, VM_MAP_PAGE_MASK(dst_map
))) {
10992 * Routine: vm_map_copyout_size
10995 * Copy out a copy chain ("copy") into newly-allocated
10996 * space in the destination map. Uses a prevalidated
10997 * size for the copy object (vm_map_copy_validate_size).
10999 * If successful, consumes the copy object.
11000 * Otherwise, the caller is responsible for it.
11003 vm_map_copyout_size(
11005 vm_map_address_t
*dst_addr
, /* OUT */
11006 vm_map_copy_t copy
,
11007 vm_map_size_t copy_size
)
11009 return vm_map_copyout_internal(dst_map
, dst_addr
, copy
, copy_size
,
11010 TRUE
, /* consume_on_success */
11013 VM_INHERIT_DEFAULT
);
11017 * Routine: vm_map_copyout
11020 * Copy out a copy chain ("copy") into newly-allocated
11021 * space in the destination map.
11023 * If successful, consumes the copy object.
11024 * Otherwise, the caller is responsible for it.
11029 vm_map_address_t
*dst_addr
, /* OUT */
11030 vm_map_copy_t copy
)
11032 return vm_map_copyout_internal(dst_map
, dst_addr
, copy
, copy
? copy
->size
: 0,
11033 TRUE
, /* consume_on_success */
11036 VM_INHERIT_DEFAULT
);
11040 vm_map_copyout_internal(
11042 vm_map_address_t
*dst_addr
, /* OUT */
11043 vm_map_copy_t copy
,
11044 vm_map_size_t copy_size
,
11045 boolean_t consume_on_success
,
11046 vm_prot_t cur_protection
,
11047 vm_prot_t max_protection
,
11048 vm_inherit_t inheritance
)
11050 vm_map_size_t size
;
11051 vm_map_size_t adjustment
;
11052 vm_map_offset_t start
;
11053 vm_object_offset_t vm_copy_start
;
11054 vm_map_entry_t last
;
11055 vm_map_entry_t entry
;
11056 vm_map_entry_t hole_entry
;
11057 vm_map_copy_t original_copy
;
11060 * Check for null copy object.
11063 if (copy
== VM_MAP_COPY_NULL
) {
11065 return KERN_SUCCESS
;
11069 * Assert that the vm_map_copy is coming from the right
11070 * zone and hasn't been forged
11072 vm_map_copy_require(copy
);
11074 if (copy
->size
!= copy_size
) {
11076 return KERN_FAILURE
;
11080 * Check for special copy object, created
11081 * by vm_map_copyin_object.
11084 if (copy
->type
== VM_MAP_COPY_OBJECT
) {
11085 vm_object_t object
= copy
->cpy_object
;
11087 vm_object_offset_t offset
;
11089 offset
= vm_object_trunc_page(copy
->offset
);
11090 size
= vm_map_round_page((copy_size
+
11091 (vm_map_size_t
)(copy
->offset
-
11093 VM_MAP_PAGE_MASK(dst_map
));
11095 kr
= vm_map_enter(dst_map
, dst_addr
, size
,
11096 (vm_map_offset_t
) 0, VM_FLAGS_ANYWHERE
,
11097 VM_MAP_KERNEL_FLAGS_NONE
,
11098 VM_KERN_MEMORY_NONE
,
11099 object
, offset
, FALSE
,
11100 VM_PROT_DEFAULT
, VM_PROT_ALL
,
11101 VM_INHERIT_DEFAULT
);
11102 if (kr
!= KERN_SUCCESS
) {
11105 /* Account for non-pagealigned copy object */
11106 *dst_addr
+= (vm_map_offset_t
)(copy
->offset
- offset
);
11107 if (consume_on_success
) {
11108 zfree(vm_map_copy_zone
, copy
);
11110 return KERN_SUCCESS
;
11114 * Check for special kernel buffer allocated
11115 * by new_ipc_kmsg_copyin.
11118 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
11119 return vm_map_copyout_kernel_buffer(dst_map
, dst_addr
,
11120 copy
, copy_size
, FALSE
,
11121 consume_on_success
);
11124 original_copy
= copy
;
11125 if (copy
->cpy_hdr
.page_shift
!= VM_MAP_PAGE_SHIFT(dst_map
)) {
11127 vm_map_copy_t target_copy
;
11128 vm_map_offset_t overmap_start
, overmap_end
, trimmed_start
;
11130 target_copy
= VM_MAP_COPY_NULL
;
11131 DEBUG4K_ADJUST("adjusting...\n");
11132 kr
= vm_map_copy_adjust_to_target(
11135 copy
->size
, /* size */
11142 if (kr
!= KERN_SUCCESS
) {
11143 DEBUG4K_COPY("adjust failed 0x%x\n", kr
);
11146 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy
, copy
->cpy_hdr
.page_shift
, copy
->offset
, (uint64_t)copy
->size
, dst_map
, VM_MAP_PAGE_SHIFT(dst_map
), target_copy
, target_copy
->cpy_hdr
.page_shift
, target_copy
->offset
, (uint64_t)target_copy
->size
, (uint64_t)overmap_start
, (uint64_t)overmap_end
, (uint64_t)trimmed_start
);
11147 if (target_copy
!= copy
) {
11148 copy
= target_copy
;
11150 copy_size
= copy
->size
;
11154 * Find space for the data
11157 vm_copy_start
= vm_map_trunc_page((vm_map_size_t
)copy
->offset
,
11158 VM_MAP_COPY_PAGE_MASK(copy
));
11159 size
= vm_map_round_page((vm_map_size_t
)copy
->offset
+ copy_size
,
11160 VM_MAP_COPY_PAGE_MASK(copy
))
11166 vm_map_lock(dst_map
);
11167 if (dst_map
->disable_vmentry_reuse
== TRUE
) {
11168 VM_MAP_HIGHEST_ENTRY(dst_map
, entry
, start
);
11171 if (dst_map
->holelistenabled
) {
11172 hole_entry
= CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
);
11174 if (hole_entry
== NULL
) {
11176 * No more space in the map?
11178 vm_map_unlock(dst_map
);
11179 return KERN_NO_SPACE
;
11183 start
= last
->vme_start
;
11185 assert(first_free_is_valid(dst_map
));
11186 start
= ((last
= dst_map
->first_free
) == vm_map_to_entry(dst_map
)) ?
11187 vm_map_min(dst_map
) : last
->vme_end
;
11189 start
= vm_map_round_page(start
,
11190 VM_MAP_PAGE_MASK(dst_map
));
11194 vm_map_entry_t next
= last
->vme_next
;
11195 vm_map_offset_t end
= start
+ size
;
11197 if ((end
> dst_map
->max_offset
) || (end
< start
)) {
11198 if (dst_map
->wait_for_space
) {
11199 if (size
<= (dst_map
->max_offset
- dst_map
->min_offset
)) {
11200 assert_wait((event_t
) dst_map
,
11201 THREAD_INTERRUPTIBLE
);
11202 vm_map_unlock(dst_map
);
11203 thread_block(THREAD_CONTINUE_NULL
);
11207 vm_map_unlock(dst_map
);
11208 return KERN_NO_SPACE
;
11211 if (dst_map
->holelistenabled
) {
11212 if (last
->vme_end
>= end
) {
11217 * If there are no more entries, we must win.
11221 * If there is another entry, it must be
11222 * after the end of the potential new region.
11225 if (next
== vm_map_to_entry(dst_map
)) {
11229 if (next
->vme_start
>= end
) {
11236 if (dst_map
->holelistenabled
) {
11237 if (last
== CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
)) {
11241 vm_map_unlock(dst_map
);
11242 return KERN_NO_SPACE
;
11244 start
= last
->vme_start
;
11246 start
= last
->vme_end
;
11248 start
= vm_map_round_page(start
,
11249 VM_MAP_PAGE_MASK(dst_map
));
11252 if (dst_map
->holelistenabled
) {
11253 if (vm_map_lookup_entry(dst_map
, last
->vme_start
, &last
)) {
11254 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last
, (unsigned long long)last
->vme_start
);
11259 adjustment
= start
- vm_copy_start
;
11260 if (!consume_on_success
) {
11262 * We're not allowed to consume "copy", so we'll have to
11263 * copy its map entries into the destination map below.
11264 * No need to re-allocate map entries from the correct
11265 * (pageable or not) zone, since we'll get new map entries
11266 * during the transfer.
11267 * We'll also adjust the map entries's "start" and "end"
11268 * during the transfer, to keep "copy"'s entries consistent
11269 * with its "offset".
11271 goto after_adjustments
;
11275 * Since we're going to just drop the map
11276 * entries from the copy into the destination
11277 * map, they must come from the same pool.
11280 if (copy
->cpy_hdr
.entries_pageable
!= dst_map
->hdr
.entries_pageable
) {
11282 * Mismatches occur when dealing with the default
11286 vm_map_entry_t next
, new;
11289 * Find the zone that the copies were allocated from
11292 entry
= vm_map_copy_first_entry(copy
);
11295 * Reinitialize the copy so that vm_map_copy_entry_link
11298 vm_map_store_copy_reset(copy
, entry
);
11299 copy
->cpy_hdr
.entries_pageable
= dst_map
->hdr
.entries_pageable
;
11304 while (entry
!= vm_map_copy_to_entry(copy
)) {
11305 new = vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11306 vm_map_entry_copy_full(new, entry
);
11307 new->vme_no_copy_on_read
= FALSE
;
11308 assert(!new->iokit_acct
);
11309 if (new->is_sub_map
) {
11310 /* clr address space specifics */
11311 new->use_pmap
= FALSE
;
11313 vm_map_copy_entry_link(copy
,
11314 vm_map_copy_last_entry(copy
),
11316 next
= entry
->vme_next
;
11317 old_zone
= entry
->from_reserved_zone
? vm_map_entry_reserved_zone
: vm_map_entry_zone
;
11318 zfree(old_zone
, entry
);
11324 * Adjust the addresses in the copy chain, and
11325 * reset the region attributes.
11328 for (entry
= vm_map_copy_first_entry(copy
);
11329 entry
!= vm_map_copy_to_entry(copy
);
11330 entry
= entry
->vme_next
) {
11331 if (VM_MAP_PAGE_SHIFT(dst_map
) == PAGE_SHIFT
) {
11333 * We're injecting this copy entry into a map that
11334 * has the standard page alignment, so clear
11335 * "map_aligned" (which might have been inherited
11336 * from the original map entry).
11338 entry
->map_aligned
= FALSE
;
11341 entry
->vme_start
+= adjustment
;
11342 entry
->vme_end
+= adjustment
;
11344 if (entry
->map_aligned
) {
11345 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_start
,
11346 VM_MAP_PAGE_MASK(dst_map
)));
11347 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_end
,
11348 VM_MAP_PAGE_MASK(dst_map
)));
11351 entry
->inheritance
= VM_INHERIT_DEFAULT
;
11352 entry
->protection
= VM_PROT_DEFAULT
;
11353 entry
->max_protection
= VM_PROT_ALL
;
11354 entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
11357 * If the entry is now wired,
11358 * map the pages into the destination map.
11360 if (entry
->wired_count
!= 0) {
11361 vm_map_offset_t va
;
11362 vm_object_offset_t offset
;
11363 vm_object_t object
;
11367 /* TODO4K would need to use actual page size */
11368 assert(VM_MAP_PAGE_SHIFT(dst_map
) == PAGE_SHIFT
);
11370 object
= VME_OBJECT(entry
);
11371 offset
= VME_OFFSET(entry
);
11372 va
= entry
->vme_start
;
11374 pmap_pageable(dst_map
->pmap
,
11379 while (va
< entry
->vme_end
) {
11381 struct vm_object_fault_info fault_info
= {};
11384 * Look up the page in the object.
11385 * Assert that the page will be found in the
11388 * the object was newly created by
11389 * vm_object_copy_slowly, and has
11390 * copies of all of the pages from
11391 * the source object
11393 * the object was moved from the old
11394 * map entry; because the old map
11395 * entry was wired, all of the pages
11396 * were in the top-level object.
11397 * (XXX not true if we wire pages for
11400 vm_object_lock(object
);
11402 m
= vm_page_lookup(object
, offset
);
11403 if (m
== VM_PAGE_NULL
|| !VM_PAGE_WIRED(m
) ||
11405 panic("vm_map_copyout: wiring %p", m
);
11408 prot
= entry
->protection
;
11410 if (override_nx(dst_map
, VME_ALIAS(entry
)) &&
11412 prot
|= VM_PROT_EXECUTE
;
11415 type_of_fault
= DBG_CACHE_HIT_FAULT
;
11417 fault_info
.user_tag
= VME_ALIAS(entry
);
11418 fault_info
.pmap_options
= 0;
11419 if (entry
->iokit_acct
||
11420 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
11421 fault_info
.pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
11431 FALSE
, /* change_wiring */
11432 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
11434 NULL
, /* need_retry */
11437 vm_object_unlock(object
);
11439 offset
+= PAGE_SIZE_64
;
11448 * Correct the page alignment for the result
11451 *dst_addr
= start
+ (copy
->offset
- vm_copy_start
);
11454 kasan_notify_address(*dst_addr
, size
);
11458 * Update the hints and the map size
11461 if (consume_on_success
) {
11462 SAVE_HINT_MAP_WRITE(dst_map
, vm_map_copy_last_entry(copy
));
11464 SAVE_HINT_MAP_WRITE(dst_map
, last
);
11467 dst_map
->size
+= size
;
11473 if (consume_on_success
) {
11474 vm_map_copy_insert(dst_map
, last
, copy
);
11475 if (copy
!= original_copy
) {
11476 vm_map_copy_discard(original_copy
);
11477 original_copy
= VM_MAP_COPY_NULL
;
11480 vm_map_copy_remap(dst_map
, last
, copy
, adjustment
,
11481 cur_protection
, max_protection
,
11483 if (copy
!= original_copy
&& original_copy
!= VM_MAP_COPY_NULL
) {
11484 vm_map_copy_discard(copy
);
11485 copy
= original_copy
;
11490 vm_map_unlock(dst_map
);
11493 * XXX If wiring_required, call vm_map_pageable
11496 return KERN_SUCCESS
;
11500 * Routine: vm_map_copyin
11503 * see vm_map_copyin_common. Exported via Unsupported.exports.
11507 #undef vm_map_copyin
11512 vm_map_address_t src_addr
,
11514 boolean_t src_destroy
,
11515 vm_map_copy_t
*copy_result
) /* OUT */
11517 return vm_map_copyin_common(src_map
, src_addr
, len
, src_destroy
,
11518 FALSE
, copy_result
, FALSE
);
11522 * Routine: vm_map_copyin_common
11525 * Copy the specified region (src_addr, len) from the
11526 * source address space (src_map), possibly removing
11527 * the region from the source address space (src_destroy).
11530 * A vm_map_copy_t object (copy_result), suitable for
11531 * insertion into another address space (using vm_map_copyout),
11532 * copying over another address space region (using
11533 * vm_map_copy_overwrite). If the copy is unused, it
11534 * should be destroyed (using vm_map_copy_discard).
11536 * In/out conditions:
11537 * The source map should not be locked on entry.
11540 typedef struct submap_map
{
11541 vm_map_t parent_map
;
11542 vm_map_offset_t base_start
;
11543 vm_map_offset_t base_end
;
11544 vm_map_size_t base_len
;
11545 struct submap_map
*next
;
11549 vm_map_copyin_common(
11551 vm_map_address_t src_addr
,
11553 boolean_t src_destroy
,
11554 __unused boolean_t src_volatile
,
11555 vm_map_copy_t
*copy_result
, /* OUT */
11556 boolean_t use_maxprot
)
11562 flags
|= VM_MAP_COPYIN_SRC_DESTROY
;
11565 flags
|= VM_MAP_COPYIN_USE_MAXPROT
;
11567 return vm_map_copyin_internal(src_map
,
11574 vm_map_copyin_internal(
11576 vm_map_address_t src_addr
,
11579 vm_map_copy_t
*copy_result
) /* OUT */
11581 vm_map_entry_t tmp_entry
; /* Result of last map lookup --
11582 * in multi-level lookup, this
11583 * entry contains the actual
11584 * vm_object/offset.
11586 vm_map_entry_t new_entry
= VM_MAP_ENTRY_NULL
; /* Map entry for copy */
11588 vm_map_offset_t src_start
; /* Start of current entry --
11589 * where copy is taking place now
11591 vm_map_offset_t src_end
; /* End of entire region to be
11593 vm_map_offset_t src_base
;
11594 vm_map_t base_map
= src_map
;
11595 boolean_t map_share
= FALSE
;
11596 submap_map_t
*parent_maps
= NULL
;
11598 vm_map_copy_t copy
; /* Resulting copy */
11599 vm_map_address_t copy_addr
;
11600 vm_map_size_t copy_size
;
11601 boolean_t src_destroy
;
11602 boolean_t use_maxprot
;
11603 boolean_t preserve_purgeable
;
11604 boolean_t entry_was_shared
;
11605 vm_map_entry_t saved_src_entry
;
11607 if (flags
& ~VM_MAP_COPYIN_ALL_FLAGS
) {
11608 return KERN_INVALID_ARGUMENT
;
11611 src_destroy
= (flags
& VM_MAP_COPYIN_SRC_DESTROY
) ? TRUE
: FALSE
;
11612 use_maxprot
= (flags
& VM_MAP_COPYIN_USE_MAXPROT
) ? TRUE
: FALSE
;
11613 preserve_purgeable
=
11614 (flags
& VM_MAP_COPYIN_PRESERVE_PURGEABLE
) ? TRUE
: FALSE
;
11617 * Check for copies of zero bytes.
11621 *copy_result
= VM_MAP_COPY_NULL
;
11622 return KERN_SUCCESS
;
11626 * Check that the end address doesn't overflow
11628 src_end
= src_addr
+ len
;
11629 if (src_end
< src_addr
) {
11630 return KERN_INVALID_ADDRESS
;
11634 * Compute (page aligned) start and end of region
11636 src_start
= vm_map_trunc_page(src_addr
,
11637 VM_MAP_PAGE_MASK(src_map
));
11638 src_end
= vm_map_round_page(src_end
,
11639 VM_MAP_PAGE_MASK(src_map
));
11642 * If the copy is sufficiently small, use a kernel buffer instead
11643 * of making a virtual copy. The theory being that the cost of
11644 * setting up VM (and taking C-O-W faults) dominates the copy costs
11645 * for small regions.
11647 if ((len
< msg_ool_size_small
) &&
11649 !preserve_purgeable
&&
11650 !(flags
& VM_MAP_COPYIN_ENTRY_LIST
) &&
11652 * Since the "msg_ool_size_small" threshold was increased and
11653 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11654 * address space limits, we revert to doing a virtual copy if the
11655 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11656 * of the commpage would now fail when it used to work.
11658 (src_start
>= vm_map_min(src_map
) &&
11659 src_start
< vm_map_max(src_map
) &&
11660 src_end
>= vm_map_min(src_map
) &&
11661 src_end
< vm_map_max(src_map
))) {
11662 return vm_map_copyin_kernel_buffer(src_map
, src_addr
, len
,
11663 src_destroy
, copy_result
);
11667 * Allocate a header element for the list.
11669 * Use the start and end in the header to
11670 * remember the endpoints prior to rounding.
11673 copy
= vm_map_copy_allocate();
11674 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
11675 copy
->cpy_hdr
.entries_pageable
= TRUE
;
11676 copy
->cpy_hdr
.page_shift
= VM_MAP_PAGE_SHIFT(src_map
);
11678 vm_map_store_init( &(copy
->cpy_hdr
));
11680 copy
->offset
= src_addr
;
11683 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11685 #define RETURN(x) \
11687 vm_map_unlock(src_map); \
11688 if(src_map != base_map) \
11689 vm_map_deallocate(src_map); \
11690 if (new_entry != VM_MAP_ENTRY_NULL) \
11691 vm_map_copy_entry_dispose(copy,new_entry); \
11692 vm_map_copy_discard(copy); \
11694 submap_map_t *_ptr; \
11696 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11697 parent_maps=parent_maps->next; \
11698 if (_ptr->parent_map != base_map) \
11699 vm_map_deallocate(_ptr->parent_map); \
11700 kfree(_ptr, sizeof(submap_map_t)); \
11707 * Find the beginning of the region.
11710 vm_map_lock(src_map
);
11713 * Lookup the original "src_addr" rather than the truncated
11714 * "src_start", in case "src_start" falls in a non-map-aligned
11715 * map entry *before* the map entry that contains "src_addr"...
11717 if (!vm_map_lookup_entry(src_map
, src_addr
, &tmp_entry
)) {
11718 RETURN(KERN_INVALID_ADDRESS
);
11720 if (!tmp_entry
->is_sub_map
) {
11722 * ... but clip to the map-rounded "src_start" rather than
11723 * "src_addr" to preserve map-alignment. We'll adjust the
11724 * first copy entry at the end, if needed.
11726 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11728 if (src_start
< tmp_entry
->vme_start
) {
11730 * Move "src_start" up to the start of the
11731 * first map entry to copy.
11733 src_start
= tmp_entry
->vme_start
;
11735 /* set for later submap fix-up */
11736 copy_addr
= src_start
;
11739 * Go through entries until we get to the end.
11743 vm_map_entry_t src_entry
= tmp_entry
; /* Top-level entry */
11744 vm_map_size_t src_size
; /* Size of source
11745 * map entry (in both
11749 vm_object_t src_object
; /* Object to copy */
11750 vm_object_offset_t src_offset
;
11752 boolean_t src_needs_copy
; /* Should source map
11753 * be made read-only
11754 * for copy-on-write?
11757 boolean_t new_entry_needs_copy
; /* Will new entry be COW? */
11759 boolean_t was_wired
; /* Was source wired? */
11760 vm_map_version_t version
; /* Version before locks
11761 * dropped to make copy
11763 kern_return_t result
; /* Return value from
11764 * copy_strategically.
11766 while (tmp_entry
->is_sub_map
) {
11767 vm_map_size_t submap_len
;
11770 ptr
= (submap_map_t
*)kalloc(sizeof(submap_map_t
));
11771 ptr
->next
= parent_maps
;
11773 ptr
->parent_map
= src_map
;
11774 ptr
->base_start
= src_start
;
11775 ptr
->base_end
= src_end
;
11776 submap_len
= tmp_entry
->vme_end
- src_start
;
11777 if (submap_len
> (src_end
- src_start
)) {
11778 submap_len
= src_end
- src_start
;
11780 ptr
->base_len
= submap_len
;
11782 src_start
-= tmp_entry
->vme_start
;
11783 src_start
+= VME_OFFSET(tmp_entry
);
11784 src_end
= src_start
+ submap_len
;
11785 src_map
= VME_SUBMAP(tmp_entry
);
11786 vm_map_lock(src_map
);
11787 /* keep an outstanding reference for all maps in */
11788 /* the parents tree except the base map */
11789 vm_map_reference(src_map
);
11790 vm_map_unlock(ptr
->parent_map
);
11791 if (!vm_map_lookup_entry(
11792 src_map
, src_start
, &tmp_entry
)) {
11793 RETURN(KERN_INVALID_ADDRESS
);
11796 if (!tmp_entry
->is_sub_map
) {
11797 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11799 src_entry
= tmp_entry
;
11801 /* we are now in the lowest level submap... */
11803 if ((VME_OBJECT(tmp_entry
) != VM_OBJECT_NULL
) &&
11804 (VME_OBJECT(tmp_entry
)->phys_contiguous
)) {
11805 /* This is not supported for now. In future */
11806 /* we will need to detect the phys_contig */
11807 /* condition and then upgrade copy_slowly */
11808 /* to do physical copy from the device mem */
11809 /* based object. We can piggy-back off of */
11810 /* the was wired boolean to set-up the */
11811 /* proper handling */
11812 RETURN(KERN_PROTECTION_FAILURE
);
11815 * Create a new address map entry to hold the result.
11816 * Fill in the fields from the appropriate source entries.
11817 * We must unlock the source map to do this if we need
11818 * to allocate a map entry.
11820 if (new_entry
== VM_MAP_ENTRY_NULL
) {
11821 version
.main_timestamp
= src_map
->timestamp
;
11822 vm_map_unlock(src_map
);
11824 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11826 vm_map_lock(src_map
);
11827 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
11828 if (!vm_map_lookup_entry(src_map
, src_start
,
11830 RETURN(KERN_INVALID_ADDRESS
);
11832 if (!tmp_entry
->is_sub_map
) {
11833 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11835 continue; /* restart w/ new tmp_entry */
11840 * Verify that the region can be read.
11842 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
11844 (src_entry
->max_protection
& VM_PROT_READ
) == 0) {
11845 RETURN(KERN_PROTECTION_FAILURE
);
11849 * Clip against the endpoints of the entire region.
11852 vm_map_clip_end(src_map
, src_entry
, src_end
);
11854 src_size
= src_entry
->vme_end
- src_start
;
11855 src_object
= VME_OBJECT(src_entry
);
11856 src_offset
= VME_OFFSET(src_entry
);
11857 was_wired
= (src_entry
->wired_count
!= 0);
11859 vm_map_entry_copy(src_map
, new_entry
, src_entry
);
11860 if (new_entry
->is_sub_map
) {
11861 /* clr address space specifics */
11862 new_entry
->use_pmap
= FALSE
;
11865 * We're dealing with a copy-on-write operation,
11866 * so the resulting mapping should not inherit the
11867 * original mapping's accounting settings.
11868 * "iokit_acct" should have been cleared in
11869 * vm_map_entry_copy().
11870 * "use_pmap" should be reset to its default (TRUE)
11871 * so that the new mapping gets accounted for in
11872 * the task's memory footprint.
11874 assert(!new_entry
->iokit_acct
);
11875 new_entry
->use_pmap
= TRUE
;
11879 * Attempt non-blocking copy-on-write optimizations.
11883 * If we are destroying the source, and the object
11884 * is internal, we could move the object reference
11885 * from the source to the copy. The copy is
11886 * copy-on-write only if the source is.
11887 * We make another reference to the object, because
11888 * destroying the source entry will deallocate it.
11890 * This memory transfer has to be atomic, (to prevent
11891 * the VM object from being shared or copied while
11892 * it's being moved here), so we could only do this
11893 * if we won't have to unlock the VM map until the
11894 * original mapping has been fully removed.
11898 if ((src_object
== VM_OBJECT_NULL
||
11899 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
11900 && !(debug4k_no_cow_copyin
&& VM_MAP_PAGE_SHIFT(src_map
) < PAGE_SHIFT
))) &&
11901 vm_object_copy_quickly(
11902 VME_OBJECT_PTR(new_entry
),
11906 &new_entry_needs_copy
)) {
11907 new_entry
->needs_copy
= new_entry_needs_copy
;
11910 * Handle copy-on-write obligations
11913 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
11916 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
11918 if (override_nx(src_map
, VME_ALIAS(src_entry
))
11920 prot
|= VM_PROT_EXECUTE
;
11923 vm_object_pmap_protect(
11927 (src_entry
->is_shared
?
11930 VM_MAP_PAGE_SIZE(src_map
),
11931 src_entry
->vme_start
,
11934 assert(tmp_entry
->wired_count
== 0);
11935 tmp_entry
->needs_copy
= TRUE
;
11939 * The map has never been unlocked, so it's safe
11940 * to move to the next entry rather than doing
11944 goto CopySuccessful
;
11947 entry_was_shared
= tmp_entry
->is_shared
;
11950 * Take an object reference, so that we may
11951 * release the map lock(s).
11954 assert(src_object
!= VM_OBJECT_NULL
);
11955 vm_object_reference(src_object
);
11958 * Record the timestamp for later verification.
11962 version
.main_timestamp
= src_map
->timestamp
;
11963 vm_map_unlock(src_map
); /* Increments timestamp once! */
11964 saved_src_entry
= src_entry
;
11965 tmp_entry
= VM_MAP_ENTRY_NULL
;
11966 src_entry
= VM_MAP_ENTRY_NULL
;
11973 (debug4k_no_cow_copyin
&&
11974 VM_MAP_PAGE_SHIFT(src_map
) < PAGE_SHIFT
)) {
11976 vm_object_lock(src_object
);
11977 result
= vm_object_copy_slowly(
11982 VME_OBJECT_PTR(new_entry
));
11983 VME_OFFSET_SET(new_entry
,
11984 src_offset
- vm_object_trunc_page(src_offset
));
11985 new_entry
->needs_copy
= FALSE
;
11986 } else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
11987 (entry_was_shared
|| map_share
)) {
11988 vm_object_t new_object
;
11990 vm_object_lock_shared(src_object
);
11991 new_object
= vm_object_copy_delayed(
11996 if (new_object
== VM_OBJECT_NULL
) {
12000 VME_OBJECT_SET(new_entry
, new_object
);
12001 assert(new_entry
->wired_count
== 0);
12002 new_entry
->needs_copy
= TRUE
;
12003 assert(!new_entry
->iokit_acct
);
12004 assert(new_object
->purgable
== VM_PURGABLE_DENY
);
12005 assertf(new_entry
->use_pmap
, "src_map %p new_entry %p\n", src_map
, new_entry
);
12006 result
= KERN_SUCCESS
;
12008 vm_object_offset_t new_offset
;
12009 new_offset
= VME_OFFSET(new_entry
);
12010 result
= vm_object_copy_strategically(src_object
,
12013 VME_OBJECT_PTR(new_entry
),
12015 &new_entry_needs_copy
);
12016 if (new_offset
!= VME_OFFSET(new_entry
)) {
12017 VME_OFFSET_SET(new_entry
, new_offset
);
12020 new_entry
->needs_copy
= new_entry_needs_copy
;
12023 if (result
== KERN_SUCCESS
&&
12024 ((preserve_purgeable
&&
12025 src_object
->purgable
!= VM_PURGABLE_DENY
) ||
12026 new_entry
->used_for_jit
)) {
12028 * Purgeable objects should be COPY_NONE, true share;
12029 * this should be propagated to the copy.
12031 * Also force mappings the pmap specially protects to
12032 * be COPY_NONE; trying to COW these mappings would
12033 * change the effective protections, which could have
12034 * side effects if the pmap layer relies on the
12035 * specified protections.
12038 vm_object_t new_object
;
12040 new_object
= VME_OBJECT(new_entry
);
12041 assert(new_object
!= src_object
);
12042 vm_object_lock(new_object
);
12043 assert(new_object
->ref_count
== 1);
12044 assert(new_object
->shadow
== VM_OBJECT_NULL
);
12045 assert(new_object
->copy
== VM_OBJECT_NULL
);
12046 assert(new_object
->vo_owner
== NULL
);
12048 new_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
12050 if (preserve_purgeable
&&
12051 src_object
->purgable
!= VM_PURGABLE_DENY
) {
12052 new_object
->true_share
= TRUE
;
12054 /* start as non-volatile with no owner... */
12055 new_object
->purgable
= VM_PURGABLE_NONVOLATILE
;
12056 vm_purgeable_nonvolatile_enqueue(new_object
, NULL
);
12057 /* ... and move to src_object's purgeable state */
12058 if (src_object
->purgable
!= VM_PURGABLE_NONVOLATILE
) {
12060 state
= src_object
->purgable
;
12061 vm_object_purgable_control(
12063 VM_PURGABLE_SET_STATE_FROM_KERNEL
,
12066 /* no pmap accounting for purgeable objects */
12067 new_entry
->use_pmap
= FALSE
;
12070 vm_object_unlock(new_object
);
12071 new_object
= VM_OBJECT_NULL
;
12074 if (result
!= KERN_SUCCESS
&&
12075 result
!= KERN_MEMORY_RESTART_COPY
) {
12076 vm_map_lock(src_map
);
12081 * Throw away the extra reference
12084 vm_object_deallocate(src_object
);
12087 * Verify that the map has not substantially
12088 * changed while the copy was being made.
12091 vm_map_lock(src_map
);
12093 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
) {
12094 /* src_map hasn't changed: src_entry is still valid */
12095 src_entry
= saved_src_entry
;
12096 goto VerificationSuccessful
;
12100 * Simple version comparison failed.
12102 * Retry the lookup and verify that the
12103 * same object/offset are still present.
12105 * [Note: a memory manager that colludes with
12106 * the calling task can detect that we have
12107 * cheated. While the map was unlocked, the
12108 * mapping could have been changed and restored.]
12111 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
12112 if (result
!= KERN_MEMORY_RESTART_COPY
) {
12113 vm_object_deallocate(VME_OBJECT(new_entry
));
12114 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
12115 /* reset accounting state */
12116 new_entry
->iokit_acct
= FALSE
;
12117 new_entry
->use_pmap
= TRUE
;
12119 RETURN(KERN_INVALID_ADDRESS
);
12122 src_entry
= tmp_entry
;
12123 vm_map_clip_start(src_map
, src_entry
, src_start
);
12125 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
12127 ((src_entry
->max_protection
& VM_PROT_READ
) == 0)) {
12128 goto VerificationFailed
;
12131 if (src_entry
->vme_end
< new_entry
->vme_end
) {
12133 * This entry might have been shortened
12134 * (vm_map_clip_end) or been replaced with
12135 * an entry that ends closer to "src_start"
12137 * Adjust "new_entry" accordingly; copying
12138 * less memory would be correct but we also
12139 * redo the copy (see below) if the new entry
12140 * no longer points at the same object/offset.
12142 assert(VM_MAP_PAGE_ALIGNED(src_entry
->vme_end
,
12143 VM_MAP_COPY_PAGE_MASK(copy
)));
12144 new_entry
->vme_end
= src_entry
->vme_end
;
12145 src_size
= new_entry
->vme_end
- src_start
;
12146 } else if (src_entry
->vme_end
> new_entry
->vme_end
) {
12148 * This entry might have been extended
12149 * (vm_map_entry_simplify() or coalesce)
12150 * or been replaced with an entry that ends farther
12151 * from "src_start" than before.
12153 * We've called vm_object_copy_*() only on
12154 * the previous <start:end> range, so we can't
12155 * just extend new_entry. We have to re-do
12156 * the copy based on the new entry as if it was
12157 * pointing at a different object/offset (see
12158 * "Verification failed" below).
12162 if ((VME_OBJECT(src_entry
) != src_object
) ||
12163 (VME_OFFSET(src_entry
) != src_offset
) ||
12164 (src_entry
->vme_end
> new_entry
->vme_end
)) {
12166 * Verification failed.
12168 * Start over with this top-level entry.
12171 VerificationFailed
: ;
12173 vm_object_deallocate(VME_OBJECT(new_entry
));
12174 tmp_entry
= src_entry
;
12179 * Verification succeeded.
12182 VerificationSuccessful
:;
12184 if (result
== KERN_MEMORY_RESTART_COPY
) {
12195 * Link in the new copy entry.
12198 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
12202 * Determine whether the entire region
12205 src_base
= src_start
;
12206 src_start
= new_entry
->vme_end
;
12207 new_entry
= VM_MAP_ENTRY_NULL
;
12208 while ((src_start
>= src_end
) && (src_end
!= 0)) {
12211 if (src_map
== base_map
) {
12212 /* back to the top */
12217 assert(ptr
!= NULL
);
12218 parent_maps
= parent_maps
->next
;
12220 /* fix up the damage we did in that submap */
12221 vm_map_simplify_range(src_map
,
12225 vm_map_unlock(src_map
);
12226 vm_map_deallocate(src_map
);
12227 vm_map_lock(ptr
->parent_map
);
12228 src_map
= ptr
->parent_map
;
12229 src_base
= ptr
->base_start
;
12230 src_start
= ptr
->base_start
+ ptr
->base_len
;
12231 src_end
= ptr
->base_end
;
12232 if (!vm_map_lookup_entry(src_map
,
12235 (src_end
> src_start
)) {
12236 RETURN(KERN_INVALID_ADDRESS
);
12238 kfree(ptr
, sizeof(submap_map_t
));
12239 if (parent_maps
== NULL
) {
12242 src_entry
= tmp_entry
->vme_prev
;
12245 if ((VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) &&
12246 (src_start
>= src_addr
+ len
) &&
12247 (src_addr
+ len
!= 0)) {
12249 * Stop copying now, even though we haven't reached
12250 * "src_end". We'll adjust the end of the last copy
12251 * entry at the end, if needed.
12253 * If src_map's alignment is different from the
12254 * system's page-alignment, there could be
12255 * extra non-map-aligned map entries between
12256 * the original (non-rounded) "src_addr + len"
12257 * and the rounded "src_end".
12258 * We do not want to copy those map entries since
12259 * they're not part of the copied range.
12264 if ((src_start
>= src_end
) && (src_end
!= 0)) {
12269 * Verify that there are no gaps in the region
12272 tmp_entry
= src_entry
->vme_next
;
12273 if ((tmp_entry
->vme_start
!= src_start
) ||
12274 (tmp_entry
== vm_map_to_entry(src_map
))) {
12275 RETURN(KERN_INVALID_ADDRESS
);
12280 * If the source should be destroyed, do it now, since the
12281 * copy was successful.
12284 (void) vm_map_delete(
12286 vm_map_trunc_page(src_addr
,
12287 VM_MAP_PAGE_MASK(src_map
)),
12289 ((src_map
== kernel_map
) ?
12290 VM_MAP_REMOVE_KUNWIRE
:
12291 VM_MAP_REMOVE_NO_FLAGS
),
12294 /* fix up the damage we did in the base map */
12295 vm_map_simplify_range(
12297 vm_map_trunc_page(src_addr
,
12298 VM_MAP_PAGE_MASK(src_map
)),
12299 vm_map_round_page(src_end
,
12300 VM_MAP_PAGE_MASK(src_map
)));
12303 vm_map_unlock(src_map
);
12304 tmp_entry
= VM_MAP_ENTRY_NULL
;
12306 if (VM_MAP_PAGE_SHIFT(src_map
) > PAGE_SHIFT
&&
12307 VM_MAP_PAGE_SHIFT(src_map
) != VM_MAP_COPY_PAGE_SHIFT(copy
)) {
12308 vm_map_offset_t original_start
, original_offset
, original_end
;
12310 assert(VM_MAP_COPY_PAGE_MASK(copy
) == PAGE_MASK
);
12312 /* adjust alignment of first copy_entry's "vme_start" */
12313 tmp_entry
= vm_map_copy_first_entry(copy
);
12314 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
12315 vm_map_offset_t adjustment
;
12317 original_start
= tmp_entry
->vme_start
;
12318 original_offset
= VME_OFFSET(tmp_entry
);
12320 /* map-align the start of the first copy entry... */
12321 adjustment
= (tmp_entry
->vme_start
-
12323 tmp_entry
->vme_start
,
12324 VM_MAP_PAGE_MASK(src_map
)));
12325 tmp_entry
->vme_start
-= adjustment
;
12326 VME_OFFSET_SET(tmp_entry
,
12327 VME_OFFSET(tmp_entry
) - adjustment
);
12328 copy_addr
-= adjustment
;
12329 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12330 /* ... adjust for mis-aligned start of copy range */
12332 (vm_map_trunc_page(copy
->offset
,
12334 vm_map_trunc_page(copy
->offset
,
12335 VM_MAP_PAGE_MASK(src_map
)));
12337 assert(page_aligned(adjustment
));
12338 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
12339 tmp_entry
->vme_start
+= adjustment
;
12340 VME_OFFSET_SET(tmp_entry
,
12341 (VME_OFFSET(tmp_entry
) +
12343 copy_addr
+= adjustment
;
12344 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12348 * Assert that the adjustments haven't exposed
12349 * more than was originally copied...
12351 assert(tmp_entry
->vme_start
>= original_start
);
12352 assert(VME_OFFSET(tmp_entry
) >= original_offset
);
12354 * ... and that it did not adjust outside of a
12355 * single 16K page.
12357 assert(vm_map_trunc_page(tmp_entry
->vme_start
,
12358 VM_MAP_PAGE_MASK(src_map
)) ==
12359 vm_map_trunc_page(original_start
,
12360 VM_MAP_PAGE_MASK(src_map
)));
12363 /* adjust alignment of last copy_entry's "vme_end" */
12364 tmp_entry
= vm_map_copy_last_entry(copy
);
12365 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
12366 vm_map_offset_t adjustment
;
12368 original_end
= tmp_entry
->vme_end
;
12370 /* map-align the end of the last copy entry... */
12371 tmp_entry
->vme_end
=
12372 vm_map_round_page(tmp_entry
->vme_end
,
12373 VM_MAP_PAGE_MASK(src_map
));
12374 /* ... adjust for mis-aligned end of copy range */
12376 (vm_map_round_page((copy
->offset
+
12378 VM_MAP_PAGE_MASK(src_map
)) -
12379 vm_map_round_page((copy
->offset
+
12383 assert(page_aligned(adjustment
));
12384 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
12385 tmp_entry
->vme_end
-= adjustment
;
12386 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12390 * Assert that the adjustments haven't exposed
12391 * more than was originally copied...
12393 assert(tmp_entry
->vme_end
<= original_end
);
12395 * ... and that it did not adjust outside of a
12396 * single 16K page.
12398 assert(vm_map_round_page(tmp_entry
->vme_end
,
12399 VM_MAP_PAGE_MASK(src_map
)) ==
12400 vm_map_round_page(original_end
,
12401 VM_MAP_PAGE_MASK(src_map
)));
12405 /* Fix-up start and end points in copy. This is necessary */
12406 /* when the various entries in the copy object were picked */
12407 /* up from different sub-maps */
12409 tmp_entry
= vm_map_copy_first_entry(copy
);
12410 copy_size
= 0; /* compute actual size */
12411 while (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
12412 assert(VM_MAP_PAGE_ALIGNED(
12413 copy_addr
+ (tmp_entry
->vme_end
-
12414 tmp_entry
->vme_start
),
12415 MIN(VM_MAP_COPY_PAGE_MASK(copy
), PAGE_MASK
)));
12416 assert(VM_MAP_PAGE_ALIGNED(
12418 MIN(VM_MAP_COPY_PAGE_MASK(copy
), PAGE_MASK
)));
12421 * The copy_entries will be injected directly into the
12422 * destination map and might not be "map aligned" there...
12424 tmp_entry
->map_aligned
= FALSE
;
12426 tmp_entry
->vme_end
= copy_addr
+
12427 (tmp_entry
->vme_end
- tmp_entry
->vme_start
);
12428 tmp_entry
->vme_start
= copy_addr
;
12429 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12430 copy_addr
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
12431 copy_size
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
12432 tmp_entry
= (struct vm_map_entry
*)tmp_entry
->vme_next
;
12435 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
&&
12436 copy_size
< copy
->size
) {
12438 * The actual size of the VM map copy is smaller than what
12439 * was requested by the caller. This must be because some
12440 * PAGE_SIZE-sized pages are missing at the end of the last
12441 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12442 * The caller might not have been aware of those missing
12443 * pages and might not want to be aware of it, which is
12444 * fine as long as they don't try to access (and crash on)
12445 * those missing pages.
12446 * Let's adjust the size of the "copy", to avoid failing
12447 * in vm_map_copyout() or vm_map_copy_overwrite().
12449 assert(vm_map_round_page(copy_size
,
12450 VM_MAP_PAGE_MASK(src_map
)) ==
12451 vm_map_round_page(copy
->size
,
12452 VM_MAP_PAGE_MASK(src_map
)));
12453 copy
->size
= copy_size
;
12456 *copy_result
= copy
;
12457 return KERN_SUCCESS
;
12463 vm_map_copy_extract(
12465 vm_map_address_t src_addr
,
12467 vm_prot_t required_prot
,
12469 vm_map_copy_t
*copy_result
, /* OUT */
12470 vm_prot_t
*cur_prot
, /* OUT */
12471 vm_prot_t
*max_prot
, /* OUT */
12472 vm_inherit_t inheritance
,
12473 vm_map_kernel_flags_t vmk_flags
)
12475 vm_map_copy_t copy
;
12479 * Check for copies of zero bytes.
12483 *copy_result
= VM_MAP_COPY_NULL
;
12484 return KERN_SUCCESS
;
12488 * Check that the end address doesn't overflow
12490 if (src_addr
+ len
< src_addr
) {
12491 return KERN_INVALID_ADDRESS
;
12494 if (VM_MAP_PAGE_SIZE(src_map
) < PAGE_SIZE
) {
12495 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map
, (uint64_t)src_addr
, (uint64_t)(src_addr
+ len
));
12499 * Allocate a header element for the list.
12501 * Use the start and end in the header to
12502 * remember the endpoints prior to rounding.
12505 copy
= vm_map_copy_allocate();
12506 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
12507 copy
->cpy_hdr
.entries_pageable
= vmk_flags
.vmkf_copy_pageable
;
12509 vm_map_store_init(&copy
->cpy_hdr
);
12514 kr
= vm_map_remap_extract(src_map
,
12518 do_copy
, /* copy */
12524 if (kr
!= KERN_SUCCESS
) {
12525 vm_map_copy_discard(copy
);
12528 assert((*cur_prot
& required_prot
) == required_prot
);
12529 assert((*max_prot
& required_prot
) == required_prot
);
12531 *copy_result
= copy
;
12532 return KERN_SUCCESS
;
12536 * vm_map_copyin_object:
12538 * Create a copy object from an object.
12539 * Our caller donates an object reference.
12543 vm_map_copyin_object(
12544 vm_object_t object
,
12545 vm_object_offset_t offset
, /* offset of region in object */
12546 vm_object_size_t size
, /* size of region in object */
12547 vm_map_copy_t
*copy_result
) /* OUT */
12549 vm_map_copy_t copy
; /* Resulting copy */
12552 * We drop the object into a special copy object
12553 * that contains the object directly.
12556 copy
= vm_map_copy_allocate();
12557 copy
->type
= VM_MAP_COPY_OBJECT
;
12558 copy
->cpy_object
= object
;
12559 copy
->offset
= offset
;
12562 *copy_result
= copy
;
12563 return KERN_SUCCESS
;
12569 vm_map_entry_t old_entry
,
12572 vm_object_t object
;
12573 vm_map_entry_t new_entry
;
12576 * New sharing code. New map entry
12577 * references original object. Internal
12578 * objects use asynchronous copy algorithm for
12579 * future copies. First make sure we have
12580 * the right object. If we need a shadow,
12581 * or someone else already has one, then
12582 * make a new shadow and share it.
12585 object
= VME_OBJECT(old_entry
);
12586 if (old_entry
->is_sub_map
) {
12587 assert(old_entry
->wired_count
== 0);
12588 #ifndef NO_NESTED_PMAP
12589 if (old_entry
->use_pmap
) {
12590 kern_return_t result
;
12592 result
= pmap_nest(new_map
->pmap
,
12593 (VME_SUBMAP(old_entry
))->pmap
,
12594 (addr64_t
)old_entry
->vme_start
,
12595 (uint64_t)(old_entry
->vme_end
- old_entry
->vme_start
));
12597 panic("vm_map_fork_share: pmap_nest failed!");
12600 #endif /* NO_NESTED_PMAP */
12601 } else if (object
== VM_OBJECT_NULL
) {
12602 object
= vm_object_allocate((vm_map_size_t
)(old_entry
->vme_end
-
12603 old_entry
->vme_start
));
12604 VME_OFFSET_SET(old_entry
, 0);
12605 VME_OBJECT_SET(old_entry
, object
);
12606 old_entry
->use_pmap
= TRUE
;
12607 // assert(!old_entry->needs_copy);
12608 } else if (object
->copy_strategy
!=
12609 MEMORY_OBJECT_COPY_SYMMETRIC
) {
12611 * We are already using an asymmetric
12612 * copy, and therefore we already have
12613 * the right object.
12616 assert(!old_entry
->needs_copy
);
12617 } else if (old_entry
->needs_copy
|| /* case 1 */
12618 object
->shadowed
|| /* case 2 */
12619 (!object
->true_share
&& /* case 3 */
12620 !old_entry
->is_shared
&&
12622 (vm_map_size_t
)(old_entry
->vme_end
-
12623 old_entry
->vme_start
)))) {
12625 * We need to create a shadow.
12626 * There are three cases here.
12627 * In the first case, we need to
12628 * complete a deferred symmetrical
12629 * copy that we participated in.
12630 * In the second and third cases,
12631 * we need to create the shadow so
12632 * that changes that we make to the
12633 * object do not interfere with
12634 * any symmetrical copies which
12635 * have occurred (case 2) or which
12636 * might occur (case 3).
12638 * The first case is when we had
12639 * deferred shadow object creation
12640 * via the entry->needs_copy mechanism.
12641 * This mechanism only works when
12642 * only one entry points to the source
12643 * object, and we are about to create
12644 * a second entry pointing to the
12645 * same object. The problem is that
12646 * there is no way of mapping from
12647 * an object to the entries pointing
12648 * to it. (Deferred shadow creation
12649 * works with one entry because it occurs
12650 * at fault time, and we walk from the
12651 * entry to the object when handling
12654 * The second case is when the object
12655 * to be shared has already been copied
12656 * with a symmetric copy, but we point
12657 * directly to the object without
12658 * needs_copy set in our entry. (This
12659 * can happen because different ranges
12660 * of an object can be pointed to by
12661 * different entries. In particular,
12662 * a single entry pointing to an object
12663 * can be split by a call to vm_inherit,
12664 * which, combined with task_create, can
12665 * result in the different entries
12666 * having different needs_copy values.)
12667 * The shadowed flag in the object allows
12668 * us to detect this case. The problem
12669 * with this case is that if this object
12670 * has or will have shadows, then we
12671 * must not perform an asymmetric copy
12672 * of this object, since such a copy
12673 * allows the object to be changed, which
12674 * will break the previous symmetrical
12675 * copies (which rely upon the object
12676 * not changing). In a sense, the shadowed
12677 * flag says "don't change this object".
12678 * We fix this by creating a shadow
12679 * object for this object, and sharing
12680 * that. This works because we are free
12681 * to change the shadow object (and thus
12682 * to use an asymmetric copy strategy);
12683 * this is also semantically correct,
12684 * since this object is temporary, and
12685 * therefore a copy of the object is
12686 * as good as the object itself. (This
12687 * is not true for permanent objects,
12688 * since the pager needs to see changes,
12689 * which won't happen if the changes
12690 * are made to a copy.)
12692 * The third case is when the object
12693 * to be shared has parts sticking
12694 * outside of the entry we're working
12695 * with, and thus may in the future
12696 * be subject to a symmetrical copy.
12697 * (This is a preemptive version of
12700 VME_OBJECT_SHADOW(old_entry
,
12701 (vm_map_size_t
) (old_entry
->vme_end
-
12702 old_entry
->vme_start
));
12705 * If we're making a shadow for other than
12706 * copy on write reasons, then we have
12707 * to remove write permission.
12710 if (!old_entry
->needs_copy
&&
12711 (old_entry
->protection
& VM_PROT_WRITE
)) {
12714 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, old_entry
->protection
));
12716 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
12718 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, prot
));
12720 if (override_nx(old_map
, VME_ALIAS(old_entry
)) && prot
) {
12721 prot
|= VM_PROT_EXECUTE
;
12725 if (old_map
->mapped_in_other_pmaps
) {
12726 vm_object_pmap_protect(
12727 VME_OBJECT(old_entry
),
12728 VME_OFFSET(old_entry
),
12729 (old_entry
->vme_end
-
12730 old_entry
->vme_start
),
12733 old_entry
->vme_start
,
12736 pmap_protect(old_map
->pmap
,
12737 old_entry
->vme_start
,
12738 old_entry
->vme_end
,
12743 old_entry
->needs_copy
= FALSE
;
12744 object
= VME_OBJECT(old_entry
);
12749 * If object was using a symmetric copy strategy,
12750 * change its copy strategy to the default
12751 * asymmetric copy strategy, which is copy_delay
12752 * in the non-norma case and copy_call in the
12753 * norma case. Bump the reference count for the
12757 if (old_entry
->is_sub_map
) {
12758 vm_map_lock(VME_SUBMAP(old_entry
));
12759 vm_map_reference(VME_SUBMAP(old_entry
));
12760 vm_map_unlock(VME_SUBMAP(old_entry
));
12762 vm_object_lock(object
);
12763 vm_object_reference_locked(object
);
12764 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
12765 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
12767 vm_object_unlock(object
);
12771 * Clone the entry, using object ref from above.
12772 * Mark both entries as shared.
12775 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* Never the kernel
12776 * map or descendants */
12777 vm_map_entry_copy(old_map
, new_entry
, old_entry
);
12778 old_entry
->is_shared
= TRUE
;
12779 new_entry
->is_shared
= TRUE
;
12782 * We're dealing with a shared mapping, so the resulting mapping
12783 * should inherit some of the original mapping's accounting settings.
12784 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12785 * "use_pmap" should stay the same as before (if it hasn't been reset
12786 * to TRUE when we cleared "iokit_acct").
12788 assert(!new_entry
->iokit_acct
);
12791 * If old entry's inheritance is VM_INHERIT_NONE,
12792 * the new entry is for corpse fork, remove the
12793 * write permission from the new entry.
12795 if (old_entry
->inheritance
== VM_INHERIT_NONE
) {
12796 new_entry
->protection
&= ~VM_PROT_WRITE
;
12797 new_entry
->max_protection
&= ~VM_PROT_WRITE
;
12801 * Insert the entry into the new map -- we
12802 * know we're inserting at the end of the new
12806 vm_map_store_entry_link(new_map
, vm_map_last_entry(new_map
), new_entry
,
12807 VM_MAP_KERNEL_FLAGS_NONE
);
12810 * Update the physical map
12813 if (old_entry
->is_sub_map
) {
12814 /* Bill Angell pmap support goes here */
12816 pmap_copy(new_map
->pmap
, old_map
->pmap
, new_entry
->vme_start
,
12817 old_entry
->vme_end
- old_entry
->vme_start
,
12818 old_entry
->vme_start
);
12825 vm_map_entry_t
*old_entry_p
,
12827 int vm_map_copyin_flags
)
12829 vm_map_entry_t old_entry
= *old_entry_p
;
12830 vm_map_size_t entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
12831 vm_map_offset_t start
= old_entry
->vme_start
;
12832 vm_map_copy_t copy
;
12833 vm_map_entry_t last
= vm_map_last_entry(new_map
);
12835 vm_map_unlock(old_map
);
12837 * Use maxprot version of copyin because we
12838 * care about whether this memory can ever
12839 * be accessed, not just whether it's accessible
12842 vm_map_copyin_flags
|= VM_MAP_COPYIN_USE_MAXPROT
;
12843 if (vm_map_copyin_internal(old_map
, start
, entry_size
,
12844 vm_map_copyin_flags
, &copy
)
12847 * The map might have changed while it
12848 * was unlocked, check it again. Skip
12849 * any blank space or permanently
12850 * unreadable region.
12852 vm_map_lock(old_map
);
12853 if (!vm_map_lookup_entry(old_map
, start
, &last
) ||
12854 (last
->max_protection
& VM_PROT_READ
) == VM_PROT_NONE
) {
12855 last
= last
->vme_next
;
12857 *old_entry_p
= last
;
12860 * XXX For some error returns, want to
12861 * XXX skip to the next element. Note
12862 * that INVALID_ADDRESS and
12863 * PROTECTION_FAILURE are handled above.
12870 * Assert that the vm_map_copy is coming from the right
12871 * zone and hasn't been forged
12873 vm_map_copy_require(copy
);
12876 * Insert the copy into the new map
12878 vm_map_copy_insert(new_map
, last
, copy
);
12881 * Pick up the traversal at the end of
12882 * the copied region.
12885 vm_map_lock(old_map
);
12886 start
+= entry_size
;
12887 if (!vm_map_lookup_entry(old_map
, start
, &last
)) {
12888 last
= last
->vme_next
;
12890 if (last
->vme_start
== start
) {
12892 * No need to clip here and we don't
12893 * want to cause any unnecessary
12897 vm_map_clip_start(old_map
, last
, start
);
12900 *old_entry_p
= last
;
12908 * Create and return a new map based on the old
12909 * map, according to the inheritance values on the
12910 * regions in that map and the options.
12912 * The source map must not be locked.
12922 vm_map_entry_t old_entry
;
12923 vm_map_size_t new_size
= 0, entry_size
;
12924 vm_map_entry_t new_entry
;
12925 boolean_t src_needs_copy
;
12926 boolean_t new_entry_needs_copy
;
12927 boolean_t pmap_is64bit
;
12928 int vm_map_copyin_flags
;
12929 vm_inherit_t old_entry_inheritance
;
12930 int map_create_options
;
12931 kern_return_t footprint_collect_kr
;
12933 if (options
& ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE
|
12934 VM_MAP_FORK_PRESERVE_PURGEABLE
|
12935 VM_MAP_FORK_CORPSE_FOOTPRINT
)) {
12936 /* unsupported option */
12937 return VM_MAP_NULL
;
12941 #if defined(__i386__) || defined(__x86_64__)
12942 old_map
->pmap
->pm_task_map
!= TASK_MAP_32BIT
;
12943 #elif defined(__arm64__)
12944 old_map
->pmap
->max
== MACH_VM_MAX_ADDRESS
;
12945 #elif defined(__arm__)
12948 #error Unknown architecture.
12951 unsigned int pmap_flags
= 0;
12952 pmap_flags
|= pmap_is64bit
? PMAP_CREATE_64BIT
: 0;
12953 #if defined(HAS_APPLE_PAC)
12954 pmap_flags
|= old_map
->pmap
->disable_jop
? PMAP_CREATE_DISABLE_JOP
: 0;
12956 #if PMAP_CREATE_FORCE_4K_PAGES
12957 if (VM_MAP_PAGE_SIZE(old_map
) == FOURK_PAGE_SIZE
&&
12958 PAGE_SIZE
!= FOURK_PAGE_SIZE
) {
12959 pmap_flags
|= PMAP_CREATE_FORCE_4K_PAGES
;
12961 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12962 new_pmap
= pmap_create_options(ledger
, (vm_map_size_t
) 0, pmap_flags
);
12964 vm_map_reference_swap(old_map
);
12965 vm_map_lock(old_map
);
12967 map_create_options
= 0;
12968 if (old_map
->hdr
.entries_pageable
) {
12969 map_create_options
|= VM_MAP_CREATE_PAGEABLE
;
12971 if (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) {
12972 map_create_options
|= VM_MAP_CREATE_CORPSE_FOOTPRINT
;
12973 footprint_collect_kr
= KERN_SUCCESS
;
12975 new_map
= vm_map_create_options(new_pmap
,
12976 old_map
->min_offset
,
12977 old_map
->max_offset
,
12978 map_create_options
);
12979 /* inherit cs_enforcement */
12980 vm_map_cs_enforcement_set(new_map
, old_map
->cs_enforcement
);
12981 vm_map_lock(new_map
);
12982 vm_commit_pagezero_status(new_map
);
12983 /* inherit the parent map's page size */
12984 vm_map_set_page_shift(new_map
, VM_MAP_PAGE_SHIFT(old_map
));
12986 old_entry
= vm_map_first_entry(old_map
);
12987 old_entry
!= vm_map_to_entry(old_map
);
12989 entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
12991 old_entry_inheritance
= old_entry
->inheritance
;
12993 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12994 * share VM_INHERIT_NONE entries that are not backed by a
12997 if (old_entry_inheritance
== VM_INHERIT_NONE
&&
12998 (options
& VM_MAP_FORK_SHARE_IF_INHERIT_NONE
) &&
12999 (old_entry
->protection
& VM_PROT_READ
) &&
13000 !(!old_entry
->is_sub_map
&&
13001 VME_OBJECT(old_entry
) != NULL
&&
13002 VME_OBJECT(old_entry
)->pager
!= NULL
&&
13003 is_device_pager_ops(
13004 VME_OBJECT(old_entry
)->pager
->mo_pager_ops
))) {
13005 old_entry_inheritance
= VM_INHERIT_SHARE
;
13008 if (old_entry_inheritance
!= VM_INHERIT_NONE
&&
13009 (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) &&
13010 footprint_collect_kr
== KERN_SUCCESS
) {
13012 * The corpse won't have old_map->pmap to query
13013 * footprint information, so collect that data now
13014 * and store it in new_map->vmmap_corpse_footprint
13015 * for later autopsy.
13017 footprint_collect_kr
=
13018 vm_map_corpse_footprint_collect(old_map
,
13023 switch (old_entry_inheritance
) {
13024 case VM_INHERIT_NONE
:
13027 case VM_INHERIT_SHARE
:
13028 vm_map_fork_share(old_map
, old_entry
, new_map
);
13029 new_size
+= entry_size
;
13032 case VM_INHERIT_COPY
:
13035 * Inline the copy_quickly case;
13036 * upon failure, fall back on call
13037 * to vm_map_fork_copy.
13040 if (old_entry
->is_sub_map
) {
13043 if ((old_entry
->wired_count
!= 0) ||
13044 ((VME_OBJECT(old_entry
) != NULL
) &&
13045 (VME_OBJECT(old_entry
)->true_share
))) {
13046 goto slow_vm_map_fork_copy
;
13049 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* never the kernel map or descendants */
13050 vm_map_entry_copy(old_map
, new_entry
, old_entry
);
13052 if (new_entry
->used_for_jit
== TRUE
&& new_map
->jit_entry_exists
== FALSE
) {
13053 new_map
->jit_entry_exists
= TRUE
;
13056 if (new_entry
->is_sub_map
) {
13057 /* clear address space specifics */
13058 new_entry
->use_pmap
= FALSE
;
13061 * We're dealing with a copy-on-write operation,
13062 * so the resulting mapping should not inherit
13063 * the original mapping's accounting settings.
13064 * "iokit_acct" should have been cleared in
13065 * vm_map_entry_copy().
13066 * "use_pmap" should be reset to its default
13067 * (TRUE) so that the new mapping gets
13068 * accounted for in the task's memory footprint.
13070 assert(!new_entry
->iokit_acct
);
13071 new_entry
->use_pmap
= TRUE
;
13074 if (!vm_object_copy_quickly(
13075 VME_OBJECT_PTR(new_entry
),
13076 VME_OFFSET(old_entry
),
13077 (old_entry
->vme_end
-
13078 old_entry
->vme_start
),
13080 &new_entry_needs_copy
)) {
13081 vm_map_entry_dispose(new_map
, new_entry
);
13082 goto slow_vm_map_fork_copy
;
13086 * Handle copy-on-write obligations
13089 if (src_needs_copy
&& !old_entry
->needs_copy
) {
13092 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, old_entry
->protection
));
13094 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
13096 if (override_nx(old_map
, VME_ALIAS(old_entry
))
13098 prot
|= VM_PROT_EXECUTE
;
13101 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, prot
));
13103 vm_object_pmap_protect(
13104 VME_OBJECT(old_entry
),
13105 VME_OFFSET(old_entry
),
13106 (old_entry
->vme_end
-
13107 old_entry
->vme_start
),
13108 ((old_entry
->is_shared
13109 || old_map
->mapped_in_other_pmaps
)
13112 VM_MAP_PAGE_SIZE(old_map
),
13113 old_entry
->vme_start
,
13116 assert(old_entry
->wired_count
== 0);
13117 old_entry
->needs_copy
= TRUE
;
13119 new_entry
->needs_copy
= new_entry_needs_copy
;
13122 * Insert the entry at the end
13126 vm_map_store_entry_link(new_map
,
13127 vm_map_last_entry(new_map
),
13129 VM_MAP_KERNEL_FLAGS_NONE
);
13130 new_size
+= entry_size
;
13133 slow_vm_map_fork_copy
:
13134 vm_map_copyin_flags
= 0;
13135 if (options
& VM_MAP_FORK_PRESERVE_PURGEABLE
) {
13136 vm_map_copyin_flags
|=
13137 VM_MAP_COPYIN_PRESERVE_PURGEABLE
;
13139 if (vm_map_fork_copy(old_map
,
13142 vm_map_copyin_flags
)) {
13143 new_size
+= entry_size
;
13147 old_entry
= old_entry
->vme_next
;
13150 #if defined(__arm64__)
13151 pmap_insert_sharedpage(new_map
->pmap
);
13152 #endif /* __arm64__ */
13154 new_map
->size
= new_size
;
13156 if (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) {
13157 vm_map_corpse_footprint_collect_done(new_map
);
13160 /* Propagate JIT entitlement for the pmap layer. */
13161 if (pmap_get_jit_entitled(old_map
->pmap
)) {
13162 /* Tell the pmap that it supports JIT. */
13163 pmap_set_jit_entitled(new_map
->pmap
);
13166 vm_map_unlock(new_map
);
13167 vm_map_unlock(old_map
);
13168 vm_map_deallocate(old_map
);
13176 * Setup the "new_map" with the proper execution environment according
13177 * to the type of executable (platform, 64bit, chroot environment).
13178 * Map the comm page and shared region, etc...
13187 cpu_subtype_t cpu_subtype
,
13190 SHARED_REGION_TRACE_DEBUG(
13191 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13192 (void *)VM_KERNEL_ADDRPERM(current_task()),
13193 (void *)VM_KERNEL_ADDRPERM(new_map
),
13194 (void *)VM_KERNEL_ADDRPERM(task
),
13195 (void *)VM_KERNEL_ADDRPERM(fsroot
),
13198 (void) vm_commpage_enter(new_map
, task
, is64bit
);
13200 (void) vm_shared_region_enter(new_map
, task
, is64bit
, fsroot
, cpu
, cpu_subtype
, reslide
);
13202 SHARED_REGION_TRACE_DEBUG(
13203 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13204 (void *)VM_KERNEL_ADDRPERM(current_task()),
13205 (void *)VM_KERNEL_ADDRPERM(new_map
),
13206 (void *)VM_KERNEL_ADDRPERM(task
),
13207 (void *)VM_KERNEL_ADDRPERM(fsroot
),
13212 * Some devices have region(s) of memory that shouldn't get allocated by
13213 * user processes. The following code creates dummy vm_map_entry_t's for each
13214 * of the regions that needs to be reserved to prevent any allocations in
13217 kern_return_t kr
= KERN_FAILURE
;
13218 vm_map_kernel_flags_t vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
13219 vmk_flags
.vmkf_permanent
= TRUE
;
13220 vmk_flags
.vmkf_beyond_max
= TRUE
;
13222 struct vm_reserved_region
*regions
= NULL
;
13223 size_t num_regions
= ml_get_vm_reserved_regions(is64bit
, &regions
);
13224 assert((num_regions
== 0) || (num_regions
> 0 && regions
!= NULL
));
13226 for (size_t i
= 0; i
< num_regions
; ++i
) {
13229 &regions
[i
].vmrr_addr
,
13230 regions
[i
].vmrr_size
,
13231 (vm_map_offset_t
)0,
13234 VM_KERN_MEMORY_NONE
,
13236 (vm_object_offset_t
)0,
13242 if (kr
!= KERN_SUCCESS
) {
13243 panic("Failed to reserve %s region in user map %p %d", regions
[i
].vmrr_name
, new_map
, kr
);
13247 new_map
->reserved_regions
= (num_regions
? TRUE
: FALSE
);
13249 return KERN_SUCCESS
;
13253 * vm_map_lookup_locked:
13255 * Finds the VM object, offset, and
13256 * protection for a given virtual address in the
13257 * specified map, assuming a page fault of the
13260 * Returns the (object, offset, protection) for
13261 * this address, whether it is wired down, and whether
13262 * this map has the only reference to the data in question.
13263 * In order to later verify this lookup, a "version"
13265 * If contended != NULL, *contended will be set to
13266 * true iff the thread had to spin or block to acquire
13267 * an exclusive lock.
13269 * The map MUST be locked by the caller and WILL be
13270 * locked on exit. In order to guarantee the
13271 * existence of the returned object, it is returned
13274 * If a lookup is requested with "write protection"
13275 * specified, the map may be changed to perform virtual
13276 * copying operations, although the data referenced will
13280 vm_map_lookup_locked(
13281 vm_map_t
*var_map
, /* IN/OUT */
13282 vm_map_offset_t vaddr
,
13283 vm_prot_t fault_type
,
13284 int object_lock_type
,
13285 vm_map_version_t
*out_version
, /* OUT */
13286 vm_object_t
*object
, /* OUT */
13287 vm_object_offset_t
*offset
, /* OUT */
13288 vm_prot_t
*out_prot
, /* OUT */
13289 boolean_t
*wired
, /* OUT */
13290 vm_object_fault_info_t fault_info
, /* OUT */
13291 vm_map_t
*real_map
, /* OUT */
13292 bool *contended
) /* OUT */
13294 vm_map_entry_t entry
;
13295 vm_map_t map
= *var_map
;
13296 vm_map_t old_map
= *var_map
;
13297 vm_map_t cow_sub_map_parent
= VM_MAP_NULL
;
13298 vm_map_offset_t cow_parent_vaddr
= 0;
13299 vm_map_offset_t old_start
= 0;
13300 vm_map_offset_t old_end
= 0;
13302 boolean_t mask_protections
;
13303 boolean_t force_copy
;
13304 boolean_t no_force_copy_if_executable
;
13305 vm_prot_t original_fault_type
;
13306 vm_map_size_t fault_page_mask
;
13309 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
13310 * as a mask against the mapping's actual protections, not as an
13313 mask_protections
= (fault_type
& VM_PROT_IS_MASK
) ? TRUE
: FALSE
;
13314 force_copy
= (fault_type
& VM_PROT_COPY
) ? TRUE
: FALSE
;
13315 no_force_copy_if_executable
= (fault_type
& VM_PROT_COPY_FAIL_IF_EXECUTABLE
) ? TRUE
: FALSE
;
13316 fault_type
&= VM_PROT_ALL
;
13317 original_fault_type
= fault_type
;
13319 *contended
= false;
13324 fault_page_mask
= MIN(VM_MAP_PAGE_MASK(map
), PAGE_MASK
);
13325 vaddr
= VM_MAP_TRUNC_PAGE(vaddr
, fault_page_mask
);
13328 fault_type
= original_fault_type
;
13331 * If the map has an interesting hint, try it before calling
13332 * full blown lookup routine.
13336 if ((entry
== vm_map_to_entry(map
)) ||
13337 (vaddr
< entry
->vme_start
) || (vaddr
>= entry
->vme_end
)) {
13338 vm_map_entry_t tmp_entry
;
13341 * Entry was either not a valid hint, or the vaddr
13342 * was not contained in the entry, so do a full lookup.
13344 if (!vm_map_lookup_entry(map
, vaddr
, &tmp_entry
)) {
13345 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13346 vm_map_unlock(cow_sub_map_parent
);
13348 if ((*real_map
!= map
)
13349 && (*real_map
!= cow_sub_map_parent
)) {
13350 vm_map_unlock(*real_map
);
13352 return KERN_INVALID_ADDRESS
;
13357 if (map
== old_map
) {
13358 old_start
= entry
->vme_start
;
13359 old_end
= entry
->vme_end
;
13363 * Handle submaps. Drop lock on upper map, submap is
13368 if (entry
->is_sub_map
) {
13369 vm_map_offset_t local_vaddr
;
13370 vm_map_offset_t end_delta
;
13371 vm_map_offset_t start_delta
;
13372 vm_map_entry_t submap_entry
, saved_submap_entry
;
13373 vm_object_offset_t submap_entry_offset
;
13374 vm_object_size_t submap_entry_size
;
13375 vm_prot_t subentry_protection
;
13376 vm_prot_t subentry_max_protection
;
13377 boolean_t subentry_no_copy_on_read
;
13378 boolean_t mapped_needs_copy
= FALSE
;
13379 vm_map_version_t version
;
13381 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)) >= VM_MAP_PAGE_SHIFT(map
),
13382 "map %p (%d) entry %p submap %p (%d)\n",
13383 map
, VM_MAP_PAGE_SHIFT(map
), entry
,
13384 VME_SUBMAP(entry
), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)));
13386 local_vaddr
= vaddr
;
13388 if ((entry
->use_pmap
&&
13389 !((fault_type
& VM_PROT_WRITE
) ||
13391 /* if real_map equals map we unlock below */
13392 if ((*real_map
!= map
) &&
13393 (*real_map
!= cow_sub_map_parent
)) {
13394 vm_map_unlock(*real_map
);
13396 *real_map
= VME_SUBMAP(entry
);
13399 if (entry
->needs_copy
&&
13400 ((fault_type
& VM_PROT_WRITE
) ||
13402 if (!mapped_needs_copy
) {
13403 if (vm_map_lock_read_to_write(map
)) {
13404 vm_map_lock_read(map
);
13408 vm_map_lock_read(VME_SUBMAP(entry
));
13409 *var_map
= VME_SUBMAP(entry
);
13410 cow_sub_map_parent
= map
;
13411 /* reset base to map before cow object */
13412 /* this is the map which will accept */
13413 /* the new cow object */
13414 old_start
= entry
->vme_start
;
13415 old_end
= entry
->vme_end
;
13416 cow_parent_vaddr
= vaddr
;
13417 mapped_needs_copy
= TRUE
;
13419 vm_map_lock_read(VME_SUBMAP(entry
));
13420 *var_map
= VME_SUBMAP(entry
);
13421 if ((cow_sub_map_parent
!= map
) &&
13422 (*real_map
!= map
)) {
13423 vm_map_unlock(map
);
13427 vm_map_lock_read(VME_SUBMAP(entry
));
13428 *var_map
= VME_SUBMAP(entry
);
13429 /* leave map locked if it is a target */
13430 /* cow sub_map above otherwise, just */
13431 /* follow the maps down to the object */
13432 /* here we unlock knowing we are not */
13433 /* revisiting the map. */
13434 if ((*real_map
!= map
) && (map
!= cow_sub_map_parent
)) {
13435 vm_map_unlock_read(map
);
13441 /* calculate the offset in the submap for vaddr */
13442 local_vaddr
= (local_vaddr
- entry
->vme_start
) + VME_OFFSET(entry
);
13443 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr
, fault_page_mask
),
13444 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13445 (uint64_t)local_vaddr
, (uint64_t)entry
->vme_start
, (uint64_t)fault_page_mask
);
13448 if (!vm_map_lookup_entry(map
, local_vaddr
, &submap_entry
)) {
13449 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13450 vm_map_unlock(cow_sub_map_parent
);
13452 if ((*real_map
!= map
)
13453 && (*real_map
!= cow_sub_map_parent
)) {
13454 vm_map_unlock(*real_map
);
13457 return KERN_INVALID_ADDRESS
;
13460 /* find the attenuated shadow of the underlying object */
13461 /* on our target map */
13463 /* in English, the submap object may extend beyond the */
13464 /* region mapped by the entry or, may only fill a portion */
13465 /* of it. For our purposes, we only care if the object */
13466 /* doesn't fill. In this case the area which will */
13467 /* ultimately be clipped in the top map will only need */
13468 /* to be as big as the portion of the underlying entry */
13469 /* which is mapped */
13470 start_delta
= submap_entry
->vme_start
> VME_OFFSET(entry
) ?
13471 submap_entry
->vme_start
- VME_OFFSET(entry
) : 0;
13474 (VME_OFFSET(entry
) + start_delta
+ (old_end
- old_start
)) <=
13475 submap_entry
->vme_end
?
13476 0 : (VME_OFFSET(entry
) +
13477 (old_end
- old_start
))
13478 - submap_entry
->vme_end
;
13480 old_start
+= start_delta
;
13481 old_end
-= end_delta
;
13483 if (submap_entry
->is_sub_map
) {
13484 entry
= submap_entry
;
13485 vaddr
= local_vaddr
;
13486 goto submap_recurse
;
13489 if (((fault_type
& VM_PROT_WRITE
) ||
13491 && cow_sub_map_parent
) {
13492 vm_object_t sub_object
, copy_object
;
13493 vm_object_offset_t copy_offset
;
13494 vm_map_offset_t local_start
;
13495 vm_map_offset_t local_end
;
13496 boolean_t copied_slowly
= FALSE
;
13497 vm_object_offset_t copied_slowly_phys_offset
= 0;
13498 kern_return_t kr
= KERN_SUCCESS
;
13500 if (vm_map_lock_read_to_write(map
)) {
13501 vm_map_lock_read(map
);
13502 old_start
-= start_delta
;
13503 old_end
+= end_delta
;
13508 sub_object
= VME_OBJECT(submap_entry
);
13509 if (sub_object
== VM_OBJECT_NULL
) {
13511 vm_object_allocate(
13513 (submap_entry
->vme_end
-
13514 submap_entry
->vme_start
));
13515 VME_OBJECT_SET(submap_entry
, sub_object
);
13516 VME_OFFSET_SET(submap_entry
, 0);
13517 assert(!submap_entry
->is_sub_map
);
13518 assert(submap_entry
->use_pmap
);
13520 local_start
= local_vaddr
-
13521 (cow_parent_vaddr
- old_start
);
13522 local_end
= local_vaddr
+
13523 (old_end
- cow_parent_vaddr
);
13524 vm_map_clip_start(map
, submap_entry
, local_start
);
13525 vm_map_clip_end(map
, submap_entry
, local_end
);
13526 if (submap_entry
->is_sub_map
) {
13527 /* unnesting was done when clipping */
13528 assert(!submap_entry
->use_pmap
);
13531 /* This is the COW case, lets connect */
13532 /* an entry in our space to the underlying */
13533 /* object in the submap, bypassing the */
13536 if (submap_entry
->wired_count
!= 0 ||
13537 (sub_object
->copy_strategy
!=
13538 MEMORY_OBJECT_COPY_SYMMETRIC
)) {
13539 if ((submap_entry
->protection
& VM_PROT_EXECUTE
) &&
13540 no_force_copy_if_executable
) {
13541 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13542 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13543 vm_map_unlock(cow_sub_map_parent
);
13545 if ((*real_map
!= map
)
13546 && (*real_map
!= cow_sub_map_parent
)) {
13547 vm_map_unlock(*real_map
);
13550 vm_map_lock_write_to_read(map
);
13551 kr
= KERN_PROTECTION_FAILURE
;
13552 DTRACE_VM4(submap_no_copy_executable
,
13554 vm_object_offset_t
, submap_entry_offset
,
13555 vm_object_size_t
, submap_entry_size
,
13560 vm_object_reference(sub_object
);
13562 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry
), VM_MAP_PAGE_MASK(map
)),
13563 "submap_entry %p offset 0x%llx\n",
13564 submap_entry
, VME_OFFSET(submap_entry
));
13565 submap_entry_offset
= VME_OFFSET(submap_entry
);
13566 submap_entry_size
= submap_entry
->vme_end
- submap_entry
->vme_start
;
13568 DTRACE_VM6(submap_copy_slowly
,
13569 vm_map_t
, cow_sub_map_parent
,
13570 vm_map_offset_t
, vaddr
,
13572 vm_object_size_t
, submap_entry_size
,
13573 int, submap_entry
->wired_count
,
13574 int, sub_object
->copy_strategy
);
13576 saved_submap_entry
= submap_entry
;
13577 version
.main_timestamp
= map
->timestamp
;
13578 vm_map_unlock(map
); /* Increments timestamp by 1 */
13579 submap_entry
= VM_MAP_ENTRY_NULL
;
13581 vm_object_lock(sub_object
);
13582 kr
= vm_object_copy_slowly(sub_object
,
13583 submap_entry_offset
,
13587 copied_slowly
= TRUE
;
13588 /* 4k: account for extra offset in physical page */
13589 copied_slowly_phys_offset
= submap_entry_offset
- vm_object_trunc_page(submap_entry_offset
);
13590 vm_object_deallocate(sub_object
);
13594 if (kr
!= KERN_SUCCESS
&&
13595 kr
!= KERN_MEMORY_RESTART_COPY
) {
13596 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13597 vm_map_unlock(cow_sub_map_parent
);
13599 if ((*real_map
!= map
)
13600 && (*real_map
!= cow_sub_map_parent
)) {
13601 vm_map_unlock(*real_map
);
13604 vm_object_deallocate(copy_object
);
13605 copy_object
= VM_OBJECT_NULL
;
13606 vm_map_lock_write_to_read(map
);
13607 DTRACE_VM4(submap_copy_slowly
,
13608 vm_object_t
, sub_object
,
13609 vm_object_offset_t
, submap_entry_offset
,
13610 vm_object_size_t
, submap_entry_size
,
13615 if ((kr
== KERN_SUCCESS
) &&
13616 (version
.main_timestamp
+ 1) == map
->timestamp
) {
13617 submap_entry
= saved_submap_entry
;
13619 saved_submap_entry
= NULL
;
13620 old_start
-= start_delta
;
13621 old_end
+= end_delta
;
13622 vm_object_deallocate(copy_object
);
13623 copy_object
= VM_OBJECT_NULL
;
13624 vm_map_lock_write_to_read(map
);
13628 /* set up shadow object */
13629 copy_object
= sub_object
;
13630 vm_object_lock(sub_object
);
13631 vm_object_reference_locked(sub_object
);
13632 sub_object
->shadowed
= TRUE
;
13633 vm_object_unlock(sub_object
);
13635 assert(submap_entry
->wired_count
== 0);
13636 submap_entry
->needs_copy
= TRUE
;
13638 prot
= submap_entry
->protection
;
13639 assert(!pmap_has_prot_policy(map
->pmap
, submap_entry
->translated_allow_execute
, prot
));
13640 prot
= prot
& ~VM_PROT_WRITE
;
13641 assert(!pmap_has_prot_policy(map
->pmap
, submap_entry
->translated_allow_execute
, prot
));
13643 if (override_nx(old_map
,
13644 VME_ALIAS(submap_entry
))
13646 prot
|= VM_PROT_EXECUTE
;
13649 vm_object_pmap_protect(
13651 VME_OFFSET(submap_entry
),
13652 submap_entry
->vme_end
-
13653 submap_entry
->vme_start
,
13654 (submap_entry
->is_shared
13655 || map
->mapped_in_other_pmaps
) ?
13656 PMAP_NULL
: map
->pmap
,
13657 VM_MAP_PAGE_SIZE(map
),
13658 submap_entry
->vme_start
,
13663 * Adjust the fault offset to the submap entry.
13665 copy_offset
= (local_vaddr
-
13666 submap_entry
->vme_start
+
13667 VME_OFFSET(submap_entry
));
13669 /* This works differently than the */
13670 /* normal submap case. We go back */
13671 /* to the parent of the cow map and*/
13672 /* clip out the target portion of */
13673 /* the sub_map, substituting the */
13674 /* new copy object, */
13676 subentry_protection
= submap_entry
->protection
;
13677 subentry_max_protection
= submap_entry
->max_protection
;
13678 subentry_no_copy_on_read
= submap_entry
->vme_no_copy_on_read
;
13679 vm_map_unlock(map
);
13680 submap_entry
= NULL
; /* not valid after map unlock */
13682 local_start
= old_start
;
13683 local_end
= old_end
;
13684 map
= cow_sub_map_parent
;
13685 *var_map
= cow_sub_map_parent
;
13686 vaddr
= cow_parent_vaddr
;
13687 cow_sub_map_parent
= NULL
;
13689 if (!vm_map_lookup_entry(map
,
13691 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13692 vm_map_unlock(cow_sub_map_parent
);
13694 if ((*real_map
!= map
)
13695 && (*real_map
!= cow_sub_map_parent
)) {
13696 vm_map_unlock(*real_map
);
13699 vm_object_deallocate(
13701 copy_object
= VM_OBJECT_NULL
;
13702 vm_map_lock_write_to_read(map
);
13703 DTRACE_VM4(submap_lookup_post_unlock
,
13704 uint64_t, (uint64_t)entry
->vme_start
,
13705 uint64_t, (uint64_t)entry
->vme_end
,
13706 vm_map_offset_t
, vaddr
,
13707 int, copied_slowly
);
13708 return KERN_INVALID_ADDRESS
;
13711 /* clip out the portion of space */
13712 /* mapped by the sub map which */
13713 /* corresponds to the underlying */
13717 * Clip (and unnest) the smallest nested chunk
13718 * possible around the faulting address...
13720 local_start
= vaddr
& ~(pmap_shared_region_size_min(map
->pmap
) - 1);
13721 local_end
= local_start
+ pmap_shared_region_size_min(map
->pmap
);
13723 * ... but don't go beyond the "old_start" to "old_end"
13724 * range, to avoid spanning over another VM region
13725 * with a possibly different VM object and/or offset.
13727 if (local_start
< old_start
) {
13728 local_start
= old_start
;
13730 if (local_end
> old_end
) {
13731 local_end
= old_end
;
13734 * Adjust copy_offset to the start of the range.
13736 copy_offset
-= (vaddr
- local_start
);
13738 vm_map_clip_start(map
, entry
, local_start
);
13739 vm_map_clip_end(map
, entry
, local_end
);
13740 if (entry
->is_sub_map
) {
13741 /* unnesting was done when clipping */
13742 assert(!entry
->use_pmap
);
13745 /* substitute copy object for */
13746 /* shared map entry */
13747 vm_map_deallocate(VME_SUBMAP(entry
));
13748 assert(!entry
->iokit_acct
);
13749 entry
->is_sub_map
= FALSE
;
13750 entry
->use_pmap
= TRUE
;
13751 VME_OBJECT_SET(entry
, copy_object
);
13753 /* propagate the submap entry's protections */
13754 if (entry
->protection
!= VM_PROT_READ
) {
13756 * Someone has already altered the top entry's
13757 * protections via vm_protect(VM_PROT_COPY).
13758 * Respect these new values and ignore the
13759 * submap entry's protections.
13763 * Regular copy-on-write: propagate the submap
13764 * entry's protections to the top map entry.
13766 entry
->protection
|= subentry_protection
;
13768 entry
->max_protection
|= subentry_max_protection
;
13769 /* propagate no_copy_on_read */
13770 entry
->vme_no_copy_on_read
= subentry_no_copy_on_read
;
13772 if ((entry
->protection
& VM_PROT_WRITE
) &&
13773 (entry
->protection
& VM_PROT_EXECUTE
) &&
13774 #if XNU_TARGET_OS_OSX
13775 map
->pmap
!= kernel_pmap
&&
13776 (vm_map_cs_enforcement(map
)
13778 || !VM_MAP_IS_EXOTIC(map
)
13779 #endif /* __arm64__ */
13781 #endif /* XNU_TARGET_OS_OSX */
13783 !pmap_cs_exempt(map
->pmap
) &&
13785 !(entry
->used_for_jit
) &&
13786 VM_MAP_POLICY_WX_STRIP_X(map
)) {
13788 uint64_t, (uint64_t)entry
->vme_start
,
13789 uint64_t, (uint64_t)entry
->vme_end
,
13790 vm_prot_t
, entry
->protection
);
13791 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13793 (current_task()->bsd_info
13794 ? proc_name_address(current_task()->bsd_info
)
13797 entry
->protection
&= ~VM_PROT_EXECUTE
;
13800 if (copied_slowly
) {
13801 VME_OFFSET_SET(entry
, local_start
- old_start
+ copied_slowly_phys_offset
);
13802 entry
->needs_copy
= FALSE
;
13803 entry
->is_shared
= FALSE
;
13805 VME_OFFSET_SET(entry
, copy_offset
);
13806 assert(entry
->wired_count
== 0);
13807 entry
->needs_copy
= TRUE
;
13808 if (entry
->inheritance
== VM_INHERIT_SHARE
) {
13809 entry
->inheritance
= VM_INHERIT_COPY
;
13811 if (map
!= old_map
) {
13812 entry
->is_shared
= TRUE
;
13815 if (entry
->inheritance
== VM_INHERIT_SHARE
) {
13816 entry
->inheritance
= VM_INHERIT_COPY
;
13819 vm_map_lock_write_to_read(map
);
13821 if ((cow_sub_map_parent
)
13822 && (cow_sub_map_parent
!= *real_map
)
13823 && (cow_sub_map_parent
!= map
)) {
13824 vm_map_unlock(cow_sub_map_parent
);
13826 entry
= submap_entry
;
13827 vaddr
= local_vaddr
;
13832 * Check whether this task is allowed to have
13836 prot
= entry
->protection
;
13838 if (override_nx(old_map
, VME_ALIAS(entry
)) && prot
) {
13840 * HACK -- if not a stack, then allow execution
13842 prot
|= VM_PROT_EXECUTE
;
13845 if (mask_protections
) {
13846 fault_type
&= prot
;
13847 if (fault_type
== VM_PROT_NONE
) {
13848 goto protection_failure
;
13851 if (((fault_type
& prot
) != fault_type
)
13853 /* prefetch abort in execute-only page */
13854 && !(prot
== VM_PROT_EXECUTE
&& fault_type
== (VM_PROT_READ
| VM_PROT_EXECUTE
))
13857 protection_failure
:
13858 if (*real_map
!= map
) {
13859 vm_map_unlock(*real_map
);
13863 if ((fault_type
& VM_PROT_EXECUTE
) && prot
) {
13864 log_stack_execution_failure((addr64_t
)vaddr
, prot
);
13867 DTRACE_VM2(prot_fault
, int, 1, (uint64_t *), NULL
);
13868 return KERN_PROTECTION_FAILURE
;
13872 * If this page is not pageable, we have to get
13873 * it for all possible accesses.
13876 *wired
= (entry
->wired_count
!= 0);
13882 * If the entry was copy-on-write, we either ...
13885 if (entry
->needs_copy
) {
13887 * If we want to write the page, we may as well
13888 * handle that now since we've got the map locked.
13890 * If we don't need to write the page, we just
13891 * demote the permissions allowed.
13894 if ((fault_type
& VM_PROT_WRITE
) || *wired
|| force_copy
) {
13896 * Make a new object, and place it in the
13897 * object chain. Note that no new references
13898 * have appeared -- one just moved from the
13899 * map to the new object.
13902 if (vm_map_lock_read_to_write(map
)) {
13903 vm_map_lock_read(map
);
13907 if (VME_OBJECT(entry
)->shadowed
== FALSE
) {
13908 vm_object_lock(VME_OBJECT(entry
));
13909 VME_OBJECT(entry
)->shadowed
= TRUE
;
13910 vm_object_unlock(VME_OBJECT(entry
));
13912 VME_OBJECT_SHADOW(entry
,
13913 (vm_map_size_t
) (entry
->vme_end
-
13914 entry
->vme_start
));
13915 entry
->needs_copy
= FALSE
;
13917 vm_map_lock_write_to_read(map
);
13919 if ((fault_type
& VM_PROT_WRITE
) == 0 && *wired
== 0) {
13921 * We're attempting to read a copy-on-write
13922 * page -- don't allow writes.
13925 prot
&= (~VM_PROT_WRITE
);
13930 * Create an object if necessary.
13932 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
13933 if (vm_map_lock_read_to_write(map
)) {
13934 vm_map_lock_read(map
);
13938 VME_OBJECT_SET(entry
,
13939 vm_object_allocate(
13940 (vm_map_size_t
)(entry
->vme_end
-
13941 entry
->vme_start
)));
13942 VME_OFFSET_SET(entry
, 0);
13943 assert(entry
->use_pmap
);
13944 vm_map_lock_write_to_read(map
);
13948 * Return the object/offset from this entry. If the entry
13949 * was copy-on-write or empty, it has been fixed up. Also
13950 * return the protection.
13953 *offset
= (vaddr
- entry
->vme_start
) + VME_OFFSET(entry
);
13954 *object
= VME_OBJECT(entry
);
13956 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET
, VM_MAP_LOOKUP_OBJECT
), VM_KERNEL_UNSLIDE_OR_PERM(*object
), (unsigned long) VME_ALIAS(entry
), 0, 0);
13959 fault_info
->interruptible
= THREAD_UNINT
; /* for now... */
13960 /* ... the caller will change "interruptible" if needed */
13961 fault_info
->cluster_size
= 0;
13962 fault_info
->user_tag
= VME_ALIAS(entry
);
13963 fault_info
->pmap_options
= 0;
13964 if (entry
->iokit_acct
||
13965 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
13966 fault_info
->pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
13968 fault_info
->behavior
= entry
->behavior
;
13969 fault_info
->lo_offset
= VME_OFFSET(entry
);
13970 fault_info
->hi_offset
=
13971 (entry
->vme_end
- entry
->vme_start
) + VME_OFFSET(entry
);
13972 fault_info
->no_cache
= entry
->no_cache
;
13973 fault_info
->stealth
= FALSE
;
13974 fault_info
->io_sync
= FALSE
;
13975 if (entry
->used_for_jit
||
13977 pmap_cs_exempt(map
->pmap
) ||
13979 entry
->vme_resilient_codesign
) {
13980 fault_info
->cs_bypass
= TRUE
;
13982 fault_info
->cs_bypass
= FALSE
;
13984 fault_info
->pmap_cs_associated
= FALSE
;
13986 if (entry
->pmap_cs_associated
) {
13988 * The pmap layer will validate this page
13989 * before allowing it to be executed from.
13991 fault_info
->pmap_cs_associated
= TRUE
;
13993 #endif /* CONFIG_PMAP_CS */
13994 fault_info
->mark_zf_absent
= FALSE
;
13995 fault_info
->batch_pmap_op
= FALSE
;
13996 fault_info
->resilient_media
= entry
->vme_resilient_media
;
13997 fault_info
->no_copy_on_read
= entry
->vme_no_copy_on_read
;
13998 if (entry
->translated_allow_execute
) {
13999 fault_info
->pmap_options
|= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE
;
14004 * Lock the object to prevent it from disappearing
14006 if (object_lock_type
== OBJECT_LOCK_EXCLUSIVE
) {
14007 if (contended
== NULL
) {
14008 vm_object_lock(*object
);
14010 *contended
= vm_object_lock_check_contended(*object
);
14013 vm_object_lock_shared(*object
);
14017 * Save the version number
14020 out_version
->main_timestamp
= map
->timestamp
;
14022 return KERN_SUCCESS
;
14029 * Verifies that the map in question has not changed
14030 * since the given version. The map has to be locked
14031 * ("shared" mode is fine) before calling this function
14032 * and it will be returned locked too.
14037 vm_map_version_t
*version
) /* REF */
14041 vm_map_lock_assert_held(map
);
14042 result
= (map
->timestamp
== version
->main_timestamp
);
14048 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14049 * Goes away after regular vm_region_recurse function migrates to
14051 * vm_region_recurse: A form of vm_region which follows the
14052 * submaps in a target map
14057 vm_map_region_recurse_64(
14059 vm_map_offset_t
*address
, /* IN/OUT */
14060 vm_map_size_t
*size
, /* OUT */
14061 natural_t
*nesting_depth
, /* IN/OUT */
14062 vm_region_submap_info_64_t submap_info
, /* IN/OUT */
14063 mach_msg_type_number_t
*count
) /* IN/OUT */
14065 mach_msg_type_number_t original_count
;
14066 vm_region_extended_info_data_t extended
;
14067 vm_map_entry_t tmp_entry
;
14068 vm_map_offset_t user_address
;
14069 unsigned int user_max_depth
;
14072 * "curr_entry" is the VM map entry preceding or including the
14073 * address we're looking for.
14074 * "curr_map" is the map or sub-map containing "curr_entry".
14075 * "curr_address" is the equivalent of the top map's "user_address"
14076 * in the current map.
14077 * "curr_offset" is the cumulated offset of "curr_map" in the
14078 * target task's address space.
14079 * "curr_depth" is the depth of "curr_map" in the chain of
14082 * "curr_max_below" and "curr_max_above" limit the range (around
14083 * "curr_address") we should take into account in the current (sub)map.
14084 * They limit the range to what's visible through the map entries
14085 * we've traversed from the top map to the current map.
14088 vm_map_entry_t curr_entry
;
14089 vm_map_address_t curr_address
;
14090 vm_map_offset_t curr_offset
;
14092 unsigned int curr_depth
;
14093 vm_map_offset_t curr_max_below
, curr_max_above
;
14094 vm_map_offset_t curr_skip
;
14097 * "next_" is the same as "curr_" but for the VM region immediately
14098 * after the address we're looking for. We need to keep track of this
14099 * too because we want to return info about that region if the
14100 * address we're looking for is not mapped.
14102 vm_map_entry_t next_entry
;
14103 vm_map_offset_t next_offset
;
14104 vm_map_offset_t next_address
;
14106 unsigned int next_depth
;
14107 vm_map_offset_t next_max_below
, next_max_above
;
14108 vm_map_offset_t next_skip
;
14110 boolean_t look_for_pages
;
14111 vm_region_submap_short_info_64_t short_info
;
14112 boolean_t do_region_footprint
;
14113 int effective_page_size
, effective_page_shift
;
14115 if (map
== VM_MAP_NULL
) {
14116 /* no address space to work on */
14117 return KERN_INVALID_ARGUMENT
;
14120 effective_page_shift
= vm_self_region_page_shift(map
);
14121 effective_page_size
= (1 << effective_page_shift
);
14123 if (*count
< VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
) {
14125 * "info" structure is not big enough and
14128 return KERN_INVALID_ARGUMENT
;
14131 do_region_footprint
= task_self_region_footprint();
14132 original_count
= *count
;
14134 if (original_count
< VM_REGION_SUBMAP_INFO_V0_COUNT_64
) {
14135 *count
= VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
;
14136 look_for_pages
= FALSE
;
14137 short_info
= (vm_region_submap_short_info_64_t
) submap_info
;
14138 submap_info
= NULL
;
14140 look_for_pages
= TRUE
;
14141 *count
= VM_REGION_SUBMAP_INFO_V0_COUNT_64
;
14144 if (original_count
>= VM_REGION_SUBMAP_INFO_V1_COUNT_64
) {
14145 *count
= VM_REGION_SUBMAP_INFO_V1_COUNT_64
;
14147 if (original_count
>= VM_REGION_SUBMAP_INFO_V2_COUNT_64
) {
14148 *count
= VM_REGION_SUBMAP_INFO_V2_COUNT_64
;
14152 user_address
= *address
;
14153 user_max_depth
= *nesting_depth
;
14156 vm_map_lock_read(map
);
14162 curr_address
= user_address
;
14166 curr_max_above
= ((vm_map_offset_t
) -1) - curr_address
;
14167 curr_max_below
= curr_address
;
14175 next_max_above
= (vm_map_offset_t
) -1;
14176 next_max_below
= (vm_map_offset_t
) -1;
14179 if (vm_map_lookup_entry(curr_map
,
14182 /* tmp_entry contains the address we're looking for */
14183 curr_entry
= tmp_entry
;
14185 vm_map_offset_t skip
;
14187 * The address is not mapped. "tmp_entry" is the
14188 * map entry preceding the address. We want the next
14189 * one, if it exists.
14191 curr_entry
= tmp_entry
->vme_next
;
14193 if (curr_entry
== vm_map_to_entry(curr_map
) ||
14194 (curr_entry
->vme_start
>=
14195 curr_address
+ curr_max_above
)) {
14196 /* no next entry at this level: stop looking */
14198 vm_map_unlock_read(curr_map
);
14205 curr_max_above
= 0;
14206 curr_max_below
= 0;
14210 /* adjust current address and offset */
14211 skip
= curr_entry
->vme_start
- curr_address
;
14212 curr_address
= curr_entry
->vme_start
;
14214 curr_offset
+= skip
;
14215 curr_max_above
-= skip
;
14216 curr_max_below
= 0;
14220 * Is the next entry at this level closer to the address (or
14221 * deeper in the submap chain) than the one we had
14224 tmp_entry
= curr_entry
->vme_next
;
14225 if (tmp_entry
== vm_map_to_entry(curr_map
)) {
14226 /* no next entry at this level */
14227 } else if (tmp_entry
->vme_start
>=
14228 curr_address
+ curr_max_above
) {
14230 * tmp_entry is beyond the scope of what we mapped of
14231 * this submap in the upper level: ignore it.
14233 } else if ((next_entry
== NULL
) ||
14234 (tmp_entry
->vme_start
+ curr_offset
<=
14235 next_entry
->vme_start
+ next_offset
)) {
14237 * We didn't have a "next_entry" or this one is
14238 * closer to the address we're looking for:
14239 * use this "tmp_entry" as the new "next_entry".
14241 if (next_entry
!= NULL
) {
14242 /* unlock the last "next_map" */
14243 if (next_map
!= curr_map
&& not_in_kdp
) {
14244 vm_map_unlock_read(next_map
);
14247 next_entry
= tmp_entry
;
14248 next_map
= curr_map
;
14249 next_depth
= curr_depth
;
14250 next_address
= next_entry
->vme_start
;
14251 next_skip
= curr_skip
;
14252 next_skip
+= (next_address
- curr_address
);
14253 next_offset
= curr_offset
;
14254 next_offset
+= (next_address
- curr_address
);
14255 next_max_above
= MIN(next_max_above
, curr_max_above
);
14256 next_max_above
= MIN(next_max_above
,
14257 next_entry
->vme_end
- next_address
);
14258 next_max_below
= MIN(next_max_below
, curr_max_below
);
14259 next_max_below
= MIN(next_max_below
,
14260 next_address
- next_entry
->vme_start
);
14264 * "curr_max_{above,below}" allow us to keep track of the
14265 * portion of the submap that is actually mapped at this level:
14266 * the rest of that submap is irrelevant to us, since it's not
14268 * The relevant portion of the map starts at
14269 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14271 curr_max_above
= MIN(curr_max_above
,
14272 curr_entry
->vme_end
- curr_address
);
14273 curr_max_below
= MIN(curr_max_below
,
14274 curr_address
- curr_entry
->vme_start
);
14276 if (!curr_entry
->is_sub_map
||
14277 curr_depth
>= user_max_depth
) {
14279 * We hit a leaf map or we reached the maximum depth
14280 * we could, so stop looking. Keep the current map
14287 * Get down to the next submap level.
14291 * Lock the next level and unlock the current level,
14292 * unless we need to keep it locked to access the "next_entry"
14296 vm_map_lock_read(VME_SUBMAP(curr_entry
));
14298 if (curr_map
== next_map
) {
14299 /* keep "next_map" locked in case we need it */
14301 /* release this map */
14303 vm_map_unlock_read(curr_map
);
14308 * Adjust the offset. "curr_entry" maps the submap
14309 * at relative address "curr_entry->vme_start" in the
14310 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14311 * bytes of the submap.
14312 * "curr_offset" always represents the offset of a virtual
14313 * address in the curr_map relative to the absolute address
14314 * space (i.e. the top-level VM map).
14317 (VME_OFFSET(curr_entry
) - curr_entry
->vme_start
);
14318 curr_address
= user_address
+ curr_offset
;
14319 /* switch to the submap */
14320 curr_map
= VME_SUBMAP(curr_entry
);
14325 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14326 // so probably should be a real 32b ID vs. ptr.
14327 // Current users just check for equality
14329 if (curr_entry
== NULL
) {
14330 /* no VM region contains the address... */
14332 if (do_region_footprint
&& /* we want footprint numbers */
14333 next_entry
== NULL
&& /* & there are no more regions */
14334 /* & we haven't already provided our fake region: */
14335 user_address
<= vm_map_last_entry(map
)->vme_end
) {
14336 ledger_amount_t ledger_resident
, ledger_compressed
;
14339 * Add a fake memory region to account for
14340 * purgeable and/or ledger-tagged memory that
14341 * counts towards this task's memory footprint,
14342 * i.e. the resident/compressed pages of non-volatile
14343 * objects owned by that task.
14345 task_ledgers_footprint(map
->pmap
->ledger
,
14347 &ledger_compressed
);
14348 if (ledger_resident
+ ledger_compressed
== 0) {
14349 /* no purgeable memory usage to report */
14350 return KERN_INVALID_ADDRESS
;
14352 /* fake region to show nonvolatile footprint */
14353 if (look_for_pages
) {
14354 submap_info
->protection
= VM_PROT_DEFAULT
;
14355 submap_info
->max_protection
= VM_PROT_DEFAULT
;
14356 submap_info
->inheritance
= VM_INHERIT_DEFAULT
;
14357 submap_info
->offset
= 0;
14358 submap_info
->user_tag
= -1;
14359 submap_info
->pages_resident
= (unsigned int) (ledger_resident
/ effective_page_size
);
14360 submap_info
->pages_shared_now_private
= 0;
14361 submap_info
->pages_swapped_out
= (unsigned int) (ledger_compressed
/ effective_page_size
);
14362 submap_info
->pages_dirtied
= submap_info
->pages_resident
;
14363 submap_info
->ref_count
= 1;
14364 submap_info
->shadow_depth
= 0;
14365 submap_info
->external_pager
= 0;
14366 submap_info
->share_mode
= SM_PRIVATE
;
14367 submap_info
->is_submap
= 0;
14368 submap_info
->behavior
= VM_BEHAVIOR_DEFAULT
;
14369 submap_info
->object_id
= VM_OBJECT_ID_FAKE(map
, task_ledgers
.purgeable_nonvolatile
);
14370 submap_info
->user_wired_count
= 0;
14371 submap_info
->pages_reusable
= 0;
14373 short_info
->user_tag
= -1;
14374 short_info
->offset
= 0;
14375 short_info
->protection
= VM_PROT_DEFAULT
;
14376 short_info
->inheritance
= VM_INHERIT_DEFAULT
;
14377 short_info
->max_protection
= VM_PROT_DEFAULT
;
14378 short_info
->behavior
= VM_BEHAVIOR_DEFAULT
;
14379 short_info
->user_wired_count
= 0;
14380 short_info
->is_submap
= 0;
14381 short_info
->object_id
= VM_OBJECT_ID_FAKE(map
, task_ledgers
.purgeable_nonvolatile
);
14382 short_info
->external_pager
= 0;
14383 short_info
->shadow_depth
= 0;
14384 short_info
->share_mode
= SM_PRIVATE
;
14385 short_info
->ref_count
= 1;
14387 *nesting_depth
= 0;
14388 *size
= (vm_map_size_t
) (ledger_resident
+ ledger_compressed
);
14389 // *address = user_address;
14390 *address
= vm_map_last_entry(map
)->vme_end
;
14391 return KERN_SUCCESS
;
14394 if (next_entry
== NULL
) {
14395 /* ... and no VM region follows it either */
14396 return KERN_INVALID_ADDRESS
;
14398 /* ... gather info about the next VM region */
14399 curr_entry
= next_entry
;
14400 curr_map
= next_map
; /* still locked ... */
14401 curr_address
= next_address
;
14402 curr_skip
= next_skip
;
14403 curr_offset
= next_offset
;
14404 curr_depth
= next_depth
;
14405 curr_max_above
= next_max_above
;
14406 curr_max_below
= next_max_below
;
14408 /* we won't need "next_entry" after all */
14409 if (next_entry
!= NULL
) {
14410 /* release "next_map" */
14411 if (next_map
!= curr_map
&& not_in_kdp
) {
14412 vm_map_unlock_read(next_map
);
14421 next_max_below
= -1;
14422 next_max_above
= -1;
14424 if (curr_entry
->is_sub_map
&&
14425 curr_depth
< user_max_depth
) {
14427 * We're not as deep as we could be: we must have
14428 * gone back up after not finding anything mapped
14429 * below the original top-level map entry's.
14430 * Let's move "curr_address" forward and recurse again.
14432 user_address
= curr_address
;
14433 goto recurse_again
;
14436 *nesting_depth
= curr_depth
;
14437 *size
= curr_max_above
+ curr_max_below
;
14438 *address
= user_address
+ curr_skip
- curr_max_below
;
14440 if (look_for_pages
) {
14441 submap_info
->user_tag
= VME_ALIAS(curr_entry
);
14442 submap_info
->offset
= VME_OFFSET(curr_entry
);
14443 submap_info
->protection
= curr_entry
->protection
;
14444 submap_info
->inheritance
= curr_entry
->inheritance
;
14445 submap_info
->max_protection
= curr_entry
->max_protection
;
14446 submap_info
->behavior
= curr_entry
->behavior
;
14447 submap_info
->user_wired_count
= curr_entry
->user_wired_count
;
14448 submap_info
->is_submap
= curr_entry
->is_sub_map
;
14449 submap_info
->object_id
= VM_OBJECT_ID(VME_OBJECT(curr_entry
));
14451 short_info
->user_tag
= VME_ALIAS(curr_entry
);
14452 short_info
->offset
= VME_OFFSET(curr_entry
);
14453 short_info
->protection
= curr_entry
->protection
;
14454 short_info
->inheritance
= curr_entry
->inheritance
;
14455 short_info
->max_protection
= curr_entry
->max_protection
;
14456 short_info
->behavior
= curr_entry
->behavior
;
14457 short_info
->user_wired_count
= curr_entry
->user_wired_count
;
14458 short_info
->is_submap
= curr_entry
->is_sub_map
;
14459 short_info
->object_id
= VM_OBJECT_ID(VME_OBJECT(curr_entry
));
14462 extended
.pages_resident
= 0;
14463 extended
.pages_swapped_out
= 0;
14464 extended
.pages_shared_now_private
= 0;
14465 extended
.pages_dirtied
= 0;
14466 extended
.pages_reusable
= 0;
14467 extended
.external_pager
= 0;
14468 extended
.shadow_depth
= 0;
14469 extended
.share_mode
= SM_EMPTY
;
14470 extended
.ref_count
= 0;
14473 if (!curr_entry
->is_sub_map
) {
14474 vm_map_offset_t range_start
, range_end
;
14475 range_start
= MAX((curr_address
- curr_max_below
),
14476 curr_entry
->vme_start
);
14477 range_end
= MIN((curr_address
+ curr_max_above
),
14478 curr_entry
->vme_end
);
14479 vm_map_region_walk(curr_map
,
14482 (VME_OFFSET(curr_entry
) +
14484 curr_entry
->vme_start
)),
14485 range_end
- range_start
,
14487 look_for_pages
, VM_REGION_EXTENDED_INFO_COUNT
);
14488 if (extended
.external_pager
&&
14489 extended
.ref_count
== 2 &&
14490 extended
.share_mode
== SM_SHARED
) {
14491 extended
.share_mode
= SM_PRIVATE
;
14494 if (curr_entry
->use_pmap
) {
14495 extended
.share_mode
= SM_TRUESHARED
;
14497 extended
.share_mode
= SM_PRIVATE
;
14499 extended
.ref_count
= os_ref_get_count(&VME_SUBMAP(curr_entry
)->map_refcnt
);
14503 if (look_for_pages
) {
14504 submap_info
->pages_resident
= extended
.pages_resident
;
14505 submap_info
->pages_swapped_out
= extended
.pages_swapped_out
;
14506 submap_info
->pages_shared_now_private
=
14507 extended
.pages_shared_now_private
;
14508 submap_info
->pages_dirtied
= extended
.pages_dirtied
;
14509 submap_info
->external_pager
= extended
.external_pager
;
14510 submap_info
->shadow_depth
= extended
.shadow_depth
;
14511 submap_info
->share_mode
= extended
.share_mode
;
14512 submap_info
->ref_count
= extended
.ref_count
;
14514 if (original_count
>= VM_REGION_SUBMAP_INFO_V1_COUNT_64
) {
14515 submap_info
->pages_reusable
= extended
.pages_reusable
;
14517 if (original_count
>= VM_REGION_SUBMAP_INFO_V2_COUNT_64
) {
14518 submap_info
->object_id_full
= (vm_object_id_t
) (VME_OBJECT(curr_entry
) != NULL
) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry
)) : 0ULL;
14521 short_info
->external_pager
= extended
.external_pager
;
14522 short_info
->shadow_depth
= extended
.shadow_depth
;
14523 short_info
->share_mode
= extended
.share_mode
;
14524 short_info
->ref_count
= extended
.ref_count
;
14528 vm_map_unlock_read(curr_map
);
14531 return KERN_SUCCESS
;
14537 * User call to obtain information about a region in
14538 * a task's address map. Currently, only one flavor is
14541 * XXX The reserved and behavior fields cannot be filled
14542 * in until the vm merge from the IK is completed, and
14543 * vm_reserve is implemented.
14549 vm_map_offset_t
*address
, /* IN/OUT */
14550 vm_map_size_t
*size
, /* OUT */
14551 vm_region_flavor_t flavor
, /* IN */
14552 vm_region_info_t info
, /* OUT */
14553 mach_msg_type_number_t
*count
, /* IN/OUT */
14554 mach_port_t
*object_name
) /* OUT */
14556 vm_map_entry_t tmp_entry
;
14557 vm_map_entry_t entry
;
14558 vm_map_offset_t start
;
14560 if (map
== VM_MAP_NULL
) {
14561 return KERN_INVALID_ARGUMENT
;
14565 case VM_REGION_BASIC_INFO
:
14566 /* legacy for old 32-bit objects info */
14568 vm_region_basic_info_t basic
;
14570 if (*count
< VM_REGION_BASIC_INFO_COUNT
) {
14571 return KERN_INVALID_ARGUMENT
;
14574 basic
= (vm_region_basic_info_t
) info
;
14575 *count
= VM_REGION_BASIC_INFO_COUNT
;
14577 vm_map_lock_read(map
);
14580 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
14581 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
14582 vm_map_unlock_read(map
);
14583 return KERN_INVALID_ADDRESS
;
14589 start
= entry
->vme_start
;
14591 basic
->offset
= (uint32_t)VME_OFFSET(entry
);
14592 basic
->protection
= entry
->protection
;
14593 basic
->inheritance
= entry
->inheritance
;
14594 basic
->max_protection
= entry
->max_protection
;
14595 basic
->behavior
= entry
->behavior
;
14596 basic
->user_wired_count
= entry
->user_wired_count
;
14597 basic
->reserved
= entry
->is_sub_map
;
14599 *size
= (entry
->vme_end
- start
);
14602 *object_name
= IP_NULL
;
14604 if (entry
->is_sub_map
) {
14605 basic
->shared
= FALSE
;
14607 basic
->shared
= entry
->is_shared
;
14610 vm_map_unlock_read(map
);
14611 return KERN_SUCCESS
;
14614 case VM_REGION_BASIC_INFO_64
:
14616 vm_region_basic_info_64_t basic
;
14618 if (*count
< VM_REGION_BASIC_INFO_COUNT_64
) {
14619 return KERN_INVALID_ARGUMENT
;
14622 basic
= (vm_region_basic_info_64_t
) info
;
14623 *count
= VM_REGION_BASIC_INFO_COUNT_64
;
14625 vm_map_lock_read(map
);
14628 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
14629 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
14630 vm_map_unlock_read(map
);
14631 return KERN_INVALID_ADDRESS
;
14637 start
= entry
->vme_start
;
14639 basic
->offset
= VME_OFFSET(entry
);
14640 basic
->protection
= entry
->protection
;
14641 basic
->inheritance
= entry
->inheritance
;
14642 basic
->max_protection
= entry
->max_protection
;
14643 basic
->behavior
= entry
->behavior
;
14644 basic
->user_wired_count
= entry
->user_wired_count
;
14645 basic
->reserved
= entry
->is_sub_map
;
14647 *size
= (entry
->vme_end
- start
);
14650 *object_name
= IP_NULL
;
14652 if (entry
->is_sub_map
) {
14653 basic
->shared
= FALSE
;
14655 basic
->shared
= entry
->is_shared
;
14658 vm_map_unlock_read(map
);
14659 return KERN_SUCCESS
;
14661 case VM_REGION_EXTENDED_INFO
:
14662 if (*count
< VM_REGION_EXTENDED_INFO_COUNT
) {
14663 return KERN_INVALID_ARGUMENT
;
14666 case VM_REGION_EXTENDED_INFO__legacy
:
14667 if (*count
< VM_REGION_EXTENDED_INFO_COUNT__legacy
) {
14668 return KERN_INVALID_ARGUMENT
;
14672 vm_region_extended_info_t extended
;
14673 mach_msg_type_number_t original_count
;
14674 int effective_page_size
, effective_page_shift
;
14676 extended
= (vm_region_extended_info_t
) info
;
14678 effective_page_shift
= vm_self_region_page_shift(map
);
14679 effective_page_size
= (1 << effective_page_shift
);
14681 vm_map_lock_read(map
);
14684 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
14685 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
14686 vm_map_unlock_read(map
);
14687 return KERN_INVALID_ADDRESS
;
14692 start
= entry
->vme_start
;
14694 extended
->protection
= entry
->protection
;
14695 extended
->user_tag
= VME_ALIAS(entry
);
14696 extended
->pages_resident
= 0;
14697 extended
->pages_swapped_out
= 0;
14698 extended
->pages_shared_now_private
= 0;
14699 extended
->pages_dirtied
= 0;
14700 extended
->external_pager
= 0;
14701 extended
->shadow_depth
= 0;
14703 original_count
= *count
;
14704 if (flavor
== VM_REGION_EXTENDED_INFO__legacy
) {
14705 *count
= VM_REGION_EXTENDED_INFO_COUNT__legacy
;
14707 extended
->pages_reusable
= 0;
14708 *count
= VM_REGION_EXTENDED_INFO_COUNT
;
14711 vm_map_region_walk(map
, start
, entry
, VME_OFFSET(entry
), entry
->vme_end
- start
, extended
, TRUE
, *count
);
14713 if (extended
->external_pager
&& extended
->ref_count
== 2 && extended
->share_mode
== SM_SHARED
) {
14714 extended
->share_mode
= SM_PRIVATE
;
14718 *object_name
= IP_NULL
;
14721 *size
= (entry
->vme_end
- start
);
14723 vm_map_unlock_read(map
);
14724 return KERN_SUCCESS
;
14726 case VM_REGION_TOP_INFO
:
14728 vm_region_top_info_t top
;
14730 if (*count
< VM_REGION_TOP_INFO_COUNT
) {
14731 return KERN_INVALID_ARGUMENT
;
14734 top
= (vm_region_top_info_t
) info
;
14735 *count
= VM_REGION_TOP_INFO_COUNT
;
14737 vm_map_lock_read(map
);
14740 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
14741 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
14742 vm_map_unlock_read(map
);
14743 return KERN_INVALID_ADDRESS
;
14748 start
= entry
->vme_start
;
14750 top
->private_pages_resident
= 0;
14751 top
->shared_pages_resident
= 0;
14753 vm_map_region_top_walk(entry
, top
);
14756 *object_name
= IP_NULL
;
14759 *size
= (entry
->vme_end
- start
);
14761 vm_map_unlock_read(map
);
14762 return KERN_SUCCESS
;
14765 return KERN_INVALID_ARGUMENT
;
14769 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14770 MIN((entry_size), \
14771 ((obj)->all_reusable ? \
14772 (obj)->wired_page_count : \
14773 (obj)->resident_page_count - (obj)->reusable_page_count))
14776 vm_map_region_top_walk(
14777 vm_map_entry_t entry
,
14778 vm_region_top_info_t top
)
14780 if (VME_OBJECT(entry
) == 0 || entry
->is_sub_map
) {
14781 top
->share_mode
= SM_EMPTY
;
14782 top
->ref_count
= 0;
14788 struct vm_object
*obj
, *tmp_obj
;
14790 uint32_t entry_size
;
14792 entry_size
= (uint32_t) ((entry
->vme_end
- entry
->vme_start
) / PAGE_SIZE_64
);
14794 obj
= VME_OBJECT(entry
);
14796 vm_object_lock(obj
);
14798 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
) {
14802 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
14804 if (ref_count
== 1) {
14805 top
->private_pages_resident
=
14806 OBJ_RESIDENT_COUNT(obj
, entry_size
);
14808 top
->shared_pages_resident
=
14809 OBJ_RESIDENT_COUNT(obj
, entry_size
);
14811 top
->ref_count
= ref_count
;
14812 top
->share_mode
= SM_COW
;
14814 while ((tmp_obj
= obj
->shadow
)) {
14815 vm_object_lock(tmp_obj
);
14816 vm_object_unlock(obj
);
14819 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
) {
14823 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
14824 top
->shared_pages_resident
+=
14825 OBJ_RESIDENT_COUNT(obj
, entry_size
);
14826 top
->ref_count
+= ref_count
- 1;
14829 if (entry
->superpage_size
) {
14830 top
->share_mode
= SM_LARGE_PAGE
;
14831 top
->shared_pages_resident
= 0;
14832 top
->private_pages_resident
= entry_size
;
14833 } else if (entry
->needs_copy
) {
14834 top
->share_mode
= SM_COW
;
14835 top
->shared_pages_resident
=
14836 OBJ_RESIDENT_COUNT(obj
, entry_size
);
14838 if (ref_count
== 1 ||
14839 (ref_count
== 2 && obj
->named
)) {
14840 top
->share_mode
= SM_PRIVATE
;
14841 top
->private_pages_resident
=
14842 OBJ_RESIDENT_COUNT(obj
,
14845 top
->share_mode
= SM_SHARED
;
14846 top
->shared_pages_resident
=
14847 OBJ_RESIDENT_COUNT(obj
,
14851 top
->ref_count
= ref_count
;
14853 /* XXX K64: obj_id will be truncated */
14854 top
->obj_id
= (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj
);
14856 vm_object_unlock(obj
);
14861 vm_map_region_walk(
14863 vm_map_offset_t va
,
14864 vm_map_entry_t entry
,
14865 vm_object_offset_t offset
,
14866 vm_object_size_t range
,
14867 vm_region_extended_info_t extended
,
14868 boolean_t look_for_pages
,
14869 mach_msg_type_number_t count
)
14871 struct vm_object
*obj
, *tmp_obj
;
14872 vm_map_offset_t last_offset
;
14875 struct vm_object
*shadow_object
;
14876 unsigned short shadow_depth
;
14877 boolean_t do_region_footprint
;
14878 int effective_page_size
, effective_page_shift
;
14879 vm_map_offset_t effective_page_mask
;
14881 do_region_footprint
= task_self_region_footprint();
14883 if ((VME_OBJECT(entry
) == 0) ||
14884 (entry
->is_sub_map
) ||
14885 (VME_OBJECT(entry
)->phys_contiguous
&&
14886 !entry
->superpage_size
)) {
14887 extended
->share_mode
= SM_EMPTY
;
14888 extended
->ref_count
= 0;
14892 if (entry
->superpage_size
) {
14893 extended
->shadow_depth
= 0;
14894 extended
->share_mode
= SM_LARGE_PAGE
;
14895 extended
->ref_count
= 1;
14896 extended
->external_pager
= 0;
14898 /* TODO4K: Superpage in 4k mode? */
14899 extended
->pages_resident
= (unsigned int)(range
>> PAGE_SHIFT
);
14900 extended
->shadow_depth
= 0;
14904 effective_page_shift
= vm_self_region_page_shift(map
);
14905 effective_page_size
= (1 << effective_page_shift
);
14906 effective_page_mask
= effective_page_size
- 1;
14908 offset
= vm_map_trunc_page(offset
, effective_page_mask
);
14910 obj
= VME_OBJECT(entry
);
14912 vm_object_lock(obj
);
14914 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
) {
14918 if (look_for_pages
) {
14919 for (last_offset
= offset
+ range
;
14920 offset
< last_offset
;
14921 offset
+= effective_page_size
, va
+= effective_page_size
) {
14922 if (do_region_footprint
) {
14926 if (map
->has_corpse_footprint
) {
14928 * Query the page info data we saved
14929 * while forking the corpse.
14931 vm_map_corpse_footprint_query_page_info(
14939 vm_map_footprint_query_page_info(
14945 if (disp
& VM_PAGE_QUERY_PAGE_PRESENT
) {
14946 extended
->pages_resident
++;
14948 if (disp
& VM_PAGE_QUERY_PAGE_REUSABLE
) {
14949 extended
->pages_reusable
++;
14951 if (disp
& VM_PAGE_QUERY_PAGE_DIRTY
) {
14952 extended
->pages_dirtied
++;
14954 if (disp
& PMAP_QUERY_PAGE_COMPRESSED
) {
14955 extended
->pages_swapped_out
++;
14960 vm_map_region_look_for_page(map
, va
, obj
,
14961 vm_object_trunc_page(offset
), ref_count
,
14962 0, extended
, count
);
14965 if (do_region_footprint
) {
14966 goto collect_object_info
;
14969 collect_object_info
:
14970 shadow_object
= obj
->shadow
;
14973 if (!(obj
->internal
)) {
14974 extended
->external_pager
= 1;
14977 if (shadow_object
!= VM_OBJECT_NULL
) {
14978 vm_object_lock(shadow_object
);
14980 shadow_object
!= VM_OBJECT_NULL
;
14982 vm_object_t next_shadow
;
14984 if (!(shadow_object
->internal
)) {
14985 extended
->external_pager
= 1;
14988 next_shadow
= shadow_object
->shadow
;
14990 vm_object_lock(next_shadow
);
14992 vm_object_unlock(shadow_object
);
14993 shadow_object
= next_shadow
;
14996 extended
->shadow_depth
= shadow_depth
;
14999 if (extended
->shadow_depth
|| entry
->needs_copy
) {
15000 extended
->share_mode
= SM_COW
;
15002 if (ref_count
== 1) {
15003 extended
->share_mode
= SM_PRIVATE
;
15005 if (obj
->true_share
) {
15006 extended
->share_mode
= SM_TRUESHARED
;
15008 extended
->share_mode
= SM_SHARED
;
15012 extended
->ref_count
= ref_count
- extended
->shadow_depth
;
15014 for (i
= 0; i
< extended
->shadow_depth
; i
++) {
15015 if ((tmp_obj
= obj
->shadow
) == 0) {
15018 vm_object_lock(tmp_obj
);
15019 vm_object_unlock(obj
);
15021 if ((ref_count
= tmp_obj
->ref_count
) > 1 && tmp_obj
->paging_in_progress
) {
15025 extended
->ref_count
+= ref_count
;
15028 vm_object_unlock(obj
);
15030 if (extended
->share_mode
== SM_SHARED
) {
15031 vm_map_entry_t cur
;
15032 vm_map_entry_t last
;
15035 obj
= VME_OBJECT(entry
);
15036 last
= vm_map_to_entry(map
);
15039 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
) {
15042 for (cur
= vm_map_first_entry(map
); cur
!= last
; cur
= cur
->vme_next
) {
15043 my_refs
+= vm_map_region_count_obj_refs(cur
, obj
);
15046 if (my_refs
== ref_count
) {
15047 extended
->share_mode
= SM_PRIVATE_ALIASED
;
15048 } else if (my_refs
> 1) {
15049 extended
->share_mode
= SM_SHARED_ALIASED
;
15055 /* object is locked on entry and locked on return */
15059 vm_map_region_look_for_page(
15060 __unused vm_map_t map
,
15061 __unused vm_map_offset_t va
,
15062 vm_object_t object
,
15063 vm_object_offset_t offset
,
15065 unsigned short depth
,
15066 vm_region_extended_info_t extended
,
15067 mach_msg_type_number_t count
)
15070 vm_object_t shadow
;
15072 vm_object_t caller_object
;
15074 shadow
= object
->shadow
;
15075 caller_object
= object
;
15079 if (!(object
->internal
)) {
15080 extended
->external_pager
= 1;
15083 if ((p
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
15084 if (shadow
&& (max_refcnt
== 1)) {
15085 extended
->pages_shared_now_private
++;
15088 if (!p
->vmp_fictitious
&&
15089 (p
->vmp_dirty
|| pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p
)))) {
15090 extended
->pages_dirtied
++;
15091 } else if (count
>= VM_REGION_EXTENDED_INFO_COUNT
) {
15092 if (p
->vmp_reusable
|| object
->all_reusable
) {
15093 extended
->pages_reusable
++;
15097 extended
->pages_resident
++;
15099 if (object
!= caller_object
) {
15100 vm_object_unlock(object
);
15105 if (object
->internal
&&
15107 !object
->terminating
&&
15108 object
->pager_ready
) {
15109 if (VM_COMPRESSOR_PAGER_STATE_GET(object
, offset
)
15110 == VM_EXTERNAL_STATE_EXISTS
) {
15111 /* the pager has that page */
15112 extended
->pages_swapped_out
++;
15113 if (object
!= caller_object
) {
15114 vm_object_unlock(object
);
15121 vm_object_lock(shadow
);
15123 if ((ref_count
= shadow
->ref_count
) > 1 && shadow
->paging_in_progress
) {
15127 if (++depth
> extended
->shadow_depth
) {
15128 extended
->shadow_depth
= depth
;
15131 if (ref_count
> max_refcnt
) {
15132 max_refcnt
= ref_count
;
15135 if (object
!= caller_object
) {
15136 vm_object_unlock(object
);
15139 offset
= offset
+ object
->vo_shadow_offset
;
15141 shadow
= object
->shadow
;
15144 if (object
!= caller_object
) {
15145 vm_object_unlock(object
);
15152 vm_map_region_count_obj_refs(
15153 vm_map_entry_t entry
,
15154 vm_object_t object
)
15157 vm_object_t chk_obj
;
15158 vm_object_t tmp_obj
;
15160 if (VME_OBJECT(entry
) == 0) {
15164 if (entry
->is_sub_map
) {
15169 chk_obj
= VME_OBJECT(entry
);
15170 vm_object_lock(chk_obj
);
15173 if (chk_obj
== object
) {
15176 tmp_obj
= chk_obj
->shadow
;
15178 vm_object_lock(tmp_obj
);
15180 vm_object_unlock(chk_obj
);
15190 * Routine: vm_map_simplify
15193 * Attempt to simplify the map representation in
15194 * the vicinity of the given starting address.
15196 * This routine is intended primarily to keep the
15197 * kernel maps more compact -- they generally don't
15198 * benefit from the "expand a map entry" technology
15199 * at allocation time because the adjacent entry
15200 * is often wired down.
15203 vm_map_simplify_entry(
15205 vm_map_entry_t this_entry
)
15207 vm_map_entry_t prev_entry
;
15209 counter(c_vm_map_simplify_entry_called
++);
15211 prev_entry
= this_entry
->vme_prev
;
15213 if ((this_entry
!= vm_map_to_entry(map
)) &&
15214 (prev_entry
!= vm_map_to_entry(map
)) &&
15216 (prev_entry
->vme_end
== this_entry
->vme_start
) &&
15218 (prev_entry
->is_sub_map
== this_entry
->is_sub_map
) &&
15219 (VME_OBJECT(prev_entry
) == VME_OBJECT(this_entry
)) &&
15220 ((VME_OFFSET(prev_entry
) + (prev_entry
->vme_end
-
15221 prev_entry
->vme_start
))
15222 == VME_OFFSET(this_entry
)) &&
15224 (prev_entry
->behavior
== this_entry
->behavior
) &&
15225 (prev_entry
->needs_copy
== this_entry
->needs_copy
) &&
15226 (prev_entry
->protection
== this_entry
->protection
) &&
15227 (prev_entry
->max_protection
== this_entry
->max_protection
) &&
15228 (prev_entry
->inheritance
== this_entry
->inheritance
) &&
15229 (prev_entry
->use_pmap
== this_entry
->use_pmap
) &&
15230 (VME_ALIAS(prev_entry
) == VME_ALIAS(this_entry
)) &&
15231 (prev_entry
->no_cache
== this_entry
->no_cache
) &&
15232 (prev_entry
->permanent
== this_entry
->permanent
) &&
15233 (prev_entry
->map_aligned
== this_entry
->map_aligned
) &&
15234 (prev_entry
->zero_wired_pages
== this_entry
->zero_wired_pages
) &&
15235 (prev_entry
->used_for_jit
== this_entry
->used_for_jit
) &&
15236 (prev_entry
->pmap_cs_associated
== this_entry
->pmap_cs_associated
) &&
15237 /* from_reserved_zone: OK if that field doesn't match */
15238 (prev_entry
->iokit_acct
== this_entry
->iokit_acct
) &&
15239 (prev_entry
->vme_resilient_codesign
==
15240 this_entry
->vme_resilient_codesign
) &&
15241 (prev_entry
->vme_resilient_media
==
15242 this_entry
->vme_resilient_media
) &&
15243 (prev_entry
->vme_no_copy_on_read
== this_entry
->vme_no_copy_on_read
) &&
15245 (prev_entry
->wired_count
== this_entry
->wired_count
) &&
15246 (prev_entry
->user_wired_count
== this_entry
->user_wired_count
) &&
15248 ((prev_entry
->vme_atomic
== FALSE
) && (this_entry
->vme_atomic
== FALSE
)) &&
15249 (prev_entry
->in_transition
== FALSE
) &&
15250 (this_entry
->in_transition
== FALSE
) &&
15251 (prev_entry
->needs_wakeup
== FALSE
) &&
15252 (this_entry
->needs_wakeup
== FALSE
) &&
15253 (prev_entry
->is_shared
== this_entry
->is_shared
) &&
15254 (prev_entry
->superpage_size
== FALSE
) &&
15255 (this_entry
->superpage_size
== FALSE
)
15257 vm_map_store_entry_unlink(map
, prev_entry
);
15258 assert(prev_entry
->vme_start
< this_entry
->vme_end
);
15259 if (prev_entry
->map_aligned
) {
15260 assert(VM_MAP_PAGE_ALIGNED(prev_entry
->vme_start
,
15261 VM_MAP_PAGE_MASK(map
)));
15263 this_entry
->vme_start
= prev_entry
->vme_start
;
15264 VME_OFFSET_SET(this_entry
, VME_OFFSET(prev_entry
));
15266 if (map
->holelistenabled
) {
15267 vm_map_store_update_first_free(map
, this_entry
, TRUE
);
15270 if (prev_entry
->is_sub_map
) {
15271 vm_map_deallocate(VME_SUBMAP(prev_entry
));
15273 vm_object_deallocate(VME_OBJECT(prev_entry
));
15275 vm_map_entry_dispose(map
, prev_entry
);
15276 SAVE_HINT_MAP_WRITE(map
, this_entry
);
15277 counter(c_vm_map_simplified
++);
15284 vm_map_offset_t start
)
15286 vm_map_entry_t this_entry
;
15289 if (vm_map_lookup_entry(map
, start
, &this_entry
)) {
15290 vm_map_simplify_entry(map
, this_entry
);
15291 vm_map_simplify_entry(map
, this_entry
->vme_next
);
15293 counter(c_vm_map_simplify_called
++);
15294 vm_map_unlock(map
);
15298 vm_map_simplify_range(
15300 vm_map_offset_t start
,
15301 vm_map_offset_t end
)
15303 vm_map_entry_t entry
;
15306 * The map should be locked (for "write") by the caller.
15309 if (start
>= end
) {
15310 /* invalid address range */
15314 start
= vm_map_trunc_page(start
,
15315 VM_MAP_PAGE_MASK(map
));
15316 end
= vm_map_round_page(end
,
15317 VM_MAP_PAGE_MASK(map
));
15319 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
15320 /* "start" is not mapped and "entry" ends before "start" */
15321 if (entry
== vm_map_to_entry(map
)) {
15322 /* start with first entry in the map */
15323 entry
= vm_map_first_entry(map
);
15325 /* start with next entry */
15326 entry
= entry
->vme_next
;
15330 while (entry
!= vm_map_to_entry(map
) &&
15331 entry
->vme_start
<= end
) {
15332 /* try and coalesce "entry" with its previous entry */
15333 vm_map_simplify_entry(map
, entry
);
15334 entry
= entry
->vme_next
;
15340 * Routine: vm_map_machine_attribute
15342 * Provide machine-specific attributes to mappings,
15343 * such as cachability etc. for machines that provide
15344 * them. NUMA architectures and machines with big/strange
15345 * caches will use this.
15347 * Responsibilities for locking and checking are handled here,
15348 * everything else in the pmap module. If any non-volatile
15349 * information must be kept, the pmap module should handle
15350 * it itself. [This assumes that attributes do not
15351 * need to be inherited, which seems ok to me]
15354 vm_map_machine_attribute(
15356 vm_map_offset_t start
,
15357 vm_map_offset_t end
,
15358 vm_machine_attribute_t attribute
,
15359 vm_machine_attribute_val_t
* value
) /* IN/OUT */
15362 vm_map_size_t sync_size
;
15363 vm_map_entry_t entry
;
15365 if (start
< vm_map_min(map
) || end
> vm_map_max(map
)) {
15366 return KERN_INVALID_ADDRESS
;
15369 /* Figure how much memory we need to flush (in page increments) */
15370 sync_size
= end
- start
;
15374 if (attribute
!= MATTR_CACHE
) {
15375 /* If we don't have to find physical addresses, we */
15376 /* don't have to do an explicit traversal here. */
15377 ret
= pmap_attribute(map
->pmap
, start
, end
- start
,
15379 vm_map_unlock(map
);
15383 ret
= KERN_SUCCESS
; /* Assume it all worked */
15385 while (sync_size
) {
15386 if (vm_map_lookup_entry(map
, start
, &entry
)) {
15387 vm_map_size_t sub_size
;
15388 if ((entry
->vme_end
- start
) > sync_size
) {
15389 sub_size
= sync_size
;
15392 sub_size
= entry
->vme_end
- start
;
15393 sync_size
-= sub_size
;
15395 if (entry
->is_sub_map
) {
15396 vm_map_offset_t sub_start
;
15397 vm_map_offset_t sub_end
;
15399 sub_start
= (start
- entry
->vme_start
)
15400 + VME_OFFSET(entry
);
15401 sub_end
= sub_start
+ sub_size
;
15402 vm_map_machine_attribute(
15408 if (VME_OBJECT(entry
)) {
15410 vm_object_t object
;
15411 vm_object_t base_object
;
15412 vm_object_t last_object
;
15413 vm_object_offset_t offset
;
15414 vm_object_offset_t base_offset
;
15415 vm_map_size_t range
;
15417 offset
= (start
- entry
->vme_start
)
15418 + VME_OFFSET(entry
);
15419 offset
= vm_object_trunc_page(offset
);
15420 base_offset
= offset
;
15421 object
= VME_OBJECT(entry
);
15422 base_object
= object
;
15423 last_object
= NULL
;
15425 vm_object_lock(object
);
15428 m
= vm_page_lookup(
15431 if (m
&& !m
->vmp_fictitious
) {
15433 pmap_attribute_cache_sync(
15434 VM_PAGE_GET_PHYS_PAGE(m
),
15437 } else if (object
->shadow
) {
15438 offset
= offset
+ object
->vo_shadow_offset
;
15439 last_object
= object
;
15440 object
= object
->shadow
;
15441 vm_object_lock(last_object
->shadow
);
15442 vm_object_unlock(last_object
);
15445 if (range
< PAGE_SIZE
) {
15448 range
-= PAGE_SIZE
;
15451 if (base_object
!= object
) {
15452 vm_object_unlock(object
);
15453 vm_object_lock(base_object
);
15454 object
= base_object
;
15456 /* Bump to the next page */
15457 base_offset
+= PAGE_SIZE
;
15458 offset
= base_offset
;
15460 vm_object_unlock(object
);
15465 vm_map_unlock(map
);
15466 return KERN_FAILURE
;
15470 vm_map_unlock(map
);
15476 * vm_map_behavior_set:
15478 * Sets the paging reference behavior of the specified address
15479 * range in the target map. Paging reference behavior affects
15480 * how pagein operations resulting from faults on the map will be
15484 vm_map_behavior_set(
15486 vm_map_offset_t start
,
15487 vm_map_offset_t end
,
15488 vm_behavior_t new_behavior
)
15490 vm_map_entry_t entry
;
15491 vm_map_entry_t temp_entry
;
15494 start
< vm_map_min(map
) ||
15495 end
> vm_map_max(map
)) {
15496 return KERN_NO_SPACE
;
15499 switch (new_behavior
) {
15501 * This first block of behaviors all set a persistent state on the specified
15502 * memory range. All we have to do here is to record the desired behavior
15503 * in the vm_map_entry_t's.
15506 case VM_BEHAVIOR_DEFAULT
:
15507 case VM_BEHAVIOR_RANDOM
:
15508 case VM_BEHAVIOR_SEQUENTIAL
:
15509 case VM_BEHAVIOR_RSEQNTL
:
15510 case VM_BEHAVIOR_ZERO_WIRED_PAGES
:
15514 * The entire address range must be valid for the map.
15515 * Note that vm_map_range_check() does a
15516 * vm_map_lookup_entry() internally and returns the
15517 * entry containing the start of the address range if
15518 * the entire range is valid.
15520 if (vm_map_range_check(map
, start
, end
, &temp_entry
)) {
15521 entry
= temp_entry
;
15522 vm_map_clip_start(map
, entry
, start
);
15524 vm_map_unlock(map
);
15525 return KERN_INVALID_ADDRESS
;
15528 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
15529 vm_map_clip_end(map
, entry
, end
);
15530 if (entry
->is_sub_map
) {
15531 assert(!entry
->use_pmap
);
15534 if (new_behavior
== VM_BEHAVIOR_ZERO_WIRED_PAGES
) {
15535 entry
->zero_wired_pages
= TRUE
;
15537 entry
->behavior
= new_behavior
;
15539 entry
= entry
->vme_next
;
15542 vm_map_unlock(map
);
15546 * The rest of these are different from the above in that they cause
15547 * an immediate action to take place as opposed to setting a behavior that
15548 * affects future actions.
15551 case VM_BEHAVIOR_WILLNEED
:
15552 return vm_map_willneed(map
, start
, end
);
15554 case VM_BEHAVIOR_DONTNEED
:
15555 return vm_map_msync(map
, start
, end
- start
, VM_SYNC_DEACTIVATE
| VM_SYNC_CONTIGUOUS
);
15557 case VM_BEHAVIOR_FREE
:
15558 return vm_map_msync(map
, start
, end
- start
, VM_SYNC_KILLPAGES
| VM_SYNC_CONTIGUOUS
);
15560 case VM_BEHAVIOR_REUSABLE
:
15561 return vm_map_reusable_pages(map
, start
, end
);
15563 case VM_BEHAVIOR_REUSE
:
15564 return vm_map_reuse_pages(map
, start
, end
);
15566 case VM_BEHAVIOR_CAN_REUSE
:
15567 return vm_map_can_reuse(map
, start
, end
);
15570 case VM_BEHAVIOR_PAGEOUT
:
15571 return vm_map_pageout(map
, start
, end
);
15572 #endif /* MACH_ASSERT */
15575 return KERN_INVALID_ARGUMENT
;
15578 return KERN_SUCCESS
;
15583 * Internals for madvise(MADV_WILLNEED) system call.
15585 * The implementation is to do:-
15586 * a) read-ahead if the mapping corresponds to a mapped regular file
15587 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
15591 static kern_return_t
15594 vm_map_offset_t start
,
15595 vm_map_offset_t end
15598 vm_map_entry_t entry
;
15599 vm_object_t object
;
15600 memory_object_t pager
;
15601 struct vm_object_fault_info fault_info
= {};
15603 vm_object_size_t len
;
15604 vm_object_offset_t offset
;
15606 fault_info
.interruptible
= THREAD_UNINT
; /* ignored value */
15607 fault_info
.behavior
= VM_BEHAVIOR_SEQUENTIAL
;
15608 fault_info
.stealth
= TRUE
;
15611 * The MADV_WILLNEED operation doesn't require any changes to the
15612 * vm_map_entry_t's, so the read lock is sufficient.
15615 vm_map_lock_read(map
);
15618 * The madvise semantics require that the address range be fully
15619 * allocated with no holes. Otherwise, we're required to return
15623 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15624 vm_map_unlock_read(map
);
15625 return KERN_INVALID_ADDRESS
;
15629 * Examine each vm_map_entry_t in the range.
15631 for (; entry
!= vm_map_to_entry(map
) && start
< end
;) {
15633 * The first time through, the start address could be anywhere
15634 * within the vm_map_entry we found. So adjust the offset to
15635 * correspond. After that, the (start - vme_start) adjustment is zero
15636 * because start corresponds to the beginning of the current vm_map_entry.
15638 offset
= (start
- entry
->vme_start
) + VME_OFFSET(entry
);
15641 * Set the length so we don't go beyond the end of the
15642 * map_entry or beyond the end of the range we were given.
15643 * This range could also span multiple map entries, all of which
15644 * map different files, so make sure we only do the right amount
15645 * of I/O for each object. Note that it's possible for there
15646 * to be multiple map entries all referring to the same object
15647 * but with different page permissions, but it's not worth
15648 * trying to optimize that case.
15650 len
= MIN(entry
->vme_end
- start
, end
- start
);
15652 if ((vm_size_t
) len
!= len
) {
15653 /* 32-bit overflow */
15654 len
= (vm_size_t
) (0 - PAGE_SIZE
);
15656 fault_info
.cluster_size
= (vm_size_t
) len
;
15657 fault_info
.lo_offset
= offset
;
15658 fault_info
.hi_offset
= offset
+ len
;
15659 fault_info
.user_tag
= VME_ALIAS(entry
);
15660 fault_info
.pmap_options
= 0;
15661 if (entry
->iokit_acct
||
15662 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
15663 fault_info
.pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
15667 * If the entry is a submap OR there's no read permission
15668 * to this mapping, then just skip it.
15670 if ((entry
->is_sub_map
) || (entry
->protection
& VM_PROT_READ
) == 0) {
15671 entry
= entry
->vme_next
;
15672 start
= entry
->vme_start
;
15676 object
= VME_OBJECT(entry
);
15678 if (object
== NULL
||
15679 (object
&& object
->internal
)) {
15681 * Memory range backed by anonymous memory.
15683 vm_size_t region_size
= 0, effective_page_size
= 0;
15684 vm_map_offset_t addr
= 0, effective_page_mask
= 0;
15689 effective_page_mask
= MIN(vm_map_page_mask(current_map()), PAGE_MASK
);
15690 effective_page_size
= effective_page_mask
+ 1;
15692 vm_map_unlock_read(map
);
15694 while (region_size
) {
15696 vm_map_trunc_page(addr
, effective_page_mask
),
15697 VM_PROT_READ
| VM_PROT_WRITE
);
15699 region_size
-= effective_page_size
;
15700 addr
+= effective_page_size
;
15704 * Find the file object backing this map entry. If there is
15705 * none, then we simply ignore the "will need" advice for this
15706 * entry and go on to the next one.
15708 if ((object
= find_vnode_object(entry
)) == VM_OBJECT_NULL
) {
15709 entry
= entry
->vme_next
;
15710 start
= entry
->vme_start
;
15714 vm_object_paging_begin(object
);
15715 pager
= object
->pager
;
15716 vm_object_unlock(object
);
15719 * The data_request() could take a long time, so let's
15720 * release the map lock to avoid blocking other threads.
15722 vm_map_unlock_read(map
);
15725 * Get the data from the object asynchronously.
15727 * Note that memory_object_data_request() places limits on the
15728 * amount of I/O it will do. Regardless of the len we
15729 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15730 * silently truncates the len to that size. This isn't
15731 * necessarily bad since madvise shouldn't really be used to
15732 * page in unlimited amounts of data. Other Unix variants
15733 * limit the willneed case as well. If this turns out to be an
15734 * issue for developers, then we can always adjust the policy
15735 * here and still be backwards compatible since this is all
15738 kr
= memory_object_data_request(
15740 vm_object_trunc_page(offset
) + object
->paging_offset
,
15743 (memory_object_fault_info_t
)&fault_info
);
15745 vm_object_lock(object
);
15746 vm_object_paging_end(object
);
15747 vm_object_unlock(object
);
15750 * If we couldn't do the I/O for some reason, just give up on
15751 * the madvise. We still return success to the user since
15752 * madvise isn't supposed to fail when the advice can't be
15756 if (kr
!= KERN_SUCCESS
) {
15757 return KERN_SUCCESS
;
15762 if (start
>= end
) {
15764 return KERN_SUCCESS
;
15767 /* look up next entry */
15768 vm_map_lock_read(map
);
15769 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
15771 * There's a new hole in the address range.
15773 vm_map_unlock_read(map
);
15774 return KERN_INVALID_ADDRESS
;
15778 vm_map_unlock_read(map
);
15779 return KERN_SUCCESS
;
15783 vm_map_entry_is_reusable(
15784 vm_map_entry_t entry
)
15786 /* Only user map entries */
15788 vm_object_t object
;
15790 if (entry
->is_sub_map
) {
15794 switch (VME_ALIAS(entry
)) {
15795 case VM_MEMORY_MALLOC
:
15796 case VM_MEMORY_MALLOC_SMALL
:
15797 case VM_MEMORY_MALLOC_LARGE
:
15798 case VM_MEMORY_REALLOC
:
15799 case VM_MEMORY_MALLOC_TINY
:
15800 case VM_MEMORY_MALLOC_LARGE_REUSABLE
:
15801 case VM_MEMORY_MALLOC_LARGE_REUSED
:
15803 * This is a malloc() memory region: check if it's still
15804 * in its original state and can be re-used for more
15805 * malloc() allocations.
15810 * Not a malloc() memory region: let the caller decide if
15816 if (/*entry->is_shared ||*/
15817 entry
->is_sub_map
||
15818 entry
->in_transition
||
15819 entry
->protection
!= VM_PROT_DEFAULT
||
15820 entry
->max_protection
!= VM_PROT_ALL
||
15821 entry
->inheritance
!= VM_INHERIT_DEFAULT
||
15823 entry
->permanent
||
15824 entry
->superpage_size
!= FALSE
||
15825 entry
->zero_wired_pages
||
15826 entry
->wired_count
!= 0 ||
15827 entry
->user_wired_count
!= 0) {
15831 object
= VME_OBJECT(entry
);
15832 if (object
== VM_OBJECT_NULL
) {
15838 * Let's proceed even if the VM object is potentially
15840 * We check for this later when processing the actual
15841 * VM pages, so the contents will be safe if shared.
15843 * But we can still mark this memory region as "reusable" to
15844 * acknowledge that the caller did let us know that the memory
15845 * could be re-used and should not be penalized for holding
15846 * on to it. This allows its "resident size" to not include
15847 * the reusable range.
15849 object
->ref_count
== 1 &&
15851 object
->wired_page_count
== 0 &&
15852 object
->copy
== VM_OBJECT_NULL
&&
15853 object
->shadow
== VM_OBJECT_NULL
&&
15854 object
->internal
&&
15855 object
->purgable
== VM_PURGABLE_DENY
&&
15856 object
->copy_strategy
!= MEMORY_OBJECT_COPY_DELAY
&&
15857 !object
->true_share
&&
15858 object
->wimg_bits
== VM_WIMG_USE_DEFAULT
&&
15859 !object
->code_signed
) {
15865 static kern_return_t
15866 vm_map_reuse_pages(
15868 vm_map_offset_t start
,
15869 vm_map_offset_t end
)
15871 vm_map_entry_t entry
;
15872 vm_object_t object
;
15873 vm_object_offset_t start_offset
, end_offset
;
15876 * The MADV_REUSE operation doesn't require any changes to the
15877 * vm_map_entry_t's, so the read lock is sufficient.
15880 if (VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
) {
15883 * need to figure out what reusable means for a
15884 * portion of a native page.
15886 return KERN_SUCCESS
;
15889 vm_map_lock_read(map
);
15890 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
15893 * The madvise semantics require that the address range be fully
15894 * allocated with no holes. Otherwise, we're required to return
15898 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15899 vm_map_unlock_read(map
);
15900 vm_page_stats_reusable
.reuse_pages_failure
++;
15901 return KERN_INVALID_ADDRESS
;
15905 * Examine each vm_map_entry_t in the range.
15907 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
15908 entry
= entry
->vme_next
) {
15910 * Sanity check on the VM map entry.
15912 if (!vm_map_entry_is_reusable(entry
)) {
15913 vm_map_unlock_read(map
);
15914 vm_page_stats_reusable
.reuse_pages_failure
++;
15915 return KERN_INVALID_ADDRESS
;
15919 * The first time through, the start address could be anywhere
15920 * within the vm_map_entry we found. So adjust the offset to
15923 if (entry
->vme_start
< start
) {
15924 start_offset
= start
- entry
->vme_start
;
15928 end_offset
= MIN(end
, entry
->vme_end
) - entry
->vme_start
;
15929 start_offset
+= VME_OFFSET(entry
);
15930 end_offset
+= VME_OFFSET(entry
);
15932 assert(!entry
->is_sub_map
);
15933 object
= VME_OBJECT(entry
);
15934 if (object
!= VM_OBJECT_NULL
) {
15935 vm_object_lock(object
);
15936 vm_object_reuse_pages(object
, start_offset
, end_offset
,
15938 vm_object_unlock(object
);
15941 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE_REUSABLE
) {
15944 * We do not hold the VM map exclusively here.
15945 * The "alias" field is not that critical, so it's
15946 * safe to update it here, as long as it is the only
15947 * one that can be modified while holding the VM map
15950 VME_ALIAS_SET(entry
, VM_MEMORY_MALLOC_LARGE_REUSED
);
15954 vm_map_unlock_read(map
);
15955 vm_page_stats_reusable
.reuse_pages_success
++;
15956 return KERN_SUCCESS
;
15960 static kern_return_t
15961 vm_map_reusable_pages(
15963 vm_map_offset_t start
,
15964 vm_map_offset_t end
)
15966 vm_map_entry_t entry
;
15967 vm_object_t object
;
15968 vm_object_offset_t start_offset
, end_offset
;
15969 vm_map_offset_t pmap_offset
;
15971 if (VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
) {
15974 * need to figure out what reusable means for a portion
15975 * of a native page.
15977 return KERN_SUCCESS
;
15981 * The MADV_REUSABLE operation doesn't require any changes to the
15982 * vm_map_entry_t's, so the read lock is sufficient.
15985 vm_map_lock_read(map
);
15986 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
15989 * The madvise semantics require that the address range be fully
15990 * allocated with no holes. Otherwise, we're required to return
15994 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15995 vm_map_unlock_read(map
);
15996 vm_page_stats_reusable
.reusable_pages_failure
++;
15997 return KERN_INVALID_ADDRESS
;
16001 * Examine each vm_map_entry_t in the range.
16003 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
16004 entry
= entry
->vme_next
) {
16005 int kill_pages
= 0;
16008 * Sanity check on the VM map entry.
16010 if (!vm_map_entry_is_reusable(entry
)) {
16011 vm_map_unlock_read(map
);
16012 vm_page_stats_reusable
.reusable_pages_failure
++;
16013 return KERN_INVALID_ADDRESS
;
16016 if (!(entry
->protection
& VM_PROT_WRITE
) && !entry
->used_for_jit
) {
16017 /* not writable: can't discard contents */
16018 vm_map_unlock_read(map
);
16019 vm_page_stats_reusable
.reusable_nonwritable
++;
16020 vm_page_stats_reusable
.reusable_pages_failure
++;
16021 return KERN_PROTECTION_FAILURE
;
16025 * The first time through, the start address could be anywhere
16026 * within the vm_map_entry we found. So adjust the offset to
16029 if (entry
->vme_start
< start
) {
16030 start_offset
= start
- entry
->vme_start
;
16031 pmap_offset
= start
;
16034 pmap_offset
= entry
->vme_start
;
16036 end_offset
= MIN(end
, entry
->vme_end
) - entry
->vme_start
;
16037 start_offset
+= VME_OFFSET(entry
);
16038 end_offset
+= VME_OFFSET(entry
);
16040 assert(!entry
->is_sub_map
);
16041 object
= VME_OBJECT(entry
);
16042 if (object
== VM_OBJECT_NULL
) {
16047 vm_object_lock(object
);
16048 if (((object
->ref_count
== 1) ||
16049 (object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
&&
16050 object
->copy
== VM_OBJECT_NULL
)) &&
16051 object
->shadow
== VM_OBJECT_NULL
&&
16053 * "iokit_acct" entries are billed for their virtual size
16054 * (rather than for their resident pages only), so they
16055 * wouldn't benefit from making pages reusable, and it
16056 * would be hard to keep track of pages that are both
16057 * "iokit_acct" and "reusable" in the pmap stats and
16060 !(entry
->iokit_acct
||
16061 (!entry
->is_sub_map
&& !entry
->use_pmap
))) {
16062 if (object
->ref_count
!= 1) {
16063 vm_page_stats_reusable
.reusable_shared
++;
16069 if (kill_pages
!= -1) {
16070 vm_object_deactivate_pages(object
,
16072 end_offset
- start_offset
,
16074 TRUE
/*reusable_pages*/,
16078 vm_page_stats_reusable
.reusable_pages_shared
++;
16080 vm_object_unlock(object
);
16082 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE
||
16083 VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE_REUSED
) {
16086 * We do not hold the VM map exclusively here.
16087 * The "alias" field is not that critical, so it's
16088 * safe to update it here, as long as it is the only
16089 * one that can be modified while holding the VM map
16092 VME_ALIAS_SET(entry
, VM_MEMORY_MALLOC_LARGE_REUSABLE
);
16096 vm_map_unlock_read(map
);
16097 vm_page_stats_reusable
.reusable_pages_success
++;
16098 return KERN_SUCCESS
;
16102 static kern_return_t
16105 vm_map_offset_t start
,
16106 vm_map_offset_t end
)
16108 vm_map_entry_t entry
;
16111 * The MADV_CAN_REUSE operation doesn't require any changes to the
16112 * vm_map_entry_t's, so the read lock is sufficient.
16115 vm_map_lock_read(map
);
16116 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
16119 * The madvise semantics require that the address range be fully
16120 * allocated with no holes. Otherwise, we're required to return
16124 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
16125 vm_map_unlock_read(map
);
16126 vm_page_stats_reusable
.can_reuse_failure
++;
16127 return KERN_INVALID_ADDRESS
;
16131 * Examine each vm_map_entry_t in the range.
16133 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
16134 entry
= entry
->vme_next
) {
16136 * Sanity check on the VM map entry.
16138 if (!vm_map_entry_is_reusable(entry
)) {
16139 vm_map_unlock_read(map
);
16140 vm_page_stats_reusable
.can_reuse_failure
++;
16141 return KERN_INVALID_ADDRESS
;
16145 vm_map_unlock_read(map
);
16146 vm_page_stats_reusable
.can_reuse_success
++;
16147 return KERN_SUCCESS
;
16152 static kern_return_t
16155 vm_map_offset_t start
,
16156 vm_map_offset_t end
)
16158 vm_map_entry_t entry
;
16161 * The MADV_PAGEOUT operation doesn't require any changes to the
16162 * vm_map_entry_t's, so the read lock is sufficient.
16165 vm_map_lock_read(map
);
16168 * The madvise semantics require that the address range be fully
16169 * allocated with no holes. Otherwise, we're required to return
16173 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
16174 vm_map_unlock_read(map
);
16175 return KERN_INVALID_ADDRESS
;
16179 * Examine each vm_map_entry_t in the range.
16181 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
16182 entry
= entry
->vme_next
) {
16183 vm_object_t object
;
16186 * Sanity check on the VM map entry.
16188 if (entry
->is_sub_map
) {
16190 vm_map_offset_t submap_start
;
16191 vm_map_offset_t submap_end
;
16192 vm_map_entry_t submap_entry
;
16194 submap
= VME_SUBMAP(entry
);
16195 submap_start
= VME_OFFSET(entry
);
16196 submap_end
= submap_start
+ (entry
->vme_end
-
16199 vm_map_lock_read(submap
);
16201 if (!vm_map_range_check(submap
,
16205 vm_map_unlock_read(submap
);
16206 vm_map_unlock_read(map
);
16207 return KERN_INVALID_ADDRESS
;
16210 object
= VME_OBJECT(submap_entry
);
16211 if (submap_entry
->is_sub_map
||
16212 object
== VM_OBJECT_NULL
||
16213 !object
->internal
) {
16214 vm_map_unlock_read(submap
);
16218 vm_object_pageout(object
);
16220 vm_map_unlock_read(submap
);
16221 submap
= VM_MAP_NULL
;
16222 submap_entry
= VM_MAP_ENTRY_NULL
;
16226 object
= VME_OBJECT(entry
);
16227 if (entry
->is_sub_map
||
16228 object
== VM_OBJECT_NULL
||
16229 !object
->internal
) {
16233 vm_object_pageout(object
);
16236 vm_map_unlock_read(map
);
16237 return KERN_SUCCESS
;
16239 #endif /* MACH_ASSERT */
16243 * Routine: vm_map_entry_insert
16245 * Description: This routine inserts a new vm_entry in a locked map.
16248 vm_map_entry_insert(
16250 vm_map_entry_t insp_entry
,
16251 vm_map_offset_t start
,
16252 vm_map_offset_t end
,
16253 vm_object_t object
,
16254 vm_object_offset_t offset
,
16255 boolean_t needs_copy
,
16256 boolean_t is_shared
,
16257 boolean_t in_transition
,
16258 vm_prot_t cur_protection
,
16259 vm_prot_t max_protection
,
16260 vm_behavior_t behavior
,
16261 vm_inherit_t inheritance
,
16262 unsigned short wired_count
,
16263 boolean_t no_cache
,
16264 boolean_t permanent
,
16265 boolean_t no_copy_on_read
,
16266 unsigned int superpage_size
,
16267 boolean_t clear_map_aligned
,
16268 boolean_t is_submap
,
16269 boolean_t used_for_jit
,
16271 boolean_t translated_allow_execute
)
16273 vm_map_entry_t new_entry
;
16275 assert(insp_entry
!= (vm_map_entry_t
)0);
16276 vm_map_lock_assert_exclusive(map
);
16278 #if DEVELOPMENT || DEBUG
16279 vm_object_offset_t end_offset
= 0;
16280 assertf(!os_add_overflow(end
- start
, offset
, &end_offset
), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end
- start
), offset
);
16281 #endif /* DEVELOPMENT || DEBUG */
16283 new_entry
= vm_map_entry_create(map
, !map
->hdr
.entries_pageable
);
16285 if (VM_MAP_PAGE_SHIFT(map
) != PAGE_SHIFT
) {
16286 new_entry
->map_aligned
= TRUE
;
16288 new_entry
->map_aligned
= FALSE
;
16290 if (clear_map_aligned
&&
16291 (!VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)) ||
16292 !VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)))) {
16293 new_entry
->map_aligned
= FALSE
;
16296 new_entry
->vme_start
= start
;
16297 new_entry
->vme_end
= end
;
16298 if (new_entry
->map_aligned
) {
16299 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_start
,
16300 VM_MAP_PAGE_MASK(map
)));
16301 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_end
,
16302 VM_MAP_PAGE_MASK(map
)));
16304 assert(page_aligned(new_entry
->vme_start
));
16305 assert(page_aligned(new_entry
->vme_end
));
16307 assert(new_entry
->vme_start
< new_entry
->vme_end
);
16309 VME_OBJECT_SET(new_entry
, object
);
16310 VME_OFFSET_SET(new_entry
, offset
);
16311 new_entry
->is_shared
= is_shared
;
16312 new_entry
->is_sub_map
= is_submap
;
16313 new_entry
->needs_copy
= needs_copy
;
16314 new_entry
->in_transition
= in_transition
;
16315 new_entry
->needs_wakeup
= FALSE
;
16316 new_entry
->inheritance
= inheritance
;
16317 new_entry
->protection
= cur_protection
;
16318 new_entry
->max_protection
= max_protection
;
16319 new_entry
->behavior
= behavior
;
16320 new_entry
->wired_count
= wired_count
;
16321 new_entry
->user_wired_count
= 0;
16324 * submap: "use_pmap" means "nested".
16327 new_entry
->use_pmap
= FALSE
;
16330 * object: "use_pmap" means "use pmap accounting" for footprint.
16333 new_entry
->use_pmap
= TRUE
;
16335 VME_ALIAS_SET(new_entry
, alias
);
16336 new_entry
->zero_wired_pages
= FALSE
;
16337 new_entry
->no_cache
= no_cache
;
16338 new_entry
->permanent
= permanent
;
16339 if (superpage_size
) {
16340 new_entry
->superpage_size
= TRUE
;
16342 new_entry
->superpage_size
= FALSE
;
16344 if (used_for_jit
) {
16345 if (!(map
->jit_entry_exists
) ||
16346 VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map
)) {
16347 new_entry
->used_for_jit
= TRUE
;
16348 map
->jit_entry_exists
= TRUE
;
16351 new_entry
->used_for_jit
= FALSE
;
16353 if (translated_allow_execute
) {
16354 new_entry
->translated_allow_execute
= TRUE
;
16356 new_entry
->translated_allow_execute
= FALSE
;
16358 new_entry
->pmap_cs_associated
= FALSE
;
16359 new_entry
->iokit_acct
= FALSE
;
16360 new_entry
->vme_resilient_codesign
= FALSE
;
16361 new_entry
->vme_resilient_media
= FALSE
;
16362 new_entry
->vme_atomic
= FALSE
;
16363 new_entry
->vme_no_copy_on_read
= no_copy_on_read
;
16366 * Insert the new entry into the list.
16369 vm_map_store_entry_link(map
, insp_entry
, new_entry
,
16370 VM_MAP_KERNEL_FLAGS_NONE
);
16371 map
->size
+= end
- start
;
16374 * Update the free space hint and the lookup hint.
16377 SAVE_HINT_MAP_WRITE(map
, new_entry
);
16381 int vm_remap_old_path
= 0;
16382 int vm_remap_new_path
= 0;
16384 * Routine: vm_map_remap_extract
16386 * Description: This routine returns a vm_entry list from a map.
16388 static kern_return_t
16389 vm_map_remap_extract(
16391 vm_map_offset_t addr
,
16392 vm_map_size_t size
,
16393 vm_prot_t required_protection
,
16395 struct vm_map_header
*map_header
,
16396 vm_prot_t
*cur_protection
,
16397 vm_prot_t
*max_protection
,
16398 /* What, no behavior? */
16399 vm_inherit_t inheritance
,
16400 vm_map_kernel_flags_t vmk_flags
)
16402 kern_return_t result
;
16403 vm_map_size_t mapped_size
;
16404 vm_map_size_t tmp_size
;
16405 vm_map_entry_t src_entry
; /* result of last map lookup */
16406 vm_map_entry_t new_entry
;
16407 vm_object_offset_t offset
;
16408 vm_map_offset_t map_address
;
16409 vm_map_offset_t src_start
; /* start of entry to map */
16410 vm_map_offset_t src_end
; /* end of region to be mapped */
16411 vm_object_t object
;
16412 vm_map_version_t version
;
16413 boolean_t src_needs_copy
;
16414 boolean_t new_entry_needs_copy
;
16415 vm_map_entry_t saved_src_entry
;
16416 boolean_t src_entry_was_wired
;
16417 vm_prot_t max_prot_for_prot_copy
;
16418 vm_map_offset_t effective_page_mask
;
16419 boolean_t pageable
, same_map
;
16421 pageable
= vmk_flags
.vmkf_copy_pageable
;
16422 same_map
= vmk_flags
.vmkf_copy_same_map
;
16424 effective_page_mask
= MIN(PAGE_MASK
, VM_MAP_PAGE_MASK(map
));
16426 assert(map
!= VM_MAP_NULL
);
16428 assert(size
== vm_map_round_page(size
, effective_page_mask
));
16429 assert(inheritance
== VM_INHERIT_NONE
||
16430 inheritance
== VM_INHERIT_COPY
||
16431 inheritance
== VM_INHERIT_SHARE
);
16432 assert(!(required_protection
& ~VM_PROT_ALL
));
16435 * Compute start and end of region.
16437 src_start
= vm_map_trunc_page(addr
, effective_page_mask
);
16438 src_end
= vm_map_round_page(src_start
+ size
, effective_page_mask
);
16441 * Initialize map_header.
16443 map_header
->links
.next
= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
16444 map_header
->links
.prev
= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
16445 map_header
->nentries
= 0;
16446 map_header
->entries_pageable
= pageable
;
16447 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16448 map_header
->page_shift
= VM_MAP_PAGE_SHIFT(map
);
16449 map_header
->rb_head_store
.rbh_root
= (void *)(int)SKIP_RB_TREE
;
16451 vm_map_store_init( map_header
);
16453 if (copy
&& vmk_flags
.vmkf_remap_prot_copy
) {
16454 max_prot_for_prot_copy
= *max_protection
& VM_PROT_ALL
;
16456 max_prot_for_prot_copy
= VM_PROT_NONE
;
16458 *cur_protection
= VM_PROT_ALL
;
16459 *max_protection
= VM_PROT_ALL
;
16463 result
= KERN_SUCCESS
;
16466 * The specified source virtual space might correspond to
16467 * multiple map entries, need to loop on them.
16470 if (VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
) {
16472 * This address space uses sub-pages so the range might
16473 * not be re-mappable in an address space with larger
16474 * pages. Re-assemble any broken-up VM map entries to
16475 * improve our chances of making it work.
16477 vm_map_simplify_range(map
, src_start
, src_end
);
16479 while (mapped_size
!= size
) {
16480 vm_map_size_t entry_size
;
16483 * Find the beginning of the region.
16485 if (!vm_map_lookup_entry(map
, src_start
, &src_entry
)) {
16486 result
= KERN_INVALID_ADDRESS
;
16490 if (src_start
< src_entry
->vme_start
||
16491 (mapped_size
&& src_start
!= src_entry
->vme_start
)) {
16492 result
= KERN_INVALID_ADDRESS
;
16496 tmp_size
= size
- mapped_size
;
16497 if (src_end
> src_entry
->vme_end
) {
16498 tmp_size
-= (src_end
- src_entry
->vme_end
);
16501 entry_size
= (vm_map_size_t
)(src_entry
->vme_end
-
16502 src_entry
->vme_start
);
16504 if (src_entry
->is_sub_map
&&
16505 vmk_flags
.vmkf_copy_single_object
) {
16507 vm_map_offset_t submap_start
;
16508 vm_map_size_t submap_size
;
16511 * No check for "required_protection" on "src_entry"
16512 * because the protections that matter are the ones
16513 * on the submap's VM map entry, which will be checked
16514 * during the call to vm_map_remap_extract() below.
16516 submap_size
= src_entry
->vme_end
- src_start
;
16517 if (submap_size
> size
) {
16518 submap_size
= size
;
16520 submap_start
= VME_OFFSET(src_entry
) + src_start
- src_entry
->vme_start
;
16521 submap
= VME_SUBMAP(src_entry
);
16522 vm_map_reference(submap
);
16523 vm_map_unlock(map
);
16525 result
= vm_map_remap_extract(submap
,
16528 required_protection
,
16535 vm_map_deallocate(submap
);
16539 if ((src_entry
->protection
& required_protection
)
16540 != required_protection
) {
16541 if (vmk_flags
.vmkf_copy_single_object
&&
16542 mapped_size
!= 0) {
16544 * Single object extraction.
16545 * We can't extract more with the required
16546 * protection but we've extracted some, so
16547 * stop there and declare success.
16548 * The caller should check the size of
16549 * the copy entry we've extracted.
16551 result
= KERN_SUCCESS
;
16554 * VM range extraction.
16555 * Required protection is not available
16556 * for this part of the range: fail.
16558 result
= KERN_PROTECTION_FAILURE
;
16563 if (src_entry
->is_sub_map
&&
16564 VM_MAP_PAGE_SHIFT(VME_SUBMAP(src_entry
)) < PAGE_SHIFT
) {
16566 vm_map_offset_t submap_start
;
16567 vm_map_size_t submap_size
;
16568 vm_map_copy_t submap_copy
;
16569 vm_prot_t submap_curprot
, submap_maxprot
;
16571 vm_remap_new_path
++;
16574 * No check for "required_protection" on "src_entry"
16575 * because the protections that matter are the ones
16576 * on the submap's VM map entry, which will be checked
16577 * during the call to vm_map_copy_extract() below.
16579 object
= VM_OBJECT_NULL
;
16580 submap_copy
= VM_MAP_COPY_NULL
;
16582 /* find equivalent range in the submap */
16583 submap
= VME_SUBMAP(src_entry
);
16584 submap_start
= VME_OFFSET(src_entry
) + src_start
- src_entry
->vme_start
;
16585 submap_size
= tmp_size
;
16586 /* extra ref to keep submap alive */
16587 vm_map_reference(submap
);
16589 DTRACE_VM6(remap_submap_recurse
,
16591 vm_map_offset_t
, addr
,
16592 vm_map_size_t
, size
,
16594 vm_map_offset_t
, submap_start
,
16595 vm_map_size_t
, submap_size
);
16598 * The map can be safely unlocked since we
16599 * already hold a reference on the submap.
16601 * No timestamp since we don't care if the map
16602 * gets modified while we're down in the submap.
16603 * We'll resume the extraction at src_start + tmp_size
16606 vm_map_unlock(map
);
16607 src_entry
= NULL
; /* not valid once map is unlocked */
16609 result
= vm_map_copy_extract(submap
,
16612 required_protection
,
16620 /* release extra ref on submap */
16621 vm_map_deallocate(submap
);
16622 submap
= VM_MAP_NULL
;
16624 if (result
!= KERN_SUCCESS
) {
16629 /* transfer submap_copy entries to map_header */
16630 while (vm_map_copy_first_entry(submap_copy
) !=
16631 vm_map_copy_to_entry(submap_copy
)) {
16632 vm_map_entry_t copy_entry
;
16633 vm_map_size_t copy_entry_size
;
16635 copy_entry
= vm_map_copy_first_entry(submap_copy
);
16636 assert(!copy_entry
->is_sub_map
);
16637 vm_map_copy_entry_unlink(submap_copy
, copy_entry
);
16638 copy_entry_size
= copy_entry
->vme_end
- copy_entry
->vme_start
;
16639 copy_entry
->vme_start
= map_address
;
16640 copy_entry
->vme_end
= map_address
+ copy_entry_size
;
16641 map_address
+= copy_entry_size
;
16642 mapped_size
+= copy_entry_size
;
16643 src_start
+= copy_entry_size
;
16644 assert(src_start
<= src_end
);
16645 _vm_map_store_entry_link(map_header
,
16646 map_header
->links
.prev
,
16649 /* done with submap_copy */
16650 vm_map_copy_discard(submap_copy
);
16652 *cur_protection
&= submap_curprot
;
16653 *max_protection
&= submap_maxprot
;
16655 /* re-acquire the map lock and continue to next entry */
16658 } else if (src_entry
->is_sub_map
) {
16659 vm_remap_old_path
++;
16660 DTRACE_VM4(remap_submap
,
16662 vm_map_offset_t
, addr
,
16663 vm_map_size_t
, size
,
16666 vm_map_reference(VME_SUBMAP(src_entry
));
16667 object
= VM_OBJECT_NULL
;
16669 object
= VME_OBJECT(src_entry
);
16670 if (src_entry
->iokit_acct
) {
16672 * This entry uses "IOKit accounting".
16674 } else if (object
!= VM_OBJECT_NULL
&&
16675 (object
->purgable
!= VM_PURGABLE_DENY
||
16676 object
->vo_ledger_tag
!= VM_LEDGER_TAG_NONE
)) {
16678 * Purgeable objects have their own accounting:
16679 * no pmap accounting for them.
16681 assertf(!src_entry
->use_pmap
,
16682 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16685 (uint64_t)src_entry
->vme_start
,
16686 (uint64_t)src_entry
->vme_end
,
16687 src_entry
->protection
,
16688 src_entry
->max_protection
,
16689 VME_ALIAS(src_entry
));
16692 * Not IOKit or purgeable:
16693 * must be accounted by pmap stats.
16695 assertf(src_entry
->use_pmap
,
16696 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16699 (uint64_t)src_entry
->vme_start
,
16700 (uint64_t)src_entry
->vme_end
,
16701 src_entry
->protection
,
16702 src_entry
->max_protection
,
16703 VME_ALIAS(src_entry
));
16706 if (object
== VM_OBJECT_NULL
) {
16707 assert(!src_entry
->needs_copy
);
16708 object
= vm_object_allocate(entry_size
);
16709 VME_OFFSET_SET(src_entry
, 0);
16710 VME_OBJECT_SET(src_entry
, object
);
16711 assert(src_entry
->use_pmap
);
16712 } else if (src_entry
->wired_count
||
16713 object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
16715 * A wired memory region should not have
16716 * any pending copy-on-write and needs to
16717 * keep pointing at the VM object that
16718 * contains the wired pages.
16719 * If we're sharing this memory (copy=false),
16720 * we'll share this VM object.
16721 * If we're copying this memory (copy=true),
16722 * we'll call vm_object_copy_slowly() below
16723 * and use the new VM object for the remapping.
16725 * Or, we are already using an asymmetric
16726 * copy, and therefore we already have
16727 * the right object.
16729 assert(!src_entry
->needs_copy
);
16730 } else if (src_entry
->needs_copy
|| object
->shadowed
||
16731 (object
->internal
&& !object
->true_share
&&
16732 !src_entry
->is_shared
&&
16733 object
->vo_size
> entry_size
)) {
16734 VME_OBJECT_SHADOW(src_entry
, entry_size
);
16735 assert(src_entry
->use_pmap
);
16737 if (!src_entry
->needs_copy
&&
16738 (src_entry
->protection
& VM_PROT_WRITE
)) {
16741 assert(!pmap_has_prot_policy(map
->pmap
, src_entry
->translated_allow_execute
, src_entry
->protection
));
16743 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
16745 if (override_nx(map
,
16746 VME_ALIAS(src_entry
))
16748 prot
|= VM_PROT_EXECUTE
;
16751 assert(!pmap_has_prot_policy(map
->pmap
, src_entry
->translated_allow_execute
, prot
));
16753 if (map
->mapped_in_other_pmaps
) {
16754 vm_object_pmap_protect(
16755 VME_OBJECT(src_entry
),
16756 VME_OFFSET(src_entry
),
16760 src_entry
->vme_start
,
16763 } else if (__improbable(map
->pmap
== PMAP_NULL
)) {
16764 extern boolean_t vm_tests_in_progress
;
16765 assert(vm_tests_in_progress
);
16767 * Some VM tests (in vm_tests.c)
16768 * sometimes want to use a VM
16769 * map without a pmap.
16770 * Otherwise, this should never
16773 #endif /* MACH_ASSERT */
16775 pmap_protect(vm_map_pmap(map
),
16776 src_entry
->vme_start
,
16777 src_entry
->vme_end
,
16782 object
= VME_OBJECT(src_entry
);
16783 src_entry
->needs_copy
= FALSE
;
16787 vm_object_lock(object
);
16788 vm_object_reference_locked(object
); /* object ref. for new entry */
16789 assert(!src_entry
->needs_copy
);
16790 if (object
->copy_strategy
==
16791 MEMORY_OBJECT_COPY_SYMMETRIC
) {
16793 * If we want to share this object (copy==0),
16794 * it needs to be COPY_DELAY.
16795 * If we want to copy this object (copy==1),
16796 * we can't just set "needs_copy" on our side
16797 * and expect the other side to do the same
16798 * (symmetrically), so we can't let the object
16799 * stay COPY_SYMMETRIC.
16800 * So we always switch from COPY_SYMMETRIC to
16803 object
->copy_strategy
=
16804 MEMORY_OBJECT_COPY_DELAY
;
16806 vm_object_unlock(object
);
16809 offset
= (VME_OFFSET(src_entry
) +
16810 (src_start
- src_entry
->vme_start
));
16812 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
16813 vm_map_entry_copy(map
, new_entry
, src_entry
);
16814 if (new_entry
->is_sub_map
) {
16815 /* clr address space specifics */
16816 new_entry
->use_pmap
= FALSE
;
16819 * We're dealing with a copy-on-write operation,
16820 * so the resulting mapping should not inherit the
16821 * original mapping's accounting settings.
16822 * "use_pmap" should be reset to its default (TRUE)
16823 * so that the new mapping gets accounted for in
16824 * the task's memory footprint.
16826 new_entry
->use_pmap
= TRUE
;
16828 /* "iokit_acct" was cleared in vm_map_entry_copy() */
16829 assert(!new_entry
->iokit_acct
);
16831 new_entry
->map_aligned
= FALSE
;
16833 new_entry
->vme_start
= map_address
;
16834 new_entry
->vme_end
= map_address
+ tmp_size
;
16835 assert(new_entry
->vme_start
< new_entry
->vme_end
);
16836 if (copy
&& vmk_flags
.vmkf_remap_prot_copy
) {
16838 * Remapping for vm_map_protect(VM_PROT_COPY)
16839 * to convert a read-only mapping into a
16840 * copy-on-write version of itself but
16841 * with write access:
16842 * keep the original inheritance and add
16843 * VM_PROT_WRITE to the max protection.
16845 new_entry
->inheritance
= src_entry
->inheritance
;
16846 new_entry
->protection
&= max_prot_for_prot_copy
;
16847 new_entry
->max_protection
|= VM_PROT_WRITE
;
16849 new_entry
->inheritance
= inheritance
;
16851 VME_OFFSET_SET(new_entry
, offset
);
16854 * The new region has to be copied now if required.
16858 if (src_entry
->used_for_jit
== TRUE
) {
16860 #if __APRR_SUPPORTED__
16862 * Disallow re-mapping of any JIT regions on APRR devices.
16864 result
= KERN_PROTECTION_FAILURE
;
16866 #endif /* __APRR_SUPPORTED__*/
16867 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map
)) {
16869 * Cannot allow an entry describing a JIT
16870 * region to be shared across address spaces.
16872 result
= KERN_INVALID_ARGUMENT
;
16877 src_entry
->is_shared
= TRUE
;
16878 new_entry
->is_shared
= TRUE
;
16879 if (!(new_entry
->is_sub_map
)) {
16880 new_entry
->needs_copy
= FALSE
;
16882 } else if (src_entry
->is_sub_map
) {
16883 /* make this a COW sub_map if not already */
16884 assert(new_entry
->wired_count
== 0);
16885 new_entry
->needs_copy
= TRUE
;
16886 object
= VM_OBJECT_NULL
;
16887 } else if (src_entry
->wired_count
== 0 &&
16888 !(debug4k_no_cow_copyin
&& VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
) &&
16889 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry
),
16890 VME_OFFSET(new_entry
),
16891 (new_entry
->vme_end
-
16892 new_entry
->vme_start
),
16894 &new_entry_needs_copy
)) {
16895 new_entry
->needs_copy
= new_entry_needs_copy
;
16896 new_entry
->is_shared
= FALSE
;
16897 assertf(new_entry
->use_pmap
, "map %p new_entry %p\n", map
, new_entry
);
16900 * Handle copy_on_write semantics.
16902 if (src_needs_copy
&& !src_entry
->needs_copy
) {
16905 assert(!pmap_has_prot_policy(map
->pmap
, src_entry
->translated_allow_execute
, src_entry
->protection
));
16907 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
16909 if (override_nx(map
,
16910 VME_ALIAS(src_entry
))
16912 prot
|= VM_PROT_EXECUTE
;
16915 assert(!pmap_has_prot_policy(map
->pmap
, src_entry
->translated_allow_execute
, prot
));
16917 vm_object_pmap_protect(object
,
16920 ((src_entry
->is_shared
16921 || map
->mapped_in_other_pmaps
) ?
16922 PMAP_NULL
: map
->pmap
),
16923 VM_MAP_PAGE_SIZE(map
),
16924 src_entry
->vme_start
,
16927 assert(src_entry
->wired_count
== 0);
16928 src_entry
->needs_copy
= TRUE
;
16931 * Throw away the old object reference of the new entry.
16933 vm_object_deallocate(object
);
16935 new_entry
->is_shared
= FALSE
;
16936 assertf(new_entry
->use_pmap
, "map %p new_entry %p\n", map
, new_entry
);
16938 src_entry_was_wired
= (src_entry
->wired_count
> 0);
16939 saved_src_entry
= src_entry
;
16940 src_entry
= VM_MAP_ENTRY_NULL
;
16943 * The map can be safely unlocked since we
16944 * already hold a reference on the object.
16946 * Record the timestamp of the map for later
16947 * verification, and unlock the map.
16949 version
.main_timestamp
= map
->timestamp
;
16950 vm_map_unlock(map
); /* Increments timestamp once! */
16953 * Perform the copy.
16955 if (src_entry_was_wired
> 0 ||
16956 (debug4k_no_cow_copyin
&&
16957 VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
)) {
16958 vm_object_lock(object
);
16959 result
= vm_object_copy_slowly(
16962 (new_entry
->vme_end
-
16963 new_entry
->vme_start
),
16965 VME_OBJECT_PTR(new_entry
));
16967 VME_OFFSET_SET(new_entry
, offset
- vm_object_trunc_page(offset
));
16968 new_entry
->needs_copy
= FALSE
;
16970 vm_object_offset_t new_offset
;
16972 new_offset
= VME_OFFSET(new_entry
);
16973 result
= vm_object_copy_strategically(
16976 (new_entry
->vme_end
-
16977 new_entry
->vme_start
),
16978 VME_OBJECT_PTR(new_entry
),
16980 &new_entry_needs_copy
);
16981 if (new_offset
!= VME_OFFSET(new_entry
)) {
16982 VME_OFFSET_SET(new_entry
, new_offset
);
16985 new_entry
->needs_copy
= new_entry_needs_copy
;
16989 * Throw away the old object reference of the new entry.
16991 vm_object_deallocate(object
);
16993 if (result
!= KERN_SUCCESS
&&
16994 result
!= KERN_MEMORY_RESTART_COPY
) {
16995 _vm_map_entry_dispose(map_header
, new_entry
);
17001 * Verify that the map has not substantially
17002 * changed while the copy was being made.
17006 if (version
.main_timestamp
+ 1 != map
->timestamp
) {
17008 * Simple version comparison failed.
17010 * Retry the lookup and verify that the
17011 * same object/offset are still present.
17013 saved_src_entry
= VM_MAP_ENTRY_NULL
;
17014 vm_object_deallocate(VME_OBJECT(new_entry
));
17015 _vm_map_entry_dispose(map_header
, new_entry
);
17016 if (result
== KERN_MEMORY_RESTART_COPY
) {
17017 result
= KERN_SUCCESS
;
17021 /* map hasn't changed: src_entry is still valid */
17022 src_entry
= saved_src_entry
;
17023 saved_src_entry
= VM_MAP_ENTRY_NULL
;
17025 if (result
== KERN_MEMORY_RESTART_COPY
) {
17026 vm_object_reference(object
);
17031 _vm_map_store_entry_link(map_header
,
17032 map_header
->links
.prev
, new_entry
);
17034 /*Protections for submap mapping are irrelevant here*/
17035 if (!src_entry
->is_sub_map
) {
17036 *cur_protection
&= src_entry
->protection
;
17037 *max_protection
&= src_entry
->max_protection
;
17040 map_address
+= tmp_size
;
17041 mapped_size
+= tmp_size
;
17042 src_start
+= tmp_size
;
17044 if (vmk_flags
.vmkf_copy_single_object
) {
17045 if (mapped_size
!= size
) {
17046 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map
, (uint64_t)addr
, (uint64_t)size
, (uint64_t)mapped_size
);
17047 if (src_entry
->vme_next
!= vm_map_to_entry(map
) &&
17048 VME_OBJECT(src_entry
->vme_next
) == VME_OBJECT(src_entry
)) {
17050 DEBUG4K_ERROR("could have extended copy to next entry...\n");
17057 vm_map_unlock(map
);
17058 if (result
!= KERN_SUCCESS
) {
17060 * Free all allocated elements.
17062 for (src_entry
= map_header
->links
.next
;
17063 src_entry
!= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
17064 src_entry
= new_entry
) {
17065 new_entry
= src_entry
->vme_next
;
17066 _vm_map_store_entry_unlink(map_header
, src_entry
);
17067 if (src_entry
->is_sub_map
) {
17068 vm_map_deallocate(VME_SUBMAP(src_entry
));
17070 vm_object_deallocate(VME_OBJECT(src_entry
));
17072 _vm_map_entry_dispose(map_header
, src_entry
);
17082 return VM_MAP_IS_EXOTIC(map
);
17089 return VM_MAP_IS_ALIEN(map
);
17092 #if XNU_TARGET_OS_OSX
17098 map
->is_alien
= true;
17099 vm_map_unlock(map
);
17101 #endif /* XNU_TARGET_OS_OSX */
17103 void vm_map_copy_to_physcopy(vm_map_copy_t copy_map
, vm_map_t target_map
);
17105 vm_map_copy_to_physcopy(
17106 vm_map_copy_t copy_map
,
17107 vm_map_t target_map
)
17109 vm_map_size_t size
;
17110 vm_map_entry_t entry
;
17111 vm_map_entry_t new_entry
;
17112 vm_object_t new_object
;
17113 unsigned int pmap_flags
;
17116 vm_map_address_t src_start
, src_end
, src_cur
;
17117 vm_map_address_t dst_start
, dst_end
, dst_cur
;
17122 * Perform the equivalent of vm_allocate() and memcpy().
17123 * Replace the mappings in "copy_map" with the newly allocated mapping.
17125 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map
, copy_map
->cpy_hdr
.page_shift
, copy_map
->cpy_hdr
.nentries
, copy_map
->offset
, (uint64_t)copy_map
->size
);
17127 assert(copy_map
->cpy_hdr
.page_shift
!= VM_MAP_PAGE_MASK(target_map
));
17129 /* allocate new VM object */
17130 size
= VM_MAP_ROUND_PAGE(copy_map
->size
, PAGE_MASK
);
17131 new_object
= vm_object_allocate(size
);
17132 assert(new_object
);
17134 /* allocate new VM map entry */
17135 new_entry
= vm_map_copy_entry_create(copy_map
, FALSE
);
17138 /* finish initializing new VM map entry */
17139 new_entry
->protection
= VM_PROT_DEFAULT
;
17140 new_entry
->max_protection
= VM_PROT_DEFAULT
;
17141 new_entry
->use_pmap
= TRUE
;
17143 /* make new VM map entry point to new VM object */
17144 new_entry
->vme_start
= 0;
17145 new_entry
->vme_end
= size
;
17146 VME_OBJECT_SET(new_entry
, new_object
);
17147 VME_OFFSET_SET(new_entry
, 0);
17149 /* create a new pmap to map "copy_map" */
17151 assert(copy_map
->cpy_hdr
.page_shift
== FOURK_PAGE_SHIFT
);
17152 #if PMAP_CREATE_FORCE_4K_PAGES
17153 pmap_flags
|= PMAP_CREATE_FORCE_4K_PAGES
;
17154 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
17155 pmap_flags
|= PMAP_CREATE_64BIT
;
17156 new_pmap
= pmap_create_options(NULL
, (vm_map_size_t
)0, pmap_flags
);
17159 /* create a new pageable VM map to map "copy_map" */
17160 new_map
= vm_map_create(new_pmap
, 0, MACH_VM_MAX_ADDRESS
, TRUE
);
17162 vm_map_set_page_shift(new_map
, copy_map
->cpy_hdr
.page_shift
);
17164 /* map "copy_map" in the new VM map */
17166 kr
= vm_map_copyout_internal(
17171 FALSE
, /* consume_on_success */
17174 VM_INHERIT_DEFAULT
);
17175 assert(kr
== KERN_SUCCESS
);
17176 src_end
= src_start
+ copy_map
->size
;
17178 /* map "new_object" in the new VM map */
17179 vm_object_reference(new_object
);
17181 kr
= vm_map_enter(new_map
,
17186 VM_MAP_KERNEL_FLAGS_NONE
,
17187 VM_KERN_MEMORY_OSFMK
,
17190 FALSE
, /* needs copy */
17193 VM_INHERIT_DEFAULT
);
17194 assert(kr
== KERN_SUCCESS
);
17195 dst_end
= dst_start
+ size
;
17197 /* get a kernel buffer */
17198 kbuf
= kheap_alloc(KHEAP_TEMP
, PAGE_SIZE
, Z_WAITOK
);
17201 /* physically copy "copy_map" mappings to new VM object */
17202 for (src_cur
= src_start
, dst_cur
= dst_start
;
17204 src_cur
+= PAGE_SIZE
, dst_cur
+= PAGE_SIZE
) {
17208 if (src_cur
+ PAGE_SIZE
> src_end
) {
17209 /* partial copy for last page */
17210 bytes
= src_end
- src_cur
;
17211 assert(bytes
> 0 && bytes
< PAGE_SIZE
);
17212 /* rest of dst page should be zero-filled */
17214 /* get bytes from src mapping */
17215 kr
= copyinmap(new_map
, src_cur
, kbuf
, bytes
);
17216 if (kr
!= KERN_SUCCESS
) {
17217 DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map
, (uint64_t)src_cur
, kbuf
, (uint64_t)bytes
, kr
);
17219 /* put bytes in dst mapping */
17220 assert(dst_cur
< dst_end
);
17221 assert(dst_cur
+ bytes
<= dst_end
);
17222 kr
= copyoutmap(new_map
, kbuf
, dst_cur
, bytes
);
17223 if (kr
!= KERN_SUCCESS
) {
17224 DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map
, kbuf
, (uint64_t)dst_cur
, (uint64_t)bytes
, kr
);
17228 /* free kernel buffer */
17229 kheap_free(KHEAP_TEMP
, kbuf
, PAGE_SIZE
);
17232 /* destroy new map */
17233 vm_map_destroy(new_map
, VM_MAP_REMOVE_NO_FLAGS
);
17234 new_map
= VM_MAP_NULL
;
17236 /* dispose of the old map entries in "copy_map" */
17237 while (vm_map_copy_first_entry(copy_map
) !=
17238 vm_map_copy_to_entry(copy_map
)) {
17239 entry
= vm_map_copy_first_entry(copy_map
);
17240 vm_map_copy_entry_unlink(copy_map
, entry
);
17241 if (entry
->is_sub_map
) {
17242 vm_map_deallocate(VME_SUBMAP(entry
));
17244 vm_object_deallocate(VME_OBJECT(entry
));
17246 vm_map_copy_entry_dispose(copy_map
, entry
);
17249 /* change "copy_map"'s page_size to match "target_map" */
17250 copy_map
->cpy_hdr
.page_shift
= VM_MAP_PAGE_SHIFT(target_map
);
17251 copy_map
->offset
= 0;
17252 copy_map
->size
= size
;
17254 /* insert new map entry in "copy_map" */
17255 assert(vm_map_copy_last_entry(copy_map
) == vm_map_copy_to_entry(copy_map
));
17256 vm_map_copy_entry_link(copy_map
, vm_map_copy_last_entry(copy_map
), new_entry
);
17258 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map
, copy_map
->cpy_hdr
.page_shift
, copy_map
->cpy_hdr
.nentries
, copy_map
->offset
, (uint64_t)copy_map
->size
);
17262 vm_map_copy_adjust_get_target_copy_map(
17263 vm_map_copy_t copy_map
,
17264 vm_map_copy_t
*target_copy_map_p
);
17266 vm_map_copy_adjust_get_target_copy_map(
17267 vm_map_copy_t copy_map
,
17268 vm_map_copy_t
*target_copy_map_p
)
17270 vm_map_copy_t target_copy_map
;
17271 vm_map_entry_t entry
, target_entry
;
17273 if (*target_copy_map_p
!= VM_MAP_COPY_NULL
) {
17274 /* the caller already has a "target_copy_map": use it */
17278 /* the caller wants us to create a new copy of "copy_map" */
17279 target_copy_map
= vm_map_copy_allocate();
17280 target_copy_map
->type
= copy_map
->type
;
17281 assert(target_copy_map
->type
== VM_MAP_COPY_ENTRY_LIST
);
17282 target_copy_map
->offset
= copy_map
->offset
;
17283 target_copy_map
->size
= copy_map
->size
;
17284 target_copy_map
->cpy_hdr
.page_shift
= copy_map
->cpy_hdr
.page_shift
;
17285 vm_map_store_init(&target_copy_map
->cpy_hdr
);
17286 for (entry
= vm_map_copy_first_entry(copy_map
);
17287 entry
!= vm_map_copy_to_entry(copy_map
);
17288 entry
= entry
->vme_next
) {
17289 target_entry
= vm_map_copy_entry_create(target_copy_map
, FALSE
);
17290 vm_map_entry_copy_full(target_entry
, entry
);
17291 if (target_entry
->is_sub_map
) {
17292 vm_map_reference(VME_SUBMAP(target_entry
));
17294 vm_object_reference(VME_OBJECT(target_entry
));
17296 vm_map_copy_entry_link(
17298 vm_map_copy_last_entry(target_copy_map
),
17301 entry
= VM_MAP_ENTRY_NULL
;
17302 *target_copy_map_p
= target_copy_map
;
17307 vm_map_copy_t copy_map
,
17308 int new_page_shift
,
17309 vm_map_offset_t trim_start
,
17310 vm_map_offset_t trim_end
);
17313 vm_map_copy_t copy_map
,
17314 int new_page_shift
,
17315 vm_map_offset_t trim_start
,
17316 vm_map_offset_t trim_end
)
17318 int copy_page_shift
;
17319 vm_map_entry_t entry
, next_entry
;
17321 assert(copy_map
->type
== VM_MAP_COPY_ENTRY_LIST
);
17322 assert(copy_map
->cpy_hdr
.nentries
> 0);
17324 trim_start
+= vm_map_copy_first_entry(copy_map
)->vme_start
;
17325 trim_end
+= vm_map_copy_first_entry(copy_map
)->vme_start
;
17327 /* use the new page_shift to do the clipping */
17328 copy_page_shift
= VM_MAP_COPY_PAGE_SHIFT(copy_map
);
17329 copy_map
->cpy_hdr
.page_shift
= new_page_shift
;
17331 for (entry
= vm_map_copy_first_entry(copy_map
);
17332 entry
!= vm_map_copy_to_entry(copy_map
);
17333 entry
= next_entry
) {
17334 next_entry
= entry
->vme_next
;
17335 if (entry
->vme_end
<= trim_start
) {
17336 /* entry fully before trim range: skip */
17339 if (entry
->vme_start
>= trim_end
) {
17340 /* entry fully after trim range: done */
17343 /* clip entry if needed */
17344 vm_map_copy_clip_start(copy_map
, entry
, trim_start
);
17345 vm_map_copy_clip_end(copy_map
, entry
, trim_end
);
17346 /* dispose of entry */
17347 copy_map
->size
-= entry
->vme_end
- entry
->vme_start
;
17348 vm_map_copy_entry_unlink(copy_map
, entry
);
17349 if (entry
->is_sub_map
) {
17350 vm_map_deallocate(VME_SUBMAP(entry
));
17352 vm_object_deallocate(VME_OBJECT(entry
));
17354 vm_map_copy_entry_dispose(copy_map
, entry
);
17355 entry
= VM_MAP_ENTRY_NULL
;
17358 /* restore copy_map's original page_shift */
17359 copy_map
->cpy_hdr
.page_shift
= copy_page_shift
;
17363 * Make any necessary adjustments to "copy_map" to allow it to be
17364 * mapped into "target_map".
17365 * If no changes were necessary, "target_copy_map" points to the
17366 * untouched "copy_map".
17367 * If changes are necessary, changes will be made to "target_copy_map".
17368 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17369 * copy the original "copy_map" to it before applying the changes.
17370 * The caller should discard "target_copy_map" if it's not the same as
17371 * the original "copy_map".
17373 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
17375 vm_map_copy_adjust_to_target(
17376 vm_map_copy_t src_copy_map
,
17377 vm_map_offset_t offset
,
17378 vm_map_size_t size
,
17379 vm_map_t target_map
,
17381 vm_map_copy_t
*target_copy_map_p
,
17382 vm_map_offset_t
*overmap_start_p
,
17383 vm_map_offset_t
*overmap_end_p
,
17384 vm_map_offset_t
*trimmed_start_p
)
17386 vm_map_copy_t copy_map
, target_copy_map
;
17387 vm_map_size_t target_size
;
17388 vm_map_size_t src_copy_map_size
;
17389 vm_map_size_t overmap_start
, overmap_end
;
17391 vm_map_entry_t entry
, target_entry
;
17392 vm_map_offset_t addr_adjustment
;
17393 vm_map_offset_t new_start
, new_end
;
17394 int copy_page_mask
, target_page_mask
;
17395 int copy_page_shift
, target_page_shift
;
17396 vm_map_offset_t trimmed_end
;
17399 * Assert that the vm_map_copy is coming from the right
17400 * zone and hasn't been forged
17402 vm_map_copy_require(src_copy_map
);
17403 assert(src_copy_map
->type
== VM_MAP_COPY_ENTRY_LIST
);
17406 * Start working with "src_copy_map" but we'll switch
17407 * to "target_copy_map" as soon as we start making adjustments.
17409 copy_map
= src_copy_map
;
17410 src_copy_map_size
= src_copy_map
->size
;
17412 copy_page_shift
= VM_MAP_COPY_PAGE_SHIFT(copy_map
);
17413 copy_page_mask
= VM_MAP_COPY_PAGE_MASK(copy_map
);
17414 target_page_shift
= VM_MAP_PAGE_SHIFT(target_map
);
17415 target_page_mask
= VM_MAP_PAGE_MASK(target_map
);
17417 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map
, copy_page_shift
, (uint64_t)copy_map
->offset
, (uint64_t)copy_map
->size
, target_map
, target_page_shift
, copy
, (uint64_t)offset
, (uint64_t)size
, *target_copy_map_p
);
17419 target_copy_map
= *target_copy_map_p
;
17420 if (target_copy_map
!= VM_MAP_COPY_NULL
) {
17421 vm_map_copy_require(target_copy_map
);
17424 if (offset
+ size
> copy_map
->size
) {
17425 DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map
, copy_page_shift
, target_page_shift
, (uint64_t)copy_map
->size
, (uint64_t)offset
, (uint64_t)size
);
17426 return KERN_INVALID_ARGUMENT
;
17431 new_end
= VM_MAP_ROUND_PAGE(offset
+ size
, target_page_mask
);
17432 if (new_end
< copy_map
->size
) {
17433 trimmed_end
= src_copy_map_size
- new_end
;
17434 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map
, copy_page_shift
, target_page_shift
, copy
, (uint64_t)offset
, (uint64_t)size
, target_copy_map
, (uint64_t)new_end
, (uint64_t)copy_map
->size
);
17435 /* get "target_copy_map" if needed and adjust it */
17436 vm_map_copy_adjust_get_target_copy_map(copy_map
,
17438 copy_map
= target_copy_map
;
17439 vm_map_copy_trim(target_copy_map
, target_page_shift
,
17440 new_end
, copy_map
->size
);
17443 /* trim the start */
17444 new_start
= VM_MAP_TRUNC_PAGE(offset
, target_page_mask
);
17445 if (new_start
!= 0) {
17446 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map
, copy_page_shift
, target_page_shift
, copy
, (uint64_t)offset
, (uint64_t)size
, target_copy_map
, (uint64_t)0, (uint64_t)new_start
);
17447 /* get "target_copy_map" if needed and adjust it */
17448 vm_map_copy_adjust_get_target_copy_map(copy_map
,
17450 copy_map
= target_copy_map
;
17451 vm_map_copy_trim(target_copy_map
, target_page_shift
,
17454 *trimmed_start_p
= new_start
;
17456 /* target_size starts with what's left after trimming */
17457 target_size
= copy_map
->size
;
17458 assertf(target_size
== src_copy_map_size
- *trimmed_start_p
- trimmed_end
,
17459 "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
17460 (uint64_t)target_size
, (uint64_t)src_copy_map_size
,
17461 (uint64_t)*trimmed_start_p
, (uint64_t)trimmed_end
);
17463 /* check for misalignments but don't adjust yet */
17467 if (copy_page_shift
< target_page_shift
) {
17469 * Remapping from 4K to 16K: check the VM object alignments
17470 * throughout the range.
17471 * If the start and end of the range are mis-aligned, we can
17472 * over-map to re-align, and adjust the "overmap" start/end
17473 * and "target_size" of the range accordingly.
17474 * If there is any mis-alignment within the range:
17476 * we can do immediate-copy instead of copy-on-write,
17478 * no way to remap and share; fail.
17480 for (entry
= vm_map_copy_first_entry(copy_map
);
17481 entry
!= vm_map_copy_to_entry(copy_map
);
17482 entry
= entry
->vme_next
) {
17483 vm_object_offset_t object_offset_start
, object_offset_end
;
17485 object_offset_start
= VME_OFFSET(entry
);
17486 object_offset_end
= object_offset_start
;
17487 object_offset_end
+= entry
->vme_end
- entry
->vme_start
;
17488 if (object_offset_start
& target_page_mask
) {
17489 if (entry
== vm_map_copy_first_entry(copy_map
) && !copy
) {
17495 if (object_offset_end
& target_page_mask
) {
17496 if (entry
->vme_next
== vm_map_copy_to_entry(copy_map
) && !copy
) {
17504 entry
= VM_MAP_ENTRY_NULL
;
17506 /* decide how to deal with misalignments */
17507 assert(overmap_start
<= 1);
17508 assert(overmap_end
<= 1);
17509 if (!overmap_start
&& !overmap_end
&& !misalignments
) {
17510 /* copy_map is properly aligned for target_map ... */
17511 if (*trimmed_start_p
) {
17512 /* ... but we trimmed it, so still need to adjust */
17514 /* ... and we didn't trim anything: we're done */
17515 if (target_copy_map
== VM_MAP_COPY_NULL
) {
17516 target_copy_map
= copy_map
;
17518 *target_copy_map_p
= target_copy_map
;
17519 *overmap_start_p
= 0;
17520 *overmap_end_p
= 0;
17521 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map
, copy_page_shift
, (uint64_t)copy_map
->offset
, (uint64_t)copy_map
->size
, target_map
, target_page_shift
, copy
, *target_copy_map_p
, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p
), (uint64_t)(*target_copy_map_p
)->offset
, (uint64_t)(*target_copy_map_p
)->size
, (uint64_t)*trimmed_start_p
, (uint64_t)*overmap_start_p
, (uint64_t)*overmap_end_p
);
17522 return KERN_SUCCESS
;
17524 } else if (misalignments
&& !copy
) {
17525 /* can't "share" if misaligned */
17526 DEBUG4K_ADJUST("unsupported sharing\n");
17528 if (debug4k_panic_on_misaligned_sharing
) {
17529 panic("DEBUG4k %s:%d unsupported sharing\n", __FUNCTION__
, __LINE__
);
17531 #endif /* MACH_ASSERT */
17532 DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map
, copy_page_shift
, target_map
, target_page_shift
, copy
, *target_copy_map_p
);
17533 return KERN_NOT_SUPPORTED
;
17535 /* can't virtual-copy if misaligned (but can physical-copy) */
17536 DEBUG4K_ADJUST("mis-aligned copying\n");
17539 /* get a "target_copy_map" if needed and switch to it */
17540 vm_map_copy_adjust_get_target_copy_map(copy_map
, &target_copy_map
);
17541 copy_map
= target_copy_map
;
17543 if (misalignments
&& copy
) {
17544 vm_map_size_t target_copy_map_size
;
17547 * Can't do copy-on-write with misaligned mappings.
17548 * Replace the mappings with a physical copy of the original
17549 * mappings' contents.
17551 target_copy_map_size
= target_copy_map
->size
;
17552 vm_map_copy_to_physcopy(target_copy_map
, target_map
);
17553 *target_copy_map_p
= target_copy_map
;
17554 *overmap_start_p
= 0;
17555 *overmap_end_p
= target_copy_map
->size
- target_copy_map_size
;
17556 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map
, copy_page_shift
, (uint64_t)copy_map
->offset
, (uint64_t)copy_map
->size
, target_map
, target_page_shift
, copy
, *target_copy_map_p
, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p
), (uint64_t)(*target_copy_map_p
)->offset
, (uint64_t)(*target_copy_map_p
)->size
, (uint64_t)*trimmed_start_p
, (uint64_t)*overmap_start_p
, (uint64_t)*overmap_end_p
);
17557 return KERN_SUCCESS
;
17560 /* apply the adjustments */
17564 /* remove copy_map->offset, so that everything starts at offset 0 */
17565 addr_adjustment
= copy_map
->offset
;
17566 /* also remove whatever we trimmed from the start */
17567 addr_adjustment
+= *trimmed_start_p
;
17568 for (target_entry
= vm_map_copy_first_entry(target_copy_map
);
17569 target_entry
!= vm_map_copy_to_entry(target_copy_map
);
17570 target_entry
= target_entry
->vme_next
) {
17571 vm_object_offset_t object_offset_start
, object_offset_end
;
17573 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map
, VM_MAP_COPY_PAGE_SHIFT(target_copy_map
), target_copy_map
->offset
, (uint64_t)target_copy_map
->size
, target_entry
, (uint64_t)target_entry
->vme_start
, (uint64_t)target_entry
->vme_end
, VME_OBJECT(target_entry
), VME_OFFSET(target_entry
));
17574 object_offset_start
= VME_OFFSET(target_entry
);
17575 if (object_offset_start
& target_page_mask
) {
17576 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map
, VM_MAP_COPY_PAGE_SHIFT(target_copy_map
), target_copy_map
->offset
, (uint64_t)target_copy_map
->size
, target_entry
, (uint64_t)target_entry
->vme_start
, (uint64_t)target_entry
->vme_end
, VME_OBJECT(target_entry
), VME_OFFSET(target_entry
));
17577 if (target_entry
== vm_map_copy_first_entry(target_copy_map
)) {
17579 * start of 1st entry is mis-aligned:
17580 * re-adjust by over-mapping.
17582 overmap_start
= object_offset_start
- trunc_page_mask_64(object_offset_start
, target_page_mask
);
17583 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry
, VME_OFFSET(target_entry
), copy
, (uint64_t)overmap_start
);
17584 VME_OFFSET_SET(target_entry
, VME_OFFSET(target_entry
) - overmap_start
);
17587 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry
, VME_OFFSET(target_entry
), copy
, misalignments
);
17592 if (target_entry
== vm_map_copy_first_entry(target_copy_map
)) {
17593 target_size
+= overmap_start
;
17595 target_entry
->vme_start
+= overmap_start
;
17597 target_entry
->vme_end
+= overmap_start
;
17599 object_offset_end
= VME_OFFSET(target_entry
) + target_entry
->vme_end
- target_entry
->vme_start
;
17600 if (object_offset_end
& target_page_mask
) {
17601 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map
, VM_MAP_COPY_PAGE_SHIFT(target_copy_map
), target_copy_map
->offset
, (uint64_t)target_copy_map
->size
, target_entry
, (uint64_t)target_entry
->vme_start
, (uint64_t)target_entry
->vme_end
, VME_OBJECT(target_entry
), VME_OFFSET(target_entry
));
17602 if (target_entry
->vme_next
== vm_map_copy_to_entry(target_copy_map
)) {
17604 * end of last entry is mis-aligned: re-adjust by over-mapping.
17606 overmap_end
= round_page_mask_64(object_offset_end
, target_page_mask
) - object_offset_end
;
17607 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry
, VME_OFFSET(target_entry
), copy
, (uint64_t)overmap_end
);
17608 target_entry
->vme_end
+= overmap_end
;
17609 target_size
+= overmap_end
;
17612 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry
, VME_OFFSET(target_entry
), copy
, misalignments
);
17616 target_entry
->vme_start
-= addr_adjustment
;
17617 target_entry
->vme_end
-= addr_adjustment
;
17618 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map
, VM_MAP_COPY_PAGE_SHIFT(target_copy_map
), target_copy_map
->offset
, (uint64_t)target_copy_map
->size
, target_entry
, (uint64_t)target_entry
->vme_start
, (uint64_t)target_entry
->vme_end
, VME_OBJECT(target_entry
), VME_OFFSET(target_entry
));
17621 target_copy_map
->size
= target_size
;
17622 target_copy_map
->offset
+= overmap_start
;
17623 target_copy_map
->offset
-= addr_adjustment
;
17624 target_copy_map
->cpy_hdr
.page_shift
= target_page_shift
;
17626 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17627 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17628 assert(overmap_start
< VM_MAP_PAGE_SIZE(target_map
));
17629 assert(overmap_end
< VM_MAP_PAGE_SIZE(target_map
));
17631 *target_copy_map_p
= target_copy_map
;
17632 *overmap_start_p
= overmap_start
;
17633 *overmap_end_p
= overmap_end
;
17635 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map
, copy_page_shift
, (uint64_t)copy_map
->offset
, (uint64_t)copy_map
->size
, target_map
, target_page_shift
, copy
, *target_copy_map_p
, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p
), (uint64_t)(*target_copy_map_p
)->offset
, (uint64_t)(*target_copy_map_p
)->size
, (uint64_t)*trimmed_start_p
, (uint64_t)*overmap_start_p
, (uint64_t)*overmap_end_p
);
17636 return KERN_SUCCESS
;
/*
 * vm_map_range_physical_size:
 *
 * Reports, through *phys_size, the size of the range [start, start + size)
 * of "map" once its bounds have been page aligned.
 *
 * Flow visible in this extract:
 *  - The range is aligned with VM_MAP_PAGE_MASK(map) and the resulting size
 *    is stored in *phys_size. When VM_MAP_PAGE_SIZE(map) == PAGE_SIZE that
 *    value is final and KERN_SUCCESS is returned.
 *  - A second path recomputes the size with PAGE_MASK and returns
 *    KERN_SUCCESS. NOTE(review): the condition guarding that path is not
 *    visible in this extract.
 *  - An empty adjusted range returns KERN_SUCCESS.
 *  - Otherwise the range is extracted with vm_map_copy_extract() and passed
 *    to vm_map_copy_adjust_to_target(); on success *phys_size is replaced by
 *    the size of the adjusted copy map. The extracted copy map is discarded
 *    afterwards.
 *
 * Parameters visible here: start and size (in), phys_size (out). "map" and
 * "kr" are used throughout but their declaration lines are not part of this
 * extract.
 */
17640 vm_map_range_physical_size(
17642 vm_map_address_t start
,
17643 mach_vm_size_t size
,
17644 mach_vm_size_t
* phys_size
)
17647 vm_map_copy_t copy_map
, target_copy_map
;
17648 vm_map_offset_t adjusted_start
, adjusted_end
;
17649 vm_map_size_t adjusted_size
;
17650 vm_prot_t cur_prot
, max_prot
;
17651 vm_map_offset_t overmap_start
, overmap_end
, trimmed_start
;
17652 vm_map_kernel_flags_t vmk_flags
;
/* First pass: align the range using the page mask of the map itself. */
17654 adjusted_start
= vm_map_trunc_page(start
, VM_MAP_PAGE_MASK(map
));
17655 adjusted_end
= vm_map_round_page(start
+ size
, VM_MAP_PAGE_MASK(map
));
17656 adjusted_size
= adjusted_end
- adjusted_start
;
17657 *phys_size
= adjusted_size
;
17658 if (VM_MAP_PAGE_SIZE(map
) == PAGE_SIZE
) {
17659 return KERN_SUCCESS
;
/* Same computation, this time with PAGE_MASK instead of the map mask. */
17662 adjusted_start
= vm_map_trunc_page(start
, PAGE_MASK
);
17663 adjusted_end
= vm_map_round_page(start
+ size
, PAGE_MASK
);
17664 adjusted_size
= adjusted_end
- adjusted_start
;
17665 *phys_size
= adjusted_size
;
17666 return KERN_SUCCESS
;
17668 if (adjusted_size
== 0) {
17669 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map
, (uint64_t)start
, (uint64_t)size
, (uint64_t)adjusted_size
);
17671 return KERN_SUCCESS
;
/* Extract the range as a pageable copy taken from and for the same map. */
17674 vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
17675 vmk_flags
.vmkf_copy_pageable
= TRUE
;
17676 vmk_flags
.vmkf_copy_same_map
= TRUE
;
17677 assert(adjusted_size
!= 0);
17678 kr
= vm_map_copy_extract(map
, adjusted_start
, adjusted_size
,
17679 VM_PROT_NONE
, /* required_protection: no check here */
17682 &cur_prot
, &max_prot
, VM_INHERIT_DEFAULT
,
17684 if (kr
!= KERN_SUCCESS
) {
17685 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map
, (uint64_t)start
, (uint64_t)adjusted_start
, size
, (uint64_t)adjusted_size
, kr
);
17690 assert(copy_map
!= VM_MAP_COPY_NULL
);
17691 target_copy_map
= copy_map
;
17692 DEBUG4K_ADJUST("adjusting...\n");
17693 kr
= vm_map_copy_adjust_to_target(
17695 start
- adjusted_start
, /* offset */
17703 if (kr
== KERN_SUCCESS
) {
17704 if (target_copy_map
->size
!= *phys_size
) {
17705 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map
, VM_MAP_PAGE_SHIFT(map
), (uint64_t)start
, (uint64_t)size
, (uint64_t)adjusted_start
, (uint64_t)adjusted_end
, (uint64_t)overmap_start
, (uint64_t)overmap_end
, (uint64_t)trimmed_start
, (uint64_t)*phys_size
, (uint64_t)target_copy_map
->size
);
/* The adjusted copy map size is the value reported to the caller. */
17707 *phys_size
= target_copy_map
->size
;
17709 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map
, (uint64_t)start
, (uint64_t)adjusted_start
, size
, (uint64_t)adjusted_size
, kr
);
/* Only the size of the copy map was needed: release it. */
17713 vm_map_copy_discard(copy_map
);
17714 copy_map
= VM_MAP_COPY_NULL
;
/*
 * memory_entry_check_for_adjustment:
 *
 * Looks at the copy map backing the named entry behind "port" and, when
 * "src_map" is non-null and uses a page shift smaller than PAGE_SHIFT, runs
 * vm_map_copy_adjust_to_target() on it over the whole copy map size.
 *
 * "port" is asserted to be of kobject type IKOT_NAMED_ENTRY. The named
 * entry is locked while its backing copy map is examined and unlocked at the
 * end. The adjustment is asserted not to have trimmed anything at the start.
 *
 * overmap_start and overmap_end are out parameters. NOTE(review): the lines
 * handing them to the adjust call, the src_map/port parameter lines and the
 * return statement are not part of this extract; kr starts as KERN_SUCCESS.
 */
17721 memory_entry_check_for_adjustment(
17724 vm_map_offset_t
*overmap_start
,
17725 vm_map_offset_t
*overmap_end
)
17727 kern_return_t kr
= KERN_SUCCESS
;
17728 vm_map_copy_t copy_map
= VM_MAP_COPY_NULL
, target_copy_map
= VM_MAP_COPY_NULL
;
17731 assertf(ip_kotype(port
) == IKOT_NAMED_ENTRY
, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY
, ip_kotype(port
));
17733 vm_named_entry_t named_entry
;
/* The kobject of the port is the named entry; hold its lock from here on. */
17735 named_entry
= (vm_named_entry_t
) port
->ip_kobject
;
17736 named_entry_lock(named_entry
);
17737 copy_map
= named_entry
->backing
.copy
;
17738 target_copy_map
= copy_map
;
/* Adjustment is only attempted for a source map with sub-PAGE_SHIFT pages. */
17740 if (src_map
&& VM_MAP_PAGE_SHIFT(src_map
) < PAGE_SHIFT
) {
17741 vm_map_offset_t trimmed_start
;
17744 DEBUG4K_ADJUST("adjusting...\n");
17745 kr
= vm_map_copy_adjust_to_target(
17748 copy_map
->size
, /* size */
17755 assert(trimmed_start
== 0);
17757 named_entry_unlock(named_entry
);
17764 * Routine: vm_remap
17766 * Map portion of a task's address space.
17767 * Mapped region must not overlap more than
17768 * one vm memory object. Protections and
17769 * inheritance attributes remain the same
17770 * as in the original task and are out parameters.
17771 * Source and Target task can be identical
17772 * Other attributes are identical as for vm_map()
/*
 * Remap entry point (presumably vm_map_remap(); the line carrying the
 * function name, and the flags, tag, src_map and copy parameter lines, are
 * not part of this extract).
 *
 * Flow visible in this extract:
 *  1. A null target_map is rejected with KERN_INVALID_ARGUMENT; the
 *     inheritance value and the (size, src_map) pair are validated.
 *  2. (memory_address, size) is normalized to the page size of src_map.
 *     With VM_FLAGS_RETURN_DATA_ADDR the range is widened to cover every
 *     page it touches and offset_in_mapping records where the first
 *     requested byte lies inside it; otherwise the legacy rounding is
 *     applied and offset_in_mapping is 0.
 *  3. VM_FLAGS_RESILIENT_MEDIA has an extra validity check that can fail
 *     with KERN_INVALID_ARGUMENT (its condition is not visible here).
 *  4. The source range is extracted with vm_map_copy_extract().
 *  5. When the source and target page masks differ, the copy map goes
 *     through vm_map_copy_adjust_to_target(); offset_in_mapping is corrected
 *     by trimmed_start and overmap_start, and target_size becomes the size
 *     of the adjusted copy map. A failed adjustment discards the copy map.
 *  6. With target_map locked, vm_map_remap_range_allocate() picks the
 *     destination range. Every entry is unlinked from the copy map; on
 *     success it is relocated by *address and linked into target_map after
 *     insp_entry, otherwise its object or submap reference is dropped and
 *     the entry is disposed of.
 *  7. VM_FLAGS_RESILIENT_CODESIGN forces read-only protections on the
 *     entries and on the returned cur/max protections.
 *  8. On success target_map->size grows by target_size and the map hint is
 *     updated. An executable max protection triggers a pmap_cs_lookup()
 *     consistency check that can downgrade the returned protections to
 *     VM_PROT_READ. NOTE(review): preprocessor guards around this part, if
 *     any, are not visible in this extract.
 *  9. After unlocking, the new range is wired if target_map requires it,
 *     and with VM_FLAGS_RETURN_DATA_ADDR *address is advanced by
 *     offset_in_mapping.
 *
 * Out parameters visible here: *address, *cur_protection, *max_protection.
 */
17776 vm_map_t target_map
,
17777 vm_map_address_t
*address
,
17778 vm_map_size_t size
,
17779 vm_map_offset_t mask
,
17781 vm_map_kernel_flags_t vmk_flags
,
17784 vm_map_offset_t memory_address
,
17786 vm_prot_t
*cur_protection
,
17787 vm_prot_t
*max_protection
,
17788 vm_inherit_t inheritance
)
17790 kern_return_t result
;
17791 vm_map_entry_t entry
;
17792 vm_map_entry_t insp_entry
= VM_MAP_ENTRY_NULL
;
17793 vm_map_entry_t new_entry
;
17794 vm_map_copy_t copy_map
;
17795 vm_map_offset_t offset_in_mapping
;
17796 vm_map_size_t target_size
= 0;
17797 vm_map_size_t src_page_mask
, target_page_mask
;
17798 vm_map_offset_t overmap_start
, overmap_end
, trimmed_start
;
17799 vm_map_offset_t initial_memory_address
;
17800 vm_map_size_t initial_size
;
/* A target map is mandatory. */
17802 if (target_map
== VM_MAP_NULL
) {
17803 return KERN_INVALID_ARGUMENT
;
/* Keep the caller-supplied range; memory_address and size get rounded below. */
17806 initial_memory_address
= memory_address
;
17807 initial_size
= size
;
17808 src_page_mask
= VM_MAP_PAGE_MASK(src_map
);
17809 target_page_mask
= VM_MAP_PAGE_MASK(target_map
);
/*
 * Validate the inheritance value together with size and src_map.
 * NOTE(review): the break/default lines of this switch are not visible in
 * this extract.
 */
17811 switch (inheritance
) {
17812 case VM_INHERIT_NONE
:
17813 case VM_INHERIT_COPY
:
17814 case VM_INHERIT_SHARE
:
17815 if (size
!= 0 && src_map
!= VM_MAP_NULL
) {
17820 return KERN_INVALID_ARGUMENT
;
17823 if (src_page_mask
!= target_page_mask
) {
17825 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map
, VM_MAP_PAGE_SIZE(src_map
), (uint64_t)memory_address
, (uint64_t)size
, copy
, target_map
, VM_MAP_PAGE_SIZE(target_map
));
17827 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map
, VM_MAP_PAGE_SIZE(src_map
), (uint64_t)memory_address
, (uint64_t)size
, copy
, target_map
, VM_MAP_PAGE_SIZE(target_map
));
17832 * If the user is requesting that we return the address of the
17833 * first byte of the data (rather than the base of the page),
17834 * then we use different rounding semantics: specifically,
17835 * we assume that (memory_address, size) describes a region
17836 * all of whose pages we must cover, rather than a base to be truncated
17837 * down and a size to be added to that base. So we figure out
17838 * the highest page that the requested region includes and make
17839 * sure that the size will cover it.
17841 * The key example we're worried about it is of the form:
17843 * memory_address = 0x1ff0, size = 0x20
17845 * With the old semantics, we round down the memory_address to 0x1000
17846 * and round up the size to 0x1000, resulting in our covering *only*
17847 * page 0x1000. With the new semantics, we'd realize that the region covers
17848 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
17849 * 0x1000 and page 0x2000 in the region we remap.
17851 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
17852 vm_map_offset_t range_start
, range_end
;
17854 range_start
= vm_map_trunc_page(memory_address
, src_page_mask
);
17855 range_end
= vm_map_round_page(memory_address
+ size
, src_page_mask
);
17856 memory_address
= range_start
;
17857 size
= range_end
- range_start
;
17858 offset_in_mapping
= initial_memory_address
- memory_address
;
17862 * This legacy code path is broken: for the range mentioned
17863 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
17864 * two 4k pages, it yields [ memory_address = 0x1000,
17865 * size = 0x1000 ], which covers only the first 4k page.
17866 * BUT some code unfortunately depends on this bug, so we
17867 * can't fix it without breaking something.
17868 * New code should get automatically opted in the new
17869 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flags.
17871 offset_in_mapping
= 0;
17872 memory_address
= vm_map_trunc_page(memory_address
, src_page_mask
);
17873 size
= vm_map_round_page(size
, src_page_mask
);
17874 initial_memory_address
= memory_address
;
17875 initial_size
= size
;
17880 return KERN_INVALID_ARGUMENT
;
17883 if (flags
& VM_FLAGS_RESILIENT_MEDIA
) {
17884 /* must be copy-on-write to be "media resilient" */
17886 return KERN_INVALID_ARGUMENT
;
17890 vmk_flags
.vmkf_copy_pageable
= target_map
->hdr
.entries_pageable
;
17891 vmk_flags
.vmkf_copy_same_map
= (src_map
== target_map
);
17894 result
= vm_map_copy_extract(src_map
,
17897 VM_PROT_NONE
, /* required_protection: no check here */
17903 if (result
!= KERN_SUCCESS
) {
17906 assert(copy_map
!= VM_MAP_COPY_NULL
);
17911 target_size
= size
;
17912 if (src_page_mask
!= target_page_mask
) {
17913 vm_map_copy_t target_copy_map
;
17915 target_copy_map
= copy_map
; /* can modify "copy_map" itself */
17916 DEBUG4K_ADJUST("adjusting...\n");
17917 result
= vm_map_copy_adjust_to_target(
17919 offset_in_mapping
, /* offset */
17927 if (result
!= KERN_SUCCESS
) {
17928 DEBUG4K_COPY("failed to adjust 0x%x\n", result
);
17929 vm_map_copy_discard(copy_map
);
17932 if (trimmed_start
== 0) {
17933 /* nothing trimmed: no adjustment needed */
17934 } else if (trimmed_start
>= offset_in_mapping
) {
17935 /* trimmed more than offset_in_mapping: nothing left */
17936 assert(overmap_start
== 0);
17937 assert(overmap_end
== 0);
17938 offset_in_mapping
= 0;
17940 /* trimmed some of offset_in_mapping: adjust */
17941 assert(overmap_start
== 0);
17942 assert(overmap_end
== 0);
17943 offset_in_mapping
-= trimmed_start
;
17945 offset_in_mapping
+= overmap_start
;
17946 target_size
= target_copy_map
->size
;
17950 * Allocate/check a range of free virtual address
17951 * space for the target
17953 *address
= vm_map_trunc_page(*address
, target_page_mask
);
17954 vm_map_lock(target_map
);
17955 target_size
= vm_map_round_page(target_size
, target_page_mask
);
17956 result
= vm_map_remap_range_allocate(target_map
, address
,
17958 mask
, flags
, vmk_flags
, tag
,
17961 for (entry
= vm_map_copy_first_entry(copy_map
);
17962 entry
!= vm_map_copy_to_entry(copy_map
);
17963 entry
= new_entry
) {
17964 new_entry
= entry
->vme_next
;
17965 vm_map_copy_entry_unlink(copy_map
, entry
);
17966 if (result
== KERN_SUCCESS
) {
17967 if (flags
& VM_FLAGS_RESILIENT_CODESIGN
) {
17968 /* no codesigning -> read-only access */
17969 entry
->max_protection
= VM_PROT_READ
;
17970 entry
->protection
= VM_PROT_READ
;
17971 entry
->vme_resilient_codesign
= TRUE
;
17973 entry
->vme_start
+= *address
;
17974 entry
->vme_end
+= *address
;
17975 assert(!entry
->map_aligned
);
17976 if ((flags
& VM_FLAGS_RESILIENT_MEDIA
) &&
17977 !entry
->is_sub_map
&&
17978 (VME_OBJECT(entry
) == VM_OBJECT_NULL
||
17979 VME_OBJECT(entry
)->internal
)) {
17980 entry
->vme_resilient_media
= TRUE
;
17982 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_start
, MIN(target_page_mask
, PAGE_MASK
)));
17983 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_end
, MIN(target_page_mask
, PAGE_MASK
)));
17984 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry
), MIN(target_page_mask
, PAGE_MASK
)));
17985 vm_map_store_entry_link(target_map
, insp_entry
, entry
,
17987 insp_entry
= entry
;
17989 if (!entry
->is_sub_map
) {
17990 vm_object_deallocate(VME_OBJECT(entry
));
17992 vm_map_deallocate(VME_SUBMAP(entry
));
17994 vm_map_copy_entry_dispose(copy_map
, entry
);
17998 if (flags
& VM_FLAGS_RESILIENT_CODESIGN
) {
17999 *cur_protection
= VM_PROT_READ
;
18000 *max_protection
= VM_PROT_READ
;
18003 if (target_map
->disable_vmentry_reuse
== TRUE
) {
18004 assert(!target_map
->is_nested_map
);
18005 if (target_map
->highest_entry_end
< insp_entry
->vme_end
) {
18006 target_map
->highest_entry_end
= insp_entry
->vme_end
;
18010 if (result
== KERN_SUCCESS
) {
18011 target_map
->size
+= target_size
;
18012 SAVE_HINT_MAP_WRITE(target_map
, insp_entry
);
18015 if (*max_protection
& VM_PROT_EXECUTE
) {
18016 vm_map_address_t region_start
= 0, region_size
= 0;
18017 struct pmap_cs_code_directory
*region_cd
= NULL
;
18018 vm_map_address_t base
= 0;
18019 struct pmap_cs_lookup_results results
= {};
18020 vm_map_size_t page_addr
= vm_map_trunc_page(memory_address
, PAGE_MASK
);
18021 vm_map_size_t assoc_size
= vm_map_round_page(memory_address
+ size
- page_addr
, PAGE_MASK
);
18023 pmap_cs_lookup(src_map
->pmap
, memory_address
, &results
);
18024 region_size
= results
.region_size
;
18025 region_start
= results
.region_start
;
18026 region_cd
= results
.region_cd_entry
;
18027 base
= results
.base
;
18029 if (region_cd
!= NULL
&& (page_addr
!= region_start
|| assoc_size
!= region_size
)) {
18030 *cur_protection
= VM_PROT_READ
;
18031 *max_protection
= VM_PROT_READ
;
18032 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
18033 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
18034 page_addr
, page_addr
+ assoc_size
, *address
,
18035 region_start
, region_size
,
18036 region_cd
!= NULL
? "not " : "" // Don't leak kernel slide
18042 vm_map_unlock(target_map
);
/* Wire the freshly mapped range when the target map requires wiring. */
18044 if (result
== KERN_SUCCESS
&& target_map
->wiring_required
) {
18045 result
= vm_map_wire_kernel(target_map
, *address
,
18046 *address
+ size
, *cur_protection
, VM_KERN_MEMORY_MLOCK
,
18051 * If requested, return the address of the data pointed to by the
18052 * request, rather than the base of the resulting page.
18054 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
18055 *address
+= offset_in_mapping
;
18058 if (src_page_mask
!= target_page_mask
) {
18059 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map
, (uint64_t)memory_address
, (uint64_t)size
, copy
, target_map
, (uint64_t)*address
, (uint64_t)offset_in_mapping
, result
);
/* The entries were moved out above; release the now empty copy map. */
18061 vm_map_copy_discard(copy_map
);
18062 copy_map
= VM_MAP_COPY_NULL
;
18068 * Routine: vm_map_remap_range_allocate
18071 * Allocate a range in the specified virtual address map.
18072 * returns the address and the map entry just before the allocated
18075 * Map must be locked.
/*
 * vm_map_remap_range_allocate:
 *
 * Finds (VM_FLAGS_ANYWHERE) or validates (fixed address) a free range of
 * "size" bytes in "map" and, on KERN_SUCCESS, stores in *map_entry the entry
 * chosen as the predecessor of that range. *address is IN/OUT. "tag" is
 * unused.
 *
 * VM_FLAGS_ANYWHERE path, as visible in this extract:
 *  - VM_FLAGS_RANDOM_ADDR asks vm_map_random_address_for_size() for a start.
 *  - The start is raised to map->min_offset if needed; a start beyond
 *    map->max_offset yields KERN_NO_SPACE.
 *  - The search walks the hole list when map->holelistenabled is set
 *    (KERN_NO_SPACE if there is no hole list or no suitable hole), otherwise
 *    it starts from map->first_free or from a vm_map_lookup_entry() result.
 *    With map->disable_vmentry_reuse the start comes from
 *    VM_MAP_HIGHEST_ENTRY().
 *  - Each candidate start is aligned with "mask" and the map page mask. A
 *    candidate whose end overflows or exceeds map->max_offset fails with
 *    KERN_NO_SPACE; when map->wait_for_space is set and "size" could fit in
 *    the map, the thread first waits on the map (assert_wait, unlock,
 *    thread_block). NOTE(review): the retry lines after the wait are not
 *    visible in this extract.
 *
 * Fixed-address path, as visible in this extract:
 *  - A start that violates "mask" yields KERN_NO_SPACE.
 *  - A range outside [map->min_offset, map->max_offset] yields
 *    KERN_INVALID_ADDRESS.
 *  - With VM_FLAGS_OVERWRITE the existing range is deleted first, using a
 *    temporary zap_map (KERN_RESOURCE_SHORTAGE if it cannot be created).
 *  - A start that is already mapped, or a following entry that begins
 *    before the end of the range, yields KERN_NO_SPACE.
 *
 * NOTE(review): the map and flags parameter lines and several local
 * declarations (kr, zap_map) are not part of this extract.
 */
18078 static kern_return_t
18079 vm_map_remap_range_allocate(
18081 vm_map_address_t
*address
, /* IN/OUT */
18082 vm_map_size_t size
,
18083 vm_map_offset_t mask
,
18085 vm_map_kernel_flags_t vmk_flags
,
18086 __unused vm_tag_t tag
,
18087 vm_map_entry_t
*map_entry
) /* OUT */
18089 vm_map_entry_t entry
;
18090 vm_map_offset_t start
;
18091 vm_map_offset_t end
;
18092 vm_map_offset_t desired_empty_end
;
18094 vm_map_entry_t hole_entry
;
/* VM_FLAGS_ANYWHERE: the function chooses the address itself. */
18100 if (flags
& VM_FLAGS_ANYWHERE
) {
18101 if (flags
& VM_FLAGS_RANDOM_ADDR
) {
18103 * Get a random start address.
18105 kr
= vm_map_random_address_for_size(map
, address
, size
);
18106 if (kr
!= KERN_SUCCESS
) {
18113 * Calculate the first possible address.
18116 if (start
< map
->min_offset
) {
18117 start
= map
->min_offset
;
18119 if (start
> map
->max_offset
) {
18120 return KERN_NO_SPACE
;
18124 * Look for the first possible address;
18125 * if there's already something at this
18126 * address, we have to start after it.
18129 if (map
->disable_vmentry_reuse
== TRUE
) {
18130 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
18132 if (map
->holelistenabled
) {
18133 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
18135 if (hole_entry
== NULL
) {
18137 * No more space in the map?
18139 return KERN_NO_SPACE
;
18141 boolean_t found_hole
= FALSE
;
18144 if (hole_entry
->vme_start
>= start
) {
18145 start
= hole_entry
->vme_start
;
18150 if (hole_entry
->vme_end
> start
) {
18154 hole_entry
= hole_entry
->vme_next
;
18155 } while (hole_entry
!= CAST_TO_VM_MAP_ENTRY(map
->holes_list
));
18157 if (found_hole
== FALSE
) {
18158 return KERN_NO_SPACE
;
18161 entry
= hole_entry
;
18164 assert(first_free_is_valid(map
));
18165 if (start
== map
->min_offset
) {
18166 if ((entry
= map
->first_free
) != vm_map_to_entry(map
)) {
18167 start
= entry
->vme_end
;
18170 vm_map_entry_t tmp_entry
;
18171 if (vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
18172 start
= tmp_entry
->vme_end
;
18177 start
= vm_map_round_page(start
,
18178 VM_MAP_PAGE_MASK(map
));
18182 * In any case, the "entry" always precedes
18183 * the proposed new region throughout the
18188 vm_map_entry_t next
;
18191 * Find the end of the proposed new region.
18192 * Be sure we didn't go beyond the end, or
18193 * wrap around the address.
18196 end
= ((start
+ mask
) & ~mask
);
18197 end
= vm_map_round_page(end
,
18198 VM_MAP_PAGE_MASK(map
));
18200 return KERN_NO_SPACE
;
18205 /* We want an entire page of empty space, but don't increase the allocation size. */
18206 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
18208 if ((desired_empty_end
> map
->max_offset
) || (desired_empty_end
< start
)) {
18209 if (map
->wait_for_space
) {
18210 if (size
<= (map
->max_offset
-
18211 map
->min_offset
)) {
18212 assert_wait((event_t
) map
, THREAD_INTERRUPTIBLE
);
18213 vm_map_unlock(map
);
18214 thread_block(THREAD_CONTINUE_NULL
);
18220 return KERN_NO_SPACE
;
18223 next
= entry
->vme_next
;
18225 if (map
->holelistenabled
) {
18226 if (entry
->vme_end
>= desired_empty_end
) {
18231 * If there are no more entries, we must win.
18235 * If there is another entry, it must be
18236 * after the end of the potential new region.
18239 if (next
== vm_map_to_entry(map
)) {
18243 if (next
->vme_start
>= desired_empty_end
) {
18249 * Didn't fit -- move to the next entry.
18254 if (map
->holelistenabled
) {
18255 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
18259 return KERN_NO_SPACE
;
18261 start
= entry
->vme_start
;
18263 start
= entry
->vme_end
;
18267 if (map
->holelistenabled
) {
18268 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
18269 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
18275 vm_map_entry_t temp_entry
;
18279 * the address doesn't itself violate
18280 * the mask requirement.
18283 if ((start
& mask
) != 0) {
18284 return KERN_NO_SPACE
;
18289 * ... the address is within bounds
18292 end
= start
+ size
;
18294 if ((start
< map
->min_offset
) ||
18295 (end
> map
->max_offset
) ||
18297 return KERN_INVALID_ADDRESS
;
18301 * If we're asked to overwrite whatever was mapped in that
18302 * range, first deallocate that range.
18304 if (flags
& VM_FLAGS_OVERWRITE
) {
18306 int remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
| VM_MAP_REMOVE_NO_MAP_ALIGN
;
18309 * We use a "zap_map" to avoid having to unlock
18310 * the "map" in vm_map_delete(), which would compromise
18311 * the atomicity of the "deallocate" and then "remap"
18314 zap_map
= vm_map_create(PMAP_NULL
,
18317 map
->hdr
.entries_pageable
);
18318 if (zap_map
== VM_MAP_NULL
) {
18319 return KERN_RESOURCE_SHORTAGE
;
18321 vm_map_set_page_shift(zap_map
, VM_MAP_PAGE_SHIFT(map
));
18322 vm_map_disable_hole_optimization(zap_map
);
18324 if (vmk_flags
.vmkf_overwrite_immutable
) {
18325 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
18327 kr
= vm_map_delete(map
, start
, end
,
18330 if (kr
== KERN_SUCCESS
) {
18331 vm_map_destroy(zap_map
,
18332 VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
18333 zap_map
= VM_MAP_NULL
;
18338 * ... the starting address isn't allocated
18341 if (vm_map_lookup_entry(map
, start
, &temp_entry
)) {
18342 return KERN_NO_SPACE
;
18345 entry
= temp_entry
;
18348 * ... the next region doesn't overlap the
18352 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
18353 (entry
->vme_next
->vme_start
< end
)) {
18354 return KERN_NO_SPACE
;
18357 *map_entry
= entry
;
18358 return KERN_SUCCESS
;
18364 * Set the address map for the current thread to the specified map
/*
 * Body of the map switch routine (presumably vm_map_switch(); the signature
 * lines are not part of this extract).
 *
 * Records the map currently attached to the calling thread in "oldmap",
 * then, with preemption disabled, calls PMAP_SWITCH_USER() for the current
 * thread, the requested "map" and the current cpu number.
 * NOTE(review): the declaration of mycpu and the return statement are not
 * visible here; callers in this file use the return value as the previous
 * map.
 */
18372 thread_t thread
= current_thread();
18373 vm_map_t oldmap
= thread
->map
;
/* The cpu number must stay valid across the switch: no preemption here. */
18375 mp_disable_preemption();
18376 mycpu
= cpu_number();
18379 * Deactivate the current map and activate the requested map
18381 PMAP_SWITCH_USER(thread
, map
, mycpu
);
18383 mp_enable_preemption();
18389 * Routine: vm_map_write_user
18392 * Copy out data from a kernel space into space in the
18393 * destination map. The space must already exist in the
18395 * NOTE: This routine should only be called by threads
18396 * which can block on a page fault. i.e. kernel mode user
/*
 * vm_map_write_user (name taken from the routine header preceding this
 * block; the signature lines other than dst_addr are not in this extract).
 *
 * Copies "size" bytes from src_p to dst_addr with copyout().
 *  - If "map" is the current map, copyout() is called directly.
 *  - Otherwise a reference is taken on "map", the thread switches to it
 *    with vm_map_switch(), performs the copyout(), switches back to the
 *    previous map and drops the reference.
 * A non-zero copyout() result is turned into KERN_INVALID_ADDRESS; kr starts
 * as KERN_SUCCESS.
 */
18404 vm_map_address_t dst_addr
,
18407 kern_return_t kr
= KERN_SUCCESS
;
/* Fast path: the destination map is already the active one. */
18409 if (current_map() == map
) {
18410 if (copyout(src_p
, dst_addr
, size
)) {
18411 kr
= KERN_INVALID_ADDRESS
;
18416 /* take on the identity of the target map while doing */
/* The reference keeps "map" alive while it is temporarily the active map. */
18419 vm_map_reference(map
);
18420 oldmap
= vm_map_switch(map
);
18421 if (copyout(src_p
, dst_addr
, size
)) {
18422 kr
= KERN_INVALID_ADDRESS
;
/* Restore the previous map before dropping the reference. */
18424 vm_map_switch(oldmap
);
18425 vm_map_deallocate(map
);
18431 * Routine: vm_map_read_user
18434 * Copy in data from a user space source map into the
18435 * kernel map. The space must already exist in the
18437 * NOTE: This routine should only be called by threads
18438 * which can block on a page fault. i.e. kernel mode user
/*
 * vm_map_read_user (name taken from the routine header preceding this
 * block; the signature lines other than src_addr are not in this extract).
 *
 * Copies "size" bytes from src_addr to dst_p with copyin().
 *  - If "map" is the current map, copyin() is called directly.
 *  - Otherwise a reference is taken on "map", the thread switches to it
 *    with vm_map_switch(), performs the copyin(), switches back to the
 *    previous map and drops the reference.
 * A non-zero copyin() result is turned into KERN_INVALID_ADDRESS; kr starts
 * as KERN_SUCCESS.
 */
18445 vm_map_address_t src_addr
,
18449 kern_return_t kr
= KERN_SUCCESS
;
/* Fast path: the source map is already the active one. */
18451 if (current_map() == map
) {
18452 if (copyin(src_addr
, dst_p
, size
)) {
18453 kr
= KERN_INVALID_ADDRESS
;
18458 /* take on the identity of the target map while doing */
/* The reference keeps "map" alive while it is temporarily the active map. */
18461 vm_map_reference(map
);
18462 oldmap
= vm_map_switch(map
);
18463 if (copyin(src_addr
, dst_p
, size
)) {
18464 kr
= KERN_INVALID_ADDRESS
;
/* Restore the previous map before dropping the reference. */
18466 vm_map_switch(oldmap
);
18467 vm_map_deallocate(map
);
18474 * vm_map_check_protection:
18476 * Assert that the target map allows the specified
18477 * privilege on the entire address region given.
18478 * The entire region must be allocated.
/*
 * vm_map_check_protection:
 *
 * Checks that every address in [start, end) of "map" is covered by an entry
 * whose protection includes all the bits of "protection".
 *
 * Failure conditions visible in this extract (each one unlocks the map
 * before leaving):
 *  - start below vm_map_min(map), end above vm_map_max(map), or start > end;
 *  - no entry found at "start";
 *  - the walk reaches the end of the entry list before "end";
 *  - a gap between consecutive entries;
 *  - an entry whose protection lacks one of the requested bits.
 * When the walk covers the whole range the map is unlocked as well.
 *
 * NOTE(review): the return type, the lock acquisition, the assignment of
 * "entry" from tmp_entry and the return statements are not part of this
 * extract.
 */
18481 vm_map_check_protection(vm_map_t map
, vm_map_offset_t start
,
18482 vm_map_offset_t end
, vm_prot_t protection
)
18484 vm_map_entry_t entry
;
18485 vm_map_entry_t tmp_entry
;
/* Reject ranges that fall outside the map or are inverted. */
18489 if (start
< vm_map_min(map
) || end
> vm_map_max(map
) || start
> end
) {
18490 vm_map_unlock(map
);
18494 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
18495 vm_map_unlock(map
);
/* Walk the entries, advancing "start" to the end of each checked entry. */
18501 while (start
< end
) {
18502 if (entry
== vm_map_to_entry(map
)) {
18503 vm_map_unlock(map
);
18508 * No holes allowed!
18511 if (start
< entry
->vme_start
) {
18512 vm_map_unlock(map
);
18517 * Check protection associated with entry.
18520 if ((entry
->protection
& protection
) != protection
) {
18521 vm_map_unlock(map
);
18525 /* go to next entry */
18527 start
= entry
->vme_end
;
18528 entry
= entry
->vme_next
;
18530 vm_map_unlock(map
);
/*
 * vm_map_purgable_control:
 *
 * Applies a purgeable "control" operation to the VM object mapped at
 * "address" in "map", reading or updating *state through
 * vm_object_purgable_control().
 *
 * Checks visible in this extract, in order:
 *  - a null map, or a control other than SET_STATE, GET_STATE, PURGE_ALL or
 *    SET_STATE_FROM_KERNEL: KERN_INVALID_ARGUMENT;
 *  - VM_PURGABLE_PURGE_ALL: vm_purgeable_object_purge_all() then
 *    KERN_SUCCESS, without looking at the map;
 *  - for the two set-state controls, bits of *state outside
 *    VM_PURGABLE_ALL_MASKS: KERN_INVALID_ARGUMENT;
 *  - no entry at "address", or a submap entry: KERN_INVALID_ADDRESS;
 *  - entry without VM_PROT_WRITE: KERN_PROTECTION_FAILURE;
 *  - no object, or object->purgable == VM_PURGABLE_DENY:
 *    KERN_INVALID_ARGUMENT.
 * The map is read-locked for the lookup and unlocked before the object
 * operation; the object lock is held across that operation.
 *
 * NOTE(review): the map and state parameter lines, the declaration of kr
 * and the final return are not part of this extract. Preprocessor lines may
 * be missing as well, so confirm which of the checks below are compiled in.
 */
18535 vm_map_purgable_control(
18537 vm_map_offset_t address
,
18538 vm_purgable_t control
,
18541 vm_map_entry_t entry
;
18542 vm_object_t object
;
18544 boolean_t was_nonvolatile
;
18547 * Vet all the input parameters and current type and state of the
18548 * underlaying object. Return with an error if anything is amiss.
18550 if (map
== VM_MAP_NULL
) {
18551 return KERN_INVALID_ARGUMENT
;
18554 if (control
!= VM_PURGABLE_SET_STATE
&&
18555 control
!= VM_PURGABLE_GET_STATE
&&
18556 control
!= VM_PURGABLE_PURGE_ALL
&&
18557 control
!= VM_PURGABLE_SET_STATE_FROM_KERNEL
) {
18558 return KERN_INVALID_ARGUMENT
;
18561 if (control
== VM_PURGABLE_PURGE_ALL
) {
18562 vm_purgeable_object_purge_all();
18563 return KERN_SUCCESS
;
/*
 * NOTE(review): a value masked with VM_PURGABLE_STATE_MASK should never
 * compare greater than that same mask, so the second clause below looks
 * like it can never be true - confirm the intent.
 */
18566 if ((control
== VM_PURGABLE_SET_STATE
||
18567 control
== VM_PURGABLE_SET_STATE_FROM_KERNEL
) &&
18568 (((*state
& ~(VM_PURGABLE_ALL_MASKS
)) != 0) ||
18569 ((*state
& VM_PURGABLE_STATE_MASK
) > VM_PURGABLE_STATE_MASK
))) {
18570 return KERN_INVALID_ARGUMENT
;
18573 vm_map_lock_read(map
);
18575 if (!vm_map_lookup_entry(map
, address
, &entry
) || entry
->is_sub_map
) {
18577 * Must pass a valid non-submap address.
18579 vm_map_unlock_read(map
);
18580 return KERN_INVALID_ADDRESS
;
18583 if ((entry
->protection
& VM_PROT_WRITE
) == 0) {
18585 * Can't apply purgable controls to something you can't write.
18587 vm_map_unlock_read(map
);
18588 return KERN_PROTECTION_FAILURE
;
18591 object
= VME_OBJECT(entry
);
18592 if (object
== VM_OBJECT_NULL
||
18593 object
->purgable
== VM_PURGABLE_DENY
) {
18595 * Object must already be present and be purgeable.
18597 vm_map_unlock_read(map
);
18598 return KERN_INVALID_ARGUMENT
;
18601 vm_object_lock(object
);
/*
 * Whole-object check: the entry must start at object offset 0 and span
 * exactly object->vo_size. NOTE(review): lines around this check are
 * missing from this extract; confirm that it is compiled in.
 */
18604 if (VME_OFFSET(entry
) != 0 ||
18605 entry
->vme_end
- entry
->vme_start
!= object
->vo_size
) {
18607 * Can only apply purgable controls to the whole (existing)
18610 vm_map_unlock_read(map
);
18611 vm_object_unlock(object
);
18612 return KERN_INVALID_ARGUMENT
;
18616 assert(!entry
->is_sub_map
);
18617 assert(!entry
->use_pmap
); /* purgeable has its own accounting */
/* The object lock is still held, so the map lock can be dropped now. */
18619 vm_map_unlock_read(map
);
/* Sampled before the control call so a state transition can be detected. */
18621 was_nonvolatile
= (object
->purgable
== VM_PURGABLE_NONVOLATILE
);
18623 kr
= vm_object_purgable_control(object
, control
, state
);
/*
 * An object that left the non-volatile state through a kernel-pmap map is
 * tagged with kernel_task as its volatilizer.
 */
18625 if (was_nonvolatile
&&
18626 object
->purgable
!= VM_PURGABLE_NONVOLATILE
&&
18627 map
->pmap
== kernel_pmap
) {
18629 object
->vo_purgeable_volatilizer
= kernel_task
;
18633 vm_object_unlock(object
);
/*
 * vm_map_footprint_query_page_info:
 *
 * Computes, into *disposition_p, the footprint-oriented disposition flags
 * (VM_PAGE_QUERY_PAGE_*) of the page at "curr_s_offset" inside "map_entry".
 *
 * Preconditions asserted here: the map lock is held, the map has no corpse
 * footprint, and curr_s_offset lies within [vme_start, vme_end) of the
 * entry.
 *
 * Cases visible in this extract:
 *  - no VM object behind the entry: disposition 0;
 *  - submap entry that does not use the pmap (nested pmap): disposition 0;
 *  - otherwise pmap_query_page_info() is consulted and the disposition is
 *    derived, in order, from: objects flagged vo_no_footprint; purgeable
 *    (or ledger-tagged) objects owned by the task of this map, where only
 *    the first N pages are reported as present, N being the resident plus
 *    compressed count for non-volatile objects and the wired count for
 *    volatile or empty ones; IOKit-accounted internal non-purgeable memory
 *    (present and dirty); pages under alternate accounting; and finally
 *    the plain pmap state (present, referenced, dirty or external,
 *    reusable, or paged out when compressed).
 * The page index used in the owned-purgeable cases is computed in units of
 * effective_page_size, the smaller of PAGE_SIZE and the map page size.
 *
 * NOTE(review): the null-object test appears twice in a row below (at 18656
 * and again at 18664); the second occurrence looks redundant. The map
 * parameter line, the declarations of pmap_disp and disposition and the
 * return statements are not part of this extract.
 */
18639 vm_map_footprint_query_page_info(
18641 vm_map_entry_t map_entry
,
18642 vm_map_offset_t curr_s_offset
,
18643 int *disposition_p
)
18646 vm_object_t object
;
18648 int effective_page_size
;
18650 vm_map_lock_assert_held(map
);
18651 assert(!map
->has_corpse_footprint
);
18652 assert(curr_s_offset
>= map_entry
->vme_start
);
18653 assert(curr_s_offset
< map_entry
->vme_end
);
18655 object
= VME_OBJECT(map_entry
);
18656 if (object
== VM_OBJECT_NULL
) {
18657 *disposition_p
= 0;
/* Page granularity used for the page-index computations below. */
18661 effective_page_size
= MIN(PAGE_SIZE
, VM_MAP_PAGE_SIZE(map
));
18664 if (object
== VM_OBJECT_NULL
) {
18665 /* nothing mapped here: no need to ask */
18666 *disposition_p
= 0;
18668 } else if (map_entry
->is_sub_map
&&
18669 !map_entry
->use_pmap
) {
18670 /* nested pmap: no footprint */
18671 *disposition_p
= 0;
18678 pmap_query_page_info(map
->pmap
, curr_s_offset
, &pmap_disp
);
18681 * Compute this page's disposition.
18685 /* deal with "alternate accounting" first */
18686 if (!map_entry
->is_sub_map
&&
18687 object
->vo_no_footprint
) {
18688 /* does not count in footprint */
18689 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18690 } else if (!map_entry
->is_sub_map
&&
18691 (object
->purgable
== VM_PURGABLE_NONVOLATILE
||
18692 (object
->purgable
== VM_PURGABLE_DENY
&&
18693 object
->vo_ledger_tag
)) &&
18694 VM_OBJECT_OWNER(object
) != NULL
&&
18695 VM_OBJECT_OWNER(object
)->map
== map
) {
18696 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18697 if ((((curr_s_offset
18698 - map_entry
->vme_start
18699 + VME_OFFSET(map_entry
))
18700 / effective_page_size
) <
18701 (object
->resident_page_count
+
18702 vm_compressor_pager_get_count(object
->pager
)))) {
18704 * Non-volatile purgeable object owned
18705 * by this task: report the first
18706 * "#resident + #compressed" pages as
18707 * "resident" (to show that they
18708 * contribute to the footprint) but not
18709 * "dirty" (to avoid double-counting
18710 * with the fake "non-volatile" region
18711 * we'll report at the end of the
18712 * address space to account for all
18713 * (mapped or not) non-volatile memory
18714 * owned by this task.
18716 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18718 } else if (!map_entry
->is_sub_map
&&
18719 (object
->purgable
== VM_PURGABLE_VOLATILE
||
18720 object
->purgable
== VM_PURGABLE_EMPTY
) &&
18721 VM_OBJECT_OWNER(object
) != NULL
&&
18722 VM_OBJECT_OWNER(object
)->map
== map
) {
18723 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18724 if ((((curr_s_offset
18725 - map_entry
->vme_start
18726 + VME_OFFSET(map_entry
))
18727 / effective_page_size
) <
18728 object
->wired_page_count
)) {
18730 * Volatile|empty purgeable object owned
18731 * by this task: report the first
18732 * "#wired" pages as "resident" (to
18733 * show that they contribute to the
18734 * footprint) but not "dirty" (to avoid
18735 * double-counting with the fake
18736 * "non-volatile" region we'll report
18737 * at the end of the address space to
18738 * account for all (mapped or not)
18739 * non-volatile memory owned by this
18742 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18744 } else if (!map_entry
->is_sub_map
&&
18745 map_entry
->iokit_acct
&&
18746 object
->internal
&&
18747 object
->purgable
== VM_PURGABLE_DENY
) {
18749 * Non-purgeable IOKit memory: phys_footprint
18750 * includes the entire virtual mapping.
18752 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18753 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18754 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
18755 } else if (pmap_disp
& (PMAP_QUERY_PAGE_ALTACCT
|
18756 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
)) {
18757 /* alternate accounting */
18758 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18759 if (map
->pmap
->footprint_was_suspended
) {
18761 * The assertion below can fail if dyld
18762 * suspended footprint accounting
18763 * while doing some adjustments to
18764 * this page; the mapping would say
18765 * "use pmap accounting" but the page
18766 * would be marked "alternate
18770 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18772 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18776 if (pmap_disp
& PMAP_QUERY_PAGE_PRESENT
) {
18777 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18778 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18779 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
18780 if (pmap_disp
& PMAP_QUERY_PAGE_INTERNAL
) {
18781 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
18783 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
18785 if (pmap_disp
& PMAP_QUERY_PAGE_REUSABLE
) {
18786 disposition
|= VM_PAGE_QUERY_PAGE_REUSABLE
;
18788 } else if (pmap_disp
& PMAP_QUERY_PAGE_COMPRESSED
) {
18789 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18790 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
18794 *disposition_p
= disposition
;
/*
 * vm_map_page_query_internal:
 *
 * Convenience front end over vm_map_page_info(): requests the
 * VM_PAGE_INFO_BASIC flavor for "offset" in "target_map" and, when that call
 * returns KERN_SUCCESS, copies the disposition and ref_count fields of the
 * result into *disposition and *ref_count.
 *
 * NOTE(review): the disposition and ref_count parameter lines, the
 * declaration of kr, the failure branch and the return statement are not
 * part of this extract.
 */
18798 vm_map_page_query_internal(
18799 vm_map_t target_map
,
18800 vm_map_offset_t offset
,
18805 vm_page_info_basic_data_t info
;
18806 mach_msg_type_number_t count
;
/* The count passed to vm_map_page_info() is the basic-flavor count. */
18808 count
= VM_PAGE_INFO_BASIC_COUNT
;
18809 kr
= vm_map_page_info(target_map
,
18811 VM_PAGE_INFO_BASIC
,
18812 (vm_page_info_t
) &info
,
18814 if (kr
== KERN_SUCCESS
) {
18815 *disposition
= info
.disposition
;
18816 *ref_count
= info
.ref_count
;
/*
 * Single-page info query (presumably vm_map_page_info(); the lines carrying
 * the function name and the map parameter are not part of this extract).
 *
 * Forwards to vm_map_page_range_info_internal() for the range
 * [offset, offset + 1), passing -1 as effective_page_shift to leave it
 * unspecified, and returns the result of that call. The one-byte range is
 * rounded to a page boundary by the callee, per the comment on the call.
 */
18828 vm_map_offset_t offset
,
18829 vm_page_info_flavor_t flavor
,
18830 vm_page_info_t info
,
18831 mach_msg_type_number_t
*count
)
18833 return vm_map_page_range_info_internal(map
,
18834 offset
, /* start of range */
18835 (offset
+ 1), /* this will get rounded in the call to the page boundary */
18836 (int)-1, /* effective_page_shift: unspecified */
18843 vm_map_page_range_info_internal(
18845 vm_map_offset_t start_offset
,
18846 vm_map_offset_t end_offset
,
18847 int effective_page_shift
,
18848 vm_page_info_flavor_t flavor
,
18849 vm_page_info_t info
,
18850 mach_msg_type_number_t
*count
)
18852 vm_map_entry_t map_entry
= VM_MAP_ENTRY_NULL
;
18853 vm_object_t object
= VM_OBJECT_NULL
, curr_object
= VM_OBJECT_NULL
;
18854 vm_page_t m
= VM_PAGE_NULL
;
18855 kern_return_t retval
= KERN_SUCCESS
;
18856 int disposition
= 0;
18858 int depth
= 0, info_idx
= 0;
18859 vm_page_info_basic_t basic_info
= 0;
18860 vm_map_offset_t offset_in_page
= 0, offset_in_object
= 0, curr_offset_in_object
= 0;
18861 vm_map_offset_t start
= 0, end
= 0, curr_s_offset
= 0, curr_e_offset
= 0;
18862 boolean_t do_region_footprint
;
18863 ledger_amount_t ledger_resident
, ledger_compressed
;
18864 int effective_page_size
;
18865 vm_map_offset_t effective_page_mask
;
18868 case VM_PAGE_INFO_BASIC
:
18869 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
) {
18871 * The "vm_page_info_basic_data" structure was not
18872 * properly padded, so allow the size to be off by
18873 * one to maintain backwards binary compatibility...
18875 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
- 1) {
18876 return KERN_INVALID_ARGUMENT
;
18881 return KERN_INVALID_ARGUMENT
;
18884 if (effective_page_shift
== -1) {
18885 effective_page_shift
= vm_self_region_page_shift_safely(map
);
18886 if (effective_page_shift
== -1) {
18887 return KERN_INVALID_ARGUMENT
;
18890 effective_page_size
= (1 << effective_page_shift
);
18891 effective_page_mask
= effective_page_size
- 1;
18893 do_region_footprint
= task_self_region_footprint();
18897 info_idx
= 0; /* Tracks the next index within the info structure to be filled.*/
18898 retval
= KERN_SUCCESS
;
18900 offset_in_page
= start_offset
& effective_page_mask
;
18901 start
= vm_map_trunc_page(start_offset
, effective_page_mask
);
18902 end
= vm_map_round_page(end_offset
, effective_page_mask
);
18905 return KERN_INVALID_ARGUMENT
;
18908 assert((end
- start
) <= MAX_PAGE_RANGE_QUERY
);
18910 vm_map_lock_read(map
);
18912 task_ledgers_footprint(map
->pmap
->ledger
, &ledger_resident
, &ledger_compressed
);
18914 for (curr_s_offset
= start
; curr_s_offset
< end
;) {
18916 * New lookup needs reset of these variables.
18918 curr_object
= object
= VM_OBJECT_NULL
;
18919 offset_in_object
= 0;
18923 if (do_region_footprint
&&
18924 curr_s_offset
>= vm_map_last_entry(map
)->vme_end
) {
18926 * Request for "footprint" info about a page beyond
18927 * the end of address space: this must be for
18928 * the fake region vm_map_region_recurse_64()
18929 * reported to account for non-volatile purgeable
18930 * memory owned by this task.
18934 if (curr_s_offset
- vm_map_last_entry(map
)->vme_end
<=
18935 (unsigned) ledger_compressed
) {
18937 * We haven't reported all the "non-volatile
18938 * compressed" pages yet, so report this fake
18939 * page as "compressed".
18941 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
18944 * We've reported all the non-volatile
18945 * compressed pages but not all the non-volatile
18946 * pages, so report this fake page as
18947 * "resident dirty".
18949 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18950 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
18951 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
18954 case VM_PAGE_INFO_BASIC
:
18955 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
18956 basic_info
->disposition
= disposition
;
18957 basic_info
->ref_count
= 1;
18958 basic_info
->object_id
= VM_OBJECT_ID_FAKE(map
, task_ledgers
.purgeable_nonvolatile
);
18959 basic_info
->offset
= 0;
18960 basic_info
->depth
= 0;
18965 curr_s_offset
+= effective_page_size
;
18970 * First, find the map entry covering "curr_s_offset", going down
18971 * submaps if necessary.
18973 if (!vm_map_lookup_entry(map
, curr_s_offset
, &map_entry
)) {
18974 /* no entry -> no object -> no page */
18976 if (curr_s_offset
< vm_map_min(map
)) {
18978 * Illegal address that falls below map min.
18980 curr_e_offset
= MIN(end
, vm_map_min(map
));
18981 } else if (curr_s_offset
>= vm_map_max(map
)) {
18983 * Illegal address that falls on/after map max.
18985 curr_e_offset
= end
;
18986 } else if (map_entry
== vm_map_to_entry(map
)) {
18990 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
18994 curr_e_offset
= MIN(map
->max_offset
, end
);
18997 * Hole at start of the map.
18999 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
19002 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
19004 * Hole at the end of the map.
19006 curr_e_offset
= MIN(map
->max_offset
, end
);
19008 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
19012 assert(curr_e_offset
>= curr_s_offset
);
19014 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> effective_page_shift
;
19016 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19018 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
19020 curr_s_offset
= curr_e_offset
;
19022 info_idx
+= num_pages
;
19027 /* compute offset from this map entry's start */
19028 offset_in_object
= curr_s_offset
- map_entry
->vme_start
;
19030 /* compute offset into this map entry's object (or submap) */
19031 offset_in_object
+= VME_OFFSET(map_entry
);
19033 if (map_entry
->is_sub_map
) {
19034 vm_map_t sub_map
= VM_MAP_NULL
;
19035 vm_page_info_t submap_info
= 0;
19036 vm_map_offset_t submap_s_offset
= 0, submap_e_offset
= 0, range_len
= 0;
19038 range_len
= MIN(map_entry
->vme_end
, end
) - curr_s_offset
;
19040 submap_s_offset
= offset_in_object
;
19041 submap_e_offset
= submap_s_offset
+ range_len
;
19043 sub_map
= VME_SUBMAP(map_entry
);
19045 vm_map_reference(sub_map
);
19046 vm_map_unlock_read(map
);
19048 submap_info
= (vm_page_info_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19050 assertf(VM_MAP_PAGE_SHIFT(sub_map
) >= VM_MAP_PAGE_SHIFT(map
),
19051 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map
), VM_MAP_PAGE_SIZE(map
));
19053 retval
= vm_map_page_range_info_internal(sub_map
,
19056 effective_page_shift
,
19057 VM_PAGE_INFO_BASIC
,
19058 (vm_page_info_t
) submap_info
,
19061 assert(retval
== KERN_SUCCESS
);
19063 vm_map_lock_read(map
);
19064 vm_map_deallocate(sub_map
);
19066 /* Move the "info" index by the number of pages we inspected.*/
19067 info_idx
+= range_len
>> effective_page_shift
;
19069 /* Move our current offset by the size of the range we inspected.*/
19070 curr_s_offset
+= range_len
;
19075 object
= VME_OBJECT(map_entry
);
19077 if (object
== VM_OBJECT_NULL
) {
19079 * We don't have an object here and, hence,
19080 * no pages to inspect. We'll fill up the
19081 * info structure appropriately.
19084 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
19086 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> effective_page_shift
;
19088 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19090 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
19092 curr_s_offset
= curr_e_offset
;
19094 info_idx
+= num_pages
;
19099 if (do_region_footprint
) {
19101 if (map
->has_corpse_footprint
) {
19103 * Query the page info data we saved
19104 * while forking the corpse.
19106 vm_map_corpse_footprint_query_page_info(
19112 * Query the live pmap for footprint info
19115 vm_map_footprint_query_page_info(
19122 case VM_PAGE_INFO_BASIC
:
19123 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19124 basic_info
->disposition
= disposition
;
19125 basic_info
->ref_count
= 1;
19126 basic_info
->object_id
= VM_OBJECT_ID_FAKE(map
, task_ledgers
.purgeable_nonvolatile
);
19127 basic_info
->offset
= 0;
19128 basic_info
->depth
= 0;
19133 curr_s_offset
+= effective_page_size
;
19137 vm_object_reference(object
);
19139 * Shared mode -- so we can allow other readers
19140 * to grab the lock too.
19142 vm_object_lock_shared(object
);
19144 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
19146 vm_map_unlock_read(map
);
19148 map_entry
= NULL
; /* map is unlocked, the entry is no longer valid. */
19150 curr_object
= object
;
19152 for (; curr_s_offset
< curr_e_offset
;) {
19153 if (object
== curr_object
) {
19154 ref_count
= curr_object
->ref_count
- 1; /* account for our object reference above. */
19156 ref_count
= curr_object
->ref_count
;
19159 curr_offset_in_object
= offset_in_object
;
19162 m
= vm_page_lookup(curr_object
, vm_object_trunc_page(curr_offset_in_object
));
19164 if (m
!= VM_PAGE_NULL
) {
19165 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
19168 if (curr_object
->internal
&&
19169 curr_object
->alive
&&
19170 !curr_object
->terminating
&&
19171 curr_object
->pager_ready
) {
19172 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object
, vm_object_trunc_page(curr_offset_in_object
))
19173 == VM_EXTERNAL_STATE_EXISTS
) {
19174 /* the pager has that page */
19175 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
19181 * Go down the VM object shadow chain until we find the page
19182 * we're looking for.
19185 if (curr_object
->shadow
!= VM_OBJECT_NULL
) {
19186 vm_object_t shadow
= VM_OBJECT_NULL
;
19188 curr_offset_in_object
+= curr_object
->vo_shadow_offset
;
19189 shadow
= curr_object
->shadow
;
19191 vm_object_lock_shared(shadow
);
19192 vm_object_unlock(curr_object
);
19194 curr_object
= shadow
;
19203 /* The ref_count is not strictly accurate; it measures the number */
19204 /* of entities holding a ref on the object. They may not be mapping */
19205 /* the object or may not be mapping the section holding the */
19206 /* target page, but it's still a ballpark number and, though an over- */
19207 /* count, it picks up the copy-on-write cases */
19209 /* We could also get a picture of page sharing from pmap_attributes */
19210 /* but this would under count as only faulted-in mappings would */
19213 if ((curr_object
== object
) && curr_object
->shadow
) {
19214 disposition
|= VM_PAGE_QUERY_PAGE_COPIED
;
19217 if (!curr_object
->internal
) {
19218 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
19221 if (m
!= VM_PAGE_NULL
) {
19222 if (m
->vmp_fictitious
) {
19223 disposition
|= VM_PAGE_QUERY_PAGE_FICTITIOUS
;
19225 if (m
->vmp_dirty
|| pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m
))) {
19226 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
19229 if (m
->vmp_reference
|| pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m
))) {
19230 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
19233 if (m
->vmp_q_state
== VM_PAGE_ON_SPECULATIVE_Q
) {
19234 disposition
|= VM_PAGE_QUERY_PAGE_SPECULATIVE
;
19239 * when this routine deals with 4k
19240 * pages, check the appropriate CS bit
19243 if (m
->vmp_cs_validated
) {
19244 disposition
|= VM_PAGE_QUERY_PAGE_CS_VALIDATED
;
19246 if (m
->vmp_cs_tainted
) {
19247 disposition
|= VM_PAGE_QUERY_PAGE_CS_TAINTED
;
19249 if (m
->vmp_cs_nx
) {
19250 disposition
|= VM_PAGE_QUERY_PAGE_CS_NX
;
19252 if (m
->vmp_reusable
|| curr_object
->all_reusable
) {
19253 disposition
|= VM_PAGE_QUERY_PAGE_REUSABLE
;
19259 case VM_PAGE_INFO_BASIC
:
19260 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19261 basic_info
->disposition
= disposition
;
19262 basic_info
->ref_count
= ref_count
;
19263 basic_info
->object_id
= (vm_object_id_t
) (uintptr_t)
19264 VM_KERNEL_ADDRPERM(curr_object
);
19265 basic_info
->offset
=
19266 (memory_object_offset_t
) curr_offset_in_object
+ offset_in_page
;
19267 basic_info
->depth
= depth
;
19274 offset_in_page
= 0; // This doesn't really make sense for any offset other than the starting offset.
19277 * Move to next offset in the range and in our object.
19279 curr_s_offset
+= effective_page_size
;
19280 offset_in_object
+= effective_page_size
;
19281 curr_offset_in_object
= offset_in_object
;
19283 if (curr_object
!= object
) {
19284 vm_object_unlock(curr_object
);
19286 curr_object
= object
;
19288 vm_object_lock_shared(curr_object
);
19290 vm_object_lock_yield_shared(curr_object
);
19294 vm_object_unlock(curr_object
);
19295 vm_object_deallocate(curr_object
);
19297 vm_map_lock_read(map
);
19300 vm_map_unlock_read(map
);
19307 * Synchronises the memory range specified with its backing store
19308 * image by either flushing or cleaning the contents to the appropriate
19309 * memory manager engaging in a memory object synchronize dialog with
19310 * the manager. The client doesn't return until the manager issues
19311 * m_o_s_completed message. MIG Magically converts user task parameter
19312 * to the task's address map.
19314 * interpretation of sync_flags
19315 * VM_SYNC_INVALIDATE - discard pages, only return precious
19316 * pages to manager.
19318 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19319 * - discard pages, write dirty or precious
19320 * pages back to memory manager.
19322 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19323 * - write dirty or precious pages back to
19324 * the memory manager.
19326 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19327 * is a hole in the region, and we would
19328 * have returned KERN_SUCCESS, return
19329 * KERN_INVALID_ADDRESS instead.
19332 * The memory object attributes have not yet been implemented, this
19333 * function will have to deal with the invalidate attribute
19336 * KERN_INVALID_TASK Bad task parameter
19337 * KERN_INVALID_ARGUMENT both sync and async were specified.
19338 * KERN_SUCCESS The usual.
19339 * KERN_INVALID_ADDRESS There was a hole in the region.
19345 vm_map_address_t address
,
19346 vm_map_size_t size
,
19347 vm_sync_t sync_flags
)
19349 vm_map_entry_t entry
;
19350 vm_map_size_t amount_left
;
19351 vm_object_offset_t offset
;
19352 vm_object_offset_t start_offset
, end_offset
;
19353 boolean_t do_sync_req
;
19354 boolean_t had_hole
= FALSE
;
19355 vm_map_offset_t pmap_offset
;
19357 if ((sync_flags
& VM_SYNC_ASYNCHRONOUS
) &&
19358 (sync_flags
& VM_SYNC_SYNCHRONOUS
)) {
19359 return KERN_INVALID_ARGUMENT
;
19362 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
19363 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map
, (uint64_t)address
, (uint64_t)size
, sync_flags
);
19367 * align address and size on page boundaries
19369 size
= (vm_map_round_page(address
+ size
,
19370 VM_MAP_PAGE_MASK(map
)) -
19371 vm_map_trunc_page(address
,
19372 VM_MAP_PAGE_MASK(map
)));
19373 address
= vm_map_trunc_page(address
,
19374 VM_MAP_PAGE_MASK(map
));
19376 if (map
== VM_MAP_NULL
) {
19377 return KERN_INVALID_TASK
;
19381 return KERN_SUCCESS
;
19384 amount_left
= size
;
19386 while (amount_left
> 0) {
19387 vm_object_size_t flush_size
;
19388 vm_object_t object
;
19391 if (!vm_map_lookup_entry(map
,
19394 vm_map_size_t skip
;
19397 * hole in the address map.
19401 if (sync_flags
& VM_SYNC_KILLPAGES
) {
19403 * For VM_SYNC_KILLPAGES, there should be
19404 * no holes in the range, since we couldn't
19405 * prevent someone else from allocating in
19406 * that hole and we wouldn't want to "kill"
19409 vm_map_unlock(map
);
19414 * Check for empty map.
19416 if (entry
== vm_map_to_entry(map
) &&
19417 entry
->vme_next
== entry
) {
19418 vm_map_unlock(map
);
19422 * Check that we don't wrap and that
19423 * we have at least one real map entry.
19425 if ((map
->hdr
.nentries
== 0) ||
19426 (entry
->vme_next
->vme_start
< address
)) {
19427 vm_map_unlock(map
);
19431 * Move up to the next entry if needed
19433 skip
= (entry
->vme_next
->vme_start
- address
);
19434 if (skip
>= amount_left
) {
19437 amount_left
-= skip
;
19439 address
= entry
->vme_next
->vme_start
;
19440 vm_map_unlock(map
);
19444 offset
= address
- entry
->vme_start
;
19445 pmap_offset
= address
;
19448 * do we have more to flush than is contained in this
19451 if (amount_left
+ entry
->vme_start
+ offset
> entry
->vme_end
) {
19452 flush_size
= entry
->vme_end
-
19453 (entry
->vme_start
+ offset
);
19455 flush_size
= amount_left
;
19457 amount_left
-= flush_size
;
19458 address
+= flush_size
;
19460 if (entry
->is_sub_map
== TRUE
) {
19461 vm_map_t local_map
;
19462 vm_map_offset_t local_offset
;
19464 local_map
= VME_SUBMAP(entry
);
19465 local_offset
= VME_OFFSET(entry
);
19466 vm_map_reference(local_map
);
19467 vm_map_unlock(map
);
19472 sync_flags
) == KERN_INVALID_ADDRESS
) {
19475 vm_map_deallocate(local_map
);
19478 object
= VME_OBJECT(entry
);
19481 * We can't sync this object if the object has not been
19484 if (object
== VM_OBJECT_NULL
) {
19485 vm_map_unlock(map
);
19488 offset
+= VME_OFFSET(entry
);
19490 vm_object_lock(object
);
19492 if (sync_flags
& (VM_SYNC_KILLPAGES
| VM_SYNC_DEACTIVATE
)) {
19493 int kill_pages
= 0;
19494 boolean_t reusable_pages
= FALSE
;
19496 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
19498 * This is a destructive operation and so we
19499 * err on the side of limiting the range of
19502 start_offset
= vm_object_round_page(offset
);
19503 end_offset
= vm_object_trunc_page(offset
+ flush_size
);
19505 if (end_offset
<= start_offset
) {
19506 vm_object_unlock(object
);
19507 vm_map_unlock(map
);
19511 pmap_offset
+= start_offset
- offset
;;
19513 start_offset
= offset
;
19514 end_offset
= offset
+ flush_size
;
19517 if (sync_flags
& VM_SYNC_KILLPAGES
) {
19518 if (((object
->ref_count
== 1) ||
19519 ((object
->copy_strategy
!=
19520 MEMORY_OBJECT_COPY_SYMMETRIC
) &&
19521 (object
->copy
== VM_OBJECT_NULL
))) &&
19522 (object
->shadow
== VM_OBJECT_NULL
)) {
19523 if (object
->ref_count
!= 1) {
19524 vm_page_stats_reusable
.free_shared
++;
19531 if (kill_pages
!= -1) {
19532 vm_object_deactivate_pages(
19535 (vm_object_size_t
) (end_offset
- start_offset
),
19541 vm_object_unlock(object
);
19542 vm_map_unlock(map
);
19546 * We can't sync this object if there isn't a pager.
19547 * Don't bother to sync internal objects, since there can't
19548 * be any "permanent" storage for these objects anyway.
19550 if ((object
->pager
== MEMORY_OBJECT_NULL
) ||
19551 (object
->internal
) || (object
->private)) {
19552 vm_object_unlock(object
);
19553 vm_map_unlock(map
);
19557 * keep reference on the object until syncing is done
19559 vm_object_reference_locked(object
);
19560 vm_object_unlock(object
);
19562 vm_map_unlock(map
);
19564 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
19565 start_offset
= vm_object_trunc_page(offset
);
19566 end_offset
= vm_object_round_page(offset
+ flush_size
);
19568 start_offset
= offset
;
19569 end_offset
= offset
+ flush_size
;
19572 do_sync_req
= vm_object_sync(object
,
19574 (end_offset
- start_offset
),
19575 sync_flags
& VM_SYNC_INVALIDATE
,
19576 ((sync_flags
& VM_SYNC_SYNCHRONOUS
) ||
19577 (sync_flags
& VM_SYNC_ASYNCHRONOUS
)),
19578 sync_flags
& VM_SYNC_SYNCHRONOUS
);
19580 if ((sync_flags
& VM_SYNC_INVALIDATE
) && object
->resident_page_count
== 0) {
19582 * clear out the clustering and read-ahead hints
19584 vm_object_lock(object
);
19586 object
->pages_created
= 0;
19587 object
->pages_used
= 0;
19588 object
->sequential
= 0;
19589 object
->last_alloc
= 0;
19591 vm_object_unlock(object
);
19593 vm_object_deallocate(object
);
19596 /* for proper msync() behaviour */
19597 if (had_hole
== TRUE
&& (sync_flags
& VM_SYNC_CONTIGUOUS
)) {
19598 return KERN_INVALID_ADDRESS
;
19601 return KERN_SUCCESS
;
19605 vm_named_entry_from_vm_object(
19606 vm_named_entry_t named_entry
,
19607 vm_object_t object
,
19608 vm_object_offset_t offset
,
19609 vm_object_size_t size
,
19612 vm_map_copy_t copy
;
19613 vm_map_entry_t copy_entry
;
19615 assert(!named_entry
->is_sub_map
);
19616 assert(!named_entry
->is_copy
);
19617 assert(!named_entry
->is_object
);
19618 assert(!named_entry
->internal
);
19619 assert(named_entry
->backing
.copy
== VM_MAP_COPY_NULL
);
19621 copy
= vm_map_copy_allocate();
19622 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
19623 copy
->offset
= offset
;
19625 copy
->cpy_hdr
.page_shift
= PAGE_SHIFT
;
19626 vm_map_store_init(©
->cpy_hdr
);
19628 copy_entry
= vm_map_copy_entry_create(copy
, FALSE
);
19629 copy_entry
->protection
= prot
;
19630 copy_entry
->max_protection
= prot
;
19631 copy_entry
->use_pmap
= TRUE
;
19632 copy_entry
->vme_start
= VM_MAP_TRUNC_PAGE(offset
, PAGE_MASK
);
19633 copy_entry
->vme_end
= VM_MAP_ROUND_PAGE(offset
+ size
, PAGE_MASK
);
19634 VME_OBJECT_SET(copy_entry
, object
);
19635 VME_OFFSET_SET(copy_entry
, vm_object_trunc_page(offset
));
19636 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
), copy_entry
);
19638 named_entry
->backing
.copy
= copy
;
19639 named_entry
->is_object
= TRUE
;
19640 if (object
->internal
) {
19641 named_entry
->internal
= TRUE
;
19644 DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry
, copy
, object
, offset
, size
, prot
);
19646 return KERN_SUCCESS
;
19650 vm_named_entry_to_vm_object(
19651 vm_named_entry_t named_entry
)
19653 vm_map_copy_t copy
;
19654 vm_map_entry_t copy_entry
;
19655 vm_object_t object
;
19657 assert(!named_entry
->is_sub_map
);
19658 assert(!named_entry
->is_copy
);
19659 assert(named_entry
->is_object
);
19660 copy
= named_entry
->backing
.copy
;
19661 assert(copy
!= VM_MAP_COPY_NULL
);
19662 assert(copy
->cpy_hdr
.nentries
== 1);
19663 copy_entry
= vm_map_copy_first_entry(copy
);
19664 assert(!copy_entry
->is_sub_map
);
19665 object
= VME_OBJECT(copy_entry
);
19667 DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry
, copy
, copy_entry
, (uint64_t)copy_entry
->vme_start
, (uint64_t)copy_entry
->vme_end
, copy_entry
->vme_offset
, copy_entry
->protection
, copy_entry
->max_protection
, object
, named_entry
->offset
, named_entry
->size
, named_entry
->protection
);
19673 * Routine: convert_port_entry_to_map
19675 * Convert from a port specifying an entry or a task
19676 * to a map. Doesn't consume the port ref; produces a map ref,
19677 * which may be null. Unlike convert_port_to_map, the
19678 * port may be backed by either a task or a named entry.
19685 convert_port_entry_to_map(
19689 vm_named_entry_t named_entry
;
19690 uint32_t try_failed_count
= 0;
19692 if (IP_VALID(port
) && (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
19695 if (ip_active(port
) && (ip_kotype(port
)
19696 == IKOT_NAMED_ENTRY
)) {
19698 (vm_named_entry_t
) ip_get_kobject(port
);
19699 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
19702 try_failed_count
++;
19703 mutex_pause(try_failed_count
);
19706 named_entry
->ref_count
++;
19707 lck_mtx_unlock(&(named_entry
)->Lock
);
19709 if ((named_entry
->is_sub_map
) &&
19710 (named_entry
->protection
19711 & VM_PROT_WRITE
)) {
19712 map
= named_entry
->backing
.map
;
19713 if (map
->pmap
!= PMAP_NULL
) {
19714 if (map
->pmap
== kernel_pmap
) {
19715 panic("userspace has access "
19716 "to a kernel map %p", map
);
19718 pmap_require(map
->pmap
);
19721 mach_destroy_memory_entry(port
);
19722 return VM_MAP_NULL
;
19724 vm_map_reference_swap(map
);
19725 mach_destroy_memory_entry(port
);
19728 return VM_MAP_NULL
;
19732 map
= convert_port_to_map(port
);
19739 * Routine: convert_port_entry_to_object
19741 * Convert from a port specifying a named entry to an
19742 * object. Doesn't consume the port ref; produces an object ref,
19743 * which may be null.
19750 convert_port_entry_to_object(
19753 vm_object_t object
= VM_OBJECT_NULL
;
19754 vm_named_entry_t named_entry
;
19755 uint32_t try_failed_count
= 0;
19757 if (IP_VALID(port
) &&
19758 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
19761 if (ip_active(port
) &&
19762 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
19763 named_entry
= (vm_named_entry_t
) ip_get_kobject(port
);
19764 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
19766 try_failed_count
++;
19767 mutex_pause(try_failed_count
);
19770 named_entry
->ref_count
++;
19771 lck_mtx_unlock(&(named_entry
)->Lock
);
19773 if (!(named_entry
->is_sub_map
) &&
19774 !(named_entry
->is_copy
) &&
19775 (named_entry
->is_object
) &&
19776 (named_entry
->protection
& VM_PROT_WRITE
)) {
19777 vm_map_copy_t copy
;
19778 vm_map_entry_t copy_entry
;
19780 copy
= named_entry
->backing
.copy
;
19781 assert(copy
->cpy_hdr
.nentries
== 1);
19782 copy_entry
= vm_map_copy_first_entry(copy
);
19783 assert(!copy_entry
->is_sub_map
);
19784 object
= VME_OBJECT(copy_entry
);
19785 assert(object
!= VM_OBJECT_NULL
);
19786 vm_object_reference(object
);
19788 mach_destroy_memory_entry(port
);
19796 * Export routines to other components for the things we access locally through
19803 return current_map_fast();
19807 * vm_map_reference:
19809 * Most code internal to the osfmk will go through a
19810 * macro defining this. This is always here for the
19811 * use of other kernel components.
19813 #undef vm_map_reference
19818 if (map
== VM_MAP_NULL
) {
19822 lck_mtx_lock(&map
->s_lock
);
19824 assert(map
->res_count
> 0);
19825 assert(os_ref_get_count(&map
->map_refcnt
) >= map
->res_count
);
19828 os_ref_retain_locked(&map
->map_refcnt
);
19829 lck_mtx_unlock(&map
->s_lock
);
19833 * vm_map_deallocate:
19835 * Removes a reference from the specified map,
19836 * destroying it if no references remain.
19837 * The map should not be locked.
19845 if (map
== VM_MAP_NULL
) {
19849 lck_mtx_lock(&map
->s_lock
);
19850 ref
= os_ref_release_locked(&map
->map_refcnt
);
19852 vm_map_res_deallocate(map
);
19853 lck_mtx_unlock(&map
->s_lock
);
19856 assert(os_ref_get_count(&map
->map_refcnt
) == 0);
19857 lck_mtx_unlock(&map
->s_lock
);
19861 * The map residence count isn't decremented here because
19862 * the vm_map_delete below will traverse the entire map,
19863 * deleting entries, and the residence counts on objects
19864 * and sharing maps will go away then.
19868 vm_map_destroy(map
, VM_MAP_REMOVE_NO_FLAGS
);
19872 vm_map_inspect_deallocate(
19873 vm_map_inspect_t map
)
19875 vm_map_deallocate((vm_map_t
)map
);
19879 vm_map_read_deallocate(
19882 vm_map_deallocate((vm_map_t
)map
);
19887 vm_map_disable_NX(vm_map_t map
)
19892 if (map
->pmap
== NULL
) {
19896 pmap_disable_NX(map
->pmap
);
19900 vm_map_disallow_data_exec(vm_map_t map
)
19906 map
->map_disallow_data_exec
= TRUE
;
19909 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19910 * more descriptive.
19913 vm_map_set_32bit(vm_map_t map
)
19915 #if defined(__arm__) || defined(__arm64__)
19916 map
->max_offset
= pmap_max_offset(FALSE
, ARM_PMAP_MAX_OFFSET_DEVICE
);
19918 map
->max_offset
= (vm_map_offset_t
)VM_MAX_ADDRESS
;
19924 vm_map_set_64bit(vm_map_t map
)
19926 #if defined(__arm__) || defined(__arm64__)
19927 map
->max_offset
= pmap_max_offset(TRUE
, ARM_PMAP_MAX_OFFSET_DEVICE
);
19929 map
->max_offset
= (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
;
19934 * Expand the maximum size of an existing map to the maximum supported.
19937 vm_map_set_jumbo(vm_map_t map
)
19939 #if defined (__arm64__) && !defined(CONFIG_ARROW)
19940 vm_map_set_max_addr(map
, ~0);
19947 * This map has a JIT entitlement
19950 vm_map_set_jit_entitled(vm_map_t map
)
19952 #if defined (__arm64__)
19953 pmap_set_jit_entitled(map
->pmap
);
19960 * Expand the maximum size of an existing map.
19963 vm_map_set_max_addr(vm_map_t map
, vm_map_offset_t new_max_offset
)
19965 #if defined(__arm64__)
19966 vm_map_offset_t max_supported_offset
= 0;
19967 vm_map_offset_t old_max_offset
= map
->max_offset
;
19968 max_supported_offset
= pmap_max_offset(vm_map_is_64bit(map
), ARM_PMAP_MAX_OFFSET_JUMBO
);
19970 new_max_offset
= trunc_page(new_max_offset
);
19972 /* The address space cannot be shrunk using this routine. */
19973 if (old_max_offset
>= new_max_offset
) {
19977 if (max_supported_offset
< new_max_offset
) {
19978 new_max_offset
= max_supported_offset
;
19981 map
->max_offset
= new_max_offset
;
19983 if (map
->holes_list
->prev
->vme_end
== old_max_offset
) {
19985 * There is already a hole at the end of the map; simply make it bigger.
19987 map
->holes_list
->prev
->vme_end
= map
->max_offset
;
19990 * There is no hole at the end, so we need to create a new hole
19991 * for the new empty space we're creating.
19993 struct vm_map_links
*new_hole
= zalloc(vm_map_holes_zone
);
19994 new_hole
->start
= old_max_offset
;
19995 new_hole
->end
= map
->max_offset
;
19996 new_hole
->prev
= map
->holes_list
->prev
;
19997 new_hole
->next
= (struct vm_map_entry
*)map
->holes_list
;
19998 map
->holes_list
->prev
->links
.next
= (struct vm_map_entry
*)new_hole
;
19999 map
->holes_list
->prev
= (struct vm_map_entry
*)new_hole
;
20003 (void)new_max_offset
;
20008 vm_compute_max_offset(boolean_t is64
)
20010 #if defined(__arm__) || defined(__arm64__)
20011 return pmap_max_offset(is64
, ARM_PMAP_MAX_OFFSET_DEVICE
);
20013 return is64
? (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
: (vm_map_offset_t
)VM_MAX_ADDRESS
;
20018 vm_map_get_max_aslr_slide_section(
20019 vm_map_t map __unused
,
20020 int64_t *max_sections
,
20021 int64_t *section_size
)
20023 #if defined(__arm64__)
20025 *section_size
= ARM_TT_TWIG_SIZE
;
20033 vm_map_get_max_aslr_slide_pages(vm_map_t map
)
20035 #if defined(__arm64__)
20036 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
20037 * limited embedded address space; this is also meant to minimize pmap
20038 * memory usage on 16KB page systems.
20040 return 1 << (24 - VM_MAP_PAGE_SHIFT(map
));
20042 return 1 << (vm_map_is_64bit(map
) ? 16 : 8);
20047 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map
)
20049 #if defined(__arm64__)
20050 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
20051 * of independent entropy on 16KB page systems.
20053 return 1 << (22 - VM_MAP_PAGE_SHIFT(map
));
20055 return 1 << (vm_map_is_64bit(map
) ? 16 : 8);
20064 return map
->max_offset
> ((vm_map_offset_t
)VM_MAX_ADDRESS
);
20069 vm_map_has_hard_pagezero(
20071 vm_map_offset_t pagezero_size
)
20075 * We should lock the VM map (for read) here but we can get away
20076 * with it for now because there can't really be any race condition:
20077 * the VM map's min_offset is changed only when the VM map is created
20078 * and when the zero page is established (when the binary gets loaded),
20079 * and this routine gets called only when the task terminates and the
20080 * VM map is being torn down, and when a new map is created via
20081 * load_machfile()/execve().
20083 return map
->min_offset
>= pagezero_size
;
20087 * Raise a VM map's maximum offset.
20090 vm_map_raise_max_offset(
20092 vm_map_offset_t new_max_offset
)
20097 ret
= KERN_INVALID_ADDRESS
;
20099 if (new_max_offset
>= map
->max_offset
) {
20100 if (!vm_map_is_64bit(map
)) {
20101 if (new_max_offset
<= (vm_map_offset_t
)VM_MAX_ADDRESS
) {
20102 map
->max_offset
= new_max_offset
;
20103 ret
= KERN_SUCCESS
;
20106 if (new_max_offset
<= (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
) {
20107 map
->max_offset
= new_max_offset
;
20108 ret
= KERN_SUCCESS
;
20113 vm_map_unlock(map
);
20119 * Raise a VM map's minimum offset.
20120 * To strictly enforce "page zero" reservation.
20123 vm_map_raise_min_offset(
20125 vm_map_offset_t new_min_offset
)
20127 vm_map_entry_t first_entry
;
20129 new_min_offset
= vm_map_round_page(new_min_offset
,
20130 VM_MAP_PAGE_MASK(map
));
20134 if (new_min_offset
< map
->min_offset
) {
20136 * Can't move min_offset backwards, as that would expose
20137 * a part of the address space that was previously, and for
20138 * possibly good reasons, inaccessible.
20140 vm_map_unlock(map
);
20141 return KERN_INVALID_ADDRESS
;
20143 if (new_min_offset
>= map
->max_offset
) {
20144 /* can't go beyond the end of the address space */
20145 vm_map_unlock(map
);
20146 return KERN_INVALID_ADDRESS
;
20149 first_entry
= vm_map_first_entry(map
);
20150 if (first_entry
!= vm_map_to_entry(map
) &&
20151 first_entry
->vme_start
< new_min_offset
) {
20153 * Some memory was already allocated below the new
20154 * minimum offset. It's too late to change it now...
20156 vm_map_unlock(map
);
20157 return KERN_NO_SPACE
;
20160 map
->min_offset
= new_min_offset
;
20162 assert(map
->holes_list
);
20163 map
->holes_list
->start
= new_min_offset
;
20164 assert(new_min_offset
< map
->holes_list
->end
);
20166 vm_map_unlock(map
);
20168 return KERN_SUCCESS
;
20172 * Set the limit on the maximum amount of user wired memory allowed for this map.
20173 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
20174 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
20175 * don't have to reach over to the BSD data structures.
20179 vm_map_set_user_wire_limit(vm_map_t map
,
20182 map
->user_wire_limit
= limit
;
20187 vm_map_switch_protect(vm_map_t map
,
20191 map
->switch_protect
= val
;
20192 vm_map_unlock(map
);
20195 extern int cs_process_enforcement_enable
;
20197 vm_map_cs_enforcement(
20200 if (cs_process_enforcement_enable
) {
20203 return map
->cs_enforcement
;
20207 vm_map_cs_enforcement_set(
20212 map
->cs_enforcement
= val
;
20213 pmap_set_vm_map_cs_enforced(map
->pmap
, val
);
20214 vm_map_unlock(map
);
20218 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20219 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20220 * bump both counters.
20223 vm_map_iokit_mapped_region(vm_map_t map
, vm_size_t bytes
)
20225 pmap_t pmap
= vm_map_pmap(map
);
20227 ledger_credit(pmap
->ledger
, task_ledgers
.iokit_mapped
, bytes
);
20228 ledger_credit(pmap
->ledger
, task_ledgers
.phys_footprint
, bytes
);
20232 vm_map_iokit_unmapped_region(vm_map_t map
, vm_size_t bytes
)
20234 pmap_t pmap
= vm_map_pmap(map
);
20236 ledger_debit(pmap
->ledger
, task_ledgers
.iokit_mapped
, bytes
);
20237 ledger_debit(pmap
->ledger
, task_ledgers
.phys_footprint
, bytes
);
20240 /* Add (generate) code signature for memory range */
20241 #if CONFIG_DYNAMIC_CODE_SIGNING
20243 vm_map_sign(vm_map_t map
,
20244 vm_map_offset_t start
,
20245 vm_map_offset_t end
)
20247 vm_map_entry_t entry
;
20249 vm_object_t object
;
20252 * Vet all the input parameters and current type and state of the
20253 * underlying object. Return with an error if anything is amiss.
20255 if (map
== VM_MAP_NULL
) {
20256 return KERN_INVALID_ARGUMENT
;
20259 vm_map_lock_read(map
);
20261 if (!vm_map_lookup_entry(map
, start
, &entry
) || entry
->is_sub_map
) {
20263 * Must pass a valid non-submap address.
20265 vm_map_unlock_read(map
);
20266 return KERN_INVALID_ADDRESS
;
20269 if ((entry
->vme_start
> start
) || (entry
->vme_end
< end
)) {
20271 * Map entry doesn't cover the requested range. Not handling
20272 * this situation currently.
20274 vm_map_unlock_read(map
);
20275 return KERN_INVALID_ARGUMENT
;
20278 object
= VME_OBJECT(entry
);
20279 if (object
== VM_OBJECT_NULL
) {
20281 * Object must already be present or we can't sign.
20283 vm_map_unlock_read(map
);
20284 return KERN_INVALID_ARGUMENT
;
20287 vm_object_lock(object
);
20288 vm_map_unlock_read(map
);
20290 while (start
< end
) {
20293 m
= vm_page_lookup(object
,
20294 start
- entry
->vme_start
+ VME_OFFSET(entry
));
20295 if (m
== VM_PAGE_NULL
) {
20296 /* should we try to fault a page here? we can probably
20297 * demand it exists and is locked for this request */
20298 vm_object_unlock(object
);
20299 return KERN_FAILURE
;
20301 /* deal with special page status */
20303 (m
->vmp_unusual
&& (m
->vmp_error
|| m
->vmp_restart
|| m
->vmp_private
|| m
->vmp_absent
))) {
20304 vm_object_unlock(object
);
20305 return KERN_FAILURE
;
20308 /* Page is OK... now "validate" it */
20309 /* This is the place where we'll call out to create a code
20310 * directory, later */
20311 /* XXX TODO4K: deal with 4k subpages individually? */
20312 m
->vmp_cs_validated
= VMP_CS_ALL_TRUE
;
20314 /* The page is now "clean" for codesigning purposes. That means
20315 * we don't consider it as modified (wpmapped) anymore. But
20316 * we'll disconnect the page so we note any future modification
20318 m
->vmp_wpmapped
= FALSE
;
20319 refmod
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
20321 /* Pull the dirty status from the pmap, since we cleared the
20323 if ((refmod
& VM_MEM_MODIFIED
) && !m
->vmp_dirty
) {
20324 SET_PAGE_DIRTY(m
, FALSE
);
20327 /* On to the next page */
20328 start
+= PAGE_SIZE
;
20330 vm_object_unlock(object
);
20332 return KERN_SUCCESS
;
/*
 * vm_map_partial_reap:
 *
 * Walks every entry of "map". For an entry that has a VM object, is not a
 * submap, and whose object is internal with a ref_count of exactly 1, the
 * object's resident_page_count is added to *reclaimed_resident and its
 * compressor pager page count is added to *reclaimed_compressed;
 * vm_map_delete() is then invoked on "map" with VM_MAP_REMOVE_SAVE_ENTRIES.
 *
 * Both counters are only ever incremented ("+=") here, so the caller is
 * expected to have initialized them.
 *
 * Returns KERN_RESOURCE_SHORTAGE when the temporary "zap_map" cannot be
 * created; "kr" is otherwise initialized to KERN_SUCCESS.
 */
20337 vm_map_partial_reap(vm_map_t map
, unsigned int *reclaimed_resident
, unsigned int *reclaimed_compressed
)
20339 vm_map_entry_t entry
= VM_MAP_ENTRY_NULL
;
20340 vm_map_entry_t next_entry
;
20341 kern_return_t kr
= KERN_SUCCESS
;
20347 * We use a "zap_map" to avoid having to unlock
20348 * the "map" in vm_map_delete().
20350 zap_map
= vm_map_create(PMAP_NULL
,
20353 map
->hdr
.entries_pageable
);
/*
 * NOTE(review): this early return is not paired with the vm_map_unlock(map)
 * performed further down. If "map" is already locked at this point, the
 * lock would be left held on this path -- confirm.
 */
20355 if (zap_map
== VM_MAP_NULL
) {
20356 return KERN_RESOURCE_SHORTAGE
;
/* the zap_map uses the same page shift as "map" */
20359 vm_map_set_page_shift(zap_map
,
20360 VM_MAP_PAGE_SHIFT(map
));
20361 vm_map_disable_hole_optimization(zap_map
);
/* "next_entry" is captured up front so the walk survives deleting "entry" */
20363 for (entry
= vm_map_first_entry(map
);
20364 entry
!= vm_map_to_entry(map
);
20365 entry
= next_entry
) {
20366 next_entry
= entry
->vme_next
;
/* only entries backed by an internal object referenced exactly once */
20368 if (VME_OBJECT(entry
) &&
20369 !entry
->is_sub_map
&&
20370 (VME_OBJECT(entry
)->internal
== TRUE
) &&
20371 (VME_OBJECT(entry
)->ref_count
== 1)) {
20372 *reclaimed_resident
+= VME_OBJECT(entry
)->resident_page_count
;
20373 *reclaimed_compressed
+= vm_compressor_pager_get_count(VME_OBJECT(entry
)->pager
);
/* the result of vm_map_delete() is deliberately discarded */
20375 (void)vm_map_delete(map
,
20378 VM_MAP_REMOVE_SAVE_ENTRIES
,
20383 vm_map_unlock(map
);
20386 * Get rid of the "zap_maps" and all the map entries that
20387 * they may still contain.
20389 if (zap_map
!= VM_MAP_NULL
) {
20390 vm_map_destroy(zap_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
20391 zap_map
= VM_MAP_NULL
;
20398 #if DEVELOPMENT || DEBUG
20401 vm_map_disconnect_page_mappings(
20403 boolean_t do_unnest
)
20405 vm_map_entry_t entry
;
20406 int page_count
= 0;
20408 if (do_unnest
== TRUE
) {
20409 #ifndef NO_NESTED_PMAP
20412 for (entry
= vm_map_first_entry(map
);
20413 entry
!= vm_map_to_entry(map
);
20414 entry
= entry
->vme_next
) {
20415 if (entry
->is_sub_map
&& entry
->use_pmap
) {
20417 * Make sure the range between the start of this entry and
20418 * the end of this entry is no longer nested, so that
20419 * we will only remove mappings from the pmap in use by this
20422 vm_map_clip_unnest(map
, entry
, entry
->vme_start
, entry
->vme_end
);
20425 vm_map_unlock(map
);
20428 vm_map_lock_read(map
);
20430 page_count
= map
->pmap
->stats
.resident_count
;
20432 for (entry
= vm_map_first_entry(map
);
20433 entry
!= vm_map_to_entry(map
);
20434 entry
= entry
->vme_next
) {
20435 if (!entry
->is_sub_map
&& ((VME_OBJECT(entry
) == 0) ||
20436 (VME_OBJECT(entry
)->phys_contiguous
))) {
20439 if (entry
->is_sub_map
) {
20440 assert(!entry
->use_pmap
);
20443 pmap_remove_options(map
->pmap
, entry
->vme_start
, entry
->vme_end
, 0);
20445 vm_map_unlock_read(map
);
20451 vm_map_inject_error(vm_map_t map
, vm_map_offset_t vaddr
)
20453 vm_object_t object
= NULL
;
20454 vm_object_offset_t offset
;
20457 vm_map_version_t version
;
20459 int result
= KERN_FAILURE
;
20461 vaddr
= vm_map_trunc_page(vaddr
, PAGE_MASK
);
20464 result
= vm_map_lookup_locked(&map
, vaddr
, VM_PROT_READ
,
20465 OBJECT_LOCK_EXCLUSIVE
, &version
, &object
, &offset
, &prot
, &wired
,
20466 NULL
, &real_map
, NULL
);
20467 if (object
== NULL
) {
20468 result
= KERN_MEMORY_ERROR
;
20469 } else if (object
->pager
) {
20470 result
= vm_compressor_pager_inject_error(object
->pager
,
20473 result
= KERN_MEMORY_PRESENT
;
20476 if (object
!= NULL
) {
20477 vm_object_unlock(object
);
20480 if (real_map
!= map
) {
20481 vm_map_unlock(real_map
);
20483 vm_map_unlock(map
);
20494 extern struct freezer_context freezer_context_global
;
20495 AbsoluteTime c_freezer_last_yield_ts
= 0;
20497 extern unsigned int memorystatus_freeze_private_shared_pages_ratio
;
20498 extern unsigned int memorystatus_freeze_shared_mb_per_process_max
;
20503 unsigned int *purgeable_count
,
20504 unsigned int *wired_count
,
20505 unsigned int *clean_count
,
20506 unsigned int *dirty_count
,
20507 unsigned int dirty_budget
,
20508 unsigned int *shared_count
,
20509 int *freezer_error_code
,
20510 boolean_t eval_only
)
20512 vm_map_entry_t entry2
= VM_MAP_ENTRY_NULL
;
20513 kern_return_t kr
= KERN_SUCCESS
;
20514 boolean_t evaluation_phase
= TRUE
;
20515 vm_object_t cur_shared_object
= NULL
;
20516 int cur_shared_obj_ref_cnt
= 0;
20517 unsigned int dirty_private_count
= 0, dirty_shared_count
= 0, obj_pages_snapshot
= 0;
20519 *purgeable_count
= *wired_count
= *clean_count
= *dirty_count
= *shared_count
= 0;
20522 * We need the exclusive lock here so that we can
20523 * block any page faults or lookups while we are
20524 * in the middle of freezing this vm map.
20526 vm_map_t map
= task
->map
;
20530 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT
);
20532 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20533 if (vm_compressor_low_on_space()) {
20534 *freezer_error_code
= FREEZER_ERROR_NO_COMPRESSOR_SPACE
;
20537 if (vm_swap_low_on_space()) {
20538 *freezer_error_code
= FREEZER_ERROR_NO_SWAP_SPACE
;
20541 kr
= KERN_NO_SPACE
;
20545 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
== FALSE
) {
20547 * In-memory compressor backing the freezer. No disk.
20548 * So no need to do the evaluation phase.
20550 evaluation_phase
= FALSE
;
20552 if (eval_only
== TRUE
) {
20554 * We don't support 'eval_only' mode
20555 * in this non-swap config.
20557 *freezer_error_code
= FREEZER_ERROR_GENERIC
;
20558 kr
= KERN_INVALID_ARGUMENT
;
20562 freezer_context_global
.freezer_ctx_uncompressed_pages
= 0;
20563 clock_get_uptime(&c_freezer_last_yield_ts
);
20567 for (entry2
= vm_map_first_entry(map
);
20568 entry2
!= vm_map_to_entry(map
);
20569 entry2
= entry2
->vme_next
) {
20570 vm_object_t src_object
= VME_OBJECT(entry2
);
20573 !entry2
->is_sub_map
&&
20574 !src_object
->phys_contiguous
) {
20575 /* If eligible, scan the entry, moving eligible pages over to our parent object */
20577 if (src_object
->internal
== TRUE
) {
20578 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
) {
20580 * We skip purgeable objects during evaluation phase only.
20581 * If we decide to freeze this process, we'll explicitly
20582 * purge these objects before we go around again with
20583 * 'evaluation_phase' set to FALSE.
20586 if ((src_object
->purgable
== VM_PURGABLE_EMPTY
) || (src_object
->purgable
== VM_PURGABLE_VOLATILE
)) {
20588 * We want to purge objects that may not belong to this task but are mapped
20589 * in this task alone. Since we already purged this task's purgeable memory
20590 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
20591 * on this task's purgeable objects. Hence the check for only volatile objects.
20593 if (evaluation_phase
== FALSE
&&
20594 (src_object
->purgable
== VM_PURGABLE_VOLATILE
) &&
20595 (src_object
->ref_count
== 1)) {
20596 vm_object_lock(src_object
);
20597 vm_object_purge(src_object
, 0);
20598 vm_object_unlock(src_object
);
20604 * Pages belonging to this object could be swapped to disk.
20605 * Make sure it's not a shared object because we could end
20606 * up just bringing it back in again.
20608 * We try to optimize somewhat by checking for objects that are mapped
20609 * more than once within our own map. But we don't do full searches,
20610 * we just look at the entries following our current entry.
20613 if (src_object
->ref_count
> 1) {
20614 if (src_object
!= cur_shared_object
) {
20615 obj_pages_snapshot
= (src_object
->resident_page_count
- src_object
->wired_page_count
) + vm_compressor_pager_get_count(src_object
->pager
);
20616 dirty_shared_count
+= obj_pages_snapshot
;
20618 cur_shared_object
= src_object
;
20619 cur_shared_obj_ref_cnt
= 1;
20622 cur_shared_obj_ref_cnt
++;
20623 if (src_object
->ref_count
== cur_shared_obj_ref_cnt
) {
20625 * Fall through to below and treat this object as private.
20626 * So deduct its pages from our shared total and add it to the
20630 dirty_shared_count
-= obj_pages_snapshot
;
20631 dirty_private_count
+= obj_pages_snapshot
;
20639 if (src_object
->ref_count
== 1) {
20640 dirty_private_count
+= (src_object
->resident_page_count
- src_object
->wired_page_count
) + vm_compressor_pager_get_count(src_object
->pager
);
20643 if (evaluation_phase
== TRUE
) {
20648 uint32_t paged_out_count
= vm_object_compressed_freezer_pageout(src_object
, dirty_budget
);
20649 *wired_count
+= src_object
->wired_page_count
;
20651 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20652 if (vm_compressor_low_on_space()) {
20653 *freezer_error_code
= FREEZER_ERROR_NO_COMPRESSOR_SPACE
;
20656 if (vm_swap_low_on_space()) {
20657 *freezer_error_code
= FREEZER_ERROR_NO_SWAP_SPACE
;
20660 kr
= KERN_NO_SPACE
;
20663 if (paged_out_count
>= dirty_budget
) {
20666 dirty_budget
-= paged_out_count
;
20671 *shared_count
= (unsigned int) ((dirty_shared_count
* PAGE_SIZE_64
) / (1024 * 1024ULL));
20672 if (evaluation_phase
) {
20673 unsigned int shared_pages_threshold
= (memorystatus_freeze_shared_mb_per_process_max
* 1024 * 1024ULL) / PAGE_SIZE_64
;
20675 if (dirty_shared_count
> shared_pages_threshold
) {
20676 *freezer_error_code
= FREEZER_ERROR_EXCESS_SHARED_MEMORY
;
20681 if (dirty_shared_count
&&
20682 ((dirty_private_count
/ dirty_shared_count
) < memorystatus_freeze_private_shared_pages_ratio
)) {
20683 *freezer_error_code
= FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO
;
20688 evaluation_phase
= FALSE
;
20689 dirty_shared_count
= dirty_private_count
= 0;
20691 freezer_context_global
.freezer_ctx_uncompressed_pages
= 0;
20692 clock_get_uptime(&c_freezer_last_yield_ts
);
20699 vm_purgeable_purge_task_owned(task
);
20707 vm_map_unlock(map
);
20709 if ((eval_only
== FALSE
) && (kr
== KERN_SUCCESS
)) {
20710 vm_object_compressed_freezer_done();
20718 * vm_map_entry_should_cow_for_true_share:
20720 * Determines if the map entry should be clipped and setup for copy-on-write
20721 * to avoid applying "true_share" to a large VM object when only a subset is
20724 * For now, we target only the map entries created for the Objective C
20725 * Garbage Collector, which initially have the following properties:
20726 * - alias == VM_MEMORY_MALLOC
20727 * - wired_count == 0
20729 * and a VM object with:
20731 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20733 * - vo_size == ANON_CHUNK_SIZE
20735 * Only non-kernel map entries.
20738 vm_map_entry_should_cow_for_true_share(
20739 vm_map_entry_t entry
)
20741 vm_object_t object
;
20743 if (entry
->is_sub_map
) {
20744 /* entry does not point at a VM object */
20748 if (entry
->needs_copy
) {
20749 /* already set for copy_on_write: done! */
20753 if (VME_ALIAS(entry
) != VM_MEMORY_MALLOC
&&
20754 VME_ALIAS(entry
) != VM_MEMORY_MALLOC_SMALL
) {
20755 /* not a malloc heap or Obj-C Garbage Collector heap */
20759 if (entry
->wired_count
) {
20760 /* wired: can't change the map entry... */
20761 vm_counters
.should_cow_but_wired
++;
20765 object
= VME_OBJECT(entry
);
20767 if (object
== VM_OBJECT_NULL
) {
20768 /* no object yet... */
20772 if (!object
->internal
) {
20773 /* not an internal object */
20777 if (object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
20778 /* not the default copy strategy */
20782 if (object
->true_share
) {
20783 /* already true_share: too late to avoid it */
20787 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC
&&
20788 object
->vo_size
!= ANON_CHUNK_SIZE
) {
20789 /* ... not an object created for the ObjC Garbage Collector */
20793 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_SMALL
&&
20794 object
->vo_size
!= 2048 * 4096) {
20795 /* ... not a "MALLOC_SMALL" heap */
20800 * All the criteria match: we have a large object being targeted for "true_share".
20801 * To limit the adverse side-effects linked with "true_share", tell the caller to
20802 * try and avoid setting up the entire object for "true_share" by clipping the
20803 * targeted range and setting it up for copy-on-write.
/*
 * Callable form of VM_MAP_ROUND_PAGE(): forwards "offset" and "mask"
 * unchanged and returns the result.
 */
20809 vm_map_round_page_mask(
20810 vm_map_offset_t offset
,
20811 vm_map_offset_t mask
)
20813 return VM_MAP_ROUND_PAGE(offset
, mask
);
/*
 * Callable form of VM_MAP_TRUNC_PAGE(): forwards "offset" and "mask"
 * unchanged and returns the result.
 */
20817 vm_map_trunc_page_mask(
20818 vm_map_offset_t offset
,
20819 vm_map_offset_t mask
)
20821 return VM_MAP_TRUNC_PAGE(offset
, mask
);
/*
 * Returns non-zero when none of the bits selected by "mask" are set in
 * "offset", i.e. "offset" is aligned with respect to that mask.
 */
20825 vm_map_page_aligned(
20826 vm_map_offset_t offset
,
20827 vm_map_offset_t mask
)
20829 return ((offset
) & mask
) == 0;
20836 return VM_MAP_PAGE_SHIFT(map
);
20843 return VM_MAP_PAGE_SIZE(map
);
20850 return VM_MAP_PAGE_MASK(map
);
/*
 * Records "pageshift" as the map's page shift (map->hdr.page_shift).
 * The change is refused with KERN_FAILURE once the map already holds
 * entries (hdr.nentries != 0); otherwise KERN_SUCCESS is returned.
 */
20854 vm_map_set_page_shift(
20858 if (map
->hdr
.nentries
!= 0) {
20859 /* too late to change page size */
20860 return KERN_FAILURE
;
20863 map
->hdr
.page_shift
= pageshift
;
20865 return KERN_SUCCESS
;
20869 vm_map_query_volatile(
20871 mach_vm_size_t
*volatile_virtual_size_p
,
20872 mach_vm_size_t
*volatile_resident_size_p
,
20873 mach_vm_size_t
*volatile_compressed_size_p
,
20874 mach_vm_size_t
*volatile_pmap_size_p
,
20875 mach_vm_size_t
*volatile_compressed_pmap_size_p
)
20877 mach_vm_size_t volatile_virtual_size
;
20878 mach_vm_size_t volatile_resident_count
;
20879 mach_vm_size_t volatile_compressed_count
;
20880 mach_vm_size_t volatile_pmap_count
;
20881 mach_vm_size_t volatile_compressed_pmap_count
;
20882 mach_vm_size_t resident_count
;
20883 vm_map_entry_t entry
;
20884 vm_object_t object
;
20886 /* map should be locked by caller */
20888 volatile_virtual_size
= 0;
20889 volatile_resident_count
= 0;
20890 volatile_compressed_count
= 0;
20891 volatile_pmap_count
= 0;
20892 volatile_compressed_pmap_count
= 0;
20894 for (entry
= vm_map_first_entry(map
);
20895 entry
!= vm_map_to_entry(map
);
20896 entry
= entry
->vme_next
) {
20897 mach_vm_size_t pmap_resident_bytes
, pmap_compressed_bytes
;
20899 if (entry
->is_sub_map
) {
20902 if (!(entry
->protection
& VM_PROT_WRITE
)) {
20905 object
= VME_OBJECT(entry
);
20906 if (object
== VM_OBJECT_NULL
) {
20909 if (object
->purgable
!= VM_PURGABLE_VOLATILE
&&
20910 object
->purgable
!= VM_PURGABLE_EMPTY
) {
20913 if (VME_OFFSET(entry
)) {
20915 * If the map entry has been split and the object now
20916 * appears several times in the VM map, we don't want
20917 * to count the object's resident_page_count more than
20918 * once. We count it only for the first one, starting
20919 * at offset 0 and ignore the other VM map entries.
20923 resident_count
= object
->resident_page_count
;
20924 if ((VME_OFFSET(entry
) / PAGE_SIZE
) >= resident_count
) {
20925 resident_count
= 0;
20927 resident_count
-= (VME_OFFSET(entry
) / PAGE_SIZE
);
20930 volatile_virtual_size
+= entry
->vme_end
- entry
->vme_start
;
20931 volatile_resident_count
+= resident_count
;
20932 if (object
->pager
) {
20933 volatile_compressed_count
+=
20934 vm_compressor_pager_get_count(object
->pager
);
20936 pmap_compressed_bytes
= 0;
20937 pmap_resident_bytes
=
20938 pmap_query_resident(map
->pmap
,
20941 &pmap_compressed_bytes
);
20942 volatile_pmap_count
+= (pmap_resident_bytes
/ PAGE_SIZE
);
20943 volatile_compressed_pmap_count
+= (pmap_compressed_bytes
20947 /* map is still locked on return */
20949 *volatile_virtual_size_p
= volatile_virtual_size
;
20950 *volatile_resident_size_p
= volatile_resident_count
* PAGE_SIZE
;
20951 *volatile_compressed_size_p
= volatile_compressed_count
* PAGE_SIZE
;
20952 *volatile_pmap_size_p
= volatile_pmap_count
* PAGE_SIZE
;
20953 *volatile_compressed_pmap_size_p
= volatile_compressed_pmap_count
* PAGE_SIZE
;
20955 return KERN_SUCCESS
;
/*
 * vm_map_sizes:
 *
 * Reports address-space usage for "map":
 *   *psize          <- map->max_offset - map->min_offset
 *   *pfree          <- sum of the gaps found while walking the entry list
 *   *plargest_free  <- largest single gap found during that walk
 *
 * The walk is done with the map read-locked. "prev" tracks the end of the
 * previously visited entry (initially map->min_offset), and each gap is
 * measured from "prev".
 */
20959 vm_map_sizes(vm_map_t map
,
20960 vm_map_size_t
* psize
,
20961 vm_map_size_t
* pfree
,
20962 vm_map_size_t
* plargest_free
)
20964 vm_map_entry_t entry
;
20965 vm_map_offset_t prev
;
20966 vm_map_size_t free
, total_free
, largest_free
;
/*
 * NOTE(review): this statement stores through psize, pfree and
 * plargest_free without testing them, whereas the final store to
 * *plargest_free below is guarded by "if (plargest_free)". Confirm whether
 * callers may pass NULL and make the handling consistent.
 */
20970 *psize
= *pfree
= *plargest_free
= 0;
20973 total_free
= largest_free
= 0;
20975 vm_map_lock_read(map
);
20977 *psize
= map
->max_offset
- map
->min_offset
;
20980 prev
= map
->min_offset
;
/* no loop condition in the for(): termination relies on "end" */
20981 for (entry
= vm_map_first_entry(map
);; entry
= entry
->vme_next
) {
/* "end" is set once the walk has wrapped around to the map header */
20982 end
= (entry
== vm_map_to_entry(map
));
20985 free
= entry
->vme_end
- prev
;
20987 free
= entry
->vme_start
- prev
;
20990 total_free
+= free
;
20991 if (free
> largest_free
) {
20992 largest_free
= free
;
20998 prev
= entry
->vme_end
;
21000 vm_map_unlock_read(map
);
21002 *pfree
= total_free
;
21004 if (plargest_free
) {
21005 *plargest_free
= largest_free
;
21009 #if VM_SCAN_FOR_SHADOW_CHAIN
21010 int vm_map_shadow_max(vm_map_t map
);
21015 int shadows
, shadows_max
;
21016 vm_map_entry_t entry
;
21017 vm_object_t object
, next_object
;
21025 vm_map_lock_read(map
);
21027 for (entry
= vm_map_first_entry(map
);
21028 entry
!= vm_map_to_entry(map
);
21029 entry
= entry
->vme_next
) {
21030 if (entry
->is_sub_map
) {
21033 object
= VME_OBJECT(entry
);
21034 if (object
== NULL
) {
21037 vm_object_lock_shared(object
);
21039 object
->shadow
!= NULL
;
21040 shadows
++, object
= next_object
) {
21041 next_object
= object
->shadow
;
21042 vm_object_lock_shared(next_object
);
21043 vm_object_unlock(object
);
21045 vm_object_unlock(object
);
21046 if (shadows
> shadows_max
) {
21047 shadows_max
= shadows
;
21051 vm_map_unlock_read(map
);
21053 return shadows_max
;
21055 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
/*
 * Passes lmap->min_offset to pmap_advise_pagezero_range() for lmap's pmap.
 */
21058 vm_commit_pagezero_status(vm_map_t lmap
)
21060 pmap_advise_pagezero_range(lmap
->pmap
, lmap
->min_offset
);
21063 #if XNU_TARGET_OS_OSX
/*
 * Stores "high_start" in map->vmmap_high_start.
 * Only compiled when XNU_TARGET_OS_OSX is set.
 */
21065 vm_map_set_high_start(
21067 vm_map_offset_t high_start
)
21069 map
->vmmap_high_start
= high_start
;
21071 #endif /* XNU_TARGET_OS_OSX */
21075 vm_map_entry_cs_associate(
21077 vm_map_entry_t entry
,
21078 vm_map_kernel_flags_t vmk_flags
)
21080 vm_object_t cs_object
, cs_shadow
;
21081 vm_object_offset_t cs_offset
;
21083 struct vnode
*cs_vnode
;
21084 kern_return_t cs_ret
;
21086 if (map
->pmap
== NULL
||
21087 entry
->is_sub_map
|| /* XXX FBDP: recurse on sub-range? */
21088 pmap_cs_exempt(map
->pmap
) ||
21089 VME_OBJECT(entry
) == VM_OBJECT_NULL
||
21090 !(entry
->protection
& VM_PROT_EXECUTE
)) {
21091 return KERN_SUCCESS
;
21094 vm_map_lock_assert_exclusive(map
);
21096 if (entry
->used_for_jit
) {
21097 cs_ret
= pmap_cs_associate(map
->pmap
,
21098 PMAP_CS_ASSOCIATE_JIT
,
21100 entry
->vme_end
- entry
->vme_start
,
21105 if (vmk_flags
.vmkf_remap_prot_copy
) {
21106 cs_ret
= pmap_cs_associate(map
->pmap
,
21107 PMAP_CS_ASSOCIATE_COW
,
21109 entry
->vme_end
- entry
->vme_start
,
21114 vm_object_lock_shared(VME_OBJECT(entry
));
21115 cs_offset
= VME_OFFSET(entry
);
21116 for (cs_object
= VME_OBJECT(entry
);
21117 (cs_object
!= VM_OBJECT_NULL
&&
21118 !cs_object
->code_signed
);
21119 cs_object
= cs_shadow
) {
21120 cs_shadow
= cs_object
->shadow
;
21121 if (cs_shadow
!= VM_OBJECT_NULL
) {
21122 cs_offset
+= cs_object
->vo_shadow_offset
;
21123 vm_object_lock_shared(cs_shadow
);
21125 vm_object_unlock(cs_object
);
21127 if (cs_object
== VM_OBJECT_NULL
) {
21128 return KERN_SUCCESS
;
21131 cs_offset
+= cs_object
->paging_offset
;
21132 cs_vnode
= vnode_pager_lookup_vnode(cs_object
->pager
);
21133 cs_ret
= vnode_pager_get_cs_blobs(cs_vnode
,
21135 assert(cs_ret
== KERN_SUCCESS
);
21136 cs_ret
= cs_associate_blob_with_mapping(map
->pmap
,
21142 vm_object_unlock(cs_object
);
21143 cs_object
= VM_OBJECT_NULL
;
21146 if (cs_ret
== KERN_SUCCESS
) {
21147 DTRACE_VM2(vm_map_entry_cs_associate_success
,
21148 vm_map_offset_t
, entry
->vme_start
,
21149 vm_map_offset_t
, entry
->vme_end
);
21150 if (vm_map_executable_immutable
) {
21152 * Prevent this executable
21153 * mapping from being unmapped
21156 entry
->permanent
= TRUE
;
21159 * pmap says it will validate the
21160 * code-signing validity of pages
21161 * faulted in via this mapping, so
21162 * this map entry should be marked so
21163 * that vm_fault() bypasses code-signing
21164 * validation for faults coming through
21167 entry
->pmap_cs_associated
= TRUE
;
21168 } else if (cs_ret
== KERN_NOT_SUPPORTED
) {
21170 * pmap won't check the code-signing
21171 * validity of pages faulted in via
21172 * this mapping, so VM should keep
21175 DTRACE_VM3(vm_map_entry_cs_associate_off
,
21176 vm_map_offset_t
, entry
->vme_start
,
21177 vm_map_offset_t
, entry
->vme_end
,
21181 * A real error: do not allow
21182 * execution in this mapping.
21184 DTRACE_VM3(vm_map_entry_cs_associate_failure
,
21185 vm_map_offset_t
, entry
->vme_start
,
21186 vm_map_offset_t
, entry
->vme_end
,
21188 entry
->protection
&= ~VM_PROT_EXECUTE
;
21189 entry
->max_protection
&= ~VM_PROT_EXECUTE
;
21194 #endif /* PMAP_CS */
21197 * FORKED CORPSE FOOTPRINT
21199 * A forked corpse gets a copy of the original VM map but its pmap is mostly
21200 * empty since it never ran and never got to fault in any pages.
21201 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21202 * a forked corpse would therefore return very little information.
21204 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21205 * to vm_map_fork() to collect footprint information from the original VM map
21206 * and its pmap, and store it in the forked corpse's VM map. That information
21207 * is stored in place of the VM map's "hole list" since we'll never need to
21208 * look up holes in the corpse's map.
21210 * The corpse's footprint info looks like this:
21212 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21214 * +---------------------------------------+
21215 * header-> | cf_size |
21216 * +-------------------+-------------------+
21217 * | cf_last_region | cf_last_zeroes |
21218 * +-------------------+-------------------+
21219 * region1-> | cfr_vaddr |
21220 * +-------------------+-------------------+
21221 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21222 * +---------------------------------------+
21223 * | d4 | d5 | ... |
21224 * +---------------------------------------+
21226 * +-------------------+-------------------+
21227 * | dy | dz | na | na | cfr_vaddr... | <-region2
21228 * +-------------------+-------------------+
21229 * | cfr_vaddr (ctd) | cfr_num_pages |
21230 * +---------------------------------------+
21232 * +---------------------------------------+
21234 * +---------------------------------------+
21235 * last region-> | cfr_vaddr |
21236 * +---------------------------------------+
21237 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21238 * +---------------------------------------+
21240 * +---------------------------------------+
21241 * | dx | dy | dz | na | na | na | na | na |
21242 * +---------------------------------------+
21245 * cf_size: total size of the buffer (rounded to page size)
21246 * cf_last_region: offset in the buffer of the last "region" sub-header
21247 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21249 * cfr_vaddr: virtual address of the start of the covered "region"
21250 * cfr_num_pages: number of pages in the covered "region"
21251 * d*: disposition of the page at that virtual address
21252 * Regions in the buffer are word-aligned.
21254 * We estimate the size of the buffer based on the number of memory regions
21255 * and the virtual size of the address space. While copying each memory region
21256 * during vm_map_fork(), we also collect the footprint info for that region
21257 * and store it in the buffer, packing it as much as possible (coalescing
21258 * contiguous memory regions to avoid having too many region headers and
21259 * avoiding long streaks of "zero" page dispositions by splitting footprint
21260 * "regions"), so the number of regions in the footprint buffer might not match
21261 * the number of memory regions in the address space.
21263 * We also have to copy the original task's "nonvolatile" ledgers since that's
21264 * part of the footprint and will need to be reported to any tool asking for
21265 * the footprint information of the forked corpse.
21268 uint64_t vm_map_corpse_footprint_count
= 0;
21269 uint64_t vm_map_corpse_footprint_size_avg
= 0;
21270 uint64_t vm_map_corpse_footprint_size_max
= 0;
21271 uint64_t vm_map_corpse_footprint_full
= 0;
21272 uint64_t vm_map_corpse_footprint_no_buf
= 0;
21274 struct vm_map_corpse_footprint_header
{
21275 vm_size_t cf_size
; /* allocated buffer size */
21276 uint32_t cf_last_region
; /* offset of last region in buffer */
21278 uint32_t cfu_last_zeroes
; /* during creation:
21279 * number of "zero" dispositions at
21280 * end of last region */
21281 uint32_t cfu_hint_region
; /* during lookup:
21282 * offset of last looked up region */
21283 #define cf_last_zeroes cfu.cfu_last_zeroes
21284 #define cf_hint_region cfu.cfu_hint_region
21287 typedef uint8_t cf_disp_t
;
21288 struct vm_map_corpse_footprint_region
{
21289 vm_map_offset_t cfr_vaddr
; /* region start virtual address */
21290 uint32_t cfr_num_pages
; /* number of pages in this "region" */
21291 cf_disp_t cfr_disposition
[0]; /* disposition of each page */
21292 } __attribute__((packed
));
/*
 * Packs a page disposition into a one-byte cf_disp_t.
 *
 * When VM_PAGE_QUERY_PAGE_REUSABLE is set in "disposition",
 * VM_PAGE_QUERY_PAGE_FICTITIOUS is set as its stand-in before the value is
 * narrowed to cf_disp_t by the final cast.
 *
 * NOTE(review): vm_page_cf_disp_to_disposition() turns FICTITIOUS back into
 * REUSABLE unconditionally, so a disposition that already carried
 * FICTITIOUS would presumably come back as REUSABLE -- confirm that such
 * inputs are not expected here.
 */
21295 vm_page_disposition_to_cf_disp(
21298 assert(sizeof(cf_disp_t
) == 1);
21299 /* relocate bits that don't fit in a "uint8_t" */
21300 if (disposition
& VM_PAGE_QUERY_PAGE_REUSABLE
) {
21301 disposition
|= VM_PAGE_QUERY_PAGE_FICTITIOUS
;
21303 /* cast gets rid of extra bits */
21304 return (cf_disp_t
) disposition
;
/*
 * Expands a one-byte cf_disp_t back into an int disposition.
 *
 * If VM_PAGE_QUERY_PAGE_FICTITIOUS is set in "cf_disp", it is treated as
 * the stand-in for VM_PAGE_QUERY_PAGE_REUSABLE: REUSABLE is set and
 * FICTITIOUS is cleared in the returned value. All other bits are returned
 * as stored.
 */
21308 vm_page_cf_disp_to_disposition(
21313 assert(sizeof(cf_disp_t
) == 1);
21314 disposition
= (int) cf_disp
;
21315 /* move relocated bits back in place */
21316 if (cf_disp
& VM_PAGE_QUERY_PAGE_FICTITIOUS
) {
21317 disposition
|= VM_PAGE_QUERY_PAGE_REUSABLE
;
21318 disposition
&= ~VM_PAGE_QUERY_PAGE_FICTITIOUS
;
21320 return disposition
;
21324 * vm_map_corpse_footprint_new_region:
21325 * closes the current footprint "region" and creates a new one
21327 * Returns NULL if there's not enough space in the buffer for a new region.
21329 static struct vm_map_corpse_footprint_region
*
21330 vm_map_corpse_footprint_new_region(
21331 struct vm_map_corpse_footprint_header
*footprint_header
)
21333 uintptr_t footprint_edge
;
21334 uint32_t new_region_offset
;
21335 struct vm_map_corpse_footprint_region
*footprint_region
;
21336 struct vm_map_corpse_footprint_region
*new_footprint_region
;
21338 footprint_edge
= ((uintptr_t)footprint_header
+
21339 footprint_header
->cf_size
);
21340 footprint_region
= ((struct vm_map_corpse_footprint_region
*)
21341 ((char *)footprint_header
+
21342 footprint_header
->cf_last_region
));
21343 assert((uintptr_t)footprint_region
+ sizeof(*footprint_region
) <=
21346 /* get rid of trailing zeroes in the last region */
21347 assert(footprint_region
->cfr_num_pages
>=
21348 footprint_header
->cf_last_zeroes
);
21349 footprint_region
->cfr_num_pages
-=
21350 footprint_header
->cf_last_zeroes
;
21351 footprint_header
->cf_last_zeroes
= 0;
21353 /* reuse this region if it's now empty */
21354 if (footprint_region
->cfr_num_pages
== 0) {
21355 return footprint_region
;
21358 /* compute offset of new region */
21359 new_region_offset
= footprint_header
->cf_last_region
;
21360 new_region_offset
+= sizeof(*footprint_region
);
21361 new_region_offset
+= (footprint_region
->cfr_num_pages
* sizeof(cf_disp_t
));
21362 new_region_offset
= roundup(new_region_offset
, sizeof(int));
21364 /* check if we're going over the edge */
21365 if (((uintptr_t)footprint_header
+
21366 new_region_offset
+
21367 sizeof(*footprint_region
)) >=
21369 /* over the edge: no new region */
21373 /* adjust offset of last region in header */
21374 footprint_header
->cf_last_region
= new_region_offset
;
21376 new_footprint_region
= (struct vm_map_corpse_footprint_region
*)
21377 ((char *)footprint_header
+
21378 footprint_header
->cf_last_region
);
21379 new_footprint_region
->cfr_vaddr
= 0;
21380 new_footprint_region
->cfr_num_pages
= 0;
21381 /* caller needs to initialize new region */
21383 return new_footprint_region
;
21387 * vm_map_corpse_footprint_collect:
21388 * collects footprint information for "old_entry" in "old_map" and
21389 * stores it in "new_map"'s vmmap_corpse_footprint.
21392 vm_map_corpse_footprint_collect(
21394 vm_map_entry_t old_entry
,
21397 vm_map_offset_t va
;
21399 struct vm_map_corpse_footprint_header
*footprint_header
;
21400 struct vm_map_corpse_footprint_region
*footprint_region
;
21401 struct vm_map_corpse_footprint_region
*new_footprint_region
;
21402 cf_disp_t
*next_disp_p
;
21403 uintptr_t footprint_edge
;
21404 uint32_t num_pages_tmp
;
21405 int effective_page_size
;
21407 effective_page_size
= MIN(PAGE_SIZE
, VM_MAP_PAGE_SIZE(old_map
));
21409 va
= old_entry
->vme_start
;
21411 vm_map_lock_assert_exclusive(old_map
);
21412 vm_map_lock_assert_exclusive(new_map
);
21414 assert(new_map
->has_corpse_footprint
);
21415 assert(!old_map
->has_corpse_footprint
);
21416 if (!new_map
->has_corpse_footprint
||
21417 old_map
->has_corpse_footprint
) {
21419 * This can only transfer footprint info from a
21420 * map with a live pmap to a map with a corpse footprint.
21422 return KERN_NOT_SUPPORTED
;
21425 if (new_map
->vmmap_corpse_footprint
== NULL
) {
21427 vm_size_t buf_size
;
21430 buf_size
= (sizeof(*footprint_header
) +
21431 (old_map
->hdr
.nentries
21433 (sizeof(*footprint_region
) +
21434 +3)) /* potential alignment for each region */
21436 ((old_map
->size
/ effective_page_size
)
21438 sizeof(cf_disp_t
))); /* disposition for each page */
21439 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
21440 buf_size
= round_page(buf_size
);
21442 /* limit buffer to 1 page to validate overflow detection */
21443 // buf_size = PAGE_SIZE;
21445 /* limit size to a somewhat sane amount */
21446 #if XNU_TARGET_OS_OSX
21447 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
21448 #else /* XNU_TARGET_OS_OSX */
21449 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
21450 #endif /* XNU_TARGET_OS_OSX */
21451 if (buf_size
> VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE
) {
21452 buf_size
= VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE
;
21456 * Allocate the pageable buffer (with a trailing guard page).
21457 * It will be zero-filled on demand.
21459 kr
= kernel_memory_allocate(kernel_map
,
21462 + PAGE_SIZE
), /* trailing guard page */
21464 KMA_PAGEABLE
| KMA_GUARD_LAST
,
21465 VM_KERN_MEMORY_DIAG
);
21466 if (kr
!= KERN_SUCCESS
) {
21467 vm_map_corpse_footprint_no_buf
++;
21471 /* initialize header and 1st region */
21472 footprint_header
= (struct vm_map_corpse_footprint_header
*)buf
;
21473 new_map
->vmmap_corpse_footprint
= footprint_header
;
21475 footprint_header
->cf_size
= buf_size
;
21476 footprint_header
->cf_last_region
=
21477 sizeof(*footprint_header
);
21478 footprint_header
->cf_last_zeroes
= 0;
21480 footprint_region
= (struct vm_map_corpse_footprint_region
*)
21481 ((char *)footprint_header
+
21482 footprint_header
->cf_last_region
);
21483 footprint_region
->cfr_vaddr
= 0;
21484 footprint_region
->cfr_num_pages
= 0;
21486 /* retrieve header and last region */
21487 footprint_header
= (struct vm_map_corpse_footprint_header
*)
21488 new_map
->vmmap_corpse_footprint
;
21489 footprint_region
= (struct vm_map_corpse_footprint_region
*)
21490 ((char *)footprint_header
+
21491 footprint_header
->cf_last_region
);
21493 footprint_edge
= ((uintptr_t)footprint_header
+
21494 footprint_header
->cf_size
);
21496 if ((footprint_region
->cfr_vaddr
+
21497 (((vm_map_offset_t
)footprint_region
->cfr_num_pages
) *
21498 effective_page_size
))
21499 != old_entry
->vme_start
) {
21500 uint64_t num_pages_delta
, num_pages_delta_size
;
21501 uint32_t region_offset_delta_size
;
21504 * Not the next contiguous virtual address:
21505 * start a new region or store "zero" dispositions for
21506 * the missing pages?
21508 /* size of gap in actual page dispositions */
21509 num_pages_delta
= ((old_entry
->vme_start
-
21510 footprint_region
->cfr_vaddr
) / effective_page_size
)
21511 - footprint_region
->cfr_num_pages
;
21512 num_pages_delta_size
= num_pages_delta
* sizeof(cf_disp_t
);
21513 /* size of gap as a new footprint region header */
21514 region_offset_delta_size
=
21515 (sizeof(*footprint_region
) +
21516 roundup(((footprint_region
->cfr_num_pages
-
21517 footprint_header
->cf_last_zeroes
) * sizeof(cf_disp_t
)),
21519 ((footprint_region
->cfr_num_pages
-
21520 footprint_header
->cf_last_zeroes
) * sizeof(cf_disp_t
)));
21521 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
21522 if (region_offset_delta_size
< num_pages_delta_size
||
21523 os_add3_overflow(footprint_region
->cfr_num_pages
,
21524 (uint32_t) num_pages_delta
,
21528 * Storing data for this gap would take more space
21529 * than inserting a new footprint region header:
21530 * let's start a new region and save space. If it's a
21531 * tie, let's avoid using a new region, since that
21532 * would require more region hops to find the right
21533 * range during lookups.
21535 * If the current region's cfr_num_pages would overflow
21536 * if we added "zero" page dispositions for the gap,
21537 * no choice but to start a new region.
21539 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
21540 new_footprint_region
=
21541 vm_map_corpse_footprint_new_region(footprint_header
);
21542 /* check that we're not going over the edge */
21543 if (new_footprint_region
== NULL
) {
21544 goto over_the_edge
;
21546 footprint_region
= new_footprint_region
;
21547 /* initialize new region as empty */
21548 footprint_region
->cfr_vaddr
= old_entry
->vme_start
;
21549 footprint_region
->cfr_num_pages
= 0;
21552 * Store "zero" page dispositions for the missing
21555 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
21556 for (; num_pages_delta
> 0; num_pages_delta
--) {
21557 next_disp_p
= (cf_disp_t
*)
21558 ((uintptr_t) footprint_region
+
21559 sizeof(*footprint_region
));
21560 next_disp_p
+= footprint_region
->cfr_num_pages
;
21561 /* check that we're not going over the edge */
21562 if ((uintptr_t)next_disp_p
>= footprint_edge
) {
21563 goto over_the_edge
;
21565 /* store "zero" disposition for this gap page */
21566 footprint_region
->cfr_num_pages
++;
21567 *next_disp_p
= (cf_disp_t
) 0;
21568 footprint_header
->cf_last_zeroes
++;
21573 for (va
= old_entry
->vme_start
;
21574 va
< old_entry
->vme_end
;
21575 va
+= effective_page_size
) {
21579 vm_map_footprint_query_page_info(old_map
,
21583 cf_disp
= vm_page_disposition_to_cf_disp(disposition
);
21585 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
21587 if (cf_disp
== 0 && footprint_region
->cfr_num_pages
== 0) {
21589 * Ignore "zero" dispositions at start of
21590 * region: just move start of region.
21592 footprint_region
->cfr_vaddr
+= effective_page_size
;
21596 /* would region's cfr_num_pages overflow? */
21597 if (os_add_overflow(footprint_region
->cfr_num_pages
, 1,
21599 /* overflow: create a new region */
21600 new_footprint_region
=
21601 vm_map_corpse_footprint_new_region(
21603 if (new_footprint_region
== NULL
) {
21604 goto over_the_edge
;
21606 footprint_region
= new_footprint_region
;
21607 footprint_region
->cfr_vaddr
= va
;
21608 footprint_region
->cfr_num_pages
= 0;
21611 next_disp_p
= (cf_disp_t
*) ((uintptr_t) footprint_region
+
21612 sizeof(*footprint_region
));
21613 next_disp_p
+= footprint_region
->cfr_num_pages
;
21614 /* check that we're not going over the edge */
21615 if ((uintptr_t)next_disp_p
>= footprint_edge
) {
21616 goto over_the_edge
;
21618 /* store this disposition */
21619 *next_disp_p
= cf_disp
;
21620 footprint_region
->cfr_num_pages
++;
21622 if (cf_disp
!= 0) {
21623 /* non-zero disp: break the current zero streak */
21624 footprint_header
->cf_last_zeroes
= 0;
21629 /* zero disp: add to the current streak of zeroes */
21630 footprint_header
->cf_last_zeroes
++;
21631 if ((footprint_header
->cf_last_zeroes
+
21632 roundup(((footprint_region
->cfr_num_pages
-
21633 footprint_header
->cf_last_zeroes
) * sizeof(cf_disp_t
)) &
21636 (sizeof(*footprint_header
))) {
21638 * There are not enough trailing "zero" dispositions
21639 * (+ the extra padding we would need for the previous
21640 * region); creating a new region would not save space
21641 * at this point, so let's keep this "zero" disposition
21642 * in this region and reconsider later.
21647 * Create a new region to avoid having too many consecutive
21648 * "zero" dispositions.
21650 new_footprint_region
=
21651 vm_map_corpse_footprint_new_region(footprint_header
);
21652 if (new_footprint_region
== NULL
) {
21653 goto over_the_edge
;
21655 footprint_region
= new_footprint_region
;
21656 /* initialize the new region as empty ... */
21657 footprint_region
->cfr_num_pages
= 0;
21658 /* ... and skip this "zero" disp */
21659 footprint_region
->cfr_vaddr
= va
+ effective_page_size
;
21662 return KERN_SUCCESS
;
21665 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
21666 vm_map_corpse_footprint_full
++;
21667 return KERN_RESOURCE_SHORTAGE
;
21671 * vm_map_corpse_footprint_collect_done:
21672 * completes the footprint collection by getting rid of any remaining
21673 * trailing "zero" dispositions and trimming the unused part of the
/*
 * NOTE(review): this listing embeds the original line numbers and omits
 * some lines (return type, parameter list, braces, a few call arguments),
 * so the notes below describe only what the visible statements do.
 *
 * Finishes a corpse footprint collection for new_map:
 *  - drops the trailing zero dispositions counted in cf_last_zeroes from
 *    the last region,
 *  - computes the number of bytes actually used by the footprint data,
 *  - updates the global size statistics,
 *  - gives back the unused tail of the buffer when the page-rounded used
 *    size is smaller than the recorded buffer size,
 *  - stores the page-rounded used size in cf_size.
 */
21677 vm_map_corpse_footprint_collect_done(
21680 struct vm_map_corpse_footprint_header
*footprint_header
;
21681 struct vm_map_corpse_footprint_region
*footprint_region
;
21682 vm_size_t buf_size
, actual_size
;
21685 assert(new_map
->has_corpse_footprint
);
/*
 * Same condition re-checked at run time, with the buffer pointer checked
 * as well. NOTE(review): the body of this branch is not visible here;
 * presumably it leaves the function early.
 */
21686 if (!new_map
->has_corpse_footprint
||
21687 new_map
->vmmap_corpse_footprint
== NULL
) {
21691 footprint_header
= (struct vm_map_corpse_footprint_header
*)
21692 new_map
->vmmap_corpse_footprint
;
21693 buf_size
= footprint_header
->cf_size
;
/* the last region sits cf_last_region bytes past the start of the header */
21695 footprint_region
= (struct vm_map_corpse_footprint_region
*)
21696 ((char *)footprint_header
+
21697 footprint_header
->cf_last_region
);
21699 /* get rid of trailing zeroes in last region */
21700 assert(footprint_region
->cfr_num_pages
>= footprint_header
->cf_last_zeroes
);
21701 footprint_region
->cfr_num_pages
-= footprint_header
->cf_last_zeroes
;
21702 footprint_header
->cf_last_zeroes
= 0;
/*
 * bytes in use: offset of the last region, plus its header, plus one
 * cf_disp_t per page still recorded in it
 */
21704 actual_size
= (vm_size_t
)(footprint_header
->cf_last_region
+
21705 sizeof(*footprint_region
) +
21706 (footprint_region
->cfr_num_pages
* sizeof(cf_disp_t
)));
21708 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
/* update the global statistics: average size, sample count and maximum size */
21709 vm_map_corpse_footprint_size_avg
=
21710 (((vm_map_corpse_footprint_size_avg
*
21711 vm_map_corpse_footprint_count
) +
21713 (vm_map_corpse_footprint_count
+ 1));
21714 vm_map_corpse_footprint_count
++;
21715 if (actual_size
> vm_map_corpse_footprint_size_max
) {
21716 vm_map_corpse_footprint_size_max
= actual_size
;
/* from here on the used size is handled in whole pages */
21719 actual_size
= round_page(actual_size
);
21720 if (buf_size
> actual_size
) {
/*
 * Release (buf_size - actual_size) bytes from kernel_map.
 * NOTE(review): one operand of the start address expression is not
 * visible in this listing.
 */
21721 kr
= vm_deallocate(kernel_map
,
21722 ((vm_address_t
)footprint_header
+
21724 PAGE_SIZE
), /* trailing guard page */
21725 (buf_size
- actual_size
));
21726 assertf(kr
== KERN_SUCCESS
,
21727 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21729 (uint64_t) buf_size
,
21730 (uint64_t) actual_size
,
/*
 * NOTE(review): judging by the assert message below, this vm_protect call
 * sets up the guard that follows the trimmed buffer; most of its
 * arguments are not visible in this listing.
 */
21732 kr
= vm_protect(kernel_map
,
21733 ((vm_address_t
)footprint_header
+
21736 FALSE
, /* set_maximum */
21738 assertf(kr
== KERN_SUCCESS
,
21739 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21741 (uint64_t) buf_size
,
21742 (uint64_t) actual_size
,
/* record the page-rounded size actually in use */
21746 footprint_header
->cf_size
= actual_size
;
21750 * vm_map_corpse_footprint_query_page_info:
21751 * retrieves the disposition of the page at virtual address "vaddr"
21752 * in the forked corpse's VM map
21754 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
/*
 * NOTE(review): this listing embeds the original line numbers and omits
 * some lines (return type, the map parameter, a few locals, braces and
 * jump/return statements), so the notes below describe only what the
 * visible statements do.
 *
 * Looks up the page containing va in the footprint data attached to map
 * (map->vmmap_corpse_footprint) and stores its disposition in
 * *disposition_p.
 *
 *  - When map has no corpse footprint, or the footprint pointer is NULL,
 *    *disposition_p is set to 0 and kr is set to KERN_INVALID_ARGUMENT.
 *  - When no region covers va, *disposition_p is set to 0.
 *  - Otherwise the compact disposition stored for the page is converted
 *    with vm_page_cf_disp_to_disposition().
 *
 * The search starts at the region offset cached in cf_hint_region and the
 * hint is rewritten without synchronization (the existing comments call
 * this racy). The final values of va, *disposition_p and kr are passed to
 * a DTrace probe.
 */
21757 vm_map_corpse_footprint_query_page_info(
21759 vm_map_offset_t va
,
21760 int *disposition_p
)
21762 struct vm_map_corpse_footprint_header
*footprint_header
;
21763 struct vm_map_corpse_footprint_region
*footprint_region
;
21764 uint32_t footprint_region_offset
;
21765 vm_map_offset_t region_start
, region_end
;
21768 int effective_page_size
;
21771 if (!map
->has_corpse_footprint
) {
21772 *disposition_p
= 0;
21773 kr
= KERN_INVALID_ARGUMENT
;
21777 footprint_header
= map
->vmmap_corpse_footprint
;
21778 if (footprint_header
== NULL
) {
21779 *disposition_p
= 0;
21780 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21781 kr
= KERN_INVALID_ARGUMENT
;
21785 /* start looking at the hint ("cf_hint_region") */
21786 footprint_region_offset
= footprint_header
->cf_hint_region
;
/*
 * region extents and disposition indexes below are computed in units of
 * the smaller of PAGE_SIZE and the page size of map
 */
21788 effective_page_size
= MIN(PAGE_SIZE
, VM_MAP_PAGE_SIZE(map
));
21791 if (footprint_region_offset
< sizeof(*footprint_header
)) {
21792 /* hint too low: start from 1st region */
21793 footprint_region_offset
= sizeof(*footprint_header
);
21795 if (footprint_region_offset
>= footprint_header
->cf_last_region
) {
21796 /* hint too high: re-start from 1st region */
21797 footprint_region_offset
= sizeof(*footprint_header
);
/* locate the starting region and compute the address range it covers */
21799 footprint_region
= (struct vm_map_corpse_footprint_region
*)
21800 ((char *)footprint_header
+ footprint_region_offset
);
21801 region_start
= footprint_region
->cfr_vaddr
;
21802 region_end
= (region_start
+
21803 ((vm_map_offset_t
)(footprint_region
->cfr_num_pages
) *
21804 effective_page_size
));
21805 if (va
< region_start
&&
21806 footprint_region_offset
!= sizeof(*footprint_header
)) {
21807 /* our range starts before the hint region */
21809 /* reset the hint (in a racy way...) */
21810 footprint_header
->cf_hint_region
= sizeof(*footprint_header
);
21811 /* lookup "va" again from 1st region */
21812 footprint_region_offset
= sizeof(*footprint_header
);
/*
 * advance region by region for as long as va lies at or beyond the end of
 * the current region
 */
21816 while (va
>= region_end
) {
21817 if (footprint_region_offset
>= footprint_header
->cf_last_region
) {
/*
 * NOTE(review): the body of the check above is not visible here;
 * presumably it stops the walk once the last region has been reached.
 */
21820 /* skip the region's header */
21821 footprint_region_offset
+= sizeof(*footprint_region
);
21822 /* skip the region's page dispositions */
21823 footprint_region_offset
+= (footprint_region
->cfr_num_pages
* sizeof(cf_disp_t
));
21824 /* align to next word boundary */
21825 footprint_region_offset
=
21826 roundup(footprint_region_offset
,
/* NOTE(review): the alignment argument of roundup() is not visible in this listing */
21828 footprint_region
= (struct vm_map_corpse_footprint_region
*)
21829 ((char *)footprint_header
+ footprint_region_offset
);
21830 region_start
= footprint_region
->cfr_vaddr
;
21831 region_end
= (region_start
+
21832 ((vm_map_offset_t
)(footprint_region
->cfr_num_pages
) *
21833 effective_page_size
));
21835 if (va
< region_start
|| va
>= region_end
) {
21836 /* page not found */
21837 *disposition_p
= 0;
21838 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21843 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
21844 footprint_header
->cf_hint_region
= footprint_region_offset
;
21846 /* get page disposition for "va" in this region */
21847 disp_idx
= (int) ((va
- footprint_region
->cfr_vaddr
) / effective_page_size
);
21848 cf_disp
= footprint_region
->cfr_disposition
[disp_idx
];
/* expand the compact per-page value into the disposition reported to the caller */
21849 *disposition_p
= vm_page_cf_disp_to_disposition(cf_disp
);
21852 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21853 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
21854 DTRACE_VM4(footprint_query_page_info
,
21856 vm_map_offset_t
, va
,
21857 int, *disposition_p
,
21858 kern_return_t
, kr
);
/*
 * NOTE(review): this listing embeds the original line numbers and omits
 * some lines (return type, parameter list, the kr declaration, braces).
 *
 * Releases the corpse footprint buffer attached to map, if there is one:
 * the buffer size is read from the cf_size field of the footprint header,
 * the buffer plus one extra page (the trailing guard page) is deallocated
 * from kernel_map, and map is then marked as having no footprint.
 * Nothing is done when has_corpse_footprint is clear or the buffer
 * pointer is 0.
 */
21864 vm_map_corpse_footprint_destroy(
21867 if (map
->has_corpse_footprint
&&
21868 map
->vmmap_corpse_footprint
!= 0) {
21869 struct vm_map_corpse_footprint_header
*footprint_header
;
21870 vm_size_t buf_size
;
21873 footprint_header
= map
->vmmap_corpse_footprint
;
21874 buf_size
= footprint_header
->cf_size
;
/* the size handed back is cf_size plus PAGE_SIZE */
21875 kr
= vm_deallocate(kernel_map
,
21876 (vm_offset_t
) map
->vmmap_corpse_footprint
,
21877 ((vm_size_t
) buf_size
21878 + PAGE_SIZE
)); /* trailing guard page */
21879 assertf(kr
== KERN_SUCCESS
, "kr=0x%x\n", kr
);
/* clear both the pointer and the flag so the freed buffer cannot be reached through map */
21880 map
->vmmap_corpse_footprint
= 0;
21881 map
->has_corpse_footprint
= FALSE
;
21886 * vm_map_copy_footprint_ledgers:
21887 * copies any ledger that's relevant to the memory footprint of "old_task"
21888 * into the forked corpse's task ("new_task")
/*
 * NOTE(review): this listing embeds the original line numbers and omits
 * some lines (return type, parameter list, braces).
 *
 * Calls vm_map_copy_ledger() once for each task ledger entry listed
 * below, passing old_task, new_task and the task_ledgers index of that
 * entry. The body contains nothing but these calls.
 */
21891 vm_map_copy_footprint_ledgers(
21895 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.phys_footprint
);
21896 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.purgeable_nonvolatile
);
21897 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.purgeable_nonvolatile_compressed
);
21898 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.internal
);
21899 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.internal_compressed
);
21900 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.iokit_mapped
);
21901 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.alternate_accounting
);
21902 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.alternate_accounting_compressed
);
21903 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.page_table
);
21904 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.tagged_footprint
);
21905 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.tagged_footprint_compressed
);
21906 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.network_nonvolatile
);
21907 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.network_nonvolatile_compressed
);
21908 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.media_footprint
);
21909 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.media_footprint_compressed
);
21910 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.graphics_footprint
);
21911 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.graphics_footprint_compressed
);
21912 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.neural_footprint
);
21913 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.neural_footprint_compressed
);
21914 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.wired_mem
);
21918 * vm_map_copy_ledger:
21919 * copy a single ledger from "old_task" to "new_task"
/*
 * NOTE(review): this listing embeds the original line numbers and omits
 * some lines (return type, parameter list, braces, and the trailing
 * arguments of every ledger_* call), so which ledger entry is touched and
 * where the balances are stored cannot be read from the visible text.
 *
 * Visible behaviour: the ledger of new_task is adjusted so that its
 * balance matches the one read from the ledger of old_task.
 *  - equal balances: nothing to do
 *  - new_balance greater than old_balance: debit the difference
 *  - otherwise: credit the difference
 * Panic-on-negative checking is disabled on the ledger of new_task before
 * the adjustment. The function asserts that the map of new_task has a
 * corpse footprint and re-checks that condition at run time.
 */
21922 vm_map_copy_ledger(
21927 ledger_amount_t old_balance
, new_balance
, delta
;
21929 assert(new_task
->map
->has_corpse_footprint
);
/* NOTE(review): the body of this branch is not visible; presumably an early exit */
21930 if (!new_task
->map
->has_corpse_footprint
) {
21934 /* turn off sanity checks for the ledger we're about to mess with */
21935 ledger_disable_panic_on_negative(new_task
->ledger
,
21938 /* adjust "new_task" to match "old_task" */
21939 ledger_get_balance(old_task
->ledger
,
21942 ledger_get_balance(new_task
->ledger
,
/* delta is always computed as a non-negative difference before it is applied */
21945 if (new_balance
== old_balance
) {
21946 /* new == old: done */
21947 } else if (new_balance
> old_balance
) {
21948 /* new > old ==> new -= new - old */
21949 delta
= new_balance
- old_balance
;
21950 ledger_debit(new_task
->ledger
,
21954 /* new < old ==> new += old - new */
21955 delta
= old_balance
- new_balance
;
21956 ledger_credit(new_task
->ledger
,
/*
 * NOTE(review): this listing embeds the original line numbers and omits
 * some lines (for example the opening line of the struct below).
 *
 * pmap_ledgers_panic and pmap_ledgers_panic_leeway are defined elsewhere;
 * vm_map_pmap_check_ledgers() reads them when deciding how to react to a
 * ledger that is not balanced.
 */
21964 extern int pmap_ledgers_panic
;
21965 extern int pmap_ledgers_panic_leeway
;
/*
 * LEDGER_DRIFT(x) declares six fields for ledger x:
 *   x_over,  x_over_total,  x_over_max   - used for balances above zero
 *   x_under, x_under_total, x_under_max  - used for balances below zero
 * The counters are int; the totals and maxima are ledger_amount_t.
 */
21967 #define LEDGER_DRIFT(__LEDGER) \
21968 int __LEDGER##_over; \
21969 ledger_amount_t __LEDGER##_over_total; \
21970 ledger_amount_t __LEDGER##_over_max; \
21971 int __LEDGER##_under; \
21972 ledger_amount_t __LEDGER##_under_total; \
21973 ledger_amount_t __LEDGER##_under_max
/*
 * Global drift record: num_pmaps_checked is incremented by
 * vm_map_pmap_check_ledgers(), followed by one LEDGER_DRIFT group per
 * ledger that function checks.
 */
21976 uint64_t num_pmaps_checked
;
21978 LEDGER_DRIFT(phys_footprint
);
21979 LEDGER_DRIFT(internal
);
21980 LEDGER_DRIFT(internal_compressed
);
21981 LEDGER_DRIFT(iokit_mapped
);
21982 LEDGER_DRIFT(alternate_accounting
);
21983 LEDGER_DRIFT(alternate_accounting_compressed
);
21984 LEDGER_DRIFT(page_table
);
21985 LEDGER_DRIFT(purgeable_volatile
);
21986 LEDGER_DRIFT(purgeable_nonvolatile
);
21987 LEDGER_DRIFT(purgeable_volatile_compressed
);
21988 LEDGER_DRIFT(purgeable_nonvolatile_compressed
);
21989 LEDGER_DRIFT(tagged_nofootprint
);
21990 LEDGER_DRIFT(tagged_footprint
);
21991 LEDGER_DRIFT(tagged_nofootprint_compressed
);
21992 LEDGER_DRIFT(tagged_footprint_compressed
);
21993 LEDGER_DRIFT(network_volatile
);
21994 LEDGER_DRIFT(network_nonvolatile
);
21995 LEDGER_DRIFT(network_volatile_compressed
);
21996 LEDGER_DRIFT(network_nonvolatile_compressed
);
21997 LEDGER_DRIFT(media_nofootprint
);
21998 LEDGER_DRIFT(media_footprint
);
21999 LEDGER_DRIFT(media_nofootprint_compressed
);
22000 LEDGER_DRIFT(media_footprint_compressed
);
22001 LEDGER_DRIFT(graphics_nofootprint
);
22002 LEDGER_DRIFT(graphics_footprint
);
22003 LEDGER_DRIFT(graphics_nofootprint_compressed
);
22004 LEDGER_DRIFT(graphics_footprint_compressed
);
22005 LEDGER_DRIFT(neural_nofootprint
);
22006 LEDGER_DRIFT(neural_footprint
);
22007 LEDGER_DRIFT(neural_nofootprint_compressed
);
22008 LEDGER_DRIFT(neural_footprint_compressed
);
22009 } pmap_ledgers_drift
;
/*
 * NOTE(review): this listing embeds the original line numbers and omits
 * some lines (return type, parameter list, braces and several lines of
 * the macro below), so the notes describe only what the visible text
 * does. The identifiers pmap, ledger, pid and procname are used below
 * without a visible declaration; presumably they are parameters.
 *
 * Checks the balance of each task ledger listed below, accumulates drift
 * statistics in pmap_ledgers_drift, and finally reports an imbalance
 * either with panic() or with printf().
 */
22012 vm_map_pmap_check_ledgers(
22018 ledger_amount_t bal
;
22019 boolean_t do_panic
;
/* count every call of this function */
22023 pmap_ledgers_drift
.num_pmaps_checked
++;
/*
 * LEDGER_CHECK_BALANCE(x), as far as it is visible here:
 *  - calls ledger_get_balance() for task_ledgers.x, then queries the
 *    panic_on_negative setting of that entry (the local defaults to TRUE)
 *  - evaluates a condition that is true when panic_on_negative is set, or
 *    when pmap_ledgers_panic is set, the leeway is positive and bal lies
 *    outside plus or minus pmap_ledgers_panic_leeway pages
 *    (the body of that condition is not visible)
 *  - prints the pid, process name, ledger name and balance
 *  - updates the x_over or x_under counters, totals and extreme values in
 *    pmap_ledgers_drift
 * NOTE(review): if PAGE_SIZE expands to an unsigned type, the product
 * -pmap_ledgers_panic_leeway * PAGE_SIZE is computed in unsigned
 * arithmetic and the comparison with bal would not act as a signed range
 * check - confirm against the definition of PAGE_SIZE.
 */
22025 #define LEDGER_CHECK_BALANCE(__LEDGER) \
22027 int panic_on_negative = TRUE; \
22028 ledger_get_balance(ledger, \
22029 task_ledgers.__LEDGER, \
22031 ledger_get_panic_on_negative(ledger, \
22032 task_ledgers.__LEDGER, \
22033 &panic_on_negative); \
22035 if (panic_on_negative || \
22036 (pmap_ledgers_panic && \
22037 pmap_ledgers_panic_leeway > 0 && \
22038 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
22039 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
22042 printf("LEDGER BALANCE proc %d (%s) " \
22043 "\"%s\" = %lld\n", \
22044 pid, procname, #__LEDGER, bal); \
22046 pmap_ledgers_drift.__LEDGER##_over++; \
22047 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
22048 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
22049 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
22051 } else if (bal < 0) { \
22052 pmap_ledgers_drift.__LEDGER##_under++; \
22053 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
22054 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
22055 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
/* one check per ledger; the list matches the LEDGER_DRIFT fields of pmap_ledgers_drift */
22061 LEDGER_CHECK_BALANCE(phys_footprint
);
22062 LEDGER_CHECK_BALANCE(internal
);
22063 LEDGER_CHECK_BALANCE(internal_compressed
);
22064 LEDGER_CHECK_BALANCE(iokit_mapped
);
22065 LEDGER_CHECK_BALANCE(alternate_accounting
);
22066 LEDGER_CHECK_BALANCE(alternate_accounting_compressed
);
22067 LEDGER_CHECK_BALANCE(page_table
);
22068 LEDGER_CHECK_BALANCE(purgeable_volatile
);
22069 LEDGER_CHECK_BALANCE(purgeable_nonvolatile
);
22070 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed
);
22071 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed
);
22072 LEDGER_CHECK_BALANCE(tagged_nofootprint
);
22073 LEDGER_CHECK_BALANCE(tagged_footprint
);
22074 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed
);
22075 LEDGER_CHECK_BALANCE(tagged_footprint_compressed
);
22076 LEDGER_CHECK_BALANCE(network_volatile
);
22077 LEDGER_CHECK_BALANCE(network_nonvolatile
);
22078 LEDGER_CHECK_BALANCE(network_volatile_compressed
);
22079 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed
);
22080 LEDGER_CHECK_BALANCE(media_nofootprint
);
22081 LEDGER_CHECK_BALANCE(media_footprint
);
22082 LEDGER_CHECK_BALANCE(media_nofootprint_compressed
);
22083 LEDGER_CHECK_BALANCE(media_footprint_compressed
);
22084 LEDGER_CHECK_BALANCE(graphics_nofootprint
);
22085 LEDGER_CHECK_BALANCE(graphics_footprint
);
22086 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed
);
22087 LEDGER_CHECK_BALANCE(graphics_footprint_compressed
);
22088 LEDGER_CHECK_BALANCE(neural_nofootprint
);
22089 LEDGER_CHECK_BALANCE(neural_footprint
);
22090 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed
);
22091 LEDGER_CHECK_BALANCE(neural_footprint_compressed
);
/*
 * Final report: panic() when pmap_ledgers_panic is set; the printf() with
 * the same message follows. NOTE(review): the enclosing condition and the
 * else keyword are not visible in this listing.
 */
22094 if (pmap_ledgers_panic
) {
22095 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
22096 pmap
, pid
, procname
);
22098 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
22099 pmap
, pid
, procname
);
22103 #endif /* MACH_ASSERT */