 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 *
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory mapping module.
 */
#include <task_swapper.h>
#include <mach_assert.h>

#include <vm/vm_options.h>

#include <libkern/OSAtomic.h>

#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/vm_attributes.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/vm_statistics.h>
#include <mach/memory_object.h>
#include <mach/mach_vm.h>
#include <machine/cpu_capabilities.h>

#include <kern/assert.h>
#include <kern/backtrace.h>
#include <kern/counters.h>
#include <kern/exc_guard.h>
#include <kern/kalloc.h>
#include <kern/zalloc_internal.h>

#include <vm/vm_compressor.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_init.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>

#include <mach/vm_map_server.h>
#include <mach/mach_host_server.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_protos.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_map_store.h>

#include <san/kasan.h>

#include <sys/codesign.h>
#include <sys/mman.h>

#include <libkern/section_keywords.h>
#if DEVELOPMENT || DEBUG
extern int proc_selfcsflags(void);
int panic_on_unsigned_execute = 0;
#endif /* DEVELOPMENT || DEBUG */

int debug4k_filter = 0;
char debug4k_proc_name[1024] = "";
int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
int debug4k_panic_on_misaligned_sharing = 0;
const char *debug4k_category_name[] = {
#endif /* MACH_ASSERT */
int debug4k_no_cow_copyin = 0;

extern const int fourk_binary_compatibility_unsafe;
extern const int fourk_binary_compatibility_allow_wx;
#endif /* __arm64__ */
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);

#if VM_MAP_DEBUG_APPLE_PROTECT
int vm_map_debug_apple_protect = 0;
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
#if VM_MAP_DEBUG_FOURK
int vm_map_debug_fourk = 0;
#endif /* VM_MAP_DEBUG_FOURK */

SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
int vm_map_executable_immutable_verbose = 0;

os_refgrp_decl(static, map_refgrp, "vm_map", NULL);

extern u_int32_t random(void);  /* from <libkern/libkern.h> */
/* Internal prototypes */

static void vm_map_simplify_range(
    vm_map_offset_t start,
    vm_map_offset_t end);       /* forward */

static boolean_t vm_map_range_check(
    vm_map_offset_t start,
    vm_map_entry_t *entry);

static vm_map_entry_t _vm_map_entry_create(
    struct vm_map_header *map_header, boolean_t map_locked);

static void _vm_map_entry_dispose(
    struct vm_map_header *map_header,
    vm_map_entry_t entry);

static void vm_map_pmap_enter(
    vm_map_offset_t addr,
    vm_map_offset_t end_addr,
    vm_object_offset_t offset,
    vm_prot_t protection);

static void _vm_map_clip_end(
    struct vm_map_header *map_header,
    vm_map_entry_t entry,
    vm_map_offset_t end);

static void _vm_map_clip_start(
    struct vm_map_header *map_header,
    vm_map_entry_t entry,
    vm_map_offset_t start);

static void vm_map_entry_delete(
    vm_map_entry_t entry);

static kern_return_t vm_map_delete(
    vm_map_offset_t start,

static void vm_map_copy_insert(
    vm_map_entry_t after_where,

static kern_return_t vm_map_copy_overwrite_unaligned(
    vm_map_entry_t entry,
    vm_map_address_t start,
    boolean_t discard_on_success);

static kern_return_t vm_map_copy_overwrite_aligned(
    vm_map_entry_t tmp_entry,
    vm_map_offset_t start,

static kern_return_t vm_map_copyin_kernel_buffer(
    vm_map_address_t src_addr,
    boolean_t src_destroy,
    vm_map_copy_t *copy_result);        /* OUT */

static kern_return_t vm_map_copyout_kernel_buffer(
    vm_map_address_t *addr,             /* IN/OUT */
    vm_map_size_t copy_size,
    boolean_t consume_on_success);

static void vm_map_fork_share(
    vm_map_entry_t old_entry,

static boolean_t vm_map_fork_copy(
    vm_map_entry_t *old_entry_p,
    int vm_map_copyin_flags);

static kern_return_t vm_map_wire_nested(
    vm_map_offset_t start,
    vm_prot_t caller_prot,
    vm_map_offset_t pmap_addr,
    ppnum_t *physpage_p);

static kern_return_t vm_map_unwire_nested(
    vm_map_offset_t start,
    vm_map_offset_t pmap_addr);

static kern_return_t vm_map_overwrite_submap_recurse(
    vm_map_offset_t dst_addr,
    vm_map_size_t dst_size);

static kern_return_t vm_map_copy_overwrite_nested(
    vm_map_offset_t dst_addr,
    boolean_t interruptible,
    boolean_t discard_on_success);

static kern_return_t vm_map_remap_extract(
    vm_map_offset_t addr,
    vm_prot_t required_protection,
    struct vm_map_header *map_header,
    vm_prot_t *cur_protection,
    vm_prot_t *max_protection,
    vm_inherit_t inheritance,
    vm_map_kernel_flags_t vmk_flags);

static kern_return_t vm_map_remap_range_allocate(
    vm_map_address_t *address,
    vm_map_offset_t mask,
    vm_map_kernel_flags_t vmk_flags,
    vm_map_entry_t *map_entry);

static void vm_map_region_look_for_page(
    vm_object_offset_t offset,
    unsigned short depth,
    vm_region_extended_info_t extended,
    mach_msg_type_number_t count);

static int vm_map_region_count_obj_refs(
    vm_map_entry_t entry,

static kern_return_t vm_map_willneed(
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_reuse_pages(
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_reusable_pages(
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_can_reuse(
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_pageout(
    vm_map_offset_t start,
    vm_map_offset_t end);
#endif /* MACH_ASSERT */

kern_return_t vm_map_corpse_footprint_collect(
    vm_map_entry_t old_entry,

void vm_map_corpse_footprint_collect_done(

void vm_map_corpse_footprint_destroy(

kern_return_t vm_map_corpse_footprint_query_page_info(

void vm_map_footprint_query_page_info(
    vm_map_entry_t map_entry,
    vm_map_offset_t curr_s_offset,
static const struct vm_map_entry vm_map_entry_template = {
    .behavior = VM_BEHAVIOR_DEFAULT,
    .inheritance = VM_INHERIT_DEFAULT,
};

pid_t find_largest_process_vm_map_entries(void);

/*
 * Macros to copy a vm_map_entry.  We must be careful to correctly
 * manage the wired page count.  vm_map_entry_copy() creates a new
 * map entry to the same memory - the wired count in the new entry
 * must be set to zero.  vm_map_entry_copy_full() creates a new
 * entry that is identical to the old entry.  This preserves the
 * wire count; it's used for map splitting and zone changing in
 */
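/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * contrast of the two copy helpers described above.  The (map, new, old)
 * argument order is assumed from the copy helpers defined below; the entry
 * names are hypothetical.
 */
#if 0   /* illustrative only */
static void
vm_map_entry_copy_example(vm_map_t map, vm_map_entry_t old_entry)
{
    struct vm_map_entry fork_copy, split_copy;

    vm_map_entry_copy(map, &fork_copy, old_entry);        /* wired counts reset to zero */
    vm_map_entry_copy_full(map, &split_copy, old_entry);  /* wired counts preserved */
    assert(fork_copy.wired_count == 0);
    assert(split_copy.wired_count == old_entry->wired_count);
}
#endif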
vm_map_entry_copy_pmap_cs_assoc(
    vm_map_t map __unused,
    vm_map_entry_t new __unused,
    vm_map_entry_t old __unused)
{
    /* when pmap_cs is not enabled, assert as a sanity check */
    assert(new->pmap_cs_associated == FALSE);
}

/*
 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 * But for security reasons on some platforms, we don't want the
 * new mapping to be "used for jit", so we reset the flag here.
 */
vm_map_entry_copy_code_signing(
    vm_map_entry_t old __unused)
{
    if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
        assert(new->used_for_jit == old->used_for_jit);
    } else {
        new->used_for_jit = FALSE;
    }
}

    boolean_t _vmec_reserved = new->from_reserved_zone;

    new->is_shared = FALSE;
    new->needs_wakeup = FALSE;
    new->in_transition = FALSE;
    new->wired_count = 0;
    new->user_wired_count = 0;
    new->permanent = FALSE;
    vm_map_entry_copy_code_signing(map, new, old);
    vm_map_entry_copy_pmap_cs_assoc(map, new, old);
    new->from_reserved_zone = _vmec_reserved;
    if (new->iokit_acct) {
        assertf(!new->use_pmap, "old %p new %p\n", old, new);
        new->iokit_acct = FALSE;
        new->use_pmap = TRUE;
    }
    new->vme_resilient_codesign = FALSE;
    new->vme_resilient_media = FALSE;
    new->vme_atomic = FALSE;
    new->vme_no_copy_on_read = FALSE;

vm_map_entry_copy_full(
    boolean_t _vmecf_reserved = new->from_reserved_zone;
    new->from_reserved_zone = _vmecf_reserved;
/*
 * Normal lock_read_to_write() returns FALSE/0 on failure.
 * These functions evaluate to zero on success and non-zero value on failure.
 */
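/*
 * Illustrative sketch (editor's addition, not from the original source):
 * how a caller typically handles the inverted return convention noted
 * above.  "map" is a hypothetical vm_map_t already held for read.
 */
#if 0   /* illustrative only */
static void
vm_map_lock_upgrade_example(vm_map_t map)
{
    /* caller currently holds the map lock for read */
    if (vm_map_lock_read_to_write(map)) {
        /* non-zero: the upgrade failed and the read hold was lost */
        vm_map_lock(map);       /* reacquire exclusively and revalidate state */
    }
    /* zero: the map lock is now held exclusively */
    vm_map_unlock(map);
}
#endif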
__attribute__((always_inline))
vm_map_lock_read_to_write(vm_map_t map)
{
    if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
        DTRACE_VM(vm_map_lock_upgrade);

__attribute__((always_inline))
vm_map_try_lock(vm_map_t map)
{
    if (lck_rw_try_lock_exclusive(&(map)->lock)) {
        DTRACE_VM(vm_map_lock_w);

__attribute__((always_inline))
vm_map_try_lock_read(vm_map_t map)
{
    if (lck_rw_try_lock_shared(&(map)->lock)) {
        DTRACE_VM(vm_map_lock_r);
/*
 * Routines to get the page size the caller should
 * use while inspecting the target address space.
 * Use the "_safely" variant if the caller is dealing with a user-provided
 * array whose size depends on the page size, to avoid any overflow or
 * underflow of a user-allocated buffer.
 */
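/*
 * Illustrative sketch (editor's addition): why the "_safely" variant
 * matters when sizing a caller-provided buffer.  The function and
 * parameter names here are hypothetical.
 */
#if 0   /* illustrative only */
static kern_return_t
region_buffer_pages_example(vm_map_t target_map, vm_map_size_t region_size, uint64_t *npages)
{
    int shift = vm_self_region_page_shift_safely(target_map);

    if (shift == -1) {
        /* page-size mismatch: refuse rather than guess and over/underflow the buffer */
        return KERN_INVALID_ARGUMENT;
    }
    *npages = (region_size + (1ULL << shift) - 1) >> shift;
    return KERN_SUCCESS;
}
#endif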
vm_self_region_page_shift_safely(
    int effective_page_shift = 0;

    if (PAGE_SIZE == (4096)) {
        /* x86_64 and 4k watches: always use 4k */

    /* did caller provide an explicit page size for this thread to use? */
    effective_page_shift = thread_self_region_page_shift();
    if (effective_page_shift) {
        /* use the explicitly-provided page size */
        return effective_page_shift;
    }

    /* no explicit page size: use the caller's page size... */
    effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
    if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
        /* page size match: safe to use */
        return effective_page_shift;
    }
    /* page size mismatch */

vm_self_region_page_shift(
    int effective_page_shift;

    effective_page_shift = vm_self_region_page_shift_safely(target_map);
    if (effective_page_shift == -1) {
        /* no safe value but OK to guess for caller */
        effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
            VM_MAP_PAGE_SHIFT(target_map));
    }
    return effective_page_shift;
}
/*
 * Decide if we want to allow processes to execute from their data or stack areas.
 * override_nx() returns true if we do.  Data/stack execution can be enabled independently
 * for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 * or allow_stack_exec to enable data execution for that type of data area for that particular
 * ABI (or both by or'ing the flags together).  These are initialized in the architecture
 * specific pmap files since the default behavior varies according to architecture.  The
 * main reason it varies is because of the need to provide binary compatibility with old
 * applications that were written before these restrictions came into being.  In the old
 * days, an app could execute anything it could read, but this has slowly been tightened
 * up over time.  The default behavior is:
 *
 *	32-bit PPC apps		may execute from both stack and data areas
 *	32-bit Intel apps	may execute from data areas but not stack
 *	64-bit PPC/Intel apps	may not execute from either data or stack
 *
 * An application on any architecture may override these defaults by explicitly
 * adding PROT_EXEC permission to the page in question with the mprotect(2)
 * system call.  This code here just determines what happens when an app tries to
 * execute from a page that lacks execute permission.
 *
 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 * execution from data areas for a particular binary even if the arch normally permits it.  As
 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 * to support some complicated use cases, notably browsers with out-of-process plugins that
 * are not all NX-safe.
 */

extern int allow_data_exec, allow_stack_exec;
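/*
 * Illustrative sketch (editor's addition): how the per-ABI bits described
 * above combine.  The assignments are hypothetical examples; the real
 * defaults come from the architecture-specific pmap files.
 */
#if 0   /* illustrative only */
static void
override_nx_flags_example(void)
{
    /* allow data execution for 32-bit processes only... */
    allow_data_exec = VM_ABI_32;
    /* ...or for both ABIs: */
    allow_data_exec = VM_ABI_32 | VM_ABI_64;
    /* override_nx() then masks with the caller's ABI; a non-zero result means "allow" */
}
#endif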
override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
{
    if (map->pmap == kernel_pmap) {

    /*
     * Determine if the app is running in 32 or 64 bit mode.
     */
    if (vm_map_is_64bit(map)) {
        current_abi = VM_ABI_64;
    } else {
        current_abi = VM_ABI_32;
    }

    /*
     * Determine if we should allow the execution based on whether it's a
     * stack or data area and the current architecture.
     */
    if (user_tag == VM_MEMORY_STACK) {
        return allow_stack_exec & current_abi;
    }

    return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
}
/*
 * Virtual memory maps provide for the mapping, protection,
 * and sharing of virtual memory objects.  In addition,
 * this module provides for an efficient virtual copy of
 * memory from one map to another.
 *
 * Synchronization is required prior to most operations.
 *
 * Maps consist of an ordered doubly-linked list of simple
 * entries; a single hint is used to speed up lookups.
 *
 * Sharing maps have been deleted from this version of Mach.
 * All shared objects are now mapped directly into the respective
 * maps.  This requires a change in the copy on write strategy;
 * the asymmetric (delayed) strategy is used for shared temporary
 * objects instead of the symmetric (shadow) strategy.  All maps
 * are now "top level" maps (either task map, kernel map or submap
 * of the kernel map).
 *
 * Since portions of maps are specified by start/end addresses,
 * which may not align with existing map entries, all
 * routines merely "clip" entries to these start/end values.
 * [That is, an entry is split into two, bordering at a
 * start or end value.]  Note that these clippings may not
 * always be necessary (as the two resulting entries are then
 * not changed); however, the clipping is done for convenience.
 * No attempt is currently made to "glue back together" two
 * The symmetric (shadow) copy strategy implements virtual copy
 * by copying VM object references from one map to
 * another, and then marking both regions as copy-on-write.
 * It is important to note that only one writeable reference
 * to a VM object region exists in any map when this strategy
 * is used -- this means that shadow object creation can be
 * delayed until a write operation occurs.  The asymmetric (delayed)
 * strategy allows multiple maps to have writeable references to
 * the same region of a vm object, and hence cannot delay creating
 * its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 * Copying of permanent objects is completely different; see
 * vm_object_copy_strategically() in vm_object.c.
 */
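/*
 * Illustrative sketch (editor's addition): the "clipping" idea described
 * above, reduced to plain interval arithmetic.  This is a simplification
 * with hypothetical names, not the real _vm_map_clip_start()/_vm_map_clip_end().
 */
#if 0   /* illustrative only */
struct clip_range { vm_map_offset_t start, end; };

static void
clip_range_example(struct clip_range *r, vm_map_offset_t addr, struct clip_range *tail)
{
    assert(addr > r->start && addr < r->end);
    tail->start = addr;     /* second entry covers [addr, old end) */
    tail->end = r->end;
    r->end = addr;          /* first entry now ends at the clip point */
}
#endif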
static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone;                 /* zone for vm_map structures */
static SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_reserved_zone;  /* zone with reserve for non-blocking allocations */
static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone;            /* zone for vm_map_copy structures */

SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_zone;                  /* zone for vm_map_entry structures */
SECURITY_READ_ONLY_LATE(zone_t) vm_map_holes_zone;                  /* zone for vm map holes (vm_map_links) structures */

#define VM_MAP_ZONE_NAME "maps"
#define VM_MAP_ZFLAGS ( \

#define VME_RESERVED_ZONE_NAME "Reserved VM map entries"
#define VM_MAP_RESERVED_ZFLAGS ( \
	ZC_KASAN_NOQUARANTINE | \

#define VM_MAP_HOLES_ZONE_NAME "VM map holes"
#define VM_MAP_HOLES_ZFLAGS ( \
/*
 * Asserts that a vm_map_copy object is coming from the
 * vm_map_copy_zone to ensure that it isn't a fake constructed
 */
vm_map_copy_require(struct vm_map_copy *copy)
{
    zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
}
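/*
 * Illustrative sketch (editor's addition): a consumer validating a
 * vm_map_copy_t before trusting its fields, as described above.  The
 * surrounding function is hypothetical.
 */
#if 0   /* illustrative only */
static vm_map_size_t
copy_object_size_example(vm_map_copy_t copy)
{
    vm_map_copy_require(copy);  /* panics if not allocated from vm_map_copy_zone */
    return copy->size;
}
#endif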
/*
 * Placeholder object for submap operations.  This object is dropped
 * into the range by a call to vm_map_find, and removed when
 * vm_map_submap creates the submap.
 */
vm_object_t vm_submap_object;

static __startup_data vm_offset_t map_data;
static __startup_data vm_size_t map_data_size;
static __startup_data vm_offset_t kentry_data;
static __startup_data vm_size_t kentry_data_size;
static __startup_data vm_offset_t map_holes_data;
static __startup_data vm_size_t map_holes_data_size;

#if XNU_TARGET_OS_OSX
#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
#else /* XNU_TARGET_OS_OSX */
#define NO_COALESCE_LIMIT 0
#endif /* XNU_TARGET_OS_OSX */

/* Skip acquiring locks if we're in the midst of a kernel core dump */
unsigned int not_in_kdp = 1;
unsigned int vm_map_set_cache_attr_count = 0;

vm_map_set_cache_attr(
    vm_map_entry_t map_entry;
    kern_return_t kr = KERN_SUCCESS;

    vm_map_lock_read(map);

    if (!vm_map_lookup_entry(map, va, &map_entry) ||
        map_entry->is_sub_map) {
        /*
         * that memory is not properly mapped
         */
        kr = KERN_INVALID_ARGUMENT;
    }
    object = VME_OBJECT(map_entry);

    if (object == VM_OBJECT_NULL) {
        /*
         * there should be a VM object here at this point
         */
        kr = KERN_INVALID_ARGUMENT;
    }
    vm_object_lock(object);
    object->set_cache_attr = TRUE;
    vm_object_unlock(object);

    vm_map_set_cache_attr_count++;

    vm_map_unlock_read(map);
#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 */
vm_map_apple_protected(
    vm_map_offset_t start,
    vm_object_offset_t crypto_backing_offset,
    struct pager_crypt_info *crypt_info,
    boolean_t map_locked;
    vm_map_entry_t map_entry;
    struct vm_map_entry tmp_entry;
    memory_object_t unprotected_mem_obj;
    vm_object_t protected_object;
    vm_map_offset_t map_addr;
    vm_map_offset_t start_aligned, end_aligned;
    vm_object_offset_t crypto_start, crypto_end;
    vm_map_kernel_flags_t vmk_flags;

    vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

    unprotected_mem_obj = MEMORY_OBJECT_NULL;

    start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
    end_aligned = vm_map_round_page(end, PAGE_MASK_64);
    start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
    end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));

    /*
     * "start" and "end" might be 4K-aligned but not 16K-aligned,
     * so we might have to loop and establish up to 3 mappings:
     *
     * + the first 16K-page, which might overlap with the previous
     *   4K-aligned mapping,
     * + the last 16K-page, which might overlap with the next
     *   4K-aligned mapping.
     * Each of these mapping might be backed by a vnode pager (if
     * properly page-aligned) or a "fourk_pager", itself backed by a
     * vnode pager (if 4K-aligned but not page-aligned).
     */
#endif /* __arm64__ */

    map_addr = start_aligned;
    for (map_addr = start_aligned;
        map_addr = tmp_entry.vme_end) {

        /* lookup the protected VM object */
        if (!vm_map_lookup_entry(map,
            map_entry->is_sub_map ||
            VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
            /* that memory is not properly mapped */
            kr = KERN_INVALID_ARGUMENT;
        }

        /* ensure mapped memory is mapped as executable except
         * for model decryption flow */
        if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
            !(map_entry->protection & VM_PROT_EXECUTE)) {
            kr = KERN_INVALID_ARGUMENT;
        }

        /* get the protected object to be decrypted */
        protected_object = VME_OBJECT(map_entry);
        if (protected_object == VM_OBJECT_NULL) {
            /* there should be a VM object here at this point */
            kr = KERN_INVALID_ARGUMENT;
        }
        /* ensure protected object stays alive while map is unlocked */
        vm_object_reference(protected_object);

        /* limit the map entry to the area we want to cover */
        vm_map_clip_start(map, map_entry, start_aligned);
        vm_map_clip_end(map, map_entry, end_aligned);

        tmp_entry = *map_entry;
        map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */

        /*
         * This map entry might be only partially encrypted
         * (if not fully "page-aligned").
         */
        crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
        if (tmp_entry.vme_start < start) {
            if (tmp_entry.vme_start != start_aligned) {
                kr = KERN_INVALID_ADDRESS;
            }
            crypto_start += (start - tmp_entry.vme_start);
        }
        if (tmp_entry.vme_end > end) {
            if (tmp_entry.vme_end != end_aligned) {
                kr = KERN_INVALID_ADDRESS;
            }
            crypto_end -= (tmp_entry.vme_end - end);
        }

        /*
         * This "extra backing offset" is needed to get the decryption
         * routine to use the right key.  It adjusts for the possibly
         * relative offset of an interposed "4K" pager...
         */
        if (crypto_backing_offset == (vm_object_offset_t) -1) {
            crypto_backing_offset = VME_OFFSET(&tmp_entry);
        }

        /*
         * Lookup (and create if necessary) the protected memory object
         * matching that VM object.
         * If successful, this also grabs a reference on the memory object,
         * to guarantee that it doesn't go away before we get a chance to map
         */
        unprotected_mem_obj = apple_protect_pager_setup(
            VME_OFFSET(&tmp_entry),
            crypto_backing_offset,

        /* release extra ref on protected object */
        vm_object_deallocate(protected_object);

        if (unprotected_mem_obj == NULL) {

        vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
        /* can overwrite an immutable mapping */
        vmk_flags.vmkf_overwrite_immutable = TRUE;

        if (tmp_entry.used_for_jit &&
            (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
            PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
            fourk_binary_compatibility_unsafe &&
            fourk_binary_compatibility_allow_wx) {
            printf("** FOURK_COMPAT [%d]: "
                "allowing write+execute at 0x%llx\n",
                proc_selfpid(), tmp_entry.vme_start);
            vmk_flags.vmkf_map_jit = TRUE;
        }
#endif /* __arm64__ */

        /* map this memory object in place of the current one */
        map_addr = tmp_entry.vme_start;
        kr = vm_map_enter_mem_object(map,
            tmp_entry.vme_start),
            (mach_vm_offset_t) 0,
            (ipc_port_t)(uintptr_t) unprotected_mem_obj,
            tmp_entry.protection,
            tmp_entry.max_protection,
            tmp_entry.inheritance);
        assertf(kr == KERN_SUCCESS,
        assertf(map_addr == tmp_entry.vme_start,
            "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
            (uint64_t) tmp_entry.vme_start,

#if VM_MAP_DEBUG_APPLE_PROTECT
        if (vm_map_debug_apple_protect) {
            printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
                " backing:[object:%p,offset:0x%llx,"
                "crypto_backing_offset:0x%llx,"
                "crypto_start:0x%llx,crypto_end:0x%llx]\n",
                (uint64_t) (map_addr + (tmp_entry.vme_end -
                tmp_entry.vme_start)),
                VME_OFFSET(&tmp_entry),
                crypto_backing_offset,
        }
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

        /*
         * Release the reference obtained by
         * apple_protect_pager_setup().
         * The mapping (if it succeeded) is now holding a reference on
         */
        memory_object_deallocate(unprotected_mem_obj);
        unprotected_mem_obj = MEMORY_OBJECT_NULL;

        /* continue with next map entry */
        crypto_backing_offset += (tmp_entry.vme_end -
            tmp_entry.vme_start);
        crypto_backing_offset -= crypto_start;
    }

#endif /* CONFIG_CODE_DECRYPTION */
LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);

#if XNU_TARGET_OS_OSX
int malloc_no_cow = 0;
#else /* XNU_TARGET_OS_OSX */
int malloc_no_cow = 1;
#endif /* XNU_TARGET_OS_OSX */
uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
int vm_check_map_sanity = 0;
/*
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from zones -- we must
 *	initialize those zones.
 *
 *	There are three zones of interest:
 *
 *	vm_map_zone:			used to allocate maps.
 *	vm_map_entry_zone:		used to allocate map entries.
 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
 *
 *	The kernel allocates map entries from a special zone that is initially
 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
 *	the kernel to allocate more memory to an entry zone when it became
 *	empty since the very act of allocating memory implies the creation
 */
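/*
 * Illustrative sketch (editor's addition): the reserve-zone fallback
 * described above, restated in the form it takes later in
 * _vm_map_entry_create().  This adds no new behavior.
 */
#if 0   /* illustrative only */
    vm_map_entry_t entry;

    entry = (vm_map_entry_t) zalloc_noblock(vm_map_entry_zone); /* try the general zone first */
    if (entry == VM_MAP_ENTRY_NULL) {
        /* fall back to the pre-crammed reserve so kernel map operations never starve */
        entry = (vm_map_entry_t) zalloc(vm_map_entry_reserved_zone);
    }
#endif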
    const char *mez_name = "VM map entries";

    PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
        sizeof(debug4k_filter));
#endif /* MACH_ASSERT */

    vm_map_zone = zone_create(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),

    vm_map_entry_zone = zone_create(mez_name, sizeof(struct vm_map_entry),
        ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT);

    /*
     * Don't quarantine because we always need elements available
     * Disallow GC on this zone... to aid the GC.
     */
    vm_map_entry_reserved_zone = zone_create_ext(VME_RESERVED_ZONE_NAME,
        sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
        ZONE_ID_ANY, ^(zone_t z) {
        zone_set_noexpand(z, 64 * kentry_data_size);
    });

    vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
        ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);

    vm_map_holes_zone = zone_create(VM_MAP_HOLES_ZONE_NAME,
        sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS);

    /*
     * Add the stolen memory to zones, adjust zone size and stolen counts.
     */
    zcram(vm_map_zone, map_data, map_data_size);
    zcram(vm_map_entry_reserved_zone, kentry_data, kentry_data_size);
    zcram(vm_map_holes_zone, map_holes_data, map_holes_data_size);

    /*
     * Since these are covered by zones, remove them from stolen page accounting.
     */
    VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));

#if VM_MAP_DEBUG_APPLE_PROTECT
    PE_parse_boot_argn("vm_map_debug_apple_protect",
        &vm_map_debug_apple_protect,
        sizeof(vm_map_debug_apple_protect));
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
#if VM_MAP_DEBUG_FOURK
    PE_parse_boot_argn("vm_map_debug_fourk",
        &vm_map_debug_fourk,
        sizeof(vm_map_debug_fourk));
#endif /* VM_MAP_DEBUG_FOURK */
    PE_parse_boot_argn("vm_map_executable_immutable",
        &vm_map_executable_immutable,
        sizeof(vm_map_executable_immutable));
    PE_parse_boot_argn("vm_map_executable_immutable_verbose",
        &vm_map_executable_immutable_verbose,
        sizeof(vm_map_executable_immutable_verbose));

    PE_parse_boot_argn("malloc_no_cow",
        &malloc_no_cow,
        sizeof(malloc_no_cow));
    if (malloc_no_cow) {
        vm_memory_malloc_no_cow_mask = 0ULL;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
        // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
        // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
        vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
        // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
        PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
            &vm_memory_malloc_no_cow_mask,
            sizeof(vm_memory_malloc_no_cow_mask));
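#if 0   /* illustrative only */
        /*
         * Illustrative sketch (editor's addition): how a VM_MEMORY_* tag is
         * later tested against this mask.  "alias" is a hypothetical tag value.
         */
        if (vm_memory_malloc_no_cow_mask & (1ULL << alias)) {
            /* this malloc region should not be made copy-on-write */
        }
#endif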
    PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
    if (vm_check_map_sanity) {
        kprintf("VM sanity checking enabled\n");
    } else {
        kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
    }

#if DEVELOPMENT || DEBUG
    PE_parse_boot_argn("panic_on_unsigned_execute",
        &panic_on_unsigned_execute,
        sizeof(panic_on_unsigned_execute));
#endif /* DEVELOPMENT || DEBUG */
vm_map_steal_memory(void)
{
    uint16_t kentry_initial_pages;

    map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
        sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);

    /*
     * kentry_initial_pages corresponds to the number of kernel map entries
     * required during bootstrap until the asynchronous replenishment
     * scheme is activated and/or entries are available from the general
     */
#if defined(__LP64__)
    kentry_initial_pages = 10;
#else
    kentry_initial_pages = 6;
#endif

    /* If using the guard allocator, reserve more memory for the kernel
     * reserved map entry pool.
     */
    if (gzalloc_enabled()) {
        kentry_initial_pages *= 1024;
    }

    kentry_data_size = zone_get_foreign_alloc_size(VME_RESERVED_ZONE_NAME,
        sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
        kentry_initial_pages);

    map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
        sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
        kentry_initial_pages);

    /*
     * Steal a contiguous range of memory so that a simple range check
     * can validate foreign addresses being freed/crammed to these
     */
    vm_size_t total_size;
    if (os_add3_overflow(map_data_size, kentry_data_size,
        map_holes_data_size, &total_size)) {
        panic("vm_map_steal_memory: overflow in amount of memory requested");
    }
    map_data = zone_foreign_mem_init(total_size);
    kentry_data = map_data + map_data_size;
    map_holes_data = kentry_data + kentry_data_size;
}
STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
boolean_t vm_map_supports_hole_optimization = FALSE;

vm_kernel_reserved_entry_init(void)
{
    zone_prio_refill_configure(vm_map_entry_reserved_zone);

    /*
     * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
     */
    zone_prio_refill_configure(vm_map_holes_zone);
    vm_map_supports_hole_optimization = TRUE;
}
vm_map_disable_hole_optimization(vm_map_t map)
{
    vm_map_entry_t head_entry, hole_entry, next_hole_entry;

    if (map->holelistenabled) {
        head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

        while (hole_entry != NULL) {
            next_hole_entry = hole_entry->vme_next;

            hole_entry->vme_next = NULL;
            hole_entry->vme_prev = NULL;
            zfree(vm_map_holes_zone, hole_entry);

            if (next_hole_entry == head_entry) {

            hole_entry = next_hole_entry;
        }

        map->holes_list = NULL;
        map->holelistenabled = FALSE;

        map->first_free = vm_map_first_entry(map);
        SAVE_HINT_HOLE_WRITE(map, NULL);
    }
}
vm_kernel_map_is_kernel(vm_map_t map)
{
    return map->pmap == kernel_pmap;
}
/*
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */
    vm_map_offset_t min,
    vm_map_offset_t max,

    options |= VM_MAP_CREATE_PAGEABLE;

    return vm_map_create_options(pmap, min, max, options);
vm_map_create_options(
    vm_map_offset_t min,
    vm_map_offset_t max,
    struct vm_map_links *hole_entry = NULL;

    if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
        /* unknown option */
    }

    result = (vm_map_t) zalloc(vm_map_zone);
    if (result == VM_MAP_NULL) {
        panic("vm_map_create");
    }

    vm_map_first_entry(result) = vm_map_to_entry(result);
    vm_map_last_entry(result) = vm_map_to_entry(result);
    result->hdr.nentries = 0;
    if (options & VM_MAP_CREATE_PAGEABLE) {
        result->hdr.entries_pageable = TRUE;
    } else {
        result->hdr.entries_pageable = FALSE;
    }

    vm_map_store_init(&(result->hdr));

    result->hdr.page_shift = PAGE_SHIFT;

    result->user_wire_limit = MACH_VM_MAX_ADDRESS;      /* default limit is unlimited */
    result->user_wire_size = 0;
#if XNU_TARGET_OS_OSX
    result->vmmap_high_start = 0;
#endif /* XNU_TARGET_OS_OSX */
    os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
#if TASK_SWAPPER
    result->res_count = 1;
    result->sw_state = MAP_SW_IN;
#endif /* TASK_SWAPPER */
    result->pmap = pmap;
    result->min_offset = min;
    result->max_offset = max;
    result->wiring_required = FALSE;
    result->no_zero_fill = FALSE;
    result->mapped_in_other_pmaps = FALSE;
    result->wait_for_space = FALSE;
    result->switch_protect = FALSE;
    result->disable_vmentry_reuse = FALSE;
    result->map_disallow_data_exec = FALSE;
    result->is_nested_map = FALSE;
    result->map_disallow_new_exec = FALSE;
    result->terminated = FALSE;
    result->cs_enforcement = FALSE;
    result->highest_entry_end = 0;
    result->first_free = vm_map_to_entry(result);
    result->hint = vm_map_to_entry(result);
    result->jit_entry_exists = FALSE;
    result->is_alien = FALSE;
    result->reserved_regions = FALSE;

    /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
    if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
        result->has_corpse_footprint = TRUE;
        result->holelistenabled = FALSE;
        result->vmmap_corpse_footprint = NULL;
    } else {
        result->has_corpse_footprint = FALSE;
        if (vm_map_supports_hole_optimization) {
            hole_entry = zalloc(vm_map_holes_zone);

            hole_entry->start = min;
#if defined(__arm__) || defined(__arm64__)
            hole_entry->end = result->max_offset;
#else
            hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
            result->holes_list = result->hole_hint = hole_entry;
            hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
            result->holelistenabled = TRUE;
        } else {
            result->holelistenabled = FALSE;
        }
    }

    vm_map_lock_init(result);
    lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
vm_map_adjusted_size(vm_map_t map)
{
    struct vm_reserved_region *regions = NULL;
    size_t num_regions = 0;
    mach_vm_size_t reserved_size = 0, map_size = 0;

    if (map == NULL || (map->size == 0)) {

    map_size = map->size;

    if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
        /*
         * No special reserved regions or not an exotic map or the task
         * is terminating and these special regions might have already
         */

    num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
    assert((num_regions == 0) || (num_regions > 0 && regions != NULL));

    while (num_regions) {
        reserved_size += regions[--num_regions].vmrr_size;
    }

    /*
     * There are a few places where the map is being switched out due to
     * 'termination' without that bit being set (e.g. exec and corpse purging).
     * In those cases, we could have the map's regions being deallocated on
     * a core while some accounting process is trying to get the map's size.
     * So this assert can't be enabled till all those places are uniform in
     * their use of the 'map->terminated' bit.
     *
     * assert(map_size >= reserved_size);
     */

    return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
}
/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion in the
 *	given map (or map copy).  No fields are filled.
 */
#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)

#define vm_map_copy_entry_create(copy, map_locked) \
	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)

unsigned reserved_zalloc_count, nonreserved_zalloc_count;
static vm_map_entry_t
_vm_map_entry_create(
    struct vm_map_header *map_header, boolean_t __unused map_locked)
{
    vm_map_entry_t entry;

    zone = vm_map_entry_zone;

    assert(map_header->entries_pageable ? !map_locked : TRUE);

    if (map_header->entries_pageable) {
        entry = (vm_map_entry_t) zalloc(zone);
    } else {
        entry = (vm_map_entry_t) zalloc_noblock(zone);

        if (entry == VM_MAP_ENTRY_NULL) {
            zone = vm_map_entry_reserved_zone;
            entry = (vm_map_entry_t) zalloc(zone);
            OSAddAtomic(1, &reserved_zalloc_count);
        } else {
            OSAddAtomic(1, &nonreserved_zalloc_count);
        }
    }

    if (entry == VM_MAP_ENTRY_NULL) {
        panic("vm_map_entry_create");
    }
    *entry = vm_map_entry_template;
    entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);

    vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
#if MAP_ENTRY_CREATION_DEBUG
    entry->vme_creation_maphdr = map_header;
    backtrace(&entry->vme_creation_bt[0],
        (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
#endif
/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 *
 *	write map lock held so no need to
 *	do anything special to insure correctness
 */
#define vm_map_entry_dispose(map, entry) \
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define vm_map_copy_entry_dispose(copy, entry) \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))

_vm_map_entry_dispose(
    struct vm_map_header *map_header,
    vm_map_entry_t entry)
{
    if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
        zone = vm_map_entry_zone;
    } else {
        zone = vm_map_entry_reserved_zone;
    }

    if (!map_header->entries_pageable) {
        if (zone == vm_map_entry_zone) {
            OSAddAtomic(-1, &nonreserved_zalloc_count);
        } else {
            OSAddAtomic(-1, &reserved_zalloc_count);
        }
    }
static boolean_t first_free_check = FALSE;

first_free_is_valid(
    if (!first_free_check) {

    return first_free_is_valid_store( map );
#endif /* MACH_ASSERT */

#define vm_map_copy_entry_link(copy, after_where, entry) \
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry) \
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
#if MACH_ASSERT && TASK_SWAPPER
/*
 *	vm_map_res_reference:
 *
 *	Adds another valid residence count to the given map.
 *
 *	Map is locked so this function can be called from
 */
vm_map_res_reference(vm_map_t map)
{
    /* assert map is locked */
    assert(map->res_count >= 0);
    assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
    if (map->res_count == 0) {
        lck_mtx_unlock(&map->s_lock);
        lck_mtx_lock(&map->s_lock);

/*
 *	vm_map_reference_swap:
 *
 *	Adds valid reference and residence counts to the given map.
 *
 *	The map may not be in memory (i.e. zero residence count).
 */
vm_map_reference_swap(vm_map_t map)
{
    assert(map != VM_MAP_NULL);
    lck_mtx_lock(&map->s_lock);
    assert(map->res_count >= 0);
    assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
    os_ref_retain_locked(&map->map_refcnt);
    vm_map_res_reference(map);
    lck_mtx_unlock(&map->s_lock);
}

/*
 *	vm_map_res_deallocate:
 *
 *	Decrement residence count on a map; possibly causing swapout.
 *
 *	The map must be in memory (i.e. non-zero residence count).
 *
 *	The map is locked, so this function is callable from vm_map_deallocate.
 */
vm_map_res_deallocate(vm_map_t map)
{
    assert(map->res_count > 0);
    if (--map->res_count == 0) {
        lck_mtx_unlock(&map->s_lock);
        vm_map_swapout(map);
        lck_mtx_lock(&map->s_lock);
    }
    assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
}
#endif /* MACH_ASSERT && TASK_SWAPPER */
/*
 *	Actually destroy a map.
 */
    /* final cleanup: no need to unnest shared region */
    flags |= VM_MAP_REMOVE_NO_UNNESTING;
    /* final cleanup: ok to remove immutable mappings */
    flags |= VM_MAP_REMOVE_IMMUTABLE;
    /* final cleanup: allow gaps in range */
    flags |= VM_MAP_REMOVE_GAPS_OK;

    /* clean up regular map entries */
    (void) vm_map_delete(map, map->min_offset, map->max_offset,
        flags, VM_MAP_NULL);
    /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
#if !defined(__arm__)
    (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
        flags, VM_MAP_NULL);
#endif /* !__arm__ */

    vm_map_disable_hole_optimization(map);
    vm_map_corpse_footprint_destroy(map);

    assert(map->hdr.nentries == 0);

    pmap_destroy(map->pmap);

    if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
        /*
         * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
         * And this is regardless of whether the lck_mtx_ext_t is embedded in the
         * structure or kalloc'ed via lck_mtx_init.
         * An example is s_lock_ext within struct _vm_map.
         *
         * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
         * can add another tag to detect embedded vs alloc'ed indirect external
         * mutexes but that'll be additional checks in the lock path and require
         * updating dependencies for the old vs new tag.
         *
         * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
         * just when lock debugging is ON, we choose to forego explicitly destroying
         * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
         * count on vm_map_lck_grp, which has no serious side-effect.
         */
    } else {
        lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
        lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
    }

    zfree(vm_map_zone, map);
/*
 * Returns pid of the task with the largest number of VM map entries.
 * Used in the zone-map-exhaustion jetsam path.
 */
find_largest_process_vm_map_entries(void)
{
    pid_t victim_pid = -1;
    int max_vm_map_entries = 0;
    task_t task = TASK_NULL;
    queue_head_t *task_list = &tasks;

    lck_mtx_lock(&tasks_threads_lock);
    queue_iterate(task_list, task, task_t, tasks) {
        if (task == kernel_task || !task->active) {

        vm_map_t task_map = task->map;
        if (task_map != VM_MAP_NULL) {
            int task_vm_map_entries = task_map->hdr.nentries;
            if (task_vm_map_entries > max_vm_map_entries) {
                max_vm_map_entries = task_vm_map_entries;
                victim_pid = pid_from_task(task);
            }
        }
    }
    lck_mtx_unlock(&tasks_threads_lock);

    printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
/*
 *	vm_map_swapin/vm_map_swapout
 *
 *	Swap a map in and out, either referencing or releasing its resources.
 *	These functions are internal use only; however, they must be exported
 *	because they may be called from macros, which are exported.
 *
 *	In the case of swapout, there could be races on the residence count,
 *	so if the residence count is up, we return, assuming that a
 *	vm_map_deallocate() call in the near future will bring us back.
 *
 *	-- We use the map write lock for synchronization among races.
 *	-- The map write lock, and not the simple s_lock, protects the
 *	   swap state of the map.
 *	-- If a map entry is a share map, then we hold both locks, in
 *	   hierarchical order.
 *
 *	Synchronization Notes:
 *	1) If a vm_map_swapin() call happens while swapout in progress, it
 *	will block on the map lock and proceed when swapout is through.
 *	2) A vm_map_reference() call at this time is illegal, and will
 *	cause a panic.  vm_map_reference() is only allowed on resident
 *	maps, since it refuses to block.
 *	3) A vm_map_swapin() call during a swapin will block, and
 *	proceed when the first swapin is done, turning into a nop.
 *	This is the reason the res_count is not incremented until
 *	after the swapin is complete.
 *	4) There is a timing hole after the checks of the res_count, before
 *	the map lock is taken, during which a swapin may get the lock
 *	before a swapout about to happen.  If this happens, the swapin
 *	will detect the state and increment the reference count, causing
 *	the swapout to be a nop, thereby delaying it until a later
 *	vm_map_deallocate.  If the swapout gets the lock first, then
 *	the swapin will simply block until the swapout is done, and
 *
 *	Because vm_map_swapin() is potentially an expensive operation, it
 *	should be used with caution.
 *
 *	1) A map with a residence count of zero is either swapped, or
 *	2) A map with a non-zero residence count is either resident,
 *	or being swapped in.
 */
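/*
 * Illustrative sketch (editor's addition): the res_count race described in
 * note 4 above, seen from the swapout side.  It mirrors the check at the
 * top of vm_map_swapout() below; the function name is hypothetical.
 */
#if 0   /* illustrative only */
static void
vm_map_swapout_race_example(vm_map_t map)
{
    lck_mtx_lock(&map->s_lock);
    if (map->res_count != 0) {
        /* a swapin won the race; this swapout becomes a nop */
        lck_mtx_unlock(&map->s_lock);
        return;
    }
    lck_mtx_unlock(&map->s_lock);
    /* ...proceed with the actual swapout... */
}
#endif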
int vm_map_swap_enable = 1;

vm_map_swapin(vm_map_t map)
{
    vm_map_entry_t entry;

    if (!vm_map_swap_enable) {          /* debug */

    /*
     * First deal with various races.
     */
    if (map->sw_state == MAP_SW_IN) {
        /*
         * we raced with swapout and won.  Returning will incr.
         * the res_count, turning the swapout into a nop.
         */

    /*
     * The residence count must be zero.  If we raced with another
     * swapin, the state would have been IN; if we raced with a
     * swapout (after another competing swapin), we must have lost
     * the race to get here (see above comment), in which case
     * res_count is still 0.
     */
    assert(map->res_count == 0);

    /*
     * There are no intermediate states of a map going out or
     * coming in, since the map is locked during the transition.
     */
    assert(map->sw_state == MAP_SW_OUT);

    /*
     * We now operate upon each map entry.  If the entry is a sub-
     * or share-map, we call vm_map_res_reference upon it.
     * If the entry is an object, we call vm_object_res_reference
     * (this may iterate through the shadow chain).
     * Note that we hold the map locked the entire time,
     * even if we get back here via a recursive call in
     * vm_map_res_reference.
     */
    entry = vm_map_first_entry(map);

    while (entry != vm_map_to_entry(map)) {
        if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
            if (entry->is_sub_map) {
                vm_map_t lmap = VME_SUBMAP(entry);
                lck_mtx_lock(&lmap->s_lock);
                vm_map_res_reference(lmap);
                lck_mtx_unlock(&lmap->s_lock);
            } else {
                vm_object_t object = VME_OBJECT(entry);
                vm_object_lock(object);
                /*
                 * This call may iterate through the
                 */
                vm_object_res_reference(object);
                vm_object_unlock(object);
            }
        }
        entry = entry->vme_next;
    }
    assert(map->sw_state == MAP_SW_OUT);
    map->sw_state = MAP_SW_IN;
}

vm_map_swapout(vm_map_t map)
{
    vm_map_entry_t entry;

    /*
     * First deal with various races.
     * If we raced with a swapin and lost, the residence count
     * will have been incremented to 1, and we simply return.
     */
    lck_mtx_lock(&map->s_lock);
    if (map->res_count != 0) {
        lck_mtx_unlock(&map->s_lock);
    }
    lck_mtx_unlock(&map->s_lock);

    /*
     * There are no intermediate states of a map going out or
     * coming in, since the map is locked during the transition.
     */
    assert(map->sw_state == MAP_SW_IN);

    if (!vm_map_swap_enable) {

    /*
     * We now operate upon each map entry.  If the entry is a sub-
     * or share-map, we call vm_map_res_deallocate upon it.
     * If the entry is an object, we call vm_object_res_deallocate
     * (this may iterate through the shadow chain).
     * Note that we hold the map locked the entire time,
     * even if we get back here via a recursive call in
     * vm_map_res_deallocate.
     */
    entry = vm_map_first_entry(map);

    while (entry != vm_map_to_entry(map)) {
        if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
            if (entry->is_sub_map) {
                vm_map_t lmap = VME_SUBMAP(entry);
                lck_mtx_lock(&lmap->s_lock);
                vm_map_res_deallocate(lmap);
                lck_mtx_unlock(&lmap->s_lock);
            } else {
                vm_object_t object = VME_OBJECT(entry);
                vm_object_lock(object);
                /*
                 * This call may take a long time,
                 * since it could actively push
                 * out pages (if we implement it
                 */
                vm_object_res_deallocate(object);
                vm_object_unlock(object);
            }
        }
        entry = entry->vme_next;
    }
    assert(map->sw_state == MAP_SW_IN);
    map->sw_state = MAP_SW_OUT;
}

#endif /* TASK_SWAPPER */
/*
 *	vm_map_lookup_entry:	[ internal use only ]
 *
 *	Calls into the vm map store layer to find the map
 *	entry containing (or immediately preceding) the
 *	specified address in the given map; the entry is returned
 *	in the "entry" parameter.  The boolean
 *	result indicates whether the address is
 *	actually contained in the map.
 */
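/*
 * Illustrative sketch (editor's addition): the typical caller pattern, as
 * used by vm_map_set_cache_attr() earlier in this file.  The wrapping
 * function name is hypothetical.
 */
#if 0   /* illustrative only */
static void
vm_map_lookup_entry_example(vm_map_t map, vm_map_offset_t addr)
{
    vm_map_entry_t entry;

    vm_map_lock_read(map);
    if (vm_map_lookup_entry(map, addr, &entry)) {
        /* addr falls inside [entry->vme_start, entry->vme_end) */
    } else {
        /* entry is the entry immediately preceding the hole that contains addr */
    }
    vm_map_unlock_read(map);
}
#endif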
vm_map_lookup_entry(
    vm_map_offset_t address,
    vm_map_entry_t *entry)              /* OUT */
{
    return vm_map_store_lookup_entry( map, address, entry );
}
/*
 *	Routine:	vm_map_find_space
 *		Allocate a range in the specified virtual address map,
 *		returning the entry allocated for that range.
 *		Used by kmem_alloc, etc.
 *
 *		The map must NOT be locked. It will be returned locked
 *		on KERN_SUCCESS, unlocked on failure.
 *
 *		If an entry is allocated, the object/offset fields
 *		are initialized to zero.
 *
 *		If VM_MAP_FIND_LAST_FREE flag is set, allocate from end of map. This
 *		is currently only used for allocating memory for zones backing
 *		one of the kalloc heaps.(rdar://65832263)
 */
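/*
 * Illustrative sketch (editor's addition): the calling convention described
 * above -- the map comes back locked on success.  The wrapping function is
 * hypothetical and the argument list is abridged, not the exact signature.
 */
#if 0   /* illustrative only */
static kern_return_t
vm_map_find_space_example(vm_map_size_t size, vm_map_offset_t mask, vm_object_t object)
{
    vm_map_offset_t addr;
    vm_map_entry_t entry;
    kern_return_t kr;

    kr = vm_map_find_space(kernel_map, &addr, size, mask,
        /* ...flag/tag arguments elided... */ &entry);
    if (kr == KERN_SUCCESS) {
        /* the entry's object/offset start out zeroed; the map comes back locked */
        VME_OBJECT_SET(entry, object);
        vm_map_unlock(kernel_map);
    }
    return kr;
}
#endif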
    vm_map_offset_t *address,           /* OUT */
    vm_map_offset_t mask,
    vm_map_kernel_flags_t vmk_flags,
    vm_map_entry_t *o_entry)            /* OUT */
{
    vm_map_entry_t entry, new_entry, hole_entry;
    vm_map_offset_t start;
    vm_map_offset_t end;

        return KERN_INVALID_ARGUMENT;

    new_entry = vm_map_entry_create(map, FALSE);

    if (flags & VM_MAP_FIND_LAST_FREE) {
        assert(!map->disable_vmentry_reuse);
        /* TODO: Make backward lookup generic and support guard pages */
        assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
        assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));

        /* Allocate space from end of map */
        vm_map_store_find_last_free(map, &entry);

        if (entry == vm_map_to_entry(map)) {
            end = map->max_offset;
        } else {
            end = entry->vme_start;
        }

            vm_map_entry_t prev;

            if ((start < map->min_offset) || end < start) {

            prev = entry->vme_prev;

            if (prev == vm_map_to_entry(map)) {

            if (prev->vme_end <= start) {

            /*
             * Didn't fit -- move to the next entry.
             */
            end = entry->vme_start;
    } else {
        if (vmk_flags.vmkf_guard_after) {
            /* account for the back guard page in the size */
            size += VM_MAP_PAGE_SIZE(map);
        }

        /*
         * Look for the first possible address; if there's already
         * something at this address, we have to start after it.
         */
        if (map->disable_vmentry_reuse == TRUE) {
            VM_MAP_HIGHEST_ENTRY(map, entry, start);
        } else {
            if (map->holelistenabled) {
                hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

                if (hole_entry == NULL) {
                    /*
                     * No more space in the map?
                     */

                start = entry->vme_start;
            } else {
                assert(first_free_is_valid(map));
                if ((entry = map->first_free) == vm_map_to_entry(map)) {
                    start = map->min_offset;
                } else {
                    start = entry->vme_end;
                }
            }
        }

        /*
         * In any case, the "entry" always precedes
         * the proposed new region throughout the loop:
         */
            vm_map_entry_t next;

            /*
             * Find the end of the proposed new region.
             * Be sure we didn't go beyond the end, or
             * wrap around the address.
             */
            if (vmk_flags.vmkf_guard_before) {
                /* reserve space for the front guard page */
                start += VM_MAP_PAGE_SIZE(map);
            }
            end = ((start + mask) & ~mask);

            assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
            assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

            if ((end > map->max_offset) || (end < start)) {

            next = entry->vme_next;

            if (map->holelistenabled) {
                if (entry->vme_end >= end) {

            /*
             * If there are no more entries, we must win.
             *
             * If there is another entry, it must be
             * after the end of the potential new region.
             */
            if (next == vm_map_to_entry(map)) {

            if (next->vme_start >= end) {

            /*
             * Didn't fit -- move to the next entry.
             */
            if (map->holelistenabled) {
                if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {

                start = entry->vme_start;
            } else {
                start = entry->vme_end;
            }

        if (vmk_flags.vmkf_guard_before) {
            /* go back for the front guard page */
            start -= VM_MAP_PAGE_SIZE(map);
        }
    }

    if (map->holelistenabled) {
        if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
            panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
        }
    }

    /*
     *	"start" and "end" should define the endpoints of the
     *		available new range, and
     *	"entry" should refer to the region before the new
     *
     *	the map should be locked.
     */
    assert(start < end);
    new_entry->vme_start = start;
    new_entry->vme_end = end;
    assert(page_aligned(new_entry->vme_start));
    assert(page_aligned(new_entry->vme_end));
    assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
        VM_MAP_PAGE_MASK(map)));
    assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
        VM_MAP_PAGE_MASK(map)));

    new_entry->is_shared = FALSE;
    new_entry->is_sub_map = FALSE;
    new_entry->use_pmap = TRUE;
    VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
    VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);

    new_entry->needs_copy = FALSE;

    new_entry->inheritance = VM_INHERIT_DEFAULT;
    new_entry->protection = VM_PROT_DEFAULT;
    new_entry->max_protection = VM_PROT_ALL;
    new_entry->behavior = VM_BEHAVIOR_DEFAULT;
    new_entry->wired_count = 0;
    new_entry->user_wired_count = 0;

    new_entry->in_transition = FALSE;
    new_entry->needs_wakeup = FALSE;
    new_entry->no_cache = FALSE;
    new_entry->permanent = FALSE;
    new_entry->superpage_size = FALSE;
    if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
        new_entry->map_aligned = TRUE;
    } else {
        new_entry->map_aligned = FALSE;
    }

    new_entry->used_for_jit = FALSE;
    new_entry->pmap_cs_associated = FALSE;
    new_entry->zero_wired_pages = FALSE;
    new_entry->iokit_acct = FALSE;
    new_entry->vme_resilient_codesign = FALSE;
    new_entry->vme_resilient_media = FALSE;
    if (vmk_flags.vmkf_atomic_entry) {
        new_entry->vme_atomic = TRUE;
    } else {
        new_entry->vme_atomic = FALSE;
    }

    VME_ALIAS_SET(new_entry, tag);

    /*
     * Insert the new entry into the list
     */
    vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);

    /*
     * Update the lookup hint
     */
    SAVE_HINT_MAP_WRITE(map, new_entry);

    *o_entry = new_entry;
    return KERN_SUCCESS;

    vm_map_entry_dispose(map, new_entry);

    return KERN_NO_SPACE;
2182 int vm_map_pmap_enter_print
= FALSE
;
2183 int vm_map_pmap_enter_enable
= FALSE
;
/*
 * Routine:	vm_map_pmap_enter [internal only]
 *
 * Description:
 *	Force pages from the specified object to be entered into
 *	the pmap at the specified address if they are present.
 *	As soon as a page is not found in the object, the scan ends.
 *
 * In/out conditions:
 *	The source map should not be locked on entry.
 */
__unused static void
vm_map_pmap_enter(
	vm_map_t                map,
	vm_map_offset_t         addr,
	vm_map_offset_t         end_addr,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_prot_t               protection)
{
2210 struct vm_object_fault_info fault_info
= {};
2212 if (map
->pmap
== 0) {
2216 assert(VM_MAP_PAGE_SHIFT(map
) == PAGE_SHIFT
);
2218 while (addr
< end_addr
) {
		/*
		 * From vm_map_enter(), we come into this function without the map
		 * lock held or the object lock held.
		 * We haven't taken a reference on the object either.
		 * We should do a proper lookup on the map to make sure
		 * that things are sane before we go locking objects that
		 * could have been deallocated from under us.
		 */
2232 vm_object_lock(object
);
2234 m
= vm_page_lookup(object
, offset
);
2236 if (m
== VM_PAGE_NULL
|| m
->vmp_busy
|| m
->vmp_fictitious
||
2237 (m
->vmp_unusual
&& (m
->vmp_error
|| m
->vmp_restart
|| m
->vmp_absent
))) {
2238 vm_object_unlock(object
);
2242 if (vm_map_pmap_enter_print
) {
2243 printf("vm_map_pmap_enter:");
2244 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2245 map
, (unsigned long long)addr
, object
, (unsigned long long)offset
);
2247 type_of_fault
= DBG_CACHE_HIT_FAULT
;
2248 kr
= vm_fault_enter(m
, map
->pmap
,
2251 protection
, protection
,
2253 FALSE
, /* change_wiring */
2254 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
2256 NULL
, /* need_retry */
2259 vm_object_unlock(object
);
2261 offset
+= PAGE_SIZE_64
;
2266 boolean_t
vm_map_pmap_is_empty(
2268 vm_map_offset_t start
,
2269 vm_map_offset_t end
);
2271 vm_map_pmap_is_empty(
2273 vm_map_offset_t start
,
2274 vm_map_offset_t end
)
2276 #ifdef MACHINE_PMAP_IS_EMPTY
2277 return pmap_is_empty(map
->pmap
, start
, end
);
2278 #else /* MACHINE_PMAP_IS_EMPTY */
2279 vm_map_offset_t offset
;
2282 if (map
->pmap
== NULL
) {
2286 for (offset
= start
;
2288 offset
+= PAGE_SIZE
) {
2289 phys_page
= pmap_find_phys(map
->pmap
, offset
);
2291 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2292 "page %d at 0x%llx\n",
2293 map
, (long long)start
, (long long)end
,
2294 phys_page
, (long long)offset
);
2299 #endif /* MACHINE_PMAP_IS_EMPTY */
#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000

static kern_return_t
vm_map_random_address_for_size(
	vm_map_t        map,
	vm_map_offset_t *address,
	vm_map_size_t   size)
{
	kern_return_t   kr = KERN_SUCCESS;
	int             tries = 0;
	vm_map_offset_t random_addr = 0;
	vm_map_offset_t hole_end;

	vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
	vm_map_size_t   vm_hole_size = 0;
	vm_map_size_t   addr_space_size;

	addr_space_size = vm_map_max(map) - vm_map_min(map);

	assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));

	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		random_addr = ((vm_map_offset_t)random()) << VM_MAP_PAGE_SHIFT(map);
		random_addr = vm_map_trunc_page(
			vm_map_min(map) + (random_addr % addr_space_size),
			VM_MAP_PAGE_MASK(map));

		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
			if (prev_entry == vm_map_to_entry(map)) {
				next_entry = vm_map_first_entry(map);
			} else {
				next_entry = prev_entry->vme_next;
			}
			if (next_entry == vm_map_to_entry(map)) {
				hole_end = vm_map_max(map);
			} else {
				hole_end = next_entry->vme_start;
			}
			vm_hole_size = hole_end - random_addr;
			if (vm_hole_size >= size) {
				*address = random_addr;
				break;
			}
		}
		tries++;
	}

	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		kr = KERN_NO_SPACE;
	}
	return kr;
}
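/*
 * The routine above backs VM_FLAGS_RANDOM_ADDR: the kernel repeatedly picks
 * a page-aligned candidate and keeps it only if a large-enough hole follows.
 * Below is a minimal user-space sketch of requesting such a randomized
 * placement, assuming VM_FLAGS_RANDOM_ADDR (from <mach/vm_statistics.h>) is
 * accepted for the target map; it is an illustration, not part of the VM code.
 */
#if 0   /* illustrative user-space sketch; not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
allocate_at_random_address(mach_vm_address_t *out_addr, mach_vm_size_t size)
{
	*out_addr = 0;
	/* ANYWHERE + RANDOM_ADDR: let the kernel pick a randomized free range */
	return mach_vm_allocate(mach_task_self(), out_addr, size,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR);
}
#endif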
2356 vm_memory_malloc_no_cow(
2359 uint64_t alias_mask
;
2365 alias_mask
= 1ULL << alias
;
2366 if (alias_mask
& vm_memory_malloc_no_cow_mask
) {
/*
 * Routine:	vm_map_enter
 *
 * Description:
 *	Allocate a range in the specified virtual address map.
 *	The resulting range will refer to memory defined by
 *	the given memory object and offset into that object.
 *
 *	Arguments are as defined in the vm_map call.
 */
static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;
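/*
 * A minimal user-space sketch of how this routine is typically reached:
 * mach_vm_allocate() and mach_vm_map() (declared in <mach/mach_vm.h>) both
 * funnel into vm_map_enter() on the kernel side.  This only illustrates the
 * calling convention and is not part of the VM code.
 */
#if 0   /* illustrative user-space sketch; not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

int
main(void)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t    size = 16 * 1024;
	kern_return_t     kr;

	/* VM_FLAGS_ANYWHERE: let the kernel pick a free range in the map */
	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "mach_vm_allocate: kr=%d\n", kr);
		return 1;
	}
	printf("allocated [0x%llx, 0x%llx)\n",
	    (unsigned long long)addr, (unsigned long long)(addr + size));

	kr = mach_vm_deallocate(mach_task_self(), addr, size);
	return kr == KERN_SUCCESS ? 0 : 1;
}
#endif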
kern_return_t
vm_map_enter(
	vm_map_t                map,
	vm_map_offset_t         *address,       /* IN/OUT */
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	vm_tag_t                alias,
	vm_object_t             object,
	vm_object_offset_t      offset,
	boolean_t               needs_copy,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
2400 vm_map_entry_t entry
, new_entry
;
2401 vm_map_offset_t start
, tmp_start
, tmp_offset
;
2402 vm_map_offset_t end
, tmp_end
;
2403 vm_map_offset_t tmp2_start
, tmp2_end
;
2404 vm_map_offset_t desired_empty_end
;
2405 vm_map_offset_t step
;
2406 kern_return_t result
= KERN_SUCCESS
;
2407 vm_map_t zap_old_map
= VM_MAP_NULL
;
2408 vm_map_t zap_new_map
= VM_MAP_NULL
;
2409 boolean_t map_locked
= FALSE
;
2410 boolean_t pmap_empty
= TRUE
;
2411 boolean_t new_mapping_established
= FALSE
;
2412 boolean_t keep_map_locked
= vmk_flags
.vmkf_keep_map_locked
;
2413 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
2414 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
2415 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
2416 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
2417 boolean_t is_submap
= vmk_flags
.vmkf_submap
;
2418 boolean_t permanent
= vmk_flags
.vmkf_permanent
;
2419 boolean_t no_copy_on_read
= vmk_flags
.vmkf_no_copy_on_read
;
2420 boolean_t entry_for_jit
= vmk_flags
.vmkf_map_jit
;
2421 boolean_t iokit_acct
= vmk_flags
.vmkf_iokit_acct
;
2422 boolean_t translated_allow_execute
= vmk_flags
.vmkf_translated_allow_execute
;
2423 boolean_t resilient_codesign
= ((flags
& VM_FLAGS_RESILIENT_CODESIGN
) != 0);
2424 boolean_t resilient_media
= ((flags
& VM_FLAGS_RESILIENT_MEDIA
) != 0);
2425 boolean_t random_address
= ((flags
& VM_FLAGS_RANDOM_ADDR
) != 0);
2426 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
2427 vm_tag_t user_alias
;
2428 vm_map_offset_t effective_min_offset
, effective_max_offset
;
2430 boolean_t clear_map_aligned
= FALSE
;
2431 vm_map_entry_t hole_entry
;
2432 vm_map_size_t chunk_size
= 0;
2434 assertf(vmk_flags
.__vmkf_unused
== 0, "vmk_flags unused=0x%x\n", vmk_flags
.__vmkf_unused
);
2436 if (flags
& VM_FLAGS_4GB_CHUNK
) {
2437 #if defined(__LP64__)
2438 chunk_size
= (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2439 #else /* __LP64__ */
2440 chunk_size
= ANON_CHUNK_SIZE
;
2441 #endif /* __LP64__ */
2443 chunk_size
= ANON_CHUNK_SIZE
;
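	/*
	 * A minimal user-space sketch of the flag handled above: passing
	 * VM_FLAGS_4GB_CHUNK (from <mach/vm_statistics.h>) with a very large
	 * anonymous allocation lets the backing objects be carved into 4GB
	 * chunks on LP64 instead of the default chunk size.  Illustration only.
	 */
#if 0   /* illustrative user-space sketch; not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
allocate_large_anonymous_region(mach_vm_address_t *out_addr, mach_vm_size_t size)
{
	*out_addr = 0;
	return mach_vm_allocate(mach_task_self(), out_addr, size,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_4GB_CHUNK);
}
#endif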
2446 if (superpage_size
) {
2447 switch (superpage_size
) {
		/*
		 * Note that the current implementation only supports
		 * a single size for superpages, SUPERPAGE_SIZE, per
		 * architecture.  As soon as more sizes are to be
		 * supported, SUPERPAGE_SIZE has to be replaced
		 * with a lookup of the size depending on superpage_size.
		 */
2456 case SUPERPAGE_SIZE_ANY
:
2457 /* handle it like 2 MB and round up to page size */
2458 size
= (size
+ 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2460 case SUPERPAGE_SIZE_2MB
:
2464 return KERN_INVALID_ARGUMENT
;
2466 mask
= SUPERPAGE_SIZE
- 1;
2467 if (size
& (SUPERPAGE_SIZE
- 1)) {
2468 return KERN_INVALID_ARGUMENT
;
2470 inheritance
= VM_INHERIT_NONE
; /* fork() children won't inherit superpages */
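	/*
	 * A minimal user-space sketch of asking for a superpage-backed
	 * allocation, which is what sets "superpage_size" above (assuming the
	 * platform supports VM_FLAGS_SUPERPAGE_SIZE_2MB; the request fails
	 * with an error otherwise).  Illustration only.
	 */
#if 0   /* illustrative user-space sketch; not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
allocate_superpage(mach_vm_address_t *out_addr)
{
	mach_vm_size_t size = 2 * 1024 * 1024;  /* must be a multiple of 2MB */

	*out_addr = 0;
	return mach_vm_allocate(mach_task_self(), out_addr, size,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
}
#endif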
2474 if ((cur_protection
& VM_PROT_WRITE
) &&
2475 (cur_protection
& VM_PROT_EXECUTE
) &&
2476 #if XNU_TARGET_OS_OSX
2477 map
->pmap
!= kernel_pmap
&&
2478 (cs_process_global_enforcement() ||
2479 (vmk_flags
.vmkf_cs_enforcement_override
2480 ? vmk_flags
.vmkf_cs_enforcement
2481 : (vm_map_cs_enforcement(map
)
2483 || !VM_MAP_IS_EXOTIC(map
)
2484 #endif /* __arm64__ */
2486 #endif /* XNU_TARGET_OS_OSX */
2487 (VM_MAP_POLICY_WX_FAIL(map
) ||
2488 VM_MAP_POLICY_WX_STRIP_X(map
)) &&
2490 boolean_t vm_protect_wx_fail
= VM_MAP_POLICY_WX_FAIL(map
);
2495 vm_prot_t
, cur_protection
);
2496 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2498 (current_task()->bsd_info
2499 ? proc_name_address(current_task()->bsd_info
)
2502 (vm_protect_wx_fail
? "failing" : "turning off execute"));
2503 cur_protection
&= ~VM_PROT_EXECUTE
;
2504 if (vm_protect_wx_fail
) {
2505 return KERN_PROTECTION_FAILURE
;
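	/*
	 * User-visible consequence of the check above: a plain anonymous
	 * mapping cannot end up both writable and executable.  A JIT is
	 * expected to request a MAP_JIT region instead (sketch below,
	 * assuming MAP_JIT from <sys/mman.h> and that the process is entitled
	 * to use dynamic code signing) and to toggle W/X per thread rather
	 * than holding both at once.  Illustration only.
	 */
#if 0   /* illustrative user-space sketch; not compiled with the kernel */
#include <sys/mman.h>
#include <stddef.h>

static void *
allocate_jit_region(size_t size)
{
	/* RW+X is requested here, but it is only honored for MAP_JIT regions */
	return mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
	    MAP_PRIVATE | MAP_ANON | MAP_JIT, -1, 0);
}
#endif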
2510 * If the task has requested executable lockdown,
2511 * deny any new executable mapping.
2513 if (map
->map_disallow_new_exec
== TRUE
) {
2514 if (cur_protection
& VM_PROT_EXECUTE
) {
2515 return KERN_PROTECTION_FAILURE
;
2519 if (resilient_codesign
) {
2521 int reject_prot
= (needs_copy
? VM_PROT_EXECUTE
: (VM_PROT_WRITE
| VM_PROT_EXECUTE
));
2522 if ((cur_protection
| max_protection
) & reject_prot
) {
2523 return KERN_PROTECTION_FAILURE
;
2527 if (resilient_media
) {
2529 // assert(!needs_copy);
2530 if (object
!= VM_OBJECT_NULL
&&
2531 !object
->internal
) {
			/*
			 * This mapping is directly backed by an external
			 * memory manager (e.g. a vnode pager for a file):
			 * we would not have any safe place to inject
			 * a zero-filled page if an actual page is not
			 * available, without possibly impacting the actual
			 * contents of the mapped object (e.g. the file),
			 * so we can't provide any media resiliency here.
			 */
2541 return KERN_INVALID_ARGUMENT
;
2547 /* submaps can not be purgeable */
2548 return KERN_INVALID_ARGUMENT
;
2550 if (object
== VM_OBJECT_NULL
) {
2551 /* submaps can not be created lazily */
2552 return KERN_INVALID_ARGUMENT
;
2555 if (vmk_flags
.vmkf_already
) {
		/*
		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
		 * is already present.  For it to be meaningful, the requested
		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
		 * we shouldn't try to remove what was mapped there first
		 * (!VM_FLAGS_OVERWRITE).
		 */
2563 if ((flags
& VM_FLAGS_ANYWHERE
) ||
2564 (flags
& VM_FLAGS_OVERWRITE
)) {
2565 return KERN_INVALID_ARGUMENT
;
2569 effective_min_offset
= map
->min_offset
;
2571 if (vmk_flags
.vmkf_beyond_max
) {
2573 * Allow an insertion beyond the map's max offset.
2575 #if !defined(__arm__)
2576 if (vm_map_is_64bit(map
)) {
2577 effective_max_offset
= 0xFFFFFFFFFFFFF000ULL
;
2579 #endif /* __arm__ */
2580 effective_max_offset
= 0x00000000FFFFF000ULL
;
2582 #if XNU_TARGET_OS_OSX
2583 if (__improbable(vmk_flags
.vmkf_32bit_map_va
)) {
2584 effective_max_offset
= MIN(map
->max_offset
, 0x00000000FFFFF000ULL
);
2586 effective_max_offset
= map
->max_offset
;
2588 #else /* XNU_TARGET_OS_OSX */
2589 effective_max_offset
= map
->max_offset
;
2590 #endif /* XNU_TARGET_OS_OSX */
2594 (offset
& MIN(VM_MAP_PAGE_MASK(map
), PAGE_MASK_64
)) != 0) {
2596 return KERN_INVALID_ARGUMENT
;
2599 if (map
->pmap
== kernel_pmap
) {
2600 user_alias
= VM_KERN_MEMORY_NONE
;
2605 if (user_alias
== VM_MEMORY_MALLOC_MEDIUM
) {
2606 chunk_size
= MALLOC_MEDIUM_CHUNK_SIZE
;
2609 #define RETURN(value) { result = value; goto BailOut; }
2611 assertf(VM_MAP_PAGE_ALIGNED(*address
, FOURK_PAGE_MASK
), "0x%llx", (uint64_t)*address
);
2612 assertf(VM_MAP_PAGE_ALIGNED(size
, FOURK_PAGE_MASK
), "0x%llx", (uint64_t)size
);
2613 if (VM_MAP_PAGE_MASK(map
) >= PAGE_MASK
) {
2614 assertf(page_aligned(*address
), "0x%llx", (uint64_t)*address
);
2615 assertf(page_aligned(size
), "0x%llx", (uint64_t)size
);
2618 if (VM_MAP_PAGE_MASK(map
) >= PAGE_MASK
&&
2619 !VM_MAP_PAGE_ALIGNED(size
, VM_MAP_PAGE_MASK(map
))) {
		/*
		 * In most cases, the caller rounds the size up to the
		 * map's page size.
		 * If we get a size that is explicitly not map-aligned here,
		 * we'll have to respect the caller's wish and mark the
		 * mapping as "not map-aligned" to avoid tripping the
		 * map alignment checks later.
		 */
2628 clear_map_aligned
= TRUE
;
2631 VM_MAP_PAGE_MASK(map
) >= PAGE_MASK
&&
2632 !VM_MAP_PAGE_ALIGNED(*address
, VM_MAP_PAGE_MASK(map
))) {
		/*
		 * We've been asked to map at a fixed address and that
		 * address is not aligned to the map's specific alignment.
		 * The caller should know what it's doing (i.e. most likely
		 * mapping some fragmented copy map, transferring memory from
		 * a VM map with a different alignment), so clear map_aligned
		 * for this new VM map entry and proceed.
		 */
2641 clear_map_aligned
= TRUE
;
2645 * Only zero-fill objects are allowed to be purgable.
2646 * LP64todo - limit purgable objects to 32-bits for now
2650 (object
!= VM_OBJECT_NULL
&&
2651 (object
->vo_size
!= size
||
2652 object
->purgable
== VM_PURGABLE_DENY
))
2653 || size
> ANON_MAX_SIZE
)) { /* LP64todo: remove when dp capable */
2654 return KERN_INVALID_ARGUMENT
;
2657 if (!anywhere
&& overwrite
) {
		/*
		 * Create a temporary VM map to hold the old mappings in the
		 * affected area while we create the new one.
		 * This avoids releasing the VM map lock in
		 * vm_map_entry_delete() and allows atomicity
		 * when we want to replace some mappings with a new one.
		 * It also allows us to restore the old VM mappings if the
		 * new mapping fails.
		 */
2667 zap_old_map
= vm_map_create(PMAP_NULL
,
2670 map
->hdr
.entries_pageable
);
2671 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
2672 vm_map_disable_hole_optimization(zap_old_map
);
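	/*
	 * The "zap map" above exists so that a fixed-address mapping with
	 * VM_FLAGS_OVERWRITE can replace existing mappings and still be rolled
	 * back if the new mapping fails.  A minimal user-space sketch of the
	 * overwrite path (anonymous memory, so the memory-object port is
	 * MACH_PORT_NULL).  Illustration only.
	 */
#if 0   /* illustrative user-space sketch; not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
replace_range_with_zero_fill(mach_vm_address_t addr, mach_vm_size_t size)
{
	/* map fresh zero-filled memory exactly over [addr, addr + size) */
	return mach_vm_map(mach_task_self(), &addr, size, 0 /* mask */,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	    MACH_PORT_NULL, 0 /* offset */, FALSE /* copy */,
	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, VM_INHERIT_DEFAULT);
}
#endif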
2683 if (entry_for_jit
) {
2684 if (map
->jit_entry_exists
&&
2685 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map
)) {
2686 result
= KERN_INVALID_ARGUMENT
;
2689 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map
)) {
2690 random_address
= TRUE
;
2694 if (random_address
) {
2696 * Get a random start address.
2698 result
= vm_map_random_address_for_size(map
, address
, size
);
2699 if (result
!= KERN_SUCCESS
) {
2704 #if XNU_TARGET_OS_OSX
2705 else if ((start
== 0 || start
== vm_map_min(map
)) &&
2706 !map
->disable_vmentry_reuse
&&
2707 map
->vmmap_high_start
!= 0) {
2708 start
= map
->vmmap_high_start
;
2710 #endif /* XNU_TARGET_OS_OSX */
2714 * Calculate the first possible address.
2717 if (start
< effective_min_offset
) {
2718 start
= effective_min_offset
;
2720 if (start
> effective_max_offset
) {
2721 RETURN(KERN_NO_SPACE
);
		/*
		 * Look for the first possible address;
		 * if there's already something at this
		 * address, we have to start after it.
		 */
2730 if (map
->disable_vmentry_reuse
== TRUE
) {
2731 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
2733 if (map
->holelistenabled
) {
2734 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
2736 if (hole_entry
== NULL
) {
2738 * No more space in the map?
2740 result
= KERN_NO_SPACE
;
2743 boolean_t found_hole
= FALSE
;
2746 if (hole_entry
->vme_start
>= start
) {
2747 start
= hole_entry
->vme_start
;
2752 if (hole_entry
->vme_end
> start
) {
2756 hole_entry
= hole_entry
->vme_next
;
2757 } while (hole_entry
!= CAST_TO_VM_MAP_ENTRY(map
->holes_list
));
2759 if (found_hole
== FALSE
) {
2760 result
= KERN_NO_SPACE
;
2767 start
+= PAGE_SIZE_64
;
2771 assert(first_free_is_valid(map
));
2773 entry
= map
->first_free
;
2775 if (entry
== vm_map_to_entry(map
)) {
2778 if (entry
->vme_next
== vm_map_to_entry(map
)) {
2780 * Hole at the end of the map.
2784 if (start
< (entry
->vme_next
)->vme_start
) {
2785 start
= entry
->vme_end
;
2786 start
= vm_map_round_page(start
,
2787 VM_MAP_PAGE_MASK(map
));
2790 * Need to do a lookup.
2797 if (entry
== NULL
) {
2798 vm_map_entry_t tmp_entry
;
2799 if (vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
2800 assert(!entry_for_jit
);
2801 start
= tmp_entry
->vme_end
;
2802 start
= vm_map_round_page(start
,
2803 VM_MAP_PAGE_MASK(map
));
2811 * In any case, the "entry" always precedes
2812 * the proposed new region throughout the
2817 vm_map_entry_t next
;
2820 * Find the end of the proposed new region.
2821 * Be sure we didn't go beyond the end, or
2822 * wrap around the address.
2825 end
= ((start
+ mask
) & ~mask
);
2826 end
= vm_map_round_page(end
,
2827 VM_MAP_PAGE_MASK(map
));
2829 RETURN(KERN_NO_SPACE
);
2832 assert(VM_MAP_PAGE_ALIGNED(start
,
2833 VM_MAP_PAGE_MASK(map
)));
2836 /* We want an entire page of empty space, but don't increase the allocation size. */
2837 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
2839 if ((desired_empty_end
> effective_max_offset
) || (desired_empty_end
< start
)) {
2840 if (map
->wait_for_space
) {
2841 assert(!keep_map_locked
);
2842 if (size
<= (effective_max_offset
-
2843 effective_min_offset
)) {
2844 assert_wait((event_t
)map
,
2848 thread_block(THREAD_CONTINUE_NULL
);
2852 RETURN(KERN_NO_SPACE
);
2855 next
= entry
->vme_next
;
2857 if (map
->holelistenabled
) {
2858 if (entry
->vme_end
>= desired_empty_end
) {
2863 * If there are no more entries, we must win.
2867 * If there is another entry, it must be
2868 * after the end of the potential new region.
2871 if (next
== vm_map_to_entry(map
)) {
2875 if (next
->vme_start
>= desired_empty_end
) {
2881 * Didn't fit -- move to the next entry.
2886 if (map
->holelistenabled
) {
2887 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
2891 result
= KERN_NO_SPACE
;
2894 start
= entry
->vme_start
;
2896 start
= entry
->vme_end
;
2899 start
= vm_map_round_page(start
,
2900 VM_MAP_PAGE_MASK(map
));
2903 if (map
->holelistenabled
) {
2904 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
2905 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
2910 assert(VM_MAP_PAGE_ALIGNED(*address
,
2911 VM_MAP_PAGE_MASK(map
)));
2913 if (VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
&&
2915 user_alias
== VM_MEMORY_REALLOC
) {
2917 * Force realloc() to switch to a new allocation,
2918 * to prevent 4k-fragmented virtual ranges.
2920 // DEBUG4K_ERROR("no realloc in place");
2921 return KERN_NO_SPACE
;
2926 * the address doesn't itself violate
2927 * the mask requirement.
2932 if ((start
& mask
) != 0) {
2933 RETURN(KERN_NO_SPACE
);
2937 * ... the address is within bounds
2942 if ((start
< effective_min_offset
) ||
2943 (end
> effective_max_offset
) ||
2945 RETURN(KERN_INVALID_ADDRESS
);
2948 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
2951 * Fixed mapping and "overwrite" flag: attempt to
2952 * remove all existing mappings in the specified
2953 * address range, saving them in our "zap_old_map".
2955 remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
;
2956 remove_flags
|= VM_MAP_REMOVE_NO_MAP_ALIGN
;
2957 if (vmk_flags
.vmkf_overwrite_immutable
) {
2958 /* we can overwrite immutable mappings */
2959 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
2961 (void) vm_map_delete(map
, start
, end
,
2967 * ... the starting address isn't allocated
2970 if (vm_map_lookup_entry(map
, start
, &entry
)) {
2971 if (!(vmk_flags
.vmkf_already
)) {
2972 RETURN(KERN_NO_SPACE
);
2975 * Check if what's already there is what we want.
2978 tmp_offset
= offset
;
2979 if (entry
->vme_start
< start
) {
2980 tmp_start
-= start
- entry
->vme_start
;
2981 tmp_offset
-= start
- entry
->vme_start
;
2983 for (; entry
->vme_start
< end
;
2984 entry
= entry
->vme_next
) {
2986 * Check if the mapping's attributes
2987 * match the existing map entry.
2989 if (entry
== vm_map_to_entry(map
) ||
2990 entry
->vme_start
!= tmp_start
||
2991 entry
->is_sub_map
!= is_submap
||
2992 VME_OFFSET(entry
) != tmp_offset
||
2993 entry
->needs_copy
!= needs_copy
||
2994 entry
->protection
!= cur_protection
||
2995 entry
->max_protection
!= max_protection
||
2996 entry
->inheritance
!= inheritance
||
2997 entry
->iokit_acct
!= iokit_acct
||
2998 VME_ALIAS(entry
) != alias
) {
2999 /* not the same mapping ! */
3000 RETURN(KERN_NO_SPACE
);
3003 * Check if the same object is being mapped.
3006 if (VME_SUBMAP(entry
) !=
3007 (vm_map_t
) object
) {
3008 /* not the same submap */
3009 RETURN(KERN_NO_SPACE
);
3012 if (VME_OBJECT(entry
) != object
) {
3013 /* not the same VM object... */
3016 obj2
= VME_OBJECT(entry
);
3017 if ((obj2
== VM_OBJECT_NULL
||
3019 (object
== VM_OBJECT_NULL
||
3020 object
->internal
)) {
3027 RETURN(KERN_NO_SPACE
);
3032 tmp_offset
+= entry
->vme_end
- entry
->vme_start
;
3033 tmp_start
+= entry
->vme_end
- entry
->vme_start
;
3034 if (entry
->vme_end
>= end
) {
3035 /* reached the end of our mapping */
3039 /* it all matches: let's use what's already there ! */
3040 RETURN(KERN_MEMORY_PRESENT
);
3044 * ... the next region doesn't overlap the
3048 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
3049 (entry
->vme_next
->vme_start
< end
)) {
3050 RETURN(KERN_NO_SPACE
);
3056 * "start" and "end" should define the endpoints of the
3057 * available new range, and
3058 * "entry" should refer to the region before the new
3061 * the map should be locked.
	/*
	 * See whether we can avoid creating a new entry (and object) by
	 * extending one of our neighbors.  [So far, we only attempt to
	 * extend from below.]  Note that we can never extend/join
	 * purgable objects because they need to remain distinct
	 * entities in order to implement their "volatile object"
	 * semantics.
	 */
3075 vm_memory_malloc_no_cow(user_alias
)) {
3076 if (object
== VM_OBJECT_NULL
) {
3077 object
= vm_object_allocate(size
);
3078 object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
3079 object
->true_share
= FALSE
;
3082 object
->purgable
= VM_PURGABLE_NONVOLATILE
;
3083 if (map
->pmap
== kernel_pmap
) {
			/*
			 * Purgeable mappings made in a kernel
			 * map are "owned" by the kernel itself
			 * rather than the current user task
			 * because they're likely to be used by
			 * more than this user task (see
			 * execargs_purgeable_allocate(), for
			 * example).
			 */
3093 owner
= kernel_task
;
3095 owner
= current_task();
3097 assert(object
->vo_owner
== NULL
);
3098 assert(object
->resident_page_count
== 0);
3099 assert(object
->wired_page_count
== 0);
3100 vm_object_lock(object
);
3101 vm_purgeable_nonvolatile_enqueue(object
, owner
);
3102 vm_object_unlock(object
);
3104 offset
= (vm_object_offset_t
)0;
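		/*
		 * A minimal user-space sketch of the purgeable path handled
		 * above: allocate with VM_FLAGS_PURGABLE, then let the system
		 * reclaim the pages by marking the range volatile with
		 * mach_vm_purgable_control().  Illustration only.
		 */
#if 0   /* illustrative user-space sketch; not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
make_purgeable_cache(mach_vm_address_t *out_addr, mach_vm_size_t size)
{
	kern_return_t kr;
	int           state;

	*out_addr = 0;
	kr = mach_vm_allocate(mach_task_self(), out_addr, size,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* contents may now be discarded by the VM system under pressure */
	state = VM_PURGABLE_VOLATILE;
	return mach_vm_purgable_control(mach_task_self(), *out_addr,
	    VM_PURGABLE_SET_STATE, &state);
}
#endif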
3106 } else if (VM_MAP_PAGE_SHIFT(map
) < PAGE_SHIFT
) {
3107 /* no coalescing if address space uses sub-pages */
3108 } else if ((is_submap
== FALSE
) &&
3109 (object
== VM_OBJECT_NULL
) &&
3110 (entry
!= vm_map_to_entry(map
)) &&
3111 (entry
->vme_end
== start
) &&
3112 (!entry
->is_shared
) &&
3113 (!entry
->is_sub_map
) &&
3114 (!entry
->in_transition
) &&
3115 (!entry
->needs_wakeup
) &&
3116 (entry
->behavior
== VM_BEHAVIOR_DEFAULT
) &&
3117 (entry
->protection
== cur_protection
) &&
3118 (entry
->max_protection
== max_protection
) &&
3119 (entry
->inheritance
== inheritance
) &&
3120 ((user_alias
== VM_MEMORY_REALLOC
) ||
3121 (VME_ALIAS(entry
) == alias
)) &&
3122 (entry
->no_cache
== no_cache
) &&
3123 (entry
->permanent
== permanent
) &&
3124 /* no coalescing for immutable executable mappings */
3125 !((entry
->protection
& VM_PROT_EXECUTE
) &&
3126 entry
->permanent
) &&
3127 (!entry
->superpage_size
&& !superpage_size
) &&
3129 * No coalescing if not map-aligned, to avoid propagating
3130 * that condition any further than needed:
3132 (!entry
->map_aligned
|| !clear_map_aligned
) &&
3133 (!entry
->zero_wired_pages
) &&
3134 (!entry
->used_for_jit
&& !entry_for_jit
) &&
3135 (!entry
->pmap_cs_associated
) &&
3136 (entry
->iokit_acct
== iokit_acct
) &&
3137 (!entry
->vme_resilient_codesign
) &&
3138 (!entry
->vme_resilient_media
) &&
3139 (!entry
->vme_atomic
) &&
3140 (entry
->vme_no_copy_on_read
== no_copy_on_read
) &&
3142 ((entry
->vme_end
- entry
->vme_start
) + size
<=
3143 (user_alias
== VM_MEMORY_REALLOC
?
3145 NO_COALESCE_LIMIT
)) &&
3147 (entry
->wired_count
== 0)) { /* implies user_wired_count == 0 */
3148 if (vm_object_coalesce(VME_OBJECT(entry
),
3151 (vm_object_offset_t
) 0,
3152 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
),
3153 (vm_map_size_t
)(end
- entry
->vme_end
))) {
3155 * Coalesced the two objects - can extend
3156 * the previous map entry to include the
3159 map
->size
+= (end
- entry
->vme_end
);
3160 assert(entry
->vme_start
< end
);
3161 assert(VM_MAP_PAGE_ALIGNED(end
,
3162 VM_MAP_PAGE_MASK(map
)));
3163 if (__improbable(vm_debug_events
)) {
3164 DTRACE_VM5(map_entry_extend
, vm_map_t
, map
, vm_map_entry_t
, entry
, vm_address_t
, entry
->vme_start
, vm_address_t
, entry
->vme_end
, vm_address_t
, end
);
3166 entry
->vme_end
= end
;
3167 if (map
->holelistenabled
) {
3168 vm_map_store_update_first_free(map
, entry
, TRUE
);
3170 vm_map_store_update_first_free(map
, map
->first_free
, TRUE
);
3172 new_mapping_established
= TRUE
;
3173 RETURN(KERN_SUCCESS
);
3177 step
= superpage_size
? SUPERPAGE_SIZE
: (end
- start
);
3180 for (tmp2_start
= start
; tmp2_start
< end
; tmp2_start
+= step
) {
3181 tmp2_end
= tmp2_start
+ step
;
3183 * Create a new entry
		/*
		 * The reserved "page zero" in each process's address space can
		 * be arbitrarily large.  Splitting it into separate objects and
		 * therefore different VM map entries serves no purpose and just
		 * slows down operations on the VM map, so let's not split the
		 * allocation into chunks if the max protection is NONE.  That
		 * memory should never be accessible, so it will never get to the
		 * default pager.
		 */
3194 tmp_start
= tmp2_start
;
3195 if (object
== VM_OBJECT_NULL
&&
3196 size
> chunk_size
&&
3197 max_protection
!= VM_PROT_NONE
&&
3198 superpage_size
== 0) {
3199 tmp_end
= tmp_start
+ chunk_size
;
3204 new_entry
= vm_map_entry_insert(map
,
3205 entry
, tmp_start
, tmp_end
,
3206 object
, offset
, needs_copy
,
3208 cur_protection
, max_protection
,
3209 VM_BEHAVIOR_DEFAULT
,
3210 (entry_for_jit
&& !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map
) ?
3211 VM_INHERIT_NONE
: inheritance
),
3221 translated_allow_execute
);
3223 assert((object
!= kernel_object
) || (VM_KERN_MEMORY_NONE
!= alias
));
3225 if (resilient_codesign
) {
3226 int reject_prot
= (needs_copy
? VM_PROT_EXECUTE
: (VM_PROT_WRITE
| VM_PROT_EXECUTE
));
3227 if (!((cur_protection
| max_protection
) & reject_prot
)) {
3228 new_entry
->vme_resilient_codesign
= TRUE
;
3232 if (resilient_media
&&
3233 (object
== VM_OBJECT_NULL
||
3234 object
->internal
)) {
3235 new_entry
->vme_resilient_media
= TRUE
;
3238 assert(!new_entry
->iokit_acct
);
3240 object
!= VM_OBJECT_NULL
&&
3241 (object
->purgable
!= VM_PURGABLE_DENY
||
3242 object
->vo_ledger_tag
)) {
3243 assert(new_entry
->use_pmap
);
3244 assert(!new_entry
->iokit_acct
);
3246 * Turn off pmap accounting since
3247 * purgeable (or tagged) objects have their
3250 new_entry
->use_pmap
= FALSE
;
3251 } else if (!is_submap
&&
3253 object
!= VM_OBJECT_NULL
&&
3255 /* alternate accounting */
3256 assert(!new_entry
->iokit_acct
);
3257 assert(new_entry
->use_pmap
);
3258 new_entry
->iokit_acct
= TRUE
;
3259 new_entry
->use_pmap
= FALSE
;
3261 vm_map_iokit_mapped_region
,
3263 vm_map_offset_t
, new_entry
->vme_start
,
3264 vm_map_offset_t
, new_entry
->vme_end
,
3265 int, VME_ALIAS(new_entry
));
3266 vm_map_iokit_mapped_region(
3268 (new_entry
->vme_end
-
3269 new_entry
->vme_start
));
3270 } else if (!is_submap
) {
3271 assert(!new_entry
->iokit_acct
);
3272 assert(new_entry
->use_pmap
);
3277 boolean_t submap_is_64bit
;
3280 assert(new_entry
->is_sub_map
);
3281 assert(!new_entry
->use_pmap
);
3282 assert(!new_entry
->iokit_acct
);
3283 submap
= (vm_map_t
) object
;
3284 submap_is_64bit
= vm_map_is_64bit(submap
);
3285 use_pmap
= vmk_flags
.vmkf_nested_pmap
;
3286 #ifndef NO_NESTED_PMAP
3287 if (use_pmap
&& submap
->pmap
== NULL
) {
3288 ledger_t ledger
= map
->pmap
->ledger
;
3289 /* we need a sub pmap to nest... */
3290 submap
->pmap
= pmap_create_options(ledger
, 0,
3291 submap_is_64bit
? PMAP_CREATE_64BIT
: 0);
3292 if (submap
->pmap
== NULL
) {
3293 /* let's proceed without nesting... */
3295 #if defined(__arm__) || defined(__arm64__)
3297 pmap_set_nested(submap
->pmap
);
3301 if (use_pmap
&& submap
->pmap
!= NULL
) {
3302 if (VM_MAP_PAGE_SHIFT(map
) != VM_MAP_PAGE_SHIFT(submap
)) {
3303 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map
, VM_MAP_PAGE_SHIFT(map
), submap
, VM_MAP_PAGE_SHIFT(submap
));
3306 kr
= pmap_nest(map
->pmap
,
3309 tmp_end
- tmp_start
);
3311 if (kr
!= KERN_SUCCESS
) {
3312 printf("vm_map_enter: "
3313 "pmap_nest(0x%llx,0x%llx) "
3315 (long long)tmp_start
,
3319 /* we're now nested ! */
3320 new_entry
->use_pmap
= TRUE
;
3324 #endif /* NO_NESTED_PMAP */
3328 if (superpage_size
) {
3330 vm_object_t sp_object
;
3331 vm_object_offset_t sp_offset
;
3333 VME_OFFSET_SET(entry
, 0);
3335 /* allocate one superpage */
3336 kr
= cpm_allocate(SUPERPAGE_SIZE
, &pages
, 0, SUPERPAGE_NBASEPAGES
- 1, TRUE
, 0);
3337 if (kr
!= KERN_SUCCESS
) {
3338 /* deallocate whole range... */
3339 new_mapping_established
= TRUE
;
3340 /* ... but only up to "tmp_end" */
3341 size
-= end
- tmp_end
;
3345 /* create one vm_object per superpage */
3346 sp_object
= vm_object_allocate((vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
3347 sp_object
->phys_contiguous
= TRUE
;
3348 sp_object
->vo_shadow_offset
= (vm_object_offset_t
)VM_PAGE_GET_PHYS_PAGE(pages
) * PAGE_SIZE
;
3349 VME_OBJECT_SET(entry
, sp_object
);
3350 assert(entry
->use_pmap
);
3352 /* enter the base pages into the object */
3353 vm_object_lock(sp_object
);
3355 sp_offset
< SUPERPAGE_SIZE
;
3356 sp_offset
+= PAGE_SIZE
) {
3358 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m
));
3359 pages
= NEXT_PAGE(m
);
3360 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
3361 vm_page_insert_wired(m
, sp_object
, sp_offset
, VM_KERN_MEMORY_OSFMK
);
3363 vm_object_unlock(sp_object
);
3365 } while (tmp_end
!= tmp2_end
&&
3366 (tmp_start
= tmp_end
) &&
3367 (tmp_end
= (tmp2_end
- tmp_end
> chunk_size
) ?
3368 tmp_end
+ chunk_size
: tmp2_end
));
3371 new_mapping_established
= TRUE
;
3374 assert(map_locked
== TRUE
);
3376 if (result
== KERN_SUCCESS
) {
3377 vm_prot_t pager_prot
;
3378 memory_object_t pager
;
3382 !(vmk_flags
.vmkf_no_pmap_check
)) {
3383 assert(vm_map_pmap_is_empty(map
,
		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
3398 pager_prot
= max_protection
;
3401 * Copy-On-Write mapping: won't modify
3402 * the memory object.
3404 pager_prot
&= ~VM_PROT_WRITE
;
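		/*
		 * A minimal user-space sketch of creating and mapping a "named"
		 * memory entry, the kind of mapping the memory_object_map()
		 * notification above is about (anonymous backing here, purely
		 * to show the calling sequence).  Illustration only.
		 */
#if 0   /* illustrative user-space sketch; not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/memory_object_types.h>

static kern_return_t
map_named_entry_copy(mach_vm_address_t src, mach_vm_size_t size,
    mach_vm_address_t *out_addr)
{
	kern_return_t        kr;
	memory_object_size_t entry_size = size;
	mach_port_t          entry = MACH_PORT_NULL;

	/* wrap [src, src + size) of our own address space in a named entry */
	kr = mach_make_memory_entry_64(mach_task_self(), &entry_size, src,
	    VM_PROT_READ | VM_PROT_WRITE, &entry, MACH_PORT_NULL);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* map the named entry somewhere else, copy-on-write */
	*out_addr = 0;
	kr = mach_vm_map(mach_task_self(), out_addr, entry_size, 0,
	    VM_FLAGS_ANYWHERE, entry, 0, TRUE /* copy */,
	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	mach_port_deallocate(mach_task_self(), entry);
	return kr;
}
#endif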
3407 object
!= VM_OBJECT_NULL
&&
3409 object
->pager
!= MEMORY_OBJECT_NULL
) {
3410 vm_object_lock(object
);
3411 pager
= object
->pager
;
3412 if (object
->named
&&
3413 pager
!= MEMORY_OBJECT_NULL
) {
3414 assert(object
->pager_ready
);
3415 vm_object_mapping_wait(object
, THREAD_UNINT
);
3416 vm_object_mapping_begin(object
);
3417 vm_object_unlock(object
);
3419 kr
= memory_object_map(pager
, pager_prot
);
3420 assert(kr
== KERN_SUCCESS
);
3422 vm_object_lock(object
);
3423 vm_object_mapping_end(object
);
3425 vm_object_unlock(object
);
3429 assert(map_locked
== TRUE
);
3431 if (!keep_map_locked
) {
3437 * We can't hold the map lock if we enter this block.
3440 if (result
== KERN_SUCCESS
) {
3441 /* Wire down the new entry if the user
3442 * requested all new map entries be wired.
3444 if ((map
->wiring_required
) || (superpage_size
)) {
3445 assert(!keep_map_locked
);
3446 pmap_empty
= FALSE
; /* pmap won't be empty */
3447 kr
= vm_map_wire_kernel(map
, start
, end
,
3448 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
3455 if (result
!= KERN_SUCCESS
) {
3456 if (new_mapping_established
) {
			/*
			 * We have to get rid of the new mappings since we
			 * won't make them available to the user.
			 * Try to do that atomically, to minimize the risk
			 * that someone else creates new mappings in that range.
			 */
3463 zap_new_map
= vm_map_create(PMAP_NULL
,
3466 map
->hdr
.entries_pageable
);
3467 vm_map_set_page_shift(zap_new_map
,
3468 VM_MAP_PAGE_SHIFT(map
));
3469 vm_map_disable_hole_optimization(zap_new_map
);
3475 (void) vm_map_delete(map
, *address
, *address
+ size
,
3476 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3477 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3480 if (zap_old_map
!= VM_MAP_NULL
&&
3481 zap_old_map
->hdr
.nentries
!= 0) {
3482 vm_map_entry_t entry1
, entry2
;
3485 * The new mapping failed. Attempt to restore
3486 * the old mappings, saved in the "zap_old_map".
3493 /* first check if the coast is still clear */
3494 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
3495 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
3496 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
3497 vm_map_lookup_entry(map
, end
, &entry2
) ||
3500 * Part of that range has already been
3501 * re-mapped: we can't restore the old
3504 vm_map_enter_restore_failures
++;
3507 * Transfer the saved map entries from
3508 * "zap_old_map" to the original "map",
3509 * inserting them all after "entry1".
3511 for (entry2
= vm_map_first_entry(zap_old_map
);
3512 entry2
!= vm_map_to_entry(zap_old_map
);
3513 entry2
= vm_map_first_entry(zap_old_map
)) {
3514 vm_map_size_t entry_size
;
3516 entry_size
= (entry2
->vme_end
-
3518 vm_map_store_entry_unlink(zap_old_map
,
3520 zap_old_map
->size
-= entry_size
;
3521 vm_map_store_entry_link(map
, entry1
, entry2
,
3522 VM_MAP_KERNEL_FLAGS_NONE
);
3523 map
->size
+= entry_size
;
3526 if (map
->wiring_required
) {
3528 * XXX TODO: we should rewire the
3532 vm_map_enter_restore_successes
++;
3538 * The caller is responsible for releasing the lock if it requested to
3539 * keep the map locked.
3541 if (map_locked
&& !keep_map_locked
) {
3546 * Get rid of the "zap_maps" and all the map entries that
3547 * they may still contain.
3549 if (zap_old_map
!= VM_MAP_NULL
) {
3550 vm_map_destroy(zap_old_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3551 zap_old_map
= VM_MAP_NULL
;
3553 if (zap_new_map
!= VM_MAP_NULL
) {
3554 vm_map_destroy(zap_new_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3555 zap_new_map
= VM_MAP_NULL
;
3564 extern const struct memory_object_pager_ops fourk_pager_ops
;
3568 vm_map_offset_t
*address
, /* IN/OUT */
3570 vm_map_offset_t mask
,
3572 vm_map_kernel_flags_t vmk_flags
,
3575 vm_object_offset_t offset
,
3576 boolean_t needs_copy
,
3577 vm_prot_t cur_protection
,
3578 vm_prot_t max_protection
,
3579 vm_inherit_t inheritance
)
3581 vm_map_entry_t entry
, new_entry
;
3582 vm_map_offset_t start
, fourk_start
;
3583 vm_map_offset_t end
, fourk_end
;
3584 vm_map_size_t fourk_size
;
3585 kern_return_t result
= KERN_SUCCESS
;
3586 vm_map_t zap_old_map
= VM_MAP_NULL
;
3587 vm_map_t zap_new_map
= VM_MAP_NULL
;
3588 boolean_t map_locked
= FALSE
;
3589 boolean_t pmap_empty
= TRUE
;
3590 boolean_t new_mapping_established
= FALSE
;
3591 boolean_t keep_map_locked
= vmk_flags
.vmkf_keep_map_locked
;
3592 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
3593 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
3594 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
3595 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
3596 boolean_t is_submap
= vmk_flags
.vmkf_submap
;
3597 boolean_t permanent
= vmk_flags
.vmkf_permanent
;
3598 boolean_t no_copy_on_read
= vmk_flags
.vmkf_permanent
;
3599 boolean_t entry_for_jit
= vmk_flags
.vmkf_map_jit
;
3600 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3601 boolean_t translated_allow_execute
= vmk_flags
.vmkf_translated_allow_execute
;
3602 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
3603 vm_map_offset_t effective_min_offset
, effective_max_offset
;
3605 boolean_t clear_map_aligned
= FALSE
;
3606 memory_object_t fourk_mem_obj
;
3607 vm_object_t fourk_object
;
3608 vm_map_offset_t fourk_pager_offset
;
3609 int fourk_pager_index_start
, fourk_pager_index_num
;
3611 boolean_t fourk_copy
;
3612 vm_object_t copy_object
;
3613 vm_object_offset_t copy_offset
;
3615 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
3616 panic("%s:%d\n", __FUNCTION__
, __LINE__
);
3618 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
3619 fourk_object
= VM_OBJECT_NULL
;
3621 if (superpage_size
) {
3622 return KERN_NOT_SUPPORTED
;
3625 if ((cur_protection
& VM_PROT_WRITE
) &&
3626 (cur_protection
& VM_PROT_EXECUTE
) &&
3627 #if XNU_TARGET_OS_OSX
3628 map
->pmap
!= kernel_pmap
&&
3629 (vm_map_cs_enforcement(map
)
3631 || !VM_MAP_IS_EXOTIC(map
)
3632 #endif /* __arm64__ */
3634 #endif /* XNU_TARGET_OS_OSX */
3639 vm_prot_t
, cur_protection
);
3640 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3641 "turning off execute\n",
3643 (current_task()->bsd_info
3644 ? proc_name_address(current_task()->bsd_info
)
3647 cur_protection
&= ~VM_PROT_EXECUTE
;
3651 * If the task has requested executable lockdown,
3652 * deny any new executable mapping.
3654 if (map
->map_disallow_new_exec
== TRUE
) {
3655 if (cur_protection
& VM_PROT_EXECUTE
) {
3656 return KERN_PROTECTION_FAILURE
;
3661 return KERN_NOT_SUPPORTED
;
3663 if (vmk_flags
.vmkf_already
) {
3664 return KERN_NOT_SUPPORTED
;
3666 if (purgable
|| entry_for_jit
) {
3667 return KERN_NOT_SUPPORTED
;
3670 effective_min_offset
= map
->min_offset
;
3672 if (vmk_flags
.vmkf_beyond_max
) {
3673 return KERN_NOT_SUPPORTED
;
3675 effective_max_offset
= map
->max_offset
;
3679 (offset
& FOURK_PAGE_MASK
) != 0) {
3681 return KERN_INVALID_ARGUMENT
;
3684 #define RETURN(value) { result = value; goto BailOut; }
3686 assert(VM_MAP_PAGE_ALIGNED(*address
, FOURK_PAGE_MASK
));
3687 assert(VM_MAP_PAGE_ALIGNED(size
, FOURK_PAGE_MASK
));
3689 if (!anywhere
&& overwrite
) {
3690 return KERN_NOT_SUPPORTED
;
3692 if (!anywhere
&& overwrite
) {
3694 * Create a temporary VM map to hold the old mappings in the
3695 * affected area while we create the new one.
3696 * This avoids releasing the VM map lock in
3697 * vm_map_entry_delete() and allows atomicity
3698 * when we want to replace some mappings with a new one.
3699 * It also allows us to restore the old VM mappings if the
3700 * new mapping fails.
3702 zap_old_map
= vm_map_create(PMAP_NULL
,
3705 map
->hdr
.entries_pageable
);
3706 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
3707 vm_map_disable_hole_optimization(zap_old_map
);
3710 fourk_start
= *address
;
3712 fourk_end
= fourk_start
+ fourk_size
;
3714 start
= vm_map_trunc_page(*address
, VM_MAP_PAGE_MASK(map
));
3715 end
= vm_map_round_page(fourk_end
, VM_MAP_PAGE_MASK(map
));
3719 return KERN_NOT_SUPPORTED
;
3723 * the address doesn't itself violate
3724 * the mask requirement.
3729 if ((start
& mask
) != 0) {
3730 RETURN(KERN_NO_SPACE
);
3734 * ... the address is within bounds
3739 if ((start
< effective_min_offset
) ||
3740 (end
> effective_max_offset
) ||
3742 RETURN(KERN_INVALID_ADDRESS
);
3745 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
3747 * Fixed mapping and "overwrite" flag: attempt to
3748 * remove all existing mappings in the specified
3749 * address range, saving them in our "zap_old_map".
3751 (void) vm_map_delete(map
, start
, end
,
3752 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3753 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3758 * ... the starting address isn't allocated
3760 if (vm_map_lookup_entry(map
, start
, &entry
)) {
3761 vm_object_t cur_object
, shadow_object
;
		/*
		 * We might already have some 4K mappings
		 * in a 16K page here.
		 */
3768 if (entry
->vme_end
- entry
->vme_start
3769 != SIXTEENK_PAGE_SIZE
) {
3770 RETURN(KERN_NO_SPACE
);
3772 if (entry
->is_sub_map
) {
3773 RETURN(KERN_NO_SPACE
);
3775 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
3776 RETURN(KERN_NO_SPACE
);
3779 /* go all the way down the shadow chain */
3780 cur_object
= VME_OBJECT(entry
);
3781 vm_object_lock(cur_object
);
3782 while (cur_object
->shadow
!= VM_OBJECT_NULL
) {
3783 shadow_object
= cur_object
->shadow
;
3784 vm_object_lock(shadow_object
);
3785 vm_object_unlock(cur_object
);
3786 cur_object
= shadow_object
;
3787 shadow_object
= VM_OBJECT_NULL
;
3789 if (cur_object
->internal
||
3790 cur_object
->pager
== NULL
) {
3791 vm_object_unlock(cur_object
);
3792 RETURN(KERN_NO_SPACE
);
3794 if (cur_object
->pager
->mo_pager_ops
3795 != &fourk_pager_ops
) {
3796 vm_object_unlock(cur_object
);
3797 RETURN(KERN_NO_SPACE
);
3799 fourk_object
= cur_object
;
3800 fourk_mem_obj
= fourk_object
->pager
;
3802 /* keep the "4K" object alive */
3803 vm_object_reference_locked(fourk_object
);
3804 memory_object_reference(fourk_mem_obj
);
3805 vm_object_unlock(fourk_object
);
3807 /* merge permissions */
3808 entry
->protection
|= cur_protection
;
3809 entry
->max_protection
|= max_protection
;
3810 if ((entry
->protection
& (VM_PROT_WRITE
|
3811 VM_PROT_EXECUTE
)) ==
3812 (VM_PROT_WRITE
| VM_PROT_EXECUTE
) &&
3813 fourk_binary_compatibility_unsafe
&&
3814 fourk_binary_compatibility_allow_wx
) {
3815 /* write+execute: need to be "jit" */
3816 entry
->used_for_jit
= TRUE
;
3818 goto map_in_fourk_pager
;
3822 * ... the next region doesn't overlap the
3826 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
3827 (entry
->vme_next
->vme_start
< end
)) {
3828 RETURN(KERN_NO_SPACE
);
3834 * "start" and "end" should define the endpoints of the
3835 * available new range, and
3836 * "entry" should refer to the region before the new
3839 * the map should be locked.
3842 /* create a new "4K" pager */
3843 fourk_mem_obj
= fourk_pager_create();
3844 fourk_object
= fourk_pager_to_vm_object(fourk_mem_obj
);
3845 assert(fourk_object
);
	/* keep the "4K" object alive */
3848 vm_object_reference(fourk_object
);
3850 /* create a "copy" object, to map the "4K" object copy-on-write */
3852 result
= vm_object_copy_strategically(fourk_object
,
3858 assert(result
== KERN_SUCCESS
);
3859 assert(copy_object
!= VM_OBJECT_NULL
);
3860 assert(copy_offset
== 0);
3862 /* map the "4K" pager's copy object */
3864 vm_map_entry_insert(map
, entry
,
3865 vm_map_trunc_page(start
,
3866 VM_MAP_PAGE_MASK(map
)),
3867 vm_map_round_page(end
,
3868 VM_MAP_PAGE_MASK(map
)),
3871 FALSE
, /* needs_copy */
3874 cur_protection
, max_protection
,
3875 VM_BEHAVIOR_DEFAULT
,
3876 (entry_for_jit
&& !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map
) ?
3877 VM_INHERIT_NONE
: inheritance
),
3887 translated_allow_execute
);
3890 #if VM_MAP_DEBUG_FOURK
3891 if (vm_map_debug_fourk
) {
3892 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3894 (uint64_t) entry
->vme_start
,
3895 (uint64_t) entry
->vme_end
,
3898 #endif /* VM_MAP_DEBUG_FOURK */
3900 new_mapping_established
= TRUE
;
3903 /* "map" the original "object" where it belongs in the "4K" pager */
3904 fourk_pager_offset
= (fourk_start
& SIXTEENK_PAGE_MASK
);
3905 fourk_pager_index_start
= (int) (fourk_pager_offset
/ FOURK_PAGE_SIZE
);
3906 if (fourk_size
> SIXTEENK_PAGE_SIZE
) {
3907 fourk_pager_index_num
= 4;
3909 fourk_pager_index_num
= (int) (fourk_size
/ FOURK_PAGE_SIZE
);
3911 if (fourk_pager_index_start
+ fourk_pager_index_num
> 4) {
3912 fourk_pager_index_num
= 4 - fourk_pager_index_start
;
3915 cur_idx
< fourk_pager_index_num
;
3917 vm_object_t old_object
;
3918 vm_object_offset_t old_offset
;
3920 kr
= fourk_pager_populate(fourk_mem_obj
,
3921 TRUE
, /* overwrite */
3922 fourk_pager_index_start
+ cur_idx
,
3926 (cur_idx
* FOURK_PAGE_SIZE
))
3930 #if VM_MAP_DEBUG_FOURK
3931 if (vm_map_debug_fourk
) {
3932 if (old_object
== (vm_object_t
) -1 &&
3933 old_offset
== (vm_object_offset_t
) -1) {
3934 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3935 "pager [%p:0x%llx] "
3937 "[object:%p,offset:0x%llx]\n",
3939 (uint64_t) entry
->vme_start
,
3940 (uint64_t) entry
->vme_end
,
3943 fourk_pager_index_start
+ cur_idx
,
3946 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3949 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3950 "pager [%p:0x%llx] "
3951 "populate[%d] [object:%p,offset:0x%llx] "
3952 "old [%p:0x%llx]\n",
3954 (uint64_t) entry
->vme_start
,
3955 (uint64_t) entry
->vme_end
,
3958 fourk_pager_index_start
+ cur_idx
,
3961 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3967 #endif /* VM_MAP_DEBUG_FOURK */
3969 assert(kr
== KERN_SUCCESS
);
3970 if (object
!= old_object
&&
3971 object
!= VM_OBJECT_NULL
&&
3972 object
!= (vm_object_t
) -1) {
3973 vm_object_reference(object
);
3975 if (object
!= old_object
&&
3976 old_object
!= VM_OBJECT_NULL
&&
3977 old_object
!= (vm_object_t
) -1) {
3978 vm_object_deallocate(old_object
);
3983 assert(map_locked
== TRUE
);
3985 if (result
== KERN_SUCCESS
) {
3986 vm_prot_t pager_prot
;
3987 memory_object_t pager
;
3991 !(vmk_flags
.vmkf_no_pmap_check
)) {
3992 assert(vm_map_pmap_is_empty(map
,
		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
4007 pager_prot
= max_protection
;
4010 * Copy-On-Write mapping: won't modify
4011 * the memory object.
4013 pager_prot
&= ~VM_PROT_WRITE
;
4016 object
!= VM_OBJECT_NULL
&&
4018 object
->pager
!= MEMORY_OBJECT_NULL
) {
4019 vm_object_lock(object
);
4020 pager
= object
->pager
;
4021 if (object
->named
&&
4022 pager
!= MEMORY_OBJECT_NULL
) {
4023 assert(object
->pager_ready
);
4024 vm_object_mapping_wait(object
, THREAD_UNINT
);
4025 vm_object_mapping_begin(object
);
4026 vm_object_unlock(object
);
4028 kr
= memory_object_map(pager
, pager_prot
);
4029 assert(kr
== KERN_SUCCESS
);
4031 vm_object_lock(object
);
4032 vm_object_mapping_end(object
);
4034 vm_object_unlock(object
);
4037 fourk_object
!= VM_OBJECT_NULL
&&
4038 fourk_object
->named
&&
4039 fourk_object
->pager
!= MEMORY_OBJECT_NULL
) {
4040 vm_object_lock(fourk_object
);
4041 pager
= fourk_object
->pager
;
4042 if (fourk_object
->named
&&
4043 pager
!= MEMORY_OBJECT_NULL
) {
4044 assert(fourk_object
->pager_ready
);
4045 vm_object_mapping_wait(fourk_object
,
4047 vm_object_mapping_begin(fourk_object
);
4048 vm_object_unlock(fourk_object
);
4050 kr
= memory_object_map(pager
, VM_PROT_READ
);
4051 assert(kr
== KERN_SUCCESS
);
4053 vm_object_lock(fourk_object
);
4054 vm_object_mapping_end(fourk_object
);
4056 vm_object_unlock(fourk_object
);
4060 if (fourk_object
!= VM_OBJECT_NULL
) {
4061 vm_object_deallocate(fourk_object
);
4062 fourk_object
= VM_OBJECT_NULL
;
4063 memory_object_deallocate(fourk_mem_obj
);
4064 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
4067 assert(map_locked
== TRUE
);
4069 if (!keep_map_locked
) {
4075 * We can't hold the map lock if we enter this block.
4078 if (result
== KERN_SUCCESS
) {
4079 /* Wire down the new entry if the user
4080 * requested all new map entries be wired.
4082 if ((map
->wiring_required
) || (superpage_size
)) {
4083 assert(!keep_map_locked
);
4084 pmap_empty
= FALSE
; /* pmap won't be empty */
4085 kr
= vm_map_wire_kernel(map
, start
, end
,
4086 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
4093 if (result
!= KERN_SUCCESS
) {
4094 if (new_mapping_established
) {
			/*
			 * We have to get rid of the new mappings since we
			 * won't make them available to the user.
			 * Try to do that atomically, to minimize the risk
			 * that someone else creates new mappings in that range.
			 */
4101 zap_new_map
= vm_map_create(PMAP_NULL
,
4104 map
->hdr
.entries_pageable
);
4105 vm_map_set_page_shift(zap_new_map
,
4106 VM_MAP_PAGE_SHIFT(map
));
4107 vm_map_disable_hole_optimization(zap_new_map
);
4113 (void) vm_map_delete(map
, *address
, *address
+ size
,
4114 (VM_MAP_REMOVE_SAVE_ENTRIES
|
4115 VM_MAP_REMOVE_NO_MAP_ALIGN
),
4118 if (zap_old_map
!= VM_MAP_NULL
&&
4119 zap_old_map
->hdr
.nentries
!= 0) {
4120 vm_map_entry_t entry1
, entry2
;
4123 * The new mapping failed. Attempt to restore
4124 * the old mappings, saved in the "zap_old_map".
4131 /* first check if the coast is still clear */
4132 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
4133 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
4134 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
4135 vm_map_lookup_entry(map
, end
, &entry2
) ||
4138 * Part of that range has already been
4139 * re-mapped: we can't restore the old
4142 vm_map_enter_restore_failures
++;
4145 * Transfer the saved map entries from
4146 * "zap_old_map" to the original "map",
4147 * inserting them all after "entry1".
4149 for (entry2
= vm_map_first_entry(zap_old_map
);
4150 entry2
!= vm_map_to_entry(zap_old_map
);
4151 entry2
= vm_map_first_entry(zap_old_map
)) {
4152 vm_map_size_t entry_size
;
4154 entry_size
= (entry2
->vme_end
-
4156 vm_map_store_entry_unlink(zap_old_map
,
4158 zap_old_map
->size
-= entry_size
;
4159 vm_map_store_entry_link(map
, entry1
, entry2
,
4160 VM_MAP_KERNEL_FLAGS_NONE
);
4161 map
->size
+= entry_size
;
4164 if (map
->wiring_required
) {
4166 * XXX TODO: we should rewire the
4170 vm_map_enter_restore_successes
++;
4176 * The caller is responsible for releasing the lock if it requested to
4177 * keep the map locked.
4179 if (map_locked
&& !keep_map_locked
) {
4184 * Get rid of the "zap_maps" and all the map entries that
4185 * they may still contain.
4187 if (zap_old_map
!= VM_MAP_NULL
) {
4188 vm_map_destroy(zap_old_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
4189 zap_old_map
= VM_MAP_NULL
;
4191 if (zap_new_map
!= VM_MAP_NULL
) {
4192 vm_map_destroy(zap_new_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
4193 zap_new_map
= VM_MAP_NULL
;
4200 #endif /* __arm64__ */
4203 * Counters for the prefault optimization.
4205 int64_t vm_prefault_nb_pages
= 0;
4206 int64_t vm_prefault_nb_bailout
= 0;
4208 static kern_return_t
4209 vm_map_enter_mem_object_helper(
4210 vm_map_t target_map
,
4211 vm_map_offset_t
*address
,
4212 vm_map_size_t initial_size
,
4213 vm_map_offset_t mask
,
4215 vm_map_kernel_flags_t vmk_flags
,
4218 vm_object_offset_t offset
,
4220 vm_prot_t cur_protection
,
4221 vm_prot_t max_protection
,
4222 vm_inherit_t inheritance
,
4223 upl_page_list_ptr_t page_list
,
4224 unsigned int page_list_count
)
4226 vm_map_address_t map_addr
;
4227 vm_map_size_t map_size
;
4229 vm_object_size_t size
;
4230 kern_return_t result
;
4231 boolean_t mask_cur_protection
, mask_max_protection
;
4232 boolean_t kernel_prefault
, try_prefault
= (page_list_count
!= 0);
4233 vm_map_offset_t offset_in_mapping
= 0;
4235 boolean_t fourk
= vmk_flags
.vmkf_fourk
;
4236 #endif /* __arm64__ */
4238 if (VM_MAP_PAGE_SHIFT(target_map
) < PAGE_SHIFT
) {
4239 /* XXX TODO4K prefaulting depends on page size... */
4240 try_prefault
= FALSE
;
4243 assertf(vmk_flags
.__vmkf_unused
== 0, "vmk_flags unused=0x%x\n", vmk_flags
.__vmkf_unused
);
4245 mask_cur_protection
= cur_protection
& VM_PROT_IS_MASK
;
4246 mask_max_protection
= max_protection
& VM_PROT_IS_MASK
;
4247 cur_protection
&= ~VM_PROT_IS_MASK
;
4248 max_protection
&= ~VM_PROT_IS_MASK
;
4251 * Check arguments for validity
4253 if ((target_map
== VM_MAP_NULL
) ||
4254 (cur_protection
& ~VM_PROT_ALL
) ||
4255 (max_protection
& ~VM_PROT_ALL
) ||
4256 (inheritance
> VM_INHERIT_LAST_VALID
) ||
4257 (try_prefault
&& (copy
|| !page_list
)) ||
4258 initial_size
== 0) {
4259 return KERN_INVALID_ARGUMENT
;
4263 if (fourk
&& VM_MAP_PAGE_SHIFT(target_map
) < PAGE_SHIFT
) {
4264 /* no "fourk" if map is using a sub-page page size */
4268 map_addr
= vm_map_trunc_page(*address
, FOURK_PAGE_MASK
);
4269 map_size
= vm_map_round_page(initial_size
, FOURK_PAGE_MASK
);
4271 #endif /* __arm64__ */
4273 map_addr
= vm_map_trunc_page(*address
,
4274 VM_MAP_PAGE_MASK(target_map
));
4275 map_size
= vm_map_round_page(initial_size
,
4276 VM_MAP_PAGE_MASK(target_map
));
4278 size
= vm_object_round_page(initial_size
);
4281 * Find the vm object (if any) corresponding to this port.
4283 if (!IP_VALID(port
)) {
4284 object
= VM_OBJECT_NULL
;
4287 } else if (ip_kotype(port
) == IKOT_NAMED_ENTRY
) {
4288 vm_named_entry_t named_entry
;
4289 vm_object_offset_t data_offset
;
4291 named_entry
= (vm_named_entry_t
) ip_get_kobject(port
);
4293 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4294 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4295 data_offset
= named_entry
->data_offset
;
4296 offset
+= named_entry
->data_offset
;
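		/*
		 * A minimal user-space sketch of the data_offset handling
		 * above: with VM_FLAGS_RETURN_DATA_ADDR, the address returned
		 * by mach_vm_map() points at the named entry's data rather
		 * than at the truncated page boundary (assuming the flag from
		 * <mach/vm_statistics.h> is available to the caller).
		 * Illustration only.
		 */
#if 0   /* illustrative user-space sketch; not compiled with the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
map_named_entry_data(mach_port_t named_entry, mach_vm_size_t size,
    mach_vm_address_t *out_data_addr)
{
	*out_data_addr = 0;
	return mach_vm_map(mach_task_self(), out_data_addr, size, 0,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
	    named_entry, 0 /* offset */, FALSE /* copy */,
	    VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE);
}
#endif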
		/* a few checks to make sure the user is obeying the rules */
4303 if (offset
>= named_entry
->size
) {
4304 return KERN_INVALID_RIGHT
;
4306 size
= named_entry
->size
- offset
;
4308 if (mask_max_protection
) {
4309 max_protection
&= named_entry
->protection
;
4311 if (mask_cur_protection
) {
4312 cur_protection
&= named_entry
->protection
;
4314 if ((named_entry
->protection
& max_protection
) !=
4316 return KERN_INVALID_RIGHT
;
4318 if ((named_entry
->protection
& cur_protection
) !=
4320 return KERN_INVALID_RIGHT
;
4322 if (offset
+ size
< offset
) {
4324 return KERN_INVALID_ARGUMENT
;
4326 if (named_entry
->size
< (offset
+ initial_size
)) {
4327 return KERN_INVALID_ARGUMENT
;
4330 if (named_entry
->is_copy
) {
4331 /* for a vm_map_copy, we can only map it whole */
4332 if ((size
!= named_entry
->size
) &&
4333 (vm_map_round_page(size
,
4334 VM_MAP_PAGE_MASK(target_map
)) ==
4335 named_entry
->size
)) {
4336 /* XXX FBDP use the rounded size... */
4337 size
= vm_map_round_page(
4339 VM_MAP_PAGE_MASK(target_map
));
			/* the caller's parameter "offset" is defined to be the */
			/* offset from the beginning of the named entry; add the */
			/* named entry's own offset within the backing object */
4345 offset
= offset
+ named_entry
->offset
;
4347 if (!VM_MAP_PAGE_ALIGNED(size
,
4348 VM_MAP_PAGE_MASK(target_map
))) {
4350 * Let's not map more than requested;
4351 * vm_map_enter() will handle this "not map-aligned"
4357 named_entry_lock(named_entry
);
4358 if (named_entry
->is_sub_map
) {
4361 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4362 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4363 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4366 submap
= named_entry
->backing
.map
;
4367 vm_map_reference(submap
);
4368 named_entry_unlock(named_entry
);
4370 vmk_flags
.vmkf_submap
= TRUE
;
4372 result
= vm_map_enter(target_map
,
4379 (vm_object_t
)(uintptr_t) submap
,
4385 if (result
!= KERN_SUCCESS
) {
4386 vm_map_deallocate(submap
);
4389 * No need to lock "submap" just to check its
4390 * "mapped" flag: that flag is never reset
4391 * once it's been set and if we race, we'll
4392 * just end up setting it twice, which is OK.
4394 if (submap
->mapped_in_other_pmaps
== FALSE
&&
4395 vm_map_pmap(submap
) != PMAP_NULL
&&
4396 vm_map_pmap(submap
) !=
4397 vm_map_pmap(target_map
)) {
4399 * This submap is being mapped in a map
4400 * that uses a different pmap.
4401 * Set its "mapped_in_other_pmaps" flag
4402 * to indicate that we now need to
4403 * remove mappings from all pmaps rather
4404 * than just the submap's pmap.
4406 vm_map_lock(submap
);
4407 submap
->mapped_in_other_pmaps
= TRUE
;
4408 vm_map_unlock(submap
);
4410 *address
= map_addr
;
4413 } else if (named_entry
->is_copy
) {
4415 vm_map_copy_t copy_map
;
4416 vm_map_entry_t copy_entry
;
4417 vm_map_offset_t copy_addr
;
4418 vm_map_copy_t target_copy_map
;
4419 vm_map_offset_t overmap_start
, overmap_end
;
4420 vm_map_offset_t trimmed_start
;
4421 vm_map_size_t target_size
;
4423 if (flags
& ~(VM_FLAGS_FIXED
|
4425 VM_FLAGS_OVERWRITE
|
4426 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4427 VM_FLAGS_RETURN_DATA_ADDR
|
4428 VM_FLAGS_ALIAS_MASK
)) {
4429 named_entry_unlock(named_entry
);
4430 return KERN_INVALID_ARGUMENT
;
4433 copy_map
= named_entry
->backing
.copy
;
4434 assert(copy_map
->type
== VM_MAP_COPY_ENTRY_LIST
);
4435 if (copy_map
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
4436 /* unsupported type; should not happen */
4437 printf("vm_map_enter_mem_object: "
4438 "memory_entry->backing.copy "
4439 "unsupported type 0x%x\n",
4441 named_entry_unlock(named_entry
);
4442 return KERN_INVALID_ARGUMENT
;
4445 if (VM_MAP_PAGE_SHIFT(target_map
) != copy_map
->cpy_hdr
.page_shift
) {
4446 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map
, offset
, (uint64_t)map_size
, copy_map
->cpy_hdr
.page_shift
, target_map
, VM_MAP_PAGE_SHIFT(target_map
));
4449 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4450 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4451 offset_in_mapping
= offset
& VM_MAP_PAGE_MASK(target_map
);
4452 if (flags
& VM_FLAGS_RETURN_4K_DATA_ADDR
) {
4453 offset_in_mapping
&= ~((signed)(0xFFF));
4457 target_copy_map
= VM_MAP_COPY_NULL
;
4458 target_size
= copy_map
->size
;
4462 if (copy_map
->cpy_hdr
.page_shift
!= VM_MAP_PAGE_SHIFT(target_map
)) {
4463 DEBUG4K_ADJUST("adjusting...\n");
4464 kr
= vm_map_copy_adjust_to_target(
4466 offset
/* includes data_offset */,
4474 if (kr
!= KERN_SUCCESS
) {
4475 named_entry_unlock(named_entry
);
4478 target_size
= target_copy_map
->size
;
4479 if (trimmed_start
>= data_offset
) {
4480 data_offset
= offset
& VM_MAP_PAGE_MASK(target_map
);
4482 data_offset
-= trimmed_start
;
4485 target_copy_map
= copy_map
;
4488 /* reserve a contiguous range */
4489 kr
= vm_map_enter(target_map
,
4491 vm_map_round_page(target_size
, VM_MAP_PAGE_MASK(target_map
)),
4493 flags
& (VM_FLAGS_ANYWHERE
|
4494 VM_FLAGS_OVERWRITE
|
4495 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4496 VM_FLAGS_RETURN_DATA_ADDR
),
4505 if (kr
!= KERN_SUCCESS
) {
4506 DEBUG4K_ERROR("kr 0x%x\n", kr
);
4507 if (target_copy_map
!= copy_map
) {
4508 vm_map_copy_discard(target_copy_map
);
4509 target_copy_map
= VM_MAP_COPY_NULL
;
4511 named_entry_unlock(named_entry
);
4515 copy_addr
= map_addr
;
4517 for (copy_entry
= vm_map_copy_first_entry(target_copy_map
);
4518 copy_entry
!= vm_map_copy_to_entry(target_copy_map
);
4519 copy_entry
= copy_entry
->vme_next
) {
4521 vm_map_kernel_flags_t vmk_remap_flags
;
4522 vm_map_t copy_submap
;
4523 vm_object_t copy_object
;
4524 vm_map_size_t copy_size
;
4525 vm_object_offset_t copy_offset
;
4529 vmk_remap_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
4531 copy_object
= VME_OBJECT(copy_entry
);
4532 copy_offset
= VME_OFFSET(copy_entry
);
4533 copy_size
= (copy_entry
->vme_end
-
4534 copy_entry
->vme_start
);
4535 VM_GET_FLAGS_ALIAS(flags
, copy_vm_alias
);
4536 if (copy_vm_alias
== 0) {
4538 * Caller does not want a specific
4539 * alias for this new mapping: use
4540 * the alias of the original mapping.
4542 copy_vm_alias
= VME_ALIAS(copy_entry
);
4546 if ((copy_addr
+ copy_size
) >
4548 overmap_start
+ overmap_end
+
4549 named_entry
->size
/* XXX full size */)) {
4550 /* over-mapping too much !? */
4551 kr
= KERN_INVALID_ARGUMENT
;
4552 DEBUG4K_ERROR("kr 0x%x\n", kr
);
4557 /* take a reference on the object */
4558 if (copy_entry
->is_sub_map
) {
4559 vmk_remap_flags
.vmkf_submap
= TRUE
;
4560 copy_submap
= VME_SUBMAP(copy_entry
);
4561 vm_map_lock(copy_submap
);
4562 vm_map_reference(copy_submap
);
4563 vm_map_unlock(copy_submap
);
4564 copy_object
= (vm_object_t
)(uintptr_t) copy_submap
;
4566 copy_object
!= VM_OBJECT_NULL
&&
4567 (copy_entry
->needs_copy
||
4568 copy_object
->shadowed
||
4569 (!copy_object
->true_share
&&
4570 !copy_entry
->is_shared
&&
4571 copy_object
->vo_size
> copy_size
))) {
4573 * We need to resolve our side of this
4574 * "symmetric" copy-on-write now; we
4575 * need a new object to map and share,
4576 * instead of the current one which
4577 * might still be shared with the
4580 * Note: A "vm_map_copy_t" does not
4581 * have a lock but we're protected by
4582 * the named entry's lock here.
4584 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4585 VME_OBJECT_SHADOW(copy_entry
, copy_size
);
4586 if (!copy_entry
->needs_copy
&&
4587 copy_entry
->protection
& VM_PROT_WRITE
) {
4590 prot
= copy_entry
->protection
& ~VM_PROT_WRITE
;
4591 vm_object_pmap_protect(copy_object
,
4600 copy_entry
->needs_copy
= FALSE
;
4601 copy_entry
->is_shared
= TRUE
;
4602 copy_object
= VME_OBJECT(copy_entry
);
4603 copy_offset
= VME_OFFSET(copy_entry
);
4604 vm_object_lock(copy_object
);
4605 vm_object_reference_locked(copy_object
);
4606 if (copy_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
4607 /* we're about to make a shared mapping of this object */
4608 copy_object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
4609 copy_object
->true_share
= TRUE
;
4611 vm_object_unlock(copy_object
);
4614 * We already have the right object
4617 copy_object
= VME_OBJECT(copy_entry
);
4618 vm_object_reference(copy_object
);
4621 /* over-map the object into destination */
4622 remap_flags
|= flags
;
4623 remap_flags
|= VM_FLAGS_FIXED
;
4624 remap_flags
|= VM_FLAGS_OVERWRITE
;
4625 remap_flags
&= ~VM_FLAGS_ANYWHERE
;
4626 if (!copy
&& !copy_entry
->is_sub_map
) {
4628 * copy-on-write should have been
4629 * resolved at this point, or we would
4630 * end up sharing instead of copying.
4632 assert(!copy_entry
->needs_copy
);
4634 #if XNU_TARGET_OS_OSX
4635 if (copy_entry
->used_for_jit
) {
4636 vmk_remap_flags
.vmkf_map_jit
= TRUE
;
4638 #endif /* XNU_TARGET_OS_OSX */
4640 assertf((copy_vm_alias
& VME_ALIAS_MASK
) == copy_vm_alias
,
4641 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias
, (copy_vm_alias
& VME_ALIAS_MASK
));
4642 kr
= vm_map_enter(target_map
,
4645 (vm_map_offset_t
) 0,
4648 (vm_tag_t
) copy_vm_alias
, /* see comment at end of vm_fault_unwire re. cast*/
4651 ((copy_object
== NULL
) ? FALSE
: copy
),
4655 if (kr
!= KERN_SUCCESS
) {
4656 DEBUG4K_SHARE("failed kr 0x%x\n", kr
);
4657 if (copy_entry
->is_sub_map
) {
4658 vm_map_deallocate(copy_submap
);
4660 vm_object_deallocate(copy_object
);
4667 copy_addr
+= copy_size
;
4670 if (kr
== KERN_SUCCESS
) {
4671 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4672 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4673 *address
= map_addr
+ offset_in_mapping
;
4675 *address
= map_addr
;
4677 if (overmap_start
) {
4678 *address
+= overmap_start
;
4679 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map
, (uint64_t)map_addr
, (uint64_t) offset_in_mapping
, (uint64_t)overmap_start
, (uint64_t)*address
);
4682 named_entry_unlock(named_entry
);
4683 if (target_copy_map
!= copy_map
) {
4684 vm_map_copy_discard(target_copy_map
);
4685 target_copy_map
= VM_MAP_COPY_NULL
;
4688 if (kr
!= KERN_SUCCESS
) {
4689 if (!(flags
& VM_FLAGS_OVERWRITE
)) {
4690 /* deallocate the contiguous range */
4691 (void) vm_deallocate(target_map
,
		if (named_entry->is_object) {
			unsigned int    access;
			vm_prot_t       protections;
			unsigned int    wimg_mode;

			/* we are mapping a VM object */

			protections = named_entry->protection & VM_PROT_ALL;
			access = GET_MAP_MEM(named_entry->protection);

			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
			    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
				offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
				if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
					offset_in_mapping &= ~((signed)(0xFFF));
				}
				offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
				map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
			}

			object = vm_named_entry_to_vm_object(named_entry);
			assert(object != VM_OBJECT_NULL);
			vm_object_lock(object);
			named_entry_unlock(named_entry);

			vm_object_reference_locked(object);

			wimg_mode = object->wimg_bits;
			vm_prot_to_wimg(access, &wimg_mode);
			if (object->wimg_bits != wimg_mode) {
				vm_object_change_wimg_mode(object, wimg_mode);
			}

			vm_object_unlock(object);
		} else {
			panic("invalid VM named entry %p", named_entry);
		}
	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
		/*
		 * JMM - This is temporary until we unify named entries
		 * and raw memory objects.
		 *
		 * Detected fake ip_kotype for a memory object.  In
		 * this case, the port isn't really a port at all, but
		 * instead is just a raw memory object.
		 */
		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
		    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
			panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
		}

		object = memory_object_to_vm_object((memory_object_t)port);
		if (object == VM_OBJECT_NULL) {
			return KERN_INVALID_OBJECT;
		}
		vm_object_reference(object);

		/* wait for object (if any) to be ready */
		if (object != VM_OBJECT_NULL) {
			if (object == kernel_object) {
				printf("Warning: Attempt to map kernel object"
				    " by a non-private kernel entity\n");
				return KERN_INVALID_OBJECT;
			}
			if (!object->pager_ready) {
				vm_object_lock(object);

				while (!object->pager_ready) {
					vm_object_wait(object,
					    VM_OBJECT_EVENT_PAGER_READY,
					    THREAD_UNINT);
					vm_object_lock(object);
				}
				vm_object_unlock(object);
			}
		}
	} else {
		return KERN_INVALID_OBJECT;
	}
	if (object != VM_OBJECT_NULL &&
	    object->named &&
	    object->pager != MEMORY_OBJECT_NULL &&
	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
		memory_object_t pager;
		vm_prot_t       pager_prot;
		kern_return_t   kr;

		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
		pager_prot = max_protection;
		if (copy) {
			/*
			 * Copy-On-Write mapping: won't modify the
			 * memory object.
			 */
			pager_prot &= ~VM_PROT_WRITE;
		}
		vm_object_lock(object);
		pager = object->pager;
		if (object->named &&
		    pager != MEMORY_OBJECT_NULL &&
		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
			assert(object->pager_ready);
			vm_object_mapping_wait(object, THREAD_UNINT);
			vm_object_mapping_begin(object);
			vm_object_unlock(object);

			kr = memory_object_map(pager, pager_prot);
			assert(kr == KERN_SUCCESS);

			vm_object_lock(object);
			vm_object_mapping_end(object);
		}
		vm_object_unlock(object);
	}
	/*
	 *	Perform the copy if requested
	 */

	if (copy) {
		vm_object_t             new_object;
		vm_object_offset_t      new_offset;

		result = vm_object_copy_strategically(object, offset,
		    map_size,
		    &new_object, &new_offset,
		    &copy);

		if (result == KERN_MEMORY_RESTART_COPY) {
			boolean_t success;
			boolean_t src_needs_copy;

			/*
			 * XXX
			 * We currently ignore src_needs_copy.
			 * This really is the issue of how to make
			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
			 * non-kernel users to use. Solution forthcoming.
			 * In the meantime, since we don't allow non-kernel
			 * memory managers to specify symmetric copy,
			 * we won't run into problems here.
			 */
			new_object = object;
			new_offset = offset;
			success = vm_object_copy_quickly(&new_object,
			    new_offset,
			    map_size,
			    &src_needs_copy,
			    &copy);
			assert(success);
			result = KERN_SUCCESS;
		}
		/*
		 *	Throw away the reference to the
		 *	original object, as it won't be mapped.
		 */

		vm_object_deallocate(object);

		if (result != KERN_SUCCESS) {
			return result;
		}

		object = new_object;
		offset = new_offset;
	}
	/*
	 * If non-kernel users want to try to prefault pages, the mapping and prefault
	 * needs to be atomic.
	 */
	kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
	vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);

#if __arm64__
	if (fourk) {
		/* map this object in a "4K" pager */
		result = vm_map_enter_fourk(target_map,
		    &map_addr,
		    map_size,
		    (vm_map_offset_t) mask,
		    flags,
		    vmk_flags,
		    tag,
		    object,
		    offset,
		    copy,
		    cur_protection,
		    max_protection,
		    inheritance);
	} else
#endif /* __arm64__ */
	{
		result = vm_map_enter(target_map,
		    &map_addr, map_size,
		    (vm_map_offset_t)mask,
		    flags,
		    vmk_flags,
		    tag,
		    object, offset,
		    copy,
		    cur_protection, max_protection,
		    inheritance);
	}
	if (result != KERN_SUCCESS) {
		vm_object_deallocate(object);
	}

	/*
	 * Try to prefault, and do not forget to release the vm map lock.
	 */
	if (result == KERN_SUCCESS && try_prefault) {
		mach_vm_address_t va = map_addr;
		kern_return_t kr = KERN_SUCCESS;
		unsigned int i = 0;
		int pmap_options;

		pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
		if (object->internal) {
			pmap_options |= PMAP_OPTIONS_INTERNAL;
		}

		for (i = 0; i < page_list_count; ++i) {
			if (!UPL_VALID_PAGE(page_list, i)) {
				if (kernel_prefault) {
					assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
					result = KERN_MEMORY_ERROR;
					break;
				}
			} else {
				/*
				 * If this function call failed, we should stop
				 * trying to optimize, other calls are likely
				 * going to fail too.
				 *
				 * We are not gonna report an error for such
				 * failure though. That's an optimization, not
				 * something critical.
				 */
				kr = pmap_enter_options(target_map->pmap,
				    va, UPL_PHYS_PAGE(page_list, i),
				    cur_protection, VM_PROT_NONE,
				    0, TRUE, pmap_options, NULL);
				if (kr != KERN_SUCCESS) {
					OSIncrementAtomic64(&vm_prefault_nb_bailout);
					if (kernel_prefault) {
						result = kr;
					}
					break;
				}
				OSIncrementAtomic64(&vm_prefault_nb_pages);
			}

			/* Next virtual address */
			va += PAGE_SIZE;
		}
		if (vmk_flags.vmkf_keep_map_locked) {
			vm_map_unlock(target_map);
		}
	}

	if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
	    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
		*address = map_addr + offset_in_mapping;
	} else {
		*address = map_addr;
	}

	return result;
}
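/*
 * In outline, the helper above dispatches on what backs "port":
 *   - a named entry wrapping a submap: the submap itself is entered into
 *     the target map with vmkf_submap set;
 *   - a named entry wrapping a vm_map_copy_t: a contiguous range is
 *     reserved, then each copy entry is over-mapped individually after
 *     resolving any pending symmetric copy-on-write;
 *   - a named entry wrapping a VM object, or a raw IKOT_MEMORY_OBJECT
 *     port: the object is referenced and handed to vm_map_enter() (or
 *     vm_map_enter_fourk() for 4K mappings on arm64).
 * The optional prefault path then pre-populates the pmap from the
 * caller-supplied UPL page list.
 */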
kern_return_t
vm_map_enter_mem_object(
	vm_map_t                target_map,
	vm_map_offset_t         *address,
	vm_map_size_t           initial_size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	vm_tag_t                tag,
	ipc_port_t              port,
	vm_object_offset_t      offset,
	boolean_t               copy,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	kern_return_t ret;

	ret = vm_map_enter_mem_object_helper(target_map,
	    address,
	    initial_size,
	    mask,
	    flags,
	    vmk_flags,
	    tag,
	    port,
	    offset,
	    copy,
	    cur_protection,
	    max_protection,
	    inheritance,
	    NULL,
	    0);

#if KASAN
	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
		kasan_notify_address(*address, initial_size);
	}
#endif

	return ret;
}
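/*
 * Example (illustrative only; a hypothetical in-kernel caller): mapping a
 * memory entry port anywhere in a task's map with default protections.
 * "task_map", "size" and "mem_entry_port" are placeholders; real callers
 * pick their own tag, protections and inheritance.
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter_mem_object(task_map, &addr, size,
 *	    0,                          (no alignment mask)
 *	    VM_FLAGS_ANYWHERE,          (let the VM pick the address)
 *	    VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE,
 *	    mem_entry_port,             (named entry or memory object port)
 *	    0,                          (offset within the entry)
 *	    FALSE,                      (share, do not copy)
 *	    VM_PROT_DEFAULT, VM_PROT_ALL,
 *	    VM_INHERIT_DEFAULT);
 */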
kern_return_t
vm_map_enter_mem_object_prefault(
	vm_map_t                target_map,
	vm_map_offset_t         *address,
	vm_map_size_t           initial_size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	vm_tag_t                tag,
	ipc_port_t              port,
	vm_object_offset_t      offset,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	upl_page_list_ptr_t     page_list,
	unsigned int            page_list_count)
{
	kern_return_t ret;

	ret = vm_map_enter_mem_object_helper(target_map,
	    address,
	    initial_size,
	    mask,
	    flags,
	    vmk_flags,
	    tag,
	    port,
	    offset,
	    FALSE,
	    cur_protection,
	    max_protection,
	    VM_INHERIT_DEFAULT,
	    page_list,
	    page_list_count);

#if KASAN
	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
		kasan_notify_address(*address, initial_size);
	}
#endif

	return ret;
}
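/*
 * Unlike vm_map_enter_mem_object(), the prefault variant above always maps
 * the memory entry shared (copy == FALSE) with VM_INHERIT_DEFAULT, and
 * hands the helper a UPL page list: every valid page in that list is
 * entered into the target pmap up front so the caller takes no soft
 * faults on first access.
 */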
5068 vm_map_enter_mem_object_control(
5069 vm_map_t target_map
,
5070 vm_map_offset_t
*address
,
5071 vm_map_size_t initial_size
,
5072 vm_map_offset_t mask
,
5074 vm_map_kernel_flags_t vmk_flags
,
5076 memory_object_control_t control
,
5077 vm_object_offset_t offset
,
5079 vm_prot_t cur_protection
,
5080 vm_prot_t max_protection
,
5081 vm_inherit_t inheritance
)
5083 vm_map_address_t map_addr
;
5084 vm_map_size_t map_size
;
5086 vm_object_size_t size
;
5087 kern_return_t result
;
5088 memory_object_t pager
;
5089 vm_prot_t pager_prot
;
5092 boolean_t fourk
= vmk_flags
.vmkf_fourk
;
5093 #endif /* __arm64__ */
5096 * Check arguments for validity
5098 if ((target_map
== VM_MAP_NULL
) ||
5099 (cur_protection
& ~VM_PROT_ALL
) ||
5100 (max_protection
& ~VM_PROT_ALL
) ||
5101 (inheritance
> VM_INHERIT_LAST_VALID
) ||
5102 initial_size
== 0) {
5103 return KERN_INVALID_ARGUMENT
;
5107 if (fourk
&& VM_MAP_PAGE_MASK(target_map
) < PAGE_MASK
) {
5112 map_addr
= vm_map_trunc_page(*address
,
5114 map_size
= vm_map_round_page(initial_size
,
5117 #endif /* __arm64__ */
5119 map_addr
= vm_map_trunc_page(*address
,
5120 VM_MAP_PAGE_MASK(target_map
));
5121 map_size
= vm_map_round_page(initial_size
,
5122 VM_MAP_PAGE_MASK(target_map
));
5124 size
= vm_object_round_page(initial_size
);
5126 object
= memory_object_control_to_vm_object(control
);
5128 if (object
== VM_OBJECT_NULL
) {
5129 return KERN_INVALID_OBJECT
;
5132 if (object
== kernel_object
) {
5133 printf("Warning: Attempt to map kernel object"
5134 " by a non-private kernel entity\n");
5135 return KERN_INVALID_OBJECT
;
5138 vm_object_lock(object
);
5139 object
->ref_count
++;
5140 vm_object_res_reference(object
);
5143 * For "named" VM objects, let the pager know that the
5144 * memory object is being mapped. Some pagers need to keep
5145 * track of this, to know when they can reclaim the memory
5146 * object, for example.
5147 * VM calls memory_object_map() for each mapping (specifying
5148 * the protection of each mapping) and calls
5149 * memory_object_last_unmap() when all the mappings are gone.
5151 pager_prot
= max_protection
;
5153 pager_prot
&= ~VM_PROT_WRITE
;
5155 pager
= object
->pager
;
5156 if (object
->named
&&
5157 pager
!= MEMORY_OBJECT_NULL
&&
5158 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
5159 assert(object
->pager_ready
);
5160 vm_object_mapping_wait(object
, THREAD_UNINT
);
5161 vm_object_mapping_begin(object
);
5162 vm_object_unlock(object
);
5164 kr
= memory_object_map(pager
, pager_prot
);
5165 assert(kr
== KERN_SUCCESS
);
5167 vm_object_lock(object
);
5168 vm_object_mapping_end(object
);
5170 vm_object_unlock(object
);
5173 * Perform the copy if requested
5177 vm_object_t new_object
;
5178 vm_object_offset_t new_offset
;
5180 result
= vm_object_copy_strategically(object
, offset
, size
,
5181 &new_object
, &new_offset
,
5185 if (result
== KERN_MEMORY_RESTART_COPY
) {
5187 boolean_t src_needs_copy
;
5191 * We currently ignore src_needs_copy.
5192 * This really is the issue of how to make
5193 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5194 * non-kernel users to use. Solution forthcoming.
5195 * In the meantime, since we don't allow non-kernel
5196 * memory managers to specify symmetric copy,
5197 * we won't run into problems here.
5199 new_object
= object
;
5200 new_offset
= offset
;
5201 success
= vm_object_copy_quickly(&new_object
,
5206 result
= KERN_SUCCESS
;
5209 * Throw away the reference to the
5210 * original object, as it won't be mapped.
5213 vm_object_deallocate(object
);
5215 if (result
!= KERN_SUCCESS
) {
5219 object
= new_object
;
5220 offset
= new_offset
;
5225 result
= vm_map_enter_fourk(target_map
,
5228 (vm_map_offset_t
)mask
,
5234 cur_protection
, max_protection
,
5237 #endif /* __arm64__ */
5239 result
= vm_map_enter(target_map
,
5240 &map_addr
, map_size
,
5241 (vm_map_offset_t
)mask
,
5247 cur_protection
, max_protection
,
5250 if (result
!= KERN_SUCCESS
) {
5251 vm_object_deallocate(object
);
5253 *address
= map_addr
;
extern pmap_paddr_t     avail_start, avail_end;

/*
 *	Allocate memory in the specified map, with the caveat that
 *	the memory is physically contiguous.  This call may fail
 *	if the system can't find sufficient contiguous memory.
 *	This call may cause or lead to heart-stopping amounts of
 *	paging activity.
 *
 *	Memory obtained from this call should be freed in the
 *	normal way, viz., via vm_deallocate.
 */
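/*
 * Illustrative sketch (not part of this file's logic): a caller that needs
 * physically contiguous memory in "map" would typically do something like
 * the following and later release it with vm_deallocate().  "map" and
 * "size" are placeholders.
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		... use [addr, addr + size) ...
 *		(void) vm_deallocate(map, addr, size);
 *	}
 */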
5278 vm_map_offset_t
*addr
,
5282 vm_object_t cpm_obj
;
5286 vm_map_offset_t va
, start
, end
, offset
;
5288 vm_map_offset_t prev_addr
= 0;
5289 #endif /* MACH_ASSERT */
5291 boolean_t anywhere
= ((VM_FLAGS_ANYWHERE
& flags
) != 0);
5294 if (VM_MAP_PAGE_SHIFT(map
) != PAGE_SHIFT
) {
5295 /* XXX TODO4K do we need to support this? */
5297 return KERN_NOT_SUPPORTED
;
5300 VM_GET_FLAGS_ALIAS(flags
, tag
);
5304 return KERN_SUCCESS
;
5307 *addr
= vm_map_min(map
);
5309 *addr
= vm_map_trunc_page(*addr
,
5310 VM_MAP_PAGE_MASK(map
));
5312 size
= vm_map_round_page(size
,
5313 VM_MAP_PAGE_MASK(map
));
5316 * LP64todo - cpm_allocate should probably allow
5317 * allocations of >4GB, but not with the current
5318 * algorithm, so just cast down the size for now.
5320 if (size
> VM_MAX_ADDRESS
) {
5321 return KERN_RESOURCE_SHORTAGE
;
5323 if ((kr
= cpm_allocate(CAST_DOWN(vm_size_t
, size
),
5324 &pages
, 0, 0, TRUE
, flags
)) != KERN_SUCCESS
) {
5328 cpm_obj
= vm_object_allocate((vm_object_size_t
)size
);
5329 assert(cpm_obj
!= VM_OBJECT_NULL
);
5330 assert(cpm_obj
->internal
);
5331 assert(cpm_obj
->vo_size
== (vm_object_size_t
)size
);
5332 assert(cpm_obj
->can_persist
== FALSE
);
5333 assert(cpm_obj
->pager_created
== FALSE
);
5334 assert(cpm_obj
->pageout
== FALSE
);
5335 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
5338 * Insert pages into object.
5341 vm_object_lock(cpm_obj
);
5342 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
5344 pages
= NEXT_PAGE(m
);
5345 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
5347 assert(!m
->vmp_gobbled
);
5348 assert(!m
->vmp_wanted
);
5349 assert(!m
->vmp_pageout
);
5350 assert(!m
->vmp_tabled
);
5351 assert(VM_PAGE_WIRED(m
));
5352 assert(m
->vmp_busy
);
5353 assert(VM_PAGE_GET_PHYS_PAGE(m
) >= (avail_start
>> PAGE_SHIFT
) && VM_PAGE_GET_PHYS_PAGE(m
) <= (avail_end
>> PAGE_SHIFT
));
5355 m
->vmp_busy
= FALSE
;
5356 vm_page_insert(m
, cpm_obj
, offset
);
5358 assert(cpm_obj
->resident_page_count
== size
/ PAGE_SIZE
);
5359 vm_object_unlock(cpm_obj
);
5362 * Hang onto a reference on the object in case a
5363 * multi-threaded application for some reason decides
5364 * to deallocate the portion of the address space into
5365 * which we will insert this object.
5367 * Unfortunately, we must insert the object now before
5368 * we can talk to the pmap module about which addresses
5369 * must be wired down. Hence, the race with a multi-
5372 vm_object_reference(cpm_obj
);
5375 * Insert object into map.
5384 VM_MAP_KERNEL_FLAGS_NONE
,
5386 (vm_object_offset_t
)0,
5390 VM_INHERIT_DEFAULT
);
5392 if (kr
!= KERN_SUCCESS
) {
5394 * A CPM object doesn't have can_persist set,
5395 * so all we have to do is deallocate it to
5396 * free up these pages.
5398 assert(cpm_obj
->pager_created
== FALSE
);
5399 assert(cpm_obj
->can_persist
== FALSE
);
5400 assert(cpm_obj
->pageout
== FALSE
);
5401 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
5402 vm_object_deallocate(cpm_obj
); /* kill acquired ref */
5403 vm_object_deallocate(cpm_obj
); /* kill creation ref */
5407 * Inform the physical mapping system that the
5408 * range of addresses may not fault, so that
5409 * page tables and such can be locked down as well.
5413 pmap
= vm_map_pmap(map
);
5414 pmap_pageable(pmap
, start
, end
, FALSE
);
5417 * Enter each page into the pmap, to avoid faults.
5418 * Note that this loop could be coded more efficiently,
5419 * if the need arose, rather than looking up each page
5422 for (offset
= 0, va
= start
; offset
< size
;
5423 va
+= PAGE_SIZE
, offset
+= PAGE_SIZE
) {
5426 vm_object_lock(cpm_obj
);
5427 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
5428 assert(m
!= VM_PAGE_NULL
);
5430 vm_page_zero_fill(m
);
5432 type_of_fault
= DBG_ZERO_FILL_FAULT
;
5434 vm_fault_enter(m
, pmap
, va
,
5436 VM_PROT_ALL
, VM_PROT_WRITE
,
5438 FALSE
, /* change_wiring */
5439 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
5440 FALSE
, /* no_cache */
5441 FALSE
, /* cs_bypass */
5443 0, /* pmap_options */
5444 NULL
, /* need_retry */
5447 vm_object_unlock(cpm_obj
);
5452 * Verify ordering in address space.
5454 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
5455 vm_object_lock(cpm_obj
);
5456 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
5457 vm_object_unlock(cpm_obj
);
5458 if (m
== VM_PAGE_NULL
) {
5459 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5460 cpm_obj
, (uint64_t)offset
);
5462 assert(m
->vmp_tabled
);
5463 assert(!m
->vmp_busy
);
5464 assert(!m
->vmp_wanted
);
5465 assert(!m
->vmp_fictitious
);
5466 assert(!m
->vmp_private
);
5467 assert(!m
->vmp_absent
);
5468 assert(!m
->vmp_error
);
5469 assert(!m
->vmp_cleaning
);
5470 assert(!m
->vmp_laundry
);
5471 assert(!m
->vmp_precious
);
5472 assert(!m
->vmp_clustered
);
5474 if (VM_PAGE_GET_PHYS_PAGE(m
) != prev_addr
+ 1) {
5475 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5476 (uint64_t)start
, (uint64_t)end
, (uint64_t)va
);
5477 printf("obj %p off 0x%llx\n", cpm_obj
, (uint64_t)offset
);
5478 printf("m %p prev_address 0x%llx\n", m
, (uint64_t)prev_addr
);
5479 panic("vm_allocate_cpm: pages not contig!");
5482 prev_addr
= VM_PAGE_GET_PHYS_PAGE(m
);
5484 #endif /* MACH_ASSERT */
5486 vm_object_deallocate(cpm_obj
); /* kill extra ref */
/*
 *	Interface is defined in all cases, but unless the kernel
 *	is built explicitly for this option, the interface does
 *	nothing.
 */

kern_return_t
vm_map_enter_cpm(
	__unused vm_map_t       map,
	__unused vm_map_offset_t        *addr,
	__unused vm_map_size_t  size,
	__unused int            flags)
{
	return KERN_FAILURE;
}
#endif /* VM_CPM */
/* Not used without nested pmaps */
#ifndef NO_NESTED_PMAP
/*
 * Clip and unnest a portion of a nested submap mapping.
 */

static void
vm_map_clip_unnest(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t start_unnest,
	vm_map_offset_t end_unnest)
{
	vm_map_offset_t old_start_unnest = start_unnest;
	vm_map_offset_t old_end_unnest = end_unnest;

	assert(entry->is_sub_map);
	assert(VME_SUBMAP(entry) != NULL);
	assert(entry->use_pmap);

	/*
	 * Query the platform for the optimal unnest range.
	 * DRK: There's some duplication of effort here, since
	 * callers may have adjusted the range to some extent.  This
	 * routine was introduced to support 1GiB subtree nesting
	 * for x86 platforms, which can also nest on 2MiB boundaries
	 * depending on size/alignment.
	 */
	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
		assert(VME_SUBMAP(entry)->is_nested_map);
		assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
		log_unnest_badness(map,
		    old_start_unnest,
		    old_end_unnest,
		    VME_SUBMAP(entry)->is_nested_map,
		    (entry->vme_start +
		    VME_SUBMAP(entry)->lowest_unnestable_start -
		    VME_OFFSET(entry)));
	}

	if (entry->vme_start > start_unnest ||
	    entry->vme_end < end_unnest) {
		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
		    "bad nested entry: start=0x%llx end=0x%llx\n",
		    (long long)start_unnest, (long long)end_unnest,
		    (long long)entry->vme_start, (long long)entry->vme_end);
	}

	if (start_unnest > entry->vme_start) {
		_vm_map_clip_start(&map->hdr,
		    entry,
		    start_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
	if (entry->vme_end > end_unnest) {
		_vm_map_clip_end(&map->hdr,
		    entry,
		    end_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}

	pmap_unnest(map->pmap,
	    entry->vme_start,
	    entry->vme_end - entry->vme_start);
	if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
		/* clean up parent map/maps */
		vm_map_submap_pmap_clean(
			map, entry->vme_start,
			entry->vme_end,
			VME_SUBMAP(entry),
			VME_OFFSET(entry));
	}
	entry->use_pmap = FALSE;
	if ((map->pmap != kernel_pmap) &&
	    (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
		VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
	}
}
#endif  /* NO_NESTED_PMAP */
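/*
 * Note on unnesting (derived from the routine above): while an entry is
 * nested, the parent map shares the submap's page tables directly via
 * pmap_nest(), so per-process changes (protection, wiring, clipping)
 * cannot be applied to just that region.  vm_map_clip_unnest() carves the
 * affected range out of the shared pmap with pmap_unnest() and clears
 * "use_pmap" on the entry so subsequent operations fall back to the
 * parent map's own pmap.
 */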
/*
 *	vm_map_clip_start:	[ internal use only ]
 *
 *	Asserts that the given entry begins at or after
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_start(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t startaddr)
{
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map &&
	    entry->use_pmap &&
	    startaddr >= entry->vme_start) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure "startaddr" is no longer in a nested range
		 * before we clip.  Unnest only the minimum range the platform
		 * can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
		end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (startaddr > entry->vme_start) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
		}

		DTRACE_VM5(
			vm_map_clip_start,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, startaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_start(&map->hdr, entry, startaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
#define vm_map_copy_clip_start(copy, entry, startaddr) \
	MACRO_BEGIN \
	if ((startaddr) > (entry)->vme_start) \
	        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
	MACRO_END
/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_start(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         start)
{
	vm_map_entry_t  new_entry;

	/*
	 *	Split off the front portion --
	 *	note that we must insert the new
	 *	entry BEFORE this one, so that
	 *	this entry has the specified starting
	 *	address.
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	new_entry->vme_end = start;
	assert(new_entry->vme_start < new_entry->vme_end);
	VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
	assert(start < entry->vme_end);
	entry->vme_start = start;

	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);

	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
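/*
 * Worked example (illustrative): clipping the entry [0x1000, 0x5000) at
 * start = 0x3000 with _vm_map_clip_start() yields two entries:
 *
 *	new_entry: [0x1000, 0x3000)  object offset unchanged
 *	entry:     [0x3000, 0x5000)  object offset advanced by 0x2000
 *
 * "entry" keeps its identity (callers' pointers stay valid) and now begins
 * at the requested address, which is why the new entry is linked BEFORE it.
 */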
/*
 *	vm_map_clip_end:	[ internal use only ]
 *
 *	Asserts that the given entry ends at or before
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_end(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t endaddr)
{
	if (endaddr > entry->vme_end) {
		/*
		 * Within the scope of this clipping, limit "endaddr" to
		 * the end of this map entry...
		 */
		endaddr = entry->vme_end;
	}
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map && entry->use_pmap) {
		vm_map_offset_t start_unnest, end_unnest;

		/*
		 * Make sure the range between the start of this entry and
		 * the new "endaddr" is no longer nested before we clip.
		 * Unnest only the minimum range the platform can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = entry->vme_start;
		end_unnest =
		    (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
		    ~(pmap_shared_region_size_min(map->pmap) - 1);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (endaddr < entry->vme_end) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
			    (addr64_t)(entry->vme_start),
			    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
		}

		DTRACE_VM5(
			vm_map_clip_end,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, endaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_end(&map->hdr, entry, endaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) \
	        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
	MACRO_END
/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_end(
	struct vm_map_header    *map_header,
	vm_map_entry_t          entry,
	vm_map_offset_t         end)
{
	vm_map_entry_t  new_entry;

	/*
	 *	Create a new entry and insert it
	 *	AFTER the specified entry
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(end,
		    VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	assert(entry->vme_start < end);
	new_entry->vme_start = entry->vme_end = end;
	VME_OFFSET_SET(new_entry,
	    VME_OFFSET(new_entry) + (end - entry->vme_start));
	assert(new_entry->vme_start < new_entry->vme_end);

	_vm_map_store_entry_link(map_header, entry, new_entry);

	if (entry->is_sub_map) {
		vm_map_reference(VME_SUBMAP(new_entry));
	} else {
		vm_object_reference(VME_OBJECT(new_entry));
	}
}
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define VM_MAP_RANGE_CHECK(map, start, end)     \
	MACRO_BEGIN                             \
	if (start < vm_map_min(map))            \
	        start = vm_map_min(map);        \
	if (end > vm_map_max(map))              \
	        end = vm_map_max(map);          \
	if (start > end)                        \
	        start = end;                    \
	MACRO_END
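/*
 * Example (illustrative): callers clamp their range to the map's bounds
 * before walking entries, e.g.
 *
 *	vm_map_lock(map);
 *	VM_MAP_RANGE_CHECK(map, start, end);
 *	... look up and clip entries between start and end ...
 *
 * Note that the macro modifies "start" and "end" in place rather than
 * returning an error.
 */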
/*
 *	vm_map_range_check:	[ internal use only ]
 *
 *	Check that the region defined by the specified start and
 *	end addresses is wholly contained within a single map
 *	entry or set of adjacent map entries of the specified map,
 *	i.e. the specified region contains no unmapped space.
 *	If any or all of the region is unmapped, FALSE is returned.
 *	Otherwise, TRUE is returned and if the output argument 'entry'
 *	is not NULL it points to the map entry containing the start
 *	of the region.
 *
 *	The map is locked for reading on entry and is left locked.
 */
static boolean_t
vm_map_range_check(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_entry_t  *entry)
{
	vm_map_entry_t  cur;
	vm_map_offset_t prev;

	/*
	 *	Basic sanity checks first
	 */
	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
		return FALSE;
	}

	/*
	 *	Check first if the region starts within a valid
	 *	mapping for the map.
	 */
	if (!vm_map_lookup_entry(map, start, &cur)) {
		return FALSE;
	}

	/*
	 *	Optimize for the case that the region is contained
	 *	in a single map entry.
	 */
	if (entry != (vm_map_entry_t *) NULL) {
		*entry = cur;
	}

	if (end <= cur->vme_end) {
		return TRUE;
	}

	/*
	 *	If the region is not wholly contained within a
	 *	single entry, walk the entries looking for holes.
	 */
	prev = cur->vme_end;
	cur = cur->vme_next;
	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
		if (end <= cur->vme_end) {
			return TRUE;
		}
		prev = cur->vme_end;
		cur = cur->vme_next;
	}
	return FALSE;
}
5908 * vm_map_submap: [ kernel use only ]
5910 * Mark the given range as handled by a subordinate map.
5912 * This range must have been created with vm_map_find using
5913 * the vm_submap_object, and no other operations may have been
5914 * performed on this range prior to calling vm_map_submap.
5916 * Only a limited number of operations can be performed
 * within this range after calling vm_map_submap:
5919 * [Don't try vm_map_copyin!]
5921 * To remove a submapping, one must first remove the
5922 * range from the superior map, and then destroy the
5923 * submap (if desired). [Better yet, don't try it.]
5928 vm_map_offset_t start
,
5929 vm_map_offset_t end
,
5931 vm_map_offset_t offset
,
5932 #ifdef NO_NESTED_PMAP
5934 #endif /* NO_NESTED_PMAP */
5937 vm_map_entry_t entry
;
5938 kern_return_t result
= KERN_INVALID_ARGUMENT
;
5943 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
5944 entry
= entry
->vme_next
;
5947 if (entry
== vm_map_to_entry(map
) ||
5948 entry
->is_sub_map
) {
5950 return KERN_INVALID_ARGUMENT
;
5953 vm_map_clip_start(map
, entry
, start
);
5954 vm_map_clip_end(map
, entry
, end
);
5956 if ((entry
->vme_start
== start
) && (entry
->vme_end
== end
) &&
5957 (!entry
->is_sub_map
) &&
5958 ((object
= VME_OBJECT(entry
)) == vm_submap_object
) &&
5959 (object
->resident_page_count
== 0) &&
5960 (object
->copy
== VM_OBJECT_NULL
) &&
5961 (object
->shadow
== VM_OBJECT_NULL
) &&
5962 (!object
->pager_created
)) {
5963 VME_OFFSET_SET(entry
, (vm_object_offset_t
)offset
);
5964 VME_OBJECT_SET(entry
, VM_OBJECT_NULL
);
5965 vm_object_deallocate(object
);
5966 entry
->is_sub_map
= TRUE
;
5967 entry
->use_pmap
= FALSE
;
5968 VME_SUBMAP_SET(entry
, submap
);
5969 vm_map_reference(submap
);
5970 if (submap
->mapped_in_other_pmaps
== FALSE
&&
5971 vm_map_pmap(submap
) != PMAP_NULL
&&
5972 vm_map_pmap(submap
) != vm_map_pmap(map
)) {
5974 * This submap is being mapped in a map
5975 * that uses a different pmap.
5976 * Set its "mapped_in_other_pmaps" flag
5977 * to indicate that we now need to
5978 * remove mappings from all pmaps rather
5979 * than just the submap's pmap.
5981 submap
->mapped_in_other_pmaps
= TRUE
;
5984 #ifndef NO_NESTED_PMAP
5986 /* nest if platform code will allow */
5987 if (submap
->pmap
== NULL
) {
5988 ledger_t ledger
= map
->pmap
->ledger
;
5989 submap
->pmap
= pmap_create_options(ledger
,
5990 (vm_map_size_t
) 0, 0);
5991 if (submap
->pmap
== PMAP_NULL
) {
5993 return KERN_NO_SPACE
;
5995 #if defined(__arm__) || defined(__arm64__)
5996 pmap_set_nested(submap
->pmap
);
5999 result
= pmap_nest(map
->pmap
,
6000 (VME_SUBMAP(entry
))->pmap
,
6002 (uint64_t)(end
- start
));
6004 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result
);
6006 entry
->use_pmap
= TRUE
;
6008 #else /* NO_NESTED_PMAP */
6009 pmap_remove(map
->pmap
, (addr64_t
)start
, (addr64_t
)end
);
6010 #endif /* NO_NESTED_PMAP */
6011 result
= KERN_SUCCESS
;
/*
 *	vm_map_protect:
 *
 *	Sets the protection of the specified address
 *	region in the target map.  If "set_max" is
 *	specified, the maximum protection is to be set;
 *	otherwise, only the current protection is affected.
 */
6029 vm_map_offset_t start
,
6030 vm_map_offset_t end
,
6034 vm_map_entry_t current
;
6035 vm_map_offset_t prev
;
6036 vm_map_entry_t entry
;
6038 int pmap_options
= 0;
6041 if (new_prot
& VM_PROT_COPY
) {
6042 vm_map_offset_t new_start
;
6043 vm_prot_t cur_prot
, max_prot
;
6044 vm_map_kernel_flags_t kflags
;
6046 /* LP64todo - see below */
6047 if (start
>= map
->max_offset
) {
6048 return KERN_INVALID_ADDRESS
;
6051 if ((new_prot
& VM_PROT_EXECUTE
) &&
6052 map
->pmap
!= kernel_pmap
&&
6053 (vm_map_cs_enforcement(map
)
6054 #if XNU_TARGET_OS_OSX && __arm64__
6055 || !VM_MAP_IS_EXOTIC(map
)
6056 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
6058 VM_MAP_POLICY_WX_FAIL(map
)) {
6060 uint64_t, (uint64_t) start
,
6061 uint64_t, (uint64_t) end
,
6062 vm_prot_t
, new_prot
);
6063 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6065 (current_task()->bsd_info
6066 ? proc_name_address(current_task()->bsd_info
)
6069 return KERN_PROTECTION_FAILURE
;
6073 * Let vm_map_remap_extract() know that it will need to:
6074 * + make a copy of the mapping
6075 * + add VM_PROT_WRITE to the max protections
6076 * + remove any protections that are no longer allowed from the
6077 * max protections (to avoid any WRITE/EXECUTE conflict, for
6079 * Note that "max_prot" is an IN/OUT parameter only for this
6080 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
6083 max_prot
= new_prot
& VM_PROT_ALL
;
6084 kflags
= VM_MAP_KERNEL_FLAGS_NONE
;
6085 kflags
.vmkf_remap_prot_copy
= TRUE
;
6086 kflags
.vmkf_overwrite_immutable
= TRUE
;
6088 kr
= vm_map_remap(map
,
6092 VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
,
6097 TRUE
, /* copy-on-write remapping! */
6100 VM_INHERIT_DEFAULT
);
6101 if (kr
!= KERN_SUCCESS
) {
6104 new_prot
&= ~VM_PROT_COPY
;
6109 /* LP64todo - remove this check when vm_map_commpage64()
6110 * no longer has to stuff in a map_entry for the commpage
6111 * above the map's max_offset.
6113 if (start
>= map
->max_offset
) {
6115 return KERN_INVALID_ADDRESS
;
6120 * Lookup the entry. If it doesn't start in a valid
6121 * entry, return an error.
6123 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
6125 return KERN_INVALID_ADDRESS
;
6128 if (entry
->superpage_size
&& (start
& (SUPERPAGE_SIZE
- 1))) { /* extend request to whole entry */
6129 start
= SUPERPAGE_ROUND_DOWN(start
);
6134 if (entry
->superpage_size
) {
6135 end
= SUPERPAGE_ROUND_UP(end
);
6139 * Make a first pass to check for protection and address
6144 prev
= current
->vme_start
;
6145 while ((current
!= vm_map_to_entry(map
)) &&
6146 (current
->vme_start
< end
)) {
6148 * If there is a hole, return an error.
6150 if (current
->vme_start
!= prev
) {
6152 return KERN_INVALID_ADDRESS
;
6155 new_max
= current
->max_protection
;
6156 if ((new_prot
& new_max
) != new_prot
) {
6158 return KERN_PROTECTION_FAILURE
;
6161 if ((new_prot
& VM_PROT_WRITE
) &&
6162 (new_prot
& VM_PROT_EXECUTE
) &&
6163 #if XNU_TARGET_OS_OSX
6164 map
->pmap
!= kernel_pmap
&&
6165 (vm_map_cs_enforcement(map
)
6167 || !VM_MAP_IS_EXOTIC(map
)
6168 #endif /* __arm64__ */
6170 #endif /* XNU_TARGET_OS_OSX */
6171 !(current
->used_for_jit
)) {
6173 uint64_t, (uint64_t) current
->vme_start
,
6174 uint64_t, (uint64_t) current
->vme_end
,
6175 vm_prot_t
, new_prot
);
6176 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6178 (current_task()->bsd_info
6179 ? proc_name_address(current_task()->bsd_info
)
6182 new_prot
&= ~VM_PROT_EXECUTE
;
6183 if (VM_MAP_POLICY_WX_FAIL(map
)) {
6185 return KERN_PROTECTION_FAILURE
;
6190 * If the task has requested executable lockdown,
6192 * - adding executable protections OR
6193 * - adding write protections to an existing executable mapping.
6195 if (map
->map_disallow_new_exec
== TRUE
) {
6196 if ((new_prot
& VM_PROT_EXECUTE
) ||
6197 ((current
->protection
& VM_PROT_EXECUTE
) && (new_prot
& VM_PROT_WRITE
))) {
6199 return KERN_PROTECTION_FAILURE
;
6203 prev
= current
->vme_end
;
6204 current
= current
->vme_next
;
6209 end
== vm_map_round_page(prev
, VM_MAP_PAGE_MASK(map
))) {
6210 vm_map_entry_t prev_entry
;
6212 prev_entry
= current
->vme_prev
;
6213 if (prev_entry
!= vm_map_to_entry(map
) &&
6214 !prev_entry
->map_aligned
&&
6215 (vm_map_round_page(prev_entry
->vme_end
,
6216 VM_MAP_PAGE_MASK(map
))
6219 * The last entry in our range is not "map-aligned"
6220 * but it would have reached all the way to "end"
6221 * if it had been map-aligned, so this is not really
6222 * a hole in the range and we can proceed.
6227 #endif /* __arm64__ */
6231 return KERN_INVALID_ADDRESS
;
6235 * Go back and fix up protections.
6236 * Clip to start here if the range starts within
6241 if (current
!= vm_map_to_entry(map
)) {
6242 /* clip and unnest if necessary */
6243 vm_map_clip_start(map
, current
, start
);
6246 while ((current
!= vm_map_to_entry(map
)) &&
6247 (current
->vme_start
< end
)) {
6250 vm_map_clip_end(map
, current
, end
);
6252 if (current
->is_sub_map
) {
6253 /* clipping did unnest if needed */
6254 assert(!current
->use_pmap
);
6257 old_prot
= current
->protection
;
6260 current
->max_protection
= new_prot
;
6261 current
->protection
= new_prot
& old_prot
;
6263 current
->protection
= new_prot
;
6267 * Update physical map if necessary.
6268 * If the request is to turn off write protection,
6269 * we won't do it for real (in pmap). This is because
6270 * it would cause copy-on-write to fail. We've already
 * set the new protection in the map, so if a
6272 * write-protect fault occurred, it will be fixed up
6273 * properly, COW or not.
6275 if (current
->protection
!= old_prot
) {
6276 /* Look one level in we support nested pmaps */
6277 /* from mapped submaps which are direct entries */
6282 prot
= current
->protection
;
6283 if (current
->is_sub_map
|| (VME_OBJECT(current
) == NULL
) || (VME_OBJECT(current
) != compressor_object
)) {
6284 prot
&= ~VM_PROT_WRITE
;
6286 assert(!VME_OBJECT(current
)->code_signed
);
6287 assert(VME_OBJECT(current
)->copy_strategy
== MEMORY_OBJECT_COPY_NONE
);
6290 if (override_nx(map
, VME_ALIAS(current
)) && prot
) {
6291 prot
|= VM_PROT_EXECUTE
;
6294 #if DEVELOPMENT || DEBUG
6295 if (!(old_prot
& VM_PROT_EXECUTE
) &&
6296 (prot
& VM_PROT_EXECUTE
) &&
6297 panic_on_unsigned_execute
&&
6298 (proc_selfcsflags() & CS_KILL
)) {
6299 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, old_prot
, prot
);
6301 #endif /* DEVELOPMENT || DEBUG */
6303 if (pmap_has_prot_policy(map
->pmap
, current
->translated_allow_execute
, prot
)) {
6304 if (current
->wired_count
) {
6305 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
6306 map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, prot
, current
->wired_count
);
6309 /* If the pmap layer cares about this
6310 * protection type, force a fault for
6311 * each page so that vm_fault will
6312 * repopulate the page with the full
6313 * set of protections.
6316 * TODO: We don't seem to need this,
6317 * but this is due to an internal
6318 * implementation detail of
6319 * pmap_protect. Do we want to rely
6322 prot
= VM_PROT_NONE
;
6325 if (current
->is_sub_map
&& current
->use_pmap
) {
6326 pmap_protect(VME_SUBMAP(current
)->pmap
,
6331 if (prot
& VM_PROT_WRITE
) {
6332 if (VME_OBJECT(current
) == compressor_object
) {
6334 * For write requests on the
 * compressor, we will ask the
6336 * pmap layer to prevent us from
6337 * taking a write fault when we
6338 * attempt to access the mapping
6341 pmap_options
|= PMAP_OPTIONS_PROTECT_IMMEDIATE
;
6345 pmap_protect_options(map
->pmap
,
6353 current
= current
->vme_next
;
6357 while ((current
!= vm_map_to_entry(map
)) &&
6358 (current
->vme_start
<= end
)) {
6359 vm_map_simplify_entry(map
, current
);
6360 current
		current = current->vme_next;
	}

	vm_map_unlock(map);
	return KERN_SUCCESS;
}
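/*
 * Example (illustrative): making a range read-only for the current
 * protection while leaving the maximum protection untouched.  "map",
 * "start" and "end" are placeholders.
 *
 *	kr = vm_map_protect(map, start, end,
 *	    VM_PROT_READ,       (new current protection)
 *	    FALSE);             (set_max == FALSE: don't touch max_protection)
 */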
/*
 *	vm_map_inherit:
 *
 *	Sets the inheritance of the specified address
 *	range in the target map.  Inheritance
 *	affects how the map will be shared with
 *	child maps at the time of vm_map_fork.
 */
kern_return_t
vm_map_inherit(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_inherit_t    new_inheritance)
{
	vm_map_entry_t  entry;
	vm_map_entry_t  temp_entry;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &temp_entry)) {
		entry = temp_entry;
	} else {
		temp_entry = temp_entry->vme_next;
		entry = temp_entry;
	}

	/* first check entire range for submaps which can't support the */
	/* given inheritance. */
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		if (entry->is_sub_map) {
			if (new_inheritance == VM_INHERIT_COPY) {
				vm_map_unlock(map);
				return KERN_INVALID_ARGUMENT;
			}
		}

		entry = entry->vme_next;
	}

	entry = temp_entry;
	if (entry != vm_map_to_entry(map)) {
		/* clip and unnest if necessary */
		vm_map_clip_start(map, entry, start);
	}

	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		vm_map_clip_end(map, entry, end);
		if (entry->is_sub_map) {
			/* clip did unnest if needed */
			assert(!entry->use_pmap);
		}

		entry->inheritance = new_inheritance;

		entry = entry->vme_next;
	}

	vm_map_unlock(map);
	return KERN_SUCCESS;
}
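/*
 * Example (illustrative): marking a range so it is not propagated to
 * children created by vm_map_fork():
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_NONE);
 *
 * VM_INHERIT_COPY is rejected above for ranges backed by submaps.
 */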
/*
 * Update the accounting for the amount of wired memory in this map.  If the user has
 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
 */
static kern_return_t
add_wire_counts(
	vm_map_t        map,
	vm_map_entry_t  entry,
	boolean_t       user_wire)
{
	vm_map_size_t   size;

	if (user_wire) {
		unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

		/*
		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
		 * this map entry.
		 */

		if (entry->user_wired_count == 0) {
			size = entry->vme_end - entry->vme_start;

			/*
			 * Since this is the first time the user is wiring this map entry, check to see if we're
			 * exceeding the user wire limits.  There is a per map limit which is the smaller of either
			 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value.  There is also
			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
			 * limit, then we fail.
			 */

			if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
			    size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
				if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
					os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
				} else {
					os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
				}
				return KERN_RESOURCE_SHORTAGE;
			}

			/*
			 * The first time the user wires an entry, we also increment the wired_count and add this to
			 * the total that has been wired in the map.
			 */

			if (entry->wired_count >= MAX_WIRE_COUNT) {
				return KERN_FAILURE;
			}

			entry->wired_count++;
			map->user_wire_size += size;
		}

		if (entry->user_wired_count >= MAX_WIRE_COUNT) {
			return KERN_FAILURE;
		}

		entry->user_wired_count++;
	} else {
		/*
		 * The kernel's wiring the memory.  Just bump the count and continue.
		 */

		if (entry->wired_count >= MAX_WIRE_COUNT) {
			panic("vm_map_wire: too many wirings");
		}

		entry->wired_count++;
	}

	return KERN_SUCCESS;
}
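/*
 * Worked example (illustrative numbers): with map->user_wire_limit = 64 MB,
 * vm_per_task_user_wire_limit = 32 MB and map->user_wire_size = 30 MB, a
 * first-time user wire of a 4 MB entry fails with KERN_RESOURCE_SHORTAGE
 * because 4 MB + 30 MB exceeds MIN(64 MB, 32 MB) = 32 MB, even though the
 * per-map rlimit alone would have allowed it.  The global check against
 * vm_global_user_wire_limit is applied the same way using the system-wide
 * wired page count.
 */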
/*
 * Update the memory wiring accounting now that the given map entry is being unwired.
 */
static void
subtract_wire_counts(
	vm_map_t        map,
	vm_map_entry_t  entry,
	boolean_t       user_wire)
{
	if (user_wire) {
		/*
		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
		 */

		if (entry->user_wired_count == 1) {
			/*
			 * We're removing the last user wire reference.  Decrement the wired_count and the total
			 * user wired memory for this map.
			 */

			assert(entry->wired_count >= 1);
			entry->wired_count--;
			map->user_wire_size -= entry->vme_end - entry->vme_start;
		}

		assert(entry->user_wired_count >= 1);
		entry->user_wired_count--;
	} else {
		/*
		 * The kernel is unwiring the memory.  Just update the count.
		 */

		assert(entry->wired_count >= 1);
		entry->wired_count--;
	}
}
int cs_executable_wire = 0;
6549 * Sets the pageability of the specified address range in the
6550 * target map as wired. Regions specified as not pageable require
6551 * locked-down physical memory and physical page maps. The
6552 * access_type variable indicates types of accesses that must not
6553 * generate page faults. This is checked against protection of
6554 * memory being locked-down.
6556 * The map must not be locked, but a reference must remain to the
6557 * map throughout the call.
6559 static kern_return_t
6562 vm_map_offset_t start
,
6563 vm_map_offset_t end
,
6564 vm_prot_t caller_prot
,
6566 boolean_t user_wire
,
6568 vm_map_offset_t pmap_addr
,
6569 ppnum_t
*physpage_p
)
6571 vm_map_entry_t entry
;
6572 vm_prot_t access_type
;
6573 struct vm_map_entry
*first_entry
, tmp_entry
;
6575 vm_map_offset_t s
, e
;
6577 boolean_t need_wakeup
;
6578 boolean_t main_map
= FALSE
;
6579 wait_interrupt_t interruptible_state
;
6580 thread_t cur_thread
;
6581 unsigned int last_timestamp
;
6583 boolean_t wire_and_extract
;
6584 vm_prot_t extra_prots
;
6586 extra_prots
= VM_PROT_COPY
;
6587 extra_prots
|= VM_PROT_COPY_FAIL_IF_EXECUTABLE
;
6588 #if XNU_TARGET_OS_OSX
6589 if (map
->pmap
== kernel_pmap
||
6590 !vm_map_cs_enforcement(map
)) {
6591 extra_prots
&= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE
;
6593 #endif /* XNU_TARGET_OS_OSX */
6595 access_type
= (caller_prot
& VM_PROT_ALL
);
6597 wire_and_extract
= FALSE
;
6598 if (physpage_p
!= NULL
) {
6600 * The caller wants the physical page number of the
6601 * wired page. We return only one physical page number
6602 * so this works for only one page at a time.
6604 if ((end
- start
) != PAGE_SIZE
) {
6605 return KERN_INVALID_ARGUMENT
;
6607 wire_and_extract
= TRUE
;
6612 if (map_pmap
== NULL
) {
6615 last_timestamp
= map
->timestamp
;
6617 VM_MAP_RANGE_CHECK(map
, start
, end
);
6618 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
6619 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
6622 /* We wired what the caller asked for, zero pages */
6624 return KERN_SUCCESS
;
6627 need_wakeup
= FALSE
;
6628 cur_thread
= current_thread();
6633 if (vm_map_lookup_entry(map
, s
, &first_entry
)) {
6634 entry
= first_entry
;
6636 * vm_map_clip_start will be done later.
6637 * We don't want to unnest any nested submaps here !
6640 /* Start address is not in map */
6641 rc
= KERN_INVALID_ADDRESS
;
6645 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
6647 * At this point, we have wired from "start" to "s".
6648 * We still need to wire from "s" to "end".
6650 * "entry" hasn't been clipped, so it could start before "s"
6651 * and/or end after "end".
6654 /* "e" is how far we want to wire in this entry */
6661 * If another thread is wiring/unwiring this entry then
6662 * block after informing other thread to wake us up.
6664 if (entry
->in_transition
) {
6665 wait_result_t wait_result
;
6668 * We have not clipped the entry. Make sure that
6669 * the start address is in range so that the lookup
6670 * below will succeed.
6671 * "s" is the current starting point: we've already
6672 * wired from "start" to "s" and we still have
6673 * to wire from "s" to "end".
6676 entry
->needs_wakeup
= TRUE
;
6679 * wake up anybody waiting on entries that we have
6683 vm_map_entry_wakeup(map
);
6684 need_wakeup
= FALSE
;
6687 * User wiring is interruptible
6689 wait_result
= vm_map_entry_wait(map
,
6690 (user_wire
) ? THREAD_ABORTSAFE
:
6692 if (user_wire
&& wait_result
== THREAD_INTERRUPTED
) {
6694 * undo the wirings we have done so far
6695 * We do not clear the needs_wakeup flag,
6696 * because we cannot tell if we were the
6704 * Cannot avoid a lookup here. reset timestamp.
6706 last_timestamp
= map
->timestamp
;
6709 * The entry could have been clipped, look it up again.
 * Worst that can happen is, it may not exist anymore.
6712 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
6714 * User: undo everything upto the previous
6715 * entry. let vm_map_unwire worry about
6716 * checking the validity of the range.
6721 entry
= first_entry
;
6725 if (entry
->is_sub_map
) {
6726 vm_map_offset_t sub_start
;
6727 vm_map_offset_t sub_end
;
6728 vm_map_offset_t local_start
;
6729 vm_map_offset_t local_end
;
6732 if (wire_and_extract
) {
6734 * Wiring would result in copy-on-write
6735 * which would not be compatible with
6736 * the sharing we have with the original
6737 * provider of this memory.
6739 rc
= KERN_INVALID_ARGUMENT
;
6743 vm_map_clip_start(map
, entry
, s
);
6744 vm_map_clip_end(map
, entry
, end
);
6746 sub_start
= VME_OFFSET(entry
);
6747 sub_end
= entry
->vme_end
;
6748 sub_end
+= VME_OFFSET(entry
) - entry
->vme_start
;
6750 local_end
= entry
->vme_end
;
6751 if (map_pmap
== NULL
) {
6753 vm_object_offset_t offset
;
6756 vm_map_entry_t local_entry
;
6757 vm_map_version_t version
;
6758 vm_map_t lookup_map
;
6760 if (entry
->use_pmap
) {
6761 pmap
= VME_SUBMAP(entry
)->pmap
;
6762 /* ppc implementation requires that */
6763 /* submaps pmap address ranges line */
6764 /* up with parent map */
6766 pmap_addr
= sub_start
;
6774 if (entry
->wired_count
) {
6775 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6780 * The map was not unlocked:
6781 * no need to goto re-lookup.
6782 * Just go directly to next entry.
6784 entry
= entry
->vme_next
;
6785 s
= entry
->vme_start
;
6789 /* call vm_map_lookup_locked to */
6790 /* cause any needs copy to be */
6792 local_start
= entry
->vme_start
;
6794 vm_map_lock_write_to_read(map
);
6795 rc
= vm_map_lookup_locked(
6796 &lookup_map
, local_start
,
6797 (access_type
| extra_prots
),
6798 OBJECT_LOCK_EXCLUSIVE
,
6800 &offset
, &prot
, &wired
,
6803 if (rc
!= KERN_SUCCESS
) {
6804 vm_map_unlock_read(lookup_map
);
6805 assert(map_pmap
== NULL
);
6806 vm_map_unwire(map
, start
,
6810 vm_object_unlock(object
);
6811 if (real_map
!= lookup_map
) {
6812 vm_map_unlock(real_map
);
6814 vm_map_unlock_read(lookup_map
);
6817 /* we unlocked, so must re-lookup */
6818 if (!vm_map_lookup_entry(map
,
6826 * entry could have been "simplified",
6829 entry
= local_entry
;
6830 assert(s
== local_start
);
6831 vm_map_clip_start(map
, entry
, s
);
6832 vm_map_clip_end(map
, entry
, end
);
6833 /* re-compute "e" */
6839 /* did we have a change of type? */
6840 if (!entry
->is_sub_map
) {
6841 last_timestamp
= map
->timestamp
;
6845 local_start
= entry
->vme_start
;
6849 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6853 entry
->in_transition
= TRUE
;
6856 rc
= vm_map_wire_nested(VME_SUBMAP(entry
),
6859 user_wire
, pmap
, pmap_addr
,
6864 * Find the entry again. It could have been clipped
6865 * after we unlocked the map.
6867 if (!vm_map_lookup_entry(map
, local_start
,
6869 panic("vm_map_wire: re-lookup failed");
6871 entry
= first_entry
;
6873 assert(local_start
== s
);
6874 /* re-compute "e" */
6880 last_timestamp
= map
->timestamp
;
6881 while ((entry
!= vm_map_to_entry(map
)) &&
6882 (entry
->vme_start
< e
)) {
6883 assert(entry
->in_transition
);
6884 entry
->in_transition
= FALSE
;
6885 if (entry
->needs_wakeup
) {
6886 entry
->needs_wakeup
= FALSE
;
6889 if (rc
!= KERN_SUCCESS
) {/* from vm_*_wire */
6890 subtract_wire_counts(map
, entry
, user_wire
);
6892 entry
= entry
->vme_next
;
6894 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6898 /* no need to relookup again */
6899 s
= entry
->vme_start
;
6904 * If this entry is already wired then increment
6905 * the appropriate wire reference count.
6907 if (entry
->wired_count
) {
6908 if ((entry
->protection
& access_type
) != access_type
) {
6909 /* found a protection problem */
6913 * We should always return an error
6914 * in this case but since we didn't
6915 * enforce it before, let's do
6916 * it only for the new "wire_and_extract"
6917 * code path for now...
6919 if (wire_and_extract
) {
6920 rc
= KERN_PROTECTION_FAILURE
;
6926 * entry is already wired down, get our reference
6927 * after clipping to our range.
6929 vm_map_clip_start(map
, entry
, s
);
6930 vm_map_clip_end(map
, entry
, end
);
6932 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6936 if (wire_and_extract
) {
6938 vm_object_offset_t offset
;
6942 * We don't have to "wire" the page again
 * but we still have to "extract" its
6944 * physical page number, after some sanity
6947 assert((entry
->vme_end
- entry
->vme_start
)
6949 assert(!entry
->needs_copy
);
6950 assert(!entry
->is_sub_map
);
6951 assert(VME_OBJECT(entry
));
6952 if (((entry
->vme_end
- entry
->vme_start
)
6954 entry
->needs_copy
||
6955 entry
->is_sub_map
||
6956 VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
6957 rc
= KERN_INVALID_ARGUMENT
;
6961 object
= VME_OBJECT(entry
);
6962 offset
= VME_OFFSET(entry
);
6963 /* need exclusive lock to update m->dirty */
6964 if (entry
->protection
& VM_PROT_WRITE
) {
6965 vm_object_lock(object
);
6967 vm_object_lock_shared(object
);
6969 m
= vm_page_lookup(object
, offset
);
6970 assert(m
!= VM_PAGE_NULL
);
6971 assert(VM_PAGE_WIRED(m
));
6972 if (m
!= VM_PAGE_NULL
&& VM_PAGE_WIRED(m
)) {
6973 *physpage_p
= VM_PAGE_GET_PHYS_PAGE(m
);
6974 if (entry
->protection
& VM_PROT_WRITE
) {
6975 vm_object_lock_assert_exclusive(
6977 m
->vmp_dirty
= TRUE
;
6980 /* not already wired !? */
6983 vm_object_unlock(object
);
6986 /* map was not unlocked: no need to relookup */
6987 entry
= entry
->vme_next
;
6988 s
= entry
->vme_start
;
		/*
		 * Unwired entry or wire request transmitted via submap
		 */

		/*
		 * Wiring would copy the pages to the shadow object.
		 * The shadow object would not be code-signed so
		 * attempting to execute code from these copied pages
		 * would trigger a code-signing violation.
		 */
		if ((entry->protection & VM_PROT_EXECUTE)
#if XNU_TARGET_OS_OSX
		    &&
		    map->pmap != kernel_pmap &&
		    (vm_map_cs_enforcement(map)
#if __arm64__
		    || !VM_MAP_IS_EXOTIC(map)
#endif /* __arm64__ */
		    )
#endif /* XNU_TARGET_OS_OSX */
		    ) {
#if MACH_ASSERT
			printf("pid %d[%s] wiring executable range from "
			    "0x%llx to 0x%llx: rejected to preserve "
			    /* ... */
			    (current_task()->bsd_info
			    ? proc_name_address(current_task()->bsd_info)
			    : "?"),
			    (uint64_t) entry->vme_start,
			    (uint64_t) entry->vme_end);
#endif /* MACH_ASSERT */
			DTRACE_VM2(cs_executable_wire,
			    uint64_t, (uint64_t)entry->vme_start,
			    uint64_t, (uint64_t)entry->vme_end);
			cs_executable_wire++;
			rc = KERN_PROTECTION_FAILURE;
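/*
 * Illustrative sketch (compiled out): the rejection above boils down to
 * "refuse to wire an executable range of a code-signing-enforced user map,
 * since wiring may copy pages into a shadow object that is not code-signed".
 * The helper below is hypothetical and only restates that check.
 */
#if 0
static boolean_t
wire_would_break_code_signing(vm_map_t map, vm_map_entry_t entry)
{
	/* executable mapping in a non-kernel, CS-enforced map? */
	return (entry->protection & VM_PROT_EXECUTE) &&
	       map->pmap != kernel_pmap &&
	       vm_map_cs_enforcement(map);
}
#endif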
		/*
		 * Perform actions of vm_map_lookup that need the write
		 * lock on the map: create a shadow object for a
		 * copy-on-write region, or an object for a zero-fill
		 * region.
		 */
		size = entry->vme_end - entry->vme_start;
		/*
		 * If wiring a copy-on-write page, we need to copy it now
		 * even if we're only (currently) requesting read access.
		 * This is aggressive, but once it's wired we can't move it.
		 */
		if (entry->needs_copy) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should not be "needs_copy"
				 */
				rc = KERN_INVALID_ARGUMENT;
				/* ... */
			}

			VME_OBJECT_SHADOW(entry, size);
			entry->needs_copy = FALSE;
		} else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
			if (wire_and_extract) {
				/*
				 * We're supposed to share with the original
				 * provider so should already have an object.
				 */
				rc = KERN_INVALID_ARGUMENT;
				/* ... */
			}
			VME_OBJECT_SET(entry, vm_object_allocate(size));
			VME_OFFSET_SET(entry, (vm_object_offset_t)0);
			assert(entry->use_pmap);
		}

		vm_map_clip_start(map, entry, s);
		vm_map_clip_end(map, entry, end);
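/*
 * Illustrative sketch (compiled out): before wiring, an entry must be backed
 * by a real object.  A copy-on-write entry gets a shadow object now (so the
 * wired pages cannot be pulled out from under us later) and an object-less
 * entry gets a fresh zero-fill object.  The helper name is hypothetical and
 * only restates the two cases handled above.
 */
#if 0
static void
wire_prepare_backing_object(vm_map_entry_t entry, vm_map_size_t size)
{
	if (entry->needs_copy) {
		/* resolve COW now: push a shadow object in front of the entry */
		VME_OBJECT_SHADOW(entry, size);
		entry->needs_copy = FALSE;
	} else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
		/* zero-fill region: give it an anonymous object */
		VME_OBJECT_SET(entry, vm_object_allocate(size));
		VME_OFFSET_SET(entry, (vm_object_offset_t)0);
	}
}
#endif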
7074 /* re-compute "e" */
7081 * Check for holes and protection mismatch.
7082 * Holes: Next entry should be contiguous unless this
7083 * is the end of the region.
7084 * Protection: Access requested must be allowed, unless
7085 * wiring is by protection class
7087 if ((entry
->vme_end
< end
) &&
7088 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7089 (entry
->vme_next
->vme_start
> entry
->vme_end
))) {
7091 rc
= KERN_INVALID_ADDRESS
;
7094 if ((entry
->protection
& access_type
) != access_type
) {
7095 /* found a protection problem */
7096 rc
= KERN_PROTECTION_FAILURE
;
7100 assert(entry
->wired_count
== 0 && entry
->user_wired_count
== 0);
7102 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
7106 entry
->in_transition
= TRUE
;
7109 * This entry might get split once we unlock the map.
7110 * In vm_fault_wire(), we need the current range as
7111 * defined by this entry. In order for this to work
7112 * along with a simultaneous clip operation, we make a
7113 * temporary copy of this entry and use that for the
7114 * wiring. Note that the underlying objects do not
7115 * change during a clip.
7120 * The in_transition state guarentees that the entry
7121 * (or entries for this range, if split occured) will be
7122 * there when the map lock is acquired for the second time.
7126 if (!user_wire
&& cur_thread
!= THREAD_NULL
) {
7127 interruptible_state
= thread_interrupt_level(THREAD_UNINT
);
7129 interruptible_state
= THREAD_UNINT
;
7133 rc
= vm_fault_wire(map
,
7134 &tmp_entry
, caller_prot
, tag
, map_pmap
, pmap_addr
,
7137 rc
= vm_fault_wire(map
,
7138 &tmp_entry
, caller_prot
, tag
, map
->pmap
,
7139 tmp_entry
.vme_start
,
7143 if (!user_wire
&& cur_thread
!= THREAD_NULL
) {
7144 thread_interrupt_level(interruptible_state
);
7149 if (last_timestamp
+ 1 != map
->timestamp
) {
7151 * Find the entry again. It could have been clipped
7152 * after we unlocked the map.
7154 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
7156 panic("vm_map_wire: re-lookup failed");
7159 entry
= first_entry
;
7162 last_timestamp
= map
->timestamp
;
7164 while ((entry
!= vm_map_to_entry(map
)) &&
7165 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7166 assert(entry
->in_transition
);
7167 entry
->in_transition
= FALSE
;
7168 if (entry
->needs_wakeup
) {
7169 entry
->needs_wakeup
= FALSE
;
7172 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
7173 subtract_wire_counts(map
, entry
, user_wire
);
7175 entry
= entry
->vme_next
;
7178 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
7182 if ((entry
!= vm_map_to_entry(map
)) && /* we still have entries in the map */
7183 (tmp_entry
.vme_end
!= end
) && /* AND, we are not at the end of the requested range */
7184 (entry
->vme_start
!= tmp_entry
.vme_end
)) { /* AND, the next entry is not contiguous. */
7185 /* found a "new" hole */
7186 s
= tmp_entry
.vme_end
;
7187 rc
= KERN_INVALID_ADDRESS
;
7191 s
= entry
->vme_start
;
7192 } /* end while loop through map entries */
7195 if (rc
== KERN_SUCCESS
) {
7196 /* repair any damage we may have made to the VM map */
7197 vm_map_simplify_range(map
, start
, end
);
7203 * wake up anybody waiting on entries we wired.
7206 vm_map_entry_wakeup(map
);
7209 if (rc
!= KERN_SUCCESS
) {
7210 /* undo what has been wired so far */
7211 vm_map_unwire_nested(map
, start
, s
, user_wire
,
7212 map_pmap
, pmap_addr
);
kern_return_t
vm_map_wire_external(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t caller_prot,
	boolean_t user_wire)
{
	kern_return_t kret;

	kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
	    user_wire, (pmap_t)NULL, 0, NULL);
	return kret;
}

kern_return_t
vm_map_wire_kernel(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t caller_prot,
	vm_tag_t tag,
	boolean_t user_wire)
{
	kern_return_t kret;

	kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
	    user_wire, (pmap_t)NULL, 0, NULL);
	return kret;
}

kern_return_t
vm_map_wire_and_extract_external(
	vm_map_t map,
	vm_map_offset_t start,
	vm_prot_t caller_prot,
	boolean_t user_wire,
	ppnum_t *physpage_p)
{
	kern_return_t kret;

	kret = vm_map_wire_nested(map,
	    start,
	    start + VM_MAP_PAGE_SIZE(map),
	    /* ... */
	    physpage_p);
	if (kret != KERN_SUCCESS &&
	    physpage_p != NULL) {
		*physpage_p = 0;
	}
	return kret;
}

kern_return_t
vm_map_wire_and_extract_kernel(
	vm_map_t map,
	vm_map_offset_t start,
	vm_prot_t caller_prot,
	vm_tag_t tag,
	boolean_t user_wire,
	ppnum_t *physpage_p)
{
	kern_return_t kret;

	kret = vm_map_wire_nested(map,
	    start,
	    start + VM_MAP_PAGE_SIZE(map),
	    /* ... */
	    physpage_p);
	if (kret != KERN_SUCCESS &&
	    physpage_p != NULL) {
		*physpage_p = 0;
	}
	return kret;
}
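/*
 * Illustrative sketch (compiled out): a kernel client typically brackets a
 * wired access with one of the wrappers above and a matching unwire of the
 * same range.  Assumes the vm_map_wire_kernel() wrapper as reconstructed
 * above; the tag, protections and range below are made up.
 */
#if 0
static kern_return_t
example_wire_then_unwire(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	kern_return_t kr;

	/* wire for read/write on behalf of the kernel (user_wire == FALSE) */
	kr = vm_map_wire_kernel(map, start, end,
	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_NONE, FALSE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* ... access the pages while they are resident and pinned ... */
	/* undo the kernel wiring; must cover the same wired range */
	return vm_map_unwire(map, start, end, FALSE);
}
#endif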
/*
 *	vm_map_unwire:
 *
 *	Sets the pageability of the specified address range in the target
 *	as pageable. Regions specified must have been wired previously.
 *
 *	The map must not be locked, but a reference must remain to the map
 *	throughout the call.
 *
 *	Kernel will panic on failures. User unwire ignores holes and
 *	unwired and intransition entries to avoid losing memory by leaving
 *	it unwired.
 */
static kern_return_t
vm_map_unwire_nested(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t user_wire,
	pmap_t map_pmap,
	vm_map_offset_t pmap_addr)
{
	vm_map_entry_t entry;
	struct vm_map_entry *first_entry, tmp_entry;
	boolean_t need_wakeup;
	boolean_t main_map = FALSE;
	unsigned int last_timestamp;

	vm_map_lock(map);
	if (map_pmap == NULL) {
		main_map = TRUE;
	}
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

	if (start == end) {
		/* We unwired what the caller asked for: zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}
7349 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
7350 entry
= first_entry
;
7352 * vm_map_clip_start will be done later.
7353 * We don't want to unnest any nested sub maps here !
7357 panic("vm_map_unwire: start not found");
7359 /* Start address is not in map. */
7361 return KERN_INVALID_ADDRESS
;
7364 if (entry
->superpage_size
) {
7365 /* superpages are always wired */
7367 return KERN_INVALID_ADDRESS
;
7370 need_wakeup
= FALSE
;
7371 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
7372 if (entry
->in_transition
) {
7375 * Another thread is wiring down this entry. Note
7376 * that if it is not for the other thread we would
7377 * be unwiring an unwired entry. This is not
7378 * permitted. If we wait, we will be unwiring memory
7382 * Another thread is unwiring this entry. We did not
7383 * have a reference to it, because if we did, this
7384 * entry will not be getting unwired now.
7389 * This could happen: there could be some
7390 * overlapping vslock/vsunlock operations
7392 * We should probably just wait and retry,
7393 * but then we have to be careful that this
7394 * entry could get "simplified" after
7395 * "in_transition" gets unset and before
7396 * we re-lookup the entry, so we would
7397 * have to re-clip the entry to avoid
7398 * re-unwiring what we have already unwired...
7399 * See vm_map_wire_nested().
7401 * Or we could just ignore "in_transition"
7402 * here and proceed to decement the wired
7403 * count(s) on this entry. That should be fine
7404 * as long as "wired_count" doesn't drop all
7405 * the way to 0 (and we should panic if THAT
7408 panic("vm_map_unwire: in_transition entry");
7411 entry
= entry
->vme_next
;
7415 if (entry
->is_sub_map
) {
7416 vm_map_offset_t sub_start
;
7417 vm_map_offset_t sub_end
;
7418 vm_map_offset_t local_end
;
7421 vm_map_clip_start(map
, entry
, start
);
7422 vm_map_clip_end(map
, entry
, end
);
7424 sub_start
= VME_OFFSET(entry
);
7425 sub_end
= entry
->vme_end
- entry
->vme_start
;
7426 sub_end
+= VME_OFFSET(entry
);
7427 local_end
= entry
->vme_end
;
7428 if (map_pmap
== NULL
) {
7429 if (entry
->use_pmap
) {
7430 pmap
= VME_SUBMAP(entry
)->pmap
;
7431 pmap_addr
= sub_start
;
7436 if (entry
->wired_count
== 0 ||
7437 (user_wire
&& entry
->user_wired_count
== 0)) {
7439 panic("vm_map_unwire: entry is unwired");
7441 entry
= entry
->vme_next
;
7447 * Holes: Next entry should be contiguous unless
7448 * this is the end of the region.
7450 if (((entry
->vme_end
< end
) &&
7451 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7452 (entry
->vme_next
->vme_start
7453 > entry
->vme_end
)))) {
7455 panic("vm_map_unwire: non-contiguous region");
7458 * entry = entry->vme_next;
7463 subtract_wire_counts(map
, entry
, user_wire
);
7465 if (entry
->wired_count
!= 0) {
7466 entry
= entry
->vme_next
;
7470 entry
->in_transition
= TRUE
;
7471 tmp_entry
= *entry
;/* see comment in vm_map_wire() */
7474 * We can unlock the map now. The in_transition state
7475 * guarantees existance of the entry.
7478 vm_map_unwire_nested(VME_SUBMAP(entry
),
7479 sub_start
, sub_end
, user_wire
, pmap
, pmap_addr
);
7482 if (last_timestamp
+ 1 != map
->timestamp
) {
7484 * Find the entry again. It could have been
7485 * clipped or deleted after we unlocked the map.
7487 if (!vm_map_lookup_entry(map
,
7488 tmp_entry
.vme_start
,
7491 panic("vm_map_unwire: re-lookup failed");
7493 entry
= first_entry
->vme_next
;
7495 entry
= first_entry
;
7498 last_timestamp
= map
->timestamp
;
7501 * clear transition bit for all constituent entries
7502 * that were in the original entry (saved in
7503 * tmp_entry). Also check for waiters.
7505 while ((entry
!= vm_map_to_entry(map
)) &&
7506 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7507 assert(entry
->in_transition
);
7508 entry
->in_transition
= FALSE
;
7509 if (entry
->needs_wakeup
) {
7510 entry
->needs_wakeup
= FALSE
;
7513 entry
= entry
->vme_next
;
7518 vm_map_unwire_nested(VME_SUBMAP(entry
),
7519 sub_start
, sub_end
, user_wire
, map_pmap
,
7523 if (last_timestamp
+ 1 != map
->timestamp
) {
7525 * Find the entry again. It could have been
7526 * clipped or deleted after we unlocked the map.
7528 if (!vm_map_lookup_entry(map
,
7529 tmp_entry
.vme_start
,
7532 panic("vm_map_unwire: re-lookup failed");
7534 entry
= first_entry
->vme_next
;
7536 entry
= first_entry
;
7539 last_timestamp
= map
->timestamp
;
7544 if ((entry
->wired_count
== 0) ||
7545 (user_wire
&& entry
->user_wired_count
== 0)) {
7547 panic("vm_map_unwire: entry is unwired");
7550 entry
= entry
->vme_next
;
7554 assert(entry
->wired_count
> 0 &&
7555 (!user_wire
|| entry
->user_wired_count
> 0));
7557 vm_map_clip_start(map
, entry
, start
);
7558 vm_map_clip_end(map
, entry
, end
);
7562 * Holes: Next entry should be contiguous unless
7563 * this is the end of the region.
7565 if (((entry
->vme_end
< end
) &&
7566 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7567 (entry
->vme_next
->vme_start
> entry
->vme_end
)))) {
7569 panic("vm_map_unwire: non-contiguous region");
7571 entry
= entry
->vme_next
;
7575 subtract_wire_counts(map
, entry
, user_wire
);
7577 if (entry
->wired_count
!= 0) {
7578 entry
= entry
->vme_next
;
7582 if (entry
->zero_wired_pages
) {
7583 entry
->zero_wired_pages
= FALSE
;
7586 entry
->in_transition
= TRUE
;
7587 tmp_entry
= *entry
; /* see comment in vm_map_wire() */
7590 * We can unlock the map now. The in_transition state
7591 * guarantees existance of the entry.
7595 vm_fault_unwire(map
,
7596 &tmp_entry
, FALSE
, map_pmap
, pmap_addr
);
7598 vm_fault_unwire(map
,
7599 &tmp_entry
, FALSE
, map
->pmap
,
7600 tmp_entry
.vme_start
);
7604 if (last_timestamp
+ 1 != map
->timestamp
) {
7606 * Find the entry again. It could have been clipped
7607 * or deleted after we unlocked the map.
7609 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
7612 panic("vm_map_unwire: re-lookup failed");
7614 entry
= first_entry
->vme_next
;
7616 entry
= first_entry
;
7619 last_timestamp
= map
->timestamp
;
7622 * clear transition bit for all constituent entries that
7623 * were in the original entry (saved in tmp_entry). Also
7624 * check for waiters.
7626 while ((entry
!= vm_map_to_entry(map
)) &&
7627 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7628 assert(entry
->in_transition
);
7629 entry
->in_transition
= FALSE
;
7630 if (entry
->needs_wakeup
) {
7631 entry
->needs_wakeup
= FALSE
;
7634 entry
= entry
->vme_next
;
7639 * We might have fragmented the address space when we wired this
7640 * range of addresses. Attempt to re-coalesce these VM map entries
7641 * with their neighbors now that they're no longer wired.
7642 * Under some circumstances, address space fragmentation can
7643 * prevent VM object shadow chain collapsing, which can cause
7646 vm_map_simplify_range(map
, start
, end
);
7650 * wake up anybody waiting on entries that we have unwired.
7653 vm_map_entry_wakeup(map
);
	return KERN_SUCCESS;
}

kern_return_t
vm_map_unwire(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t user_wire)
{
	return vm_map_unwire_nested(map, start, end,
	           user_wire, (pmap_t)NULL, 0);
}
/*
 *	vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
 */
void
vm_map_entry_delete(
	vm_map_t map,
	vm_map_entry_t entry)
{
	vm_map_offset_t s, e;
	vm_object_t object;
	vm_map_t submap;

	s = entry->vme_start;
	e = entry->vme_end;
	assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
	assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
	if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
		assert(page_aligned(s));
		assert(page_aligned(e));
	}
	if (entry->map_aligned == TRUE) {
		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
	}
	assert(entry->wired_count == 0);
	assert(entry->user_wired_count == 0);
	assert(!entry->permanent);

	if (entry->is_sub_map) {
		object = VM_OBJECT_NULL;
		submap = VME_SUBMAP(entry);
	} else {
		submap = VM_MAP_NULL;
		object = VME_OBJECT(entry);
	}

	vm_map_store_entry_unlink(map, entry);
	map->size -= e - s;

	vm_map_entry_dispose(map, entry);

	vm_map_unlock(map);
	/*
	 * Deallocate the object only after removing all
	 * pmap entries pointing to its pages.
	 */
	if (submap) {
		vm_map_deallocate(submap);
	} else {
		vm_object_deallocate(object);
	}
}
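/*
 * Illustrative sketch (compiled out): the ordering above matters.  The entry
 * is unlinked and disposed while the map is still locked, but the backing
 * submap or VM object is only released after the map lock is dropped, since
 * object deallocation may do substantial work of its own.  The helper below
 * is a hypothetical restatement of that ordering for a non-submap entry.
 */
#if 0
static void
example_entry_teardown_order(vm_map_t map, vm_map_entry_t entry)
{
	vm_object_t object = entry->is_sub_map ? VM_OBJECT_NULL : VME_OBJECT(entry);

	/* 1. under the map lock: detach the entry from the map */
	vm_map_store_entry_unlink(map, entry);
	vm_map_entry_dispose(map, entry);
	/* 2. drop the map lock */
	vm_map_unlock(map);
	/* 3. only now release the (possibly last) reference on the backing object */
	if (object != VM_OBJECT_NULL) {
		vm_object_deallocate(object);
	}
}
#endif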
void
vm_map_submap_pmap_clean(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_t sub_map,
	vm_map_offset_t offset)
{
	vm_map_offset_t submap_start;
	vm_map_offset_t submap_end;
	vm_map_size_t remove_size;
	vm_map_entry_t entry;

	submap_end = offset + (end - start);
	submap_start = offset;

	vm_map_lock_read(sub_map);
	if (vm_map_lookup_entry(sub_map, offset, &entry)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (offset > entry->vme_start) {
			remove_size -= offset - entry->vme_start;
		}
		if (submap_end < entry->vme_end) {
			remove_size -=
			    entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				start,
				start + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					(VME_OFFSET(entry) +
					/* ... */
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
				    (addr64_t)start,
				    (addr64_t)(start + remove_size));
			}
		}
	}

	entry = entry->vme_next;

	while ((entry != vm_map_to_entry(sub_map))
	    && (entry->vme_start < submap_end)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (submap_end < entry->vme_end) {
			remove_size -= entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				(start + entry->vme_start) - offset,
				((start + entry->vme_start) - offset) + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				vm_object_pmap_protect_options(
					/* ... */
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
				    (addr64_t)((start + entry->vme_start)
				    - offset),
				    (addr64_t)(((start + entry->vme_start)
				    - offset) + remove_size));
			}
		}
		entry = entry->vme_next;
	}
	vm_map_unlock_read(sub_map);
}
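/*
 * Illustrative sketch (compiled out): for each sub-map entry in range, the
 * routine above picks one of two ways to tear down hardware translations.
 * The helper name is hypothetical and only restates that choice.
 */
#if 0
static void
example_clean_one_range(vm_map_t map, vm_object_t object,
    vm_map_offset_t start, vm_map_size_t size)
{
	if (map->mapped_in_other_pmaps && object != VM_OBJECT_NULL) {
		/*
		 * The object may be mapped by several pmaps: walk the object
		 * (vm_object_pmap_protect_options() with PMAP_OPTIONS_REMOVE,
		 * as above) rather than any single pmap.
		 */
	} else {
		/* single pmap: remove the translations directly */
		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)(start + size));
	}
}
#endif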
/*
 * virt_memory_guard_ast:
 *
 * Handle the AST callout for a virtual memory guard.
 * raise an EXC_GUARD exception and terminate the task
 * if configured to do so.
 */
void
virt_memory_guard_ast(
	thread_t thread,
	mach_exception_data_type_t code,
	mach_exception_data_type_t subcode)
{
	task_t task = thread->task;
	uint32_t behavior;

	assert(task != kernel_task);
	assert(task == current_task());

	behavior = task->task_exc_guard;

	/* Is delivery enabled */
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return;
	}

	/* If only once, make sure we're that once */
	while (behavior & TASK_EXC_GUARD_VM_ONCE) {
		uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;

		if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
			break;
		}
		behavior = task->task_exc_guard;
		if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
			return;
		}
	}

	/* Raise exception via corpse fork or synchronously */
	if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
	    (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
		task_violated_guard(code, subcode, NULL);
	} else {
		task_exception_notify(EXC_GUARD, code, subcode);
	}

	/* Terminate the task if desired */
	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
		task_bsdtask_kill(current_task());
	}
}
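/*
 * Illustrative sketch (compiled out): the task_exc_guard bits consulted above
 * act as a small policy table.  The helper below is hypothetical and only
 * restates how the bits combine.
 */
#if 0
static void
describe_vm_guard_policy(uint32_t behavior)
{
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return;                 /* exception delivery disabled */
	}
	/* TASK_EXC_GUARD_VM_ONCE: DELIVER is atomically cleared after one use */
	if ((behavior & TASK_EXC_GUARD_VM_CORPSE) &&
	    (behavior & TASK_EXC_GUARD_VM_FATAL) == 0) {
		/* non-fatal: raise the EXC_GUARD against a corpse fork */
	} else {
		/* raise the EXC_GUARD synchronously against the live task */
	}
	if (behavior & TASK_EXC_GUARD_VM_FATAL) {
		/* and then kill the task */
	}
}
#endif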
/*
 * vm_map_guard_exception:
 *
 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
 *
 * Right now, we do this when we find nothing mapped, or a
 * gap in the mapping when a user address space deallocate
 * was requested. We report the address of the first gap found.
 */
static void
vm_map_guard_exception(
	vm_map_offset_t gap_start,
	unsigned reason)
{
	mach_exception_code_t code = 0;
	unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
	unsigned int target = 0; /* should we pass in pid associated with map? */
	mach_exception_data_type_t subcode = (uint64_t)gap_start;
	boolean_t fatal = FALSE;

	task_t task = current_task();

	/* Can't deliver exceptions to kernel task */
	if (task == kernel_task) {
		return;
	}

	EXC_GUARD_ENCODE_TYPE(code, guard_type);
	EXC_GUARD_ENCODE_FLAVOR(code, reason);
	EXC_GUARD_ENCODE_TARGET(code, target);

	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
		fatal = TRUE;
	}
	thread_guard_violation(current_thread(), code, subcode, fatal);
}
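/*
 * Illustrative sketch (compiled out): the mach_exception_code_t built above
 * packs the guard type, the flavor (reason) and a target identifier into one
 * 64-bit code, with the faulting address carried in the subcode.  The values
 * below are made up.
 */
#if 0
static void
example_encode_vm_guard(vm_map_offset_t gap_start, unsigned reason)
{
	mach_exception_code_t code = 0;
	mach_exception_data_type_t subcode = (uint64_t)gap_start;

	EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_VIRT_MEMORY);
	EXC_GUARD_ENCODE_FLAVOR(code, reason);   /* e.g. kGUARD_EXC_DEALLOC_GAP */
	EXC_GUARD_ENCODE_TARGET(code, 0);
	(void)code;
	(void)subcode;
}
#endif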
/*
 *	vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target map.
 *	Removes all user wirings. Unwires one kernel wiring if
 *	VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
 *
 *	This routine is called with map locked and leaves map locked.
 */
static kern_return_t
vm_map_delete(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	int flags,
	vm_map_t zap_map)
{
	vm_map_entry_t entry, next;
	struct vm_map_entry *first_entry, tmp_entry;
	vm_map_offset_t s;
	vm_object_t object;
	boolean_t need_wakeup;
	unsigned int last_timestamp = ~0; /* unlikely value */
	int interruptible;
	vm_map_offset_t gap_start;
	__unused vm_map_offset_t save_start = start;
	__unused vm_map_offset_t save_end = end;
	const vm_map_offset_t FIND_GAP = 1;     /* a not page aligned value */
	const vm_map_offset_t GAPS_OK = 2;      /* a different not page aligned value */

	if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
		gap_start = FIND_GAP;
	} else {
		gap_start = GAPS_OK;
	}

	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
	    THREAD_ABORTSAFE : THREAD_UNINT;

	/*
	 * All our DMA I/O operations in IOKit are currently done by
	 * wiring through the map entries of the task requesting the I/O.
	 * Because of this, we must always wait for kernel wirings
	 * to go away on the entries before deleting them.
	 *
	 * Any caller who wants to actually remove a kernel wiring
	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
	 * properly remove one wiring instead of blasting through
	 * them all.
	 */
	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7970 * Find the start of the region, and clip it
7972 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
7973 entry
= first_entry
;
7974 if (map
== kalloc_map
&&
7975 (entry
->vme_start
!= start
||
7976 entry
->vme_end
!= end
)) {
7977 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7978 "mismatched entry %p [0x%llx:0x%llx]\n",
7983 (uint64_t)entry
->vme_start
,
7984 (uint64_t)entry
->vme_end
);
7988 * If in a superpage, extend the range to include the start of the mapping.
7990 if (entry
->superpage_size
&& (start
& ~SUPERPAGE_MASK
)) {
7991 start
= SUPERPAGE_ROUND_DOWN(start
);
7995 if (start
== entry
->vme_start
) {
7997 * No need to clip. We don't want to cause
7998 * any unnecessary unnesting in this case...
8001 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
8002 entry
->map_aligned
&&
8003 !VM_MAP_PAGE_ALIGNED(
8005 VM_MAP_PAGE_MASK(map
))) {
8007 * The entry will no longer be
8008 * map-aligned after clipping
8009 * and the caller said it's OK.
8011 entry
->map_aligned
= FALSE
;
8013 if (map
== kalloc_map
) {
8014 panic("vm_map_delete(%p,0x%llx,0x%llx):"
8015 " clipping %p at 0x%llx\n",
8022 vm_map_clip_start(map
, entry
, start
);
8026 * Fix the lookup hint now, rather than each
8027 * time through the loop.
8029 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8031 if (map
->pmap
== kernel_pmap
&&
8032 os_ref_get_count(&map
->map_refcnt
) != 0) {
8033 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8034 "no map entry at 0x%llx\n",
8040 entry
= first_entry
->vme_next
;
8041 if (gap_start
== FIND_GAP
) {
8047 if (entry
->superpage_size
) {
8048 end
= SUPERPAGE_ROUND_UP(end
);
8051 need_wakeup
= FALSE
;
8053 * Step through all entries in this region
8055 s
= entry
->vme_start
;
8056 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
8058 * At this point, we have deleted all the memory entries
8059 * between "start" and "s". We still need to delete
8060 * all memory entries between "s" and "end".
8061 * While we were blocked and the map was unlocked, some
8062 * new memory entries could have been re-allocated between
8063 * "start" and "s" and we don't want to mess with those.
8064 * Some of those entries could even have been re-assembled
8065 * with an entry after "s" (in vm_map_simplify_entry()), so
8066 * we may have to vm_map_clip_start() again.
8069 if (entry
->vme_start
>= s
) {
8071 * This entry starts on or after "s"
8072 * so no need to clip its start.
8076 * This entry has been re-assembled by a
8077 * vm_map_simplify_entry(). We need to
8078 * re-clip its start.
8080 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
8081 entry
->map_aligned
&&
8082 !VM_MAP_PAGE_ALIGNED(s
,
8083 VM_MAP_PAGE_MASK(map
))) {
8085 * The entry will no longer be map-aligned
8086 * after clipping and the caller said it's OK.
8088 entry
->map_aligned
= FALSE
;
8090 if (map
== kalloc_map
) {
8091 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8092 "clipping %p at 0x%llx\n",
8099 vm_map_clip_start(map
, entry
, s
);
8101 if (entry
->vme_end
<= end
) {
8103 * This entry is going away completely, so no need
8104 * to clip and possibly cause an unnecessary unnesting.
8107 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
8108 entry
->map_aligned
&&
8109 !VM_MAP_PAGE_ALIGNED(end
,
8110 VM_MAP_PAGE_MASK(map
))) {
8112 * The entry will no longer be map-aligned
8113 * after clipping and the caller said it's OK.
8115 entry
->map_aligned
= FALSE
;
8117 if (map
== kalloc_map
) {
8118 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8119 "clipping %p at 0x%llx\n",
8126 vm_map_clip_end(map
, entry
, end
);
8129 if (entry
->permanent
) {
8130 if (map
->pmap
== kernel_pmap
) {
8131 panic("%s(%p,0x%llx,0x%llx): "
8132 "attempt to remove permanent "
8134 "%p [0x%llx:0x%llx]\n",
8140 (uint64_t) entry
->vme_start
,
8141 (uint64_t) entry
->vme_end
);
8142 } else if (flags
& VM_MAP_REMOVE_IMMUTABLE
) {
8143 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
8144 entry
->permanent
= FALSE
;
8146 if (vm_map_executable_immutable_verbose
) {
8147 printf("%d[%s] %s(0x%llx,0x%llx): "
8148 "permanent entry [0x%llx:0x%llx] "
8151 (current_task()->bsd_info
8152 ? proc_name_address(current_task()->bsd_info
)
8157 (uint64_t)entry
->vme_start
,
8158 (uint64_t)entry
->vme_end
,
8160 entry
->max_protection
);
8163 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
8165 DTRACE_VM5(vm_map_delete_permanent
,
8166 vm_map_offset_t
, entry
->vme_start
,
8167 vm_map_offset_t
, entry
->vme_end
,
8168 vm_prot_t
, entry
->protection
,
8169 vm_prot_t
, entry
->max_protection
,
8170 int, VME_ALIAS(entry
));
8175 if (entry
->in_transition
) {
8176 wait_result_t wait_result
;
8179 * Another thread is wiring/unwiring this entry.
8180 * Let the other thread know we are waiting.
8182 assert(s
== entry
->vme_start
);
8183 entry
->needs_wakeup
= TRUE
;
8186 * wake up anybody waiting on entries that we have
8187 * already unwired/deleted.
8190 vm_map_entry_wakeup(map
);
8191 need_wakeup
= FALSE
;
8194 wait_result
= vm_map_entry_wait(map
, interruptible
);
8196 if (interruptible
&&
8197 wait_result
== THREAD_INTERRUPTED
) {
8199 * We do not clear the needs_wakeup flag,
8200 * since we cannot tell if we were the only one.
8202 return KERN_ABORTED
;
8206 * The entry could have been clipped or it
8207 * may not exist anymore. Look it up again.
8209 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
8211 * User: use the next entry
8213 if (gap_start
== FIND_GAP
) {
8216 entry
= first_entry
->vme_next
;
8217 s
= entry
->vme_start
;
8219 entry
= first_entry
;
8220 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8222 last_timestamp
= map
->timestamp
;
8224 } /* end in_transition */
8226 if (entry
->wired_count
) {
8227 boolean_t user_wire
;
8229 user_wire
= entry
->user_wired_count
> 0;
8232 * Remove a kernel wiring if requested
8234 if (flags
& VM_MAP_REMOVE_KUNWIRE
) {
8235 entry
->wired_count
--;
8239 * Remove all user wirings for proper accounting
8241 if (entry
->user_wired_count
> 0) {
8242 while (entry
->user_wired_count
) {
8243 subtract_wire_counts(map
, entry
, user_wire
);
8247 if (entry
->wired_count
!= 0) {
8248 assert(map
!= kernel_map
);
8250 * Cannot continue. Typical case is when
8251 * a user thread has physical io pending on
8252 * on this page. Either wait for the
8253 * kernel wiring to go away or return an
8256 if (flags
& VM_MAP_REMOVE_WAIT_FOR_KWIRE
) {
8257 wait_result_t wait_result
;
8259 assert(s
== entry
->vme_start
);
8260 entry
->needs_wakeup
= TRUE
;
8261 wait_result
= vm_map_entry_wait(map
,
8264 if (interruptible
&&
8265 wait_result
== THREAD_INTERRUPTED
) {
8267 * We do not clear the
8268 * needs_wakeup flag, since we
8269 * cannot tell if we were the
8272 return KERN_ABORTED
;
8276 * The entry could have been clipped or
8277 * it may not exist anymore. Look it
8280 if (!vm_map_lookup_entry(map
, s
,
8282 assert(map
!= kernel_map
);
8284 * User: use the next entry
8286 if (gap_start
== FIND_GAP
) {
8289 entry
= first_entry
->vme_next
;
8290 s
= entry
->vme_start
;
8292 entry
= first_entry
;
8293 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8295 last_timestamp
= map
->timestamp
;
8298 return KERN_FAILURE
;
8302 entry
->in_transition
= TRUE
;
8304 * copy current entry. see comment in vm_map_wire()
8307 assert(s
== entry
->vme_start
);
8310 * We can unlock the map now. The in_transition
8311 * state guarentees existance of the entry.
8315 if (tmp_entry
.is_sub_map
) {
8317 vm_map_offset_t sub_start
, sub_end
;
8319 vm_map_offset_t pmap_addr
;
8322 sub_map
= VME_SUBMAP(&tmp_entry
);
8323 sub_start
= VME_OFFSET(&tmp_entry
);
8324 sub_end
= sub_start
+ (tmp_entry
.vme_end
-
8325 tmp_entry
.vme_start
);
8326 if (tmp_entry
.use_pmap
) {
8327 pmap
= sub_map
->pmap
;
8328 pmap_addr
= tmp_entry
.vme_start
;
8331 pmap_addr
= tmp_entry
.vme_start
;
8333 (void) vm_map_unwire_nested(sub_map
,
8338 if (VME_OBJECT(&tmp_entry
) == kernel_object
) {
8339 pmap_protect_options(
8341 tmp_entry
.vme_start
,
8344 PMAP_OPTIONS_REMOVE
,
8347 vm_fault_unwire(map
, &tmp_entry
,
8348 VME_OBJECT(&tmp_entry
) == kernel_object
,
8349 map
->pmap
, tmp_entry
.vme_start
);
8354 if (last_timestamp
+ 1 != map
->timestamp
) {
8356 * Find the entry again. It could have
8357 * been clipped after we unlocked the map.
8359 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
8360 assert((map
!= kernel_map
) &&
8361 (!entry
->is_sub_map
));
8362 if (gap_start
== FIND_GAP
) {
8365 first_entry
= first_entry
->vme_next
;
8366 s
= first_entry
->vme_start
;
8368 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8371 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8372 first_entry
= entry
;
8375 last_timestamp
= map
->timestamp
;
8377 entry
= first_entry
;
8378 while ((entry
!= vm_map_to_entry(map
)) &&
8379 (entry
->vme_start
< tmp_entry
.vme_end
)) {
8380 assert(entry
->in_transition
);
8381 entry
->in_transition
= FALSE
;
8382 if (entry
->needs_wakeup
) {
8383 entry
->needs_wakeup
= FALSE
;
8386 entry
= entry
->vme_next
;
8389 * We have unwired the entry(s). Go back and
8392 entry
= first_entry
;
8396 /* entry is unwired */
8397 assert(entry
->wired_count
== 0);
8398 assert(entry
->user_wired_count
== 0);
8400 assert(s
== entry
->vme_start
);
8402 if (flags
& VM_MAP_REMOVE_NO_PMAP_CLEANUP
) {
8404 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8405 * vm_map_delete(), some map entries might have been
8406 * transferred to a "zap_map", which doesn't have a
8407 * pmap. The original pmap has already been flushed
8408 * in the vm_map_delete() call targeting the original
8409 * map, but when we get to destroying the "zap_map",
8410 * we don't have any pmap to flush, so let's just skip
8413 } else if (entry
->is_sub_map
) {
8414 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)) >= VM_MAP_PAGE_SHIFT(map
),
8415 "map %p (%d) entry %p submap %p (%d)\n",
8416 map
, VM_MAP_PAGE_SHIFT(map
), entry
,
8418 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)));
8419 if (entry
->use_pmap
) {
8420 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)) == VM_MAP_PAGE_SHIFT(map
),
8421 "map %p (%d) entry %p submap %p (%d)\n",
8422 map
, VM_MAP_PAGE_SHIFT(map
), entry
,
8424 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)));
8425 #ifndef NO_NESTED_PMAP
8428 if (flags
& VM_MAP_REMOVE_NO_UNNESTING
) {
8430 * This is the final cleanup of the
8431 * address space being terminated.
8432 * No new mappings are expected and
8433 * we don't really need to unnest the
8434 * shared region (and lose the "global"
8435 * pmap mappings, if applicable).
8437 * Tell the pmap layer that we're
8438 * "clean" wrt nesting.
8440 pmap_flags
= PMAP_UNNEST_CLEAN
;
8443 * We're unmapping part of the nested
8444 * shared region, so we can't keep the
8449 pmap_unnest_options(
8451 (addr64_t
)entry
->vme_start
,
8452 entry
->vme_end
- entry
->vme_start
,
8454 #endif /* NO_NESTED_PMAP */
8455 if (map
->mapped_in_other_pmaps
&&
8456 os_ref_get_count(&map
->map_refcnt
) != 0) {
8457 /* clean up parent map/maps */
8458 vm_map_submap_pmap_clean(
8459 map
, entry
->vme_start
,
8465 vm_map_submap_pmap_clean(
8466 map
, entry
->vme_start
, entry
->vme_end
,
8470 } else if (VME_OBJECT(entry
) != kernel_object
&&
8471 VME_OBJECT(entry
) != compressor_object
) {
8472 object
= VME_OBJECT(entry
);
8473 if (map
->mapped_in_other_pmaps
&&
8474 os_ref_get_count(&map
->map_refcnt
) != 0) {
8475 vm_object_pmap_protect_options(
8476 object
, VME_OFFSET(entry
),
8477 entry
->vme_end
- entry
->vme_start
,
8482 PMAP_OPTIONS_REMOVE
);
8483 } else if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) ||
8484 (map
->pmap
== kernel_pmap
)) {
8485 /* Remove translations associated
8486 * with this range unless the entry
8487 * does not have an object, or
8488 * it's the kernel map or a descendant
8489 * since the platform could potentially
8490 * create "backdoor" mappings invisible
8491 * to the VM. It is expected that
8492 * objectless, non-kernel ranges
8493 * do not have such VM invisible
8496 pmap_remove_options(map
->pmap
,
8497 (addr64_t
)entry
->vme_start
,
8498 (addr64_t
)entry
->vme_end
,
8499 PMAP_OPTIONS_REMOVE
);
8503 if (entry
->iokit_acct
) {
8504 /* alternate accounting */
8505 DTRACE_VM4(vm_map_iokit_unmapped_region
,
8507 vm_map_offset_t
, entry
->vme_start
,
8508 vm_map_offset_t
, entry
->vme_end
,
8509 int, VME_ALIAS(entry
));
8510 vm_map_iokit_unmapped_region(map
,
8513 entry
->iokit_acct
= FALSE
;
8514 entry
->use_pmap
= FALSE
;
8518 * All pmap mappings for this map entry must have been
8522 assert(vm_map_pmap_is_empty(map
,
8527 next
= entry
->vme_next
;
8529 if (map
->pmap
== kernel_pmap
&&
8530 os_ref_get_count(&map
->map_refcnt
) != 0 &&
8531 entry
->vme_end
< end
&&
8532 (next
== vm_map_to_entry(map
) ||
8533 next
->vme_start
!= entry
->vme_end
)) {
8534 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8535 "hole after %p at 0x%llx\n",
8540 (uint64_t)entry
->vme_end
);
8544 * If the desired range didn't end with "entry", then there is a gap if
8545 * we wrapped around to the start of the map or if "entry" and "next"
8546 * aren't contiguous.
8548 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8549 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8551 if (gap_start
== FIND_GAP
&&
8552 vm_map_round_page(entry
->vme_end
, VM_MAP_PAGE_MASK(map
)) < end
&&
8553 (next
== vm_map_to_entry(map
) || entry
->vme_end
!= next
->vme_start
)) {
8554 gap_start
= entry
->vme_end
;
8556 s
= next
->vme_start
;
8557 last_timestamp
= map
->timestamp
;
8559 if (entry
->permanent
) {
8561 * A permanent entry can not be removed, so leave it
8562 * in place but remove all access permissions.
8564 entry
->protection
= VM_PROT_NONE
;
8565 entry
->max_protection
= VM_PROT_NONE
;
8566 } else if ((flags
& VM_MAP_REMOVE_SAVE_ENTRIES
) &&
8567 zap_map
!= VM_MAP_NULL
) {
8568 vm_map_size_t entry_size
;
8570 * The caller wants to save the affected VM map entries
8571 * into the "zap_map". The caller will take care of
8574 /* unlink the entry from "map" ... */
8575 vm_map_store_entry_unlink(map
, entry
);
8576 /* ... and add it to the end of the "zap_map" */
8577 vm_map_store_entry_link(zap_map
,
8578 vm_map_last_entry(zap_map
),
8580 VM_MAP_KERNEL_FLAGS_NONE
);
8581 entry_size
= entry
->vme_end
- entry
->vme_start
;
8582 map
->size
-= entry_size
;
8583 zap_map
->size
+= entry_size
;
8584 /* we didn't unlock the map, so no timestamp increase */
8587 vm_map_entry_delete(map
, entry
);
8588 /* vm_map_entry_delete unlocks the map */
8594 if (entry
== vm_map_to_entry(map
)) {
8597 if (last_timestamp
+ 1 != map
->timestamp
) {
8599 * We are responsible for deleting everything
8600 * from the given space. If someone has interfered,
8601 * we pick up where we left off. Back fills should
8602 * be all right for anyone, except map_delete, and
8603 * we have to assume that the task has been fully
8604 * disabled before we get here
8606 if (!vm_map_lookup_entry(map
, s
, &entry
)) {
8607 entry
= entry
->vme_next
;
8610 * Nothing found for s. If we weren't already done, then there is a gap.
8612 if (gap_start
== FIND_GAP
&& s
< end
) {
8615 s
= entry
->vme_start
;
8617 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8620 * others can not only allocate behind us, we can
8621 * also see coalesce while we don't have the map lock
8623 if (entry
== vm_map_to_entry(map
)) {
8627 last_timestamp
= map
->timestamp
;
8630 if (map
->wait_for_space
) {
8631 thread_wakeup((event_t
) map
);
8634 * wake up anybody waiting on entries that we have already deleted.
8637 vm_map_entry_wakeup(map
);
8640 if (gap_start
!= FIND_GAP
&& gap_start
!= GAPS_OK
) {
8641 DTRACE_VM3(kern_vm_deallocate_gap
,
8642 vm_map_offset_t
, gap_start
,
8643 vm_map_offset_t
, save_start
,
8644 vm_map_offset_t
, save_end
);
8645 if (!(flags
& VM_MAP_REMOVE_GAPS_OK
)) {
8646 vm_map_guard_exception(gap_start
, kGUARD_EXC_DEALLOC_GAP
);
8650 return KERN_SUCCESS
;
/*
 * vm_map_terminate:
 *
 * Clean out a task's map.
 */
kern_return_t
vm_map_terminate(
	vm_map_t map)
{
	vm_map_lock(map);
	map->terminated = TRUE;
	vm_map_unlock(map);

	return vm_map_remove(map,
	           map->min_offset,
	           map->max_offset,
	           /*
	            * + remove immutable mappings
	            * + allow gaps in range
	            */
	           (VM_MAP_REMOVE_NO_UNNESTING |
	           VM_MAP_REMOVE_IMMUTABLE |
	           VM_MAP_REMOVE_GAPS_OK));
}

/*
 * vm_map_remove:
 *
 * Remove the given address range from the target map.
 * This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t flags)
{
	kern_return_t result;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	/*
	 * For the zone maps, the kernel controls the allocation/freeing of memory.
	 * Any free to the zone maps should be within the bounds of the map and
	 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
	 * free to the zone maps into a no-op, there is a problem and we should
	 * panic.
	 */
	if ((start == end) && zone_maps_owned(start, 1)) {
		panic("Nothing being freed to a zone map. start = end = %p\n", (void *)start);
	}
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	vm_map_unlock(map);

	return result;
}

/*
 * vm_map_remove_locked:
 *
 * Remove the given address range from the target locked map.
 * This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove_locked(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t flags)
{
	kern_return_t result;

	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	return result;
}
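/*
 * Illustrative sketch (compiled out): callers express removal semantics
 * through the VM_MAP_REMOVE_* flags that vm_map_delete() interprets above.
 * The flag combination and range below are made up.
 */
#if 0
static kern_return_t
example_remove_wired_kernel_range(vm_map_t map,
    vm_map_offset_t start, vm_map_offset_t end)
{
	/*
	 * Drop one kernel wiring while removing, and keep waiting for any
	 * remaining kernel wirings (I/O in flight) to go away.
	 */
	return vm_map_remove(map, start, end,
	           VM_MAP_REMOVE_KUNWIRE | VM_MAP_REMOVE_WAIT_FOR_KWIRE);
}
#endif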
/*
 *	Routine:	vm_map_copy_allocate
 *
 *	Description:
 *		Allocates and initializes a map copy object.
 */
static vm_map_copy_t
vm_map_copy_allocate(void)
{
	vm_map_copy_t new_copy;

	new_copy = zalloc(vm_map_copy_zone);
	bzero(new_copy, sizeof(*new_copy));
	new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
	vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
	vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
	return new_copy;
}
/*
 *	Routine:	vm_map_copy_discard
 *
 *	Description:
 *		Dispose of a map copy object (returned by
 *		vm_map_copyin).
 */
void
vm_map_copy_discard(
	vm_map_copy_t copy)
{
	if (copy == VM_MAP_COPY_NULL) {
		return;
	}

	switch (copy->type) {
	case VM_MAP_COPY_ENTRY_LIST:
		while (vm_map_copy_first_entry(copy) !=
		    vm_map_copy_to_entry(copy)) {
			vm_map_entry_t entry = vm_map_copy_first_entry(copy);

			vm_map_copy_entry_unlink(copy, entry);
			if (entry->is_sub_map) {
				vm_map_deallocate(VME_SUBMAP(entry));
			} else {
				vm_object_deallocate(VME_OBJECT(entry));
			}
			vm_map_copy_entry_dispose(copy, entry);
		}
		break;
	case VM_MAP_COPY_OBJECT:
		vm_object_deallocate(copy->cpy_object);
		break;
	case VM_MAP_COPY_KERNEL_BUFFER:
		/*
		 * The vm_map_copy_t and possibly the data buffer were
		 * allocated by a single call to kheap_alloc(), i.e. the
		 * vm_map_copy_t was not allocated out of the zone.
		 */
		if (copy->size > msg_ool_size_small || copy->offset) {
			panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
			    (long long)copy->size, (long long)copy->offset);
		}
		kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy->size);
	}
	zfree(vm_map_copy_zone, copy);
}
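/*
 * Illustrative sketch (compiled out): a typical producer of copy objects
 * creates one with vm_map_copyin() and, on any path where the copy is not
 * consumed, disposes of it with vm_map_copy_discard() as above.  The source
 * range below is made up.
 */
#if 0
static kern_return_t
example_copyin_then_discard(vm_map_t src_map,
    vm_map_address_t src_addr, vm_map_size_t len)
{
	vm_map_copy_t copy;
	kern_return_t kr;

	kr = vm_map_copyin(src_map, src_addr, len,
	    FALSE,              /* src_destroy */
	    &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* ... hand "copy" to a consumer; if that fails, it is still ours ... */
	vm_map_copy_discard(copy);
	return KERN_SUCCESS;
}
#endif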
/*
 *	Routine:	vm_map_copy_copy
 *
 *	Description:
 *		Move the information in a map copy object to
 *		a new map copy object, leaving the old one
 *		empty.
 *
 *		This is used by kernel routines that need
 *		to look at out-of-line data (in copyin form)
 *		before deciding whether to return SUCCESS.
 *		If the routine returns FAILURE, the original
 *		copy object will be deallocated; therefore,
 *		these routines must make a copy of the copy
 *		object and leave the original empty so that
 *		deallocation will not fail.
 */
vm_map_copy_t
vm_map_copy_copy(
	vm_map_copy_t copy)
{
	vm_map_copy_t new_copy;

	if (copy == VM_MAP_COPY_NULL) {
		return VM_MAP_COPY_NULL;
	}

	/*
	 * Allocate a new copy object, and copy the information
	 * from the old one into it.
	 */

	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
#if __has_feature(ptrauth_calls)
	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		new_copy->cpy_kdata = copy->cpy_kdata;
	}
#endif

	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * The links in the entry chain must be
		 * changed to point to the new copy object.
		 */
		vm_map_copy_first_entry(copy)->vme_prev
		        = vm_map_copy_to_entry(new_copy);
		vm_map_copy_last_entry(copy)->vme_next
		        = vm_map_copy_to_entry(new_copy);
	}

	/*
	 * Change the old copy object into one that contains
	 * nothing to be deallocated.
	 */
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = VM_OBJECT_NULL;

	/*
	 * Return the new object.
	 */
	return new_copy;
}
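/*
 * Illustrative sketch (compiled out): the pattern described in the block
 * comment above.  A routine that merely inspects copyin data duplicates the
 * copy object first, so that the caller's later vm_map_copy_discard() of the
 * original (now emptied) object cannot free the data out from under it.
 * The inspection step below is hypothetical.
 */
#if 0
static kern_return_t
example_peek_at_copy(vm_map_copy_t caller_copy)
{
	vm_map_copy_t my_copy;

	my_copy = vm_map_copy_copy(caller_copy);
	if (my_copy == VM_MAP_COPY_NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	/* ... examine the out-of-line data; decide to fail ... */
	vm_map_copy_discard(my_copy);   /* we own the duplicate now */
	return KERN_FAILURE;            /* caller may safely discard the emptied original */
}
#endif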
8867 static kern_return_t
8868 vm_map_overwrite_submap_recurse(
8870 vm_map_offset_t dst_addr
,
8871 vm_map_size_t dst_size
)
8873 vm_map_offset_t dst_end
;
8874 vm_map_entry_t tmp_entry
;
8875 vm_map_entry_t entry
;
8876 kern_return_t result
;
8877 boolean_t encountered_sub_map
= FALSE
;
8882 * Verify that the destination is all writeable
8883 * initially. We have to trunc the destination
8884 * address and round the copy size or we'll end up
8885 * splitting entries in strange ways.
8888 dst_end
= vm_map_round_page(dst_addr
+ dst_size
,
8889 VM_MAP_PAGE_MASK(dst_map
));
8890 vm_map_lock(dst_map
);
8893 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
8894 vm_map_unlock(dst_map
);
8895 return KERN_INVALID_ADDRESS
;
8898 vm_map_clip_start(dst_map
,
8900 vm_map_trunc_page(dst_addr
,
8901 VM_MAP_PAGE_MASK(dst_map
)));
8902 if (tmp_entry
->is_sub_map
) {
8903 /* clipping did unnest if needed */
8904 assert(!tmp_entry
->use_pmap
);
8907 for (entry
= tmp_entry
;;) {
8908 vm_map_entry_t next
;
8910 next
= entry
->vme_next
;
8911 while (entry
->is_sub_map
) {
8912 vm_map_offset_t sub_start
;
8913 vm_map_offset_t sub_end
;
8914 vm_map_offset_t local_end
;
8916 if (entry
->in_transition
) {
8918 * Say that we are waiting, and wait for entry.
8920 entry
->needs_wakeup
= TRUE
;
8921 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8926 encountered_sub_map
= TRUE
;
8927 sub_start
= VME_OFFSET(entry
);
8929 if (entry
->vme_end
< dst_end
) {
8930 sub_end
= entry
->vme_end
;
8934 sub_end
-= entry
->vme_start
;
8935 sub_end
+= VME_OFFSET(entry
);
8936 local_end
= entry
->vme_end
;
8937 vm_map_unlock(dst_map
);
8939 result
= vm_map_overwrite_submap_recurse(
8942 sub_end
- sub_start
);
8944 if (result
!= KERN_SUCCESS
) {
8947 if (dst_end
<= entry
->vme_end
) {
8948 return KERN_SUCCESS
;
8950 vm_map_lock(dst_map
);
8951 if (!vm_map_lookup_entry(dst_map
, local_end
,
8953 vm_map_unlock(dst_map
);
8954 return KERN_INVALID_ADDRESS
;
8957 next
= entry
->vme_next
;
8960 if (!(entry
->protection
& VM_PROT_WRITE
)) {
8961 vm_map_unlock(dst_map
);
8962 return KERN_PROTECTION_FAILURE
;
8966 * If the entry is in transition, we must wait
8967 * for it to exit that state. Anything could happen
8968 * when we unlock the map, so start over.
8970 if (entry
->in_transition
) {
8972 * Say that we are waiting, and wait for entry.
8974 entry
->needs_wakeup
= TRUE
;
8975 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8981 * our range is contained completely within this map entry
8983 if (dst_end
<= entry
->vme_end
) {
8984 vm_map_unlock(dst_map
);
8985 return KERN_SUCCESS
;
8988 * check that range specified is contiguous region
8990 if ((next
== vm_map_to_entry(dst_map
)) ||
8991 (next
->vme_start
!= entry
->vme_end
)) {
8992 vm_map_unlock(dst_map
);
8993 return KERN_INVALID_ADDRESS
;
8997 * Check for permanent objects in the destination.
8999 if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) &&
9000 ((!VME_OBJECT(entry
)->internal
) ||
9001 (VME_OBJECT(entry
)->true_share
))) {
9002 if (encountered_sub_map
) {
9003 vm_map_unlock(dst_map
);
9004 return KERN_FAILURE
;
9011 vm_map_unlock(dst_map
);
9012 return KERN_SUCCESS
;
/*
 *	Routine:	vm_map_copy_overwrite
 *
 *	Description:
 *		Copy the memory described by the map copy
 *		object (copy; returned by vm_map_copyin) onto
 *		the specified destination region (dst_map, dst_addr).
 *		The destination must be writeable.
 *
 *		Unlike vm_map_copyout, this routine actually
 *		writes over previously-mapped memory. If the
 *		previous mapping was to a permanent (user-supplied)
 *		memory object, it is preserved.
 *
 *		The attributes (protection and inheritance) of the
 *		destination region are preserved.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 *
 *	Implementation notes:
 *		To overwrite aligned temporary virtual memory, it is
 *		sufficient to remove the previous mapping and insert
 *		the new copy. This replacement is done either on
 *		the whole region (if no permanent virtual memory
 *		objects are embedded in the destination region) or
 *		in individual map entries.
 *
 *		To overwrite permanent virtual memory, it is necessary
 *		to copy each page, as the external memory management
 *		interface currently does not provide any optimizations.
 *
 *		Unaligned memory also has to be copied. It is possible
 *		to use 'vm_trickery' to copy the aligned data. This is
 *		not done but not hard to implement.
 *
 *		Once a page of permanent memory has been overwritten,
 *		it is impossible to interrupt this function; otherwise,
 *		the call would be neither atomic nor location-independent.
 *		The kernel-state portion of a user thread must be
 *
 *		It may be expensive to forward all requests that might
 *		overwrite permanent memory (vm_write, vm_copy) to
 *		uninterruptible kernel threads. This routine may be
 *		called by interruptible threads; however, success is
 *		not guaranteed -- if the request cannot be performed
 *		atomically and interruptibly, an error indication is
 *		returned.
 */

static kern_return_t
vm_map_copy_overwrite_nested(
	vm_map_t dst_map,
	vm_map_address_t dst_addr,
	vm_map_copy_t copy,
	boolean_t interruptible,
	pmap_t pmap,
	boolean_t discard_on_success)
{
9075 vm_map_offset_t dst_end
;
9076 vm_map_entry_t tmp_entry
;
9077 vm_map_entry_t entry
;
9079 boolean_t aligned
= TRUE
;
9080 boolean_t contains_permanent_objects
= FALSE
;
9081 boolean_t encountered_sub_map
= FALSE
;
9082 vm_map_offset_t base_addr
;
9083 vm_map_size_t copy_size
;
9084 vm_map_size_t total_size
;
9085 int copy_page_shift
;
9089 * Check for null copy object.
9092 if (copy
== VM_MAP_COPY_NULL
) {
9093 return KERN_SUCCESS
;
9097 * Assert that the vm_map_copy is coming from the right
9098 * zone and hasn't been forged
9100 vm_map_copy_require(copy
);
9103 * Check for special kernel buffer allocated
9104 * by new_ipc_kmsg_copyin.
9107 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
9108 return vm_map_copyout_kernel_buffer(
9110 copy
, copy
->size
, TRUE
, discard_on_success
);
9114 * Only works for entry lists at the moment. Will
9115 * support page lists later.
9118 assert(copy
->type
== VM_MAP_COPY_ENTRY_LIST
);
9120 if (copy
->size
== 0) {
9121 if (discard_on_success
) {
9122 vm_map_copy_discard(copy
);
9124 return KERN_SUCCESS
;
9127 copy_page_shift
= copy
->cpy_hdr
.page_shift
;
9130 * Verify that the destination is all writeable
9131 * initially. We have to trunc the destination
9132 * address and round the copy size or we'll end up
9133 * splitting entries in strange ways.
9136 if (!VM_MAP_PAGE_ALIGNED(copy
->size
,
9137 VM_MAP_PAGE_MASK(dst_map
)) ||
9138 !VM_MAP_PAGE_ALIGNED(copy
->offset
,
9139 VM_MAP_PAGE_MASK(dst_map
)) ||
9140 !VM_MAP_PAGE_ALIGNED(dst_addr
,
9141 VM_MAP_PAGE_MASK(dst_map
)) ||
9142 copy_page_shift
!= VM_MAP_PAGE_SHIFT(dst_map
)) {
9144 dst_end
= vm_map_round_page(dst_addr
+ copy
->size
,
9145 VM_MAP_PAGE_MASK(dst_map
));
9147 dst_end
= dst_addr
+ copy
->size
;
9150 vm_map_lock(dst_map
);
9152 /* LP64todo - remove this check when vm_map_commpage64()
9153 * no longer has to stuff in a map_entry for the commpage
9154 * above the map's max_offset.
9156 if (dst_addr
>= dst_map
->max_offset
) {
9157 vm_map_unlock(dst_map
);
9158 return KERN_INVALID_ADDRESS
;
9162 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
9163 vm_map_unlock(dst_map
);
9164 return KERN_INVALID_ADDRESS
;
9166 vm_map_clip_start(dst_map
,
9168 vm_map_trunc_page(dst_addr
,
9169 VM_MAP_PAGE_MASK(dst_map
)));
9170 for (entry
= tmp_entry
;;) {
9171 vm_map_entry_t next
= entry
->vme_next
;
9173 while (entry
->is_sub_map
) {
9174 vm_map_offset_t sub_start
;
9175 vm_map_offset_t sub_end
;
9176 vm_map_offset_t local_end
;
9178 if (entry
->in_transition
) {
9180 * Say that we are waiting, and wait for entry.
9182 entry
->needs_wakeup
= TRUE
;
9183 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9188 local_end
= entry
->vme_end
;
9189 if (!(entry
->needs_copy
)) {
9190 /* if needs_copy we are a COW submap */
9191 /* in such a case we just replace so */
9192 /* there is no need for the follow- */
9194 encountered_sub_map
= TRUE
;
9195 sub_start
= VME_OFFSET(entry
);
9197 if (entry
->vme_end
< dst_end
) {
9198 sub_end
= entry
->vme_end
;
9202 sub_end
-= entry
->vme_start
;
9203 sub_end
+= VME_OFFSET(entry
);
9204 vm_map_unlock(dst_map
);
9206 kr
= vm_map_overwrite_submap_recurse(
9209 sub_end
- sub_start
);
9210 if (kr
!= KERN_SUCCESS
) {
9213 vm_map_lock(dst_map
);
9216 if (dst_end
<= entry
->vme_end
) {
9217 goto start_overwrite
;
9219 if (!vm_map_lookup_entry(dst_map
, local_end
,
9221 vm_map_unlock(dst_map
);
9222 return KERN_INVALID_ADDRESS
;
9224 next
= entry
->vme_next
;
9227 if (!(entry
->protection
& VM_PROT_WRITE
)) {
9228 vm_map_unlock(dst_map
);
9229 return KERN_PROTECTION_FAILURE
;
9233 * If the entry is in transition, we must wait
9234 * for it to exit that state. Anything could happen
9235 * when we unlock the map, so start over.
9237 if (entry
->in_transition
) {
9239 * Say that we are waiting, and wait for entry.
9241 entry
->needs_wakeup
= TRUE
;
9242 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9248 * our range is contained completely within this map entry
9250 if (dst_end
<= entry
->vme_end
) {
9254 * check that range specified is contiguous region
9256 if ((next
== vm_map_to_entry(dst_map
)) ||
9257 (next
->vme_start
!= entry
->vme_end
)) {
9258 vm_map_unlock(dst_map
);
9259 return KERN_INVALID_ADDRESS
;
9264 * Check for permanent objects in the destination.
9266 if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) &&
9267 ((!VME_OBJECT(entry
)->internal
) ||
9268 (VME_OBJECT(entry
)->true_share
))) {
9269 contains_permanent_objects
= TRUE
;
9277 * If there are permanent objects in the destination, then
9278 * the copy cannot be interrupted.
9281 if (interruptible
&& contains_permanent_objects
) {
9282 vm_map_unlock(dst_map
);
9283 return KERN_FAILURE
; /* XXX */
9288 * Make a second pass, overwriting the data
9289 * At the beginning of each loop iteration,
9290 * the next entry to be overwritten is "tmp_entry"
9291 * (initially, the value returned from the lookup above),
9292 * and the starting address expected in that entry
9296 total_size
= copy
->size
;
9297 if (encountered_sub_map
) {
9299 /* re-calculate tmp_entry since we've had the map */
9301 if (!vm_map_lookup_entry( dst_map
, dst_addr
, &tmp_entry
)) {
9302 vm_map_unlock(dst_map
);
9303 return KERN_INVALID_ADDRESS
;
9306 copy_size
= copy
->size
;
9309 base_addr
= dst_addr
;
9311 /* deconstruct the copy object and do in parts */
9312 /* only in sub_map, interruptable case */
9313 vm_map_entry_t copy_entry
;
9314 vm_map_entry_t previous_prev
= VM_MAP_ENTRY_NULL
;
9315 vm_map_entry_t next_copy
= VM_MAP_ENTRY_NULL
;
9317 int remaining_entries
= 0;
9318 vm_map_offset_t new_offset
= 0;
9320 for (entry
= tmp_entry
; copy_size
== 0;) {
9321 vm_map_entry_t next
;
9323 next
= entry
->vme_next
;
9325 /* tmp_entry and base address are moved along */
9326 /* each time we encounter a sub-map. Otherwise */
9327 /* entry can outpase tmp_entry, and the copy_size */
9328 /* may reflect the distance between them */
9329 /* if the current entry is found to be in transition */
9330 /* we will start over at the beginning or the last */
9331 /* encounter of a submap as dictated by base_addr */
9332 /* we will zero copy_size accordingly. */
9333 if (entry
->in_transition
) {
9335 * Say that we are waiting, and wait for entry.
9337 entry
->needs_wakeup
= TRUE
;
9338 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9340 if (!vm_map_lookup_entry(dst_map
, base_addr
,
9342 vm_map_unlock(dst_map
);
9343 return KERN_INVALID_ADDRESS
;
9349 if (entry
->is_sub_map
) {
9350 vm_map_offset_t sub_start
;
9351 vm_map_offset_t sub_end
;
9352 vm_map_offset_t local_end
;
9354 if (entry
->needs_copy
) {
9355 /* if this is a COW submap */
9356 /* just back the range with a */
9357 /* anonymous entry */
9358 if (entry
->vme_end
< dst_end
) {
9359 sub_end
= entry
->vme_end
;
9363 if (entry
->vme_start
< base_addr
) {
9364 sub_start
= base_addr
;
9366 sub_start
= entry
->vme_start
;
9369 dst_map
, entry
, sub_end
);
9371 dst_map
, entry
, sub_start
);
9372 assert(!entry
->use_pmap
);
9373 assert(!entry
->iokit_acct
);
9374 entry
->use_pmap
= TRUE
;
9375 entry
->is_sub_map
= FALSE
;
9378 VME_OBJECT_SET(entry
, VM_OBJECT_NULL
);
9379 VME_OFFSET_SET(entry
, 0);
9380 entry
->is_shared
= FALSE
;
9381 entry
->needs_copy
= FALSE
;
9382 entry
->protection
= VM_PROT_DEFAULT
;
9383 entry
->max_protection
= VM_PROT_ALL
;
9384 entry
->wired_count
= 0;
9385 entry
->user_wired_count
= 0;
9386 if (entry
->inheritance
9387 == VM_INHERIT_SHARE
) {
9388 entry
->inheritance
= VM_INHERIT_COPY
;
9392 /* first take care of any non-sub_map */
9393 /* entries to send */
9394 if (base_addr
< entry
->vme_start
) {
9397 entry
->vme_start
- base_addr
;
9400 sub_start
= VME_OFFSET(entry
);
9402 if (entry
->vme_end
< dst_end
) {
9403 sub_end
= entry
->vme_end
;
9407 sub_end
-= entry
->vme_start
;
9408 sub_end
+= VME_OFFSET(entry
);
9409 local_end
= entry
->vme_end
;
9410 vm_map_unlock(dst_map
);
9411 copy_size
= sub_end
- sub_start
;
9413 /* adjust the copy object */
9414 if (total_size
> copy_size
) {
9415 vm_map_size_t local_size
= 0;
9416 vm_map_size_t entry_size
;
9419 new_offset
= copy
->offset
;
9420 copy_entry
= vm_map_copy_first_entry(copy
);
9421 while (copy_entry
!=
9422 vm_map_copy_to_entry(copy
)) {
9423 entry_size
= copy_entry
->vme_end
-
9424 copy_entry
->vme_start
;
9425 if ((local_size
< copy_size
) &&
9426 ((local_size
+ entry_size
)
9428 vm_map_copy_clip_end(copy
,
9430 copy_entry
->vme_start
+
9431 (copy_size
- local_size
));
9432 entry_size
= copy_entry
->vme_end
-
9433 copy_entry
->vme_start
;
9434 local_size
+= entry_size
;
9435 new_offset
+= entry_size
;
9437 if (local_size
>= copy_size
) {
9438 next_copy
= copy_entry
->vme_next
;
9439 copy_entry
->vme_next
=
9440 vm_map_copy_to_entry(copy
);
9442 copy
->cpy_hdr
.links
.prev
;
9443 copy
->cpy_hdr
.links
.prev
= copy_entry
;
9444 copy
->size
= copy_size
;
9446 copy
->cpy_hdr
.nentries
;
9447 remaining_entries
-= nentries
;
9448 copy
->cpy_hdr
.nentries
= nentries
;
9451 local_size
+= entry_size
;
9452 new_offset
+= entry_size
;
9455 copy_entry
= copy_entry
->vme_next
;
9459 if ((entry
->use_pmap
) && (pmap
== NULL
)) {
9460 kr
= vm_map_copy_overwrite_nested(
9465 VME_SUBMAP(entry
)->pmap
,
9467 } else if (pmap
!= NULL
) {
9468 kr
= vm_map_copy_overwrite_nested(
9472 interruptible
, pmap
,
9475 kr
= vm_map_copy_overwrite_nested(
9483 if (kr
!= KERN_SUCCESS
) {
9484 if (next_copy
!= NULL
) {
9485 copy
->cpy_hdr
.nentries
+=
9487 copy
->cpy_hdr
.links
.prev
->vme_next
=
9489 copy
->cpy_hdr
.links
.prev
9491 copy
->size
= total_size
;
9495 if (dst_end
<= local_end
) {
9496 return KERN_SUCCESS
;
9498 /* otherwise copy no longer exists, it was */
9499 /* destroyed after successful copy_overwrite */
9500 copy
= vm_map_copy_allocate();
9501 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9502 copy
->offset
= new_offset
;
9503 copy
->cpy_hdr
.page_shift
= copy_page_shift
;
9507 * this does not seem to deal with
9508 * the VM map store (R&B tree)
9511 total_size
-= copy_size
;
9513 /* put back remainder of copy in container */
9514 if (next_copy
!= NULL
) {
9515 copy
->cpy_hdr
.nentries
= remaining_entries
;
9516 copy
->cpy_hdr
.links
.next
= next_copy
;
9517 copy
->cpy_hdr
.links
.prev
= previous_prev
;
9518 copy
->size
= total_size
;
9519 next_copy
->vme_prev
=
9520 vm_map_copy_to_entry(copy
);
9523 base_addr
= local_end
;
9524 vm_map_lock(dst_map
);
9525 if (!vm_map_lookup_entry(dst_map
,
9526 local_end
, &tmp_entry
)) {
9527 vm_map_unlock(dst_map
);
9528 return KERN_INVALID_ADDRESS
;
9533 if (dst_end
<= entry
->vme_end
) {
9534 copy_size
= dst_end
- base_addr
;
9538 if ((next
== vm_map_to_entry(dst_map
)) ||
9539 (next
->vme_start
!= entry
->vme_end
)) {
9540 vm_map_unlock(dst_map
);
9541 return KERN_INVALID_ADDRESS
;
9550 /* adjust the copy object */
9551 if (total_size
> copy_size
) {
9552 vm_map_size_t local_size
= 0;
9553 vm_map_size_t entry_size
;
9555 new_offset
= copy
->offset
;
9556 copy_entry
= vm_map_copy_first_entry(copy
);
9557 while (copy_entry
!= vm_map_copy_to_entry(copy
)) {
9558 entry_size
= copy_entry
->vme_end
-
9559 copy_entry
->vme_start
;
9560 if ((local_size
< copy_size
) &&
9561 ((local_size
+ entry_size
)
9563 vm_map_copy_clip_end(copy
, copy_entry
,
9564 copy_entry
->vme_start
+
9565 (copy_size
- local_size
));
9566 entry_size
= copy_entry
->vme_end
-
9567 copy_entry
->vme_start
;
9568 local_size
+= entry_size
;
9569 new_offset
+= entry_size
;
9571 if (local_size
>= copy_size
) {
9572 next_copy
= copy_entry
->vme_next
;
9573 copy_entry
->vme_next
=
9574 vm_map_copy_to_entry(copy
);
9576 copy
->cpy_hdr
.links
.prev
;
9577 copy
->cpy_hdr
.links
.prev
= copy_entry
;
9578 copy
->size
= copy_size
;
9580 copy
->cpy_hdr
.nentries
;
9581 remaining_entries
-= nentries
;
9582 copy
->cpy_hdr
.nentries
= nentries
;
9585 local_size
+= entry_size
;
9586 new_offset
+= entry_size
;
9589 copy_entry
= copy_entry
->vme_next
;
9599 local_pmap
= dst_map
->pmap
;
9602 if ((kr
= vm_map_copy_overwrite_aligned(
9603 dst_map
, tmp_entry
, copy
,
9604 base_addr
, local_pmap
)) != KERN_SUCCESS
) {
9605 if (next_copy
!= NULL
) {
9606 copy
->cpy_hdr
.nentries
+=
9608 copy
->cpy_hdr
.links
.prev
->vme_next
=
9610 copy
->cpy_hdr
.links
.prev
=
9612 copy
->size
+= copy_size
;
9616 vm_map_unlock(dst_map
);
		/*
		 * if the copy and dst address are misaligned but the same
		 * offset within the page we can copy_not_aligned the
		 * misaligned parts and copy aligned the rest.  If they are
		 * aligned but len is unaligned we simply need to copy
		 * the end bit unaligned.  We'll need to split the misaligned
		 * bits of the region in this case!
		 */
		/* ALWAYS UNLOCKS THE dst_map MAP */
9629 kr
= vm_map_copy_overwrite_unaligned(
9634 discard_on_success
);
9635 if (kr
!= KERN_SUCCESS
) {
9636 if (next_copy
!= NULL
) {
9637 copy
->cpy_hdr
.nentries
+=
9639 copy
->cpy_hdr
.links
.prev
->vme_next
=
9641 copy
->cpy_hdr
.links
.prev
=
9643 copy
->size
+= copy_size
;
9648 total_size
-= copy_size
;
9649 if (total_size
== 0) {
9652 base_addr
+= copy_size
;
9654 copy
->offset
= new_offset
;
9655 if (next_copy
!= NULL
) {
9656 copy
->cpy_hdr
.nentries
= remaining_entries
;
9657 copy
->cpy_hdr
.links
.next
= next_copy
;
9658 copy
->cpy_hdr
.links
.prev
= previous_prev
;
9659 next_copy
->vme_prev
= vm_map_copy_to_entry(copy
);
9660 copy
->size
= total_size
;
9662 vm_map_lock(dst_map
);
9664 if (!vm_map_lookup_entry(dst_map
,
9665 base_addr
, &tmp_entry
)) {
9666 vm_map_unlock(dst_map
);
9667 return KERN_INVALID_ADDRESS
;
9669 if (tmp_entry
->in_transition
) {
9670 entry
->needs_wakeup
= TRUE
;
9671 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9676 vm_map_clip_start(dst_map
,
9678 vm_map_trunc_page(base_addr
,
9679 VM_MAP_PAGE_MASK(dst_map
)));
9685 * Throw away the vm_map_copy object
9687 if (discard_on_success
) {
9688 vm_map_copy_discard(copy
);
9691 return KERN_SUCCESS
;
}/* vm_map_copy_overwrite */
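
/*
 * Illustrative sketch (not part of the original code): a typical in-kernel
 * caller pairs vm_map_copyin() with the vm_map_copy_overwrite() wrapper below
 * to move "len" bytes from one map onto an existing, writable mapping in
 * another.  The helper name and its error handling are hypothetical.
 */
#if 0 /* example only */
static kern_return_t
vm_map_overwrite_example(
	vm_map_t        src_map,
	vm_map_offset_t src_addr,
	vm_map_t        dst_map,
	vm_map_offset_t dst_addr,
	vm_map_size_t   len)
{
	vm_map_copy_t   copy;
	kern_return_t   kr;

	/* capture the source range as a copy object, without destroying it */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* overwrite the destination range; "copy" is discarded on success */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, len, FALSE);
	if (kr != KERN_SUCCESS) {
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif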
9695 vm_map_copy_overwrite(
9697 vm_map_offset_t dst_addr
,
9699 vm_map_size_t copy_size
,
9700 boolean_t interruptible
)
9702 vm_map_size_t head_size
, tail_size
;
9703 vm_map_copy_t head_copy
, tail_copy
;
9704 vm_map_offset_t head_addr
, tail_addr
;
9705 vm_map_entry_t entry
;
9707 vm_map_offset_t effective_page_mask
, effective_page_size
;
9708 int copy_page_shift
;
9717 if (interruptible
||
9718 copy
== VM_MAP_COPY_NULL
||
9719 copy
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
9721 * We can't split the "copy" map if we're interruptible
9722 * or if we don't have a "copy" map...
9725 return vm_map_copy_overwrite_nested(dst_map
,
9733 copy_page_shift
= VM_MAP_COPY_PAGE_SHIFT(copy
);
9734 if (copy_page_shift
< PAGE_SHIFT
||
9735 VM_MAP_PAGE_SHIFT(dst_map
) < PAGE_SHIFT
) {
9739 if (VM_MAP_PAGE_SHIFT(dst_map
) < PAGE_SHIFT
) {
9740 effective_page_mask
= VM_MAP_PAGE_MASK(dst_map
);
9742 effective_page_mask
= MAX(VM_MAP_PAGE_MASK(dst_map
), PAGE_MASK
);
9743 effective_page_mask
= MAX(VM_MAP_COPY_PAGE_MASK(copy
),
9744 effective_page_mask
);
9746 effective_page_size
= effective_page_mask
+ 1;
9748 if (copy_size
< VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES
* effective_page_size
) {
9750 * Too small to bother with optimizing...
9755 if ((dst_addr
& effective_page_mask
) !=
9756 (copy
->offset
& effective_page_mask
)) {
9758 * Incompatible mis-alignment of source and destination...
9764 * Proper alignment or identical mis-alignment at the beginning.
9765 * Let's try and do a small unaligned copy first (if needed)
9766 * and then an aligned copy for the rest.
9768 if (!vm_map_page_aligned(dst_addr
, effective_page_mask
)) {
9769 head_addr
= dst_addr
;
9770 head_size
= (effective_page_size
-
9771 (copy
->offset
& effective_page_mask
));
9772 head_size
= MIN(head_size
, copy_size
);
9774 if (!vm_map_page_aligned(copy
->offset
+ copy_size
,
9775 effective_page_mask
)) {
9777 * Mis-alignment at the end.
9778 * Do an aligned copy up to the last page and
9779 * then an unaligned copy for the remaining bytes.
9781 tail_size
= ((copy
->offset
+ copy_size
) &
9782 effective_page_mask
);
9783 tail_size
= MIN(tail_size
, copy_size
);
9784 tail_addr
= dst_addr
+ copy_size
- tail_size
;
9785 assert(tail_addr
>= head_addr
+ head_size
);
9787 assert(head_size
+ tail_size
<= copy_size
);
9789 if (head_size
+ tail_size
== copy_size
) {
9791 * It's all unaligned, no optimization possible...
9797 * Can't optimize if there are any submaps in the
9798 * destination due to the way we free the "copy" map
9799 * progressively in vm_map_copy_overwrite_nested()
9802 vm_map_lock_read(dst_map
);
9803 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &entry
)) {
9804 vm_map_unlock_read(dst_map
);
9808 (entry
!= vm_map_copy_to_entry(copy
) &&
9809 entry
->vme_start
< dst_addr
+ copy_size
);
9810 entry
= entry
->vme_next
) {
9811 if (entry
->is_sub_map
) {
9812 vm_map_unlock_read(dst_map
);
9816 vm_map_unlock_read(dst_map
);
9820 * Unaligned copy of the first "head_size" bytes, to reach
9825 * Extract "head_copy" out of "copy".
9827 head_copy
= vm_map_copy_allocate();
9828 head_copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9829 head_copy
->cpy_hdr
.entries_pageable
=
9830 copy
->cpy_hdr
.entries_pageable
;
9831 vm_map_store_init(&head_copy
->cpy_hdr
);
9832 head_copy
->cpy_hdr
.page_shift
= copy_page_shift
;
9834 entry
= vm_map_copy_first_entry(copy
);
9835 if (entry
->vme_end
< copy
->offset
+ head_size
) {
9836 head_size
= entry
->vme_end
- copy
->offset
;
9839 head_copy
->offset
= copy
->offset
;
9840 head_copy
->size
= head_size
;
9841 copy
->offset
+= head_size
;
9842 copy
->size
-= head_size
;
9843 copy_size
-= head_size
;
9844 assert(copy_size
> 0);
9846 vm_map_copy_clip_end(copy
, entry
, copy
->offset
);
9847 vm_map_copy_entry_unlink(copy
, entry
);
9848 vm_map_copy_entry_link(head_copy
,
9849 vm_map_copy_to_entry(head_copy
),
9853 * Do the unaligned copy.
9855 kr
= vm_map_copy_overwrite_nested(dst_map
,
9861 if (kr
!= KERN_SUCCESS
) {
9868 * Extract "tail_copy" out of "copy".
9870 tail_copy
= vm_map_copy_allocate();
9871 tail_copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9872 tail_copy
->cpy_hdr
.entries_pageable
=
9873 copy
->cpy_hdr
.entries_pageable
;
9874 vm_map_store_init(&tail_copy
->cpy_hdr
);
9875 tail_copy
->cpy_hdr
.page_shift
= copy_page_shift
;
9877 tail_copy
->offset
= copy
->offset
+ copy_size
- tail_size
;
9878 tail_copy
->size
= tail_size
;
9880 copy
->size
-= tail_size
;
9881 copy_size
-= tail_size
;
9882 assert(copy_size
> 0);
9884 entry
= vm_map_copy_last_entry(copy
);
9885 vm_map_copy_clip_start(copy
, entry
, tail_copy
->offset
);
9886 entry
= vm_map_copy_last_entry(copy
);
9887 vm_map_copy_entry_unlink(copy
, entry
);
9888 vm_map_copy_entry_link(tail_copy
,
9889 vm_map_copy_last_entry(tail_copy
),
9894 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9895 * we want to avoid TOCTOU issues w.r.t copy->size but
9896 * we don't need to change vm_map_copy_overwrite_nested()
9897 * and all other vm_map_copy_overwrite variants.
9899 * So we assign the original copy_size that was passed into
9900 * this routine back to copy.
9902 * This use of local 'copy_size' passed into this routine is
9903 * to try and protect against TOCTOU attacks where the kernel
9904 * has been exploited. We don't expect this to be an issue
9905 * during normal system operation.
9907 assertf(copy
->size
== copy_size
,
9908 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size
, (uint64_t) copy
->size
);
9909 copy
->size
= copy_size
;
9912 * Copy most (or possibly all) of the data.
9914 kr
= vm_map_copy_overwrite_nested(dst_map
,
9915 dst_addr
+ head_size
,
9920 if (kr
!= KERN_SUCCESS
) {
9925 kr
= vm_map_copy_overwrite_nested(dst_map
,
9934 assert(copy
->type
== VM_MAP_COPY_ENTRY_LIST
);
9935 if (kr
== KERN_SUCCESS
) {
9937 * Discard all the copy maps.
9940 vm_map_copy_discard(head_copy
);
9943 vm_map_copy_discard(copy
);
9945 vm_map_copy_discard(tail_copy
);
9950 * Re-assemble the original copy map.
9953 entry
= vm_map_copy_first_entry(head_copy
);
9954 vm_map_copy_entry_unlink(head_copy
, entry
);
9955 vm_map_copy_entry_link(copy
,
9956 vm_map_copy_to_entry(copy
),
9958 copy
->offset
-= head_size
;
9959 copy
->size
+= head_size
;
9960 vm_map_copy_discard(head_copy
);
9964 entry
= vm_map_copy_last_entry(tail_copy
);
9965 vm_map_copy_entry_unlink(tail_copy
, entry
);
9966 vm_map_copy_entry_link(copy
,
9967 vm_map_copy_last_entry(copy
),
9969 copy
->size
+= tail_size
;
9970 vm_map_copy_discard(tail_copy
);
/*
 *	Routine:	vm_map_copy_overwrite_unaligned	[internal use only]
 *
 *	Description:
 *		Physically copy unaligned data
 *
 *	Implementation:
 *		Unaligned parts of pages have to be physically copied.  We use
 *		a modified form of vm_fault_copy (which understands non-aligned
 *		page offsets and sizes) to do the copy.  We attempt to copy as
 *		much memory in one go as possible, however vm_fault_copy copies
 *		within 1 memory object so we have to find the smaller of "amount left",
 *		"source object data size" and "target object data size".  With
 *		unaligned data we don't need to split regions, therefore the source
 *		(copy) object should be one map entry, the target range may be split
 *		over multiple map entries however.  In any event we are pessimistic
 *		about these assumptions.
 *
 *	Assumptions:
 *		dst_map is locked on entry and is returned locked on success,
 *		unlocked on error.
 */
static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t	dst_map,
	vm_map_entry_t	entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	boolean_t	discard_on_success)
{
	vm_map_entry_t		copy_entry;
	vm_map_entry_t		copy_entry_next;
	vm_map_version_t	version;
	vm_object_t		dst_object;
	vm_object_offset_t	dst_offset;
	vm_object_offset_t	src_offset;
	vm_object_offset_t	entry_offset;
	vm_map_offset_t		entry_end;
	vm_map_size_t		src_size,
	    dst_size,
	    copy_size,
	    amount_left;
	kern_return_t		kr = KERN_SUCCESS;
10024 copy_entry
= vm_map_copy_first_entry(copy
);
10026 vm_map_lock_write_to_read(dst_map
);
10028 src_offset
= copy
->offset
- trunc_page_mask_64(copy
->offset
, VM_MAP_COPY_PAGE_MASK(copy
));
10029 amount_left
= copy
->size
;
10031 * unaligned so we never clipped this entry, we need the offset into
10032 * the vm_object not just the data.
10034 while (amount_left
> 0) {
10035 if (entry
== vm_map_to_entry(dst_map
)) {
10036 vm_map_unlock_read(dst_map
);
10037 return KERN_INVALID_ADDRESS
;
10040 /* "start" must be within the current map entry */
10041 assert((start
>= entry
->vme_start
) && (start
< entry
->vme_end
));
10043 dst_offset
= start
- entry
->vme_start
;
10045 dst_size
= entry
->vme_end
- start
;
10047 src_size
= copy_entry
->vme_end
-
10048 (copy_entry
->vme_start
+ src_offset
);
10050 if (dst_size
< src_size
) {
10052 * we can only copy dst_size bytes before
10053 * we have to get the next destination entry
10055 copy_size
= dst_size
;
10058 * we can only copy src_size bytes before
10059 * we have to get the next source copy entry
10061 copy_size
= src_size
;
10064 if (copy_size
> amount_left
) {
10065 copy_size
= amount_left
;
10068 * Entry needs copy, create a shadow shadow object for
10069 * Copy on write region.
10071 if (entry
->needs_copy
&&
10072 ((entry
->protection
& VM_PROT_WRITE
) != 0)) {
10073 if (vm_map_lock_read_to_write(dst_map
)) {
10074 vm_map_lock_read(dst_map
);
10077 VME_OBJECT_SHADOW(entry
,
10078 (vm_map_size_t
)(entry
->vme_end
10079 - entry
->vme_start
));
10080 entry
->needs_copy
= FALSE
;
10081 vm_map_lock_write_to_read(dst_map
);
10083 dst_object
= VME_OBJECT(entry
);
10085 * unlike with the virtual (aligned) copy we're going
10086 * to fault on it therefore we need a target object.
10088 if (dst_object
== VM_OBJECT_NULL
) {
10089 if (vm_map_lock_read_to_write(dst_map
)) {
10090 vm_map_lock_read(dst_map
);
10093 dst_object
= vm_object_allocate((vm_map_size_t
)
10094 entry
->vme_end
- entry
->vme_start
);
10095 VME_OBJECT_SET(entry
, dst_object
);
10096 VME_OFFSET_SET(entry
, 0);
10097 assert(entry
->use_pmap
);
10098 vm_map_lock_write_to_read(dst_map
);
10101 * Take an object reference and unlock map. The "entry" may
10102 * disappear or change when the map is unlocked.
10104 vm_object_reference(dst_object
);
10105 version
.main_timestamp
= dst_map
->timestamp
;
10106 entry_offset
= VME_OFFSET(entry
);
10107 entry_end
= entry
->vme_end
;
10108 vm_map_unlock_read(dst_map
);
10110 * Copy as much as possible in one pass
10112 kr
= vm_fault_copy(
10113 VME_OBJECT(copy_entry
),
10114 VME_OFFSET(copy_entry
) + src_offset
,
10117 entry_offset
+ dst_offset
,
10122 start
+= copy_size
;
10123 src_offset
+= copy_size
;
10124 amount_left
-= copy_size
;
10126 * Release the object reference
10128 vm_object_deallocate(dst_object
);
10130 * If a hard error occurred, return it now
10132 if (kr
!= KERN_SUCCESS
) {
10136 if ((copy_entry
->vme_start
+ src_offset
) == copy_entry
->vme_end
10137 || amount_left
== 0) {
10139 * all done with this copy entry, dispose.
10141 copy_entry_next
= copy_entry
->vme_next
;
10143 if (discard_on_success
) {
10144 vm_map_copy_entry_unlink(copy
, copy_entry
);
10145 assert(!copy_entry
->is_sub_map
);
10146 vm_object_deallocate(VME_OBJECT(copy_entry
));
10147 vm_map_copy_entry_dispose(copy
, copy_entry
);
10150 if (copy_entry_next
== vm_map_copy_to_entry(copy
) &&
10153 * not finished copying but run out of source
10155 return KERN_INVALID_ADDRESS
;
10158 copy_entry
= copy_entry_next
;
10163 if (amount_left
== 0) {
10164 return KERN_SUCCESS
;
10167 vm_map_lock_read(dst_map
);
10168 if (version
.main_timestamp
== dst_map
->timestamp
) {
10169 if (start
== entry_end
) {
10171 * destination region is split. Use the version
10172 * information to avoid a lookup in the normal
10175 entry
= entry
->vme_next
;
10177 * should be contiguous. Fail if we encounter
10178 * a hole in the destination.
10180 if (start
!= entry
->vme_start
) {
10181 vm_map_unlock_read(dst_map
);
10182 return KERN_INVALID_ADDRESS
;
10187 * Map version check failed.
10188 * we must lookup the entry because somebody
10189 * might have changed the map behind our backs.
10192 if (!vm_map_lookup_entry(dst_map
, start
, &entry
)) {
10193 vm_map_unlock_read(dst_map
);
10194 return KERN_INVALID_ADDRESS
;
	return KERN_SUCCESS;
}/* vm_map_copy_overwrite_unaligned */
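
/*
 * Restated for clarity (sketch, not part of the original code): each pass of
 * the loop above copies the smallest of the remaining destination entry, the
 * remaining source copy entry, and the amount left, using the routine's own
 * locals:
 */
#if 0 /* example only */
	copy_size = (dst_size < src_size) ? dst_size : src_size;
	if (copy_size > amount_left) {
		copy_size = amount_left;
	}
#endif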
/*
 *	Routine:	vm_map_copy_overwrite_aligned	[internal use only]
 *
 *	Description:
 *		Does all the vm_trickery possible for whole pages.
 *
 *	Implementation:
 *		If there are no permanent objects in the destination,
 *		and the source and destination map entry zones match,
 *		and the destination map entry is not shared,
 *		then the map entries can be deleted and replaced
 *		with those from the copy.  The following code is the
 *		basic idea of what to do, but there are lots of annoying
 *		little details about getting protection and inheritance
 *		right.  Should add protection, inheritance, and sharing checks
 *		to the above pass and make sure that no wiring is involved.
 */

int vm_map_copy_overwrite_aligned_src_not_internal = 0;
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
int vm_map_copy_overwrite_aligned_src_large = 0;

static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t	dst_map,
	vm_map_entry_t	tmp_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	__unused pmap_t	pmap)
{
	vm_object_t	object;
	vm_map_entry_t	copy_entry;
	vm_map_size_t	copy_size;
	vm_map_size_t	size;
	vm_map_entry_t	entry;
10239 while ((copy_entry
= vm_map_copy_first_entry(copy
))
10240 != vm_map_copy_to_entry(copy
)) {
10241 copy_size
= (copy_entry
->vme_end
- copy_entry
->vme_start
);
10244 if (entry
->is_sub_map
) {
10245 /* unnested when clipped earlier */
10246 assert(!entry
->use_pmap
);
10248 if (entry
== vm_map_to_entry(dst_map
)) {
10249 vm_map_unlock(dst_map
);
10250 return KERN_INVALID_ADDRESS
;
10252 size
= (entry
->vme_end
- entry
->vme_start
);
10254 * Make sure that no holes popped up in the
10255 * address map, and that the protection is
10256 * still valid, in case the map was unlocked
10260 if ((entry
->vme_start
!= start
) || ((entry
->is_sub_map
)
10261 && !entry
->needs_copy
)) {
10262 vm_map_unlock(dst_map
);
10263 return KERN_INVALID_ADDRESS
;
10265 assert(entry
!= vm_map_to_entry(dst_map
));
10268 * Check protection again
10271 if (!(entry
->protection
& VM_PROT_WRITE
)) {
10272 vm_map_unlock(dst_map
);
10273 return KERN_PROTECTION_FAILURE
;
10277 * Adjust to source size first
10280 if (copy_size
< size
) {
10281 if (entry
->map_aligned
&&
10282 !VM_MAP_PAGE_ALIGNED(entry
->vme_start
+ copy_size
,
10283 VM_MAP_PAGE_MASK(dst_map
))) {
10284 /* no longer map-aligned */
10285 entry
->map_aligned
= FALSE
;
10287 vm_map_clip_end(dst_map
, entry
, entry
->vme_start
+ copy_size
);
10292 * Adjust to destination size
10295 if (size
< copy_size
) {
10296 vm_map_copy_clip_end(copy
, copy_entry
,
10297 copy_entry
->vme_start
+ size
);
10301 assert((entry
->vme_end
- entry
->vme_start
) == size
);
10302 assert((tmp_entry
->vme_end
- tmp_entry
->vme_start
) == size
);
10303 assert((copy_entry
->vme_end
- copy_entry
->vme_start
) == size
);
10306 * If the destination contains temporary unshared memory,
10307 * we can perform the copy by throwing it away and
10308 * installing the source data.
10311 object
= VME_OBJECT(entry
);
10312 if ((!entry
->is_shared
&&
10313 ((object
== VM_OBJECT_NULL
) ||
10314 (object
->internal
&& !object
->true_share
))) ||
10315 entry
->needs_copy
) {
10316 vm_object_t old_object
= VME_OBJECT(entry
);
10317 vm_object_offset_t old_offset
= VME_OFFSET(entry
);
10318 vm_object_offset_t offset
;
10321 * Ensure that the source and destination aren't
10324 if (old_object
== VME_OBJECT(copy_entry
) &&
10325 old_offset
== VME_OFFSET(copy_entry
)) {
10326 vm_map_copy_entry_unlink(copy
, copy_entry
);
10327 vm_map_copy_entry_dispose(copy
, copy_entry
);
10329 if (old_object
!= VM_OBJECT_NULL
) {
10330 vm_object_deallocate(old_object
);
10333 start
= tmp_entry
->vme_end
;
10334 tmp_entry
= tmp_entry
->vme_next
;
10338 #if XNU_TARGET_OS_OSX
10339 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10340 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10341 if (VME_OBJECT(copy_entry
) != VM_OBJECT_NULL
&&
10342 VME_OBJECT(copy_entry
)->vo_size
>= __TRADEOFF1_OBJ_SIZE
&&
10343 copy_size
<= __TRADEOFF1_COPY_SIZE
) {
10345 * Virtual vs. Physical copy tradeoff #1.
10347 * Copying only a few pages out of a large
10348 * object: do a physical copy instead of
10349 * a virtual copy, to avoid possibly keeping
10350 * the entire large object alive because of
10351 * those few copy-on-write pages.
10353 vm_map_copy_overwrite_aligned_src_large
++;
10356 #endif /* XNU_TARGET_OS_OSX */
10358 if ((dst_map
->pmap
!= kernel_pmap
) &&
10359 (VME_ALIAS(entry
) >= VM_MEMORY_MALLOC
) &&
10360 (VME_ALIAS(entry
) <= VM_MEMORY_MALLOC_MEDIUM
)) {
10361 vm_object_t new_object
, new_shadow
;
10364 * We're about to map something over a mapping
10365 * established by malloc()...
10367 new_object
= VME_OBJECT(copy_entry
);
10368 if (new_object
!= VM_OBJECT_NULL
) {
10369 vm_object_lock_shared(new_object
);
10371 while (new_object
!= VM_OBJECT_NULL
&&
10372 #if XNU_TARGET_OS_OSX
10373 !new_object
->true_share
&&
10374 new_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
10375 #endif /* XNU_TARGET_OS_OSX */
10376 new_object
->internal
) {
10377 new_shadow
= new_object
->shadow
;
10378 if (new_shadow
== VM_OBJECT_NULL
) {
10381 vm_object_lock_shared(new_shadow
);
10382 vm_object_unlock(new_object
);
10383 new_object
= new_shadow
;
10385 if (new_object
!= VM_OBJECT_NULL
) {
10386 if (!new_object
->internal
) {
10388 * The new mapping is backed
10389 * by an external object. We
10390 * don't want malloc'ed memory
10391 * to be replaced with such a
10392 * non-anonymous mapping, so
10393 * let's go off the optimized
10396 vm_map_copy_overwrite_aligned_src_not_internal
++;
10397 vm_object_unlock(new_object
);
10400 #if XNU_TARGET_OS_OSX
10401 if (new_object
->true_share
||
10402 new_object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
10404 * Same if there's a "true_share"
10405 * object in the shadow chain, or
10406 * an object with a non-default
10407 * (SYMMETRIC) copy strategy.
10409 vm_map_copy_overwrite_aligned_src_not_symmetric
++;
10410 vm_object_unlock(new_object
);
10413 #endif /* XNU_TARGET_OS_OSX */
10414 vm_object_unlock(new_object
);
10417 * The new mapping is still backed by
10418 * anonymous (internal) memory, so it's
10419 * OK to substitute it for the original
10420 * malloc() mapping.
10424 if (old_object
!= VM_OBJECT_NULL
) {
10425 if (entry
->is_sub_map
) {
10426 if (entry
->use_pmap
) {
10427 #ifndef NO_NESTED_PMAP
10428 pmap_unnest(dst_map
->pmap
,
10429 (addr64_t
)entry
->vme_start
,
10430 entry
->vme_end
- entry
->vme_start
);
10431 #endif /* NO_NESTED_PMAP */
10432 if (dst_map
->mapped_in_other_pmaps
) {
10433 /* clean up parent */
10435 vm_map_submap_pmap_clean(
10436 dst_map
, entry
->vme_start
,
10439 VME_OFFSET(entry
));
10442 vm_map_submap_pmap_clean(
10443 dst_map
, entry
->vme_start
,
10446 VME_OFFSET(entry
));
10448 vm_map_deallocate(VME_SUBMAP(entry
));
10450 if (dst_map
->mapped_in_other_pmaps
) {
10451 vm_object_pmap_protect_options(
10455 - entry
->vme_start
,
10460 PMAP_OPTIONS_REMOVE
);
10462 pmap_remove_options(
10464 (addr64_t
)(entry
->vme_start
),
10465 (addr64_t
)(entry
->vme_end
),
10466 PMAP_OPTIONS_REMOVE
);
10468 vm_object_deallocate(old_object
);
10472 if (entry
->iokit_acct
) {
10473 /* keep using iokit accounting */
10474 entry
->use_pmap
= FALSE
;
10476 /* use pmap accounting */
10477 entry
->use_pmap
= TRUE
;
10479 entry
->is_sub_map
= FALSE
;
10480 VME_OBJECT_SET(entry
, VME_OBJECT(copy_entry
));
10481 object
= VME_OBJECT(entry
);
10482 entry
->needs_copy
= copy_entry
->needs_copy
;
10483 entry
->wired_count
= 0;
10484 entry
->user_wired_count
= 0;
10485 offset
= VME_OFFSET(copy_entry
);
10486 VME_OFFSET_SET(entry
, offset
);
10488 vm_map_copy_entry_unlink(copy
, copy_entry
);
10489 vm_map_copy_entry_dispose(copy
, copy_entry
);
10492 * we could try to push pages into the pmap at this point, BUT
10493 * this optimization only saved on average 2 us per page if ALL
10494 * the pages in the source were currently mapped
10495 * and ALL the pages in the dest were touched, if there were fewer
10496 * than 2/3 of the pages touched, this optimization actually cost more cycles
10497 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
10501 * Set up for the next iteration. The map
10502 * has not been unlocked, so the next
10503 * address should be at the end of this
10504 * entry, and the next map entry should be
10505 * the one following it.
10508 start
= tmp_entry
->vme_end
;
10509 tmp_entry
= tmp_entry
->vme_next
;
10511 vm_map_version_t version
;
10512 vm_object_t dst_object
;
10513 vm_object_offset_t dst_offset
;
10517 if (entry
->needs_copy
) {
10518 VME_OBJECT_SHADOW(entry
,
10520 entry
->vme_start
));
10521 entry
->needs_copy
= FALSE
;
10524 dst_object
= VME_OBJECT(entry
);
10525 dst_offset
= VME_OFFSET(entry
);
10528 * Take an object reference, and record
10529 * the map version information so that the
10530 * map can be safely unlocked.
10533 if (dst_object
== VM_OBJECT_NULL
) {
10535 * We would usually have just taken the
10536 * optimized path above if the destination
10537 * object has not been allocated yet. But we
10538 * now disable that optimization if the copy
10539 * entry's object is not backed by anonymous
10540 * memory to avoid replacing malloc'ed
10541 * (i.e. re-usable) anonymous memory with a
10542 * not-so-anonymous mapping.
10543 * So we have to handle this case here and
10544 * allocate a new VM object for this map entry.
10546 dst_object
= vm_object_allocate(
10547 entry
->vme_end
- entry
->vme_start
);
10549 VME_OBJECT_SET(entry
, dst_object
);
10550 VME_OFFSET_SET(entry
, dst_offset
);
10551 assert(entry
->use_pmap
);
10554 vm_object_reference(dst_object
);
10556 /* account for unlock bumping up timestamp */
10557 version
.main_timestamp
= dst_map
->timestamp
+ 1;
10559 vm_map_unlock(dst_map
);
10562 * Copy as much as possible in one pass
10567 VME_OBJECT(copy_entry
),
10568 VME_OFFSET(copy_entry
),
10577 * Release the object reference
10580 vm_object_deallocate(dst_object
);
10583 * If a hard error occurred, return it now
10586 if (r
!= KERN_SUCCESS
) {
10590 if (copy_size
!= 0) {
10592 * Dispose of the copied region
10595 vm_map_copy_clip_end(copy
, copy_entry
,
10596 copy_entry
->vme_start
+ copy_size
);
10597 vm_map_copy_entry_unlink(copy
, copy_entry
);
10598 vm_object_deallocate(VME_OBJECT(copy_entry
));
10599 vm_map_copy_entry_dispose(copy
, copy_entry
);
10603 * Pick up in the destination map where we left off.
10605 * Use the version information to avoid a lookup
10606 * in the normal case.
10609 start
+= copy_size
;
10610 vm_map_lock(dst_map
);
10611 if (version
.main_timestamp
== dst_map
->timestamp
&&
10613 /* We can safely use saved tmp_entry value */
10615 if (tmp_entry
->map_aligned
&&
10616 !VM_MAP_PAGE_ALIGNED(
10618 VM_MAP_PAGE_MASK(dst_map
))) {
10619 /* no longer map-aligned */
10620 tmp_entry
->map_aligned
= FALSE
;
10622 vm_map_clip_end(dst_map
, tmp_entry
, start
);
10623 tmp_entry
= tmp_entry
->vme_next
;
10625 /* Must do lookup of tmp_entry */
10627 if (!vm_map_lookup_entry(dst_map
, start
, &tmp_entry
)) {
10628 vm_map_unlock(dst_map
);
10629 return KERN_INVALID_ADDRESS
;
10631 if (tmp_entry
->map_aligned
&&
10632 !VM_MAP_PAGE_ALIGNED(
10634 VM_MAP_PAGE_MASK(dst_map
))) {
10635 /* no longer map-aligned */
10636 tmp_entry
->map_aligned
= FALSE
;
10638 vm_map_clip_start(dst_map
, tmp_entry
, start
);
10643 return KERN_SUCCESS
;
10644 }/* vm_map_copy_overwrite_aligned */
/*
 *	Routine:	vm_map_copyin_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy in data to a kernel buffer from space in the
 *		source map.  The original space may be optionally
 *		deallocated.
 *
 *		If successful, returns a new copy object.
 */
static kern_return_t
vm_map_copyin_kernel_buffer(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	vm_map_copy_t	*copy_result)
{
	kern_return_t	kr;
	vm_map_copy_t	copy;

	if (len > msg_ool_size_small) {
		return KERN_INVALID_ARGUMENT;
	}

	copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
	if (copy == VM_MAP_COPY_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	copy->cpy_kdata = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
	if (copy->cpy_kdata == NULL) {
		zfree(vm_map_copy_zone, copy);
		return KERN_RESOURCE_SHORTAGE;
	}

	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
	copy->size = len;
	copy->offset = 0;

	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
	if (kr != KERN_SUCCESS) {
		kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, len);
		zfree(vm_map_copy_zone, copy);
		return kr;
	}
	if (src_destroy) {
		(void) vm_map_remove(
			src_map,
			vm_map_trunc_page(src_addr,
			    VM_MAP_PAGE_MASK(src_map)),
			vm_map_round_page(src_addr + len,
			    VM_MAP_PAGE_MASK(src_map)),
			(VM_MAP_REMOVE_INTERRUPTIBLE |
			VM_MAP_REMOVE_WAIT_FOR_KWIRE |
			((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
	}
	*copy_result = copy;
	return KERN_SUCCESS;
}
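
/*
 * Illustrative sketch (assumption, not in the original code): the kernel-buffer
 * path above is the fast path that vm_map_copyin_common() selects for copies
 * smaller than msg_ool_size_small (subject to a few extra address-range
 * checks); larger requests build a VM_MAP_COPY_ENTRY_LIST copy object instead.
 */
#if 0 /* example only */
	vm_map_copy_t	copy;
	kern_return_t	kr;

	if (len < msg_ool_size_small) {
		/* small copy: data is physically copied into a kernel buffer */
		kr = vm_map_copyin_kernel_buffer(src_map, src_addr, len,
		    FALSE, &copy);
	} else {
		/* large copy: set up a virtual (entry-list) copy object */
		kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	}
#endif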
/*
 *	Routine:	vm_map_copyout_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy out data from a kernel buffer into space in the
 *		destination map.  The space may be optionally dynamically
 *		allocated.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
static int vm_map_copyout_kernel_buffer_failures = 0;
static kern_return_t
vm_map_copyout_kernel_buffer(
	vm_map_t		map,
	vm_map_address_t	*addr,	/* IN/OUT */
	vm_map_copy_t		copy,
	vm_map_size_t		copy_size,
	boolean_t		overwrite,
	boolean_t		consume_on_success)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	assert(copy->size == copy_size);

	/*
	 * check for corrupted vm_map_copy structure
	 */
	if (copy_size > msg_ool_size_small || copy->offset) {
		panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
		    (long long)copy->size, (long long)copy->offset);
	}

	if (!overwrite) {
		/*
		 * Allocate space in the target map for the data
		 */
		*addr = 0;
		kr = vm_map_enter(map,
		    addr,
		    vm_map_round_page(copy_size,
		    VM_MAP_PAGE_MASK(map)),
		    (vm_map_offset_t) 0,
		    VM_FLAGS_ANYWHERE,
		    VM_MAP_KERNEL_FLAGS_NONE,
		    VM_KERN_MEMORY_NONE,
		    VM_OBJECT_NULL,
		    (vm_object_offset_t) 0,
		    FALSE,
		    VM_PROT_DEFAULT,
		    VM_PROT_ALL,
		    VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
#if KASAN
		if (map->pmap == kernel_pmap) {
			kasan_notify_address(*addr, copy->size);
		}
#endif
	}

	/*
	 * Copyout the data from the kernel buffer to the target map.
	 */
	if (thread->map == map) {
		/*
		 * If the target map is the current map, just do
		 * the copy.
		 */
		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t oldmap;

		/*
		 * If the target map is another map, assume the
		 * target's address space identity for the duration
		 * of the copy.
		 */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);

		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			vm_map_copyout_kernel_buffer_failures++;
			kr = KERN_INVALID_ADDRESS;
		}

		(void) vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}

	if (kr != KERN_SUCCESS) {
		/* the copy failed, clean up */
		if (!overwrite) {
			/*
			 * Deallocate the space we allocated in the target map.
			 */
			(void) vm_map_remove(
				map,
				vm_map_trunc_page(*addr,
				    VM_MAP_PAGE_MASK(map)),
				vm_map_round_page((*addr +
				    vm_map_round_page(copy_size,
				    VM_MAP_PAGE_MASK(map))),
				    VM_MAP_PAGE_MASK(map)),
				VM_MAP_REMOVE_NO_FLAGS);
		}
	} else {
		/* copy was successful, discard the copy structure */
		if (consume_on_success) {
			kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy_size);
			zfree(vm_map_copy_zone, copy);
		}
	}

	return kr;
}
/*
 *	Routine:	vm_map_copy_insert	[internal use only]
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *	Side effects:
 *		The copy chain is destroyed.
 */
static void
vm_map_copy_insert(
	vm_map_t	map,
	vm_map_entry_t	after_where,
	vm_map_copy_t	copy)
{
	vm_map_entry_t	entry;

	while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
		entry = vm_map_copy_first_entry(copy);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_store_entry_link(map, after_where, entry,
		    VM_MAP_KERNEL_FLAGS_NONE);
		after_where = entry;
	}
	zfree(vm_map_copy_zone, copy);
}
static void
vm_map_copy_remap(
	vm_map_t	map,
	vm_map_entry_t	where,
	vm_map_copy_t	copy,
	vm_map_offset_t	adjustment,
	vm_prot_t	cur_prot,
	vm_prot_t	max_prot,
	vm_inherit_t	inheritance)
{
	vm_map_entry_t	copy_entry, new_entry;

	for (copy_entry = vm_map_copy_first_entry(copy);
	    copy_entry != vm_map_copy_to_entry(copy);
	    copy_entry = copy_entry->vme_next) {
		/* get a new VM map entry for the map */
		new_entry = vm_map_entry_create(map,
		    !map->hdr.entries_pageable);
		/* copy the "copy entry" to the new entry */
		vm_map_entry_copy(map, new_entry, copy_entry);
		/* adjust "start" and "end" */
		new_entry->vme_start += adjustment;
		new_entry->vme_end += adjustment;
		/* clear some attributes */
		new_entry->inheritance = inheritance;
		new_entry->protection = cur_prot;
		new_entry->max_protection = max_prot;
		new_entry->behavior = VM_BEHAVIOR_DEFAULT;
		/* take an extra reference on the entry's "object" */
		if (new_entry->is_sub_map) {
			assert(!new_entry->use_pmap); /* not nested */
			vm_map_lock(VME_SUBMAP(new_entry));
			vm_map_reference(VME_SUBMAP(new_entry));
			vm_map_unlock(VME_SUBMAP(new_entry));
		} else {
			vm_object_reference(VME_OBJECT(new_entry));
		}
		/* insert the new entry in the map */
		vm_map_store_entry_link(map, where, new_entry,
		    VM_MAP_KERNEL_FLAGS_NONE);
		/* continue inserting the "copy entries" after the new entry */
		where = new_entry;
	}
}
/*
 * Returns true if *size matches (or is in the range of) copy->size.
 * Upon returning true, the *size field is updated with the actual size of the
 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
 */
boolean_t
vm_map_copy_validate_size(
	vm_map_t	dst_map,
	vm_map_copy_t	copy,
	vm_map_size_t	*size)
{
	if (copy == VM_MAP_COPY_NULL) {
		return FALSE;
	}
	vm_map_size_t copy_sz = copy->size;
	vm_map_size_t sz = *size;
	switch (copy->type) {
	case VM_MAP_COPY_OBJECT:
	case VM_MAP_COPY_KERNEL_BUFFER:
		if (sz == copy_sz) {
			return TRUE;
		}
		break;
	case VM_MAP_COPY_ENTRY_LIST:
		/*
		 * potential page-size rounding prevents us from exactly
		 * validating this flavor of vm_map_copy, but we can at least
		 * assert that it's within a range.
		 */
		if (copy_sz >= sz &&
		    copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
			*size = copy_sz;
			return TRUE;
		}
		break;
	default:
		break;
	}
	return FALSE;
}
/*
 *	Routine:	vm_map_copyout_size
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.  Uses a prevalidated
 *		size for the copy object (vm_map_copy_validate_size).
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout_size(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy,
	vm_map_size_t		copy_size)
{
	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
	           TRUE,                 /* consume_on_success */
	           VM_PROT_DEFAULT,
	           VM_PROT_ALL,
	           VM_INHERIT_DEFAULT);
}

/*
 *	Routine:	vm_map_copyout
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy)
{
	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
	           TRUE,                 /* consume_on_success */
	           VM_PROT_DEFAULT,
	           VM_PROT_ALL,
	           VM_INHERIT_DEFAULT);
}
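
/*
 * Illustrative sketch (not part of the original code): the classic
 * out-of-line data flow captures a range from one map and materializes it at
 * a newly allocated address in another.  The variable names are hypothetical.
 */
#if 0 /* example only */
	vm_map_copy_t		copy;
	vm_map_address_t	dst_addr;
	kern_return_t		kr;

	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr == KERN_SUCCESS) {
		/* consumes "copy" on success; dst_addr is chosen by the VM */
		kr = vm_map_copyout(dst_map, &dst_addr, copy);
		if (kr != KERN_SUCCESS) {
			vm_map_copy_discard(copy);
		}
	}
#endif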
kern_return_t
vm_map_copyout_internal(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy,
	vm_map_size_t		copy_size,
	boolean_t		consume_on_success,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	vm_map_size_t		size;
	vm_map_size_t		adjustment;
	vm_map_offset_t		start;
	vm_object_offset_t	vm_copy_start;
	vm_map_entry_t		last;
	vm_map_entry_t		entry;
	vm_map_entry_t		hole_entry;
	vm_map_copy_t		original_copy;
11013 * Check for null copy object.
11016 if (copy
== VM_MAP_COPY_NULL
) {
11018 return KERN_SUCCESS
;
11022 * Assert that the vm_map_copy is coming from the right
11023 * zone and hasn't been forged
11025 vm_map_copy_require(copy
);
11027 if (copy
->size
!= copy_size
) {
11029 return KERN_FAILURE
;
11033 * Check for special copy object, created
11034 * by vm_map_copyin_object.
11037 if (copy
->type
== VM_MAP_COPY_OBJECT
) {
11038 vm_object_t object
= copy
->cpy_object
;
11040 vm_object_offset_t offset
;
11042 offset
= vm_object_trunc_page(copy
->offset
);
11043 size
= vm_map_round_page((copy_size
+
11044 (vm_map_size_t
)(copy
->offset
-
11046 VM_MAP_PAGE_MASK(dst_map
));
11048 kr
= vm_map_enter(dst_map
, dst_addr
, size
,
11049 (vm_map_offset_t
) 0, VM_FLAGS_ANYWHERE
,
11050 VM_MAP_KERNEL_FLAGS_NONE
,
11051 VM_KERN_MEMORY_NONE
,
11052 object
, offset
, FALSE
,
11053 VM_PROT_DEFAULT
, VM_PROT_ALL
,
11054 VM_INHERIT_DEFAULT
);
11055 if (kr
!= KERN_SUCCESS
) {
11058 /* Account for non-pagealigned copy object */
11059 *dst_addr
+= (vm_map_offset_t
)(copy
->offset
- offset
);
11060 if (consume_on_success
) {
11061 zfree(vm_map_copy_zone
, copy
);
11063 return KERN_SUCCESS
;
11067 * Check for special kernel buffer allocated
11068 * by new_ipc_kmsg_copyin.
11071 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
11072 return vm_map_copyout_kernel_buffer(dst_map
, dst_addr
,
11073 copy
, copy_size
, FALSE
,
11074 consume_on_success
);
11077 original_copy
= copy
;
11078 if (copy
->cpy_hdr
.page_shift
!= VM_MAP_PAGE_SHIFT(dst_map
)) {
11080 vm_map_copy_t target_copy
;
11081 vm_map_offset_t overmap_start
, overmap_end
, trimmed_start
;
11083 target_copy
= VM_MAP_COPY_NULL
;
11084 DEBUG4K_ADJUST("adjusting...\n");
11085 kr
= vm_map_copy_adjust_to_target(
11088 copy
->size
, /* size */
11095 if (kr
!= KERN_SUCCESS
) {
11096 DEBUG4K_COPY("adjust failed 0x%x\n", kr
);
11099 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy
, copy
->cpy_hdr
.page_shift
, copy
->offset
, (uint64_t)copy
->size
, dst_map
, VM_MAP_PAGE_SHIFT(dst_map
), target_copy
, target_copy
->cpy_hdr
.page_shift
, target_copy
->offset
, (uint64_t)target_copy
->size
, (uint64_t)overmap_start
, (uint64_t)overmap_end
, (uint64_t)trimmed_start
);
11100 if (target_copy
!= copy
) {
11101 copy
= target_copy
;
11103 copy_size
= copy
->size
;
11107 * Find space for the data
11110 vm_copy_start
= vm_map_trunc_page((vm_map_size_t
)copy
->offset
,
11111 VM_MAP_COPY_PAGE_MASK(copy
));
11112 size
= vm_map_round_page((vm_map_size_t
)copy
->offset
+ copy_size
,
11113 VM_MAP_COPY_PAGE_MASK(copy
))
11119 vm_map_lock(dst_map
);
11120 if (dst_map
->disable_vmentry_reuse
== TRUE
) {
11121 VM_MAP_HIGHEST_ENTRY(dst_map
, entry
, start
);
11124 if (dst_map
->holelistenabled
) {
11125 hole_entry
= CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
);
11127 if (hole_entry
== NULL
) {
11129 * No more space in the map?
11131 vm_map_unlock(dst_map
);
11132 return KERN_NO_SPACE
;
11136 start
= last
->vme_start
;
11138 assert(first_free_is_valid(dst_map
));
11139 start
= ((last
= dst_map
->first_free
) == vm_map_to_entry(dst_map
)) ?
11140 vm_map_min(dst_map
) : last
->vme_end
;
11142 start
= vm_map_round_page(start
,
11143 VM_MAP_PAGE_MASK(dst_map
));
11147 vm_map_entry_t next
= last
->vme_next
;
11148 vm_map_offset_t end
= start
+ size
;
11150 if ((end
> dst_map
->max_offset
) || (end
< start
)) {
11151 if (dst_map
->wait_for_space
) {
11152 if (size
<= (dst_map
->max_offset
- dst_map
->min_offset
)) {
11153 assert_wait((event_t
) dst_map
,
11154 THREAD_INTERRUPTIBLE
);
11155 vm_map_unlock(dst_map
);
11156 thread_block(THREAD_CONTINUE_NULL
);
11160 vm_map_unlock(dst_map
);
11161 return KERN_NO_SPACE
;
11164 if (dst_map
->holelistenabled
) {
11165 if (last
->vme_end
>= end
) {
11170 * If there are no more entries, we must win.
11174 * If there is another entry, it must be
11175 * after the end of the potential new region.
11178 if (next
== vm_map_to_entry(dst_map
)) {
11182 if (next
->vme_start
>= end
) {
11189 if (dst_map
->holelistenabled
) {
11190 if (last
== CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
)) {
11194 vm_map_unlock(dst_map
);
11195 return KERN_NO_SPACE
;
11197 start
= last
->vme_start
;
11199 start
= last
->vme_end
;
11201 start
= vm_map_round_page(start
,
11202 VM_MAP_PAGE_MASK(dst_map
));
11205 if (dst_map
->holelistenabled
) {
11206 if (vm_map_lookup_entry(dst_map
, last
->vme_start
, &last
)) {
11207 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last
, (unsigned long long)last
->vme_start
);
11212 adjustment
= start
- vm_copy_start
;
11213 if (!consume_on_success
) {
11215 * We're not allowed to consume "copy", so we'll have to
11216 * copy its map entries into the destination map below.
11217 * No need to re-allocate map entries from the correct
11218 * (pageable or not) zone, since we'll get new map entries
11219 * during the transfer.
11220 * We'll also adjust the map entries's "start" and "end"
11221 * during the transfer, to keep "copy"'s entries consistent
11222 * with its "offset".
11224 goto after_adjustments
;
11228 * Since we're going to just drop the map
11229 * entries from the copy into the destination
11230 * map, they must come from the same pool.
11233 if (copy
->cpy_hdr
.entries_pageable
!= dst_map
->hdr
.entries_pageable
) {
11235 * Mismatches occur when dealing with the default
11239 vm_map_entry_t next
, new;
11242 * Find the zone that the copies were allocated from
11245 entry
= vm_map_copy_first_entry(copy
);
11248 * Reinitialize the copy so that vm_map_copy_entry_link
11251 vm_map_store_copy_reset(copy
, entry
);
11252 copy
->cpy_hdr
.entries_pageable
= dst_map
->hdr
.entries_pageable
;
11257 while (entry
!= vm_map_copy_to_entry(copy
)) {
11258 new = vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11259 vm_map_entry_copy_full(new, entry
);
11260 new->vme_no_copy_on_read
= FALSE
;
11261 assert(!new->iokit_acct
);
11262 if (new->is_sub_map
) {
11263 /* clr address space specifics */
11264 new->use_pmap
= FALSE
;
11266 vm_map_copy_entry_link(copy
,
11267 vm_map_copy_last_entry(copy
),
11269 next
= entry
->vme_next
;
11270 old_zone
= entry
->from_reserved_zone
? vm_map_entry_reserved_zone
: vm_map_entry_zone
;
11271 zfree(old_zone
, entry
);
11277 * Adjust the addresses in the copy chain, and
11278 * reset the region attributes.
11281 for (entry
= vm_map_copy_first_entry(copy
);
11282 entry
!= vm_map_copy_to_entry(copy
);
11283 entry
= entry
->vme_next
) {
11284 if (VM_MAP_PAGE_SHIFT(dst_map
) == PAGE_SHIFT
) {
11286 * We're injecting this copy entry into a map that
11287 * has the standard page alignment, so clear
11288 * "map_aligned" (which might have been inherited
11289 * from the original map entry).
11291 entry
->map_aligned
= FALSE
;
11294 entry
->vme_start
+= adjustment
;
11295 entry
->vme_end
+= adjustment
;
11297 if (entry
->map_aligned
) {
11298 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_start
,
11299 VM_MAP_PAGE_MASK(dst_map
)));
11300 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_end
,
11301 VM_MAP_PAGE_MASK(dst_map
)));
11304 entry
->inheritance
= VM_INHERIT_DEFAULT
;
11305 entry
->protection
= VM_PROT_DEFAULT
;
11306 entry
->max_protection
= VM_PROT_ALL
;
11307 entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
11310 * If the entry is now wired,
11311 * map the pages into the destination map.
11313 if (entry
->wired_count
!= 0) {
11314 vm_map_offset_t va
;
11315 vm_object_offset_t offset
;
11316 vm_object_t object
;
11320 /* TODO4K would need to use actual page size */
11321 assert(VM_MAP_PAGE_SHIFT(dst_map
) == PAGE_SHIFT
);
11323 object
= VME_OBJECT(entry
);
11324 offset
= VME_OFFSET(entry
);
11325 va
= entry
->vme_start
;
11327 pmap_pageable(dst_map
->pmap
,
11332 while (va
< entry
->vme_end
) {
11334 struct vm_object_fault_info fault_info
= {};
11337 * Look up the page in the object.
11338 * Assert that the page will be found in the
11341 * the object was newly created by
11342 * vm_object_copy_slowly, and has
11343 * copies of all of the pages from
11344 * the source object
11346 * the object was moved from the old
11347 * map entry; because the old map
11348 * entry was wired, all of the pages
11349 * were in the top-level object.
11350 * (XXX not true if we wire pages for
11353 vm_object_lock(object
);
11355 m
= vm_page_lookup(object
, offset
);
11356 if (m
== VM_PAGE_NULL
|| !VM_PAGE_WIRED(m
) ||
11358 panic("vm_map_copyout: wiring %p", m
);
11361 prot
= entry
->protection
;
11363 if (override_nx(dst_map
, VME_ALIAS(entry
)) &&
11365 prot
|= VM_PROT_EXECUTE
;
11368 type_of_fault
= DBG_CACHE_HIT_FAULT
;
11370 fault_info
.user_tag
= VME_ALIAS(entry
);
11371 fault_info
.pmap_options
= 0;
11372 if (entry
->iokit_acct
||
11373 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
11374 fault_info
.pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
11384 FALSE
, /* change_wiring */
11385 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
11387 NULL
, /* need_retry */
11390 vm_object_unlock(object
);
11392 offset
+= PAGE_SIZE_64
;
11401 * Correct the page alignment for the result
11404 *dst_addr
= start
+ (copy
->offset
- vm_copy_start
);
11407 kasan_notify_address(*dst_addr
, size
);
11411 * Update the hints and the map size
11414 if (consume_on_success
) {
11415 SAVE_HINT_MAP_WRITE(dst_map
, vm_map_copy_last_entry(copy
));
11417 SAVE_HINT_MAP_WRITE(dst_map
, last
);
11420 dst_map
->size
+= size
;
11426 if (consume_on_success
) {
11427 vm_map_copy_insert(dst_map
, last
, copy
);
11428 if (copy
!= original_copy
) {
11429 vm_map_copy_discard(original_copy
);
11430 original_copy
= VM_MAP_COPY_NULL
;
11433 vm_map_copy_remap(dst_map
, last
, copy
, adjustment
,
11434 cur_protection
, max_protection
,
11436 if (copy
!= original_copy
&& original_copy
!= VM_MAP_COPY_NULL
) {
11437 vm_map_copy_discard(copy
);
11438 copy
= original_copy
;
11443 vm_map_unlock(dst_map
);
11446 * XXX If wiring_required, call vm_map_pageable
11449 return KERN_SUCCESS
;
/*
 *	Routine:	vm_map_copyin
 *
 *	Description:
 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
 */

#undef vm_map_copyin

kern_return_t
vm_map_copyin(
	vm_map_t	src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	vm_map_copy_t	*copy_result)	/* OUT */
{
	return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
	           FALSE, copy_result, FALSE);
}

/*
 *	Routine:	vm_map_copyin_common
 *
 *	Description:
 *		Copy the specified region (src_addr, len) from the
 *		source address space (src_map), possibly removing
 *		the region from the source address space (src_destroy).
 *
 *	Returns:
 *		A vm_map_copy_t object (copy_result), suitable for
 *		insertion into another address space (using vm_map_copyout),
 *		copying over another address space region (using
 *		vm_map_copy_overwrite).  If the copy is unused, it
 *		should be destroyed (using vm_map_copy_discard).
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */

typedef struct submap_map {
	vm_map_t	parent_map;
	vm_map_offset_t	base_start;
	vm_map_offset_t	base_end;
	vm_map_size_t	base_len;
	struct submap_map *next;
} submap_map_t;

kern_return_t
vm_map_copyin_common(
	vm_map_t	src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	__unused boolean_t	src_volatile,
	vm_map_copy_t	*copy_result,	/* OUT */
	boolean_t	use_maxprot)
{
	int flags;

	flags = 0;
	if (src_destroy) {
		flags |= VM_MAP_COPYIN_SRC_DESTROY;
	}
	if (use_maxprot) {
		flags |= VM_MAP_COPYIN_USE_MAXPROT;
	}
	return vm_map_copyin_internal(src_map,
	           src_addr,
	           len,
	           flags,
	           copy_result);
}
kern_return_t
vm_map_copyin_internal(
	vm_map_t	src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	len,
	int		flags,
	vm_map_copy_t	*copy_result)	/* OUT */
{
11534 vm_map_entry_t tmp_entry
; /* Result of last map lookup --
11535 * in multi-level lookup, this
11536 * entry contains the actual
11537 * vm_object/offset.
11539 vm_map_entry_t new_entry
= VM_MAP_ENTRY_NULL
; /* Map entry for copy */
11541 vm_map_offset_t src_start
; /* Start of current entry --
11542 * where copy is taking place now
11544 vm_map_offset_t src_end
; /* End of entire region to be
11546 vm_map_offset_t src_base
;
11547 vm_map_t base_map
= src_map
;
11548 boolean_t map_share
= FALSE
;
11549 submap_map_t
*parent_maps
= NULL
;
11551 vm_map_copy_t copy
; /* Resulting copy */
11552 vm_map_address_t copy_addr
;
11553 vm_map_size_t copy_size
;
11554 boolean_t src_destroy
;
11555 boolean_t use_maxprot
;
11556 boolean_t preserve_purgeable
;
11557 boolean_t entry_was_shared
;
11558 vm_map_entry_t saved_src_entry
;
11560 if (flags
& ~VM_MAP_COPYIN_ALL_FLAGS
) {
11561 return KERN_INVALID_ARGUMENT
;
11564 src_destroy
= (flags
& VM_MAP_COPYIN_SRC_DESTROY
) ? TRUE
: FALSE
;
11565 use_maxprot
= (flags
& VM_MAP_COPYIN_USE_MAXPROT
) ? TRUE
: FALSE
;
11566 preserve_purgeable
=
11567 (flags
& VM_MAP_COPYIN_PRESERVE_PURGEABLE
) ? TRUE
: FALSE
;
11570 * Check for copies of zero bytes.
11574 *copy_result
= VM_MAP_COPY_NULL
;
11575 return KERN_SUCCESS
;
11579 * Check that the end address doesn't overflow
11581 src_end
= src_addr
+ len
;
11582 if (src_end
< src_addr
) {
11583 return KERN_INVALID_ADDRESS
;
11587 * Compute (page aligned) start and end of region
11589 src_start
= vm_map_trunc_page(src_addr
,
11590 VM_MAP_PAGE_MASK(src_map
));
11591 src_end
= vm_map_round_page(src_end
,
11592 VM_MAP_PAGE_MASK(src_map
));
11595 * If the copy is sufficiently small, use a kernel buffer instead
11596 * of making a virtual copy. The theory being that the cost of
11597 * setting up VM (and taking C-O-W faults) dominates the copy costs
11598 * for small regions.
11600 if ((len
< msg_ool_size_small
) &&
11602 !preserve_purgeable
&&
11603 !(flags
& VM_MAP_COPYIN_ENTRY_LIST
) &&
11605 * Since the "msg_ool_size_small" threshold was increased and
11606 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11607 * address space limits, we revert to doing a virtual copy if the
11608 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11609 * of the commpage would now fail when it used to work.
11611 (src_start
>= vm_map_min(src_map
) &&
11612 src_start
< vm_map_max(src_map
) &&
11613 src_end
>= vm_map_min(src_map
) &&
11614 src_end
< vm_map_max(src_map
))) {
11615 return vm_map_copyin_kernel_buffer(src_map
, src_addr
, len
,
11616 src_destroy
, copy_result
);
11620 * Allocate a header element for the list.
11622 * Use the start and end in the header to
11623 * remember the endpoints prior to rounding.
11626 copy
= vm_map_copy_allocate();
11627 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
11628 copy
->cpy_hdr
.entries_pageable
= TRUE
;
11629 copy
->cpy_hdr
.page_shift
= VM_MAP_PAGE_SHIFT(src_map
);
11631 vm_map_store_init( &(copy
->cpy_hdr
));
11633 copy
->offset
= src_addr
;
11636 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11638 #define RETURN(x) \
11640 vm_map_unlock(src_map); \
11641 if(src_map != base_map) \
11642 vm_map_deallocate(src_map); \
11643 if (new_entry != VM_MAP_ENTRY_NULL) \
11644 vm_map_copy_entry_dispose(copy,new_entry); \
11645 vm_map_copy_discard(copy); \
11647 submap_map_t *_ptr; \
11649 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11650 parent_maps=parent_maps->next; \
11651 if (_ptr->parent_map != base_map) \
11652 vm_map_deallocate(_ptr->parent_map); \
11653 kfree(_ptr, sizeof(submap_map_t)); \
11660 * Find the beginning of the region.
11663 vm_map_lock(src_map
);
11666 * Lookup the original "src_addr" rather than the truncated
11667 * "src_start", in case "src_start" falls in a non-map-aligned
11668 * map entry *before* the map entry that contains "src_addr"...
11670 if (!vm_map_lookup_entry(src_map
, src_addr
, &tmp_entry
)) {
11671 RETURN(KERN_INVALID_ADDRESS
);
11673 if (!tmp_entry
->is_sub_map
) {
11675 * ... but clip to the map-rounded "src_start" rather than
11676 * "src_addr" to preserve map-alignment. We'll adjust the
11677 * first copy entry at the end, if needed.
11679 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11681 if (src_start
< tmp_entry
->vme_start
) {
11683 * Move "src_start" up to the start of the
11684 * first map entry to copy.
11686 src_start
= tmp_entry
->vme_start
;
11688 /* set for later submap fix-up */
11689 copy_addr
= src_start
;
11692 * Go through entries until we get to the end.
11696 vm_map_entry_t src_entry
= tmp_entry
; /* Top-level entry */
11697 vm_map_size_t src_size
; /* Size of source
11698 * map entry (in both
11702 vm_object_t src_object
; /* Object to copy */
11703 vm_object_offset_t src_offset
;
11705 boolean_t src_needs_copy
; /* Should source map
11706 * be made read-only
11707 * for copy-on-write?
11710 boolean_t new_entry_needs_copy
; /* Will new entry be COW? */
11712 boolean_t was_wired
; /* Was source wired? */
11713 vm_map_version_t version
; /* Version before locks
11714 * dropped to make copy
11716 kern_return_t result
; /* Return value from
11717 * copy_strategically.
11719 while (tmp_entry
->is_sub_map
) {
11720 vm_map_size_t submap_len
;
11723 ptr
= (submap_map_t
*)kalloc(sizeof(submap_map_t
));
11724 ptr
->next
= parent_maps
;
11726 ptr
->parent_map
= src_map
;
11727 ptr
->base_start
= src_start
;
11728 ptr
->base_end
= src_end
;
11729 submap_len
= tmp_entry
->vme_end
- src_start
;
11730 if (submap_len
> (src_end
- src_start
)) {
11731 submap_len
= src_end
- src_start
;
11733 ptr
->base_len
= submap_len
;
11735 src_start
-= tmp_entry
->vme_start
;
11736 src_start
+= VME_OFFSET(tmp_entry
);
11737 src_end
= src_start
+ submap_len
;
11738 src_map
= VME_SUBMAP(tmp_entry
);
11739 vm_map_lock(src_map
);
11740 /* keep an outstanding reference for all maps in */
11741 /* the parents tree except the base map */
11742 vm_map_reference(src_map
);
11743 vm_map_unlock(ptr
->parent_map
);
11744 if (!vm_map_lookup_entry(
11745 src_map
, src_start
, &tmp_entry
)) {
11746 RETURN(KERN_INVALID_ADDRESS
);
11749 if (!tmp_entry
->is_sub_map
) {
11750 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11752 src_entry
= tmp_entry
;
11754 /* we are now in the lowest level submap... */
		if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
		    (VME_OBJECT(tmp_entry)->phys_contiguous)) {
			/* This is not supported for now.  In the future */
			/* we will need to detect the phys_contig */
			/* condition and then upgrade copy_slowly */
			/* to do physical copy from the device mem */
			/* based object.  We can piggy-back off of */
			/* the was_wired boolean to set up the */
			/* proper handling */
			RETURN(KERN_PROTECTION_FAILURE);
		}
11768 * Create a new address map entry to hold the result.
11769 * Fill in the fields from the appropriate source entries.
11770 * We must unlock the source map to do this if we need
11771 * to allocate a map entry.
11773 if (new_entry
== VM_MAP_ENTRY_NULL
) {
11774 version
.main_timestamp
= src_map
->timestamp
;
11775 vm_map_unlock(src_map
);
11777 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11779 vm_map_lock(src_map
);
11780 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
11781 if (!vm_map_lookup_entry(src_map
, src_start
,
11783 RETURN(KERN_INVALID_ADDRESS
);
11785 if (!tmp_entry
->is_sub_map
) {
11786 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11788 continue; /* restart w/ new tmp_entry */
11793 * Verify that the region can be read.
11795 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
11797 (src_entry
->max_protection
& VM_PROT_READ
) == 0) {
11798 RETURN(KERN_PROTECTION_FAILURE
);
	/*
	 * Clip against the endpoints of the entire region.
	 */
	vm_map_clip_end(src_map, src_entry, src_end);

	src_size = src_entry->vme_end - src_start;
	src_object = VME_OBJECT(src_entry);
	src_offset = VME_OFFSET(src_entry);
	was_wired = (src_entry->wired_count != 0);

	vm_map_entry_copy(src_map, new_entry, src_entry);
	if (new_entry->is_sub_map) {
		/* clear address space specifics */
		new_entry->use_pmap = FALSE;
	} else {
		/*
		 * We're dealing with a copy-on-write operation,
		 * so the resulting mapping should not inherit the
		 * original mapping's accounting settings.
		 * "iokit_acct" should have been cleared in
		 * vm_map_entry_copy().
		 * "use_pmap" should be reset to its default (TRUE)
		 * so that the new mapping gets accounted for in
		 * the task's memory footprint.
		 */
		assert(!new_entry->iokit_acct);
		new_entry->use_pmap = TRUE;
	}
	/*
	 * Attempt non-blocking copy-on-write optimizations.
	 *
	 * If we are destroying the source, and the object
	 * is internal, we could move the object reference
	 * from the source to the copy.  The copy is
	 * copy-on-write only if the source is.
	 * We make another reference to the object, because
	 * destroying the source entry will deallocate it.
	 *
	 * This memory transfer has to be atomic (to prevent
	 * the VM object from being shared or copied while
	 * it's being moved here), so we could only do this
	 * if we won't have to unlock the VM map until the
	 * original mapping has been fully removed.
	 */
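	/*
	 * (Overview note, illustrative: the code below tries
	 * vm_object_copy_quickly() first and, if that is not possible,
	 * falls back to vm_object_copy_slowly(), vm_object_copy_delayed()
	 * or vm_object_copy_strategically(), depending on wiring, sharing
	 * and the source object's copy strategy.)
	 */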
11851 if ((src_object
== VM_OBJECT_NULL
||
11852 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
11853 && !(debug4k_no_cow_copyin
&& VM_MAP_PAGE_SHIFT(src_map
) < PAGE_SHIFT
))) &&
11854 vm_object_copy_quickly(
11855 VME_OBJECT_PTR(new_entry
),
11859 &new_entry_needs_copy
)) {
11860 new_entry
->needs_copy
= new_entry_needs_copy
;
11863 * Handle copy-on-write obligations
11866 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
11869 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
11871 if (override_nx(src_map
, VME_ALIAS(src_entry
))
11873 prot
|= VM_PROT_EXECUTE
;
11876 vm_object_pmap_protect(
11880 (src_entry
->is_shared
?
11883 VM_MAP_PAGE_SIZE(src_map
),
11884 src_entry
->vme_start
,
11887 assert(tmp_entry
->wired_count
== 0);
11888 tmp_entry
->needs_copy
= TRUE
;
11892 * The map has never been unlocked, so it's safe
11893 * to move to the next entry rather than doing
11897 goto CopySuccessful
;
11900 entry_was_shared
= tmp_entry
->is_shared
;
11903 * Take an object reference, so that we may
11904 * release the map lock(s).
11907 assert(src_object
!= VM_OBJECT_NULL
);
11908 vm_object_reference(src_object
);
11911 * Record the timestamp for later verification.
11915 version
.main_timestamp
= src_map
->timestamp
;
11916 vm_map_unlock(src_map
); /* Increments timestamp once! */
11917 saved_src_entry
= src_entry
;
11918 tmp_entry
= VM_MAP_ENTRY_NULL
;
11919 src_entry
= VM_MAP_ENTRY_NULL
;
11926 (debug4k_no_cow_copyin
&&
11927 VM_MAP_PAGE_SHIFT(src_map
) < PAGE_SHIFT
)) {
11929 vm_object_lock(src_object
);
11930 result
= vm_object_copy_slowly(
11935 VME_OBJECT_PTR(new_entry
));
11936 VME_OFFSET_SET(new_entry
,
11937 src_offset
- vm_object_trunc_page(src_offset
));
11938 new_entry
->needs_copy
= FALSE
;
11939 } else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
11940 (entry_was_shared
|| map_share
)) {
11941 vm_object_t new_object
;
11943 vm_object_lock_shared(src_object
);
11944 new_object
= vm_object_copy_delayed(
11949 if (new_object
== VM_OBJECT_NULL
) {
11953 VME_OBJECT_SET(new_entry
, new_object
);
11954 assert(new_entry
->wired_count
== 0);
11955 new_entry
->needs_copy
= TRUE
;
11956 assert(!new_entry
->iokit_acct
);
11957 assert(new_object
->purgable
== VM_PURGABLE_DENY
);
11958 assertf(new_entry
->use_pmap
, "src_map %p new_entry %p\n", src_map
, new_entry
);
11959 result
= KERN_SUCCESS
;
11961 vm_object_offset_t new_offset
;
11962 new_offset
= VME_OFFSET(new_entry
);
11963 result
= vm_object_copy_strategically(src_object
,
11966 VME_OBJECT_PTR(new_entry
),
11968 &new_entry_needs_copy
);
11969 if (new_offset
!= VME_OFFSET(new_entry
)) {
11970 VME_OFFSET_SET(new_entry
, new_offset
);
11973 new_entry
->needs_copy
= new_entry_needs_copy
;
11976 if (result
== KERN_SUCCESS
&&
11977 ((preserve_purgeable
&&
11978 src_object
->purgable
!= VM_PURGABLE_DENY
) ||
11979 new_entry
->used_for_jit
)) {
			 * Purgeable objects should be COPY_NONE, true share;
			 * this should be propagated to the copy.
			 *
			 * Also force mappings the pmap specially protects to
			 * be COPY_NONE; trying to COW these mappings would
			 * change the effective protections, which could have
			 * side effects if the pmap layer relies on the
			 * specified protections.
11991 vm_object_t new_object
;
11993 new_object
= VME_OBJECT(new_entry
);
11994 assert(new_object
!= src_object
);
11995 vm_object_lock(new_object
);
11996 assert(new_object
->ref_count
== 1);
11997 assert(new_object
->shadow
== VM_OBJECT_NULL
);
11998 assert(new_object
->copy
== VM_OBJECT_NULL
);
11999 assert(new_object
->vo_owner
== NULL
);
12001 new_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
12003 if (preserve_purgeable
&&
12004 src_object
->purgable
!= VM_PURGABLE_DENY
) {
12005 new_object
->true_share
= TRUE
;
12007 /* start as non-volatile with no owner... */
12008 new_object
->purgable
= VM_PURGABLE_NONVOLATILE
;
12009 vm_purgeable_nonvolatile_enqueue(new_object
, NULL
);
12010 /* ... and move to src_object's purgeable state */
12011 if (src_object
->purgable
!= VM_PURGABLE_NONVOLATILE
) {
12013 state
= src_object
->purgable
;
12014 vm_object_purgable_control(
12016 VM_PURGABLE_SET_STATE_FROM_KERNEL
,
12019 /* no pmap accounting for purgeable objects */
12020 new_entry
->use_pmap
= FALSE
;
12023 vm_object_unlock(new_object
);
12024 new_object
= VM_OBJECT_NULL
;
12027 if (result
!= KERN_SUCCESS
&&
12028 result
!= KERN_MEMORY_RESTART_COPY
) {
12029 vm_map_lock(src_map
);
12034 * Throw away the extra reference
12037 vm_object_deallocate(src_object
);
12040 * Verify that the map has not substantially
12041 * changed while the copy was being made.
12044 vm_map_lock(src_map
);
12046 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
) {
12047 /* src_map hasn't changed: src_entry is still valid */
12048 src_entry
= saved_src_entry
;
12049 goto VerificationSuccessful
;
12053 * Simple version comparison failed.
12055 * Retry the lookup and verify that the
12056 * same object/offset are still present.
12058 * [Note: a memory manager that colludes with
12059 * the calling task can detect that we have
12060 * cheated. While the map was unlocked, the
12061 * mapping could have been changed and restored.]
12064 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
12065 if (result
!= KERN_MEMORY_RESTART_COPY
) {
12066 vm_object_deallocate(VME_OBJECT(new_entry
));
12067 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
12068 /* reset accounting state */
12069 new_entry
->iokit_acct
= FALSE
;
12070 new_entry
->use_pmap
= TRUE
;
12072 RETURN(KERN_INVALID_ADDRESS
);
12075 src_entry
= tmp_entry
;
12076 vm_map_clip_start(src_map
, src_entry
, src_start
);
12078 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
12080 ((src_entry
->max_protection
& VM_PROT_READ
) == 0)) {
12081 goto VerificationFailed
;
12084 if (src_entry
->vme_end
< new_entry
->vme_end
) {
12086 * This entry might have been shortened
12087 * (vm_map_clip_end) or been replaced with
12088 * an entry that ends closer to "src_start"
12090 * Adjust "new_entry" accordingly; copying
12091 * less memory would be correct but we also
12092 * redo the copy (see below) if the new entry
12093 * no longer points at the same object/offset.
12095 assert(VM_MAP_PAGE_ALIGNED(src_entry
->vme_end
,
12096 VM_MAP_COPY_PAGE_MASK(copy
)));
12097 new_entry
->vme_end
= src_entry
->vme_end
;
12098 src_size
= new_entry
->vme_end
- src_start
;
12099 } else if (src_entry
->vme_end
> new_entry
->vme_end
) {
12101 * This entry might have been extended
12102 * (vm_map_entry_simplify() or coalesce)
12103 * or been replaced with an entry that ends farther
12104 * from "src_start" than before.
12106 * We've called vm_object_copy_*() only on
12107 * the previous <start:end> range, so we can't
12108 * just extend new_entry. We have to re-do
12109 * the copy based on the new entry as if it was
12110 * pointing at a different object/offset (see
12111 * "Verification failed" below).
12115 if ((VME_OBJECT(src_entry
) != src_object
) ||
12116 (VME_OFFSET(src_entry
) != src_offset
) ||
12117 (src_entry
->vme_end
> new_entry
->vme_end
)) {
12119 * Verification failed.
12121 * Start over with this top-level entry.
12124 VerificationFailed
: ;
12126 vm_object_deallocate(VME_OBJECT(new_entry
));
12127 tmp_entry
= src_entry
;
12132 * Verification succeeded.
12135 VerificationSuccessful
:;
12137 if (result
== KERN_MEMORY_RESTART_COPY
) {
12148 * Link in the new copy entry.
12151 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
12155 * Determine whether the entire region
12158 src_base
= src_start
;
12159 src_start
= new_entry
->vme_end
;
12160 new_entry
= VM_MAP_ENTRY_NULL
;
12161 while ((src_start
>= src_end
) && (src_end
!= 0)) {
12164 if (src_map
== base_map
) {
12165 /* back to the top */
12170 assert(ptr
!= NULL
);
12171 parent_maps
= parent_maps
->next
;
12173 /* fix up the damage we did in that submap */
12174 vm_map_simplify_range(src_map
,
12178 vm_map_unlock(src_map
);
12179 vm_map_deallocate(src_map
);
12180 vm_map_lock(ptr
->parent_map
);
12181 src_map
= ptr
->parent_map
;
12182 src_base
= ptr
->base_start
;
12183 src_start
= ptr
->base_start
+ ptr
->base_len
;
12184 src_end
= ptr
->base_end
;
12185 if (!vm_map_lookup_entry(src_map
,
12188 (src_end
> src_start
)) {
12189 RETURN(KERN_INVALID_ADDRESS
);
12191 kfree(ptr
, sizeof(submap_map_t
));
12192 if (parent_maps
== NULL
) {
12195 src_entry
= tmp_entry
->vme_prev
;
12198 if ((VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) &&
12199 (src_start
>= src_addr
+ len
) &&
12200 (src_addr
+ len
!= 0)) {
		 * Stop copying now, even though we haven't reached
		 * "src_end".  We'll adjust the end of the last copy
		 * entry at the end, if needed.
		 *
		 * If src_map's alignment is different from the
		 * system's page alignment, there could be
		 * extra non-map-aligned map entries between
		 * the original (non-rounded) "src_addr + len"
		 * and the rounded "src_end".
		 * We do not want to copy those map entries since
		 * they're not part of the copied range.
12217 if ((src_start
>= src_end
) && (src_end
!= 0)) {
12222 * Verify that there are no gaps in the region
12225 tmp_entry
= src_entry
->vme_next
;
12226 if ((tmp_entry
->vme_start
!= src_start
) ||
12227 (tmp_entry
== vm_map_to_entry(src_map
))) {
12228 RETURN(KERN_INVALID_ADDRESS
);
12233 * If the source should be destroyed, do it now, since the
12234 * copy was successful.
12237 (void) vm_map_delete(
12239 vm_map_trunc_page(src_addr
,
12240 VM_MAP_PAGE_MASK(src_map
)),
12242 ((src_map
== kernel_map
) ?
12243 VM_MAP_REMOVE_KUNWIRE
:
12244 VM_MAP_REMOVE_NO_FLAGS
),
12247 /* fix up the damage we did in the base map */
12248 vm_map_simplify_range(
12250 vm_map_trunc_page(src_addr
,
12251 VM_MAP_PAGE_MASK(src_map
)),
12252 vm_map_round_page(src_end
,
12253 VM_MAP_PAGE_MASK(src_map
)));
12256 vm_map_unlock(src_map
);
12257 tmp_entry
= VM_MAP_ENTRY_NULL
;
12259 if (VM_MAP_PAGE_SHIFT(src_map
) > PAGE_SHIFT
&&
12260 VM_MAP_PAGE_SHIFT(src_map
) != VM_MAP_COPY_PAGE_SHIFT(copy
)) {
12261 vm_map_offset_t original_start
, original_offset
, original_end
;
12263 assert(VM_MAP_COPY_PAGE_MASK(copy
) == PAGE_MASK
);
12265 /* adjust alignment of first copy_entry's "vme_start" */
12266 tmp_entry
= vm_map_copy_first_entry(copy
);
12267 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
12268 vm_map_offset_t adjustment
;
12270 original_start
= tmp_entry
->vme_start
;
12271 original_offset
= VME_OFFSET(tmp_entry
);
12273 /* map-align the start of the first copy entry... */
12274 adjustment
= (tmp_entry
->vme_start
-
12276 tmp_entry
->vme_start
,
12277 VM_MAP_PAGE_MASK(src_map
)));
12278 tmp_entry
->vme_start
-= adjustment
;
12279 VME_OFFSET_SET(tmp_entry
,
12280 VME_OFFSET(tmp_entry
) - adjustment
);
12281 copy_addr
-= adjustment
;
12282 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12283 /* ... adjust for mis-aligned start of copy range */
12285 (vm_map_trunc_page(copy
->offset
,
12287 vm_map_trunc_page(copy
->offset
,
12288 VM_MAP_PAGE_MASK(src_map
)));
12290 assert(page_aligned(adjustment
));
12291 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
12292 tmp_entry
->vme_start
+= adjustment
;
12293 VME_OFFSET_SET(tmp_entry
,
12294 (VME_OFFSET(tmp_entry
) +
12296 copy_addr
+= adjustment
;
12297 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12301 * Assert that the adjustments haven't exposed
12302 * more than was originally copied...
12304 assert(tmp_entry
->vme_start
>= original_start
);
12305 assert(VME_OFFSET(tmp_entry
) >= original_offset
);
			 * ... and that it did not adjust outside of
			 * a single 16K page.
12310 assert(vm_map_trunc_page(tmp_entry
->vme_start
,
12311 VM_MAP_PAGE_MASK(src_map
)) ==
12312 vm_map_trunc_page(original_start
,
12313 VM_MAP_PAGE_MASK(src_map
)));
12316 /* adjust alignment of last copy_entry's "vme_end" */
12317 tmp_entry
= vm_map_copy_last_entry(copy
);
12318 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
12319 vm_map_offset_t adjustment
;
12321 original_end
= tmp_entry
->vme_end
;
12323 /* map-align the end of the last copy entry... */
12324 tmp_entry
->vme_end
=
12325 vm_map_round_page(tmp_entry
->vme_end
,
12326 VM_MAP_PAGE_MASK(src_map
));
12327 /* ... adjust for mis-aligned end of copy range */
12329 (vm_map_round_page((copy
->offset
+
12331 VM_MAP_PAGE_MASK(src_map
)) -
12332 vm_map_round_page((copy
->offset
+
12336 assert(page_aligned(adjustment
));
12337 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
12338 tmp_entry
->vme_end
-= adjustment
;
12339 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12343 * Assert that the adjustments haven't exposed
12344 * more than was originally copied...
12346 assert(tmp_entry
->vme_end
<= original_end
);
			 * ... and that it did not adjust outside of
			 * a single 16K page.
12351 assert(vm_map_round_page(tmp_entry
->vme_end
,
12352 VM_MAP_PAGE_MASK(src_map
)) ==
12353 vm_map_round_page(original_end
,
12354 VM_MAP_PAGE_MASK(src_map
)));
12358 /* Fix-up start and end points in copy. This is necessary */
12359 /* when the various entries in the copy object were picked */
12360 /* up from different sub-maps */
12362 tmp_entry
= vm_map_copy_first_entry(copy
);
12363 copy_size
= 0; /* compute actual size */
12364 while (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
12365 assert(VM_MAP_PAGE_ALIGNED(
12366 copy_addr
+ (tmp_entry
->vme_end
-
12367 tmp_entry
->vme_start
),
12368 MIN(VM_MAP_COPY_PAGE_MASK(copy
), PAGE_MASK
)));
12369 assert(VM_MAP_PAGE_ALIGNED(
12371 MIN(VM_MAP_COPY_PAGE_MASK(copy
), PAGE_MASK
)));
12374 * The copy_entries will be injected directly into the
12375 * destination map and might not be "map aligned" there...
12377 tmp_entry
->map_aligned
= FALSE
;
12379 tmp_entry
->vme_end
= copy_addr
+
12380 (tmp_entry
->vme_end
- tmp_entry
->vme_start
);
12381 tmp_entry
->vme_start
= copy_addr
;
12382 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
12383 copy_addr
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
12384 copy_size
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
12385 tmp_entry
= (struct vm_map_entry
*)tmp_entry
->vme_next
;
12388 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
&&
12389 copy_size
< copy
->size
) {
12391 * The actual size of the VM map copy is smaller than what
12392 * was requested by the caller. This must be because some
12393 * PAGE_SIZE-sized pages are missing at the end of the last
12394 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12395 * The caller might not have been aware of those missing
12396 * pages and might not want to be aware of it, which is
12397 * fine as long as they don't try to access (and crash on)
12398 * those missing pages.
12399 * Let's adjust the size of the "copy", to avoid failing
12400 * in vm_map_copyout() or vm_map_copy_overwrite().
12402 assert(vm_map_round_page(copy_size
,
12403 VM_MAP_PAGE_MASK(src_map
)) ==
12404 vm_map_round_page(copy
->size
,
12405 VM_MAP_PAGE_MASK(src_map
)));
12406 copy
->size
= copy_size
;
12409 *copy_result
= copy
;
12410 return KERN_SUCCESS
;
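/*
 * (Illustrative caller pattern, a sketch only -- "dst_map" and
 * "copied_addr" are hypothetical names, error handling elided:
 *
 *	vm_map_copy_t copy;
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &copied_addr, copy);
 *	}
 *
 * vm_map_copyout() consumes "copy" on success; on failure the caller
 * would discard it with vm_map_copy_discard().)
 */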
kern_return_t
vm_map_copy_extract(
	vm_map_t                src_map,
	vm_map_address_t        src_addr,
	vm_map_size_t           len,
	vm_prot_t               required_prot,
	boolean_t               do_copy,
	vm_map_copy_t           *copy_result,   /* OUT */
	vm_prot_t               *cur_prot,      /* OUT */
	vm_prot_t               *max_prot,      /* OUT */
	vm_inherit_t            inheritance,
	vm_map_kernel_flags_t   vmk_flags)
{
	vm_map_copy_t   copy;
	kern_return_t   kr;

	/*
	 * Check for copies of zero bytes.
	 */
	if (len == 0) {
		*copy_result = VM_MAP_COPY_NULL;
		return KERN_SUCCESS;
	}
12441 * Check that the end address doesn't overflow
12443 if (src_addr
+ len
< src_addr
) {
12444 return KERN_INVALID_ADDRESS
;
12447 if (VM_MAP_PAGE_SIZE(src_map
) < PAGE_SIZE
) {
12448 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map
, (uint64_t)src_addr
, (uint64_t)(src_addr
+ len
));
12452 * Allocate a header element for the list.
12454 * Use the start and end in the header to
12455 * remember the endpoints prior to rounding.
12458 copy
= vm_map_copy_allocate();
12459 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
12460 copy
->cpy_hdr
.entries_pageable
= vmk_flags
.vmkf_copy_pageable
;
12462 vm_map_store_init(©
->cpy_hdr
);
12467 kr
= vm_map_remap_extract(src_map
,
12471 do_copy
, /* copy */
12477 if (kr
!= KERN_SUCCESS
) {
12478 vm_map_copy_discard(copy
);
12481 assert((*cur_prot
& required_prot
) == required_prot
);
12482 assert((*max_prot
& required_prot
) == required_prot
);
12484 *copy_result
= copy
;
12485 return KERN_SUCCESS
;
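/*
 * (Usage sketch, illustrative only; elided arguments shown as "...":
 *
 *	kr = vm_map_copy_extract(src_map, addr, size, VM_PROT_READ, ...,
 *	    &copy, &cur_prot, &max_prot, VM_INHERIT_DEFAULT, vmk_flags);
 *
 * On success the asserts above guarantee that "required_prot" is covered
 * by both *cur_prot and *max_prot.)
 */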
/*
 *	vm_map_copyin_object:
 *
 *	Create a copy object from an object.
 *	Our caller donates an object reference.
 */
kern_return_t
vm_map_copyin_object(
	vm_object_t             object,
	vm_object_offset_t      offset,         /* offset of region in object */
	vm_object_size_t        size,           /* size of region in object */
	vm_map_copy_t           *copy_result)   /* OUT */
{
	vm_map_copy_t copy;             /* Resulting copy */

	/*
	 * We drop the object into a special copy object
	 * that contains the object directly.
	 */
	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = object;
	copy->offset = offset;
	copy->size = size;

	*copy_result = copy;
	return KERN_SUCCESS;
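/*
 * (Usage sketch, illustrative: the caller donates its object reference,
 * so a typical sequence -- with hypothetical names -- is:
 *
 *	vm_object_reference(obj);
 *	kr = vm_map_copyin_object(obj, 0, obj_size, &copy);
 *
 * after which "copy" owns that reference.)
 */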
12522 vm_map_entry_t old_entry
,
12525 vm_object_t object
;
12526 vm_map_entry_t new_entry
;
12529 * New sharing code. New map entry
12530 * references original object. Internal
12531 * objects use asynchronous copy algorithm for
12532 * future copies. First make sure we have
12533 * the right object. If we need a shadow,
12534 * or someone else already has one, then
12535 * make a new shadow and share it.
12538 object
= VME_OBJECT(old_entry
);
12539 if (old_entry
->is_sub_map
) {
12540 assert(old_entry
->wired_count
== 0);
12541 #ifndef NO_NESTED_PMAP
12542 if (old_entry
->use_pmap
) {
12543 kern_return_t result
;
12545 result
= pmap_nest(new_map
->pmap
,
12546 (VME_SUBMAP(old_entry
))->pmap
,
12547 (addr64_t
)old_entry
->vme_start
,
12548 (uint64_t)(old_entry
->vme_end
- old_entry
->vme_start
));
12550 panic("vm_map_fork_share: pmap_nest failed!");
12553 #endif /* NO_NESTED_PMAP */
12554 } else if (object
== VM_OBJECT_NULL
) {
12555 object
= vm_object_allocate((vm_map_size_t
)(old_entry
->vme_end
-
12556 old_entry
->vme_start
));
12557 VME_OFFSET_SET(old_entry
, 0);
12558 VME_OBJECT_SET(old_entry
, object
);
12559 old_entry
->use_pmap
= TRUE
;
12560 // assert(!old_entry->needs_copy);
12561 } else if (object
->copy_strategy
!=
12562 MEMORY_OBJECT_COPY_SYMMETRIC
) {
12564 * We are already using an asymmetric
12565 * copy, and therefore we already have
12566 * the right object.
12569 assert(!old_entry
->needs_copy
);
12570 } else if (old_entry
->needs_copy
|| /* case 1 */
12571 object
->shadowed
|| /* case 2 */
12572 (!object
->true_share
&& /* case 3 */
12573 !old_entry
->is_shared
&&
12575 (vm_map_size_t
)(old_entry
->vme_end
-
12576 old_entry
->vme_start
)))) {
		 * We need to create a shadow.
		 * There are three cases here.
		 * In the first case, we need to
		 * complete a deferred symmetrical
		 * copy that we participated in.
		 * In the second and third cases,
		 * we need to create the shadow so
		 * that changes that we make to the
		 * object do not interfere with
		 * any symmetrical copies which
		 * have occurred (case 2) or which
		 * might occur (case 3).
		 *
		 * The first case is when we had
		 * deferred shadow object creation
		 * via the entry->needs_copy mechanism.
		 * This mechanism only works when
		 * only one entry points to the source
		 * object, and we are about to create
		 * a second entry pointing to the
		 * same object. The problem is that
		 * there is no way of mapping from
		 * an object to the entries pointing
		 * to it. (Deferred shadow creation
		 * works with one entry because it occurs
		 * at fault time, and we walk from the
		 * entry to the object when handling
		 * the fault.)
		 *
		 * The second case is when the object
		 * to be shared has already been copied
		 * with a symmetric copy, but we point
		 * directly to the object without
		 * needs_copy set in our entry. (This
		 * can happen because different ranges
		 * of an object can be pointed to by
		 * different entries. In particular,
		 * a single entry pointing to an object
		 * can be split by a call to vm_inherit,
		 * which, combined with task_create, can
		 * result in the different entries
		 * having different needs_copy values.)
		 * The shadowed flag in the object allows
		 * us to detect this case. The problem
		 * with this case is that if this object
		 * has or will have shadows, then we
		 * must not perform an asymmetric copy
		 * of this object, since such a copy
		 * allows the object to be changed, which
		 * will break the previous symmetrical
		 * copies (which rely upon the object
		 * not changing). In a sense, the shadowed
		 * flag says "don't change this object".
		 * We fix this by creating a shadow
		 * object for this object, and sharing
		 * that. This works because we are free
		 * to change the shadow object (and thus
		 * to use an asymmetric copy strategy);
		 * this is also semantically correct,
		 * since this object is temporary, and
		 * therefore a copy of the object is
		 * as good as the object itself. (This
		 * is not true for permanent objects,
		 * since the pager needs to see changes,
		 * which won't happen if the changes
		 * are made to a copy.)
		 *
		 * The third case is when the object
		 * to be shared has parts sticking
		 * outside of the entry we're working
		 * with, and thus may in the future
		 * be subject to a symmetrical copy.
		 * (This is a preemptive version of
12653 VME_OBJECT_SHADOW(old_entry
,
12654 (vm_map_size_t
) (old_entry
->vme_end
-
12655 old_entry
->vme_start
));
12658 * If we're making a shadow for other than
12659 * copy on write reasons, then we have
12660 * to remove write permission.
12663 if (!old_entry
->needs_copy
&&
12664 (old_entry
->protection
& VM_PROT_WRITE
)) {
12667 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, old_entry
->protection
));
12669 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
12671 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, prot
));
12673 if (override_nx(old_map
, VME_ALIAS(old_entry
)) && prot
) {
12674 prot
|= VM_PROT_EXECUTE
;
12678 if (old_map
->mapped_in_other_pmaps
) {
12679 vm_object_pmap_protect(
12680 VME_OBJECT(old_entry
),
12681 VME_OFFSET(old_entry
),
12682 (old_entry
->vme_end
-
12683 old_entry
->vme_start
),
12686 old_entry
->vme_start
,
12689 pmap_protect(old_map
->pmap
,
12690 old_entry
->vme_start
,
12691 old_entry
->vme_end
,
12696 old_entry
->needs_copy
= FALSE
;
12697 object
= VME_OBJECT(old_entry
);
12702 * If object was using a symmetric copy strategy,
12703 * change its copy strategy to the default
12704 * asymmetric copy strategy, which is copy_delay
12705 * in the non-norma case and copy_call in the
12706 * norma case. Bump the reference count for the
12710 if (old_entry
->is_sub_map
) {
12711 vm_map_lock(VME_SUBMAP(old_entry
));
12712 vm_map_reference(VME_SUBMAP(old_entry
));
12713 vm_map_unlock(VME_SUBMAP(old_entry
));
12715 vm_object_lock(object
);
12716 vm_object_reference_locked(object
);
12717 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
12718 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
12720 vm_object_unlock(object
);
12724 * Clone the entry, using object ref from above.
12725 * Mark both entries as shared.
12728 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* Never the kernel
12729 * map or descendants */
12730 vm_map_entry_copy(old_map
, new_entry
, old_entry
);
12731 old_entry
->is_shared
= TRUE
;
12732 new_entry
->is_shared
= TRUE
;
12735 * We're dealing with a shared mapping, so the resulting mapping
12736 * should inherit some of the original mapping's accounting settings.
12737 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12738 * "use_pmap" should stay the same as before (if it hasn't been reset
12739 * to TRUE when we cleared "iokit_acct").
12741 assert(!new_entry
->iokit_acct
);
	 * If the old entry's inheritance is VM_INHERIT_NONE,
	 * the new entry is for a corpse fork; remove the
	 * write permission from the new entry.
12748 if (old_entry
->inheritance
== VM_INHERIT_NONE
) {
12749 new_entry
->protection
&= ~VM_PROT_WRITE
;
12750 new_entry
->max_protection
&= ~VM_PROT_WRITE
;
12754 * Insert the entry into the new map -- we
12755 * know we're inserting at the end of the new
12759 vm_map_store_entry_link(new_map
, vm_map_last_entry(new_map
), new_entry
,
12760 VM_MAP_KERNEL_FLAGS_NONE
);
12763 * Update the physical map
12766 if (old_entry
->is_sub_map
) {
12767 /* Bill Angell pmap support goes here */
12769 pmap_copy(new_map
->pmap
, old_map
->pmap
, new_entry
->vme_start
,
12770 old_entry
->vme_end
- old_entry
->vme_start
,
12771 old_entry
->vme_start
);
12778 vm_map_entry_t
*old_entry_p
,
12780 int vm_map_copyin_flags
)
12782 vm_map_entry_t old_entry
= *old_entry_p
;
12783 vm_map_size_t entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
12784 vm_map_offset_t start
= old_entry
->vme_start
;
12785 vm_map_copy_t copy
;
12786 vm_map_entry_t last
= vm_map_last_entry(new_map
);
12788 vm_map_unlock(old_map
);
12790 * Use maxprot version of copyin because we
12791 * care about whether this memory can ever
12792 * be accessed, not just whether it's accessible
12795 vm_map_copyin_flags
|= VM_MAP_COPYIN_USE_MAXPROT
;
12796 if (vm_map_copyin_internal(old_map
, start
, entry_size
,
12797 vm_map_copyin_flags
, ©
)
12800 * The map might have changed while it
12801 * was unlocked, check it again. Skip
12802 * any blank space or permanently
12803 * unreadable region.
12805 vm_map_lock(old_map
);
12806 if (!vm_map_lookup_entry(old_map
, start
, &last
) ||
12807 (last
->max_protection
& VM_PROT_READ
) == VM_PROT_NONE
) {
12808 last
= last
->vme_next
;
12810 *old_entry_p
= last
;
12813 * XXX For some error returns, want to
12814 * XXX skip to the next element. Note
12815 * that INVALID_ADDRESS and
12816 * PROTECTION_FAILURE are handled above.
12823 * Assert that the vm_map_copy is coming from the right
12824 * zone and hasn't been forged
12826 vm_map_copy_require(copy
);
12829 * Insert the copy into the new map
12831 vm_map_copy_insert(new_map
, last
, copy
);
12834 * Pick up the traversal at the end of
12835 * the copied region.
12838 vm_map_lock(old_map
);
12839 start
+= entry_size
;
12840 if (!vm_map_lookup_entry(old_map
, start
, &last
)) {
12841 last
= last
->vme_next
;
12843 if (last
->vme_start
== start
) {
12845 * No need to clip here and we don't
12846 * want to cause any unnecessary
12850 vm_map_clip_start(old_map
, last
, start
);
12853 *old_entry_p
= last
;
12861 * Create and return a new map based on the old
12862 * map, according to the inheritance values on the
12863 * regions in that map and the options.
12865 * The source map must not be locked.
12875 vm_map_entry_t old_entry
;
12876 vm_map_size_t new_size
= 0, entry_size
;
12877 vm_map_entry_t new_entry
;
12878 boolean_t src_needs_copy
;
12879 boolean_t new_entry_needs_copy
;
12880 boolean_t pmap_is64bit
;
12881 int vm_map_copyin_flags
;
12882 vm_inherit_t old_entry_inheritance
;
12883 int map_create_options
;
12884 kern_return_t footprint_collect_kr
;
12886 if (options
& ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE
|
12887 VM_MAP_FORK_PRESERVE_PURGEABLE
|
12888 VM_MAP_FORK_CORPSE_FOOTPRINT
)) {
12889 /* unsupported option */
12890 return VM_MAP_NULL
;
12894 #if defined(__i386__) || defined(__x86_64__)
12895 old_map
->pmap
->pm_task_map
!= TASK_MAP_32BIT
;
12896 #elif defined(__arm64__)
12897 old_map
->pmap
->max
== MACH_VM_MAX_ADDRESS
;
12898 #elif defined(__arm__)
12901 #error Unknown architecture.
12904 unsigned int pmap_flags
= 0;
12905 pmap_flags
|= pmap_is64bit
? PMAP_CREATE_64BIT
: 0;
12906 #if defined(HAS_APPLE_PAC)
12907 pmap_flags
|= old_map
->pmap
->disable_jop
? PMAP_CREATE_DISABLE_JOP
: 0;
12909 #if PMAP_CREATE_FORCE_4K_PAGES
12910 if (VM_MAP_PAGE_SIZE(old_map
) == FOURK_PAGE_SIZE
&&
12911 PAGE_SIZE
!= FOURK_PAGE_SIZE
) {
12912 pmap_flags
|= PMAP_CREATE_FORCE_4K_PAGES
;
12914 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12915 new_pmap
= pmap_create_options(ledger
, (vm_map_size_t
) 0, pmap_flags
);
12917 vm_map_reference_swap(old_map
);
12918 vm_map_lock(old_map
);
12920 map_create_options
= 0;
12921 if (old_map
->hdr
.entries_pageable
) {
12922 map_create_options
|= VM_MAP_CREATE_PAGEABLE
;
12924 if (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) {
12925 map_create_options
|= VM_MAP_CREATE_CORPSE_FOOTPRINT
;
12926 footprint_collect_kr
= KERN_SUCCESS
;
12928 new_map
= vm_map_create_options(new_pmap
,
12929 old_map
->min_offset
,
12930 old_map
->max_offset
,
12931 map_create_options
);
12932 /* inherit cs_enforcement */
12933 vm_map_cs_enforcement_set(new_map
, old_map
->cs_enforcement
);
12934 vm_map_lock(new_map
);
12935 vm_commit_pagezero_status(new_map
);
12936 /* inherit the parent map's page size */
12937 vm_map_set_page_shift(new_map
, VM_MAP_PAGE_SHIFT(old_map
));
12939 old_entry
= vm_map_first_entry(old_map
);
12940 old_entry
!= vm_map_to_entry(old_map
);
12942 entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
12944 old_entry_inheritance
= old_entry
->inheritance
;
12946 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12947 * share VM_INHERIT_NONE entries that are not backed by a
12950 if (old_entry_inheritance
== VM_INHERIT_NONE
&&
12951 (options
& VM_MAP_FORK_SHARE_IF_INHERIT_NONE
) &&
12952 (old_entry
->protection
& VM_PROT_READ
) &&
12953 !(!old_entry
->is_sub_map
&&
12954 VME_OBJECT(old_entry
) != NULL
&&
12955 VME_OBJECT(old_entry
)->pager
!= NULL
&&
12956 is_device_pager_ops(
12957 VME_OBJECT(old_entry
)->pager
->mo_pager_ops
))) {
12958 old_entry_inheritance
= VM_INHERIT_SHARE
;
12961 if (old_entry_inheritance
!= VM_INHERIT_NONE
&&
12962 (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) &&
12963 footprint_collect_kr
== KERN_SUCCESS
) {
12965 * The corpse won't have old_map->pmap to query
12966 * footprint information, so collect that data now
12967 * and store it in new_map->vmmap_corpse_footprint
12968 * for later autopsy.
12970 footprint_collect_kr
=
12971 vm_map_corpse_footprint_collect(old_map
,
12976 switch (old_entry_inheritance
) {
12977 case VM_INHERIT_NONE
:
12980 case VM_INHERIT_SHARE
:
12981 vm_map_fork_share(old_map
, old_entry
, new_map
);
12982 new_size
+= entry_size
;
12985 case VM_INHERIT_COPY
:
12988 * Inline the copy_quickly case;
12989 * upon failure, fall back on call
12990 * to vm_map_fork_copy.
12993 if (old_entry
->is_sub_map
) {
12996 if ((old_entry
->wired_count
!= 0) ||
12997 ((VME_OBJECT(old_entry
) != NULL
) &&
12998 (VME_OBJECT(old_entry
)->true_share
))) {
12999 goto slow_vm_map_fork_copy
;
13002 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* never the kernel map or descendants */
13003 vm_map_entry_copy(old_map
, new_entry
, old_entry
);
13005 if (new_entry
->used_for_jit
== TRUE
&& new_map
->jit_entry_exists
== FALSE
) {
13006 new_map
->jit_entry_exists
= TRUE
;
13009 if (new_entry
->is_sub_map
) {
13010 /* clear address space specifics */
13011 new_entry
->use_pmap
= FALSE
;
13014 * We're dealing with a copy-on-write operation,
13015 * so the resulting mapping should not inherit
13016 * the original mapping's accounting settings.
13017 * "iokit_acct" should have been cleared in
13018 * vm_map_entry_copy().
13019 * "use_pmap" should be reset to its default
13020 * (TRUE) so that the new mapping gets
13021 * accounted for in the task's memory footprint.
13023 assert(!new_entry
->iokit_acct
);
13024 new_entry
->use_pmap
= TRUE
;
13027 if (!vm_object_copy_quickly(
13028 VME_OBJECT_PTR(new_entry
),
13029 VME_OFFSET(old_entry
),
13030 (old_entry
->vme_end
-
13031 old_entry
->vme_start
),
13033 &new_entry_needs_copy
)) {
13034 vm_map_entry_dispose(new_map
, new_entry
);
13035 goto slow_vm_map_fork_copy
;
13039 * Handle copy-on-write obligations
13042 if (src_needs_copy
&& !old_entry
->needs_copy
) {
13045 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, old_entry
->protection
));
13047 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
13049 if (override_nx(old_map
, VME_ALIAS(old_entry
))
13051 prot
|= VM_PROT_EXECUTE
;
13054 assert(!pmap_has_prot_policy(old_map
->pmap
, old_entry
->translated_allow_execute
, prot
));
13056 vm_object_pmap_protect(
13057 VME_OBJECT(old_entry
),
13058 VME_OFFSET(old_entry
),
13059 (old_entry
->vme_end
-
13060 old_entry
->vme_start
),
13061 ((old_entry
->is_shared
13062 || old_map
->mapped_in_other_pmaps
)
13065 VM_MAP_PAGE_SIZE(old_map
),
13066 old_entry
->vme_start
,
13069 assert(old_entry
->wired_count
== 0);
13070 old_entry
->needs_copy
= TRUE
;
13072 new_entry
->needs_copy
= new_entry_needs_copy
;
13075 * Insert the entry at the end
13079 vm_map_store_entry_link(new_map
,
13080 vm_map_last_entry(new_map
),
13082 VM_MAP_KERNEL_FLAGS_NONE
);
13083 new_size
+= entry_size
;
13086 slow_vm_map_fork_copy
:
13087 vm_map_copyin_flags
= 0;
13088 if (options
& VM_MAP_FORK_PRESERVE_PURGEABLE
) {
13089 vm_map_copyin_flags
|=
13090 VM_MAP_COPYIN_PRESERVE_PURGEABLE
;
13092 if (vm_map_fork_copy(old_map
,
13095 vm_map_copyin_flags
)) {
13096 new_size
+= entry_size
;
13100 old_entry
= old_entry
->vme_next
;
13103 #if defined(__arm64__)
13104 pmap_insert_sharedpage(new_map
->pmap
);
13105 #endif /* __arm64__ */
13107 new_map
->size
= new_size
;
13109 if (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) {
13110 vm_map_corpse_footprint_collect_done(new_map
);
13113 /* Propagate JIT entitlement for the pmap layer. */
13114 if (pmap_get_jit_entitled(old_map
->pmap
)) {
13115 /* Tell the pmap that it supports JIT. */
13116 pmap_set_jit_entitled(new_map
->pmap
);
13119 vm_map_unlock(new_map
);
13120 vm_map_unlock(old_map
);
13121 vm_map_deallocate(old_map
);
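/*
 * (Illustrative call, a sketch of how the fork path uses this routine;
 * the exact options depend on the caller:
 *
 *	new_map = vm_map_fork(ledger, old_map, 0);
 *
 * A corpse-creation caller would instead pass options such as
 * VM_MAP_FORK_CORPSE_FOOTPRINT.)
 */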
13129 * Setup the "new_map" with the proper execution environment according
13130 * to the type of executable (platform, 64bit, chroot environment).
13131 * Map the comm page and shared region, etc...
13140 cpu_subtype_t cpu_subtype
,
13143 SHARED_REGION_TRACE_DEBUG(
13144 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13145 (void *)VM_KERNEL_ADDRPERM(current_task()),
13146 (void *)VM_KERNEL_ADDRPERM(new_map
),
13147 (void *)VM_KERNEL_ADDRPERM(task
),
13148 (void *)VM_KERNEL_ADDRPERM(fsroot
),
13151 (void) vm_commpage_enter(new_map
, task
, is64bit
);
13153 (void) vm_shared_region_enter(new_map
, task
, is64bit
, fsroot
, cpu
, cpu_subtype
, reslide
);
13155 SHARED_REGION_TRACE_DEBUG(
13156 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13157 (void *)VM_KERNEL_ADDRPERM(current_task()),
13158 (void *)VM_KERNEL_ADDRPERM(new_map
),
13159 (void *)VM_KERNEL_ADDRPERM(task
),
13160 (void *)VM_KERNEL_ADDRPERM(fsroot
),
13165 * Some devices have region(s) of memory that shouldn't get allocated by
13166 * user processes. The following code creates dummy vm_map_entry_t's for each
13167 * of the regions that needs to be reserved to prevent any allocations in
13170 kern_return_t kr
= KERN_FAILURE
;
13171 vm_map_kernel_flags_t vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
13172 vmk_flags
.vmkf_permanent
= TRUE
;
13173 vmk_flags
.vmkf_beyond_max
= TRUE
;
13175 struct vm_reserved_region
*regions
= NULL
;
13176 size_t num_regions
= ml_get_vm_reserved_regions(is64bit
, ®ions
);
13177 assert((num_regions
== 0) || (num_regions
> 0 && regions
!= NULL
));
13179 for (size_t i
= 0; i
< num_regions
; ++i
) {
13182 ®ions
[i
].vmrr_addr
,
13183 regions
[i
].vmrr_size
,
13184 (vm_map_offset_t
)0,
13187 VM_KERN_MEMORY_NONE
,
13189 (vm_object_offset_t
)0,
13195 if (kr
!= KERN_SUCCESS
) {
13196 panic("Failed to reserve %s region in user map %p %d", regions
[i
].vmrr_name
, new_map
, kr
);
13200 new_map
->reserved_regions
= (num_regions
? TRUE
: FALSE
);
13202 return KERN_SUCCESS
;
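/*
 * (Note, illustrative: each region returned by ml_get_vm_reserved_regions()
 * is entered with vmkf_permanent and vmkf_beyond_max set, so the resulting
 * entry can never be removed or reused by user-level allocations.)
 */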
13206 * vm_map_lookup_locked:
13208 * Finds the VM object, offset, and
13209 * protection for a given virtual address in the
13210 * specified map, assuming a page fault of the
13213 * Returns the (object, offset, protection) for
13214 * this address, whether it is wired down, and whether
13215 * this map has the only reference to the data in question.
13216 * In order to later verify this lookup, a "version"
13218 * If contended != NULL, *contended will be set to
13219 * true iff the thread had to spin or block to acquire
13220 * an exclusive lock.
13222 * The map MUST be locked by the caller and WILL be
13223 * locked on exit. In order to guarantee the
13224 * existence of the returned object, it is returned
13227 * If a lookup is requested with "write protection"
13228 * specified, the map may be changed to perform virtual
13229 * copying operations, although the data referenced will
13233 vm_map_lookup_locked(
13234 vm_map_t
*var_map
, /* IN/OUT */
13235 vm_map_offset_t vaddr
,
13236 vm_prot_t fault_type
,
13237 int object_lock_type
,
13238 vm_map_version_t
*out_version
, /* OUT */
13239 vm_object_t
*object
, /* OUT */
13240 vm_object_offset_t
*offset
, /* OUT */
13241 vm_prot_t
*out_prot
, /* OUT */
13242 boolean_t
*wired
, /* OUT */
13243 vm_object_fault_info_t fault_info
, /* OUT */
13244 vm_map_t
*real_map
, /* OUT */
13245 bool *contended
) /* OUT */
13247 vm_map_entry_t entry
;
13248 vm_map_t map
= *var_map
;
13249 vm_map_t old_map
= *var_map
;
13250 vm_map_t cow_sub_map_parent
= VM_MAP_NULL
;
13251 vm_map_offset_t cow_parent_vaddr
= 0;
13252 vm_map_offset_t old_start
= 0;
13253 vm_map_offset_t old_end
= 0;
13255 boolean_t mask_protections
;
13256 boolean_t force_copy
;
13257 boolean_t no_force_copy_if_executable
;
13258 vm_prot_t original_fault_type
;
13259 vm_map_size_t fault_page_mask
;
13262 * VM_PROT_MASK means that the caller wants us to use "fault_type"
13263 * as a mask against the mapping's actual protections, not as an
13266 mask_protections
= (fault_type
& VM_PROT_IS_MASK
) ? TRUE
: FALSE
;
13267 force_copy
= (fault_type
& VM_PROT_COPY
) ? TRUE
: FALSE
;
13268 no_force_copy_if_executable
= (fault_type
& VM_PROT_COPY_FAIL_IF_EXECUTABLE
) ? TRUE
: FALSE
;
13269 fault_type
&= VM_PROT_ALL
;
13270 original_fault_type
= fault_type
;
13272 *contended
= false;
13277 fault_page_mask
= MIN(VM_MAP_PAGE_MASK(map
), PAGE_MASK
);
13278 vaddr
= VM_MAP_TRUNC_PAGE(vaddr
, fault_page_mask
);
13281 fault_type
= original_fault_type
;
13284 * If the map has an interesting hint, try it before calling
13285 * full blown lookup routine.
13289 if ((entry
== vm_map_to_entry(map
)) ||
13290 (vaddr
< entry
->vme_start
) || (vaddr
>= entry
->vme_end
)) {
13291 vm_map_entry_t tmp_entry
;
13294 * Entry was either not a valid hint, or the vaddr
13295 * was not contained in the entry, so do a full lookup.
13297 if (!vm_map_lookup_entry(map
, vaddr
, &tmp_entry
)) {
13298 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13299 vm_map_unlock(cow_sub_map_parent
);
13301 if ((*real_map
!= map
)
13302 && (*real_map
!= cow_sub_map_parent
)) {
13303 vm_map_unlock(*real_map
);
13305 return KERN_INVALID_ADDRESS
;
13310 if (map
== old_map
) {
13311 old_start
= entry
->vme_start
;
13312 old_end
= entry
->vme_end
;
13316 * Handle submaps. Drop lock on upper map, submap is
13321 if (entry
->is_sub_map
) {
13322 vm_map_offset_t local_vaddr
;
13323 vm_map_offset_t end_delta
;
13324 vm_map_offset_t start_delta
;
13325 vm_map_entry_t submap_entry
, saved_submap_entry
;
13326 vm_object_offset_t submap_entry_offset
;
13327 vm_object_size_t submap_entry_size
;
13328 vm_prot_t subentry_protection
;
13329 vm_prot_t subentry_max_protection
;
13330 boolean_t subentry_no_copy_on_read
;
13331 boolean_t mapped_needs_copy
= FALSE
;
13332 vm_map_version_t version
;
13334 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)) >= VM_MAP_PAGE_SHIFT(map
),
13335 "map %p (%d) entry %p submap %p (%d)\n",
13336 map
, VM_MAP_PAGE_SHIFT(map
), entry
,
13337 VME_SUBMAP(entry
), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry
)));
13339 local_vaddr
= vaddr
;
13341 if ((entry
->use_pmap
&&
13342 !((fault_type
& VM_PROT_WRITE
) ||
13344 /* if real_map equals map we unlock below */
13345 if ((*real_map
!= map
) &&
13346 (*real_map
!= cow_sub_map_parent
)) {
13347 vm_map_unlock(*real_map
);
13349 *real_map
= VME_SUBMAP(entry
);
13352 if (entry
->needs_copy
&&
13353 ((fault_type
& VM_PROT_WRITE
) ||
13355 if (!mapped_needs_copy
) {
13356 if (vm_map_lock_read_to_write(map
)) {
13357 vm_map_lock_read(map
);
13361 vm_map_lock_read(VME_SUBMAP(entry
));
13362 *var_map
= VME_SUBMAP(entry
);
13363 cow_sub_map_parent
= map
;
13364 /* reset base to map before cow object */
13365 /* this is the map which will accept */
13366 /* the new cow object */
13367 old_start
= entry
->vme_start
;
13368 old_end
= entry
->vme_end
;
13369 cow_parent_vaddr
= vaddr
;
13370 mapped_needs_copy
= TRUE
;
13372 vm_map_lock_read(VME_SUBMAP(entry
));
13373 *var_map
= VME_SUBMAP(entry
);
13374 if ((cow_sub_map_parent
!= map
) &&
13375 (*real_map
!= map
)) {
13376 vm_map_unlock(map
);
13380 vm_map_lock_read(VME_SUBMAP(entry
));
13381 *var_map
= VME_SUBMAP(entry
);
13382 /* leave map locked if it is a target */
13383 /* cow sub_map above otherwise, just */
13384 /* follow the maps down to the object */
13385 /* here we unlock knowing we are not */
13386 /* revisiting the map. */
13387 if ((*real_map
!= map
) && (map
!= cow_sub_map_parent
)) {
13388 vm_map_unlock_read(map
);
13394 /* calculate the offset in the submap for vaddr */
13395 local_vaddr
= (local_vaddr
- entry
->vme_start
) + VME_OFFSET(entry
);
13396 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr
, fault_page_mask
),
13397 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13398 (uint64_t)local_vaddr
, (uint64_t)entry
->vme_start
, (uint64_t)fault_page_mask
);
13401 if (!vm_map_lookup_entry(map
, local_vaddr
, &submap_entry
)) {
13402 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13403 vm_map_unlock(cow_sub_map_parent
);
13405 if ((*real_map
!= map
)
13406 && (*real_map
!= cow_sub_map_parent
)) {
13407 vm_map_unlock(*real_map
);
13410 return KERN_INVALID_ADDRESS
;
		/* find the attenuated shadow of the underlying object */
		/* on our target map */

		/* In English: the submap object may extend beyond the */
		/* region mapped by the entry, or may only fill a portion */
		/* of it. For our purposes, we only care if the object */
		/* doesn't fill. In that case the area which will */
		/* ultimately be clipped in the top map will only need */
		/* to be as big as the portion of the underlying entry */
		/* which is mapped. */
13423 start_delta
= submap_entry
->vme_start
> VME_OFFSET(entry
) ?
13424 submap_entry
->vme_start
- VME_OFFSET(entry
) : 0;
13427 (VME_OFFSET(entry
) + start_delta
+ (old_end
- old_start
)) <=
13428 submap_entry
->vme_end
?
13429 0 : (VME_OFFSET(entry
) +
13430 (old_end
- old_start
))
13431 - submap_entry
->vme_end
;
13433 old_start
+= start_delta
;
13434 old_end
-= end_delta
;
13436 if (submap_entry
->is_sub_map
) {
13437 entry
= submap_entry
;
13438 vaddr
= local_vaddr
;
13439 goto submap_recurse
;
13442 if (((fault_type
& VM_PROT_WRITE
) ||
13444 && cow_sub_map_parent
) {
13445 vm_object_t sub_object
, copy_object
;
13446 vm_object_offset_t copy_offset
;
13447 vm_map_offset_t local_start
;
13448 vm_map_offset_t local_end
;
13449 boolean_t copied_slowly
= FALSE
;
13450 vm_object_offset_t copied_slowly_phys_offset
= 0;
13451 kern_return_t kr
= KERN_SUCCESS
;
13453 if (vm_map_lock_read_to_write(map
)) {
13454 vm_map_lock_read(map
);
13455 old_start
-= start_delta
;
13456 old_end
+= end_delta
;
13461 sub_object
= VME_OBJECT(submap_entry
);
13462 if (sub_object
== VM_OBJECT_NULL
) {
13464 vm_object_allocate(
13466 (submap_entry
->vme_end
-
13467 submap_entry
->vme_start
));
13468 VME_OBJECT_SET(submap_entry
, sub_object
);
13469 VME_OFFSET_SET(submap_entry
, 0);
13470 assert(!submap_entry
->is_sub_map
);
13471 assert(submap_entry
->use_pmap
);
13473 local_start
= local_vaddr
-
13474 (cow_parent_vaddr
- old_start
);
13475 local_end
= local_vaddr
+
13476 (old_end
- cow_parent_vaddr
);
13477 vm_map_clip_start(map
, submap_entry
, local_start
);
13478 vm_map_clip_end(map
, submap_entry
, local_end
);
13479 if (submap_entry
->is_sub_map
) {
13480 /* unnesting was done when clipping */
13481 assert(!submap_entry
->use_pmap
);
			/* This is the COW case; let's connect */
			/* an entry in our space to the underlying */
			/* object in the submap, bypassing the */
13489 if (submap_entry
->wired_count
!= 0 ||
13490 (sub_object
->copy_strategy
!=
13491 MEMORY_OBJECT_COPY_SYMMETRIC
)) {
13492 if ((submap_entry
->protection
& VM_PROT_EXECUTE
) &&
13493 no_force_copy_if_executable
) {
13494 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13495 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13496 vm_map_unlock(cow_sub_map_parent
);
13498 if ((*real_map
!= map
)
13499 && (*real_map
!= cow_sub_map_parent
)) {
13500 vm_map_unlock(*real_map
);
13503 vm_map_lock_write_to_read(map
);
13504 kr
= KERN_PROTECTION_FAILURE
;
13505 DTRACE_VM4(submap_no_copy_executable
,
13507 vm_object_offset_t
, submap_entry_offset
,
13508 vm_object_size_t
, submap_entry_size
,
13513 vm_object_reference(sub_object
);
13515 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry
), VM_MAP_PAGE_MASK(map
)),
13516 "submap_entry %p offset 0x%llx\n",
13517 submap_entry
, VME_OFFSET(submap_entry
));
13518 submap_entry_offset
= VME_OFFSET(submap_entry
);
13519 submap_entry_size
= submap_entry
->vme_end
- submap_entry
->vme_start
;
13521 DTRACE_VM6(submap_copy_slowly
,
13522 vm_map_t
, cow_sub_map_parent
,
13523 vm_map_offset_t
, vaddr
,
13525 vm_object_size_t
, submap_entry_size
,
13526 int, submap_entry
->wired_count
,
13527 int, sub_object
->copy_strategy
);
13529 saved_submap_entry
= submap_entry
;
13530 version
.main_timestamp
= map
->timestamp
;
13531 vm_map_unlock(map
); /* Increments timestamp by 1 */
13532 submap_entry
= VM_MAP_ENTRY_NULL
;
13534 vm_object_lock(sub_object
);
13535 kr
= vm_object_copy_slowly(sub_object
,
13536 submap_entry_offset
,
13540 copied_slowly
= TRUE
;
13541 /* 4k: account for extra offset in physical page */
13542 copied_slowly_phys_offset
= submap_entry_offset
- vm_object_trunc_page(submap_entry_offset
);
13543 vm_object_deallocate(sub_object
);
13547 if (kr
!= KERN_SUCCESS
&&
13548 kr
!= KERN_MEMORY_RESTART_COPY
) {
13549 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13550 vm_map_unlock(cow_sub_map_parent
);
13552 if ((*real_map
!= map
)
13553 && (*real_map
!= cow_sub_map_parent
)) {
13554 vm_map_unlock(*real_map
);
13557 vm_object_deallocate(copy_object
);
13558 copy_object
= VM_OBJECT_NULL
;
13559 vm_map_lock_write_to_read(map
);
13560 DTRACE_VM4(submap_copy_slowly
,
13561 vm_object_t
, sub_object
,
13562 vm_object_offset_t
, submap_entry_offset
,
13563 vm_object_size_t
, submap_entry_size
,
13568 if ((kr
== KERN_SUCCESS
) &&
13569 (version
.main_timestamp
+ 1) == map
->timestamp
) {
13570 submap_entry
= saved_submap_entry
;
13572 saved_submap_entry
= NULL
;
13573 old_start
-= start_delta
;
13574 old_end
+= end_delta
;
13575 vm_object_deallocate(copy_object
);
13576 copy_object
= VM_OBJECT_NULL
;
13577 vm_map_lock_write_to_read(map
);
13581 /* set up shadow object */
13582 copy_object
= sub_object
;
13583 vm_object_lock(sub_object
);
13584 vm_object_reference_locked(sub_object
);
13585 sub_object
->shadowed
= TRUE
;
13586 vm_object_unlock(sub_object
);
13588 assert(submap_entry
->wired_count
== 0);
13589 submap_entry
->needs_copy
= TRUE
;
13591 prot
= submap_entry
->protection
;
13592 assert(!pmap_has_prot_policy(map
->pmap
, submap_entry
->translated_allow_execute
, prot
));
13593 prot
= prot
& ~VM_PROT_WRITE
;
13594 assert(!pmap_has_prot_policy(map
->pmap
, submap_entry
->translated_allow_execute
, prot
));
13596 if (override_nx(old_map
,
13597 VME_ALIAS(submap_entry
))
13599 prot
|= VM_PROT_EXECUTE
;
13602 vm_object_pmap_protect(
13604 VME_OFFSET(submap_entry
),
13605 submap_entry
->vme_end
-
13606 submap_entry
->vme_start
,
13607 (submap_entry
->is_shared
13608 || map
->mapped_in_other_pmaps
) ?
13609 PMAP_NULL
: map
->pmap
,
13610 VM_MAP_PAGE_SIZE(map
),
13611 submap_entry
->vme_start
,
13616 * Adjust the fault offset to the submap entry.
13618 copy_offset
= (local_vaddr
-
13619 submap_entry
->vme_start
+
13620 VME_OFFSET(submap_entry
));
			/* This works differently from the */
			/* normal submap case. We go back */
			/* to the parent of the cow map and */
			/* clip out the target portion of */
			/* the sub_map, substituting the */
			/* new copy object. */
13629 subentry_protection
= submap_entry
->protection
;
13630 subentry_max_protection
= submap_entry
->max_protection
;
13631 subentry_no_copy_on_read
= submap_entry
->vme_no_copy_on_read
;
13632 vm_map_unlock(map
);
13633 submap_entry
= NULL
; /* not valid after map unlock */
13635 local_start
= old_start
;
13636 local_end
= old_end
;
13637 map
= cow_sub_map_parent
;
13638 *var_map
= cow_sub_map_parent
;
13639 vaddr
= cow_parent_vaddr
;
13640 cow_sub_map_parent
= NULL
;
13642 if (!vm_map_lookup_entry(map
,
13644 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
13645 vm_map_unlock(cow_sub_map_parent
);
13647 if ((*real_map
!= map
)
13648 && (*real_map
!= cow_sub_map_parent
)) {
13649 vm_map_unlock(*real_map
);
13652 vm_object_deallocate(
13654 copy_object
= VM_OBJECT_NULL
;
13655 vm_map_lock_write_to_read(map
);
13656 DTRACE_VM4(submap_lookup_post_unlock
,
13657 uint64_t, (uint64_t)entry
->vme_start
,
13658 uint64_t, (uint64_t)entry
->vme_end
,
13659 vm_map_offset_t
, vaddr
,
13660 int, copied_slowly
);
13661 return KERN_INVALID_ADDRESS
;
13664 /* clip out the portion of space */
13665 /* mapped by the sub map which */
13666 /* corresponds to the underlying */
13670 * Clip (and unnest) the smallest nested chunk
13671 * possible around the faulting address...
13673 local_start
= vaddr
& ~(pmap_shared_region_size_min(map
->pmap
) - 1);
13674 local_end
= local_start
+ pmap_shared_region_size_min(map
->pmap
);
13676 * ... but don't go beyond the "old_start" to "old_end"
13677 * range, to avoid spanning over another VM region
13678 * with a possibly different VM object and/or offset.
13680 if (local_start
< old_start
) {
13681 local_start
= old_start
;
13683 if (local_end
> old_end
) {
13684 local_end
= old_end
;
13687 * Adjust copy_offset to the start of the range.
13689 copy_offset
-= (vaddr
- local_start
);
13691 vm_map_clip_start(map
, entry
, local_start
);
13692 vm_map_clip_end(map
, entry
, local_end
);
13693 if (entry
->is_sub_map
) {
13694 /* unnesting was done when clipping */
13695 assert(!entry
->use_pmap
);
13698 /* substitute copy object for */
13699 /* shared map entry */
13700 vm_map_deallocate(VME_SUBMAP(entry
));
13701 assert(!entry
->iokit_acct
);
13702 entry
->is_sub_map
= FALSE
;
13703 entry
->use_pmap
= TRUE
;
13704 VME_OBJECT_SET(entry
, copy_object
);
13706 /* propagate the submap entry's protections */
13707 if (entry
->protection
!= VM_PROT_READ
) {
13709 * Someone has already altered the top entry's
13710 * protections via vm_protect(VM_PROT_COPY).
13711 * Respect these new values and ignore the
13712 * submap entry's protections.
13716 * Regular copy-on-write: propagate the submap
13717 * entry's protections to the top map entry.
13719 entry
->protection
|= subentry_protection
;
13721 entry
->max_protection
|= subentry_max_protection
;
13722 /* propagate no_copy_on_read */
13723 entry
->vme_no_copy_on_read
= subentry_no_copy_on_read
;
13725 if ((entry
->protection
& VM_PROT_WRITE
) &&
13726 (entry
->protection
& VM_PROT_EXECUTE
) &&
13727 #if XNU_TARGET_OS_OSX
13728 map
->pmap
!= kernel_pmap
&&
13729 (vm_map_cs_enforcement(map
)
13731 || !VM_MAP_IS_EXOTIC(map
)
13732 #endif /* __arm64__ */
13734 #endif /* XNU_TARGET_OS_OSX */
13735 !(entry
->used_for_jit
) &&
13736 VM_MAP_POLICY_WX_STRIP_X(map
)) {
13738 uint64_t, (uint64_t)entry
->vme_start
,
13739 uint64_t, (uint64_t)entry
->vme_end
		    ,
		    vm_prot_t, entry->protection);
		printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
		    proc_selfpid(),
		    (current_task()->bsd_info
		    ? proc_name_address(current_task()->bsd_info)
		    : "?"),
		    __FUNCTION__);
		entry->protection &= ~VM_PROT_EXECUTE;
	}

	if (copied_slowly) {
		VME_OFFSET_SET(entry, local_start - old_start + copied_slowly_phys_offset);
		entry->needs_copy = FALSE;
		entry->is_shared = FALSE;
	} else {
		VME_OFFSET_SET(entry, copy_offset);
		assert(entry->wired_count == 0);
		entry->needs_copy = TRUE;
		if (entry->inheritance == VM_INHERIT_SHARE) {
			entry->inheritance = VM_INHERIT_COPY;
		}
		if (map != old_map) {
			entry->is_shared = TRUE;
		}
	}
	if (entry->inheritance == VM_INHERIT_SHARE) {
		entry->inheritance = VM_INHERIT_COPY;
	}

	vm_map_lock_write_to_read(map);
} else {
	if ((cow_sub_map_parent)
	    && (cow_sub_map_parent != *real_map)
	    && (cow_sub_map_parent != map)) {
		vm_map_unlock(cow_sub_map_parent);
	}
	entry = submap_entry;
	vaddr = local_vaddr;
}

/*
 * Check whether this task is allowed to have
 * this page.
 */
prot = entry->protection;

if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
	/*
	 * HACK -- if not a stack, then allow execution
	 */
	prot |= VM_PROT_EXECUTE;
}

if (mask_protections) {
	fault_type &= prot;
	if (fault_type == VM_PROT_NONE) {
		goto protection_failure;
	}
}
if (((fault_type & prot) != fault_type)
    /* prefetch abort in execute-only page */
    && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))) {
protection_failure:
	if (*real_map != map) {
		vm_map_unlock(*real_map);
	}

	if ((fault_type & VM_PROT_EXECUTE) && prot) {
		log_stack_execution_failure((addr64_t)vaddr, prot);
	}

	DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
	return KERN_PROTECTION_FAILURE;
}

/*
 * If this page is not pageable, we have to get
 * it for all possible accesses.
 */
*wired = (entry->wired_count != 0);

/*
 * If the entry was copy-on-write, we either ...
 */
if (entry->needs_copy) {
	/*
	 * If we want to write the page, we may as well
	 * handle that now since we've got the map locked.
	 *
	 * If we don't need to write the page, we just
	 * demote the permissions allowed.
	 */
	if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
		/*
		 * Make a new object, and place it in the
		 * object chain.  Note that no new references
		 * have appeared -- one just moved from the
		 * map to the new object.
		 */
		if (vm_map_lock_read_to_write(map)) {
			vm_map_lock_read(map);
			goto RetryLookup;
		}

		if (VME_OBJECT(entry)->shadowed == FALSE) {
			vm_object_lock(VME_OBJECT(entry));
			VME_OBJECT(entry)->shadowed = TRUE;
			vm_object_unlock(VME_OBJECT(entry));
		}
		VME_OBJECT_SHADOW(entry,
		    (vm_map_size_t) (entry->vme_end -
		    entry->vme_start));
		entry->needs_copy = FALSE;

		vm_map_lock_write_to_read(map);
	}
	if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
		/*
		 * We're attempting to read a copy-on-write
		 * page -- don't allow writes.
		 */
		prot &= (~VM_PROT_WRITE);
	}
}

/*
 * Create an object if necessary.
 */
if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
	if (vm_map_lock_read_to_write(map)) {
		vm_map_lock_read(map);
		goto RetryLookup;
	}

	VME_OBJECT_SET(entry,
	    vm_object_allocate(
		    (vm_map_size_t)(entry->vme_end -
		    entry->vme_start)));
	VME_OFFSET_SET(entry, 0);
	assert(entry->use_pmap);
	vm_map_lock_write_to_read(map);
}

/*
 * Return the object/offset from this entry.  If the entry
 * was copy-on-write or empty, it has been fixed up.  Also
 * return the protection.
 */
*offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
*object = VME_OBJECT(entry);

KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);

fault_info->interruptible = THREAD_UNINT; /* for now... */
/* ... the caller will change "interruptible" if needed */
fault_info->cluster_size = 0;
fault_info->user_tag = VME_ALIAS(entry);
fault_info->pmap_options = 0;
if (entry->iokit_acct ||
    (!entry->is_sub_map && !entry->use_pmap)) {
	fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
}
fault_info->behavior = entry->behavior;
fault_info->lo_offset = VME_OFFSET(entry);
fault_info->hi_offset =
    (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
fault_info->no_cache = entry->no_cache;
fault_info->stealth = FALSE;
fault_info->io_sync = FALSE;
if (entry->used_for_jit ||
    entry->vme_resilient_codesign) {
	fault_info->cs_bypass = TRUE;
} else {
	fault_info->cs_bypass = FALSE;
}
fault_info->pmap_cs_associated = FALSE;
#if CONFIG_PMAP_CS
if (entry->pmap_cs_associated) {
	/*
	 * The pmap layer will validate this page
	 * before allowing it to be executed from.
	 */
	fault_info->pmap_cs_associated = TRUE;
}
#endif /* CONFIG_PMAP_CS */
fault_info->mark_zf_absent = FALSE;
fault_info->batch_pmap_op = FALSE;
fault_info->resilient_media = entry->vme_resilient_media;
fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
if (entry->translated_allow_execute) {
	fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
}

/*
 * Lock the object to prevent it from disappearing
 */
if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
	if (contended == NULL) {
		vm_object_lock(*object);
	} else {
		*contended = vm_object_lock_check_contended(*object);
	}
} else {
	vm_object_lock_shared(*object);
}

/*
 * Save the version number
 */
out_version->main_timestamp = map->timestamp;

return KERN_SUCCESS;
/*
 * Verifies that the map in question has not changed
 * since the given version.  The map has to be locked
 * ("shared" mode is fine) before calling this function
 * and it will be returned locked too.
 */
boolean_t
vm_map_verify(
	vm_map_t          map,
	vm_map_version_t *version)      /* REF */
{
	boolean_t       result;

	vm_map_lock_assert_held(map);
	result = (map->timestamp == version->main_timestamp);

	return result;
}
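/*
 * Illustrative sketch (not part of the original source): how a caller is
 * expected to pair a map lookup with vm_map_verify().  The "RetryLookup"
 * label and the surrounding variables are placeholders for whatever the
 * caller already has in scope; only vm_map_verify(), vm_map_lock_read()
 * and vm_map_unlock_read() are real interfaces used above.
 *
 *	vm_map_version_t version;      // filled in while the map was locked
 *	...
 *	// the caller dropped the map lock to take a fault / copy pages ...
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed behind our back: redo the lookup
 *		vm_map_unlock_read(map);
 *		goto RetryLookup;      // hypothetical label
 *	}
 *	// map unchanged and still read-locked: safe to proceed
 */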
/*
 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
 * Goes away after regular vm_region_recurse function migrates to
 * 64 bits
 * vm_region_recurse: A form of vm_region which follows the
 * submaps in a target map
 */
kern_return_t
vm_map_region_recurse_64(
	vm_map_t                 map,
	vm_map_offset_t         *address,               /* IN/OUT */
	vm_map_size_t           *size,                  /* OUT */
	natural_t               *nesting_depth,         /* IN/OUT */
	vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
	mach_msg_type_number_t  *count)                 /* IN/OUT */
{
	mach_msg_type_number_t  original_count;
	vm_region_extended_info_data_t  extended;
	vm_map_entry_t          tmp_entry;
	vm_map_offset_t         user_address;
	unsigned int            user_max_depth;

	/*
	 * "curr_entry" is the VM map entry preceding or including the
	 * address we're looking for.
	 * "curr_map" is the map or sub-map containing "curr_entry".
	 * "curr_address" is the equivalent of the top map's "user_address"
	 * in the current map.
	 * "curr_offset" is the cumulated offset of "curr_map" in the
	 * target task's address space.
	 * "curr_depth" is the depth of "curr_map" in the chain of
	 * sub-maps.
	 *
	 * "curr_max_below" and "curr_max_above" limit the range (around
	 * "curr_address") we should take into account in the current (sub)map.
	 * They limit the range to what's visible through the map entries
	 * we've traversed from the top map to the current map.
	 */
	vm_map_entry_t          curr_entry;
	vm_map_address_t        curr_address;
	vm_map_offset_t         curr_offset;
	vm_map_t                curr_map;
	unsigned int            curr_depth;
	vm_map_offset_t         curr_max_below, curr_max_above;
	vm_map_offset_t         curr_skip;

	/*
	 * "next_" is the same as "curr_" but for the VM region immediately
	 * after the address we're looking for.  We need to keep track of this
	 * too because we want to return info about that region if the
	 * address we're looking for is not mapped.
	 */
	vm_map_entry_t          next_entry;
	vm_map_offset_t         next_offset;
	vm_map_offset_t         next_address;
	vm_map_t                next_map;
	unsigned int            next_depth;
	vm_map_offset_t         next_max_below, next_max_above;
	vm_map_offset_t         next_skip;

	boolean_t               look_for_pages;
	vm_region_submap_short_info_64_t short_info;
	boolean_t               do_region_footprint;
	int                     effective_page_size, effective_page_shift;

	if (map == VM_MAP_NULL) {
		/* no address space to work on */
		return KERN_INVALID_ARGUMENT;
	}

	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);

	if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
		/*
		 * "info" structure is not big enough and
		 * would overflow
		 */
		return KERN_INVALID_ARGUMENT;
	}

	do_region_footprint = task_self_region_footprint();
	original_count = *count;

	if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
		*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
		look_for_pages = FALSE;
		short_info = (vm_region_submap_short_info_64_t) submap_info;
		submap_info = NULL;
	} else {
		look_for_pages = TRUE;
		*count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			*count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
		}
		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
			*count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
		}
	}

	user_address = *address;
	user_max_depth = *nesting_depth;

	vm_map_lock_read(map);

recurse_again:
	curr_entry = NULL;
	curr_map = map;
	curr_address = user_address;
	curr_offset = 0;
	curr_skip = 0;
	curr_depth = 0;
	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
	curr_max_below = curr_address;

	next_entry = NULL;
	next_map = NULL;
	next_address = 0;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_above = (vm_map_offset_t) -1;
	next_max_below = (vm_map_offset_t) -1;

	for (;;) {
		if (vm_map_lookup_entry(curr_map,
		    curr_address,
		    &tmp_entry)) {
			/* tmp_entry contains the address we're looking for */
			curr_entry = tmp_entry;
		} else {
			vm_map_offset_t skip;
			/*
			 * The address is not mapped.  "tmp_entry" is the
			 * map entry preceding the address.  We want the next
			 * one, if it exists.
			 */
			curr_entry = tmp_entry->vme_next;

			if (curr_entry == vm_map_to_entry(curr_map) ||
			    (curr_entry->vme_start >=
			    curr_address + curr_max_above)) {
				/* no next entry at this level: stop looking */
				vm_map_unlock_read(curr_map);
				curr_entry = NULL;
				curr_map = NULL;
				curr_offset = 0;
				curr_skip = 0;
				curr_depth = 0;
				curr_max_above = 0;
				curr_max_below = 0;
				break;
			}

			/* adjust current address and offset */
			skip = curr_entry->vme_start - curr_address;
			curr_address = curr_entry->vme_start;
			curr_skip += skip;
			curr_offset += skip;
			curr_max_above -= skip;
			curr_max_below = 0;
		}

		/*
		 * Is the next entry at this level closer to the address (or
		 * deeper in the submap chain) than the one we had
		 * so far?
		 */
		tmp_entry = curr_entry->vme_next;
		if (tmp_entry == vm_map_to_entry(curr_map)) {
			/* no next entry at this level */
		} else if (tmp_entry->vme_start >=
		    curr_address + curr_max_above) {
			/*
			 * tmp_entry is beyond the scope of what we mapped of
			 * this submap in the upper level: ignore it.
			 */
		} else if ((next_entry == NULL) ||
		    (tmp_entry->vme_start + curr_offset <=
		    next_entry->vme_start + next_offset)) {
			/*
			 * We didn't have a "next_entry" or this one is
			 * closer to the address we're looking for:
			 * use this "tmp_entry" as the new "next_entry".
			 */
			if (next_entry != NULL) {
				/* unlock the last "next_map" */
				if (next_map != curr_map && not_in_kdp) {
					vm_map_unlock_read(next_map);
				}
			}
			next_entry = tmp_entry;
			next_map = curr_map;
			next_depth = curr_depth;
			next_address = next_entry->vme_start;
			next_skip = curr_skip;
			next_skip += (next_address - curr_address);
			next_offset = curr_offset;
			next_offset += (next_address - curr_address);
			next_max_above = MIN(next_max_above, curr_max_above);
			next_max_above = MIN(next_max_above,
			    next_entry->vme_end - next_address);
			next_max_below = MIN(next_max_below, curr_max_below);
			next_max_below = MIN(next_max_below,
			    next_address - next_entry->vme_start);
		}

		/*
		 * "curr_max_{above,below}" allow us to keep track of the
		 * portion of the submap that is actually mapped at this level:
		 * the rest of that submap is irrelevant to us, since it's not
		 * mapped here.
		 * The relevant portion of the map starts at
		 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
		 */
		curr_max_above = MIN(curr_max_above,
		    curr_entry->vme_end - curr_address);
		curr_max_below = MIN(curr_max_below,
		    curr_address - curr_entry->vme_start);

		if (!curr_entry->is_sub_map ||
		    curr_depth >= user_max_depth) {
			/*
			 * We hit a leaf map or we reached the maximum depth
			 * we could, so stop looking.  Keep the current map
			 * locked.
			 */
			break;
		}

		/*
		 * Get down to the next submap level.
		 */

		/*
		 * Lock the next level and unlock the current level,
		 * unless we need to keep it locked to access the "next_entry"
		 * later.
		 */
		vm_map_lock_read(VME_SUBMAP(curr_entry));

		if (curr_map == next_map) {
			/* keep "next_map" locked in case we need it */
		} else {
			/* release this map */
			vm_map_unlock_read(curr_map);
		}

		/*
		 * Adjust the offset.  "curr_entry" maps the submap
		 * at relative address "curr_entry->vme_start" in the
		 * curr_map but skips the first "VME_OFFSET(curr_entry)"
		 * bytes of the submap.
		 * "curr_offset" always represents the offset of a virtual
		 * address in the curr_map relative to the absolute address
		 * space (i.e. the top-level VM map).
		 */
		curr_offset +=
		    (VME_OFFSET(curr_entry) - curr_entry->vme_start);
		curr_address = user_address + curr_offset;
		/* switch to the submap */
		curr_map = VME_SUBMAP(curr_entry);
		curr_depth++;
		curr_entry = NULL;
	}

	// LP64todo: all the current tools are 32bit, obviously never worked for 64b
	// so probably should be a real 32b ID vs. ptr.
	// Current users just check for equality

	if (curr_entry == NULL) {
		/* no VM region contains the address... */

		if (do_region_footprint &&      /* we want footprint numbers */
		    next_entry == NULL &&       /* & there are no more regions */
		    /* & we haven't already provided our fake region: */
		    user_address <= vm_map_last_entry(map)->vme_end) {
			ledger_amount_t ledger_resident, ledger_compressed;

			/*
			 * Add a fake memory region to account for
			 * purgeable and/or ledger-tagged memory that
			 * counts towards this task's memory footprint,
			 * i.e. the resident/compressed pages of non-volatile
			 * objects owned by that task.
			 */
			task_ledgers_footprint(map->pmap->ledger,
			    &ledger_resident,
			    &ledger_compressed);
			if (ledger_resident + ledger_compressed == 0) {
				/* no purgeable memory usage to report */
				return KERN_INVALID_ADDRESS;
			}
			/* fake region to show nonvolatile footprint */
			if (look_for_pages) {
				submap_info->protection = VM_PROT_DEFAULT;
				submap_info->max_protection = VM_PROT_DEFAULT;
				submap_info->inheritance = VM_INHERIT_DEFAULT;
				submap_info->offset = 0;
				submap_info->user_tag = -1;
				submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
				submap_info->pages_shared_now_private = 0;
				submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
				submap_info->pages_dirtied = submap_info->pages_resident;
				submap_info->ref_count = 1;
				submap_info->shadow_depth = 0;
				submap_info->external_pager = 0;
				submap_info->share_mode = SM_PRIVATE;
				submap_info->is_submap = 0;
				submap_info->behavior = VM_BEHAVIOR_DEFAULT;
				submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				submap_info->user_wired_count = 0;
				submap_info->pages_reusable = 0;
			} else {
				short_info->user_tag = -1;
				short_info->offset = 0;
				short_info->protection = VM_PROT_DEFAULT;
				short_info->inheritance = VM_INHERIT_DEFAULT;
				short_info->max_protection = VM_PROT_DEFAULT;
				short_info->behavior = VM_BEHAVIOR_DEFAULT;
				short_info->user_wired_count = 0;
				short_info->is_submap = 0;
				short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				short_info->external_pager = 0;
				short_info->shadow_depth = 0;
				short_info->share_mode = SM_PRIVATE;
				short_info->ref_count = 1;
			}
			*nesting_depth = 0;
			*size = (vm_map_size_t) (ledger_resident + ledger_compressed);
//			*address = user_address;
			*address = vm_map_last_entry(map)->vme_end;
			return KERN_SUCCESS;
		}

		if (next_entry == NULL) {
			/* ... and no VM region follows it either */
			return KERN_INVALID_ADDRESS;
		}
		/* ... gather info about the next VM region */
		curr_entry = next_entry;
		curr_map = next_map;    /* still locked ... */
		curr_address = next_address;
		curr_skip = next_skip;
		curr_offset = next_offset;
		curr_depth = next_depth;
		curr_max_above = next_max_above;
		curr_max_below = next_max_below;
	} else {
		/* we won't need "next_entry" after all */
		if (next_entry != NULL) {
			/* release "next_map" */
			if (next_map != curr_map && not_in_kdp) {
				vm_map_unlock_read(next_map);
			}
		}
	}
	next_entry = NULL;
	next_map = NULL;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_below = -1;
	next_max_above = -1;

	if (curr_entry->is_sub_map &&
	    curr_depth < user_max_depth) {
		/*
		 * We're not as deep as we could be:  we must have
		 * gone back up after not finding anything mapped
		 * below the original top-level map entry's.
		 * Let's move "curr_address" forward and recurse again.
		 */
		user_address = curr_address;
		goto recurse_again;
	}

	*nesting_depth = curr_depth;
	*size = curr_max_above + curr_max_below;
	*address = user_address + curr_skip - curr_max_below;

	if (look_for_pages) {
		submap_info->user_tag = VME_ALIAS(curr_entry);
		submap_info->offset = VME_OFFSET(curr_entry);
		submap_info->protection = curr_entry->protection;
		submap_info->inheritance = curr_entry->inheritance;
		submap_info->max_protection = curr_entry->max_protection;
		submap_info->behavior = curr_entry->behavior;
		submap_info->user_wired_count = curr_entry->user_wired_count;
		submap_info->is_submap = curr_entry->is_sub_map;
		submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
	} else {
		short_info->user_tag = VME_ALIAS(curr_entry);
		short_info->offset = VME_OFFSET(curr_entry);
		short_info->protection = curr_entry->protection;
		short_info->inheritance = curr_entry->inheritance;
		short_info->max_protection = curr_entry->max_protection;
		short_info->behavior = curr_entry->behavior;
		short_info->user_wired_count = curr_entry->user_wired_count;
		short_info->is_submap = curr_entry->is_sub_map;
		short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
	}

	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.pages_reusable = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;
	extended.share_mode = SM_EMPTY;
	extended.ref_count = 0;

	if (!curr_entry->is_sub_map) {
		vm_map_offset_t range_start, range_end;
		range_start = MAX((curr_address - curr_max_below),
		    curr_entry->vme_start);
		range_end = MIN((curr_address + curr_max_above),
		    curr_entry->vme_end);
		vm_map_region_walk(curr_map,
		    range_start,
		    curr_entry,
		    (VME_OFFSET(curr_entry) +
		    (range_start -
		    curr_entry->vme_start)),
		    range_end - range_start,
		    &extended,
		    look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
		if (extended.external_pager &&
		    extended.ref_count == 2 &&
		    extended.share_mode == SM_SHARED) {
			extended.share_mode = SM_PRIVATE;
		}
	} else {
		if (curr_entry->use_pmap) {
			extended.share_mode = SM_TRUESHARED;
		} else {
			extended.share_mode = SM_PRIVATE;
		}
		extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
	}

	if (look_for_pages) {
		submap_info->pages_resident = extended.pages_resident;
		submap_info->pages_swapped_out = extended.pages_swapped_out;
		submap_info->pages_shared_now_private =
		    extended.pages_shared_now_private;
		submap_info->pages_dirtied = extended.pages_dirtied;
		submap_info->external_pager = extended.external_pager;
		submap_info->shadow_depth = extended.shadow_depth;
		submap_info->share_mode = extended.share_mode;
		submap_info->ref_count = extended.ref_count;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			submap_info->pages_reusable = extended.pages_reusable;
		}
		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
			submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
		}
	} else {
		short_info->external_pager = extended.external_pager;
		short_info->shadow_depth = extended.shadow_depth;
		short_info->share_mode = extended.share_mode;
		short_info->ref_count = extended.ref_count;
	}

	vm_map_unlock_read(curr_map);

	return KERN_SUCCESS;
}
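/*
 * Illustrative user-space sketch (not part of this file): the
 * mach_vm_region_recurse() MIG call that lands in
 * vm_map_region_recurse_64() above, used in the usual "walk every region,
 * descending into submaps" loop.  Hedged example for a stand-alone tool;
 * error handling is intentionally minimal.
 */
#if 0
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>

static void
dump_regions(void)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t size = 0;
	natural_t depth = 0;                    /* start at the top-level map */
	vm_region_submap_info_data_64_t info;
	mach_msg_type_number_t count;
	kern_return_t kr;

	for (;;) {
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		kr = mach_vm_region_recurse(mach_task_self(), &addr, &size,
		    &depth, (vm_region_recurse_info_t)&info, &count);
		if (kr != KERN_SUCCESS) {
			break;          /* KERN_INVALID_ADDRESS: no more regions */
		}
		printf("0x%llx-0x%llx depth=%u prot=%d/%d tag=%u\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    depth, info.protection, info.max_protection,
		    info.user_tag);
		if (info.is_submap) {
			depth++;        /* re-query the same address, one level deeper */
		} else {
			addr += size;   /* move past this region */
		}
	}
}
#endif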
/*
 * User call to obtain information about a region in
 * a task's address map.  Currently, only one flavor is
 * supported.
 *
 * XXX The reserved and behavior fields cannot be filled
 *     in until the vm merge from the IK is completed, and
 *     vm_reserve is implemented.
 */
kern_return_t
vm_map_region(
	vm_map_t                 map,
	vm_map_offset_t         *address,               /* IN/OUT */
	vm_map_size_t           *size,                  /* OUT */
	vm_region_flavor_t       flavor,                /* IN */
	vm_region_info_t         info,                  /* OUT */
	mach_msg_type_number_t  *count,                 /* IN/OUT */
	mach_port_t             *object_name)           /* OUT */
{
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;
	vm_map_offset_t         start;

	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	switch (flavor) {
	case VM_REGION_BASIC_INFO:
		/* legacy for old 32-bit objects info */
	{
		vm_region_basic_info_t  basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = (uint32_t)VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}

	case VM_REGION_BASIC_INFO_64:
	{
		vm_region_basic_info_64_t       basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_64_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT_64;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	case VM_REGION_EXTENDED_INFO:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}
		/* fall through */
	case VM_REGION_EXTENDED_INFO__legacy:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
			return KERN_INVALID_ARGUMENT;
		}

		{
			vm_region_extended_info_t       extended;
			mach_msg_type_number_t original_count;
			int effective_page_size, effective_page_shift;

			extended = (vm_region_extended_info_t) info;

			effective_page_shift = vm_self_region_page_shift(map);
			effective_page_size = (1 << effective_page_shift);

			vm_map_lock_read(map);

			start = *address;
			if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
				if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
					vm_map_unlock_read(map);
					return KERN_INVALID_ADDRESS;
				}
			} else {
				entry = tmp_entry;
			}
			start = entry->vme_start;

			extended->protection = entry->protection;
			extended->user_tag = VME_ALIAS(entry);
			extended->pages_resident = 0;
			extended->pages_swapped_out = 0;
			extended->pages_shared_now_private = 0;
			extended->pages_dirtied = 0;
			extended->external_pager = 0;
			extended->shadow_depth = 0;

			original_count = *count;
			if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
				*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
			} else {
				extended->pages_reusable = 0;
				*count = VM_REGION_EXTENDED_INFO_COUNT;
			}

			vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);

			if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
				extended->share_mode = SM_PRIVATE;
			}

			if (object_name) {
				*object_name = IP_NULL;
			}
			*address = start;
			*size = (entry->vme_end - start);

			vm_map_unlock_read(map);
			return KERN_SUCCESS;
		}
	case VM_REGION_TOP_INFO:
	{
		vm_region_top_info_t    top;

		if (*count < VM_REGION_TOP_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		top = (vm_region_top_info_t) info;
		*count = VM_REGION_TOP_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}
		start = entry->vme_start;

		top->private_pages_resident = 0;
		top->shared_pages_resident = 0;

		vm_map_region_top_walk(entry, top);

		if (object_name) {
			*object_name = IP_NULL;
		}
		*address = start;
		*size = (entry->vme_end - start);

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	default:
		return KERN_INVALID_ARGUMENT;
	}
}
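/*
 * Illustrative user-space sketch (not part of this file): querying a single
 * region with the VM_REGION_BASIC_INFO_64 flavor handled above.  Hedged
 * example; "where" is a placeholder address supplied by the caller, and the
 * (deprecated) object_name port argument is required but unused.
 */
#if 0
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
basic_info_for(mach_vm_address_t where)
{
	mach_vm_address_t addr = where;
	mach_vm_size_t size = 0;
	vm_region_basic_info_data_64_t info;
	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t object_name = MACH_PORT_NULL;

	/* On return, addr/size describe the region containing (or following) "where". */
	return mach_vm_region(mach_task_self(), &addr, &size,
	           VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info,
	           &count, &object_name);
}
#endif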
#define OBJ_RESIDENT_COUNT(obj, entry_size)                     \
	MIN((entry_size),                                       \
	    ((obj)->all_reusable ?                              \
	    (obj)->wired_page_count :                           \
	    (obj)->resident_page_count - (obj)->reusable_page_count))
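/*
 * Worked example (illustrative, not from the original source): for an entry
 * spanning 16 pages backed by an object with 10 resident pages, 3 of them
 * reusable and none wired, OBJ_RESIDENT_COUNT(obj, 16) yields
 * MIN(16, 10 - 3) = 7.  If the object were marked all_reusable, only its
 * wired pages (here 0) would be counted instead.
 */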
void
vm_map_region_top_walk(
	vm_map_entry_t          entry,
	vm_region_top_info_t    top)
{
	if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
		top->share_mode = SM_EMPTY;
		top->ref_count = 0;
		top->obj_id = 0;
		return;
	}

	{
		struct  vm_object *obj, *tmp_obj;
		int             ref_count;
		uint32_t        entry_size;

		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);

		obj = VME_OBJECT(entry);

		vm_object_lock(obj);

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}

		assert(obj->reusable_page_count <= obj->resident_page_count);

		if (obj->shadow) {
			if (ref_count == 1) {
				top->private_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			}
			top->ref_count = ref_count;
			top->share_mode = SM_COW;

			while ((tmp_obj = obj->shadow)) {
				vm_object_lock(tmp_obj);
				vm_object_unlock(obj);
				obj = tmp_obj;

				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
					ref_count--;
				}

				assert(obj->reusable_page_count <= obj->resident_page_count);
				top->shared_pages_resident +=
				    OBJ_RESIDENT_COUNT(obj, entry_size);
				top->ref_count += ref_count - 1;
			}
		} else {
			if (entry->superpage_size) {
				top->share_mode = SM_LARGE_PAGE;
				top->shared_pages_resident = 0;
				top->private_pages_resident = entry_size;
			} else if (entry->needs_copy) {
				top->share_mode = SM_COW;
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				if (ref_count == 1 ||
				    (ref_count == 2 && obj->named)) {
					top->share_mode = SM_PRIVATE;
					top->private_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				} else {
					top->share_mode = SM_SHARED;
					top->shared_pages_resident =
					    OBJ_RESIDENT_COUNT(obj,
					    entry_size);
				}
			}
			top->ref_count = ref_count;
		}
		/* XXX K64: obj_id will be truncated */
		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);

		vm_object_unlock(obj);
	}
}
void
vm_map_region_walk(
	vm_map_t                        map,
	vm_map_offset_t                 va,
	vm_map_entry_t                  entry,
	vm_object_offset_t              offset,
	vm_object_size_t                range,
	vm_region_extended_info_t       extended,
	boolean_t                       look_for_pages,
	mach_msg_type_number_t          count)
{
	struct vm_object *obj, *tmp_obj;
	vm_map_offset_t         last_offset;
	int                     i;
	int                     ref_count;
	struct vm_object        *shadow_object;
	unsigned short          shadow_depth;
	boolean_t               do_region_footprint;
	int                     effective_page_size, effective_page_shift;
	vm_map_offset_t         effective_page_mask;

	do_region_footprint = task_self_region_footprint();

	if ((VME_OBJECT(entry) == 0) ||
	    (entry->is_sub_map) ||
	    (VME_OBJECT(entry)->phys_contiguous &&
	    !entry->superpage_size)) {
		extended->share_mode = SM_EMPTY;
		extended->ref_count = 0;
		return;
	}

	if (entry->superpage_size) {
		extended->shadow_depth = 0;
		extended->share_mode = SM_LARGE_PAGE;
		extended->ref_count = 1;
		extended->external_pager = 0;

		/* TODO4K: Superpage in 4k mode? */
		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
		extended->shadow_depth = 0;
		return;
	}

	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);
	effective_page_mask = effective_page_size - 1;

	offset = vm_map_trunc_page(offset, effective_page_mask);

	obj = VME_OBJECT(entry);

	vm_object_lock(obj);

	if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
		ref_count--;
	}

	if (look_for_pages) {
		for (last_offset = offset + range;
		    offset < last_offset;
		    offset += effective_page_size, va += effective_page_size) {
			if (do_region_footprint) {
				int disp;

				disp = 0;
				if (map->has_corpse_footprint) {
					/*
					 * Query the page info data we saved
					 * while forking the corpse.
					 */
					vm_map_corpse_footprint_query_page_info(
						map,
						va,
						&disp);
				} else {
					vm_map_footprint_query_page_info(
						map,
						entry,
						va,
						&disp);
				}
				if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
					extended->pages_resident++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
					extended->pages_reusable++;
				}
				if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
					extended->pages_dirtied++;
				}
				if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
					extended->pages_swapped_out++;
				}
				continue;
			}

			vm_map_region_look_for_page(map, va, obj,
			    vm_object_trunc_page(offset), ref_count,
			    0, extended, count);
		}

		if (do_region_footprint) {
			goto collect_object_info;
		}
	} else {
collect_object_info:
		shadow_object = obj->shadow;
		shadow_depth = 0;

		if (!(obj->internal)) {
			extended->external_pager = 1;
		}

		if (shadow_object != VM_OBJECT_NULL) {
			vm_object_lock(shadow_object);
			for (;
			    shadow_object != VM_OBJECT_NULL;
			    shadow_depth++) {
				vm_object_t     next_shadow;

				if (!(shadow_object->internal)) {
					extended->external_pager = 1;
				}

				next_shadow = shadow_object->shadow;
				if (next_shadow) {
					vm_object_lock(next_shadow);
				}
				vm_object_unlock(shadow_object);
				shadow_object = next_shadow;
			}
		}
		extended->shadow_depth = shadow_depth;
	}

	if (extended->shadow_depth || entry->needs_copy) {
		extended->share_mode = SM_COW;
	} else {
		if (ref_count == 1) {
			extended->share_mode = SM_PRIVATE;
		} else {
			if (obj->true_share) {
				extended->share_mode = SM_TRUESHARED;
			} else {
				extended->share_mode = SM_SHARED;
			}
		}
	}
	extended->ref_count = ref_count - extended->shadow_depth;

	for (i = 0; i < extended->shadow_depth; i++) {
		if ((tmp_obj = obj->shadow) == 0) {
			break;
		}
		vm_object_lock(tmp_obj);
		vm_object_unlock(obj);

		if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
			ref_count--;
		}

		extended->ref_count += ref_count;
		obj = tmp_obj;
	}
	vm_object_unlock(obj);

	if (extended->share_mode == SM_SHARED) {
		vm_map_entry_t  cur;
		vm_map_entry_t  last;
		int my_refs;

		obj = VME_OBJECT(entry);
		last = vm_map_to_entry(map);
		my_refs = 0;

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}
		for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
			my_refs += vm_map_region_count_obj_refs(cur, obj);
		}

		if (my_refs == ref_count) {
			extended->share_mode = SM_PRIVATE_ALIASED;
		} else if (my_refs > 1) {
			extended->share_mode = SM_SHARED_ALIASED;
		}
	}
}
/* object is locked on entry and locked on return */
static void
vm_map_region_look_for_page(
	__unused vm_map_t               map,
	__unused vm_map_offset_t        va,
	vm_object_t                     object,
	vm_object_offset_t              offset,
	int                             max_refcnt,
	unsigned short                  depth,
	vm_region_extended_info_t       extended,
	mach_msg_type_number_t          count)
{
	vm_page_t       p;
	vm_object_t     shadow;
	int             ref_count;
	vm_object_t     caller_object;

	shadow = object->shadow;
	caller_object = object;

	while (TRUE) {
		if (!(object->internal)) {
			extended->external_pager = 1;
		}

		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			if (shadow && (max_refcnt == 1)) {
				extended->pages_shared_now_private++;
			}

			if (!p->vmp_fictitious &&
			    (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
				extended->pages_dirtied++;
			} else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
				if (p->vmp_reusable || object->all_reusable) {
					extended->pages_reusable++;
				}
			}

			extended->pages_resident++;

			if (object != caller_object) {
				vm_object_unlock(object);
			}
			return;
		}

		if (object->internal &&
		    object->alive &&
		    !object->terminating &&
		    object->pager_ready) {
			if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
			    == VM_EXTERNAL_STATE_EXISTS) {
				/* the pager has that page */
				extended->pages_swapped_out++;
				if (object != caller_object) {
					vm_object_unlock(object);
				}
				return;
			}
		}

		if (shadow) {
			vm_object_lock(shadow);

			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
				ref_count--;
			}

			if (++depth > extended->shadow_depth) {
				extended->shadow_depth = depth;
			}

			if (ref_count > max_refcnt) {
				max_refcnt = ref_count;
			}

			if (object != caller_object) {
				vm_object_unlock(object);
			}

			offset = offset + object->vo_shadow_offset;
			object = shadow;
			shadow = object->shadow;
			continue;
		}
		if (object != caller_object) {
			vm_object_unlock(object);
		}
		break;
	}
}
static int
vm_map_region_count_obj_refs(
	vm_map_entry_t  entry,
	vm_object_t     object)
{
	int             ref_count;
	vm_object_t     chk_obj;
	vm_object_t     tmp_obj;

	if (VME_OBJECT(entry) == 0) {
		return 0;
	}

	if (entry->is_sub_map) {
		return 0;
	} else {
		ref_count = 0;

		chk_obj = VME_OBJECT(entry);
		vm_object_lock(chk_obj);

		while (chk_obj) {
			if (chk_obj == object) {
				ref_count++;
			}
			tmp_obj = chk_obj->shadow;
			if (tmp_obj) {
				vm_object_lock(tmp_obj);
			}
			vm_object_unlock(chk_obj);

			chk_obj = tmp_obj;
		}
	}
	return ref_count;
}
/*
 *	Routine:	vm_map_simplify
 *
 *	Description:
 *		Attempt to simplify the map representation in
 *		the vicinity of the given starting address.
 *	Note:
 *		This routine is intended primarily to keep the
 *		kernel maps more compact -- they generally don't
 *		benefit from the "expand a map entry" technology
 *		at allocation time because the adjacent entry
 *		is often wired down.
 */
void
vm_map_simplify_entry(
	vm_map_t        map,
	vm_map_entry_t  this_entry)
{
	vm_map_entry_t  prev_entry;

	counter(c_vm_map_simplify_entry_called++);

	prev_entry = this_entry->vme_prev;

	if ((this_entry != vm_map_to_entry(map)) &&
	    (prev_entry != vm_map_to_entry(map)) &&

	    (prev_entry->vme_end == this_entry->vme_start) &&

	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
	    (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
	    ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
	    prev_entry->vme_start))
	    == VME_OFFSET(this_entry)) &&

	    (prev_entry->behavior == this_entry->behavior) &&
	    (prev_entry->needs_copy == this_entry->needs_copy) &&
	    (prev_entry->protection == this_entry->protection) &&
	    (prev_entry->max_protection == this_entry->max_protection) &&
	    (prev_entry->inheritance == this_entry->inheritance) &&
	    (prev_entry->use_pmap == this_entry->use_pmap) &&
	    (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
	    (prev_entry->no_cache == this_entry->no_cache) &&
	    (prev_entry->permanent == this_entry->permanent) &&
	    (prev_entry->map_aligned == this_entry->map_aligned) &&
	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
	    (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
	    /* from_reserved_zone: OK if that field doesn't match */
	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
	    (prev_entry->vme_resilient_codesign ==
	    this_entry->vme_resilient_codesign) &&
	    (prev_entry->vme_resilient_media ==
	    this_entry->vme_resilient_media) &&
	    (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&

	    (prev_entry->wired_count == this_entry->wired_count) &&
	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&

	    ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
	    (prev_entry->in_transition == FALSE) &&
	    (this_entry->in_transition == FALSE) &&
	    (prev_entry->needs_wakeup == FALSE) &&
	    (this_entry->needs_wakeup == FALSE) &&
	    (prev_entry->is_shared == this_entry->is_shared) &&
	    (prev_entry->superpage_size == FALSE) &&
	    (this_entry->superpage_size == FALSE)
	    ) {
		vm_map_store_entry_unlink(map, prev_entry);
		assert(prev_entry->vme_start < this_entry->vme_end);
		if (prev_entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
			    VM_MAP_PAGE_MASK(map)));
		}
		this_entry->vme_start = prev_entry->vme_start;
		VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));

		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, this_entry, TRUE);
		}

		if (prev_entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(prev_entry));
		} else {
			vm_object_deallocate(VME_OBJECT(prev_entry));
		}
		vm_map_entry_dispose(map, prev_entry);
		SAVE_HINT_MAP_WRITE(map, this_entry);
		counter(c_vm_map_simplified++);
	}
}

void
vm_map_simplify(
	vm_map_t        map,
	vm_map_offset_t start)
{
	vm_map_entry_t  this_entry;

	vm_map_lock(map);
	if (vm_map_lookup_entry(map, start, &this_entry)) {
		vm_map_simplify_entry(map, this_entry);
		vm_map_simplify_entry(map, this_entry->vme_next);
	}
	counter(c_vm_map_simplify_called++);
	vm_map_unlock(map);
}
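/*
 * Illustrative sketch (not part of the original source): the effect of a
 * successful vm_map_simplify_entry() merge, assuming two adjacent entries
 * with identical attributes backed by the same object at contiguous offsets:
 *
 *	before:  [0x1000-0x2000, obj A, off 0x0]  [0x2000-0x3000, obj A, off 0x1000]
 *	after:   [0x1000-0x3000, obj A, off 0x0]
 *
 * The previous entry is unlinked and disposed; only this_entry survives,
 * with its start address and object offset pulled back to cover both ranges.
 */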
void
vm_map_simplify_range(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;

	/*
	 * The map should be locked (for "write") by the caller.
	 */

	if (start >= end) {
		/* invalid address range */
		return;
	}

	start = vm_map_trunc_page(start,
	    VM_MAP_PAGE_MASK(map));
	end = vm_map_round_page(end,
	    VM_MAP_PAGE_MASK(map));

	if (!vm_map_lookup_entry(map, start, &entry)) {
		/* "start" is not mapped and "entry" ends before "start" */
		if (entry == vm_map_to_entry(map)) {
			/* start with first entry in the map */
			entry = vm_map_first_entry(map);
		} else {
			/* start with next entry */
			entry = entry->vme_next;
		}
	}

	while (entry != vm_map_to_entry(map) &&
	    entry->vme_start <= end) {
		/* try and coalesce "entry" with its previous entry */
		vm_map_simplify_entry(map, entry);
		entry = entry->vme_next;
	}
}
/*
 *	Routine:	vm_map_machine_attribute
 *	Purpose:
 *		Provide machine-specific attributes to mappings,
 *		such as cachability etc. for machines that provide
 *		them.  NUMA architectures and machines with big/strange
 *		caches will use this.
 *	Note:
 *		Responsibilities for locking and checking are handled here,
 *		everything else in the pmap module. If any non-volatile
 *		information must be kept, the pmap module should handle
 *		it itself. [This assumes that attributes do not
 *		need to be inherited, which seems ok to me]
 */
kern_return_t
vm_map_machine_attribute(
	vm_map_t                        map,
	vm_map_offset_t                 start,
	vm_map_offset_t                 end,
	vm_machine_attribute_t          attribute,
	vm_machine_attribute_val_t*     value)          /* IN/OUT */
{
	kern_return_t   ret;
	vm_map_size_t   sync_size;
	vm_map_entry_t  entry;

	if (start < vm_map_min(map) || end > vm_map_max(map)) {
		return KERN_INVALID_ADDRESS;
	}

	/* Figure how much memory we need to flush (in page increments) */
	sync_size = end - start;

	vm_map_lock(map);

	if (attribute != MATTR_CACHE) {
		/* If we don't have to find physical addresses, we */
		/* don't have to do an explicit traversal here.    */
		ret = pmap_attribute(map->pmap, start, end - start,
		    attribute, value);
		vm_map_unlock(map);
		return ret;
	}

	ret = KERN_SUCCESS;     /* Assume it all worked */

	while (sync_size) {
		if (vm_map_lookup_entry(map, start, &entry)) {
			vm_map_size_t   sub_size;
			if ((entry->vme_end - start) > sync_size) {
				sub_size = sync_size;
				sync_size = 0;
			} else {
				sub_size = entry->vme_end - start;
				sync_size -= sub_size;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;

				sub_start = (start - entry->vme_start)
				    + VME_OFFSET(entry);
				sub_end = sub_start + sub_size;
				vm_map_machine_attribute(
					VME_SUBMAP(entry),
					sub_start,
					sub_end,
					attribute,
					value);
			} else {
				if (VME_OBJECT(entry)) {
					vm_page_t               m;
					vm_object_t             object;
					vm_object_t             base_object;
					vm_object_t             last_object;
					vm_object_offset_t      offset;
					vm_object_offset_t      base_offset;
					vm_map_size_t           range;

					range = sub_size;
					offset = (start - entry->vme_start)
					    + VME_OFFSET(entry);
					offset = vm_object_trunc_page(offset);
					base_offset = offset;
					object = VME_OBJECT(entry);
					base_object = object;
					last_object = NULL;

					vm_object_lock(object);

					while (range) {
						m = vm_page_lookup(
							object, offset);

						if (m && !m->vmp_fictitious) {
							ret =
							    pmap_attribute_cache_sync(
								VM_PAGE_GET_PHYS_PAGE(m),
								PAGE_SIZE,
								attribute, value);
						} else if (object->shadow) {
							offset = offset + object->vo_shadow_offset;
							last_object = object;
							object = object->shadow;
							vm_object_lock(last_object->shadow);
							vm_object_unlock(last_object);
							continue;
						}
						if (range < PAGE_SIZE) {
							break;
						}
						range -= PAGE_SIZE;

						if (base_object != object) {
							vm_object_unlock(object);
							vm_object_lock(base_object);
							object = base_object;
						}
						/* Bump to the next page */
						base_offset += PAGE_SIZE;
						offset = base_offset;
					}
					vm_object_unlock(object);
				}
			}
			start += sub_size;
		} else {
			vm_map_unlock(map);
			return KERN_FAILURE;
		}
	}

	vm_map_unlock(map);

	return ret;
}
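/*
 * Illustrative user-space sketch (not part of this file): the Mach call that
 * funnels into vm_map_machine_attribute().  Hedged example; whether a given
 * attribute is supported is machine-dependent, and the vm_machine_attribute()
 * MIG wrapper and MATTR_* constants are assumed from <mach/vm_map.h> and
 * <mach/vm_attributes.h>.
 */
#if 0
#include <mach/mach.h>

static kern_return_t
flush_cache_range(vm_address_t addr, vm_size_t len)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	return vm_machine_attribute(mach_task_self(), addr, len,
	           MATTR_CACHE, &value);
}
#endif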
/*
 *	vm_map_behavior_set:
 *
 *	Sets the paging reference behavior of the specified address
 *	range in the target map.  Paging reference behavior affects
 *	how pagein operations resulting from faults on the map will be
 *	clustered.
 */
kern_return_t
vm_map_behavior_set(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_behavior_t   new_behavior)
{
	vm_map_entry_t  entry;
	vm_map_entry_t  temp_entry;

	if (start > end ||
	    start < vm_map_min(map) ||
	    end > vm_map_max(map)) {
		return KERN_NO_SPACE;
	}

	switch (new_behavior) {
	/*
	 * This first block of behaviors all set a persistent state on the specified
	 * memory range.  All we have to do here is to record the desired behavior
	 * in the vm_map_entry_t's.
	 */

	case VM_BEHAVIOR_DEFAULT:
	case VM_BEHAVIOR_RANDOM:
	case VM_BEHAVIOR_SEQUENTIAL:
	case VM_BEHAVIOR_RSEQNTL:
	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
		vm_map_lock(map);

		/*
		 * The entire address range must be valid for the map.
		 * Note that vm_map_range_check() does a
		 * vm_map_lookup_entry() internally and returns the
		 * entry containing the start of the address range if
		 * the entire range is valid.
		 */
		if (vm_map_range_check(map, start, end, &temp_entry)) {
			entry = temp_entry;
			vm_map_clip_start(map, entry, start);
		} else {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
			vm_map_clip_end(map, entry, end);
			if (entry->is_sub_map) {
				assert(!entry->use_pmap);
			}

			if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
				entry->zero_wired_pages = TRUE;
			} else {
				entry->behavior = new_behavior;
			}
			entry = entry->vme_next;
		}

		vm_map_unlock(map);
		break;

	/*
	 * The rest of these are different from the above in that they cause
	 * an immediate action to take place as opposed to setting a behavior that
	 * affects future actions.
	 */

	case VM_BEHAVIOR_WILLNEED:
		return vm_map_willneed(map, start, end);

	case VM_BEHAVIOR_DONTNEED:
		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_FREE:
		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_REUSABLE:
		return vm_map_reusable_pages(map, start, end);

	case VM_BEHAVIOR_REUSE:
		return vm_map_reuse_pages(map, start, end);

	case VM_BEHAVIOR_CAN_REUSE:
		return vm_map_can_reuse(map, start, end);

#if MACH_ASSERT
	case VM_BEHAVIOR_PAGEOUT:
		return vm_map_pageout(map, start, end);
#endif /* MACH_ASSERT */

	default:
		return KERN_INVALID_ARGUMENT;
	}

	return KERN_SUCCESS;
}
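/*
 * Illustrative user-space sketch (not part of this file): advising the kernel
 * about an access pattern.  The persistent behaviors above are reachable from
 * user space via madvise(2) or via the Mach interface; mach_vm_behavior_set()
 * is assumed here to be the MIG wrapper that reaches this routine.
 */
#if 0
#include <stdint.h>
#include <sys/mman.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>

static void
advise_sequential(void *buf, size_t len)
{
	/* BSD path: MADV_SEQUENTIAL ends up setting VM_BEHAVIOR_SEQUENTIAL */
	(void)madvise(buf, len, MADV_SEQUENTIAL);

	/* Mach path: same effect, expressed directly in VM_BEHAVIOR_* terms */
	(void)mach_vm_behavior_set(mach_task_self(),
	    (mach_vm_address_t)(uintptr_t)buf, (mach_vm_size_t)len,
	    VM_BEHAVIOR_SEQUENTIAL);
}
#endif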
/*
 * Internals for madvise(MADV_WILLNEED) system call.
 *
 * The implementation is to do:-
 * a) read-ahead if the mapping corresponds to a mapped regular file
 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
 */
static kern_return_t
vm_map_willneed(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t                  entry;
	vm_object_t                     object;
	memory_object_t                 pager;
	struct vm_object_fault_info     fault_info = {};
	kern_return_t                   kr;
	vm_object_size_t                len;
	vm_object_offset_t              offset;

	fault_info.interruptible = THREAD_UNINT;        /* ignored value */
	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
	fault_info.stealth = TRUE;

	/*
	 * The MADV_WILLNEED operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && start < end;) {
		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.  After that, the offset will always be zero to
		 * correspond to the beginning of the current vm_map_entry.
		 */
		offset = (start - entry->vme_start) + VME_OFFSET(entry);

		/*
		 * Set the length so we don't go beyond the end of the
		 * map_entry or beyond the end of the range we were given.
		 * This range could span also multiple map entries all of which
		 * map different files, so make sure we only do the right amount
		 * of I/O for each object.  Note that it's possible for there
		 * to be multiple map entries all referring to the same object
		 * but with different page permissions, but it's not worth
		 * trying to optimize that case.
		 */
		len = MIN(entry->vme_end - start, end - start);

		if ((vm_size_t) len != len) {
			/* 32-bit overflow */
			len = (vm_size_t) (0 - PAGE_SIZE);
		}
		fault_info.cluster_size = (vm_size_t) len;
		fault_info.lo_offset    = offset;
		fault_info.hi_offset    = offset + len;
		fault_info.user_tag     = VME_ALIAS(entry);
		fault_info.pmap_options = 0;
		if (entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap)) {
			fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
		}

		/*
		 * If the entry is a submap OR there's no read permission
		 * to this mapping, then just skip it.
		 */
		if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
			entry = entry->vme_next;
			start = entry->vme_start;
			continue;
		}

		object = VME_OBJECT(entry);

		if (object == NULL ||
		    (object && object->internal)) {
			/*
			 * Memory range backed by anonymous memory.
			 */
			vm_size_t region_size = 0, effective_page_size = 0;
			vm_map_offset_t addr = 0, effective_page_mask = 0;

			region_size = len;
			addr = start;

			effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
			effective_page_size = effective_page_mask + 1;

			vm_map_unlock_read(map);

			while (region_size) {
				vm_pre_fault(
					vm_map_trunc_page(addr, effective_page_mask),
					VM_PROT_READ | VM_PROT_WRITE);

				region_size -= effective_page_size;
				addr += effective_page_size;
			}
		} else {
			/*
			 * Find the file object backing this map entry.  If there is
			 * none, then we simply ignore the "will need" advice for this
			 * entry and go on to the next one.
			 */
			if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
				entry = entry->vme_next;
				start = entry->vme_start;
				continue;
			}

			vm_object_paging_begin(object);
			pager = object->pager;
			vm_object_unlock(object);

			/*
			 * The data_request() could take a long time, so let's
			 * release the map lock to avoid blocking other threads.
			 */
			vm_map_unlock_read(map);

			/*
			 * Get the data from the object asynchronously.
			 *
			 * Note that memory_object_data_request() places limits on the
			 * amount of I/O it will do.  Regardless of the len we
			 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
			 * silently truncates the len to that size.  This isn't
			 * necessarily bad since madvise shouldn't really be used to
			 * page in unlimited amounts of data.  Other Unix variants
			 * limit the willneed case as well.  If this turns out to be an
			 * issue for developers, then we can always adjust the policy
			 * here and still be backwards compatible since this is all
			 * just a hint.
			 */
			kr = memory_object_data_request(
				pager,
				vm_object_trunc_page(offset) + object->paging_offset,
				0,      /* ignored */
				VM_PROT_READ,
				(memory_object_fault_info_t)&fault_info);

			vm_object_lock(object);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			/*
			 * If we couldn't do the I/O for some reason, just give up on
			 * the madvise.  We still return success to the user since
			 * madvise isn't supposed to fail when the advice can't be
			 * taken.
			 */
			if (kr != KERN_SUCCESS) {
				return KERN_SUCCESS;
			}
		}

		start += len;
		if (start >= end) {
			/* done */
			return KERN_SUCCESS;
		}

		/* look up next entry */
		vm_map_lock_read(map);
		if (!vm_map_lookup_entry(map, start, &entry)) {
			/*
			 * There's a new hole in the address range.
			 */
			vm_map_unlock_read(map);
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
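/*
 * Illustrative user-space sketch (not part of this file): the BSD entry point
 * whose kernel side is vm_map_willneed().  For a file mapping this triggers
 * asynchronous read-ahead; for anonymous memory it pre-faults the pages.
 * Hedged example; error handling is intentionally minimal.
 */
#if 0
#include <stddef.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

static void *
map_and_prefetch(const char *path, size_t len)
{
	int fd = open(path, O_RDONLY);
	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);

	if (p != MAP_FAILED) {
		/* hint that the whole mapping will be needed soon */
		(void)madvise(p, len, MADV_WILLNEED);
	}
	close(fd);
	return p;
}
#endif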
static boolean_t
vm_map_entry_is_reusable(
	vm_map_entry_t entry)
{
	/* Only user map entries */

	vm_object_t object;

	if (entry->is_sub_map) {
		return FALSE;
	}

	switch (VME_ALIAS(entry)) {
	case VM_MEMORY_MALLOC:
	case VM_MEMORY_MALLOC_SMALL:
	case VM_MEMORY_MALLOC_LARGE:
	case VM_MEMORY_REALLOC:
	case VM_MEMORY_MALLOC_TINY:
	case VM_MEMORY_MALLOC_LARGE_REUSABLE:
	case VM_MEMORY_MALLOC_LARGE_REUSED:
		/*
		 * This is a malloc() memory region: check if it's still
		 * in its original state and can be re-used for more
		 * malloc() allocations.
		 */
		break;
	default:
		/*
		 * Not a malloc() memory region: let the caller decide if
		 * it's reusable.
		 */
		return TRUE;
	}

	if (/*entry->is_shared ||*/
		entry->is_sub_map ||
		entry->in_transition ||
		entry->protection != VM_PROT_DEFAULT ||
		entry->max_protection != VM_PROT_ALL ||
		entry->inheritance != VM_INHERIT_DEFAULT ||
		entry->no_cache ||
		entry->permanent ||
		entry->superpage_size != FALSE ||
		entry->zero_wired_pages ||
		entry->wired_count != 0 ||
		entry->user_wired_count != 0) {
		return FALSE;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		return TRUE;
	}
	if (
#if 0
		/*
		 * Let's proceed even if the VM object is potentially
		 * shared.
		 * We check for this later when processing the actual
		 * VM pages, so the contents will be safe if shared.
		 *
		 * But we can still mark this memory region as "reusable" to
		 * acknowledge that the caller did let us know that the memory
		 * could be re-used and should not be penalized for holding
		 * on to it.  This allows its "resident size" to not include
		 * the reusable range.
		 */
		object->ref_count == 1 &&
#endif
		object->wired_page_count == 0 &&
		object->copy == VM_OBJECT_NULL &&
		object->shadow == VM_OBJECT_NULL &&
		object->internal &&
		object->purgable == VM_PURGABLE_DENY &&
		object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
		!object->true_share &&
		object->wimg_bits == VM_WIMG_USE_DEFAULT &&
		!object->code_signed) {
		return TRUE;
	}
	return FALSE;
}
static kern_return_t
vm_map_reuse_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t          entry;
	vm_object_t             object;
	vm_object_offset_t      start_offset, end_offset;

	/*
	 * The MADV_REUSE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a
		 * portion of a native page.
		 */
		return KERN_SUCCESS;
	}

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reuse_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reuse_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
		} else {
			start_offset = 0;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object != VM_OBJECT_NULL) {
			vm_object_lock(object);
			vm_object_reuse_pages(object, start_offset, end_offset,
			    TRUE);
			vm_object_unlock(object);
		}

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reuse_pages_success++;
	return KERN_SUCCESS;
}
static kern_return_t
vm_map_reusable_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t          entry;
	vm_object_t             object;
	vm_object_offset_t      start_offset, end_offset;
	vm_map_offset_t         pmap_offset;

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * XXX TODO4K
		 * need to figure out what reusable means for a portion
		 * of a native page.
		 */
		return KERN_SUCCESS;
	}

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reusable_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		int kill_pages = 0;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
			/* not writable: can't discard contents */
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_nonwritable++;
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
			pmap_offset = start;
		} else {
			start_offset = 0;
			pmap_offset = entry->vme_start;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}

		vm_object_lock(object);
		if (((object->ref_count == 1) ||
		    (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
		    object->copy == VM_OBJECT_NULL)) &&
		    object->shadow == VM_OBJECT_NULL &&
		    /*
		     * "iokit_acct" entries are billed for their virtual size
		     * (rather than for their resident pages only), so they
		     * wouldn't benefit from making pages reusable, and it
		     * would be hard to keep track of pages that are both
		     * "iokit_acct" and "reusable" in the pmap stats and
		     * ledgers.
		     */
		    !(entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap))) {
			if (object->ref_count != 1) {
				vm_page_stats_reusable.reusable_shared++;
			}
			kill_pages = 1;
		} else {
			kill_pages = -1;
		}
		if (kill_pages != -1) {
			vm_object_deactivate_pages(object,
			    start_offset,
			    end_offset - start_offset,
			    kill_pages,
			    TRUE /*reusable_pages*/,
			    map->pmap,
			    pmap_offset);
		} else {
			vm_page_stats_reusable.reusable_pages_shared++;
		}
		vm_object_unlock(object);

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
		    VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reusable_pages_success++;
	return KERN_SUCCESS;
}
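/*
 * Illustrative user-space sketch (not part of this file): the madvise(2)
 * advice pair that reaches vm_map_reusable_pages() and vm_map_reuse_pages().
 * Darwin's malloc uses this pattern to hand free magazine memory back without
 * unmapping it; shown here as a hedged, stand-alone example.
 */
#if 0
#include <stddef.h>
#include <sys/mman.h>

static void
recycle_buffer(void *buf, size_t len)
{
	/* contents may be discarded; pages stop counting toward footprint */
	(void)madvise(buf, len, MADV_FREE_REUSABLE);

	/* ... later, before touching the buffer again ... */
	(void)madvise(buf, len, MADV_FREE_REUSE);
}
#endif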
static kern_return_t
vm_map_can_reuse(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.can_reuse_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.can_reuse_failure++;
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.can_reuse_success++;
	return KERN_SUCCESS;
}
static kern_return_t
vm_map_pageout(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t entry;

	/*
	 * The MADV_PAGEOUT operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */
	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */
	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		vm_object_t object;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (entry->is_sub_map) {
			vm_map_t        submap;
			vm_map_offset_t submap_start;
			vm_map_offset_t submap_end;
			vm_map_entry_t  submap_entry;

			submap = VME_SUBMAP(entry);
			submap_start = VME_OFFSET(entry);
			submap_end = submap_start + (entry->vme_end -
			    entry->vme_start);

			vm_map_lock_read(submap);

			if (!vm_map_range_check(submap,
			    submap_start,
			    submap_end,
			    &submap_entry)) {
				vm_map_unlock_read(submap);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			object = VME_OBJECT(submap_entry);
			if (submap_entry->is_sub_map ||
			    object == VM_OBJECT_NULL ||
			    !object->internal) {
				vm_map_unlock_read(submap);
				continue;
			}

			vm_object_pageout(object);

			vm_map_unlock_read(submap);
			submap = VM_MAP_NULL;
			submap_entry = VM_MAP_ENTRY_NULL;
			continue;
		}

		object = VME_OBJECT(entry);
		if (entry->is_sub_map ||
		    object == VM_OBJECT_NULL ||
		    !object->internal) {
			continue;
		}

		vm_object_pageout(object);
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
/*
 * Routine:	vm_map_entry_insert
 *
 * Description:	This routine inserts a new vm_entry in a locked map.
 */
vm_map_entry_t
vm_map_entry_insert(
	vm_map_t                map,
	vm_map_entry_t          insp_entry,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_object_t             object,
	vm_object_offset_t      offset,
	boolean_t               needs_copy,
	boolean_t               is_shared,
	boolean_t               in_transition,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_behavior_t           behavior,
	vm_inherit_t            inheritance,
	unsigned short          wired_count,
	boolean_t               no_cache,
	boolean_t               permanent,
	boolean_t               no_copy_on_read,
	unsigned int            superpage_size,
	boolean_t               clear_map_aligned,
	boolean_t               is_submap,
	boolean_t               used_for_jit,
	int                     alias,
	boolean_t               translated_allow_execute)
{
	vm_map_entry_t new_entry;

	assert(insp_entry != (vm_map_entry_t)0);
	vm_map_lock_assert_exclusive(map);

#if DEVELOPMENT || DEBUG
	vm_object_offset_t end_offset = 0;
	assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
#endif /* DEVELOPMENT || DEBUG */

	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);

	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}
	if (clear_map_aligned &&
	    (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
	    !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
		new_entry->map_aligned = FALSE;
	}

	new_entry->vme_start = start;
	new_entry->vme_end = end;
	if (new_entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
		    VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
		    VM_MAP_PAGE_MASK(map)));
	} else {
		assert(page_aligned(new_entry->vme_start));
		assert(page_aligned(new_entry->vme_end));
	}
	assert(new_entry->vme_start < new_entry->vme_end);

	VME_OBJECT_SET(new_entry, object);
	VME_OFFSET_SET(new_entry, offset);
	new_entry->is_shared = is_shared;
	new_entry->is_sub_map = is_submap;
	new_entry->needs_copy = needs_copy;
	new_entry->in_transition = in_transition;
	new_entry->needs_wakeup = FALSE;
	new_entry->inheritance = inheritance;
	new_entry->protection = cur_protection;
	new_entry->max_protection = max_protection;
	new_entry->behavior = behavior;
	new_entry->wired_count = wired_count;
	new_entry->user_wired_count = 0;
	if (is_submap) {
		/*
		 * submap: "use_pmap" means "nested".
		 * default: false.
		 */
		new_entry->use_pmap = FALSE;
	} else {
		/*
		 * object: "use_pmap" means "use pmap accounting" for footprint.
		 * default: true.
		 */
		new_entry->use_pmap = TRUE;
	}
	VME_ALIAS_SET(new_entry, alias);
	new_entry->zero_wired_pages = FALSE;
	new_entry->no_cache = no_cache;
	new_entry->permanent = permanent;
	if (superpage_size) {
		new_entry->superpage_size = TRUE;
	} else {
		new_entry->superpage_size = FALSE;
	}
	if (used_for_jit) {
		if (!(map->jit_entry_exists) ||
		    VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
			new_entry->used_for_jit = TRUE;
			map->jit_entry_exists = TRUE;
		}
	} else {
		new_entry->used_for_jit = FALSE;
	}
	if (translated_allow_execute) {
		new_entry->translated_allow_execute = TRUE;
	} else {
		new_entry->translated_allow_execute = FALSE;
	}
	new_entry->pmap_cs_associated = FALSE;
	new_entry->iokit_acct = FALSE;
	new_entry->vme_resilient_codesign = FALSE;
	new_entry->vme_resilient_media = FALSE;
	new_entry->vme_atomic = FALSE;
	new_entry->vme_no_copy_on_read = no_copy_on_read;

	/*
	 * Insert the new entry into the list.
	 */
	vm_map_store_entry_link(map, insp_entry, new_entry,
	    VM_MAP_KERNEL_FLAGS_NONE);
	map->size += end - start;

	/*
	 * Update the free space hint and the lookup hint.
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);
	return new_entry;
}
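
/*
 * Usage sketch (illustrative only; argument list abbreviated): callers are
 * expected to hold the map lock exclusively, locate the insertion point,
 * and then let vm_map_entry_insert() link the new entry and update the
 * map's size and hints, roughly along these lines.  The "..." stands for
 * the per-mapping attributes listed in the signature above.
 *
 *	vm_map_entry_t where, new_entry;
 *
 *	vm_map_lock(map);
 *	if (!vm_map_lookup_entry(map, start, &where)) {
 *		// "where" is now the entry preceding the hole at "start"
 *	}
 *	new_entry = vm_map_entry_insert(map, where, start, end,
 *	    object, offset, ... );
 *	vm_map_unlock(map);
 */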
int vm_remap_old_path = 0;
int vm_remap_new_path = 0;
/*
 * Routine:	vm_map_remap_extract
 *
 * Description:	This routine returns a vm_entry list from a map.
 */
static kern_return_t
vm_map_remap_extract(
	vm_map_t                map,
	vm_map_offset_t         addr,
	vm_map_size_t           size,
	vm_prot_t               required_protection,
	boolean_t               copy,
	struct vm_map_header    *map_header,
	vm_prot_t               *cur_protection,
	vm_prot_t               *max_protection,
	/* What, no behavior? */
	vm_inherit_t            inheritance,
	vm_map_kernel_flags_t   vmk_flags)
{
	kern_return_t           result;
	vm_map_size_t           mapped_size;
	vm_map_size_t           tmp_size;
	vm_map_entry_t          src_entry;     /* result of last map lookup */
	vm_map_entry_t          new_entry;
	vm_object_offset_t      offset;
	vm_map_offset_t         map_address;
	vm_map_offset_t         src_start;     /* start of entry to map */
	vm_map_offset_t         src_end;       /* end of region to be mapped */
	vm_object_t             object;
	vm_map_version_t        version;
	boolean_t               src_needs_copy;
	boolean_t               new_entry_needs_copy;
	vm_map_entry_t          saved_src_entry;
	boolean_t               src_entry_was_wired;
	vm_prot_t               max_prot_for_prot_copy;
	vm_map_offset_t         effective_page_mask;
	boolean_t               pageable, same_map;

	pageable = vmk_flags.vmkf_copy_pageable;
	same_map = vmk_flags.vmkf_copy_same_map;

	effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));

	assert(map != VM_MAP_NULL);
	assert(size == vm_map_round_page(size, effective_page_mask));
	assert(inheritance == VM_INHERIT_NONE ||
	    inheritance == VM_INHERIT_COPY ||
	    inheritance == VM_INHERIT_SHARE);
	assert(!(required_protection & ~VM_PROT_ALL));

	/*
	 * Compute start and end of region.
	 */
	src_start = vm_map_trunc_page(addr, effective_page_mask);
	src_end = vm_map_round_page(src_start + size, effective_page_mask);

	/*
	 * Initialize map_header.
	 */
	map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
	map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
	map_header->nentries = 0;
	map_header->entries_pageable = pageable;
//	map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
	map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
	map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;

	vm_map_store_init(map_header);

	if (copy && vmk_flags.vmkf_remap_prot_copy) {
		max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
	} else {
		max_prot_for_prot_copy = VM_PROT_NONE;
	}
	*cur_protection = VM_PROT_ALL;
	*max_protection = VM_PROT_ALL;

	map_address = 0;
	mapped_size = 0;
	result = KERN_SUCCESS;
	/*
	 * The specified source virtual space might correspond to
	 * multiple map entries, need to loop on them.
	 */
	vm_map_lock(map);
	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		/*
		 * This address space uses sub-pages so the range might
		 * not be re-mappable in an address space with larger
		 * pages.  Re-assemble any broken-up VM map entries to
		 * improve our chances of making it work.
		 */
		vm_map_simplify_range(map, src_start, src_end);
	}
	while (mapped_size != size) {
		vm_map_size_t entry_size;

		/*
		 * Find the beginning of the region.
		 */
		if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
			result = KERN_INVALID_ADDRESS;
			break;
		}

		if (src_start < src_entry->vme_start ||
		    (mapped_size && src_start != src_entry->vme_start)) {
			result = KERN_INVALID_ADDRESS;
			break;
		}

		tmp_size = size - mapped_size;
		if (src_end > src_entry->vme_end) {
			tmp_size -= (src_end - src_entry->vme_end);
		}

		entry_size = (vm_map_size_t)(src_entry->vme_end -
		    src_entry->vme_start);

		if (src_entry->is_sub_map &&
		    vmk_flags.vmkf_copy_single_object) {
			vm_map_t        submap;
			vm_map_offset_t submap_start;
			vm_map_size_t   submap_size;

			/*
			 * No check for "required_protection" on "src_entry"
			 * because the protections that matter are the ones
			 * on the submap's VM map entry, which will be checked
			 * during the call to vm_map_remap_extract() below.
			 */
			submap_size = src_entry->vme_end - src_start;
			if (submap_size > size) {
				submap_size = size;
			}
			submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
			submap = VME_SUBMAP(src_entry);
			vm_map_reference(submap);
			vm_map_unlock(map);

			result = vm_map_remap_extract(submap,
			    submap_start,
			    submap_size,
			    required_protection,
			    copy,
			    map_header,
			    cur_protection,
			    max_protection,
			    inheritance,
			    vmk_flags);
			vm_map_deallocate(submap);
			return result;
		}

		if ((src_entry->protection & required_protection)
		    != required_protection) {
			if (vmk_flags.vmkf_copy_single_object &&
			    mapped_size != 0) {
				/*
				 * Single object extraction.
				 * We can't extract more with the required
				 * protection but we've extracted some, so
				 * stop there and declare success.
				 * The caller should check the size of
				 * the copy entry we've extracted.
				 */
				result = KERN_SUCCESS;
			} else {
				/*
				 * VM range extraction.
				 * Required protection is not available
				 * for this part of the range: fail.
				 */
				result = KERN_PROTECTION_FAILURE;
			}
			break;
		}
		if (src_entry->is_sub_map &&
		    VM_MAP_PAGE_SHIFT(VME_SUBMAP(src_entry)) < PAGE_SHIFT) {
			vm_map_t        submap;
			vm_map_offset_t submap_start;
			vm_map_size_t   submap_size;
			vm_map_copy_t   submap_copy;
			vm_prot_t       submap_curprot, submap_maxprot;

			vm_remap_new_path++;

			/*
			 * No check for "required_protection" on "src_entry"
			 * because the protections that matter are the ones
			 * on the submap's VM map entry, which will be checked
			 * during the call to vm_map_copy_extract() below.
			 */
			object = VM_OBJECT_NULL;
			submap_copy = VM_MAP_COPY_NULL;

			/* find equivalent range in the submap */
			submap = VME_SUBMAP(src_entry);
			submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
			submap_size = tmp_size;
			/* extra ref to keep submap alive */
			vm_map_reference(submap);

			DTRACE_VM6(remap_submap_recurse,
			    vm_map_t, map,
			    vm_map_offset_t, addr,
			    vm_map_size_t, size,
			    boolean_t, copy,
			    vm_map_offset_t, submap_start,
			    vm_map_size_t, submap_size);

			/*
			 * The map can be safely unlocked since we
			 * already hold a reference on the submap.
			 *
			 * No timestamp since we don't care if the map
			 * gets modified while we're down in the submap.
			 * We'll resume the extraction at src_start + tmp_size
			 * below.
			 */
			vm_map_unlock(map);
			src_entry = NULL; /* not valid once map is unlocked */

			result = vm_map_copy_extract(submap,
			    submap_start,
			    submap_size,
			    required_protection,
			    copy,
			    &submap_copy,
			    &submap_curprot,
			    &submap_maxprot,
			    inheritance,
			    vmk_flags);

			/* release extra ref on submap */
			vm_map_deallocate(submap);
			submap = VM_MAP_NULL;

			if (result != KERN_SUCCESS) {
				vm_map_lock(map);
				break;
			}

			/* transfer submap_copy entries to map_header */
			while (vm_map_copy_first_entry(submap_copy) !=
			    vm_map_copy_to_entry(submap_copy)) {
				vm_map_entry_t copy_entry;
				vm_map_size_t copy_entry_size;

				copy_entry = vm_map_copy_first_entry(submap_copy);
				assert(!copy_entry->is_sub_map);
				vm_map_copy_entry_unlink(submap_copy, copy_entry);
				copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
				copy_entry->vme_start = map_address;
				copy_entry->vme_end = map_address + copy_entry_size;
				map_address += copy_entry_size;
				mapped_size += copy_entry_size;
				src_start += copy_entry_size;
				assert(src_start <= src_end);
				_vm_map_store_entry_link(map_header,
				    map_header->links.prev,
				    copy_entry);
			}
			/* done with submap_copy */
			vm_map_copy_discard(submap_copy);

			*cur_protection &= submap_curprot;
			*max_protection &= submap_maxprot;

			/* re-acquire the map lock and continue to next entry */
			vm_map_lock(map);
			continue;
		} else if (src_entry->is_sub_map) {
			vm_remap_old_path++;
			DTRACE_VM4(remap_submap,
			    vm_map_t, map,
			    vm_map_offset_t, addr,
			    vm_map_size_t, size,
			    boolean_t, copy);

			vm_map_reference(VME_SUBMAP(src_entry));
			object = VM_OBJECT_NULL;
		} else {
			object = VME_OBJECT(src_entry);
			if (src_entry->iokit_acct) {
				/*
				 * This entry uses "IOKit accounting".
				 */
			} else if (object != VM_OBJECT_NULL &&
			    (object->purgable != VM_PURGABLE_DENY ||
			    object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
				/*
				 * Purgeable objects have their own accounting:
				 * no pmap accounting for them.
				 */
				assertf(!src_entry->use_pmap,
				    "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
				    map,
				    src_entry,
				    (uint64_t)src_entry->vme_start,
				    (uint64_t)src_entry->vme_end,
				    src_entry->protection,
				    src_entry->max_protection,
				    VME_ALIAS(src_entry));
			} else {
				/*
				 * Not IOKit or purgeable:
				 * must be accounted by pmap stats.
				 */
				assertf(src_entry->use_pmap,
				    "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
				    map,
				    src_entry,
				    (uint64_t)src_entry->vme_start,
				    (uint64_t)src_entry->vme_end,
				    src_entry->protection,
				    src_entry->max_protection,
				    VME_ALIAS(src_entry));
			}

			if (object == VM_OBJECT_NULL) {
				assert(!src_entry->needs_copy);
				object = vm_object_allocate(entry_size);
				VME_OFFSET_SET(src_entry, 0);
				VME_OBJECT_SET(src_entry, object);
				assert(src_entry->use_pmap);
			} else if (src_entry->wired_count ||
			    object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
				/*
				 * A wired memory region should not have
				 * any pending copy-on-write and needs to
				 * keep pointing at the VM object that
				 * contains the wired pages.
				 * If we're sharing this memory (copy=false),
				 * we'll share this VM object.
				 * If we're copying this memory (copy=true),
				 * we'll call vm_object_copy_slowly() below
				 * and use the new VM object for the remapping.
				 *
				 * Or, we are already using an asymmetric
				 * copy, and therefore we already have
				 * the right object.
				 */
				assert(!src_entry->needs_copy);
			} else if (src_entry->needs_copy || object->shadowed ||
			    (object->internal && !object->true_share &&
			    !src_entry->is_shared &&
			    object->vo_size > entry_size)) {
				VME_OBJECT_SHADOW(src_entry, entry_size);
				assert(src_entry->use_pmap);

				if (!src_entry->needs_copy &&
				    (src_entry->protection & VM_PROT_WRITE)) {
					vm_prot_t prot;

					assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));

					prot = src_entry->protection & ~VM_PROT_WRITE;

					if (override_nx(map,
					    VME_ALIAS(src_entry))
					    && prot) {
						prot |= VM_PROT_EXECUTE;
					}

					assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));

					if (map->mapped_in_other_pmaps) {
						vm_object_pmap_protect(
							VME_OBJECT(src_entry),
							VME_OFFSET(src_entry),
							entry_size,
							PMAP_NULL,
							PAGE_SIZE,
							src_entry->vme_start,
							prot);
#if MACH_ASSERT
					} else if (__improbable(map->pmap == PMAP_NULL)) {
						extern boolean_t vm_tests_in_progress;
						assert(vm_tests_in_progress);
						/*
						 * Some VM tests (in vm_tests.c)
						 * sometimes want to use a VM
						 * map without a pmap.
						 * Otherwise, this should never
						 * happen.
						 */
#endif /* MACH_ASSERT */
					} else {
						pmap_protect(vm_map_pmap(map),
						    src_entry->vme_start,
						    src_entry->vme_end,
						    prot);
					}
				}

				object = VME_OBJECT(src_entry);
				src_entry->needs_copy = FALSE;
			}

			vm_object_lock(object);
			vm_object_reference_locked(object); /* object ref. for new entry */
			assert(!src_entry->needs_copy);
			if (object->copy_strategy ==
			    MEMORY_OBJECT_COPY_SYMMETRIC) {
				/*
				 * If we want to share this object (copy==0),
				 * it needs to be COPY_DELAY.
				 * If we want to copy this object (copy==1),
				 * we can't just set "needs_copy" on our side
				 * and expect the other side to do the same
				 * (symmetrically), so we can't let the object
				 * stay COPY_SYMMETRIC.
				 * So we always switch from COPY_SYMMETRIC to
				 * COPY_DELAY.
				 */
				object->copy_strategy =
				    MEMORY_OBJECT_COPY_DELAY;
			}
			vm_object_unlock(object);
		}
		offset = (VME_OFFSET(src_entry) +
		    (src_start - src_entry->vme_start));

		new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
		vm_map_entry_copy(map, new_entry, src_entry);
		if (new_entry->is_sub_map) {
			/* clr address space specifics */
			new_entry->use_pmap = FALSE;
		} else {
			/*
			 * We're dealing with a copy-on-write operation,
			 * so the resulting mapping should not inherit the
			 * original mapping's accounting settings.
			 * "use_pmap" should be reset to its default (TRUE)
			 * so that the new mapping gets accounted for in
			 * the task's memory footprint.
			 */
			new_entry->use_pmap = TRUE;
		}
		/* "iokit_acct" was cleared in vm_map_entry_copy() */
		assert(!new_entry->iokit_acct);

		new_entry->map_aligned = FALSE;

		new_entry->vme_start = map_address;
		new_entry->vme_end = map_address + tmp_size;
		assert(new_entry->vme_start < new_entry->vme_end);
		if (copy && vmk_flags.vmkf_remap_prot_copy) {
			/*
			 * Remapping for vm_map_protect(VM_PROT_COPY)
			 * to convert a read-only mapping into a
			 * copy-on-write version of itself but
			 * with write access:
			 * keep the original inheritance and add
			 * VM_PROT_WRITE to the max protection.
			 */
			new_entry->inheritance = src_entry->inheritance;
			new_entry->protection &= max_prot_for_prot_copy;
			new_entry->max_protection |= VM_PROT_WRITE;
		} else {
			new_entry->inheritance = inheritance;
		}
		VME_OFFSET_SET(new_entry, offset);

		/*
		 * The new region has to be copied now if required.
		 */
RestartCopy:
		if (src_entry->used_for_jit == TRUE) {
			if (same_map) {
#if __APRR_SUPPORTED__
				/*
				 * Disallow re-mapping of any JIT regions on APRR devices.
				 */
				result = KERN_PROTECTION_FAILURE;
				break;
#endif /* __APRR_SUPPORTED__*/
			} else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
				/*
				 * Cannot allow an entry describing a JIT
				 * region to be shared across address spaces.
				 */
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}
		if (!copy) {
			src_entry->is_shared = TRUE;
			new_entry->is_shared = TRUE;
			if (!(new_entry->is_sub_map)) {
				new_entry->needs_copy = FALSE;
			}
		} else if (src_entry->is_sub_map) {
			/* make this a COW sub_map if not already */
			assert(new_entry->wired_count == 0);
			new_entry->needs_copy = TRUE;
			object = VM_OBJECT_NULL;
		} else if (src_entry->wired_count == 0 &&
		    !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
		    vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
		    VME_OFFSET(new_entry),
		    (new_entry->vme_end -
		    new_entry->vme_start),
		    &src_needs_copy,
		    &new_entry_needs_copy)) {
			new_entry->needs_copy = new_entry_needs_copy;
			new_entry->is_shared = FALSE;
			assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);

			/*
			 * Handle copy_on_write semantics.
			 */
			if (src_needs_copy && !src_entry->needs_copy) {
				vm_prot_t prot;

				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));

				prot = src_entry->protection & ~VM_PROT_WRITE;

				if (override_nx(map,
				    VME_ALIAS(src_entry))
				    && prot) {
					prot |= VM_PROT_EXECUTE;
				}

				assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));

				vm_object_pmap_protect(object,
				    offset,
				    entry_size,
				    ((src_entry->is_shared
				    || map->mapped_in_other_pmaps) ?
				    PMAP_NULL : map->pmap),
				    VM_MAP_PAGE_SIZE(map),
				    src_entry->vme_start,
				    prot);

				assert(src_entry->wired_count == 0);
				src_entry->needs_copy = TRUE;
			}
			/*
			 * Throw away the old object reference of the new entry.
			 */
			vm_object_deallocate(object);
		} else {
			new_entry->is_shared = FALSE;
			assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
			src_entry_was_wired = (src_entry->wired_count > 0);
			saved_src_entry = src_entry;
			src_entry = VM_MAP_ENTRY_NULL;

			/*
			 * The map can be safely unlocked since we
			 * already hold a reference on the object.
			 *
			 * Record the timestamp of the map for later
			 * verification, and unlock the map.
			 */
			version.main_timestamp = map->timestamp;
			vm_map_unlock(map);    /* Increments timestamp once! */

			/*
			 * Perform the copy.
			 */
			if (src_entry_was_wired > 0 ||
			    (debug4k_no_cow_copyin &&
			    VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
				vm_object_lock(object);
				result = vm_object_copy_slowly(
					object,
					offset,
					(new_entry->vme_end -
					new_entry->vme_start),
					THREAD_UNINT,
					VME_OBJECT_PTR(new_entry));

				VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
				new_entry->needs_copy = FALSE;
			} else {
				vm_object_offset_t new_offset;

				new_offset = VME_OFFSET(new_entry);
				result = vm_object_copy_strategically(
					object,
					offset,
					(new_entry->vme_end -
					new_entry->vme_start),
					VME_OBJECT_PTR(new_entry),
					&new_offset,
					&new_entry_needs_copy);
				if (new_offset != VME_OFFSET(new_entry)) {
					VME_OFFSET_SET(new_entry, new_offset);
				}

				new_entry->needs_copy = new_entry_needs_copy;
			}

			/*
			 * Throw away the old object reference of the new entry.
			 */
			vm_object_deallocate(object);

			if (result != KERN_SUCCESS &&
			    result != KERN_MEMORY_RESTART_COPY) {
				_vm_map_entry_dispose(map_header, new_entry);
				vm_map_lock(map);
				break;
			}

			/*
			 * Verify that the map has not substantially
			 * changed while the copy was being made.
			 */
			vm_map_lock(map);
			if (version.main_timestamp + 1 != map->timestamp) {
				/*
				 * Simple version comparison failed.
				 *
				 * Retry the lookup and verify that the
				 * same object/offset are still present.
				 */
				saved_src_entry = VM_MAP_ENTRY_NULL;
				vm_object_deallocate(VME_OBJECT(new_entry));
				_vm_map_entry_dispose(map_header, new_entry);
				if (result == KERN_MEMORY_RESTART_COPY) {
					result = KERN_SUCCESS;
				}
				continue;
			}
			/* map hasn't changed: src_entry is still valid */
			src_entry = saved_src_entry;
			saved_src_entry = VM_MAP_ENTRY_NULL;

			if (result == KERN_MEMORY_RESTART_COPY) {
				vm_object_reference(object);
				goto RestartCopy;
			}
		}

		_vm_map_store_entry_link(map_header,
		    map_header->links.prev, new_entry);

		/* Protections for submap mapping are irrelevant here */
		if (!src_entry->is_sub_map) {
			*cur_protection &= src_entry->protection;
			*max_protection &= src_entry->max_protection;
		}

		map_address += tmp_size;
		mapped_size += tmp_size;
		src_start += tmp_size;

		if (vmk_flags.vmkf_copy_single_object) {
			if (mapped_size != size) {
				DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
				if (src_entry->vme_next != vm_map_to_entry(map) &&
				    VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
					DEBUG4K_ERROR("could have extended copy to next entry...\n");
				}
			}
			break;
		}
	}

	vm_map_unlock(map);
	if (result != KERN_SUCCESS) {
		/*
		 * Free all allocated elements.
		 */
		for (src_entry = map_header->links.next;
		    src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
		    src_entry = new_entry) {
			new_entry = src_entry->vme_next;
			_vm_map_store_entry_unlink(map_header, src_entry);
			if (src_entry->is_sub_map) {
				vm_map_deallocate(VME_SUBMAP(src_entry));
			} else {
				vm_object_deallocate(VME_OBJECT(src_entry));
			}
			_vm_map_entry_dispose(map_header, src_entry);
		}
	}
	return result;
}
boolean_t
vm_map_is_exotic(
	vm_map_t map)
{
	return VM_MAP_IS_EXOTIC(map);
}

boolean_t
vm_map_is_alien(
	vm_map_t map)
{
	return VM_MAP_IS_ALIEN(map);
}

#if XNU_TARGET_OS_OSX
void
vm_map_mark_alien(
	vm_map_t map)
{
	vm_map_lock(map);
	map->is_alien = true;
	vm_map_unlock(map);
}
#endif /* XNU_TARGET_OS_OSX */
void vm_map_copy_to_physcopy(vm_map_copy_t copy_map, vm_map_t target_map);
void
vm_map_copy_to_physcopy(
	vm_map_copy_t   copy_map,
	vm_map_t        target_map)
{
	vm_map_size_t           size;
	vm_map_entry_t          entry;
	vm_map_entry_t          new_entry;
	vm_object_t             new_object;
	unsigned int            pmap_flags;
	kern_return_t           kr;
	pmap_t                  new_pmap;
	vm_map_t                new_map;
	vm_map_address_t        src_start, src_end, src_cur;
	vm_map_address_t        dst_start, dst_end, dst_cur;
	void                    *kbuf;

	/*
	 * Perform the equivalent of vm_allocate() and memcpy().
	 * Replace the mappings in "copy_map" with the newly allocated mapping.
	 */
	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);

	assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));

	/* allocate new VM object */
	size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
	new_object = vm_object_allocate(size);
	assert(new_object);

	/* allocate new VM map entry */
	new_entry = vm_map_copy_entry_create(copy_map, FALSE);

	/* finish initializing new VM map entry */
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_DEFAULT;
	new_entry->use_pmap = TRUE;

	/* make new VM map entry point to new VM object */
	new_entry->vme_start = 0;
	new_entry->vme_end = size;
	VME_OBJECT_SET(new_entry, new_object);
	VME_OFFSET_SET(new_entry, 0);

	/* create a new pmap to map "copy_map" */
	pmap_flags = 0;
	assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
#if PMAP_CREATE_FORCE_4K_PAGES
	pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
#endif /* PMAP_CREATE_FORCE_4K_PAGES */
	pmap_flags |= PMAP_CREATE_64BIT;
	new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);

	/* create a new pageable VM map to map "copy_map" */
	new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
	vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);

	/* map "copy_map" in the new VM map */
	kr = vm_map_copyout_internal(
		new_map,
		&src_start,
		copy_map,
		copy_map->size,
		FALSE, /* consume_on_success */
		VM_PROT_DEFAULT,
		VM_PROT_ALL,
		VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	src_end = src_start + copy_map->size;

	/* map "new_object" in the new VM map */
	vm_object_reference(new_object);

	kr = vm_map_enter(new_map,
	    &dst_start,
	    size,
	    0, /* mask */
	    VM_FLAGS_ANYWHERE,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_OSFMK,
	    new_object,
	    0, /* offset */
	    FALSE, /* needs copy */
	    VM_PROT_DEFAULT,
	    VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
	assert(kr == KERN_SUCCESS);
	dst_end = dst_start + size;

	/* get a kernel buffer */
	kbuf = kheap_alloc(KHEAP_TEMP, PAGE_SIZE, Z_WAITOK);

	/* physically copy "copy_map" mappings to new VM object */
	for (src_cur = src_start, dst_cur = dst_start;
	    src_cur < src_end;
	    src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
		vm_map_size_t bytes;

		bytes = PAGE_SIZE;
		if (src_cur + PAGE_SIZE > src_end) {
			/* partial copy for last page */
			bytes = src_end - src_cur;
			assert(bytes > 0 && bytes < PAGE_SIZE);
			/* rest of dst page should be zero-filled */
		}
		/* get bytes from src mapping */
		kr = copyinmap(new_map, src_cur, kbuf, bytes);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
		}
		/* put bytes in dst mapping */
		assert(dst_cur < dst_end);
		assert(dst_cur + bytes <= dst_end);
		kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
		if (kr != KERN_SUCCESS) {
			DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
		}
	}

	/* free kernel buffer */
	kheap_free(KHEAP_TEMP, kbuf, PAGE_SIZE);

	/* destroy new map */
	vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
	new_map = VM_MAP_NULL;

	/* dispose of the old map entries in "copy_map" */
	while (vm_map_copy_first_entry(copy_map) !=
	    vm_map_copy_to_entry(copy_map)) {
		entry = vm_map_copy_first_entry(copy_map);
		vm_map_copy_entry_unlink(copy_map, entry);
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(copy_map, entry);
	}

	/* change "copy_map"'s page_size to match "target_map" */
	copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
	copy_map->offset = 0;
	copy_map->size = size;

	/* insert new map entry in "copy_map" */
	assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
	vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);

	DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
}
static void
vm_map_copy_adjust_get_target_copy_map(
	vm_map_copy_t   copy_map,
	vm_map_copy_t   *target_copy_map_p);
static void
vm_map_copy_adjust_get_target_copy_map(
	vm_map_copy_t   copy_map,
	vm_map_copy_t   *target_copy_map_p)
{
	vm_map_copy_t   target_copy_map;
	vm_map_entry_t  entry, target_entry;

	if (*target_copy_map_p != VM_MAP_COPY_NULL) {
		/* the caller already has a "target_copy_map": use it */
		return;
	}

	/* the caller wants us to create a new copy of "copy_map" */
	target_copy_map = vm_map_copy_allocate();
	target_copy_map->type = copy_map->type;
	assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
	target_copy_map->offset = copy_map->offset;
	target_copy_map->size = copy_map->size;
	target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
	vm_map_store_init(&target_copy_map->cpy_hdr);
	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = entry->vme_next) {
		target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
		vm_map_entry_copy_full(target_entry, entry);
		if (target_entry->is_sub_map) {
			vm_map_reference(VME_SUBMAP(target_entry));
		} else {
			vm_object_reference(VME_OBJECT(target_entry));
		}
		vm_map_copy_entry_link(
			target_copy_map,
			vm_map_copy_last_entry(target_copy_map),
			target_entry);
	}
	entry = VM_MAP_ENTRY_NULL;
	*target_copy_map_p = target_copy_map;
}
static void
vm_map_copy_trim(
	vm_map_copy_t   copy_map,
	int             new_page_shift,
	vm_map_offset_t trim_start,
	vm_map_offset_t trim_end);
static void
vm_map_copy_trim(
	vm_map_copy_t   copy_map,
	int             new_page_shift,
	vm_map_offset_t trim_start,
	vm_map_offset_t trim_end)
{
	int             copy_page_shift;
	vm_map_entry_t  entry, next_entry;

	assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
	assert(copy_map->cpy_hdr.nentries > 0);

	trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
	trim_end += vm_map_copy_first_entry(copy_map)->vme_start;

	/* use the new page_shift to do the clipping */
	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_map->cpy_hdr.page_shift = new_page_shift;

	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = next_entry) {
		next_entry = entry->vme_next;
		if (entry->vme_end <= trim_start) {
			/* entry fully before trim range: skip */
			continue;
		}
		if (entry->vme_start >= trim_end) {
			/* entry fully after trim range: done */
			break;
		}
		/* clip entry if needed */
		vm_map_copy_clip_start(copy_map, entry, trim_start);
		vm_map_copy_clip_end(copy_map, entry, trim_end);
		/* dispose of entry */
		copy_map->size -= entry->vme_end - entry->vme_start;
		vm_map_copy_entry_unlink(copy_map, entry);
		if (entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(entry));
		} else {
			vm_object_deallocate(VME_OBJECT(entry));
		}
		vm_map_copy_entry_dispose(copy_map, entry);
		entry = VM_MAP_ENTRY_NULL;
	}

	/* restore copy_map's original page_shift */
	copy_map->cpy_hdr.page_shift = copy_page_shift;
}
/*
 * Make any necessary adjustments to "copy_map" to allow it to be
 * mapped into "target_map".
 * If no changes were necessary, "target_copy_map" points to the
 * untouched "copy_map".
 * If changes are necessary, changes will be made to "target_copy_map".
 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
 * copy the original "copy_map" to it before applying the changes.
 * The caller should discard "target_copy_map" if it's not the same as
 * the original "copy_map".
 */
/* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
static kern_return_t
vm_map_copy_adjust_to_target(
	vm_map_copy_t           src_copy_map,
	vm_map_offset_t         offset,
	vm_map_size_t           size,
	vm_map_t                target_map,
	boolean_t               copy,
	vm_map_copy_t           *target_copy_map_p,
	vm_map_offset_t         *overmap_start_p,
	vm_map_offset_t         *overmap_end_p,
	vm_map_offset_t         *trimmed_start_p)
{
	vm_map_copy_t           copy_map, target_copy_map;
	vm_map_size_t           target_size;
	vm_map_size_t           src_copy_map_size;
	vm_map_size_t           overmap_start, overmap_end;
	int                     misalignments;
	vm_map_entry_t          entry, target_entry;
	vm_map_offset_t         addr_adjustment;
	vm_map_offset_t         new_start, new_end;
	int                     copy_page_mask, target_page_mask;
	int                     copy_page_shift, target_page_shift;
	vm_map_offset_t         trimmed_end;

	/*
	 * Assert that the vm_map_copy is coming from the right
	 * zone and hasn't been forged
	 */
	vm_map_copy_require(src_copy_map);
	assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);

	/*
	 * Start working with "src_copy_map" but we'll switch
	 * to "target_copy_map" as soon as we start making adjustments.
	 */
	copy_map = src_copy_map;
	src_copy_map_size = src_copy_map->size;

	copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
	copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
	target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
	target_page_mask = VM_MAP_PAGE_MASK(target_map);

	DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);

	target_copy_map = *target_copy_map_p;
	if (target_copy_map != VM_MAP_COPY_NULL) {
		vm_map_copy_require(target_copy_map);
	}

	if (offset + size > copy_map->size) {
		DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
		return KERN_INVALID_ARGUMENT;
	}

	/* trim the end */
	trimmed_end = 0;
	new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
	if (new_end < copy_map->size) {
		trimmed_end = src_copy_map_size - new_end;
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    new_end, copy_map->size);
	}

	/* trim the start */
	new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
	if (new_start != 0) {
		DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
		/* get "target_copy_map" if needed and adjust it */
		vm_map_copy_adjust_get_target_copy_map(copy_map,
		    &target_copy_map);
		copy_map = target_copy_map;
		vm_map_copy_trim(target_copy_map, target_page_shift,
		    0, new_start);
	}
	*trimmed_start_p = new_start;

	/* target_size starts with what's left after trimming */
	target_size = copy_map->size;
	assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
	    "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
	    (uint64_t)target_size, (uint64_t)src_copy_map_size,
	    (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
	/* check for misalignments but don't adjust yet */
	misalignments = 0;
	overmap_start = 0;
	overmap_end = 0;
	if (copy_page_shift < target_page_shift) {
		/*
		 * Remapping from 4K to 16K: check the VM object alignments
		 * throughout the range.
		 * If the start and end of the range are mis-aligned, we can
		 * over-map to re-align, and adjust the "overmap" start/end
		 * and "target_size" of the range accordingly.
		 * If there is any mis-alignment within the range:
		 *    if copying:
		 *       we can do immediate-copy instead of copy-on-write,
		 *    if sharing:
		 *       no way to remap and share; fail.
		 */
		for (entry = vm_map_copy_first_entry(copy_map);
		    entry != vm_map_copy_to_entry(copy_map);
		    entry = entry->vme_next) {
			vm_object_offset_t object_offset_start, object_offset_end;

			object_offset_start = VME_OFFSET(entry);
			object_offset_end = object_offset_start;
			object_offset_end += entry->vme_end - entry->vme_start;
			if (object_offset_start & target_page_mask) {
				if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
					overmap_start++;
				} else {
					misalignments++;
				}
			}
			if (object_offset_end & target_page_mask) {
				if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
					overmap_end++;
				} else {
					misalignments++;
				}
			}
		}

		entry = VM_MAP_ENTRY_NULL;
	}
	/* decide how to deal with misalignments */
	assert(overmap_start <= 1);
	assert(overmap_end <= 1);
	if (!overmap_start && !overmap_end && !misalignments) {
		/* copy_map is properly aligned for target_map ... */
		if (*trimmed_start_p) {
			/* ... but we trimmed it, so still need to adjust */
		} else {
			/* ... and we didn't trim anything: we're done */
			if (target_copy_map == VM_MAP_COPY_NULL) {
				target_copy_map = copy_map;
			}
			*target_copy_map_p = target_copy_map;
			*overmap_start_p = 0;
			*overmap_end_p = 0;
			DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
			return KERN_SUCCESS;
		}
	} else if (misalignments && !copy) {
		/* can't "share" if misaligned */
		DEBUG4K_ADJUST("unsupported sharing\n");
#if MACH_ASSERT
		if (debug4k_panic_on_misaligned_sharing) {
			panic("DEBUG4k %s:%d unsupported sharing\n", __FUNCTION__, __LINE__);
		}
#endif /* MACH_ASSERT */
		DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
		return KERN_NOT_SUPPORTED;
	} else {
		/* can't virtual-copy if misaligned (but can physical-copy) */
		DEBUG4K_ADJUST("mis-aligned copying\n");
	}

	/* get a "target_copy_map" if needed and switch to it */
	vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
	copy_map = target_copy_map;

	if (misalignments && copy) {
		vm_map_size_t target_copy_map_size;

		/*
		 * Can't do copy-on-write with misaligned mappings.
		 * Replace the mappings with a physical copy of the original
		 * mappings' contents.
		 */
		target_copy_map_size = target_copy_map->size;
		vm_map_copy_to_physcopy(target_copy_map, target_map);
		*target_copy_map_p = target_copy_map;
		*overmap_start_p = 0;
		*overmap_end_p = target_copy_map->size - target_copy_map_size;
		DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
		return KERN_SUCCESS;
	}
17511 /* remove copy_map->offset, so that everything starts at offset 0 */
17512 addr_adjustment
= copy_map
->offset
;
17513 /* also remove whatever we trimmed from the start */
17514 addr_adjustment
+= *trimmed_start_p
;
17515 for (target_entry
= vm_map_copy_first_entry(target_copy_map
);
17516 target_entry
!= vm_map_copy_to_entry(target_copy_map
);
17517 target_entry
= target_entry
->vme_next
) {
17518 vm_object_offset_t object_offset_start
, object_offset_end
;
17520 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map
, VM_MAP_COPY_PAGE_SHIFT(target_copy_map
), target_copy_map
->offset
, (uint64_t)target_copy_map
->size
, target_entry
, (uint64_t)target_entry
->vme_start
, (uint64_t)target_entry
->vme_end
, VME_OBJECT(target_entry
), VME_OFFSET(target_entry
));
17521 object_offset_start
= VME_OFFSET(target_entry
);
17522 if (object_offset_start
& target_page_mask
) {
17523 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map
, VM_MAP_COPY_PAGE_SHIFT(target_copy_map
), target_copy_map
->offset
, (uint64_t)target_copy_map
->size
, target_entry
, (uint64_t)target_entry
->vme_start
, (uint64_t)target_entry
->vme_end
, VME_OBJECT(target_entry
), VME_OFFSET(target_entry
));
17524 if (target_entry
== vm_map_copy_first_entry(target_copy_map
)) {
17526 * start of 1st entry is mis-aligned:
17527 * re-adjust by over-mapping.
17529 overmap_start
= object_offset_start
- trunc_page_mask_64(object_offset_start
, target_page_mask
);
17530 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry
, VME_OFFSET(target_entry
), copy
, (uint64_t)overmap_start
);
17531 VME_OFFSET_SET(target_entry
, VME_OFFSET(target_entry
) - overmap_start
);
17534 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry
, VME_OFFSET(target_entry
), copy
, misalignments
);
17539 if (target_entry
== vm_map_copy_first_entry(target_copy_map
)) {
17540 target_size
+= overmap_start
;
17542 target_entry
->vme_start
+= overmap_start
;
17544 target_entry
->vme_end
+= overmap_start
;
17546 object_offset_end
= VME_OFFSET(target_entry
) + target_entry
->vme_end
- target_entry
->vme_start
;
17547 if (object_offset_end
& target_page_mask
) {
17548 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map
, VM_MAP_COPY_PAGE_SHIFT(target_copy_map
), target_copy_map
->offset
, (uint64_t)target_copy_map
->size
, target_entry
, (uint64_t)target_entry
->vme_start
, (uint64_t)target_entry
->vme_end
, VME_OBJECT(target_entry
), VME_OFFSET(target_entry
));
17549 if (target_entry
->vme_next
== vm_map_copy_to_entry(target_copy_map
)) {
17551 * end of last entry is mis-aligned: re-adjust by over-mapping.
17553 overmap_end
= round_page_mask_64(object_offset_end
, target_page_mask
) - object_offset_end
;
17554 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry
, VME_OFFSET(target_entry
), copy
, (uint64_t)overmap_end
);
17555 target_entry
->vme_end
+= overmap_end
;
17556 target_size
+= overmap_end
;
17559 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry
, VME_OFFSET(target_entry
), copy
, misalignments
);
17563 target_entry
->vme_start
-= addr_adjustment
;
17564 target_entry
->vme_end
-= addr_adjustment
;
17565 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map
, VM_MAP_COPY_PAGE_SHIFT(target_copy_map
), target_copy_map
->offset
, (uint64_t)target_copy_map
->size
, target_entry
, (uint64_t)target_entry
->vme_start
, (uint64_t)target_entry
->vme_end
, VME_OBJECT(target_entry
), VME_OFFSET(target_entry
));
17568 target_copy_map
->size
= target_size
;
17569 target_copy_map
->offset
+= overmap_start
;
17570 target_copy_map
->offset
-= addr_adjustment
;
17571 target_copy_map
->cpy_hdr
.page_shift
= target_page_shift
;
17573 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17574 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17575 assert(overmap_start
< VM_MAP_PAGE_SIZE(target_map
));
17576 assert(overmap_end
< VM_MAP_PAGE_SIZE(target_map
));
17578 *target_copy_map_p
= target_copy_map
;
17579 *overmap_start_p
= overmap_start
;
17580 *overmap_end_p
= overmap_end
;
17582 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map
, copy_page_shift
, (uint64_t)copy_map
->offset
, (uint64_t)copy_map
->size
, target_map
, target_page_shift
, copy
, *target_copy_map_p
, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p
), (uint64_t)(*target_copy_map_p
)->offset
, (uint64_t)(*target_copy_map_p
)->size
, (uint64_t)*trimmed_start_p
, (uint64_t)*overmap_start_p
, (uint64_t)*overmap_end_p
);
17583 return KERN_SUCCESS
;
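
/*
 * Worked example (illustrative; the numbers are chosen for the example, not
 * taken from any particular caller): remapping a 4K copy_map into a 16K
 * target_map (target_page_mask == 0x3fff), where the first entry's VM object
 * offset is 0x7000 and the last entry ends at object offset 0x1d000:
 *
 *	overmap_start = 0x7000  - trunc_page_mask_64(0x7000, 0x3fff)
 *	              = 0x7000  - 0x4000  = 0x3000
 *	overmap_end   = round_page_mask_64(0x1d000, 0x3fff) - 0x1d000
 *	              = 0x20000 - 0x1d000 = 0x3000
 *
 * so the adjusted mapping over-maps 0x3000 bytes before the requested range
 * and 0x3000 bytes after it, and "target_size" grows by both amounts so that
 * the result is 16K-aligned at both ends.
 */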
kern_return_t
vm_map_range_physical_size(
	vm_map_t         map,
	vm_map_address_t start,
	mach_vm_size_t   size,
	mach_vm_size_t   *phys_size)
{
	kern_return_t   kr;
	vm_map_copy_t   copy_map, target_copy_map;
	vm_map_offset_t adjusted_start, adjusted_end;
	vm_map_size_t   adjusted_size;
	vm_prot_t       cur_prot, max_prot;
	vm_map_offset_t overmap_start, overmap_end, trimmed_start;
	vm_map_kernel_flags_t vmk_flags;

	adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
	adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
	adjusted_size = adjusted_end - adjusted_start;
	*phys_size = adjusted_size;
	if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
		return KERN_SUCCESS;
	}
	if (start == 0) {
		adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
		adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
		adjusted_size = adjusted_end - adjusted_start;
		*phys_size = adjusted_size;
		return KERN_SUCCESS;
	}
	if (adjusted_size == 0) {
		DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
		*phys_size = 0;
		return KERN_SUCCESS;
	}

	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vmk_flags.vmkf_copy_pageable = TRUE;
	vmk_flags.vmkf_copy_same_map = TRUE;
	assert(adjusted_size != 0);
	kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
	    VM_PROT_NONE, /* required_protection: no check here */
	    FALSE /* copy */,
	    &copy_map,
	    &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
	    vmk_flags);
	if (kr != KERN_SUCCESS) {
		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
		return kr;
	}
	assert(copy_map != VM_MAP_COPY_NULL);
	target_copy_map = copy_map;
	DEBUG4K_ADJUST("adjusting...\n");
	kr = vm_map_copy_adjust_to_target(
		copy_map,
		start - adjusted_start, /* offset */
		size, /* size */
		kernel_map,
		FALSE, /* copy */
		&target_copy_map,
		&overmap_start,
		&overmap_end,
		&trimmed_start);
	if (kr == KERN_SUCCESS) {
		if (target_copy_map->size != *phys_size) {
			DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
		}
		*phys_size = target_copy_map->size;
	} else {
		DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
	}
	vm_map_copy_discard(copy_map);
	copy_map = VM_MAP_COPY_NULL;

	return kr;
}
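
/*
 * Worked example (illustrative; assumes the backing VM object starts
 * 16K-aligned): on a 4K task map running under a 16K kernel, a range
 * [start = 0x6800, size = 0x1000) is first rounded with the map's own mask:
 *
 *	adjusted_start = trunc(0x6800, 0xfff) = 0x6000
 *	adjusted_end   = round(0x7800, 0xfff) = 0x8000
 *
 * but because the kernel's native page size is 16K, the extracted copy is
 * then re-adjusted against the 16K geometry, and *phys_size ends up
 * reporting how much 16K-granular address space the range really occupies
 * (0x4000 in this case, covering 0x4000..0x8000 of the object).
 */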
static kern_return_t
memory_entry_check_for_adjustment(
	vm_map_t        src_map,
	ipc_port_t      port,
	vm_map_offset_t *overmap_start,
	vm_map_offset_t *overmap_end)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;

	assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));

	vm_named_entry_t named_entry;

	named_entry = (vm_named_entry_t) port->ip_kobject;
	named_entry_lock(named_entry);
	copy_map = named_entry->backing.copy;
	target_copy_map = copy_map;

	if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
		vm_map_offset_t trimmed_start;

		trimmed_start = 0;
		DEBUG4K_ADJUST("adjusting...\n");
		kr = vm_map_copy_adjust_to_target(
			copy_map,
			0, /* offset */
			copy_map->size, /* size */
			src_map,
			FALSE, /* copy */
			&target_copy_map,
			overmap_start,
			overmap_end,
			&trimmed_start);
		assert(trimmed_start == 0);
	}
	named_entry_unlock(named_entry);

	return kr;
}
/*
 * Routine:	vm_remap
 *
 *		Map portion of a task's address space.
 *		Mapped region must not overlap more than
 *		one vm memory object. Protections and
 *		inheritance attributes remain the same
 *		as in the original task and are out parameters.
 *		Source and Target task can be identical
 *		Other attributes are identical as for vm_map()
 */
kern_return_t
vm_map_remap(
	vm_map_t                target_map,
	vm_map_address_t        *address,
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	vm_tag_t                tag,
	vm_map_t                src_map,
	vm_map_offset_t         memory_address,
	boolean_t               copy,
	vm_prot_t               *cur_protection,
	vm_prot_t               *max_protection,
	vm_inherit_t            inheritance)
{
	kern_return_t           result;
	vm_map_entry_t          entry;
	vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t          new_entry;
	vm_map_copy_t           copy_map;
	vm_map_offset_t         offset_in_mapping;
	vm_map_size_t           target_size = 0;
	vm_map_size_t           src_page_mask, target_page_mask;
	vm_map_offset_t         overmap_start, overmap_end, trimmed_start;
	vm_map_offset_t         initial_memory_address;
	vm_map_size_t           initial_size;

	if (target_map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	initial_memory_address = memory_address;
	initial_size = size;
	src_page_mask = VM_MAP_PAGE_MASK(src_map);
	target_page_mask = VM_MAP_PAGE_MASK(target_map);

	switch (inheritance) {
	case VM_INHERIT_NONE:
	case VM_INHERIT_COPY:
	case VM_INHERIT_SHARE:
		if (size != 0 && src_map != VM_MAP_NULL) {
			break;
		}
		/*FALL THRU*/
	default:
		return KERN_INVALID_ARGUMENT;
	}

	if (src_page_mask != target_page_mask) {
		if (copy) {
			DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
		} else {
			DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
		}
	}

	/*
	 * If the user is requesting that we return the address of the
	 * first byte of the data (rather than the base of the page),
	 * then we use different rounding semantics: specifically,
	 * we assume that (memory_address, size) describes a region
	 * all of whose pages we must cover, rather than a base to be truncated
	 * down and a size to be added to that base.  So we figure out
	 * the highest page that the requested region includes and make
	 * sure that the size will cover it.
	 *
	 * The key example we're worried about is of the form:
	 *
	 *	memory_address = 0x1ff0, size = 0x20
	 *
	 * With the old semantics, we round down the memory_address to 0x1000
	 * and round up the size to 0x1000, resulting in our covering *only*
	 * page 0x1000.  With the new semantics, we'd realize that the region covers
	 * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
	 * 0x1000 and page 0x2000 in the region we remap.
	 */
	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
		vm_map_offset_t range_start, range_end;

		range_start = vm_map_trunc_page(memory_address, src_page_mask);
		range_end = vm_map_round_page(memory_address + size, src_page_mask);
		memory_address = range_start;
		size = range_end - range_start;
		offset_in_mapping = initial_memory_address - memory_address;
	} else {
		/*
		 * IMPORTANT:
		 * This legacy code path is broken: for the range mentioned
		 * above [ memory_address = 0x1ff0,size = 0x20 ], which spans
		 * two 4k pages, it yields [ memory_address = 0x1000,
		 * size = 0x1000 ], which covers only the first 4k page.
		 * BUT some code unfortunately depends on this bug, so we
		 * can't fix it without breaking something.
		 * New code should get automatically opted in the new
		 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flags.
		 */
		offset_in_mapping = 0;
		memory_address = vm_map_trunc_page(memory_address, src_page_mask);
		size = vm_map_round_page(size, src_page_mask);
		initial_memory_address = memory_address;
		initial_size = size;
	}
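
	/*
	 * Worked example for the VM_FLAGS_RETURN_DATA_ADDR path above
	 * (illustrative, using the same numbers as the comment): with
	 * memory_address = 0x1ff0, size = 0x20 and 4K source pages,
	 *
	 *	range_start       = trunc(0x1ff0, 0xfff) = 0x1000
	 *	range_end         = round(0x2010, 0xfff) = 0x3000
	 *	size              = 0x3000 - 0x1000      = 0x2000
	 *	offset_in_mapping = 0x1ff0 - 0x1000      = 0xff0
	 *
	 * so both pages 0x1000 and 0x2000 are covered, and the caller's
	 * *address is later bumped by offset_in_mapping to point back at
	 * the first requested byte.
	 */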
	if (size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	if (flags & VM_FLAGS_RESILIENT_MEDIA) {
		/* must be copy-on-write to be "media resilient" */
		if (!copy) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
	vmk_flags.vmkf_copy_same_map = (src_map == target_map);

	result = vm_map_copy_extract(src_map,
	    memory_address,
	    size,
	    VM_PROT_NONE, /* required_protection: no check here */
	    copy, &copy_map,
	    cur_protection, max_protection,
	    inheritance,
	    vmk_flags);
	if (result != KERN_SUCCESS) {
		return result;
	}
	assert(copy_map != VM_MAP_COPY_NULL);

	target_size = size;
	if (src_page_mask != target_page_mask) {
		vm_map_copy_t target_copy_map;

		target_copy_map = copy_map; /* can modify "copy_map" itself */
		DEBUG4K_ADJUST("adjusting...\n");
		result = vm_map_copy_adjust_to_target(
			copy_map,
			offset_in_mapping, /* offset */
			initial_size, /* size */
			target_map,
			copy,
			&target_copy_map,
			&overmap_start,
			&overmap_end,
			&trimmed_start);
		if (result != KERN_SUCCESS) {
			DEBUG4K_COPY("failed to adjust 0x%x\n", result);
			vm_map_copy_discard(copy_map);
			return result;
		}
		if (trimmed_start == 0) {
			/* nothing trimmed: no adjustment needed */
		} else if (trimmed_start >= offset_in_mapping) {
			/* trimmed more than offset_in_mapping: nothing left */
			assert(overmap_start == 0);
			assert(overmap_end == 0);
			offset_in_mapping = 0;
		} else {
			/* trimmed some of offset_in_mapping: adjust */
			assert(overmap_start == 0);
			assert(overmap_end == 0);
			offset_in_mapping -= trimmed_start;
		}
		offset_in_mapping += overmap_start;
		target_size = target_copy_map->size;
	}

	/*
	 * Allocate/check a range of free virtual address
	 * space for the target
	 */
	*address = vm_map_trunc_page(*address, target_page_mask);
	vm_map_lock(target_map);
	target_size = vm_map_round_page(target_size, target_page_mask);
	result = vm_map_remap_range_allocate(target_map, address,
	    target_size,
	    mask, flags, vmk_flags, tag,
	    &insp_entry);

	for (entry = vm_map_copy_first_entry(copy_map);
	    entry != vm_map_copy_to_entry(copy_map);
	    entry = new_entry) {
		new_entry = entry->vme_next;
		vm_map_copy_entry_unlink(copy_map, entry);
		if (result == KERN_SUCCESS) {
			if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
				/* no codesigning -> read-only access */
				entry->max_protection = VM_PROT_READ;
				entry->protection = VM_PROT_READ;
				entry->vme_resilient_codesign = TRUE;
			}
			entry->vme_start += *address;
			entry->vme_end += *address;
			assert(!entry->map_aligned);
			if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
			    !entry->is_sub_map &&
			    (VME_OBJECT(entry) == VM_OBJECT_NULL ||
			    VME_OBJECT(entry)->internal)) {
				entry->vme_resilient_media = TRUE;
			}
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
			assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
			vm_map_store_entry_link(target_map, insp_entry, entry,
			    VM_MAP_KERNEL_FLAGS_NONE);
			insp_entry = entry;
		} else {
			if (!entry->is_sub_map) {
				vm_object_deallocate(VME_OBJECT(entry));
			} else {
				vm_map_deallocate(VME_SUBMAP(entry));
			}
			vm_map_copy_entry_dispose(copy_map, entry);
		}
	}

	if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
		*cur_protection = VM_PROT_READ;
		*max_protection = VM_PROT_READ;
	}

	if (target_map->disable_vmentry_reuse == TRUE) {
		assert(!target_map->is_nested_map);
		if (target_map->highest_entry_end < insp_entry->vme_end) {
			target_map->highest_entry_end = insp_entry->vme_end;
		}
	}

	if (result == KERN_SUCCESS) {
		target_map->size += target_size;
		SAVE_HINT_MAP_WRITE(target_map, insp_entry);
	}
	vm_map_unlock(target_map);

	if (result == KERN_SUCCESS && target_map->wiring_required) {
		result = vm_map_wire_kernel(target_map, *address,
		    *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
		    TRUE);
	}

	/*
	 * If requested, return the address of the data pointed to by the
	 * request, rather than the base of the resulting page.
	 */
	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
		*address += offset_in_mapping;
	}

	if (src_page_mask != target_page_mask) {
		DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
	}
	vm_map_copy_discard(copy_map);
	copy_map = VM_MAP_COPY_NULL;

	return result;
}
/*
 *	Routine:	vm_map_remap_range_allocate
 *
 *	Description:
 *		Allocate a range in the specified virtual address map.
 *		Returns the address and the map entry just before the
 *		allocated range.
 *
 *	Map must be locked.
 */

static kern_return_t
vm_map_remap_range_allocate(
	vm_map_t                map,
	vm_map_address_t        *address,       /* IN/OUT */
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	__unused vm_tag_t       tag,
	vm_map_entry_t          *map_entry)     /* OUT */
{
	vm_map_entry_t  entry;
	vm_map_offset_t start;
	vm_map_offset_t end;
	vm_map_offset_t desired_empty_end;
	kern_return_t   kr;
	vm_map_entry_t  hole_entry;
	if (flags & VM_FLAGS_ANYWHERE) {
		if (flags & VM_FLAGS_RANDOM_ADDR) {
			/*
			 * Get a random start address.
			 */
			kr = vm_map_random_address_for_size(map, address, size);
			if (kr != KERN_SUCCESS) {
				return kr;
			}
			start = *address;
		}

		/*
		 * Calculate the first possible address.
		 */
		if (start < map->min_offset) {
			start = map->min_offset;
		}
		if (start > map->max_offset) {
			return KERN_NO_SPACE;
		}

		/*
		 * Look for the first possible address;
		 * if there's already something at this
		 * address, we have to start after it.
		 */
		if (map->disable_vmentry_reuse == TRUE) {
			VM_MAP_HIGHEST_ENTRY(map, entry, start);
		} else {
			if (map->holelistenabled) {
				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

				if (hole_entry == NULL) {
					/*
					 * No more space in the map?
					 */
					return KERN_NO_SPACE;
				}

				boolean_t found_hole = FALSE;

				do {
					if (hole_entry->vme_start >= start) {
						start = hole_entry->vme_start;
						found_hole = TRUE;
						break;
					}

					if (hole_entry->vme_end > start) {
						found_hole = TRUE;
						break;
					}
					hole_entry = hole_entry->vme_next;
				} while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));

				if (found_hole == FALSE) {
					return KERN_NO_SPACE;
				}

				entry = hole_entry;
			} else {
				assert(first_free_is_valid(map));
				if (start == map->min_offset) {
					if ((entry = map->first_free) != vm_map_to_entry(map)) {
						start = entry->vme_end;
					}
				} else {
					vm_map_entry_t tmp_entry;
					if (vm_map_lookup_entry(map, start, &tmp_entry)) {
						start = tmp_entry->vme_end;
					}
					entry = tmp_entry;
				}
			}
			start = vm_map_round_page(start,
			    VM_MAP_PAGE_MASK(map));
		}
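		/*
		 * Note (added commentary): map->holes_list is a circular list of
		 * the gaps between map entries; when hole tracking is enabled the
		 * scan above walks the holes directly instead of walking every
		 * entry, which is why a NULL list means the address space is full
		 * and the do/while terminates once it wraps back to the head.
		 */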
		/*
		 * In any case, the "entry" always precedes
		 * the proposed new region throughout the
		 * loop:
		 */
		while (TRUE) {
			vm_map_entry_t next;

			/*
			 * Find the end of the proposed new region.
			 * Be sure we didn't go beyond the end, or
			 * wrap around the address.
			 */
			end = ((start + mask) & ~mask);
			end = vm_map_round_page(end,
			    VM_MAP_PAGE_MASK(map));
			if (end < start) {
				return KERN_NO_SPACE;
			}
			start = end;
			end += size;

			/* We want an entire page of empty space, but don't increase the allocation size. */
			desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
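			/*
			 * Worked example of the alignment arithmetic above
			 * (illustrative values, not taken from this file):
			 * with start = 0x12345 and mask = 0xfff (4 KB alignment),
			 * ((0x12345 + 0xfff) & ~0xfff) == 0x13000, i.e. the next
			 * boundary at or above "start" that satisfies the caller's
			 * alignment mask; vm_map_round_page() then rounds that up
			 * again to the map's own page size if it is larger.
			 */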
			if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
				if (map->wait_for_space) {
					if (size <= (map->max_offset -
					    map->min_offset)) {
						assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
						vm_map_unlock(map);
						thread_block(THREAD_CONTINUE_NULL);
						vm_map_lock(map);
					}
				}
				return KERN_NO_SPACE;
			}
18143 next
= entry
->vme_next
;
18145 if (map
->holelistenabled
) {
18146 if (entry
->vme_end
>= desired_empty_end
) {
18151 * If there are no more entries, we must win.
18155 * If there is another entry, it must be
18156 * after the end of the potential new region.
18159 if (next
== vm_map_to_entry(map
)) {
18163 if (next
->vme_start
>= desired_empty_end
) {
18169 * Didn't fit -- move to the next entry.
18174 if (map
->holelistenabled
) {
18175 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
18179 return KERN_NO_SPACE
;
18181 start
= entry
->vme_start
;
18183 start
= entry
->vme_end
;
18187 if (map
->holelistenabled
) {
18188 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
18189 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
18195 vm_map_entry_t temp_entry
;
18199 * the address doesn't itself violate
18200 * the mask requirement.
18203 if ((start
& mask
) != 0) {
18204 return KERN_NO_SPACE
;
18209 * ... the address is within bounds
18212 end
= start
+ size
;
18214 if ((start
< map
->min_offset
) ||
18215 (end
> map
->max_offset
) ||
18217 return KERN_INVALID_ADDRESS
;
18221 * If we're asked to overwrite whatever was mapped in that
18222 * range, first deallocate that range.
18224 if (flags
& VM_FLAGS_OVERWRITE
) {
18226 int remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
| VM_MAP_REMOVE_NO_MAP_ALIGN
;
18229 * We use a "zap_map" to avoid having to unlock
18230 * the "map" in vm_map_delete(), which would compromise
18231 * the atomicity of the "deallocate" and then "remap"
18234 zap_map
= vm_map_create(PMAP_NULL
,
18237 map
->hdr
.entries_pageable
);
18238 if (zap_map
== VM_MAP_NULL
) {
18239 return KERN_RESOURCE_SHORTAGE
;
18241 vm_map_set_page_shift(zap_map
, VM_MAP_PAGE_SHIFT(map
));
18242 vm_map_disable_hole_optimization(zap_map
);
18244 if (vmk_flags
.vmkf_overwrite_immutable
) {
18245 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
18247 kr
= vm_map_delete(map
, start
, end
,
18250 if (kr
== KERN_SUCCESS
) {
18251 vm_map_destroy(zap_map
,
18252 VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
18253 zap_map
= VM_MAP_NULL
;
18258 * ... the starting address isn't allocated
18261 if (vm_map_lookup_entry(map
, start
, &temp_entry
)) {
18262 return KERN_NO_SPACE
;
18265 entry
= temp_entry
;
18268 * ... the next region doesn't overlap the
18272 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
18273 (entry
->vme_next
->vme_start
< end
)) {
18274 return KERN_NO_SPACE
;
18277 *map_entry
= entry
;
18278 return KERN_SUCCESS
;
/*
 *	vm_map_switch:
 *
 *	Set the address map for the current thread to the specified map
 */
vm_map_t
vm_map_switch(
	vm_map_t        map)
{
	int             mycpu;
	thread_t        thread = current_thread();
	vm_map_t        oldmap = thread->map;

	mp_disable_preemption();
	mycpu = cpu_number();

	/*
	 *	Deactivate the current map and activate the requested map
	 */
	PMAP_SWITCH_USER(thread, map, mycpu);

	mp_enable_preemption();
	return oldmap;
}
/*
 *	Routine:	vm_map_write_user
 *
 *	Description:
 *		Copy out data from a kernel space into space in the
 *		destination map. The space must already exist in the
 *		destination map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault, i.e. kernel mode user
 *		threads.
 */
kern_return_t
vm_map_write_user(
	vm_map_t                map,
	void                    *src_p,
	vm_map_address_t        dst_addr,
	vm_size_t               size)
{
	kern_return_t   kr = KERN_SUCCESS;

	if (current_map() == map) {
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t        oldmap;

		/* take on the identity of the target map while doing */
		/* the copy */

		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
/*
 *	Routine:	vm_map_read_user
 *
 *	Description:
 *		Copy in data from a user space source map into the
 *		kernel map. The space must already exist in the
 *		kernel map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault, i.e. kernel mode user
 *		threads.
 */
kern_return_t
vm_map_read_user(
	vm_map_t                map,
	vm_map_address_t        src_addr,
	void                    *dst_p,
	vm_size_t               size)
{
	kern_return_t   kr = KERN_SUCCESS;

	if (current_map() == map) {
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t        oldmap;

		/* take on the identity of the target map while doing */
		/* the copy */

		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
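/*
 * Illustrative sketch (not part of the build): both routines above follow the
 * same pattern -- if the caller is already running against "map" they can use
 * copyin()/copyout() directly, otherwise they temporarily assume the target
 * map's identity with vm_map_switch().  A hypothetical kernel caller pulling a
 * user buffer into a local structure might look like:
 *
 *	struct foo local;                       // hypothetical type
 *	kern_return_t kr;
 *
 *	kr = vm_map_read_user(task_map,         // map of the target task
 *	    user_addr,                          // user virtual address
 *	    &local, sizeof(local));
 *	if (kr != KERN_SUCCESS) {
 *	    // user address was invalid or unmapped
 *	}
 */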
/*
 *	vm_map_check_protection:
 *
 *	Assert that the target map allows the specified
 *	privilege on the entire address region given.
 *	The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
    vm_map_offset_t end, vm_prot_t protection)
{
	vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	vm_map_lock(map);

	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
		vm_map_unlock(map);
		return FALSE;
	}

	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		vm_map_unlock(map);
		return FALSE;
	}

	entry = tmp_entry;

	while (start < end) {
		if (entry == vm_map_to_entry(map)) {
			vm_map_unlock(map);
			return FALSE;
		}

		/*
		 * No holes allowed!
		 */
		if (start < entry->vme_start) {
			vm_map_unlock(map);
			return FALSE;
		}

		/*
		 * Check protection associated with entry.
		 */
		if ((entry->protection & protection) != protection) {
			vm_map_unlock(map);
			return FALSE;
		}

		/* go to next entry */
		start = entry->vme_end;
		entry = entry->vme_next;
	}
	vm_map_unlock(map);
	return TRUE;
}
18455 vm_map_purgable_control(
18457 vm_map_offset_t address
,
18458 vm_purgable_t control
,
18461 vm_map_entry_t entry
;
18462 vm_object_t object
;
18464 boolean_t was_nonvolatile
;
	 * Vet all the input parameters and current type and state of the
	 * underlying object. Return with an error if anything is amiss.
18470 if (map
== VM_MAP_NULL
) {
18471 return KERN_INVALID_ARGUMENT
;
18474 if (control
!= VM_PURGABLE_SET_STATE
&&
18475 control
!= VM_PURGABLE_GET_STATE
&&
18476 control
!= VM_PURGABLE_PURGE_ALL
&&
18477 control
!= VM_PURGABLE_SET_STATE_FROM_KERNEL
) {
18478 return KERN_INVALID_ARGUMENT
;
18481 if (control
== VM_PURGABLE_PURGE_ALL
) {
18482 vm_purgeable_object_purge_all();
18483 return KERN_SUCCESS
;
18486 if ((control
== VM_PURGABLE_SET_STATE
||
18487 control
== VM_PURGABLE_SET_STATE_FROM_KERNEL
) &&
18488 (((*state
& ~(VM_PURGABLE_ALL_MASKS
)) != 0) ||
18489 ((*state
& VM_PURGABLE_STATE_MASK
) > VM_PURGABLE_STATE_MASK
))) {
18490 return KERN_INVALID_ARGUMENT
;
18493 vm_map_lock_read(map
);
18495 if (!vm_map_lookup_entry(map
, address
, &entry
) || entry
->is_sub_map
) {
18497 * Must pass a valid non-submap address.
18499 vm_map_unlock_read(map
);
18500 return KERN_INVALID_ADDRESS
;
18503 if ((entry
->protection
& VM_PROT_WRITE
) == 0) {
18505 * Can't apply purgable controls to something you can't write.
18507 vm_map_unlock_read(map
);
18508 return KERN_PROTECTION_FAILURE
;
18511 object
= VME_OBJECT(entry
);
18512 if (object
== VM_OBJECT_NULL
||
18513 object
->purgable
== VM_PURGABLE_DENY
) {
18515 * Object must already be present and be purgeable.
18517 vm_map_unlock_read(map
);
18518 return KERN_INVALID_ARGUMENT
;
18521 vm_object_lock(object
);
18524 if (VME_OFFSET(entry
) != 0 ||
18525 entry
->vme_end
- entry
->vme_start
!= object
->vo_size
) {
18527 * Can only apply purgable controls to the whole (existing)
18530 vm_map_unlock_read(map
);
18531 vm_object_unlock(object
);
18532 return KERN_INVALID_ARGUMENT
;
18536 assert(!entry
->is_sub_map
);
18537 assert(!entry
->use_pmap
); /* purgeable has its own accounting */
18539 vm_map_unlock_read(map
);
18541 was_nonvolatile
= (object
->purgable
== VM_PURGABLE_NONVOLATILE
);
18543 kr
= vm_object_purgable_control(object
, control
, state
);
18545 if (was_nonvolatile
&&
18546 object
->purgable
!= VM_PURGABLE_NONVOLATILE
&&
18547 map
->pmap
== kernel_pmap
) {
18549 object
->vo_purgeable_volatilizer
= kernel_task
;
18553 vm_object_unlock(object
);
18559 vm_map_footprint_query_page_info(
18561 vm_map_entry_t map_entry
,
18562 vm_map_offset_t curr_s_offset
,
18563 int *disposition_p
)
18566 vm_object_t object
;
18568 int effective_page_size
;
18570 vm_map_lock_assert_held(map
);
18571 assert(!map
->has_corpse_footprint
);
18572 assert(curr_s_offset
>= map_entry
->vme_start
);
18573 assert(curr_s_offset
< map_entry
->vme_end
);
18575 object
= VME_OBJECT(map_entry
);
18576 if (object
== VM_OBJECT_NULL
) {
18577 *disposition_p
= 0;
18581 effective_page_size
= MIN(PAGE_SIZE
, VM_MAP_PAGE_SIZE(map
));
18584 if (object
== VM_OBJECT_NULL
) {
18585 /* nothing mapped here: no need to ask */
18586 *disposition_p
= 0;
18588 } else if (map_entry
->is_sub_map
&&
18589 !map_entry
->use_pmap
) {
18590 /* nested pmap: no footprint */
18591 *disposition_p
= 0;
18598 pmap_query_page_info(map
->pmap
, curr_s_offset
, &pmap_disp
);
18601 * Compute this page's disposition.
18605 /* deal with "alternate accounting" first */
18606 if (!map_entry
->is_sub_map
&&
18607 object
->vo_no_footprint
) {
18608 /* does not count in footprint */
18609 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18610 } else if (!map_entry
->is_sub_map
&&
18611 (object
->purgable
== VM_PURGABLE_NONVOLATILE
||
18612 (object
->purgable
== VM_PURGABLE_DENY
&&
18613 object
->vo_ledger_tag
)) &&
18614 VM_OBJECT_OWNER(object
) != NULL
&&
18615 VM_OBJECT_OWNER(object
)->map
== map
) {
18616 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18617 if ((((curr_s_offset
18618 - map_entry
->vme_start
18619 + VME_OFFSET(map_entry
))
18620 / effective_page_size
) <
18621 (object
->resident_page_count
+
18622 vm_compressor_pager_get_count(object
->pager
)))) {
18624 * Non-volatile purgeable object owned
18625 * by this task: report the first
18626 * "#resident + #compressed" pages as
18627 * "resident" (to show that they
18628 * contribute to the footprint) but not
18629 * "dirty" (to avoid double-counting
18630 * with the fake "non-volatile" region
18631 * we'll report at the end of the
18632 * address space to account for all
18633 * (mapped or not) non-volatile memory
18634 * owned by this task.
18636 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18638 } else if (!map_entry
->is_sub_map
&&
18639 (object
->purgable
== VM_PURGABLE_VOLATILE
||
18640 object
->purgable
== VM_PURGABLE_EMPTY
) &&
18641 VM_OBJECT_OWNER(object
) != NULL
&&
18642 VM_OBJECT_OWNER(object
)->map
== map
) {
18643 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18644 if ((((curr_s_offset
18645 - map_entry
->vme_start
18646 + VME_OFFSET(map_entry
))
18647 / effective_page_size
) <
18648 object
->wired_page_count
)) {
18650 * Volatile|empty purgeable object owned
18651 * by this task: report the first
18652 * "#wired" pages as "resident" (to
18653 * show that they contribute to the
18654 * footprint) but not "dirty" (to avoid
18655 * double-counting with the fake
18656 * "non-volatile" region we'll report
18657 * at the end of the address space to
18658 * account for all (mapped or not)
18659 * non-volatile memory owned by this
18662 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18664 } else if (!map_entry
->is_sub_map
&&
18665 map_entry
->iokit_acct
&&
18666 object
->internal
&&
18667 object
->purgable
== VM_PURGABLE_DENY
) {
18669 * Non-purgeable IOKit memory: phys_footprint
18670 * includes the entire virtual mapping.
18672 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18673 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18674 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
18675 } else if (pmap_disp
& (PMAP_QUERY_PAGE_ALTACCT
|
18676 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
)) {
18677 /* alternate accounting */
18678 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18679 if (map
->pmap
->footprint_was_suspended
) {
18681 * The assertion below can fail if dyld
18682 * suspended footprint accounting
18683 * while doing some adjustments to
18684 * this page; the mapping would say
18685 * "use pmap accounting" but the page
18686 * would be marked "alternate
18690 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18692 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18696 if (pmap_disp
& PMAP_QUERY_PAGE_PRESENT
) {
18697 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18698 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18699 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
18700 if (pmap_disp
& PMAP_QUERY_PAGE_INTERNAL
) {
18701 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
18703 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
18705 if (pmap_disp
& PMAP_QUERY_PAGE_REUSABLE
) {
18706 disposition
|= VM_PAGE_QUERY_PAGE_REUSABLE
;
18708 } else if (pmap_disp
& PMAP_QUERY_PAGE_COMPRESSED
) {
18709 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
18710 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
18714 *disposition_p
= disposition
;
18718 vm_map_page_query_internal(
18719 vm_map_t target_map
,
18720 vm_map_offset_t offset
,
18725 vm_page_info_basic_data_t info
;
18726 mach_msg_type_number_t count
;
18728 count
= VM_PAGE_INFO_BASIC_COUNT
;
18729 kr
= vm_map_page_info(target_map
,
18731 VM_PAGE_INFO_BASIC
,
18732 (vm_page_info_t
) &info
,
18734 if (kr
== KERN_SUCCESS
) {
18735 *disposition
= info
.disposition
;
18736 *ref_count
= info
.ref_count
;
18748 vm_map_offset_t offset
,
18749 vm_page_info_flavor_t flavor
,
18750 vm_page_info_t info
,
18751 mach_msg_type_number_t
*count
)
18753 return vm_map_page_range_info_internal(map
,
18754 offset
, /* start of range */
18755 (offset
+ 1), /* this will get rounded in the call to the page boundary */
18756 (int)-1, /* effective_page_shift: unspecified */
18763 vm_map_page_range_info_internal(
18765 vm_map_offset_t start_offset
,
18766 vm_map_offset_t end_offset
,
18767 int effective_page_shift
,
18768 vm_page_info_flavor_t flavor
,
18769 vm_page_info_t info
,
18770 mach_msg_type_number_t
*count
)
18772 vm_map_entry_t map_entry
= VM_MAP_ENTRY_NULL
;
18773 vm_object_t object
= VM_OBJECT_NULL
, curr_object
= VM_OBJECT_NULL
;
18774 vm_page_t m
= VM_PAGE_NULL
;
18775 kern_return_t retval
= KERN_SUCCESS
;
18776 int disposition
= 0;
18778 int depth
= 0, info_idx
= 0;
18779 vm_page_info_basic_t basic_info
= 0;
18780 vm_map_offset_t offset_in_page
= 0, offset_in_object
= 0, curr_offset_in_object
= 0;
18781 vm_map_offset_t start
= 0, end
= 0, curr_s_offset
= 0, curr_e_offset
= 0;
18782 boolean_t do_region_footprint
;
18783 ledger_amount_t ledger_resident
, ledger_compressed
;
18784 int effective_page_size
;
18785 vm_map_offset_t effective_page_mask
;
18788 case VM_PAGE_INFO_BASIC
:
18789 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
) {
18791 * The "vm_page_info_basic_data" structure was not
18792 * properly padded, so allow the size to be off by
18793 * one to maintain backwards binary compatibility...
18795 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
- 1) {
18796 return KERN_INVALID_ARGUMENT
;
18801 return KERN_INVALID_ARGUMENT
;
18804 if (effective_page_shift
== -1) {
18805 effective_page_shift
= vm_self_region_page_shift_safely(map
);
18806 if (effective_page_shift
== -1) {
18807 return KERN_INVALID_ARGUMENT
;
18810 effective_page_size
= (1 << effective_page_shift
);
18811 effective_page_mask
= effective_page_size
- 1;
18813 do_region_footprint
= task_self_region_footprint();
18817 info_idx
= 0; /* Tracks the next index within the info structure to be filled.*/
18818 retval
= KERN_SUCCESS
;
18820 offset_in_page
= start_offset
& effective_page_mask
;
18821 start
= vm_map_trunc_page(start_offset
, effective_page_mask
);
18822 end
= vm_map_round_page(end_offset
, effective_page_mask
);
18825 return KERN_INVALID_ARGUMENT
;
18828 assert((end
- start
) <= MAX_PAGE_RANGE_QUERY
);
18830 vm_map_lock_read(map
);
18832 task_ledgers_footprint(map
->pmap
->ledger
, &ledger_resident
, &ledger_compressed
);
18834 for (curr_s_offset
= start
; curr_s_offset
< end
;) {
18836 * New lookup needs reset of these variables.
18838 curr_object
= object
= VM_OBJECT_NULL
;
18839 offset_in_object
= 0;
18843 if (do_region_footprint
&&
18844 curr_s_offset
>= vm_map_last_entry(map
)->vme_end
) {
18846 * Request for "footprint" info about a page beyond
18847 * the end of address space: this must be for
18848 * the fake region vm_map_region_recurse_64()
18849 * reported to account for non-volatile purgeable
18850 * memory owned by this task.
18854 if (curr_s_offset
- vm_map_last_entry(map
)->vme_end
<=
18855 (unsigned) ledger_compressed
) {
18857 * We haven't reported all the "non-volatile
18858 * compressed" pages yet, so report this fake
18859 * page as "compressed".
18861 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
18864 * We've reported all the non-volatile
18865 * compressed page but not all the non-volatile
18866 * pages , so report this fake page as
18867 * "resident dirty".
18869 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
18870 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
18871 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
18874 case VM_PAGE_INFO_BASIC
:
18875 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
18876 basic_info
->disposition
= disposition
;
18877 basic_info
->ref_count
= 1;
18878 basic_info
->object_id
= VM_OBJECT_ID_FAKE(map
, task_ledgers
.purgeable_nonvolatile
);
18879 basic_info
->offset
= 0;
18880 basic_info
->depth
= 0;
18885 curr_s_offset
+= effective_page_size
;
18890 * First, find the map entry covering "curr_s_offset", going down
18891 * submaps if necessary.
18893 if (!vm_map_lookup_entry(map
, curr_s_offset
, &map_entry
)) {
18894 /* no entry -> no object -> no page */
18896 if (curr_s_offset
< vm_map_min(map
)) {
18898 * Illegal address that falls below map min.
18900 curr_e_offset
= MIN(end
, vm_map_min(map
));
18901 } else if (curr_s_offset
>= vm_map_max(map
)) {
18903 * Illegal address that falls on/after map max.
18905 curr_e_offset
= end
;
18906 } else if (map_entry
== vm_map_to_entry(map
)) {
18910 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
18914 curr_e_offset
= MIN(map
->max_offset
, end
);
18917 * Hole at start of the map.
18919 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
18922 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
18924 * Hole at the end of the map.
18926 curr_e_offset
= MIN(map
->max_offset
, end
);
18928 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
18932 assert(curr_e_offset
>= curr_s_offset
);
18934 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> effective_page_shift
;
18936 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
18938 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
18940 curr_s_offset
= curr_e_offset
;
18942 info_idx
+= num_pages
;
18947 /* compute offset from this map entry's start */
18948 offset_in_object
= curr_s_offset
- map_entry
->vme_start
;
18950 /* compute offset into this map entry's object (or submap) */
18951 offset_in_object
+= VME_OFFSET(map_entry
);
18953 if (map_entry
->is_sub_map
) {
18954 vm_map_t sub_map
= VM_MAP_NULL
;
18955 vm_page_info_t submap_info
= 0;
18956 vm_map_offset_t submap_s_offset
= 0, submap_e_offset
= 0, range_len
= 0;
18958 range_len
= MIN(map_entry
->vme_end
, end
) - curr_s_offset
;
18960 submap_s_offset
= offset_in_object
;
18961 submap_e_offset
= submap_s_offset
+ range_len
;
18963 sub_map
= VME_SUBMAP(map_entry
);
18965 vm_map_reference(sub_map
);
18966 vm_map_unlock_read(map
);
18968 submap_info
= (vm_page_info_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
18970 assertf(VM_MAP_PAGE_SHIFT(sub_map
) >= VM_MAP_PAGE_SHIFT(map
),
18971 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map
), VM_MAP_PAGE_SIZE(map
));
18973 retval
= vm_map_page_range_info_internal(sub_map
,
18976 effective_page_shift
,
18977 VM_PAGE_INFO_BASIC
,
18978 (vm_page_info_t
) submap_info
,
18981 assert(retval
== KERN_SUCCESS
);
18983 vm_map_lock_read(map
);
18984 vm_map_deallocate(sub_map
);
18986 /* Move the "info" index by the number of pages we inspected.*/
18987 info_idx
+= range_len
>> effective_page_shift
;
18989 /* Move our current offset by the size of the range we inspected.*/
18990 curr_s_offset
+= range_len
;
18995 object
= VME_OBJECT(map_entry
);
18997 if (object
== VM_OBJECT_NULL
) {
18999 * We don't have an object here and, hence,
19000 * no pages to inspect. We'll fill up the
19001 * info structure appropriately.
19004 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
19006 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> effective_page_shift
;
19008 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19010 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
19012 curr_s_offset
= curr_e_offset
;
19014 info_idx
+= num_pages
;
19019 if (do_region_footprint
) {
19021 if (map
->has_corpse_footprint
) {
19023 * Query the page info data we saved
19024 * while forking the corpse.
19026 vm_map_corpse_footprint_query_page_info(
19032 * Query the live pmap for footprint info
19035 vm_map_footprint_query_page_info(
19042 case VM_PAGE_INFO_BASIC
:
19043 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19044 basic_info
->disposition
= disposition
;
19045 basic_info
->ref_count
= 1;
19046 basic_info
->object_id
= VM_OBJECT_ID_FAKE(map
, task_ledgers
.purgeable_nonvolatile
);
19047 basic_info
->offset
= 0;
19048 basic_info
->depth
= 0;
19053 curr_s_offset
+= effective_page_size
;
19057 vm_object_reference(object
);
19059 * Shared mode -- so we can allow other readers
19060 * to grab the lock too.
19062 vm_object_lock_shared(object
);
19064 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
19066 vm_map_unlock_read(map
);
19068 map_entry
= NULL
; /* map is unlocked, the entry is no longer valid. */
19070 curr_object
= object
;
19072 for (; curr_s_offset
< curr_e_offset
;) {
19073 if (object
== curr_object
) {
19074 ref_count
= curr_object
->ref_count
- 1; /* account for our object reference above. */
19076 ref_count
= curr_object
->ref_count
;
19079 curr_offset_in_object
= offset_in_object
;
19082 m
= vm_page_lookup(curr_object
, vm_object_trunc_page(curr_offset_in_object
));
19084 if (m
!= VM_PAGE_NULL
) {
19085 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
19088 if (curr_object
->internal
&&
19089 curr_object
->alive
&&
19090 !curr_object
->terminating
&&
19091 curr_object
->pager_ready
) {
19092 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object
, vm_object_trunc_page(curr_offset_in_object
))
19093 == VM_EXTERNAL_STATE_EXISTS
) {
19094 /* the pager has that page */
19095 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
19101 * Go down the VM object shadow chain until we find the page
19102 * we're looking for.
19105 if (curr_object
->shadow
!= VM_OBJECT_NULL
) {
19106 vm_object_t shadow
= VM_OBJECT_NULL
;
19108 curr_offset_in_object
+= curr_object
->vo_shadow_offset
;
19109 shadow
= curr_object
->shadow
;
19111 vm_object_lock_shared(shadow
);
19112 vm_object_unlock(curr_object
);
19114 curr_object
= shadow
;
19123 /* The ref_count is not strictly accurate, it measures the number */
19124 /* of entities holding a ref on the object, they may not be mapping */
19125 /* the object or may not be mapping the section holding the */
19126 /* target page but its still a ball park number and though an over- */
19127 /* count, it picks up the copy-on-write cases */
19129 /* We could also get a picture of page sharing from pmap_attributes */
19130 /* but this would under count as only faulted-in mappings would */
19133 if ((curr_object
== object
) && curr_object
->shadow
) {
19134 disposition
|= VM_PAGE_QUERY_PAGE_COPIED
;
19137 if (!curr_object
->internal
) {
19138 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
19141 if (m
!= VM_PAGE_NULL
) {
19142 if (m
->vmp_fictitious
) {
19143 disposition
|= VM_PAGE_QUERY_PAGE_FICTITIOUS
;
19145 if (m
->vmp_dirty
|| pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m
))) {
19146 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
19149 if (m
->vmp_reference
|| pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m
))) {
19150 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
19153 if (m
->vmp_q_state
== VM_PAGE_ON_SPECULATIVE_Q
) {
19154 disposition
|= VM_PAGE_QUERY_PAGE_SPECULATIVE
;
19159 * when this routine deals with 4k
19160 * pages, check the appropriate CS bit
19163 if (m
->vmp_cs_validated
) {
19164 disposition
|= VM_PAGE_QUERY_PAGE_CS_VALIDATED
;
19166 if (m
->vmp_cs_tainted
) {
19167 disposition
|= VM_PAGE_QUERY_PAGE_CS_TAINTED
;
19169 if (m
->vmp_cs_nx
) {
19170 disposition
|= VM_PAGE_QUERY_PAGE_CS_NX
;
19172 if (m
->vmp_reusable
|| curr_object
->all_reusable
) {
19173 disposition
|= VM_PAGE_QUERY_PAGE_REUSABLE
;
19179 case VM_PAGE_INFO_BASIC
:
19180 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
19181 basic_info
->disposition
= disposition
;
19182 basic_info
->ref_count
= ref_count
;
19183 basic_info
->object_id
= (vm_object_id_t
) (uintptr_t)
19184 VM_KERNEL_ADDRPERM(curr_object
);
19185 basic_info
->offset
=
19186 (memory_object_offset_t
) curr_offset_in_object
+ offset_in_page
;
19187 basic_info
->depth
= depth
;
19194 offset_in_page
= 0; // This doesn't really make sense for any offset other than the starting offset.
19197 * Move to next offset in the range and in our object.
19199 curr_s_offset
+= effective_page_size
;
19200 offset_in_object
+= effective_page_size
;
19201 curr_offset_in_object
= offset_in_object
;
19203 if (curr_object
!= object
) {
19204 vm_object_unlock(curr_object
);
19206 curr_object
= object
;
19208 vm_object_lock_shared(curr_object
);
19210 vm_object_lock_yield_shared(curr_object
);
19214 vm_object_unlock(curr_object
);
19215 vm_object_deallocate(curr_object
);
19217 vm_map_lock_read(map
);
19220 vm_map_unlock_read(map
);
/*
 *	Synchronises the memory range specified with its backing store
 *	image by either flushing or cleaning the contents to the appropriate
 *	memory manager, engaging in a memory object synchronize dialog with
 *	the manager.  The client doesn't return until the manager issues
 *	the m_o_s_completed message.  MIG magically converts the user task
 *	parameter to the task's address map.
 *
 *	interpretation of sync_flags
 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
 *				  pages to manager.
 *
 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
 *				- discard pages, write dirty or precious
 *				  pages back to memory manager.
 *
 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
 *				- write dirty or precious pages back to
 *				  the memory manager.
 *
 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
 *				  is a hole in the region, and we would
 *				  have returned KERN_SUCCESS, return
 *				  KERN_INVALID_ADDRESS instead.
 *
 *	NOTE
 *	The memory object attributes have not yet been implemented, this
 *	function will have to deal with the invalidate attribute
 *
 *	RETURNS
 *	KERN_INVALID_TASK		Bad task parameter
 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
 *	KERN_SUCCESS			The usual.
 *	KERN_INVALID_ADDRESS		There was a hole in the region.
 */
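/*
 * Illustrative sketch (not part of the build): a typical in-kernel caller of
 * this routine (vm_map_msync) asking for a synchronous flush of a user range
 * under the flag semantics described above.  "map", "addr" and "len" are
 * hypothetical values.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_msync(map, (vm_map_address_t)addr, (vm_map_size_t)len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 *	if (kr == KERN_INVALID_ADDRESS) {
 *	    // the range contained a hole
 *	}
 */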
19265 vm_map_address_t address
,
19266 vm_map_size_t size
,
19267 vm_sync_t sync_flags
)
19269 vm_map_entry_t entry
;
19270 vm_map_size_t amount_left
;
19271 vm_object_offset_t offset
;
19272 vm_object_offset_t start_offset
, end_offset
;
19273 boolean_t do_sync_req
;
19274 boolean_t had_hole
= FALSE
;
19275 vm_map_offset_t pmap_offset
;
19277 if ((sync_flags
& VM_SYNC_ASYNCHRONOUS
) &&
19278 (sync_flags
& VM_SYNC_SYNCHRONOUS
)) {
19279 return KERN_INVALID_ARGUMENT
;
19282 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
19283 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map
, (uint64_t)address
, (uint64_t)size
, sync_flags
);
19287 * align address and size on page boundaries
19289 size
= (vm_map_round_page(address
+ size
,
19290 VM_MAP_PAGE_MASK(map
)) -
19291 vm_map_trunc_page(address
,
19292 VM_MAP_PAGE_MASK(map
)));
19293 address
= vm_map_trunc_page(address
,
19294 VM_MAP_PAGE_MASK(map
));
19296 if (map
== VM_MAP_NULL
) {
19297 return KERN_INVALID_TASK
;
19301 return KERN_SUCCESS
;
19304 amount_left
= size
;
19306 while (amount_left
> 0) {
19307 vm_object_size_t flush_size
;
19308 vm_object_t object
;
19311 if (!vm_map_lookup_entry(map
,
19314 vm_map_size_t skip
;
19317 * hole in the address map.
19321 if (sync_flags
& VM_SYNC_KILLPAGES
) {
19323 * For VM_SYNC_KILLPAGES, there should be
19324 * no holes in the range, since we couldn't
19325 * prevent someone else from allocating in
19326 * that hole and we wouldn't want to "kill"
19329 vm_map_unlock(map
);
19334 * Check for empty map.
19336 if (entry
== vm_map_to_entry(map
) &&
19337 entry
->vme_next
== entry
) {
19338 vm_map_unlock(map
);
19342 * Check that we don't wrap and that
19343 * we have at least one real map entry.
19345 if ((map
->hdr
.nentries
== 0) ||
19346 (entry
->vme_next
->vme_start
< address
)) {
19347 vm_map_unlock(map
);
19351 * Move up to the next entry if needed
19353 skip
= (entry
->vme_next
->vme_start
- address
);
19354 if (skip
>= amount_left
) {
19357 amount_left
-= skip
;
19359 address
= entry
->vme_next
->vme_start
;
19360 vm_map_unlock(map
);
19364 offset
= address
- entry
->vme_start
;
19365 pmap_offset
= address
;
19368 * do we have more to flush than is contained in this
19371 if (amount_left
+ entry
->vme_start
+ offset
> entry
->vme_end
) {
19372 flush_size
= entry
->vme_end
-
19373 (entry
->vme_start
+ offset
);
19375 flush_size
= amount_left
;
19377 amount_left
-= flush_size
;
19378 address
+= flush_size
;
19380 if (entry
->is_sub_map
== TRUE
) {
19381 vm_map_t local_map
;
19382 vm_map_offset_t local_offset
;
19384 local_map
= VME_SUBMAP(entry
);
19385 local_offset
= VME_OFFSET(entry
);
19386 vm_map_reference(local_map
);
19387 vm_map_unlock(map
);
19392 sync_flags
) == KERN_INVALID_ADDRESS
) {
19395 vm_map_deallocate(local_map
);
19398 object
= VME_OBJECT(entry
);
19401 * We can't sync this object if the object has not been
19404 if (object
== VM_OBJECT_NULL
) {
19405 vm_map_unlock(map
);
19408 offset
+= VME_OFFSET(entry
);
19410 vm_object_lock(object
);
19412 if (sync_flags
& (VM_SYNC_KILLPAGES
| VM_SYNC_DEACTIVATE
)) {
19413 int kill_pages
= 0;
19414 boolean_t reusable_pages
= FALSE
;
19416 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
19418 * This is a destructive operation and so we
19419 * err on the side of limiting the range of
19422 start_offset
= vm_object_round_page(offset
);
19423 end_offset
= vm_object_trunc_page(offset
+ flush_size
);
19425 if (end_offset
<= start_offset
) {
19426 vm_object_unlock(object
);
19427 vm_map_unlock(map
);
19431 pmap_offset
+= start_offset
- offset
;;
19433 start_offset
= offset
;
19434 end_offset
= offset
+ flush_size
;
19437 if (sync_flags
& VM_SYNC_KILLPAGES
) {
19438 if (((object
->ref_count
== 1) ||
19439 ((object
->copy_strategy
!=
19440 MEMORY_OBJECT_COPY_SYMMETRIC
) &&
19441 (object
->copy
== VM_OBJECT_NULL
))) &&
19442 (object
->shadow
== VM_OBJECT_NULL
)) {
19443 if (object
->ref_count
!= 1) {
19444 vm_page_stats_reusable
.free_shared
++;
19451 if (kill_pages
!= -1) {
19452 vm_object_deactivate_pages(
19455 (vm_object_size_t
) (end_offset
- start_offset
),
19461 vm_object_unlock(object
);
19462 vm_map_unlock(map
);
19466 * We can't sync this object if there isn't a pager.
19467 * Don't bother to sync internal objects, since there can't
19468 * be any "permanent" storage for these objects anyway.
19470 if ((object
->pager
== MEMORY_OBJECT_NULL
) ||
19471 (object
->internal
) || (object
->private)) {
19472 vm_object_unlock(object
);
19473 vm_map_unlock(map
);
19477 * keep reference on the object until syncing is done
19479 vm_object_reference_locked(object
);
19480 vm_object_unlock(object
);
19482 vm_map_unlock(map
);
19484 if (VM_MAP_PAGE_MASK(map
) < PAGE_MASK
) {
19485 start_offset
= vm_object_trunc_page(offset
);
19486 end_offset
= vm_object_round_page(offset
+ flush_size
);
19488 start_offset
= offset
;
19489 end_offset
= offset
+ flush_size
;
19492 do_sync_req
= vm_object_sync(object
,
19494 (end_offset
- start_offset
),
19495 sync_flags
& VM_SYNC_INVALIDATE
,
19496 ((sync_flags
& VM_SYNC_SYNCHRONOUS
) ||
19497 (sync_flags
& VM_SYNC_ASYNCHRONOUS
)),
19498 sync_flags
& VM_SYNC_SYNCHRONOUS
);
19500 if ((sync_flags
& VM_SYNC_INVALIDATE
) && object
->resident_page_count
== 0) {
19502 * clear out the clustering and read-ahead hints
19504 vm_object_lock(object
);
19506 object
->pages_created
= 0;
19507 object
->pages_used
= 0;
19508 object
->sequential
= 0;
19509 object
->last_alloc
= 0;
19511 vm_object_unlock(object
);
19513 vm_object_deallocate(object
);
19516 /* for proper msync() behaviour */
19517 if (had_hole
== TRUE
&& (sync_flags
& VM_SYNC_CONTIGUOUS
)) {
19518 return KERN_INVALID_ADDRESS
;
19521 return KERN_SUCCESS
;
kern_return_t
vm_named_entry_from_vm_object(
	vm_named_entry_t        named_entry,
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	vm_prot_t               prot)
{
	vm_map_copy_t   copy;
	vm_map_entry_t  copy_entry;

	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(!named_entry->is_object);
	assert(!named_entry->internal);
	assert(named_entry->backing.copy == VM_MAP_COPY_NULL);

	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->offset = offset;
	copy->size = size;
	copy->cpy_hdr.page_shift = PAGE_SHIFT;
	vm_map_store_init(&copy->cpy_hdr);

	copy_entry = vm_map_copy_entry_create(copy, FALSE);
	copy_entry->protection = prot;
	copy_entry->max_protection = prot;
	copy_entry->use_pmap = TRUE;
	copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
	copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
	VME_OBJECT_SET(copy_entry, object);
	VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
	vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);

	named_entry->backing.copy = copy;
	named_entry->is_object = TRUE;
	if (object->internal) {
		named_entry->internal = TRUE;
	}

	DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, object, offset, size, prot);

	return KERN_SUCCESS;
}
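/*
 * Note (added commentary): vm_named_entry_from_vm_object() wraps the VM
 * object in a single-entry VM_MAP_COPY_ENTRY_LIST copy object, so a named
 * entry always describes its backing memory the same way whether it was
 * created from an object or from a copied range; vm_named_entry_to_vm_object()
 * below relies on that one-entry invariant (see its nentries == 1 assertion).
 */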
vm_object_t
vm_named_entry_to_vm_object(
	vm_named_entry_t        named_entry)
{
	vm_map_copy_t   copy;
	vm_map_entry_t  copy_entry;
	vm_object_t     object;

	assert(!named_entry->is_sub_map);
	assert(!named_entry->is_copy);
	assert(named_entry->is_object);
	copy = named_entry->backing.copy;
	assert(copy != VM_MAP_COPY_NULL);
	assert(copy->cpy_hdr.nentries == 1);
	copy_entry = vm_map_copy_first_entry(copy);
	assert(!copy_entry->is_sub_map);
	object = VME_OBJECT(copy_entry);

	DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);

	return object;
}
19593 * Routine: convert_port_entry_to_map
19595 * Convert from a port specifying an entry or a task
19596 * to a map. Doesn't consume the port ref; produces a map ref,
19597 * which may be null. Unlike convert_port_to_map, the
19598 * port may be task or a named entry backed.
19605 convert_port_entry_to_map(
19609 vm_named_entry_t named_entry
;
19610 uint32_t try_failed_count
= 0;
19612 if (IP_VALID(port
) && (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
19615 if (ip_active(port
) && (ip_kotype(port
)
19616 == IKOT_NAMED_ENTRY
)) {
19618 (vm_named_entry_t
) ip_get_kobject(port
);
19619 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
19622 try_failed_count
++;
19623 mutex_pause(try_failed_count
);
19626 named_entry
->ref_count
++;
19627 lck_mtx_unlock(&(named_entry
)->Lock
);
19629 if ((named_entry
->is_sub_map
) &&
19630 (named_entry
->protection
19631 & VM_PROT_WRITE
)) {
19632 map
= named_entry
->backing
.map
;
19633 if (map
->pmap
!= PMAP_NULL
) {
19634 if (map
->pmap
== kernel_pmap
) {
19635 panic("userspace has access "
19636 "to a kernel map %p", map
);
19638 pmap_require(map
->pmap
);
19641 mach_destroy_memory_entry(port
);
19642 return VM_MAP_NULL
;
19644 vm_map_reference_swap(map
);
19645 mach_destroy_memory_entry(port
);
19648 return VM_MAP_NULL
;
19652 map
= convert_port_to_map(port
);
19659 * Routine: convert_port_entry_to_object
19661 * Convert from a port specifying a named entry to an
19662 * object. Doesn't consume the port ref; produces a map ref,
19663 * which may be null.
19670 convert_port_entry_to_object(
19673 vm_object_t object
= VM_OBJECT_NULL
;
19674 vm_named_entry_t named_entry
;
19675 uint32_t try_failed_count
= 0;
19677 if (IP_VALID(port
) &&
19678 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
19681 if (ip_active(port
) &&
19682 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
19683 named_entry
= (vm_named_entry_t
) ip_get_kobject(port
);
19684 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
19686 try_failed_count
++;
19687 mutex_pause(try_failed_count
);
19690 named_entry
->ref_count
++;
19691 lck_mtx_unlock(&(named_entry
)->Lock
);
19693 if (!(named_entry
->is_sub_map
) &&
19694 !(named_entry
->is_copy
) &&
19695 (named_entry
->is_object
) &&
19696 (named_entry
->protection
& VM_PROT_WRITE
)) {
19697 vm_map_copy_t copy
;
19698 vm_map_entry_t copy_entry
;
19700 copy
= named_entry
->backing
.copy
;
19701 assert(copy
->cpy_hdr
.nentries
== 1);
19702 copy_entry
= vm_map_copy_first_entry(copy
);
19703 assert(!copy_entry
->is_sub_map
);
19704 object
= VME_OBJECT(copy_entry
);
19705 assert(object
!= VM_OBJECT_NULL
);
19706 vm_object_reference(object
);
19708 mach_destroy_memory_entry(port
);
19716 * Export routines to other components for the things we access locally through
19723 return current_map_fast();
/*
 *	vm_map_reference:
 *
 *	Most code internal to the osfmk will go through a
 *	macro defining this.  This is always here for the
 *	use of other kernel components.
 */
#undef vm_map_reference
void
vm_map_reference(
	vm_map_t        map)
{
	if (map == VM_MAP_NULL) {
		return;
	}

	lck_mtx_lock(&map->s_lock);
	assert(map->res_count > 0);
	assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
	os_ref_retain_locked(&map->map_refcnt);
	lck_mtx_unlock(&map->s_lock);
}
/*
 *	vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(
	vm_map_t        map)
{
	unsigned int    ref;

	if (map == VM_MAP_NULL) {
		return;
	}

	lck_mtx_lock(&map->s_lock);
	ref = os_ref_release_locked(&map->map_refcnt);
	if (ref > 0) {
		vm_map_res_deallocate(map);
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	assert(os_ref_get_count(&map->map_refcnt) == 0);
	lck_mtx_unlock(&map->s_lock);

	/*
	 * The map residence count isn't decremented here because
	 * the vm_map_delete below will traverse the entire map,
	 * deleting entries, and the residence counts on objects
	 * and sharing maps will go away then.
	 */

	vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
}
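/*
 * Note (added commentary): vm_map_reference() and vm_map_deallocate() are a
 * retain/release pair on map->map_refcnt.  A typical borrow looks like:
 *
 *	vm_map_reference(map);          // keep the map alive across the work
 *	...                             // use the map
 *	vm_map_deallocate(map);         // drop the reference; the last one
 *	                                // destroys the map
 *
 * vm_map_write_user()/vm_map_read_user() above and the submap walk in
 * vm_map_page_range_info_internal() follow exactly this pattern.
 */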
void
vm_map_inspect_deallocate(
	vm_map_inspect_t        map)
{
	vm_map_deallocate((vm_map_t)map);
}

void
vm_map_read_deallocate(
	vm_map_read_t           map)
{
	vm_map_deallocate((vm_map_t)map);
}

void
vm_map_disable_NX(vm_map_t map)
{
	if (map == NULL) {
		return;
	}
	if (map->pmap == NULL) {
		return;
	}

	pmap_disable_NX(map->pmap);
}

void
vm_map_disallow_data_exec(vm_map_t map)
{
	if (map == NULL) {
		return;
	}

	map->map_disallow_data_exec = TRUE;
}

/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
 * more descriptive.
 */
void
vm_map_set_32bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}

void
vm_map_set_64bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
}
19854 * Expand the maximum size of an existing map to the maximum supported.
19857 vm_map_set_jumbo(vm_map_t map
)
19859 #if defined (__arm64__) && !defined(CONFIG_ARROW)
19860 vm_map_set_max_addr(map
, ~0);
19867 * This map has a JIT entitlement
19870 vm_map_set_jit_entitled(vm_map_t map
)
19872 #if defined (__arm64__)
19873 pmap_set_jit_entitled(map
->pmap
);
19880 * Expand the maximum size of an existing map.
19883 vm_map_set_max_addr(vm_map_t map
, vm_map_offset_t new_max_offset
)
19885 #if defined(__arm64__)
19886 vm_map_offset_t max_supported_offset
= 0;
19887 vm_map_offset_t old_max_offset
= map
->max_offset
;
19888 max_supported_offset
= pmap_max_offset(vm_map_is_64bit(map
), ARM_PMAP_MAX_OFFSET_JUMBO
);
19890 new_max_offset
= trunc_page(new_max_offset
);
19892 /* The address space cannot be shrunk using this routine. */
19893 if (old_max_offset
>= new_max_offset
) {
19897 if (max_supported_offset
< new_max_offset
) {
19898 new_max_offset
= max_supported_offset
;
19901 map
->max_offset
= new_max_offset
;
19903 if (map
->holes_list
->prev
->vme_end
== old_max_offset
) {
19905 * There is already a hole at the end of the map; simply make it bigger.
19907 map
->holes_list
->prev
->vme_end
= map
->max_offset
;
19910 * There is no hole at the end, so we need to create a new hole
19911 * for the new empty space we're creating.
19913 struct vm_map_links
*new_hole
= zalloc(vm_map_holes_zone
);
19914 new_hole
->start
= old_max_offset
;
19915 new_hole
->end
= map
->max_offset
;
19916 new_hole
->prev
= map
->holes_list
->prev
;
19917 new_hole
->next
= (struct vm_map_entry
*)map
->holes_list
;
19918 map
->holes_list
->prev
->links
.next
= (struct vm_map_entry
*)new_hole
;
19919 map
->holes_list
->prev
= (struct vm_map_entry
*)new_hole
;
19923 (void)new_max_offset
;
19928 vm_compute_max_offset(boolean_t is64
)
19930 #if defined(__arm__) || defined(__arm64__)
19931 return pmap_max_offset(is64
, ARM_PMAP_MAX_OFFSET_DEVICE
);
19933 return is64
? (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
: (vm_map_offset_t
)VM_MAX_ADDRESS
;
19938 vm_map_get_max_aslr_slide_section(
19939 vm_map_t map __unused
,
19940 int64_t *max_sections
,
19941 int64_t *section_size
)
19943 #if defined(__arm64__)
19945 *section_size
= ARM_TT_TWIG_SIZE
;
vm_map_size_t
vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
	 * limited embedded address space; this is also meant to minimize pmap
	 * memory usage on 16KB page systems.
	 */
	return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}

vm_map_size_t
vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* We limit the loader slide to 4MB, in order to ensure at least 8 bits
	 * of independent entropy on 16KB page systems.
	 */
	return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
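/*
 * Worked numbers for the two limits above: with 4 KB pages
 * (VM_MAP_PAGE_SHIFT == 12), 1 << (24 - 12) == 4096 slide slots of 4 KB each,
 * i.e. 16 MB of slide range; with 16 KB pages (shift 14), 1 << (24 - 14) ==
 * 1024 slots of 16 KB, again 16 MB.  The loader limit 1 << (22 - 14) == 256
 * slots (4 MB) still leaves 8 bits of entropy on 16 KB page systems, which is
 * exactly the guarantee stated in the comment.
 */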
19984 return map
->max_offset
> ((vm_map_offset_t
)VM_MAX_ADDRESS
);
19989 vm_map_has_hard_pagezero(
19991 vm_map_offset_t pagezero_size
)
19995 * We should lock the VM map (for read) here but we can get away
19996 * with it for now because there can't really be any race condition:
19997 * the VM map's min_offset is changed only when the VM map is created
19998 * and when the zero page is established (when the binary gets loaded),
19999 * and this routine gets called only when the task terminates and the
20000 * VM map is being torn down, and when a new map is created via
20001 * load_machfile()/execve().
20003 return map
->min_offset
>= pagezero_size
;
/*
 * Raise a VM map's maximum offset.
 */
kern_return_t
vm_map_raise_max_offset(
	vm_map_t        map,
	vm_map_offset_t new_max_offset)
{
	kern_return_t   ret;

	vm_map_lock(map);
	ret = KERN_INVALID_ADDRESS;

	if (new_max_offset >= map->max_offset) {
		if (!vm_map_is_64bit(map)) {
			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		} else {
			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		}
	}

	vm_map_unlock(map);
	return ret;
}
/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t        map,
	vm_map_offset_t new_min_offset)
{
	vm_map_entry_t  first_entry;

	new_min_offset = vm_map_round_page(new_min_offset,
	    VM_MAP_PAGE_MASK(map));

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	if (new_min_offset >= map->max_offset) {
		/* can't go beyond the end of the address space */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	assert(map->holes_list);
	map->holes_list->start = new_min_offset;
	assert(new_min_offset < map->holes_list->end);

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
 * don't have to reach over to the BSD data structures.
 */
void
vm_map_set_user_wire_limit(vm_map_t     map,
    vm_size_t   limit)
{
	map->user_wire_limit = limit;
20107 vm_map_switch_protect(vm_map_t map
,
20111 map
->switch_protect
= val
;
20112 vm_map_unlock(map
);
20115 extern int cs_process_enforcement_enable
;
20117 vm_map_cs_enforcement(
20120 if (cs_process_enforcement_enable
) {
20123 return map
->cs_enforcement
;
20127 vm_map_cs_enforcement_set(
20132 map
->cs_enforcement
= val
;
20133 pmap_set_vm_map_cs_enforced(map
->pmap
, val
);
20134 vm_map_unlock(map
);
20138 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20139 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20140 * bump both counters.
20143 vm_map_iokit_mapped_region(vm_map_t map
, vm_size_t bytes
)
20145 pmap_t pmap
= vm_map_pmap(map
);
20147 ledger_credit(pmap
->ledger
, task_ledgers
.iokit_mapped
, bytes
);
20148 ledger_credit(pmap
->ledger
, task_ledgers
.phys_footprint
, bytes
);
20152 vm_map_iokit_unmapped_region(vm_map_t map
, vm_size_t bytes
)
20154 pmap_t pmap
= vm_map_pmap(map
);
20156 ledger_debit(pmap
->ledger
, task_ledgers
.iokit_mapped
, bytes
);
20157 ledger_debit(pmap
->ledger
, task_ledgers
.phys_footprint
, bytes
);
20160 /* Add (generate) code signature for memory range */
20161 #if CONFIG_DYNAMIC_CODE_SIGNING
20163 vm_map_sign(vm_map_t map
,
20164 vm_map_offset_t start
,
20165 vm_map_offset_t end
)
20167 vm_map_entry_t entry
;
20169 vm_object_t object
;
	 * Vet all the input parameters and current type and state of the
	 * underlying object. Return with an error if anything is amiss.
20175 if (map
== VM_MAP_NULL
) {
20176 return KERN_INVALID_ARGUMENT
;
20179 vm_map_lock_read(map
);
20181 if (!vm_map_lookup_entry(map
, start
, &entry
) || entry
->is_sub_map
) {
20183 * Must pass a valid non-submap address.
20185 vm_map_unlock_read(map
);
20186 return KERN_INVALID_ADDRESS
;
20189 if ((entry
->vme_start
> start
) || (entry
->vme_end
< end
)) {
20191 * Map entry doesn't cover the requested range. Not handling
20192 * this situation currently.
20194 vm_map_unlock_read(map
);
20195 return KERN_INVALID_ARGUMENT
;
20198 object
= VME_OBJECT(entry
);
20199 if (object
== VM_OBJECT_NULL
) {
20201 * Object must already be present or we can't sign.
20203 vm_map_unlock_read(map
);
20204 return KERN_INVALID_ARGUMENT
;
20207 vm_object_lock(object
);
20208 vm_map_unlock_read(map
);
20210 while (start
< end
) {
20213 m
= vm_page_lookup(object
,
20214 start
- entry
->vme_start
+ VME_OFFSET(entry
));
20215 if (m
== VM_PAGE_NULL
) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
20218 vm_object_unlock(object
);
20219 return KERN_FAILURE
;
20221 /* deal with special page status */
20223 (m
->vmp_unusual
&& (m
->vmp_error
|| m
->vmp_restart
|| m
->vmp_private
|| m
->vmp_absent
))) {
20224 vm_object_unlock(object
);
20225 return KERN_FAILURE
;
20228 /* Page is OK... now "validate" it */
20229 /* This is the place where we'll call out to create a code
20230 * directory, later */
20231 /* XXX TODO4K: deal with 4k subpages individually? */
20232 m
->vmp_cs_validated
= VMP_CS_ALL_TRUE
;
20234 /* The page is now "clean" for codesigning purposes. That means
20235 * we don't consider it as modified (wpmapped) anymore. But
20236 * we'll disconnect the page so we note any future modification
20238 m
->vmp_wpmapped
= FALSE
;
20239 refmod
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
20241 /* Pull the dirty status from the pmap, since we cleared the
20243 if ((refmod
& VM_MEM_MODIFIED
) && !m
->vmp_dirty
) {
20244 SET_PAGE_DIRTY(m
, FALSE
);
20247 /* On to the next page */
20248 start
+= PAGE_SIZE
;
20250 vm_object_unlock(object
);
20252 return KERN_SUCCESS
;
kern_return_t
vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
{
	vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t next_entry;
	kern_return_t kr = KERN_SUCCESS;
	vm_map_t zap_map;

	vm_map_lock(map);

	/*
	 * We use a "zap_map" to avoid having to unlock
	 * the "map" in vm_map_delete().
	 */
	zap_map = vm_map_create(PMAP_NULL,
	    map->min_offset,
	    map->max_offset,
	    map->hdr.entries_pageable);

	if (zap_map == VM_MAP_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	vm_map_set_page_shift(zap_map,
	    VM_MAP_PAGE_SHIFT(map));
	vm_map_disable_hole_optimization(zap_map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = next_entry) {
		next_entry = entry->vme_next;

		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    (VME_OBJECT(entry)->internal == TRUE) &&
		    (VME_OBJECT(entry)->ref_count == 1)) {
			*reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
			*reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);

			(void)vm_map_delete(map,
			    entry->vme_start,
			    entry->vme_end,
			    VM_MAP_REMOVE_SAVE_ENTRIES,
			    zap_map);
		}
	}

	vm_map_unlock(map);

	/*
	 * Get rid of the "zap_maps" and all the map entries that
	 * they may still contain.
	 */
	if (zap_map != VM_MAP_NULL) {
		vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_map = VM_MAP_NULL;
	}

	return kr;
}
#if DEVELOPMENT || DEBUG

int
vm_map_disconnect_page_mappings(
	vm_map_t map,
	boolean_t do_unnest)
{
	vm_map_entry_t entry;
	int page_count = 0;

	if (do_unnest == TRUE) {
#ifndef NO_NESTED_PMAP
		vm_map_lock(map);

		for (entry = vm_map_first_entry(map);
		    entry != vm_map_to_entry(map);
		    entry = entry->vme_next) {
			if (entry->is_sub_map && entry->use_pmap) {
				/*
				 * Make sure the range between the start of this entry and
				 * the end of this entry is no longer nested, so that
				 * we will only remove mappings from the pmap in use by this
				 * task
				 */
				vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
			}
		}
		vm_map_unlock(map);
#endif  /* NO_NESTED_PMAP */
	}
	vm_map_lock_read(map);

	page_count = map->pmap->stats.resident_count;

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
		    (VME_OBJECT(entry)->phys_contiguous))) {
			continue;
		}
		if (entry->is_sub_map) {
			assert(!entry->use_pmap);
		}

		pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
	}
	vm_map_unlock_read(map);

	return page_count;
}
kern_return_t
vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_object_t object = NULL;
	vm_object_offset_t offset;
	vm_prot_t prot;
	boolean_t wired;
	vm_map_t real_map;
	vm_map_version_t version;
	int result = KERN_FAILURE;

	vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
	vm_map_lock(map);

	result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
	    NULL, &real_map, NULL);
	if (object == NULL) {
		result = KERN_MEMORY_ERROR;
	} else if (object->pager) {
		result = vm_compressor_pager_inject_error(object->pager,
		    offset);
	} else {
		result = KERN_MEMORY_PRESENT;
	}

	if (object != NULL) {
		vm_object_unlock(object);
	}

	if (real_map != map) {
		vm_map_unlock(real_map);
	}
	vm_map_unlock(map);

	return result;
}
#endif /* DEVELOPMENT || DEBUG */


#if CONFIG_FREEZE

extern struct freezer_context freezer_context_global;
AbsoluteTime c_freezer_last_yield_ts = 0;

extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
extern unsigned int memorystatus_freeze_shared_mb_per_process_max;

kern_return_t
vm_map_freeze(
	task_t       task,
	unsigned int *purgeable_count,
	unsigned int *wired_count,
	unsigned int *clean_count,
	unsigned int *dirty_count,
	unsigned int dirty_budget,
	unsigned int *shared_count,
	int          *freezer_error_code,
	boolean_t    eval_only)
{
	vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
	kern_return_t   kr = KERN_SUCCESS;
	boolean_t       evaluation_phase = TRUE;
	vm_object_t     cur_shared_object = NULL;
	int             cur_shared_obj_ref_cnt = 0;
	unsigned int    dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;

	*purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;

	/*
	 * We need the exclusive lock here so that we can
	 * block any page faults or lookups while we are
	 * in the middle of freezing this vm map.
	 */
	vm_map_t map = task->map;

	vm_map_lock(map);

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
		if (vm_compressor_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
		}

		if (vm_swap_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
		}

		kr = KERN_NO_SPACE;
		goto done;
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
		/*
		 * In-memory compressor backing the freezer. No disk.
		 * So no need to do the evaluation phase.
		 */
		evaluation_phase = FALSE;

		if (eval_only == TRUE) {
			/*
			 * We don't support 'eval_only' mode
			 * in this non-swap config.
			 */
			*freezer_error_code = FREEZER_ERROR_GENERIC;
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);
	}
again:

	for (entry2 = vm_map_first_entry(map);
	    entry2 != vm_map_to_entry(map);
	    entry2 = entry2->vme_next) {
		vm_object_t src_object = VME_OBJECT(entry2);

		if (src_object &&
		    !entry2->is_sub_map &&
		    !src_object->phys_contiguous) {
			/* If eligible, scan the entry, moving eligible pages over to our parent object */

			if (src_object->internal == TRUE) {
				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					/*
					 * We skip purgeable objects during evaluation phase only.
					 * If we decide to freeze this process, we'll explicitly
					 * purge these objects before we go around again with
					 * 'evaluation_phase' set to FALSE.
					 */

					if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
						/*
						 * We want to purge objects that may not belong to this task but are mapped
						 * in this task alone. Since we already purged this task's purgeable memory
						 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
						 * on this task's purgeable objects. Hence the check for only volatile objects.
						 */
						if (evaluation_phase == FALSE &&
						    (src_object->purgable == VM_PURGABLE_VOLATILE) &&
						    (src_object->ref_count == 1)) {
							vm_object_lock(src_object);
							vm_object_purge(src_object, 0);
							vm_object_unlock(src_object);
						}
						continue;
					}

					/*
					 * Pages belonging to this object could be swapped to disk.
					 * Make sure it's not a shared object because we could end
					 * up just bringing it back in again.
					 *
					 * We try to optimize somewhat by checking for objects that are mapped
					 * more than once within our own map. But we don't do full searches,
					 * we just look at the entries following our current entry.
					 */

					if (src_object->ref_count > 1) {
						if (src_object != cur_shared_object) {
							obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
							dirty_shared_count += obj_pages_snapshot;

							cur_shared_object = src_object;
							cur_shared_obj_ref_cnt = 1;
							continue;
						} else {
							cur_shared_obj_ref_cnt++;
							if (src_object->ref_count == cur_shared_obj_ref_cnt) {
								/*
								 * Fall through to below and treat this object as private.
								 * So deduct its pages from our shared total and add it to the
								 * private total.
								 */
								dirty_shared_count -= obj_pages_snapshot;
								dirty_private_count += obj_pages_snapshot;
							} else {
								continue;
							}
						}
					}

					if (src_object->ref_count == 1) {
						dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
					}

					if (evaluation_phase == TRUE) {
						continue;
					}
				}

				uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
				*wired_count += src_object->wired_page_count;

				if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
					if (vm_compressor_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
					}

					if (vm_swap_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
					}

					kr = KERN_NO_SPACE;
					break;
				}
				if (paged_out_count >= dirty_budget) {
					break;
				}
				dirty_budget -= paged_out_count;
			}
		}
	}

	*shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
	if (evaluation_phase) {
		unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;

		if (dirty_shared_count > shared_pages_threshold) {
			*freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
			kr = KERN_FAILURE;
			goto done;
		}

		if (dirty_shared_count &&
		    ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
			*freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
			kr = KERN_FAILURE;
			goto done;
		}

		evaluation_phase = FALSE;
		dirty_shared_count = dirty_private_count = 0;

		freezer_context_global.freezer_ctx_uncompressed_pages = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);

		if (eval_only) {
			kr = KERN_SUCCESS;
			goto done;
		}

		vm_purgeable_purge_task_owned(task);

		goto again;
	} else {
		kr = KERN_SUCCESS;
	}

done:
	vm_map_unlock(map);

	if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
		vm_object_compressed_freezer_done();
	}
	return kr;
}
#endif /* CONFIG_FREEZE */
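
/*
 * Worked example (illustrative, made-up numbers) of the evaluation-phase
 * checks in vm_map_freeze() above, assuming 16KB pages:
 * with memorystatus_freeze_shared_mb_per_process_max == 1, the shared
 * threshold is (1 * 1024 * 1024) / 16384 == 64 pages, so a task with
 * dirty_shared_count == 100 fails with FREEZER_ERROR_EXCESS_SHARED_MEMORY;
 * and with memorystatus_freeze_private_shared_pages_ratio == 2, a task with
 * 4000 private vs 50 shared dirty pages passes because 4000 / 50 == 80 >= 2.
 */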
/*
 * vm_map_entry_should_cow_for_true_share:
 *
 * Determines if the map entry should be clipped and setup for copy-on-write
 * to avoid applying "true_share" to a large VM object when only a subset is
 * targeted.
 *
 * For now, we target only the map entries created for the Objective C
 * Garbage Collector, which initially have the following properties:
 *      - alias == VM_MEMORY_MALLOC
 *      - wired_count == 0
 *      - !needs_copy
 * and a VM object with:
 *      - internal
 *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
 *      - !true_share
 *      - vo_size == ANON_CHUNK_SIZE
 *
 * Only non-kernel map entries.
 */
boolean_t
vm_map_entry_should_cow_for_true_share(
	vm_map_entry_t  entry)
{
	vm_object_t     object;

	if (entry->is_sub_map) {
		/* entry does not point at a VM object */
		return FALSE;
	}

	if (entry->needs_copy) {
		/* already set for copy_on_write: done! */
		return FALSE;
	}

	if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
	    VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
		/* not a malloc heap or Obj-C Garbage Collector heap */
		return FALSE;
	}

	if (entry->wired_count) {
		/* wired: can't change the map entry... */
		vm_counters.should_cow_but_wired++;
		return FALSE;
	}

	object = VME_OBJECT(entry);

	if (object == VM_OBJECT_NULL) {
		/* no object yet... */
		return FALSE;
	}

	if (!object->internal) {
		/* not an internal object */
		return FALSE;
	}

	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
		/* not the default copy strategy */
		return FALSE;
	}

	if (object->true_share) {
		/* already true_share: too late to avoid it */
		return FALSE;
	}

	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
	    object->vo_size != ANON_CHUNK_SIZE) {
		/* ... not an object created for the ObjC Garbage Collector */
		return FALSE;
	}

	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
	    object->vo_size != 2048 * 4096) {
		/* ... not a "MALLOC_SMALL" heap */
		return FALSE;
	}

	/*
	 * All the criteria match: we have a large object being targeted for "true_share".
	 * To limit the adverse side-effects linked with "true_share", tell the caller to
	 * try and avoid setting up the entire object for "true_share" by clipping the
	 * targeted range and setting it up for copy-on-write.
	 */
	return TRUE;
}
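
/*
 * Illustrative sketch (not from the original source), loosely modeled on how
 * callers elsewhere in this file use the predicate above: when the entry
 * qualifies and the requested range is page aligned, the caller clips the
 * entry to the targeted range so only that range ends up copy-on-write.
 * "map", "entry", "start" and "end" are assumed to be in the caller's scope.
 */
#if 0
	if (vm_map_entry_should_cow_for_true_share(entry) &&
	    VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) &&
	    VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) {
		/* clip to the targeted range instead of marking the
		 * whole object "true_share" */
		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);
	}
#endif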
vm_map_offset_t
vm_map_round_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_ROUND_PAGE(offset, mask);
}

vm_map_offset_t
vm_map_trunc_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_TRUNC_PAGE(offset, mask);
}

boolean_t
vm_map_page_aligned(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return ((offset) & mask) == 0;
}

int
vm_map_page_shift(
	vm_map_t map)
{
	return VM_MAP_PAGE_SHIFT(map);
}

int
vm_map_page_size(
	vm_map_t map)
{
	return VM_MAP_PAGE_SIZE(map);
}

vm_map_offset_t
vm_map_page_mask(
	vm_map_t map)
{
	return VM_MAP_PAGE_MASK(map);
}
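
/*
 * Illustrative arithmetic for the helpers above (made-up values), assuming
 * the usual mask-based rounding where "mask" is page_size - 1: with 16KB
 * pages, mask == 0x3FFF, so for offset == 0x12345:
 *   trunc:   0x12345 & ~0x3FFF            == 0x10000
 *   round:   (0x12345 + 0x3FFF) & ~0x3FFF == 0x14000
 *   aligned: (0x10000 & 0x3FFF) == 0, so vm_map_page_aligned() returns TRUE
 */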
kern_return_t
vm_map_set_page_shift(
	vm_map_t        map,
	int             pageshift)
{
	if (map->hdr.nentries != 0) {
		/* too late to change page size */
		return KERN_FAILURE;
	}

	map->hdr.page_shift = pageshift;

	return KERN_SUCCESS;
}
kern_return_t
vm_map_query_volatile(
	vm_map_t        map,
	mach_vm_size_t  *volatile_virtual_size_p,
	mach_vm_size_t  *volatile_resident_size_p,
	mach_vm_size_t  *volatile_compressed_size_p,
	mach_vm_size_t  *volatile_pmap_size_p,
	mach_vm_size_t  *volatile_compressed_pmap_size_p)
{
	mach_vm_size_t  volatile_virtual_size;
	mach_vm_size_t  volatile_resident_count;
	mach_vm_size_t  volatile_compressed_count;
	mach_vm_size_t  volatile_pmap_count;
	mach_vm_size_t  volatile_compressed_pmap_count;
	mach_vm_size_t  resident_count;
	vm_map_entry_t  entry;
	vm_object_t     object;

	/* map should be locked by caller */

	volatile_virtual_size = 0;
	volatile_resident_count = 0;
	volatile_compressed_count = 0;
	volatile_pmap_count = 0;
	volatile_compressed_pmap_count = 0;

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;

		if (entry->is_sub_map) {
			continue;
		}
		if (!(entry->protection & VM_PROT_WRITE)) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}
		if (object->purgable != VM_PURGABLE_VOLATILE &&
		    object->purgable != VM_PURGABLE_EMPTY) {
			continue;
		}
		if (VME_OFFSET(entry)) {
			/*
			 * If the map entry has been split and the object now
			 * appears several times in the VM map, we don't want
			 * to count the object's resident_page_count more than
			 * once.  We count it only for the first one, starting
			 * at offset 0 and ignore the other VM map entries.
			 */
			continue;
		}
		resident_count = object->resident_page_count;
		if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
			resident_count = 0;
		} else {
			resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
		}

		volatile_virtual_size += entry->vme_end - entry->vme_start;
		volatile_resident_count += resident_count;
		if (object->pager) {
			volatile_compressed_count +=
			    vm_compressor_pager_get_count(object->pager);
		}
		pmap_compressed_bytes = 0;
		pmap_resident_bytes =
		    pmap_query_resident(map->pmap,
		    entry->vme_start,
		    entry->vme_end,
		    &pmap_compressed_bytes);
		volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
		volatile_compressed_pmap_count += (pmap_compressed_bytes
		    / PAGE_SIZE);
	}

	/* map is still locked on return */

	*volatile_virtual_size_p = volatile_virtual_size;
	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
	*volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
	*volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;

	return KERN_SUCCESS;
}
void
vm_map_sizes(vm_map_t map,
    vm_map_size_t * psize,
    vm_map_size_t * pfree,
    vm_map_size_t * plargest_free)
{
	vm_map_entry_t  entry;
	vm_map_offset_t prev;
	vm_map_size_t   free, total_free, largest_free;
	boolean_t       end;

	if (!map) {
		*psize = *pfree = *plargest_free = 0;
		return;
	}
	total_free = largest_free = 0;

	vm_map_lock_read(map);
	if (psize) {
		*psize = map->max_offset - map->min_offset;
	}

	prev = map->min_offset;
	for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
		end = (entry == vm_map_to_entry(map));

		if (end) {
			free = entry->vme_end - prev;
		} else {
			free = entry->vme_start - prev;
		}

		total_free += free;
		if (free > largest_free) {
			largest_free = free;
		}

		if (end) {
			break;
		}
		prev = entry->vme_end;
	}
	vm_map_unlock_read(map);
	if (pfree) {
		*pfree = total_free;
	}
	if (plargest_free) {
		*plargest_free = largest_free;
	}
}
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);
int
vm_map_shadow_max(
	vm_map_t map)
{
	int             shadows, shadows_max;
	vm_map_entry_t  entry;
	vm_object_t     object, next_object;

	if (map == NULL) {
		return 0;
	}

	shadows_max = 0;

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (entry->is_sub_map) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}
		vm_object_lock_shared(object);
		for (shadows = 0;
		    object->shadow != NULL;
		    shadows++, object = next_object) {
			next_object = object->shadow;
			vm_object_lock_shared(next_object);
			vm_object_unlock(object);
		}
		vm_object_unlock(object);
		if (shadows > shadows_max) {
			shadows_max = shadows;
		}
	}

	vm_map_unlock_read(map);

	return shadows_max;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
void
vm_commit_pagezero_status(vm_map_t lmap)
{
	pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
}

#if XNU_TARGET_OS_OSX
void
vm_map_set_high_start(
	vm_map_t        map,
	vm_map_offset_t high_start)
{
	map->vmmap_high_start = high_start;
}
#endif /* XNU_TARGET_OS_OSX */
/*
 * FORKED CORPSE FOOTPRINT
 *
 * A forked corpse gets a copy of the original VM map but its pmap is mostly
 * empty since it never ran and never got to fault in any pages.
 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
 * a forked corpse would therefore return very little information.
 *
 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
 * to vm_map_fork() to collect footprint information from the original VM map
 * and its pmap, and store it in the forked corpse's VM map.  That information
 * is stored in place of the VM map's "hole list" since we'll never need to
 * lookup for holes in the corpse's map.
 *
 * The corpse's footprint info looks like this:
 *
 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
 * as follows:
 *                     +---------------------------------------+
 *            header-> | cf_size                               |
 *                     +-------------------+-------------------+
 *                     | cf_last_region    | cf_last_zeroes    |
 *                     +-------------------+-------------------+
 *           region1-> | cfr_vaddr                             |
 *                     +-------------------+-------------------+
 *                     | cfr_num_pages     | d0 | d1 | d2 | d3 |
 *                     +---------------------------------------+
 *                     | d4 | d5 | ...                         |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +-------------------+-------------------+
 *                     | dy | dz | na | na | cfr_vaddr...      | <-region2
 *                     +-------------------+-------------------+
 *                     | cfr_vaddr (ctd)   | cfr_num_pages     |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +---------------------------------------+
 *       last region-> | cfr_vaddr                             |
 *                     +---------------------------------------+
 *                     + cfr_num_pages     | d0 | d1 | d2 | d3 |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +---------------------------------------+
 *                     | dx | dy | dz | na | na | na | na | na |
 *                     +---------------------------------------+
 *
 * cf_size:     total size of the buffer (rounded to page size)
 * cf_last_region: offset in the buffer of the last "region" sub-header
 * cf_last_zeroes: number of trailing "zero" dispositions at the end
 *              of the last region
 * cfr_vaddr:   virtual address of the start of the covered "region"
 * cfr_num_pages: number of pages in the covered "region"
 * d*: disposition of the page at that virtual address
 * Regions in the buffer are word-aligned.
 *
 * We estimate the size of the buffer based on the number of memory regions
 * and the virtual size of the address space.  While copying each memory region
 * during vm_map_fork(), we also collect the footprint info for that region
 * and store it in the buffer, packing it as much as possible (coalescing
 * contiguous memory regions to avoid having too many region headers and
 * avoiding long streaks of "zero" page dispositions by splitting footprint
 * "regions"), so the number of regions in the footprint buffer might not match
 * the number of memory regions in the address space.
 *
 * We also have to copy the original task's "nonvolatile" ledgers since that's
 * part of the footprint and will need to be reported to any tool asking for
 * the footprint information of the forked corpse.
 */
uint64_t vm_map_corpse_footprint_count = 0;
uint64_t vm_map_corpse_footprint_size_avg = 0;
uint64_t vm_map_corpse_footprint_size_max = 0;
uint64_t vm_map_corpse_footprint_full = 0;
uint64_t vm_map_corpse_footprint_no_buf = 0;

struct vm_map_corpse_footprint_header {
	vm_size_t       cf_size;        /* allocated buffer size */
	uint32_t        cf_last_region; /* offset of last region in buffer */
	union {
		uint32_t cfu_last_zeroes; /* during creation:
		                           * number of "zero" dispositions at
		                           * end of last region */
		uint32_t cfu_hint_region; /* during lookup:
		                           * offset of last looked up region */
#define cf_last_zeroes cfu.cfu_last_zeroes
#define cf_hint_region cfu.cfu_hint_region
	} cfu;
};
typedef uint8_t cf_disp_t;
struct vm_map_corpse_footprint_region {
	vm_map_offset_t cfr_vaddr;      /* region start virtual address */
	uint32_t        cfr_num_pages;  /* number of pages in this "region" */
	cf_disp_t       cfr_disposition[0];     /* disposition of each page */
} __attribute__((packed));
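
/*
 * Illustrative sketch (not part of the original source) of how the layout
 * described above is walked: a region's dispositions immediately follow its
 * fixed-size header, and the next region starts at the word-aligned offset
 * past those dispositions.  "region_offset" is a made-up name for an offset
 * into the footprint buffer.
 */
#if 0
	struct vm_map_corpse_footprint_header *hdr = map->vmmap_corpse_footprint;
	struct vm_map_corpse_footprint_region *region =
	    (struct vm_map_corpse_footprint_region *)((char *)hdr + region_offset);

	/* dispositions for this region follow its header */
	cf_disp_t *disps = (cf_disp_t *)((uintptr_t)region + sizeof(*region));

	/* next region: skip header + dispositions, then align to an int boundary */
	uint32_t next_offset = region_offset + sizeof(*region) +
	    region->cfr_num_pages * sizeof(cf_disp_t);
	next_offset = roundup(next_offset, sizeof(int));
#endif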
static cf_disp_t
vm_page_disposition_to_cf_disp(
	int disposition)
{
	assert(sizeof(cf_disp_t) == 1);
	/* relocate bits that don't fit in a "uint8_t" */
	if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
		disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
	}
	/* cast gets rid of extra bits */
	return (cf_disp_t) disposition;
}

static int
vm_page_cf_disp_to_disposition(
	cf_disp_t cf_disp)
{
	int disposition;

	assert(sizeof(cf_disp_t) == 1);
	disposition = (int) cf_disp;
	/* move relocated bits back in place */
	if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
		disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
		disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
	}
	return disposition;
}
/*
 * vm_map_corpse_footprint_new_region:
 *      closes the current footprint "region" and creates a new one
 *
 * Returns NULL if there's not enough space in the buffer for a new region.
 */
static struct vm_map_corpse_footprint_region *
vm_map_corpse_footprint_new_region(
	struct vm_map_corpse_footprint_header *footprint_header)
{
	uintptr_t       footprint_edge;
	uint32_t        new_region_offset;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;

	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);
	footprint_region = ((struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region));
	assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
	    footprint_edge);

	/* get rid of trailing zeroes in the last region */
	assert(footprint_region->cfr_num_pages >=
	    footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -=
	    footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* reuse this region if it's now empty */
	if (footprint_region->cfr_num_pages == 0) {
		return footprint_region;
	}

	/* compute offset of new region */
	new_region_offset = footprint_header->cf_last_region;
	new_region_offset += sizeof(*footprint_region);
	new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
	new_region_offset = roundup(new_region_offset, sizeof(int));

	/* check if we're going over the edge */
	if (((uintptr_t)footprint_header +
	    new_region_offset +
	    sizeof(*footprint_region)) >=
	    footprint_edge) {
		/* over the edge: no new region */
		return NULL;
	}

	/* adjust offset of last region in header */
	footprint_header->cf_last_region = new_region_offset;

	new_footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);
	new_footprint_region->cfr_vaddr = 0;
	new_footprint_region->cfr_num_pages = 0;
	/* caller needs to initialize new region */

	return new_footprint_region;
}
/*
 * vm_map_corpse_footprint_collect:
 *      collect footprint information for "old_entry" in "old_map" and
 *      stores it in "new_map"'s vmmap_footprint_info.
 */
kern_return_t
vm_map_corpse_footprint_collect(
	vm_map_t        old_map,
	vm_map_entry_t  old_entry,
	vm_map_t        new_map)
{
	vm_map_offset_t va;
	kern_return_t   kr;
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;
	cf_disp_t       *next_disp_p;
	uintptr_t       footprint_edge;
	uint32_t        num_pages_tmp;
	int             effective_page_size;

	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));

	va = old_entry->vme_start;

	vm_map_lock_assert_exclusive(old_map);
	vm_map_lock_assert_exclusive(new_map);

	assert(new_map->has_corpse_footprint);
	assert(!old_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    old_map->has_corpse_footprint) {
		/*
		 * This can only transfer footprint info from a
		 * map with a live pmap to a map with a corpse footprint.
		 */
		return KERN_NOT_SUPPORTED;
	}

	if (new_map->vmmap_corpse_footprint == NULL) {
		vm_offset_t     buf;
		vm_size_t       buf_size;

		buf_size = (sizeof(*footprint_header) +
		    (old_map->hdr.nentries
		    *
		    (sizeof(*footprint_region) +
		    +3))                                /* potential alignment for each region */
		    +
		    ((old_map->size / effective_page_size)
		    *
		    sizeof(cf_disp_t)));                /* disposition for each page */
//		printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
		buf_size = round_page(buf_size);

		/* limit buffer to 1 page to validate overflow detection */
//		buf_size = PAGE_SIZE;

		/* limit size to a somewhat sane amount */
#if XNU_TARGET_OS_OSX
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
#else /* XNU_TARGET_OS_OSX */
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
#endif /* XNU_TARGET_OS_OSX */
		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
		}

		/*
		 * Allocate the pageable buffer (with a trailing guard page).
		 * It will be zero-filled on demand.
		 */
		kr = kernel_memory_allocate(kernel_map,
		    &buf,
		    (buf_size
		    + PAGE_SIZE),                       /* trailing guard page */
		    0,                                  /* mask */
		    KMA_PAGEABLE | KMA_GUARD_LAST,
		    VM_KERN_MEMORY_DIAG);
		if (kr != KERN_SUCCESS) {
			vm_map_corpse_footprint_no_buf++;
			return kr;
		}

		/* initialize header and 1st region */
		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
		new_map->vmmap_corpse_footprint = footprint_header;

		footprint_header->cf_size = buf_size;
		footprint_header->cf_last_region =
		    sizeof(*footprint_header);
		footprint_header->cf_last_zeroes = 0;

		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
		footprint_region->cfr_vaddr = 0;
		footprint_region->cfr_num_pages = 0;
	} else {
		/* retrieve header and last region */
		footprint_header = (struct vm_map_corpse_footprint_header *)
		    new_map->vmmap_corpse_footprint;
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
	}
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);

	if ((footprint_region->cfr_vaddr +
	    (((vm_map_offset_t)footprint_region->cfr_num_pages) *
	    effective_page_size))
	    != old_entry->vme_start) {
		uint64_t num_pages_delta, num_pages_delta_size;
		uint32_t region_offset_delta_size;

		/*
		 * Not the next contiguous virtual address:
		 * start a new region or store "zero" dispositions for
		 * the missing pages?
		 */
		/* size of gap in actual page dispositions */
		num_pages_delta = ((old_entry->vme_start -
		    footprint_region->cfr_vaddr) / effective_page_size)
		    - footprint_region->cfr_num_pages;
		num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
		/* size of gap as a new footprint region header */
		region_offset_delta_size =
		    (sizeof(*footprint_region) +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
		    sizeof(int)) -
		    ((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
//		printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
		if (region_offset_delta_size < num_pages_delta_size ||
		    os_add3_overflow(footprint_region->cfr_num_pages,
		    (uint32_t) num_pages_delta,
		    1,
		    &num_pages_tmp)) {
			/*
			 * Storing data for this gap would take more space
			 * than inserting a new footprint region header:
			 * let's start a new region and save space. If it's a
			 * tie, let's avoid using a new region, since that
			 * would require more region hops to find the right
			 * range during lookups.
			 *
			 * If the current region's cfr_num_pages would overflow
			 * if we added "zero" page dispositions for the gap,
			 * no choice but to start a new region.
			 */
//			printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(footprint_header);
			/* check that we're not going over the edge */
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			/* initialize new region as empty */
			footprint_region->cfr_vaddr = old_entry->vme_start;
			footprint_region->cfr_num_pages = 0;
		} else {
			/*
			 * Store "zero" page dispositions for the missing
			 * pages.
			 */
//			printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
			for (; num_pages_delta > 0; num_pages_delta--) {
				next_disp_p = (cf_disp_t *)
				    ((uintptr_t) footprint_region +
				    sizeof(*footprint_region));
				next_disp_p += footprint_region->cfr_num_pages;
				/* check that we're not going over the edge */
				if ((uintptr_t)next_disp_p >= footprint_edge) {
					goto over_the_edge;
				}
				/* store "zero" disposition for this gap page */
				footprint_region->cfr_num_pages++;
				*next_disp_p = (cf_disp_t) 0;
				footprint_header->cf_last_zeroes++;
			}
		}
	}

	for (va = old_entry->vme_start;
	    va < old_entry->vme_end;
	    va += effective_page_size) {
		int             disposition;
		cf_disp_t       cf_disp;

		vm_map_footprint_query_page_info(old_map,
		    old_entry,
		    va,
		    &disposition);
		cf_disp = vm_page_disposition_to_cf_disp(disposition);

//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);

		if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
			/*
			 * Ignore "zero" dispositions at start of
			 * region: just move start of region.
			 */
			footprint_region->cfr_vaddr += effective_page_size;
			continue;
		}

		/* would region's cfr_num_pages overflow? */
		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
		    &num_pages_tmp)) {
			/* overflow: create a new region */
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(
				footprint_header);
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			footprint_region->cfr_vaddr = va;
			footprint_region->cfr_num_pages = 0;
		}

		next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
		    sizeof(*footprint_region));
		next_disp_p += footprint_region->cfr_num_pages;
		/* check that we're not going over the edge */
		if ((uintptr_t)next_disp_p >= footprint_edge) {
			goto over_the_edge;
		}
		/* store this disposition */
		*next_disp_p = cf_disp;
		footprint_region->cfr_num_pages++;

		if (cf_disp != 0) {
			/* non-zero disp: break the current zero streak */
			footprint_header->cf_last_zeroes = 0;
			/* done */
			continue;
		}

		/* zero disp: add to the current streak of zeroes */
		footprint_header->cf_last_zeroes++;
		if ((footprint_header->cf_last_zeroes +
		    roundup(((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
		    (sizeof(int) - 1),
		    sizeof(int))) <
		    (sizeof(*footprint_header))) {
			/*
			 * There are not enough trailing "zero" dispositions
			 * (+ the extra padding we would need for the previous
			 * region); creating a new region would not save space
			 * at this point, so let's keep this "zero" disposition
			 * in this region and reconsider later.
			 */
			continue;
		}
		/*
		 * Create a new region to avoid having too many consecutive
		 * "zero" dispositions.
		 */
		new_footprint_region =
		    vm_map_corpse_footprint_new_region(footprint_header);
		if (new_footprint_region == NULL) {
			goto over_the_edge;
		}
		footprint_region = new_footprint_region;
		/* initialize the new region as empty ... */
		footprint_region->cfr_num_pages = 0;
		/* ... and skip this "zero" disp */
		footprint_region->cfr_vaddr = va + effective_page_size;
	}

	return KERN_SUCCESS;

over_the_edge:
//	printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
	vm_map_corpse_footprint_full++;
	return KERN_RESOURCE_SHORTAGE;
}
/*
 * vm_map_corpse_footprint_collect_done:
 *      completes the footprint collection by getting rid of any remaining
 *      trailing "zero" dispositions and trimming the unused part of the
 *      kernel buffer
 */
void
vm_map_corpse_footprint_collect_done(
	vm_map_t        new_map)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	vm_size_t       buf_size, actual_size;
	kern_return_t   kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
	    new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	actual_size = (vm_size_t)(footprint_header->cf_last_region +
	    sizeof(*footprint_region) +
	    (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));

//	printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	vm_map_corpse_footprint_size_avg =
	    (((vm_map_corpse_footprint_size_avg *
	    vm_map_corpse_footprint_count) +
	    actual_size) /
	    (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		kr = vm_deallocate(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size +
		    PAGE_SIZE),                         /* trailing guard page */
		    (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
		    "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
		kr = vm_protect(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size),
		    PAGE_SIZE,
		    FALSE,                              /* set_maximum */
		    VM_PROT_NONE);
		assertf(kr == KERN_SUCCESS,
		    "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
	}

	footprint_header->cf_size = actual_size;
}
/*
 * vm_map_corpse_footprint_query_page_info:
 *      retrieves the disposition of the page at virtual address "vaddr"
 *      in the forked corpse's VM map
 *
 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
 */
kern_return_t
vm_map_corpse_footprint_query_page_info(
	vm_map_t        map,
	vm_map_offset_t va,
	int             *disposition_p)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t        footprint_region_offset;
	vm_map_offset_t region_start, region_end;
	int             disp_idx;
	cf_disp_t       cf_disp;
	int             effective_page_size;
	kern_return_t   kr;

	if (!map->has_corpse_footprint) {
		*disposition_p = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		*disposition_p = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));

lookup_again:
	if (footprint_region_offset < sizeof(*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
	    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
	    effective_page_size));
	if (va < region_start &&
	    footprint_region_offset != sizeof(*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof(*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
		goto lookup_again;
	}

	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof(*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
		/* align to next word boundary */
		footprint_region_offset =
		    roundup(footprint_region_offset,
		    sizeof(int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    effective_page_size));
	}
	if (va < region_start || va >= region_end) {
		/* page not found */
		*disposition_p = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
	cf_disp = footprint_region->cfr_disposition[disp_idx];
	*disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
	kr = KERN_SUCCESS;
done:
//	if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
	    vm_map_t, map,
	    vm_map_offset_t, va,
	    int, *disposition_p,
	    kern_return_t, kr);

	return kr;
}
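
/*
 * Illustrative usage sketch (not from the original source): a hypothetical
 * caller querying the corpse footprint for one page.  "some_corpse_map" and
 * "some_user_address" are made-up names.
 */
#if 0
	int disposition = 0;
	kern_return_t kr;

	kr = vm_map_corpse_footprint_query_page_info(some_corpse_map,
	    some_user_address,
	    &disposition);
	if (kr == KERN_SUCCESS &&
	    (disposition & VM_PAGE_QUERY_PAGE_PRESENT)) {
		/* the page was resident when the corpse was forked */
	}
#endif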
void
vm_map_corpse_footprint_destroy(
	vm_map_t        map)
{
	if (map->has_corpse_footprint &&
	    map->vmmap_corpse_footprint != 0) {
		struct vm_map_corpse_footprint_header *footprint_header;
		vm_size_t buf_size;
		kern_return_t kr;

		footprint_header = map->vmmap_corpse_footprint;
		buf_size = footprint_header->cf_size;
		kr = vm_deallocate(kernel_map,
		    (vm_offset_t) map->vmmap_corpse_footprint,
		    ((vm_size_t) buf_size
		    + PAGE_SIZE));                      /* trailing guard page */
		assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
		map->vmmap_corpse_footprint = 0;
		map->has_corpse_footprint = FALSE;
	}
}
/*
 * vm_map_copy_footprint_ledgers:
 *      copies any ledger that's relevant to the memory footprint of "old_task"
 *      into the forked corpse's task ("new_task")
 */
void
vm_map_copy_footprint_ledgers(
	task_t  old_task,
	task_t  new_task)
{
	vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
}
/*
 * vm_map_copy_ledger:
 *      copy a single ledger from "old_task" to "new_task"
 */
void
vm_map_copy_ledger(
	task_t  old_task,
	task_t  new_task,
	int     ledger_entry)
{
	ledger_amount_t old_balance, new_balance, delta;

	assert(new_task->map->has_corpse_footprint);
	if (!new_task->map->has_corpse_footprint) {
		return;
	}

	/* turn off sanity checks for the ledger we're about to mess with */
	ledger_disable_panic_on_negative(new_task->ledger,
	    ledger_entry);

	/* adjust "new_task" to match "old_task" */
	ledger_get_balance(old_task->ledger,
	    ledger_entry,
	    &old_balance);
	ledger_get_balance(new_task->ledger,
	    ledger_entry,
	    &new_balance);
	if (new_balance == old_balance) {
		/* new == old: done */
	} else if (new_balance > old_balance) {
		/* new > old ==> new -= new - old */
		delta = new_balance - old_balance;
		ledger_debit(new_task->ledger,
		    ledger_entry,
		    delta);
	} else {
		/* new < old ==> new += old - new */
		delta = old_balance - new_balance;
		ledger_credit(new_task->ledger,
		    ledger_entry,
		    delta);
	}
}
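
/*
 * Worked example (made-up numbers) of the balancing logic above: if the
 * original task's ledger balance is 300 units and the corpse's is 500, the
 * corpse ledger is debited by delta == 200; if the corpse's is 100, it is
 * credited by delta == 200.  Either way the corpse ends up reporting the
 * same balance as the original task.
 */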
#if MACH_ASSERT

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

#define LEDGER_DRIFT(__LEDGER)                  \
	int             __LEDGER##_over;        \
	ledger_amount_t __LEDGER##_over_total;  \
	ledger_amount_t __LEDGER##_over_max;    \
	int             __LEDGER##_under;       \
	ledger_amount_t __LEDGER##_under_total; \
	ledger_amount_t __LEDGER##_under_max

struct {
	uint64_t        num_pmaps_checked;

	LEDGER_DRIFT(phys_footprint);
	LEDGER_DRIFT(internal);
	LEDGER_DRIFT(internal_compressed);
	LEDGER_DRIFT(iokit_mapped);
	LEDGER_DRIFT(alternate_accounting);
	LEDGER_DRIFT(alternate_accounting_compressed);
	LEDGER_DRIFT(page_table);
	LEDGER_DRIFT(purgeable_volatile);
	LEDGER_DRIFT(purgeable_nonvolatile);
	LEDGER_DRIFT(purgeable_volatile_compressed);
	LEDGER_DRIFT(purgeable_nonvolatile_compressed);
	LEDGER_DRIFT(tagged_nofootprint);
	LEDGER_DRIFT(tagged_footprint);
	LEDGER_DRIFT(tagged_nofootprint_compressed);
	LEDGER_DRIFT(tagged_footprint_compressed);
	LEDGER_DRIFT(network_volatile);
	LEDGER_DRIFT(network_nonvolatile);
	LEDGER_DRIFT(network_volatile_compressed);
	LEDGER_DRIFT(network_nonvolatile_compressed);
	LEDGER_DRIFT(media_nofootprint);
	LEDGER_DRIFT(media_footprint);
	LEDGER_DRIFT(media_nofootprint_compressed);
	LEDGER_DRIFT(media_footprint_compressed);
	LEDGER_DRIFT(graphics_nofootprint);
	LEDGER_DRIFT(graphics_footprint);
	LEDGER_DRIFT(graphics_nofootprint_compressed);
	LEDGER_DRIFT(graphics_footprint_compressed);
	LEDGER_DRIFT(neural_nofootprint);
	LEDGER_DRIFT(neural_footprint);
	LEDGER_DRIFT(neural_nofootprint_compressed);
	LEDGER_DRIFT(neural_footprint_compressed);
} pmap_ledgers_drift;

void
vm_map_pmap_check_ledgers(
	pmap_t          pmap,
	ledger_t        ledger,
	int             pid,
	char            *procname)
{
	ledger_amount_t bal;
	boolean_t       do_panic;

	do_panic = FALSE;

	pmap_ledgers_drift.num_pmaps_checked++;

#define LEDGER_CHECK_BALANCE(__LEDGER)                                  \
MACRO_BEGIN                                                             \
	int panic_on_negative = TRUE;                                   \
	ledger_get_balance(ledger,                                      \
	    task_ledgers.__LEDGER,                                      \
	    &bal);                                                      \
	ledger_get_panic_on_negative(ledger,                            \
	    task_ledgers.__LEDGER,                                      \
	    &panic_on_negative);                                        \
	if (bal != 0) {                                                 \
	        if (panic_on_negative ||                                \
	            (pmap_ledgers_panic &&                              \
	            pmap_ledgers_panic_leeway > 0 &&                    \
	            (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||   \
	            bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
	                do_panic = TRUE;                                \
	        }                                                       \
	        printf("LEDGER BALANCE proc %d (%s) "                   \
	            "\"%s\" = %lld\n",                                  \
	            pid, procname, #__LEDGER, bal);                     \
	        if (bal > 0) {                                          \
	                pmap_ledgers_drift.__LEDGER##_over++;           \
	                pmap_ledgers_drift.__LEDGER##_over_total += bal; \
	                if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
	                        pmap_ledgers_drift.__LEDGER##_over_max = bal; \
	                }                                               \
	        } else if (bal < 0) {                                   \
	                pmap_ledgers_drift.__LEDGER##_under++;          \
	                pmap_ledgers_drift.__LEDGER##_under_total += bal; \
	                if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
	                        pmap_ledgers_drift.__LEDGER##_under_max = bal; \
	                }                                               \
	        }                                                       \
	}                                                               \
MACRO_END

	LEDGER_CHECK_BALANCE(phys_footprint);
	LEDGER_CHECK_BALANCE(internal);
	LEDGER_CHECK_BALANCE(internal_compressed);
	LEDGER_CHECK_BALANCE(iokit_mapped);
	LEDGER_CHECK_BALANCE(alternate_accounting);
	LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
	LEDGER_CHECK_BALANCE(page_table);
	LEDGER_CHECK_BALANCE(purgeable_volatile);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
	LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(tagged_nofootprint);
	LEDGER_CHECK_BALANCE(tagged_footprint);
	LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
	LEDGER_CHECK_BALANCE(network_volatile);
	LEDGER_CHECK_BALANCE(network_nonvolatile);
	LEDGER_CHECK_BALANCE(network_volatile_compressed);
	LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(media_nofootprint);
	LEDGER_CHECK_BALANCE(media_footprint);
	LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(media_footprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_nofootprint);
	LEDGER_CHECK_BALANCE(graphics_footprint);
	LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
	LEDGER_CHECK_BALANCE(neural_nofootprint);
	LEDGER_CHECK_BALANCE(neural_footprint);
	LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(neural_footprint_compressed);

	if (do_panic) {
		if (pmap_ledgers_panic) {
			panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		} else {
			printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		}
	}
}
#endif /* MACH_ASSERT */