/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory mapping module.
 */
#include <task_swapper.h>
#include <mach_assert.h>

#include <vm/vm_options.h>

#include <libkern/OSAtomic.h>

#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/vm_attributes.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/vm_statistics.h>
#include <mach/memory_object.h>
#include <mach/mach_vm.h>
#include <machine/cpu_capabilities.h>

#include <kern/assert.h>
#include <kern/backtrace.h>
#include <kern/counters.h>
#include <kern/exc_guard.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include <vm/vm_compressor.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_init.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>

#include <mach/vm_map_server.h>
#include <mach/mach_host_server.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_shared_region.h>
#include <vm/vm_map_store.h>

#include <san/kasan.h>

#include <sys/codesign.h>
#include <libkern/section_keywords.h>
#if DEVELOPMENT || DEBUG
extern int proc_selfcsflags(void);
#if CONFIG_EMBEDDED
extern int panic_on_unsigned_execute;
#endif /* CONFIG_EMBEDDED */
#endif /* DEVELOPMENT || DEBUG */

#if __arm64__
extern const int fourk_binary_compatibility_unsafe;
extern const int fourk_binary_compatibility_allow_wx;
#endif /* __arm64__ */
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);

#if VM_MAP_DEBUG_APPLE_PROTECT
int vm_map_debug_apple_protect = 0;
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
int vm_map_debug_fourk = 0;
#endif /* VM_MAP_DEBUG_FOURK */
SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
int vm_map_executable_immutable_verbose = 0;

os_refgrp_decl(static, map_refgrp, "vm_map", NULL);

extern u_int32_t random(void);  /* from <libkern/libkern.h> */
/*
 * Internal prototypes
 */

static void vm_map_simplify_range(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end);       /* forward */

static boolean_t vm_map_range_check(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_map_entry_t  *entry);

static vm_map_entry_t _vm_map_entry_create(
    struct vm_map_header *map_header, boolean_t map_locked);

static void _vm_map_entry_dispose(
    struct vm_map_header *map_header,
    vm_map_entry_t  entry);

static void vm_map_pmap_enter(
    vm_map_t           map,
    vm_map_offset_t    addr,
    vm_map_offset_t    end_addr,
    vm_object_t        object,
    vm_object_offset_t offset,
    vm_prot_t          protection);

static void _vm_map_clip_end(
    struct vm_map_header *map_header,
    vm_map_entry_t  entry,
    vm_map_offset_t end);

static void _vm_map_clip_start(
    struct vm_map_header *map_header,
    vm_map_entry_t  entry,
    vm_map_offset_t start);

static void vm_map_entry_delete(
    vm_map_t        map,
    vm_map_entry_t  entry);

static kern_return_t vm_map_delete(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    int             flags,
    vm_map_t        zap_map);

static void vm_map_copy_insert(
    vm_map_t        map,
    vm_map_entry_t  after_where,
    vm_map_copy_t   copy);

static kern_return_t vm_map_copy_overwrite_unaligned(
    vm_map_t         dst_map,
    vm_map_entry_t   entry,
    vm_map_copy_t    copy,
    vm_map_address_t start,
    boolean_t        discard_on_success);

static kern_return_t vm_map_copy_overwrite_aligned(
    vm_map_t        dst_map,
    vm_map_entry_t  tmp_entry,
    vm_map_copy_t   copy,
    vm_map_offset_t start,
    pmap_t          pmap);

static kern_return_t vm_map_copyin_kernel_buffer(
    vm_map_t         src_map,
    vm_map_address_t src_addr,
    vm_map_size_t    len,
    boolean_t        src_destroy,
    vm_map_copy_t    *copy_result);    /* OUT */

static kern_return_t vm_map_copyout_kernel_buffer(
    vm_map_t         map,
    vm_map_address_t *addr,            /* IN/OUT */
    vm_map_copy_t    copy,
    vm_map_size_t    copy_size,
    boolean_t        overwrite,
    boolean_t        consume_on_success);

static void vm_map_fork_share(
    vm_map_t        old_map,
    vm_map_entry_t  old_entry,
    vm_map_t        new_map);

static boolean_t vm_map_fork_copy(
    vm_map_t        old_map,
    vm_map_entry_t  *old_entry_p,
    vm_map_t        new_map,
    int             vm_map_copyin_flags);

static kern_return_t vm_map_wire_nested(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_prot_t       caller_prot,
    vm_tag_t        tag,
    boolean_t       user_wire,
    pmap_t          map_pmap,
    vm_map_offset_t pmap_addr,
    ppnum_t         *physpage_p);

static kern_return_t vm_map_unwire_nested(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    boolean_t       user_wire,
    pmap_t          map_pmap,
    vm_map_offset_t pmap_addr);

static kern_return_t vm_map_overwrite_submap_recurse(
    vm_map_t        dst_map,
    vm_map_offset_t dst_addr,
    vm_map_size_t   dst_size);

static kern_return_t vm_map_copy_overwrite_nested(
    vm_map_t        dst_map,
    vm_map_offset_t dst_addr,
    vm_map_copy_t   copy,
    boolean_t       interruptible,
    pmap_t          pmap,
    boolean_t       discard_on_success);

static kern_return_t vm_map_remap_extract(
    vm_map_t        map,
    vm_map_offset_t addr,
    vm_map_size_t   size,
    boolean_t       copy,
    struct vm_map_header *map_header,
    vm_prot_t       *cur_protection,
    vm_prot_t       *max_protection,
    vm_inherit_t    inheritance,
    boolean_t       pageable,
    boolean_t       same_map,
    vm_map_kernel_flags_t vmk_flags);

static kern_return_t vm_map_remap_range_allocate(
    vm_map_t         map,
    vm_map_address_t *address,
    vm_map_size_t    size,
    vm_map_offset_t  mask,
    int              flags,
    vm_map_kernel_flags_t vmk_flags,
    vm_tag_t         tag,
    vm_map_entry_t   *map_entry);

static void vm_map_region_look_for_page(
    vm_map_t           map,
    vm_map_offset_t    va,
    vm_object_t        object,
    vm_object_offset_t offset,
    int                max_refcnt,
    int                depth,
    vm_region_extended_info_t extended,
    mach_msg_type_number_t    count);

static int vm_map_region_count_obj_refs(
    vm_map_entry_t  entry,
    vm_object_t     object);

static kern_return_t vm_map_willneed(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_reuse_pages(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_reusable_pages(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end);

static kern_return_t vm_map_can_reuse(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end);

#if MACH_ASSERT
static kern_return_t vm_map_pageout(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end);
#endif /* MACH_ASSERT */

static void vm_map_corpse_footprint_destroy(
    vm_map_t        map);

pid_t find_largest_process_vm_map_entries(void);
/*
 * Macros to copy a vm_map_entry.  We must be careful to correctly
 * manage the wired page count.  vm_map_entry_copy() creates a new
 * map entry to the same memory - the wired count in the new entry
 * must be set to zero.  vm_map_entry_copy_full() creates a new
 * entry that is identical to the old entry.  This preserves the
 * wire count; it's used for map splitting and zone changing in
 * vm_map_copyout.
 */

#if CONFIG_EMBEDDED

/*
 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 * But for security reasons on embedded platforms, we don't want the
 * new mapping to be "used for jit", so we always reset the flag here.
 * Same for "pmap_cs_associated".
 */
#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD)        \
MACRO_BEGIN                                             \
	(NEW)->used_for_jit = FALSE;                    \
	(NEW)->pmap_cs_associated = FALSE;              \
MACRO_END

#else /* CONFIG_EMBEDDED */

/*
 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 * On macOS, the new mapping can be "used for jit".
 */
#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD)                \
MACRO_BEGIN                                                     \
	assert((NEW)->used_for_jit == (OLD)->used_for_jit);     \
	assert((NEW)->pmap_cs_associated == FALSE);             \
MACRO_END

#endif /* CONFIG_EMBEDDED */

#define vm_map_entry_copy(NEW, OLD)                             \
MACRO_BEGIN                                                     \
	boolean_t _vmec_reserved = (NEW)->from_reserved_zone;   \
	*(NEW) = *(OLD);                                        \
	(NEW)->is_shared = FALSE;                               \
	(NEW)->needs_wakeup = FALSE;                            \
	(NEW)->in_transition = FALSE;                           \
	(NEW)->wired_count = 0;                                 \
	(NEW)->user_wired_count = 0;                            \
	(NEW)->permanent = FALSE;                               \
	VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW), (OLD));           \
	(NEW)->from_reserved_zone = _vmec_reserved;             \
	if ((NEW)->iokit_acct) {                                \
	        assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
	        (NEW)->iokit_acct = FALSE;                      \
	        (NEW)->use_pmap = TRUE;                         \
	}                                                       \
	(NEW)->vme_resilient_codesign = FALSE;                  \
	(NEW)->vme_resilient_media = FALSE;                     \
	(NEW)->vme_atomic = FALSE;                              \
	(NEW)->vme_no_copy_on_read = FALSE;                     \
MACRO_END

#define vm_map_entry_copy_full(NEW, OLD)                        \
MACRO_BEGIN                                                     \
	boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;  \
	*(NEW) = *(OLD);                                        \
	(NEW)->from_reserved_zone = _vmecf_reserved;            \
MACRO_END
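
/*
 * Illustrative note (not part of the original source): the two macros above
 * differ only in how much per-entry state carries over.  A hedged sketch of
 * typical usage:
 *
 *	vm_map_entry_copy(new, old);       // duplicate a mapping: the new entry
 *	                                   //   starts unwired and non-atomic
 *	vm_map_entry_copy_full(new, old);  // clip/split an entry: every field,
 *	                                   //   including the wired counts, is kept
 */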
/*
 * Normal lock_read_to_write() returns FALSE/0 on failure.
 * These functions evaluate to zero on success and non-zero value on failure.
 */

__attribute__((always_inline))
int
vm_map_lock_read_to_write(vm_map_t map)
{
	if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
		DTRACE_VM(vm_map_lock_upgrade);
		return 0;
	}
	return 1;
}

__attribute__((always_inline))
boolean_t
vm_map_try_lock(vm_map_t map)
{
	if (lck_rw_try_lock_exclusive(&(map)->lock)) {
		DTRACE_VM(vm_map_lock_w);
		return TRUE;
	}
	return FALSE;
}

__attribute__((always_inline))
boolean_t
vm_map_try_lock_read(vm_map_t map)
{
	if (lck_rw_try_lock_shared(&(map)->lock)) {
		DTRACE_VM(vm_map_lock_r);
		return TRUE;
	}
	return FALSE;
}
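
/*
 * Usage sketch (illustrative, not part of the original source): because
 * vm_map_lock_read_to_write() returns non-zero on *failure*, a typical
 * caller re-takes the read lock and re-validates its lookup when the
 * upgrade does not succeed:
 *
 *	if (vm_map_lock_read_to_write(map)) {
 *		// upgrade failed; the shared lock has been dropped
 *		vm_map_lock_read(map);
 *		// ... re-lookup the entry before retrying ...
 *	}
 */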
/*
 * Decide if we want to allow processes to execute from their data or stack areas.
 * override_nx() returns true if we do.  Data/stack execution can be enabled independently
 * for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 * or allow_stack_exec to enable data execution for that type of data area for that particular
 * ABI (or both by or'ing the flags together).  These are initialized in the architecture
 * specific pmap files since the default behavior varies according to architecture.  The
 * main reason it varies is because of the need to provide binary compatibility with old
 * applications that were written before these restrictions came into being.  In the old
 * days, an app could execute anything it could read, but this has slowly been tightened
 * up over time.  The default behavior is:
 *
 *	32-bit PPC apps		may execute from both stack and data areas
 *	32-bit Intel apps	may execute from data areas but not stack
 *	64-bit PPC/Intel apps	may not execute from either data or stack
 *
 * An application on any architecture may override these defaults by explicitly
 * adding PROT_EXEC permission to the page in question with the mprotect(2)
 * system call.  This code here just determines what happens when an app tries to
 * execute from a page that lacks execute permission.
 *
 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 * default behavior for both 32 and 64 bit apps on a system-wide basis.  Furthermore,
 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 * execution from data areas for a particular binary even if the arch normally permits it.  As
 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 * to support some complicated use cases, notably browsers with out-of-process plugins that
 * are not all NX-safe.
 */

extern int allow_data_exec, allow_stack_exec;
int
override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
{
	int current_abi;

	if (map->pmap == kernel_pmap) {
		return FALSE;
	}

	/*
	 * Determine if the app is running in 32 or 64 bit mode.
	 */

	if (vm_map_is_64bit(map)) {
		current_abi = VM_ABI_64;
	} else {
		current_abi = VM_ABI_32;
	}

	/*
	 * Determine if we should allow the execution based on whether it's a
	 * stack or data area and the current architecture.
	 */

	if (user_tag == VM_MEMORY_STACK) {
		return allow_stack_exec & current_abi;
	}

	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
}
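
/*
 * Illustrative example (not from the original source) of the flag encoding
 * described above: enabling data-area execution for both ABIs on a
 * hypothetical platform would amount to
 *
 *	allow_data_exec = VM_ABI_32 | VM_ABI_64;
 *
 * after which override_nx(map, user_tag) returns non-zero for a data mapping
 * in either a 32-bit or a 64-bit process, unless the map itself sets
 * map_disallow_data_exec.
 */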
/*
 *	Virtual memory maps provide for the mapping, protection,
 *	and sharing of virtual memory objects.  In addition,
 *	this module provides for an efficient virtual copy of
 *	memory from one map to another.
 *
 *	Synchronization is required prior to most operations.
 *
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a single hint is used to speed up lookups.
 *
 *	Sharing maps have been deleted from this version of Mach.
 *	All shared objects are now mapped directly into the respective
 *	maps.  This requires a change in the copy on write strategy;
 *	the asymmetric (delayed) strategy is used for shared temporary
 *	objects instead of the symmetric (shadow) strategy.  All maps
 *	are now "top level" maps (either task map, kernel map or submap
 *	of the kernel map).
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *	No attempt is currently made to "glue back together" two
 *	entries that used to be a single entry.
 *
 *	The symmetric (shadow) copy strategy implements virtual copy
 *	by copying VM object references from one map to
 *	another, and then marking both regions as copy-on-write.
 *	It is important to note that only one writeable reference
 *	to a VM object region exists in any map when this strategy
 *	is used -- this means that shadow object creation can be
 *	delayed until a write operation occurs.  The asymmetric (delayed)
 *	strategy allows multiple maps to have writeable references to
 *	the same region of a vm object, and hence cannot delay creating
 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 *	Copying of permanent objects is completely different; see
 *	vm_object_copy_strategically() in vm_object.c.
 */
static zone_t   vm_map_zone;                  /* zone for vm_map structures */
zone_t          vm_map_entry_zone;            /* zone for vm_map_entry structures */
static zone_t   vm_map_entry_reserved_zone;   /* zone with reserve for non-blocking allocations */
static zone_t   vm_map_copy_zone;             /* zone for vm_map_copy structures */
zone_t          vm_map_holes_zone;            /* zone for vm map holes (vm_map_links) structures */

/*
 *	Placeholder object for submap operations.  This object is dropped
 *	into the range by a call to vm_map_find, and removed when
 *	vm_map_submap creates the submap.
 */

vm_object_t     vm_submap_object;

static void      *map_data;
static vm_size_t map_data_size;
static void      *kentry_data;
static vm_size_t kentry_data_size;
static void      *map_holes_data;
static vm_size_t map_holes_data_size;

#if CONFIG_EMBEDDED
#define NO_COALESCE_LIMIT  0
#else
#define NO_COALESCE_LIMIT  ((1024 * 128) - 1)
#endif
/* Skip acquiring locks if we're in the midst of a kernel core dump */
unsigned int not_in_kdp = 1;

unsigned int vm_map_set_cache_attr_count = 0;

kern_return_t
vm_map_set_cache_attr(
	vm_map_t        map,
	vm_map_offset_t va)
{
	vm_map_entry_t map_entry;
	vm_object_t    object;
	kern_return_t  kr = KERN_SUCCESS;

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, va, &map_entry) ||
	    map_entry->is_sub_map) {
		/*
		 * that memory is not properly mapped
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	object = VME_OBJECT(map_entry);

	if (object == VM_OBJECT_NULL) {
		/*
		 * there should be a VM object here at this point
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	vm_object_lock(object);
	object->set_cache_attr = TRUE;
	vm_object_unlock(object);

	vm_map_set_cache_attr_count++;
done:
	vm_map_unlock_read(map);

	return kr;
}
#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 */
kern_return_t
vm_map_apple_protected(
	vm_map_t                map,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_object_offset_t      crypto_backing_offset,
	struct pager_crypt_info *crypt_info)
{
	boolean_t          map_locked;
	kern_return_t      kr;
	vm_map_entry_t     map_entry;
	struct vm_map_entry tmp_entry;
	memory_object_t    unprotected_mem_obj;
	vm_object_t        protected_object;
	vm_map_offset_t    map_addr;
	vm_map_offset_t    start_aligned, end_aligned;
	vm_object_offset_t crypto_start, crypto_end;
	int                vm_flags;
	vm_map_kernel_flags_t vmk_flags;

	vm_flags = 0;
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

	map_locked = FALSE;
	unprotected_mem_obj = MEMORY_OBJECT_NULL;

	start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
	end_aligned = vm_map_round_page(end, PAGE_MASK_64);
	start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
	end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));

#if __arm64__
	/*
	 * "start" and "end" might be 4K-aligned but not 16K-aligned,
	 * so we might have to loop and establish up to 3 mappings:
	 *
	 * + the first 16K-page, which might overlap with the previous
	 *   4K-aligned mapping,
	 * + the center,
	 * + the last 16K-page, which might overlap with the next
	 *   4K-aligned mapping.
	 * Each of these mapping might be backed by a vnode pager (if
	 * properly page-aligned) or a "fourk_pager", itself backed by a
	 * vnode pager (if 4K-aligned but not page-aligned).
	 */
#endif /* __arm64__ */

	map_addr = start_aligned;
	for (map_addr = start_aligned;
	    map_addr < end;
	    map_addr = tmp_entry.vme_end) {
		vm_map_lock(map);
		map_locked = TRUE;

		/* lookup the protected VM object */
		if (!vm_map_lookup_entry(map,
		    map_addr,
		    &map_entry) ||
		    map_entry->is_sub_map ||
		    VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
		    !(map_entry->protection & VM_PROT_EXECUTE)) {
			/* that memory is not properly mapped */
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		/* get the protected object to be decrypted */
		protected_object = VME_OBJECT(map_entry);
		if (protected_object == VM_OBJECT_NULL) {
			/* there should be a VM object here at this point */
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}
		/* ensure protected object stays alive while map is unlocked */
		vm_object_reference(protected_object);

		/* limit the map entry to the area we want to cover */
		vm_map_clip_start(map, map_entry, start_aligned);
		vm_map_clip_end(map, map_entry, end_aligned);

		tmp_entry = *map_entry;
		map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
		vm_map_unlock(map);
		map_locked = FALSE;

		/*
		 * This map entry might be only partially encrypted
		 * (if not fully "page-aligned").
		 */
		crypto_start = 0;
		crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
		if (tmp_entry.vme_start < start) {
			if (tmp_entry.vme_start != start_aligned) {
				kr = KERN_INVALID_ADDRESS;
			}
			crypto_start += (start - tmp_entry.vme_start);
		}
		if (tmp_entry.vme_end > end) {
			if (tmp_entry.vme_end != end_aligned) {
				kr = KERN_INVALID_ADDRESS;
			}
			crypto_end -= (tmp_entry.vme_end - end);
		}

		/*
		 * This "extra backing offset" is needed to get the decryption
		 * routine to use the right key.  It adjusts for the possibly
		 * relative offset of an interposed "4K" pager...
		 */
		if (crypto_backing_offset == (vm_object_offset_t) -1) {
			crypto_backing_offset = VME_OFFSET(&tmp_entry);
		}

		/*
		 * Lookup (and create if necessary) the protected memory object
		 * matching that VM object.
		 * If successful, this also grabs a reference on the memory object,
		 * to guarantee that it doesn't go away before we get a chance to map
		 * it.
		 */
		unprotected_mem_obj = apple_protect_pager_setup(
			protected_object,
			VME_OFFSET(&tmp_entry),
			crypto_backing_offset,
			crypt_info,
			crypto_start,
			crypto_end);

		/* release extra ref on protected object */
		vm_object_deallocate(protected_object);

		if (unprotected_mem_obj == NULL) {
			kr = KERN_FAILURE;
			goto done;
		}

		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
		/* can overwrite an immutable mapping */
		vmk_flags.vmkf_overwrite_immutable = TRUE;
#if __arm64__
		if (tmp_entry.used_for_jit &&
		    (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
		    PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
		    fourk_binary_compatibility_unsafe &&
		    fourk_binary_compatibility_allow_wx) {
			printf("** FOURK_COMPAT [%d]: "
			    "allowing write+execute at 0x%llx\n",
			    proc_selfpid(), tmp_entry.vme_start);
			vmk_flags.vmkf_map_jit = TRUE;
		}
#endif /* __arm64__ */

		/* map this memory object in place of the current one */
		map_addr = tmp_entry.vme_start;
		kr = vm_map_enter_mem_object(map,
		    &map_addr,
		    (tmp_entry.vme_end -
		    tmp_entry.vme_start),
		    (mach_vm_offset_t) 0,
		    vm_flags,
		    vmk_flags,
		    VM_KERN_MEMORY_NONE,
		    (ipc_port_t)(uintptr_t) unprotected_mem_obj,
		    0,
		    TRUE,
		    tmp_entry.protection,
		    tmp_entry.max_protection,
		    tmp_entry.inheritance);
		assertf(kr == KERN_SUCCESS,
		    "kr=0x%x\n", kr);
		assertf(map_addr == tmp_entry.vme_start,
		    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
		    (uint64_t)map_addr,
		    (uint64_t) tmp_entry.vme_start,
		    &tmp_entry);

#if VM_MAP_DEBUG_APPLE_PROTECT
		if (vm_map_debug_apple_protect) {
			printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
			    " backing:[object:%p,offset:0x%llx,"
			    "crypto_backing_offset:0x%llx,"
			    "crypto_start:0x%llx,crypto_end:0x%llx]\n",
			    map,
			    (uint64_t) map_addr,
			    (uint64_t) (map_addr + (tmp_entry.vme_end -
			    tmp_entry.vme_start)),
			    unprotected_mem_obj,
			    protected_object,
			    VME_OFFSET(&tmp_entry),
			    crypto_backing_offset,
			    crypto_start,
			    crypto_end);
		}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

		/*
		 * Release the reference obtained by
		 * apple_protect_pager_setup().
		 * The mapping (if it succeeded) is now holding a reference on
		 * the memory object.
		 */
		memory_object_deallocate(unprotected_mem_obj);
		unprotected_mem_obj = MEMORY_OBJECT_NULL;

		/* continue with next map entry */
		crypto_backing_offset += (tmp_entry.vme_end -
		    tmp_entry.vme_start);
		crypto_backing_offset -= crypto_start;
	}
	kr = KERN_SUCCESS;

done:
	if (map_locked) {
		vm_map_unlock(map);
	}
	return kr;
}
#endif /* CONFIG_CODE_DECRYPTION */
lck_grp_t       vm_map_lck_grp;
lck_grp_attr_t  vm_map_lck_grp_attr;
lck_attr_t      vm_map_lck_attr;
lck_attr_t      vm_map_lck_rw_attr;

#if CONFIG_EMBEDDED
int malloc_no_cow = 1;
#define VM_PROTECT_WX_FAIL 0
#else /* CONFIG_EMBEDDED */
int malloc_no_cow = 0;
#define VM_PROTECT_WX_FAIL 1
#endif /* CONFIG_EMBEDDED */
uint64_t vm_memory_malloc_no_cow_mask = 0ULL;

int vm_check_map_sanity = 0;
/*
 *	vm_map_init:
 *
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from zones -- we must
 *	initialize those zones.
 *
 *	There are three zones of interest:
 *
 *	vm_map_zone:			used to allocate maps.
 *	vm_map_entry_zone:		used to allocate map entries.
 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
 *
 *	The kernel allocates map entries from a special zone that is initially
 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
 *	the kernel to allocate more memory to an entry zone when it became
 *	empty since the very act of allocating memory implies the creation
 *	of a new entry.
 */
void
vm_map_init(void)
{
	vm_size_t entry_zone_alloc_size;
	const char *mez_name = "VM map entries";

	vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40 * 1024,
	    PAGE_SIZE, "maps");
	zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
#if defined(__LP64__)
	entry_zone_alloc_size = PAGE_SIZE * 5;
#else
	entry_zone_alloc_size = PAGE_SIZE * 6;
#endif
	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
	    1024 * 1024, entry_zone_alloc_size,
	    mez_name);
	zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
	zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);

	vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
	    kentry_data_size * 64, kentry_data_size,
	    "Reserved VM map entries");
	zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
	/* Don't quarantine because we always need elements available */
	zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);

	vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
	    16 * 1024, PAGE_SIZE, "VM map copies");
	zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);

	vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
	    16 * 1024, PAGE_SIZE, "VM map holes");
	zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);

	/*
	 *	Cram the map and kentry zones with initial data.
	 *	Set reserved_zone non-collectible to aid zone_gc().
	 */
	zone_change(vm_map_zone, Z_COLLECT, FALSE);
	zone_change(vm_map_zone, Z_FOREIGN, TRUE);
	zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);

	zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
	zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
	zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
	zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);

	zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
	zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
	zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
	zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
	zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);

	/*
	 * Add the stolen memory to zones, adjust zone size and stolen counts.
	 * zcram only up to the maximum number of pages for each zone chunk.
	 */
	zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);

	const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
	for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
		zcram(vm_map_entry_reserved_zone,
		    (vm_offset_t)kentry_data + off,
		    MIN(kentry_data_size - off, stride));
	}
	for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
		zcram(vm_map_holes_zone,
		    (vm_offset_t)map_holes_data + off,
		    MIN(map_holes_data_size - off, stride));
	}

	/*
	 * Since these are covered by zones, remove them from stolen page accounting.
	 */
	VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));

	lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
	lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
	lck_attr_setdefault(&vm_map_lck_attr);

	lck_attr_setdefault(&vm_map_lck_rw_attr);
	lck_attr_cleardebug(&vm_map_lck_rw_attr);

#if VM_MAP_DEBUG_APPLE_PROTECT
	PE_parse_boot_argn("vm_map_debug_apple_protect",
	    &vm_map_debug_apple_protect,
	    sizeof(vm_map_debug_apple_protect));
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
#if VM_MAP_DEBUG_FOURK
	PE_parse_boot_argn("vm_map_debug_fourk",
	    &vm_map_debug_fourk,
	    sizeof(vm_map_debug_fourk));
#endif /* VM_MAP_DEBUG_FOURK */
	PE_parse_boot_argn("vm_map_executable_immutable",
	    &vm_map_executable_immutable,
	    sizeof(vm_map_executable_immutable));
	PE_parse_boot_argn("vm_map_executable_immutable_verbose",
	    &vm_map_executable_immutable_verbose,
	    sizeof(vm_map_executable_immutable_verbose));

	PE_parse_boot_argn("malloc_no_cow",
	    &malloc_no_cow,
	    sizeof(malloc_no_cow));
	if (malloc_no_cow) {
		vm_memory_malloc_no_cow_mask = 0ULL;
		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
		// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
		// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
		vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
		// vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
		PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
		    &vm_memory_malloc_no_cow_mask,
		    sizeof(vm_memory_malloc_no_cow_mask));
	}

	PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
	if (vm_check_map_sanity) {
		kprintf("VM sanity checking enabled\n");
	} else {
		kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
	}
}
void
vm_map_steal_memory(void)
{
	uint32_t kentry_initial_pages;

	map_data_size = round_page(10 * sizeof(struct _vm_map));
	map_data = pmap_steal_memory(map_data_size);

	/*
	 * kentry_initial_pages corresponds to the number of kernel map entries
	 * required during bootstrap until the asynchronous replenishment
	 * scheme is activated and/or entries are available from the general
	 * map entry pool.
	 */
#if defined(__LP64__)
	kentry_initial_pages = 10;
#else
	kentry_initial_pages = 6;
#endif

#if CONFIG_GZALLOC
	/* If using the guard allocator, reserve more memory for the kernel
	 * reserved map entry pool.
	 */
	if (gzalloc_enabled()) {
		kentry_initial_pages *= 1024;
	}
#endif

	kentry_data_size = kentry_initial_pages * PAGE_SIZE;
	kentry_data = pmap_steal_memory(kentry_data_size);

	map_holes_data_size = kentry_data_size;
	map_holes_data = pmap_steal_memory(map_holes_data_size);
}

boolean_t vm_map_supports_hole_optimization = FALSE;
void
vm_kernel_reserved_entry_init(void)
{
	zone_prio_refill_configure(vm_map_entry_reserved_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_entry));

	/*
	 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
	 */
	zone_prio_refill_configure(vm_map_holes_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_links));
	vm_map_supports_hole_optimization = TRUE;
}

void
vm_map_disable_hole_optimization(vm_map_t map)
{
	vm_map_entry_t head_entry, hole_entry, next_hole_entry;

	if (map->holelistenabled) {
		head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

		while (hole_entry != NULL) {
			next_hole_entry = hole_entry->vme_next;

			hole_entry->vme_next = NULL;
			hole_entry->vme_prev = NULL;
			zfree(vm_map_holes_zone, hole_entry);

			if (next_hole_entry == head_entry) {
				hole_entry = NULL;
			} else {
				hole_entry = next_hole_entry;
			}
		}

		map->holes_list = NULL;
		map->holelistenabled = FALSE;

		map->first_free = vm_map_first_entry(map);
		SAVE_HINT_HOLE_WRITE(map, NULL);
	}
}

boolean_t
vm_kernel_map_is_kernel(vm_map_t map)
{
	return map->pmap == kernel_pmap;
}
/*
 *	vm_map_create:
 *
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(
	pmap_t          pmap,
	vm_map_offset_t min,
	vm_map_offset_t max,
	boolean_t       pageable)
{
	int options;

	options = 0;
	if (pageable) {
		options |= VM_MAP_CREATE_PAGEABLE;
	}
	return vm_map_create_options(pmap, min, max, options);
}

vm_map_t
vm_map_create_options(
	pmap_t          pmap,
	vm_map_offset_t min,
	vm_map_offset_t max,
	int             options)
{
	vm_map_t result;
	struct vm_map_links *hole_entry = NULL;

	if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
		/* unknown option */
		return VM_MAP_NULL;
	}

	result = (vm_map_t) zalloc(vm_map_zone);
	if (result == VM_MAP_NULL) {
		panic("vm_map_create");
	}

	vm_map_first_entry(result) = vm_map_to_entry(result);
	vm_map_last_entry(result) = vm_map_to_entry(result);
	result->hdr.nentries = 0;
	if (options & VM_MAP_CREATE_PAGEABLE) {
		result->hdr.entries_pageable = TRUE;
	} else {
		result->hdr.entries_pageable = FALSE;
	}

	vm_map_store_init( &(result->hdr));

	result->hdr.page_shift = PAGE_SHIFT;

	result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
	result->user_wire_size  = 0;
#if !CONFIG_EMBEDDED
	result->vmmap_high_start = 0;
#endif
	os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
#if TASK_SWAPPER
	result->res_count = 1;
	result->sw_state = MAP_SW_IN;
#endif /* TASK_SWAPPER */
	result->pmap = pmap;
	result->min_offset = min;
	result->max_offset = max;
	result->wiring_required = FALSE;
	result->no_zero_fill = FALSE;
	result->mapped_in_other_pmaps = FALSE;
	result->wait_for_space = FALSE;
	result->switch_protect = FALSE;
	result->disable_vmentry_reuse = FALSE;
	result->map_disallow_data_exec = FALSE;
	result->is_nested_map = FALSE;
	result->map_disallow_new_exec = FALSE;
	result->highest_entry_end = 0;
	result->first_free = vm_map_to_entry(result);
	result->hint = vm_map_to_entry(result);
	result->jit_entry_exists = FALSE;

	/* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
	if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
		result->has_corpse_footprint = TRUE;
		result->holelistenabled = FALSE;
		result->vmmap_corpse_footprint = NULL;
	} else {
		result->has_corpse_footprint = FALSE;
		if (vm_map_supports_hole_optimization) {
			hole_entry = zalloc(vm_map_holes_zone);

			hole_entry->start = min;
#if defined(__arm__) || defined(__arm64__)
			hole_entry->end = result->max_offset;
#else
			hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
			result->holes_list = result->hole_hint = hole_entry;
			hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
			result->holelistenabled = TRUE;
		} else {
			result->holelistenabled = FALSE;
		}
	}

	vm_map_lock_init(result);
	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);

	return result;
}
/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion in the
 *	given map (or map copy).  No fields are filled.
 */
#define vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)

#define vm_map_copy_entry_create(copy, map_locked)              \
	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)

unsigned reserved_zalloc_count, nonreserved_zalloc_count;

static vm_map_entry_t
_vm_map_entry_create(
	struct vm_map_header *map_header, boolean_t __unused map_locked)
{
	zone_t         zone;
	vm_map_entry_t entry;

	zone = vm_map_entry_zone;

	assert(map_header->entries_pageable ? !map_locked : TRUE);

	if (map_header->entries_pageable) {
		entry = (vm_map_entry_t) zalloc(zone);
	} else {
		entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);

		if (entry == VM_MAP_ENTRY_NULL) {
			zone = vm_map_entry_reserved_zone;
			entry = (vm_map_entry_t) zalloc(zone);
			OSAddAtomic(1, &reserved_zalloc_count);
		} else {
			OSAddAtomic(1, &nonreserved_zalloc_count);
		}
	}

	if (entry == VM_MAP_ENTRY_NULL) {
		panic("vm_map_entry_create");
	}
	entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);

	vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
#if MAP_ENTRY_CREATION_DEBUG
	entry->vme_creation_maphdr = map_header;
	backtrace(&entry->vme_creation_bt[0],
	    (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
#endif
	return entry;
}
/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 *
 *	The write map lock is held, so no need to
 *	do anything special to ensure correctness
 *	of the stores.
 */
#define vm_map_entry_dispose(map, entry)        \
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define vm_map_copy_entry_dispose(copy, entry)  \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))

static void
_vm_map_entry_dispose(
	struct vm_map_header *map_header,
	vm_map_entry_t       entry)
{
	zone_t zone;

	if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
		zone = vm_map_entry_zone;
	} else {
		zone = vm_map_entry_reserved_zone;
	}

	if (!map_header->entries_pageable) {
		if (zone == vm_map_entry_zone) {
			OSAddAtomic(-1, &nonreserved_zalloc_count);
		} else {
			OSAddAtomic(-1, &reserved_zalloc_count);
		}
	}

	zfree(zone, entry);
}

#if MACH_ASSERT
static boolean_t first_free_check = FALSE;
boolean_t
first_free_is_valid(
	vm_map_t map)
{
	if (!first_free_check) {
		return TRUE;
	}

	return first_free_is_valid_store( map );
}
#endif /* MACH_ASSERT */
#define vm_map_copy_entry_link(copy, after_where, entry)                \
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry)                           \
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
#if MACH_ASSERT && TASK_SWAPPER
/*
 *	vm_map_res_reference:
 *
 *	Adds another valid residence count to the given map.
 *
 *	Map is locked so this function can be called from
 *	vm_map_swapin.
 */
void
vm_map_res_reference(vm_map_t map)
{
	/* assert map is locked */
	assert(map->res_count >= 0);
	assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
	if (map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		vm_map_lock(map);
		vm_map_swapin(map);
		lck_mtx_lock(&map->s_lock);
		++map->res_count;
		vm_map_unlock(map);
	} else {
		++map->res_count;
	}
}

/*
 *	vm_map_reference_swap:
 *
 *	Adds valid reference and residence counts to the given map.
 *
 *	The map may not be in memory (i.e. zero residence count).
 */
void
vm_map_reference_swap(vm_map_t map)
{
	assert(map != VM_MAP_NULL);
	lck_mtx_lock(&map->s_lock);
	assert(map->res_count >= 0);
	assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
	os_ref_retain_locked(&map->map_refcnt);
	vm_map_res_reference(map);
	lck_mtx_unlock(&map->s_lock);
}

/*
 *	vm_map_res_deallocate:
 *
 *	Decrement residence count on a map; possibly causing swapout.
 *
 *	The map must be in memory (i.e. non-zero residence count).
 *
 *	The map is locked, so this function is callable from vm_map_deallocate.
 */
void
vm_map_res_deallocate(vm_map_t map)
{
	assert(map->res_count > 0);
	if (--map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		vm_map_lock(map);
		vm_map_swapout(map);
		vm_map_unlock(map);
		lck_mtx_lock(&map->s_lock);
	}
	assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
}
#endif /* MACH_ASSERT && TASK_SWAPPER */
/*
 *	vm_map_destroy:
 *
 *	Actually destroy a map.
 */
void
vm_map_destroy(
	vm_map_t map,
	int      flags)
{
	vm_map_lock(map);

	/* final cleanup: no need to unnest shared region */
	flags |= VM_MAP_REMOVE_NO_UNNESTING;
	/* final cleanup: ok to remove immutable mappings */
	flags |= VM_MAP_REMOVE_IMMUTABLE;
	/* final cleanup: allow gaps in range */
	flags |= VM_MAP_REMOVE_GAPS_OK;

	/* clean up regular map entries */
	(void) vm_map_delete(map, map->min_offset, map->max_offset,
	    flags, VM_MAP_NULL);
	/* clean up leftover special mappings (commpage, etc...) */
#if !defined(__arm__) && !defined(__arm64__)
	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
	    flags, VM_MAP_NULL);
#endif /* !__arm__ && !__arm64__ */

	vm_map_disable_hole_optimization(map);
	vm_map_corpse_footprint_destroy(map);

	vm_map_unlock(map);

	assert(map->hdr.nentries == 0);

	if (map->pmap) {
		pmap_destroy(map->pmap);
	}

	if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
		/*
		 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
		 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
		 * structure or kalloc'ed via lck_mtx_init.
		 * An example is s_lock_ext within struct _vm_map.
		 *
		 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
		 * can add another tag to detect embedded vs alloc'ed indirect external
		 * mutexes but that'll be additional checks in the lock path and require
		 * updating dependencies for the old vs new tag.
		 *
		 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
		 * just when lock debugging is ON, we choose to forego explicitly destroying
		 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
		 * count on vm_map_lck_grp, which has no serious side-effect.
		 */
	} else {
		lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
		lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
	}

	zfree(vm_map_zone, map);
}
/*
 * Returns pid of the task with the largest number of VM map entries.
 * Used in the zone-map-exhaustion jetsam path.
 */
pid_t
find_largest_process_vm_map_entries(void)
{
	pid_t victim_pid = -1;
	int max_vm_map_entries = 0;
	task_t task = TASK_NULL;
	queue_head_t *task_list = &tasks;

	lck_mtx_lock(&tasks_threads_lock);
	queue_iterate(task_list, task, task_t, tasks) {
		if (task == kernel_task || !task->active) {
			continue;
		}

		vm_map_t task_map = task->map;
		if (task_map != VM_MAP_NULL) {
			int task_vm_map_entries = task_map->hdr.nentries;
			if (task_vm_map_entries > max_vm_map_entries) {
				max_vm_map_entries = task_vm_map_entries;
				victim_pid = pid_from_task(task);
			}
		}
	}
	lck_mtx_unlock(&tasks_threads_lock);

	printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
	return victim_pid;
}
#if TASK_SWAPPER
/*
 * vm_map_swapin/vm_map_swapout
 *
 * Swap a map in and out, either referencing or releasing its resources.
 * These functions are internal use only; however, they must be exported
 * because they may be called from macros, which are exported.
 *
 * In the case of swapout, there could be races on the residence count,
 * so if the residence count is up, we return, assuming that a
 * vm_map_deallocate() call in the near future will bring us back.
 *
 * Locking:
 *	-- We use the map write lock for synchronization among races.
 *	-- The map write lock, and not the simple s_lock, protects the
 *	   swap state of the map.
 *	-- If a map entry is a share map, then we hold both locks, in
 *	   hierarchical order.
 *
 * Synchronization Notes:
 *	1) If a vm_map_swapin() call happens while swapout in progress, it
 *	will block on the map lock and proceed when swapout is through.
 *	2) A vm_map_reference() call at this time is illegal, and will
 *	cause a panic.  vm_map_reference() is only allowed on resident
 *	maps, since it refuses to block.
 *	3) A vm_map_swapin() call during a swapin will block, and
 *	proceed when the first swapin is done, turning into a nop.
 *	This is the reason the res_count is not incremented until
 *	after the swapin is complete.
 *	4) There is a timing hole after the checks of the res_count, before
 *	the map lock is taken, during which a swapin may get the lock
 *	before a swapout about to happen.  If this happens, the swapin
 *	will detect the state and increment the reference count, causing
 *	the swapout to be a nop, thereby delaying it until a later
 *	vm_map_deallocate.  If the swapout gets the lock first, then
 *	the swapin will simply block until the swapout is done, and
 *	then proceed.
 *
 * Because vm_map_swapin() is potentially an expensive operation, it
 * should be used with caution.
 *
 * Invariants:
 *	1) A map with a residence count of zero is either swapped, or
 *	   being swapped.
 *	2) A map with a non-zero residence count is either resident,
 *	   or being swapped in.
 */

int vm_map_swap_enable = 1;
void
vm_map_swapin(vm_map_t map)
{
	vm_map_entry_t entry;

	if (!vm_map_swap_enable) {      /* debug */
		return;
	}

	/*
	 * Map is locked.
	 * First deal with various races.
	 */
	if (map->sw_state == MAP_SW_IN) {
		/*
		 * we raced with swapout and won.  Returning will incr.
		 * the res_count, turning the swapout into a nop.
		 */
		return;
	}

	/*
	 * The residence count must be zero.  If we raced with another
	 * swapin, the state would have been IN; if we raced with a
	 * swapout (after another competing swapin), we must have lost
	 * the race to get here (see above comment), in which case
	 * res_count is still 0.
	 */
	assert(map->res_count == 0);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_OUT);

	/*
	 * We now operate upon each map entry.  If the entry is a sub-
	 * or share-map, we call vm_map_res_reference upon it.
	 * If the entry is an object, we call vm_object_res_reference
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_reference.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = VME_SUBMAP(entry);
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_reference(lmap);
				lck_mtx_unlock(&lmap->s_lock);
			} else {
				vm_object_t object = VME_OBJECT(entry);
				vm_object_lock(object);
				/*
				 * This call may iterate through the
				 * shadow chain.
				 */
				vm_object_res_reference(object);
				vm_object_unlock(object);
			}
		}
		entry = entry->vme_next;
	}
	assert(map->sw_state == MAP_SW_OUT);
	map->sw_state = MAP_SW_IN;
}

void
vm_map_swapout(vm_map_t map)
{
	vm_map_entry_t entry;

	/*
	 * Map is locked.
	 * First deal with various races.
	 * If we raced with a swapin and lost, the residence count
	 * will have been incremented to 1, and we simply return.
	 */
	lck_mtx_lock(&map->s_lock);
	if (map->res_count != 0) {
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	lck_mtx_unlock(&map->s_lock);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_IN);

	if (!vm_map_swap_enable) {
		return;
	}

	/*
	 * We now operate upon each map entry.  If the entry is a sub-
	 * or share-map, we call vm_map_res_deallocate upon it.
	 * If the entry is an object, we call vm_object_res_deallocate
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_deallocate.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = VME_SUBMAP(entry);
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_deallocate(lmap);
				lck_mtx_unlock(&lmap->s_lock);
			} else {
				vm_object_t object = VME_OBJECT(entry);
				vm_object_lock(object);
				/*
				 * This call may take a long time,
				 * since it could actively push
				 * out pages (if we implement it
				 * that way).
				 */
				vm_object_res_deallocate(object);
				vm_object_unlock(object);
			}
		}
		entry = entry->vme_next;
	}
	assert(map->sw_state == MAP_SW_IN);
	map->sw_state = MAP_SW_OUT;
}

#endif /* TASK_SWAPPER */
/*
 *	vm_map_lookup_entry:	[ internal use only ]
 *
 *	Calls into the vm map store layer to find the map
 *	entry containing (or immediately preceding) the
 *	specified address in the given map; the entry is returned
 *	in the "entry" parameter.  The boolean
 *	result indicates whether the address is
 *	actually contained in the map.
 */
boolean_t
vm_map_lookup_entry(
	vm_map_t        map,
	vm_map_offset_t address,
	vm_map_entry_t  *entry)         /* OUT */
{
	return vm_map_store_lookup_entry( map, address, entry );
}
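
/*
 * Usage sketch (illustrative, not part of the original source): callers
 * typically distinguish the "hit" and "miss" cases as follows, with the
 * map lock held:
 *
 *	vm_map_entry_t entry;
 *
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		// "entry" contains "addr"
 *	} else {
 *		// "addr" is unmapped; "entry" is the entry preceding the
 *		// hole (or vm_map_to_entry(map) if the hole is at the start)
 *	}
 */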
/*
 *	Routine:	vm_map_find_space
 *	Purpose:
 *		Allocate a range in the specified virtual address map,
 *		returning the entry allocated for that range.
 *		Used by kmem_alloc, etc.
 *
 *		The map must NOT be locked.  It will be returned locked
 *		on KERN_SUCCESS, unlocked on failure.
 *
 *		If an entry is allocated, the object/offset fields
 *		are initialized to zero.
 */
kern_return_t
vm_map_find_space(
	vm_map_t              map,
	vm_map_offset_t       *address,       /* OUT */
	vm_map_size_t         size,
	vm_map_offset_t       mask,
	int                   flags __unused,
	vm_map_kernel_flags_t vmk_flags,
	vm_tag_t              tag,
	vm_map_entry_t        *o_entry)       /* OUT */
{
	vm_map_entry_t  entry, new_entry;
	vm_map_offset_t start;
	vm_map_offset_t end;
	vm_map_entry_t  hole_entry;

	if (size == 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

	if (vmk_flags.vmkf_guard_after) {
		/* account for the back guard page in the size */
		size += VM_MAP_PAGE_SIZE(map);
	}

	new_entry = vm_map_entry_create(map, FALSE);

	/*
	 *	Look for the first possible address; if there's already
	 *	something at this address, we have to start after it.
	 */

	vm_map_lock(map);

	if (map->disable_vmentry_reuse == TRUE) {
		VM_MAP_HIGHEST_ENTRY(map, entry, start);
	} else {
		if (map->holelistenabled) {
			hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

			if (hole_entry == NULL) {
				/*
				 * No more space in the map?
				 */
				vm_map_entry_dispose(map, new_entry);
				vm_map_unlock(map);
				return KERN_NO_SPACE;
			}

			entry = hole_entry;
			start = entry->vme_start;
		} else {
			assert(first_free_is_valid(map));
			if ((entry = map->first_free) == vm_map_to_entry(map)) {
				start = map->min_offset;
			} else {
				start = entry->vme_end;
			}
		}
	}

	/*
	 *	In any case, the "entry" always precedes
	 *	the proposed new region throughout the loop:
	 */

	while (TRUE) {
		vm_map_entry_t next;

		/*
		 *	Find the end of the proposed new region.
		 *	Be sure we didn't go beyond the end, or
		 *	wrap around the address.
		 */

		if (vmk_flags.vmkf_guard_before) {
			/* reserve space for the front guard page */
			start += VM_MAP_PAGE_SIZE(map);
		}
		end = ((start + mask) & ~mask);

		if (end < start) {
			vm_map_entry_dispose(map, new_entry);
			vm_map_unlock(map);
			return KERN_NO_SPACE;
		}
		start = end;
		assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
		end += size;
		assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

		if ((end > map->max_offset) || (end < start)) {
			vm_map_entry_dispose(map, new_entry);
			vm_map_unlock(map);
			return KERN_NO_SPACE;
		}

		next = entry->vme_next;

		if (map->holelistenabled) {
			if (entry->vme_end >= end) {
				break;
			}
		} else {
			/*
			 *	If there are no more entries, we must win.
			 *
			 *	OR
			 *
			 *	If there is another entry, it must be
			 *	after the end of the potential new region.
			 */

			if (next == vm_map_to_entry(map)) {
				break;
			}

			if (next->vme_start >= end) {
				break;
			}
		}

		/*
		 *	Didn't fit -- move to the next entry.
		 */

		entry = next;

		if (map->holelistenabled) {
			if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
				/*
				 * Wrapped around
				 */
				vm_map_entry_dispose(map, new_entry);
				vm_map_unlock(map);
				return KERN_NO_SPACE;
			}
			start = entry->vme_start;
		} else {
			start = entry->vme_end;
		}
	}

	if (map->holelistenabled) {
		if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
		}
	}

	/*
	 *	At this point,
	 *		"start" and "end" should define the endpoints of the
	 *			available new range, and
	 *		"entry" should refer to the region before the new
	 *			range, and
	 *
	 *		the map should be locked.
	 */

	if (vmk_flags.vmkf_guard_before) {
		/* go back for the front guard page */
		start -= VM_MAP_PAGE_SIZE(map);
	}
	*address = start;

	assert(start < end);
	new_entry->vme_start = start;
	new_entry->vme_end = end;
	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
	    VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
	    VM_MAP_PAGE_MASK(map)));

	new_entry->is_shared = FALSE;
	new_entry->is_sub_map = FALSE;
	new_entry->use_pmap = TRUE;
	VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
	VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);

	new_entry->needs_copy = FALSE;

	new_entry->inheritance = VM_INHERIT_DEFAULT;
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_ALL;
	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
	new_entry->wired_count = 0;
	new_entry->user_wired_count = 0;

	new_entry->in_transition = FALSE;
	new_entry->needs_wakeup = FALSE;
	new_entry->no_cache = FALSE;
	new_entry->permanent = FALSE;
	new_entry->superpage_size = FALSE;
	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}

	new_entry->used_for_jit = FALSE;
	new_entry->pmap_cs_associated = FALSE;
	new_entry->zero_wired_pages = FALSE;
	new_entry->iokit_acct = FALSE;
	new_entry->vme_resilient_codesign = FALSE;
	new_entry->vme_resilient_media = FALSE;
	if (vmk_flags.vmkf_atomic_entry) {
		new_entry->vme_atomic = TRUE;
	} else {
		new_entry->vme_atomic = FALSE;
	}

	VME_ALIAS_SET(new_entry, tag);

	/*
	 *	Insert the new entry into the list
	 */

	vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);

	map->size += size;

	/*
	 *	Update the lookup hint
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);

	*o_entry = new_entry;
	return KERN_SUCCESS;
}
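
/*
 * Usage sketch (illustrative, not part of the original source), in the
 * spirit of the kmem_alloc()-style callers mentioned above.  The map comes
 * back locked on success, so the caller fills in the entry and unlocks:
 *
 *	vm_map_offset_t addr;
 *	vm_map_entry_t  entry;
 *
 *	if (vm_map_find_space(kernel_map, &addr, size, 0,
 *	        0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
 *	        &entry) == KERN_SUCCESS) {
 *		VME_OBJECT_SET(entry, object);  // object/offset start out zero
 *		vm_map_unlock(kernel_map);
 *	}
 */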
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;
/*
 *	Routine:	vm_map_pmap_enter [internal only]
 *
 *	Description:
 *		Force pages from the specified object to be entered into
 *		the pmap at the specified address if they are present.
 *		As soon as a page is not found in the object, the scan ends.
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */
__unused static void
vm_map_pmap_enter(
	vm_map_t           map,
	vm_map_offset_t    addr,
	vm_map_offset_t    end_addr,
	vm_object_t        object,
	vm_object_offset_t offset,
	vm_prot_t          protection)
{
	int           type_of_fault;
	kern_return_t kr;
	struct vm_object_fault_info fault_info = {};

	if (map->pmap == 0) {
		return;
	}

	while (addr < end_addr) {
		vm_page_t m;

		/*
		 * From vm_map_enter(), we come into this function without the map
		 * lock held or the object lock held.
		 * We haven't taken a reference on the object either.
		 * We should do a proper lookup on the map to make sure
		 * that things are sane before we go locking objects that
		 * could have been deallocated from under us.
		 */

		vm_object_lock(object);

		m = vm_page_lookup(object, offset);

		if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
			vm_object_unlock(object);
			return;
		}

		if (vm_map_pmap_enter_print) {
			printf("vm_map_pmap_enter:");
			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
			    map, (unsigned long long)addr, object, (unsigned long long)offset);
		}
		type_of_fault = DBG_CACHE_HIT_FAULT;
		kr = vm_fault_enter(m, map->pmap,
		    addr, protection, protection,
		    VM_PAGE_WIRED(m),
		    FALSE,                      /* change_wiring */
		    VM_KERN_MEMORY_NONE,        /* tag - not wiring */
		    &fault_info,
		    NULL,                       /* need_retry */
		    &type_of_fault);

		vm_object_unlock(object);

		offset += PAGE_SIZE_64;
		addr += PAGE_SIZE;
	}
}
boolean_t vm_map_pmap_is_empty(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end);
boolean_t
vm_map_pmap_is_empty(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
#ifdef MACHINE_PMAP_IS_EMPTY
	return pmap_is_empty(map->pmap, start, end);
#else   /* MACHINE_PMAP_IS_EMPTY */
	vm_map_offset_t offset;
	ppnum_t         phys_page;

	if (map->pmap == NULL) {
		return TRUE;
	}

	for (offset = start;
	    offset < end;
	    offset += PAGE_SIZE) {
		phys_page = pmap_find_phys(map->pmap, offset);
		if (phys_page) {
			kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
			    "page %d at 0x%llx\n",
			    map, (long long)start, (long long)end,
			    phys_page, (long long)offset);
			return FALSE;
		}
	}
	return TRUE;
#endif  /* MACHINE_PMAP_IS_EMPTY */
}
#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000

static kern_return_t
vm_map_random_address_for_size(
	vm_map_t        map,
	vm_map_offset_t *address,
	vm_map_size_t   size)
{
	kern_return_t   kr = KERN_SUCCESS;
	int             tries = 0;
	vm_map_offset_t random_addr = 0;
	vm_map_offset_t hole_end;

	vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
	vm_map_size_t   vm_hole_size = 0;
	vm_map_size_t   addr_space_size;

	addr_space_size = vm_map_max(map) - vm_map_min(map);

	assert(page_aligned(size));

	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
		random_addr = vm_map_trunc_page(
			vm_map_min(map) + (random_addr % addr_space_size),
			VM_MAP_PAGE_MASK(map));

		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
			if (prev_entry == vm_map_to_entry(map)) {
				next_entry = vm_map_first_entry(map);
			} else {
				next_entry = prev_entry->vme_next;
			}
			if (next_entry == vm_map_to_entry(map)) {
				hole_end = vm_map_max(map);
			} else {
				hole_end = next_entry->vme_start;
			}
			vm_hole_size = hole_end - random_addr;
			if (vm_hole_size >= size) {
				*address = random_addr;
				break;
			}
		}
		tries++;
	}

	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		kr = KERN_NO_SPACE;
	}
	return kr;
}
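/*
 * Hedged illustration: the loop above draws a random page-aligned candidate
 * inside [vm_map_min(map), vm_map_max(map)) and then checks that the hole
 * following it is large enough.  The candidate arithmetic in isolation looks
 * like the stand-alone demo below; the page shift/mask values and the helper
 * name are assumptions for the demo, and the block is not part of this
 * file's build.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>
#include <stdlib.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_MASK  ((uint64_t)((1ULL << DEMO_PAGE_SHIFT) - 1))

static uint64_t
demo_random_candidate(uint64_t map_min, uint64_t map_max)
{
	uint64_t addr_space_size = map_max - map_min;
	/* random() yields at most 31 bits; shifting by the page shift spreads
	 * candidates across the address space in page-sized steps. */
	uint64_t r = ((uint64_t)random()) << DEMO_PAGE_SHIFT;

	return (map_min + (r % addr_space_size)) & ~DEMO_PAGE_MASK;
}
#endif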
static boolean_t
vm_memory_malloc_no_cow(
	int alias)
{
	uint64_t alias_mask;

	/* ... (range checks elided in this excerpt) ... */
	alias_mask = 1ULL << alias;
	if (alias_mask & vm_memory_malloc_no_cow_mask) {
		return TRUE;
	}
	return FALSE;
}
/*
 *	Routine:	vm_map_enter
 *
 *	Description:
 *		Allocate a range in the specified virtual address map.
 *		The resulting range will refer to memory defined by
 *		the given memory object and offset into that object.
 *
 *		Arguments are as defined in the vm_map call.
 */
int _map_enter_debug = 0;
static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;
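/*
 * Hedged illustration: the arguments to vm_map_enter() mirror the
 * user-visible vm_map/mach_vm_map interface.  A user-level example of
 * establishing an anonymous zero-fill mapping through that interface is
 * sketched below; the helper name is an assumption for the example and the
 * block is not part of this file's build.
 */
#if 0   /* illustrative sketch only */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
demo_map_anonymous(mach_vm_address_t *out_addr, mach_vm_size_t size)
{
	*out_addr = 0;
	return mach_vm_map(mach_task_self(),
	    out_addr,               /* IN/OUT: chosen address */
	    size,
	    0,                      /* mask: no extra alignment */
	    VM_FLAGS_ANYWHERE,
	    MACH_PORT_NULL,         /* no memory object: zero-fill */
	    0,                      /* offset */
	    FALSE,                  /* copy */
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_INHERIT_DEFAULT);
}
#endif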
kern_return_t
vm_map_enter(
	vm_map_t                map,
	vm_map_offset_t         *address,       /* IN/OUT */
	vm_map_size_t           size,
	vm_map_offset_t         mask,
	int                     flags,
	vm_map_kernel_flags_t   vmk_flags,
	vm_tag_t                alias,
	vm_object_t             object,
	vm_object_offset_t      offset,
	boolean_t               needs_copy,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_inherit_t            inheritance)
{
	vm_map_entry_t          entry, new_entry;
	vm_map_offset_t         start, tmp_start, tmp_offset;
	vm_map_offset_t         end, tmp_end;
	vm_map_offset_t         tmp2_start, tmp2_end;
	vm_map_offset_t         desired_empty_end;
	vm_map_offset_t         step;
	kern_return_t           result = KERN_SUCCESS;
	vm_map_t                zap_old_map = VM_MAP_NULL;
	vm_map_t                zap_new_map = VM_MAP_NULL;
	boolean_t               map_locked = FALSE;
	boolean_t               pmap_empty = TRUE;
	boolean_t               new_mapping_established = FALSE;
	boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
	boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
	boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
	boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
	boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
	boolean_t               is_submap = vmk_flags.vmkf_submap;
	boolean_t               permanent = vmk_flags.vmkf_permanent;
	boolean_t               no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
	boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
	boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
	boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
	boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
	boolean_t               random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
	unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
	vm_tag_t                user_alias;
	vm_map_offset_t         effective_min_offset, effective_max_offset;
	kern_return_t           kr;
	boolean_t               clear_map_aligned = FALSE;
	vm_map_entry_t          hole_entry;
	vm_map_size_t           chunk_size = 0;

	assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
	if (flags & VM_FLAGS_4GB_CHUNK) {
#if defined(__LP64__)
		chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
#else /* __LP64__ */
		chunk_size = ANON_CHUNK_SIZE;
#endif /* __LP64__ */
	} else {
		chunk_size = ANON_CHUNK_SIZE;
	}
	if (superpage_size) {
		switch (superpage_size) {
			/*
			 * Note that the current implementation only supports
			 * a single size for superpages, SUPERPAGE_SIZE, per
			 * architecture. As soon as more sizes are supposed
			 * to be supported, SUPERPAGE_SIZE has to be replaced
			 * with a lookup of the size depending on superpage_size.
			 */
		case SUPERPAGE_SIZE_ANY:
			/* handle it like 2 MB and round up to page size */
			size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
		case SUPERPAGE_SIZE_2MB:
			break;
		default:
			return KERN_INVALID_ARGUMENT;
		}
		mask = SUPERPAGE_SIZE - 1;
		if (size & (SUPERPAGE_SIZE - 1)) {
			return KERN_INVALID_ARGUMENT;
		}
		inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
	}
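/*
 * Hedged illustration: superpage mappings are requested from user space by
 * encoding a superpage size into the allocation flags.  A user-level example
 * using the public Mach interface is sketched below; the helper name is an
 * assumption for the example, superpage support is architecture-dependent,
 * and the request can fail (e.g. KERN_INVALID_ARGUMENT or KERN_NO_SPACE).
 * The block is not part of this file's build.
 */
#if 0   /* illustrative sketch only */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/vm_statistics.h>

static kern_return_t
demo_alloc_2mb_superpage(mach_vm_address_t *out_addr)
{
	*out_addr = 0;
	return mach_vm_allocate(mach_task_self(), out_addr,
	    2 * 1024 * 1024,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
}
#endif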
	if ((cur_protection & VM_PROT_WRITE) &&
	    (cur_protection & VM_PROT_EXECUTE) &&
#if !CONFIG_EMBEDDED
	    map != kernel_map &&
	    (cs_process_global_enforcement() ||
	    (vmk_flags.vmkf_cs_enforcement_override
	    ? vmk_flags.vmkf_cs_enforcement
	    : cs_process_enforcement(NULL))) &&
#endif /* !CONFIG_EMBEDDED */
	    !entry_for_jit) {
		DTRACE_VM3(cs_wx,
		    uint64_t, 0,
		    uint64_t, 0,
		    vm_prot_t, cur_protection);
		printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
#if VM_PROTECT_WX_FAIL
		    "failing\n",
#else /* VM_PROTECT_WX_FAIL */
		    "turning off execute\n",
#endif /* VM_PROTECT_WX_FAIL */
		    proc_selfpid(),
		    (current_task()->bsd_info
		    ? proc_name_address(current_task()->bsd_info)
		    : "?"),
		    __FUNCTION__);
		cur_protection &= ~VM_PROT_EXECUTE;
#if VM_PROTECT_WX_FAIL
		return KERN_PROTECTION_FAILURE;
#endif /* VM_PROTECT_WX_FAIL */
	}
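/*
 * Hedged illustration: the check above is where a user mapping that asks for
 * write+execute is either downgraded or rejected unless it was created as a
 * JIT region.  From user space, the usual way to obtain legitimately
 * writable-and-executable memory is mmap() with MAP_JIT (subject to the
 * appropriate entitlement / hardened-runtime configuration); a plain RWX
 * request may come back with execute stripped or fail.  The helper name is an
 * assumption for the example; the block is not part of this file's build.
 */
#if 0   /* illustrative sketch only */
#include <stddef.h>
#include <stdio.h>
#include <sys/mman.h>

static void *
demo_alloc_jit_region(size_t size)
{
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
	    MAP_PRIVATE | MAP_ANON | MAP_JIT, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_JIT)");
		return NULL;
	}
	return p;
}
#endif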
	/*
	 * If the task has requested executable lockdown,
	 * deny any new executable mapping.
	 */
	if (map->map_disallow_new_exec == TRUE) {
		if (cur_protection & VM_PROT_EXECUTE) {
			return KERN_PROTECTION_FAILURE;
		}
	}

	if (resilient_codesign) {
		if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
		    (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
			return KERN_PROTECTION_FAILURE;
		}
	}
	if (resilient_media) {
		// assert(!needs_copy);
		if (object != VM_OBJECT_NULL &&
		    !object->internal) {
			/*
			 * This mapping is directly backed by an external
			 * memory manager (e.g. a vnode pager for a file):
			 * we would not have any safe place to inject
			 * a zero-filled page if an actual page is not
			 * available, without possibly impacting the actual
			 * contents of the mapped object (e.g. the file),
			 * so we can't provide any media resiliency here.
			 */
			return KERN_INVALID_ARGUMENT;
		}
	}

	if (is_submap) {
		if (purgable) {
			/* submaps can not be purgeable */
			return KERN_INVALID_ARGUMENT;
		}
		if (object == VM_OBJECT_NULL) {
			/* submaps can not be created lazily */
			return KERN_INVALID_ARGUMENT;
		}
	}
	if (vmk_flags.vmkf_already) {
		/*
		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
		 * is already present. For it to be meaningful, the requested
		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
		 * we shouldn't try and remove what was mapped there first
		 * (!VM_FLAGS_OVERWRITE).
		 */
		if ((flags & VM_FLAGS_ANYWHERE) ||
		    (flags & VM_FLAGS_OVERWRITE)) {
			return KERN_INVALID_ARGUMENT;
		}
	}
	effective_min_offset = map->min_offset;

	if (vmk_flags.vmkf_beyond_max) {
		/*
		 * Allow an insertion beyond the map's max offset.
		 */
#if     !defined(__arm__) && !defined(__arm64__)
		if (vm_map_is_64bit(map)) {
			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
		} else
#endif  /* __arm__ */
		effective_max_offset = 0x00000000FFFFF000ULL;
	} else {
#if !defined(CONFIG_EMBEDDED)
		if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
			effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
		} else {
			effective_max_offset = map->max_offset;
		}
#else /* !defined(CONFIG_EMBEDDED) */
		effective_max_offset = map->max_offset;
#endif /* !defined(CONFIG_EMBEDDED) */
	}
	if (size == 0 ||
	    (offset & PAGE_MASK_64) != 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

	if (map->pmap == kernel_pmap) {
		user_alias = VM_KERN_MEMORY_NONE;
	} else {
		user_alias = alias;
	}

	if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
		chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
	}
#define RETURN(value)   { result = value; goto BailOut; }

	assert(page_aligned(*address));
	assert(page_aligned(size));

	if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
		/*
		 * In most cases, the caller rounds the size up to the
		 * map's page size.
		 * If we get a size that is explicitly not map-aligned here,
		 * we'll have to respect the caller's wish and mark the
		 * mapping as "not map-aligned" to avoid tripping the
		 * map alignment checks later.
		 */
		clear_map_aligned = TRUE;
	}
	if (!anywhere &&
	    !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
		/*
		 * We've been asked to map at a fixed address and that
		 * address is not aligned to the map's specific alignment.
		 * The caller should know what it's doing (i.e. most likely
		 * mapping some fragmented copy map, transferring memory from
		 * a VM map with a different alignment), so clear map_aligned
		 * for this new VM map entry and proceed.
		 */
		clear_map_aligned = TRUE;
	}
	/*
	 * Only zero-fill objects are allowed to be purgable.
	 * LP64todo - limit purgable objects to 32-bits for now
	 */
	if (purgable &&
	    (offset != 0 ||
	    (object != VM_OBJECT_NULL &&
	    (object->vo_size != size ||
	    object->purgable == VM_PURGABLE_DENY))
	    || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
		return KERN_INVALID_ARGUMENT;
	}

	if (!anywhere && overwrite) {
		/*
		 * Create a temporary VM map to hold the old mappings in the
		 * affected area while we create the new one.
		 * This avoids releasing the VM map lock in
		 * vm_map_entry_delete() and allows atomicity
		 * when we want to replace some mappings with a new one.
		 * It also allows us to restore the old VM mappings if the
		 * new mapping fails.
		 */
		zap_old_map = vm_map_create(PMAP_NULL,
		    *address,
		    *address + size,
		    map->hdr.entries_pageable);
		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
		vm_map_disable_hole_optimization(zap_old_map);
	}
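/*
 * Hedged illustration: the "zap map" above lets a fixed-address mapping
 * requested with VM_FLAGS_OVERWRITE atomically replace whatever is already
 * there, and be rolled back if the new mapping fails.  From user space that
 * request looks like the sketch below: a fixed, overwriting re-allocation of
 * an existing range.  The helper name is an assumption for the example; the
 * block is not part of this file's build.
 */
#if 0   /* illustrative sketch only */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
demo_overwrite_range(mach_vm_address_t addr, mach_vm_size_t size)
{
	/* Replace the existing mapping at [addr, addr+size) with fresh
	 * zero-fill memory in a single call. */
	return mach_vm_allocate(mach_task_self(), &addr, size,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE);
}
#endif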
		if (entry_for_jit) {
#if CONFIG_EMBEDDED
			if (map->jit_entry_exists) {
				result = KERN_INVALID_ARGUMENT;
				goto BailOut;
			}
			random_address = TRUE;
#endif /* CONFIG_EMBEDDED */
		}

		if (random_address) {
			/*
			 * Get a random start address.
			 */
			result = vm_map_random_address_for_size(map, address, size);
			if (result != KERN_SUCCESS) {
				goto BailOut;
			}
			start = *address;
		}
#if !CONFIG_EMBEDDED
		else if ((start == 0 || start == vm_map_min(map)) &&
		    !map->disable_vmentry_reuse &&
		    map->vmmap_high_start != 0) {
			start = map->vmmap_high_start;
		}
#endif
2485 * Calculate the first possible address.
2488 if (start
< effective_min_offset
) {
2489 start
= effective_min_offset
;
2491 if (start
> effective_max_offset
) {
2492 RETURN(KERN_NO_SPACE
);
2496 * Look for the first possible address;
2497 * if there's already something at this
2498 * address, we have to start after it.
2501 if (map
->disable_vmentry_reuse
== TRUE
) {
2502 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
2504 if (map
->holelistenabled
) {
2505 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
2507 if (hole_entry
== NULL
) {
2509 * No more space in the map?
2511 result
= KERN_NO_SPACE
;
2514 boolean_t found_hole
= FALSE
;
2517 if (hole_entry
->vme_start
>= start
) {
2518 start
= hole_entry
->vme_start
;
2523 if (hole_entry
->vme_end
> start
) {
2527 hole_entry
= hole_entry
->vme_next
;
2528 } while (hole_entry
!= CAST_TO_VM_MAP_ENTRY(map
->holes_list
));
2530 if (found_hole
== FALSE
) {
2531 result
= KERN_NO_SPACE
;
2538 start
+= PAGE_SIZE_64
;
2542 assert(first_free_is_valid(map
));
2544 entry
= map
->first_free
;
2546 if (entry
== vm_map_to_entry(map
)) {
2549 if (entry
->vme_next
== vm_map_to_entry(map
)) {
2551 * Hole at the end of the map.
2555 if (start
< (entry
->vme_next
)->vme_start
) {
2556 start
= entry
->vme_end
;
2557 start
= vm_map_round_page(start
,
2558 VM_MAP_PAGE_MASK(map
));
2561 * Need to do a lookup.
2568 if (entry
== NULL
) {
2569 vm_map_entry_t tmp_entry
;
2570 if (vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
2571 assert(!entry_for_jit
);
2572 start
= tmp_entry
->vme_end
;
2573 start
= vm_map_round_page(start
,
2574 VM_MAP_PAGE_MASK(map
));
2582 * In any case, the "entry" always precedes
2583 * the proposed new region throughout the
2588 vm_map_entry_t next
;
2591 * Find the end of the proposed new region.
2592 * Be sure we didn't go beyond the end, or
2593 * wrap around the address.
2596 end
= ((start
+ mask
) & ~mask
);
2597 end
= vm_map_round_page(end
,
2598 VM_MAP_PAGE_MASK(map
));
2600 RETURN(KERN_NO_SPACE
);
2603 assert(VM_MAP_PAGE_ALIGNED(start
,
2604 VM_MAP_PAGE_MASK(map
)));
2607 /* We want an entire page of empty space, but don't increase the allocation size. */
2608 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
2610 if ((desired_empty_end
> effective_max_offset
) || (desired_empty_end
< start
)) {
2611 if (map
->wait_for_space
) {
2612 assert(!keep_map_locked
);
2613 if (size
<= (effective_max_offset
-
2614 effective_min_offset
)) {
2615 assert_wait((event_t
)map
,
2619 thread_block(THREAD_CONTINUE_NULL
);
2623 RETURN(KERN_NO_SPACE
);
2626 next
= entry
->vme_next
;
2628 if (map
->holelistenabled
) {
2629 if (entry
->vme_end
>= desired_empty_end
) {
2634 * If there are no more entries, we must win.
2638 * If there is another entry, it must be
2639 * after the end of the potential new region.
2642 if (next
== vm_map_to_entry(map
)) {
2646 if (next
->vme_start
>= desired_empty_end
) {
2652 * Didn't fit -- move to the next entry.
2657 if (map
->holelistenabled
) {
2658 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
2662 result
= KERN_NO_SPACE
;
2665 start
= entry
->vme_start
;
2667 start
= entry
->vme_end
;
2670 start
= vm_map_round_page(start
,
2671 VM_MAP_PAGE_MASK(map
));
2674 if (map
->holelistenabled
) {
2675 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
2676 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
2681 assert(VM_MAP_PAGE_ALIGNED(*address
,
2682 VM_MAP_PAGE_MASK(map
)));
2686 * the address doesn't itself violate
2687 * the mask requirement.
2692 if ((start
& mask
) != 0) {
2693 RETURN(KERN_NO_SPACE
);
2697 * ... the address is within bounds
2702 if ((start
< effective_min_offset
) ||
2703 (end
> effective_max_offset
) ||
2705 RETURN(KERN_INVALID_ADDRESS
);
2708 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
2711 * Fixed mapping and "overwrite" flag: attempt to
2712 * remove all existing mappings in the specified
2713 * address range, saving them in our "zap_old_map".
2715 remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
;
2716 remove_flags
|= VM_MAP_REMOVE_NO_MAP_ALIGN
;
2717 if (vmk_flags
.vmkf_overwrite_immutable
) {
2718 /* we can overwrite immutable mappings */
2719 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
2721 (void) vm_map_delete(map
, start
, end
,
2727 * ... the starting address isn't allocated
2730 if (vm_map_lookup_entry(map
, start
, &entry
)) {
2731 if (!(vmk_flags
.vmkf_already
)) {
2732 RETURN(KERN_NO_SPACE
);
2735 * Check if what's already there is what we want.
2738 tmp_offset
= offset
;
2739 if (entry
->vme_start
< start
) {
2740 tmp_start
-= start
- entry
->vme_start
;
2741 tmp_offset
-= start
- entry
->vme_start
;
2743 for (; entry
->vme_start
< end
;
2744 entry
= entry
->vme_next
) {
2746 * Check if the mapping's attributes
2747 * match the existing map entry.
2749 if (entry
== vm_map_to_entry(map
) ||
2750 entry
->vme_start
!= tmp_start
||
2751 entry
->is_sub_map
!= is_submap
||
2752 VME_OFFSET(entry
) != tmp_offset
||
2753 entry
->needs_copy
!= needs_copy
||
2754 entry
->protection
!= cur_protection
||
2755 entry
->max_protection
!= max_protection
||
2756 entry
->inheritance
!= inheritance
||
2757 entry
->iokit_acct
!= iokit_acct
||
2758 VME_ALIAS(entry
) != alias
) {
2759 /* not the same mapping ! */
2760 RETURN(KERN_NO_SPACE
);
2763 * Check if the same object is being mapped.
2766 if (VME_SUBMAP(entry
) !=
2767 (vm_map_t
) object
) {
2768 /* not the same submap */
2769 RETURN(KERN_NO_SPACE
);
2772 if (VME_OBJECT(entry
) != object
) {
2773 /* not the same VM object... */
2776 obj2
= VME_OBJECT(entry
);
2777 if ((obj2
== VM_OBJECT_NULL
||
2779 (object
== VM_OBJECT_NULL
||
2780 object
->internal
)) {
2787 RETURN(KERN_NO_SPACE
);
2792 tmp_offset
+= entry
->vme_end
- entry
->vme_start
;
2793 tmp_start
+= entry
->vme_end
- entry
->vme_start
;
2794 if (entry
->vme_end
>= end
) {
2795 /* reached the end of our mapping */
2799 /* it all matches: let's use what's already there ! */
2800 RETURN(KERN_MEMORY_PRESENT
);
2804 * ... the next region doesn't overlap the
2808 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
2809 (entry
->vme_next
->vme_start
< end
)) {
2810 RETURN(KERN_NO_SPACE
);
2816 * "start" and "end" should define the endpoints of the
2817 * available new range, and
2818 * "entry" should refer to the region before the new
2821 * the map should be locked.
2825 * See whether we can avoid creating a new entry (and object) by
2826 * extending one of our neighbors. [So far, we only attempt to
2827 * extend from below.] Note that we can never extend/join
2828 * purgable objects because they need to remain distinct
2829 * entities in order to implement their "volatile object"
2835 vm_memory_malloc_no_cow(user_alias
)) {
2836 if (object
== VM_OBJECT_NULL
) {
2837 object
= vm_object_allocate(size
);
2838 object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
2839 object
->true_share
= FALSE
;
2842 object
->purgable
= VM_PURGABLE_NONVOLATILE
;
2843 if (map
->pmap
== kernel_pmap
) {
2845 * Purgeable mappings made in a kernel
2846 * map are "owned" by the kernel itself
2847 * rather than the current user task
2848 * because they're likely to be used by
2849 * more than this user task (see
2850 * execargs_purgeable_allocate(), for
2853 owner
= kernel_task
;
2855 owner
= current_task();
2857 assert(object
->vo_owner
== NULL
);
2858 assert(object
->resident_page_count
== 0);
2859 assert(object
->wired_page_count
== 0);
2860 vm_object_lock(object
);
2861 vm_purgeable_nonvolatile_enqueue(object
, owner
);
2862 vm_object_unlock(object
);
2864 offset
= (vm_object_offset_t
)0;
2866 } else if ((is_submap
== FALSE
) &&
2867 (object
== VM_OBJECT_NULL
) &&
2868 (entry
!= vm_map_to_entry(map
)) &&
2869 (entry
->vme_end
== start
) &&
2870 (!entry
->is_shared
) &&
2871 (!entry
->is_sub_map
) &&
2872 (!entry
->in_transition
) &&
2873 (!entry
->needs_wakeup
) &&
2874 (entry
->behavior
== VM_BEHAVIOR_DEFAULT
) &&
2875 (entry
->protection
== cur_protection
) &&
2876 (entry
->max_protection
== max_protection
) &&
2877 (entry
->inheritance
== inheritance
) &&
2878 ((user_alias
== VM_MEMORY_REALLOC
) ||
2879 (VME_ALIAS(entry
) == alias
)) &&
2880 (entry
->no_cache
== no_cache
) &&
2881 (entry
->permanent
== permanent
) &&
2882 /* no coalescing for immutable executable mappings */
2883 !((entry
->protection
& VM_PROT_EXECUTE
) &&
2884 entry
->permanent
) &&
2885 (!entry
->superpage_size
&& !superpage_size
) &&
2887 * No coalescing if not map-aligned, to avoid propagating
2888 * that condition any further than needed:
2890 (!entry
->map_aligned
|| !clear_map_aligned
) &&
2891 (!entry
->zero_wired_pages
) &&
2892 (!entry
->used_for_jit
&& !entry_for_jit
) &&
2893 (!entry
->pmap_cs_associated
) &&
2894 (entry
->iokit_acct
== iokit_acct
) &&
2895 (!entry
->vme_resilient_codesign
) &&
2896 (!entry
->vme_resilient_media
) &&
2897 (!entry
->vme_atomic
) &&
2898 (entry
->vme_no_copy_on_read
== no_copy_on_read
) &&
2900 ((entry
->vme_end
- entry
->vme_start
) + size
<=
2901 (user_alias
== VM_MEMORY_REALLOC
?
2903 NO_COALESCE_LIMIT
)) &&
2905 (entry
->wired_count
== 0)) { /* implies user_wired_count == 0 */
2906 if (vm_object_coalesce(VME_OBJECT(entry
),
2909 (vm_object_offset_t
) 0,
2910 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
),
2911 (vm_map_size_t
)(end
- entry
->vme_end
))) {
2913 * Coalesced the two objects - can extend
2914 * the previous map entry to include the
2917 map
->size
+= (end
- entry
->vme_end
);
2918 assert(entry
->vme_start
< end
);
2919 assert(VM_MAP_PAGE_ALIGNED(end
,
2920 VM_MAP_PAGE_MASK(map
)));
2921 if (__improbable(vm_debug_events
)) {
2922 DTRACE_VM5(map_entry_extend
, vm_map_t
, map
, vm_map_entry_t
, entry
, vm_address_t
, entry
->vme_start
, vm_address_t
, entry
->vme_end
, vm_address_t
, end
);
2924 entry
->vme_end
= end
;
2925 if (map
->holelistenabled
) {
2926 vm_map_store_update_first_free(map
, entry
, TRUE
);
2928 vm_map_store_update_first_free(map
, map
->first_free
, TRUE
);
2930 new_mapping_established
= TRUE
;
2931 RETURN(KERN_SUCCESS
);
2935 step
= superpage_size
? SUPERPAGE_SIZE
: (end
- start
);
2938 for (tmp2_start
= start
; tmp2_start
< end
; tmp2_start
+= step
) {
2939 tmp2_end
= tmp2_start
+ step
;
2941 * Create a new entry
2944 * The reserved "page zero" in each process's address space can
2945 * be arbitrarily large. Splitting it into separate objects and
2946 * therefore different VM map entries serves no purpose and just
2947 * slows down operations on the VM map, so let's not split the
2948 * allocation into chunks if the max protection is NONE. That
2949 * memory should never be accessible, so it will never get to the
2952 tmp_start
= tmp2_start
;
2953 if (object
== VM_OBJECT_NULL
&&
2954 size
> chunk_size
&&
2955 max_protection
!= VM_PROT_NONE
&&
2956 superpage_size
== 0) {
2957 tmp_end
= tmp_start
+ chunk_size
;
2962 new_entry
= vm_map_entry_insert(
2963 map
, entry
, tmp_start
, tmp_end
,
2964 object
, offset
, needs_copy
,
2966 cur_protection
, max_protection
,
2967 VM_BEHAVIOR_DEFAULT
,
2968 (entry_for_jit
)? VM_INHERIT_NONE
: inheritance
,
2979 assert((object
!= kernel_object
) || (VM_KERN_MEMORY_NONE
!= alias
));
2981 if (resilient_codesign
&&
2982 !((cur_protection
| max_protection
) &
2983 (VM_PROT_WRITE
| VM_PROT_EXECUTE
))) {
2984 new_entry
->vme_resilient_codesign
= TRUE
;
2987 if (resilient_media
&&
2988 (object
== VM_OBJECT_NULL
||
2989 object
->internal
)) {
2990 new_entry
->vme_resilient_media
= TRUE
;
2993 assert(!new_entry
->iokit_acct
);
2995 object
!= VM_OBJECT_NULL
&&
2996 (object
->purgable
!= VM_PURGABLE_DENY
||
2997 object
->vo_ledger_tag
)) {
2998 assert(new_entry
->use_pmap
);
2999 assert(!new_entry
->iokit_acct
);
3001 * Turn off pmap accounting since
3002 * purgeable (or tagged) objects have their
3005 new_entry
->use_pmap
= FALSE
;
3006 } else if (!is_submap
&&
3008 object
!= VM_OBJECT_NULL
&&
3010 /* alternate accounting */
3011 assert(!new_entry
->iokit_acct
);
3012 assert(new_entry
->use_pmap
);
3013 new_entry
->iokit_acct
= TRUE
;
3014 new_entry
->use_pmap
= FALSE
;
3016 vm_map_iokit_mapped_region
,
3018 vm_map_offset_t
, new_entry
->vme_start
,
3019 vm_map_offset_t
, new_entry
->vme_end
,
3020 int, VME_ALIAS(new_entry
));
3021 vm_map_iokit_mapped_region(
3023 (new_entry
->vme_end
-
3024 new_entry
->vme_start
));
3025 } else if (!is_submap
) {
3026 assert(!new_entry
->iokit_acct
);
3027 assert(new_entry
->use_pmap
);
3032 boolean_t submap_is_64bit
;
3035 assert(new_entry
->is_sub_map
);
3036 assert(!new_entry
->use_pmap
);
3037 assert(!new_entry
->iokit_acct
);
3038 submap
= (vm_map_t
) object
;
3039 submap_is_64bit
= vm_map_is_64bit(submap
);
3040 use_pmap
= vmk_flags
.vmkf_nested_pmap
;
3041 #ifndef NO_NESTED_PMAP
3042 if (use_pmap
&& submap
->pmap
== NULL
) {
3043 ledger_t ledger
= map
->pmap
->ledger
;
3044 /* we need a sub pmap to nest... */
3045 submap
->pmap
= pmap_create_options(ledger
, 0,
3046 submap_is_64bit
? PMAP_CREATE_64BIT
: 0);
3047 if (submap
->pmap
== NULL
) {
3048 /* let's proceed without nesting... */
3050 #if defined(__arm__) || defined(__arm64__)
3052 pmap_set_nested(submap
->pmap
);
3056 if (use_pmap
&& submap
->pmap
!= NULL
) {
3057 kr
= pmap_nest(map
->pmap
,
3061 tmp_end
- tmp_start
);
3062 if (kr
!= KERN_SUCCESS
) {
3063 printf("vm_map_enter: "
3064 "pmap_nest(0x%llx,0x%llx) "
3066 (long long)tmp_start
,
3070 /* we're now nested ! */
3071 new_entry
->use_pmap
= TRUE
;
3075 #endif /* NO_NESTED_PMAP */
3079 if (superpage_size
) {
3081 vm_object_t sp_object
;
3082 vm_object_offset_t sp_offset
;
3084 VME_OFFSET_SET(entry
, 0);
3086 /* allocate one superpage */
3087 kr
= cpm_allocate(SUPERPAGE_SIZE
, &pages
, 0, SUPERPAGE_NBASEPAGES
- 1, TRUE
, 0);
3088 if (kr
!= KERN_SUCCESS
) {
3089 /* deallocate whole range... */
3090 new_mapping_established
= TRUE
;
3091 /* ... but only up to "tmp_end" */
3092 size
-= end
- tmp_end
;
3096 /* create one vm_object per superpage */
3097 sp_object
= vm_object_allocate((vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
3098 sp_object
->phys_contiguous
= TRUE
;
3099 sp_object
->vo_shadow_offset
= (vm_object_offset_t
)VM_PAGE_GET_PHYS_PAGE(pages
) * PAGE_SIZE
;
3100 VME_OBJECT_SET(entry
, sp_object
);
3101 assert(entry
->use_pmap
);
3103 /* enter the base pages into the object */
3104 vm_object_lock(sp_object
);
3106 sp_offset
< SUPERPAGE_SIZE
;
3107 sp_offset
+= PAGE_SIZE
) {
3109 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m
));
3110 pages
= NEXT_PAGE(m
);
3111 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
3112 vm_page_insert_wired(m
, sp_object
, sp_offset
, VM_KERN_MEMORY_OSFMK
);
3114 vm_object_unlock(sp_object
);
3116 } while (tmp_end
!= tmp2_end
&&
3117 (tmp_start
= tmp_end
) &&
3118 (tmp_end
= (tmp2_end
- tmp_end
> chunk_size
) ?
3119 tmp_end
+ chunk_size
: tmp2_end
));
3122 new_mapping_established
= TRUE
;
3125 assert(map_locked
== TRUE
);
3127 if (result
== KERN_SUCCESS
) {
3128 vm_prot_t pager_prot
;
3129 memory_object_t pager
;
3133 !(vmk_flags
.vmkf_no_pmap_check
)) {
3134 assert(vm_map_pmap_is_empty(map
,
3141 * For "named" VM objects, let the pager know that the
3142 * memory object is being mapped. Some pagers need to keep
3143 * track of this, to know when they can reclaim the memory
3144 * object, for example.
3145 * VM calls memory_object_map() for each mapping (specifying
3146 * the protection of each mapping) and calls
3147 * memory_object_last_unmap() when all the mappings are gone.
3149 pager_prot
= max_protection
;
3152 * Copy-On-Write mapping: won't modify
3153 * the memory object.
3155 pager_prot
&= ~VM_PROT_WRITE
;
3158 object
!= VM_OBJECT_NULL
&&
3160 object
->pager
!= MEMORY_OBJECT_NULL
) {
3161 vm_object_lock(object
);
3162 pager
= object
->pager
;
3163 if (object
->named
&&
3164 pager
!= MEMORY_OBJECT_NULL
) {
3165 assert(object
->pager_ready
);
3166 vm_object_mapping_wait(object
, THREAD_UNINT
);
3167 vm_object_mapping_begin(object
);
3168 vm_object_unlock(object
);
3170 kr
= memory_object_map(pager
, pager_prot
);
3171 assert(kr
== KERN_SUCCESS
);
3173 vm_object_lock(object
);
3174 vm_object_mapping_end(object
);
3176 vm_object_unlock(object
);
3180 assert(map_locked
== TRUE
);
3182 if (!keep_map_locked
) {
3188 * We can't hold the map lock if we enter this block.
3191 if (result
== KERN_SUCCESS
) {
3192 /* Wire down the new entry if the user
3193 * requested all new map entries be wired.
3195 if ((map
->wiring_required
) || (superpage_size
)) {
3196 assert(!keep_map_locked
);
3197 pmap_empty
= FALSE
; /* pmap won't be empty */
3198 kr
= vm_map_wire_kernel(map
, start
, end
,
3199 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
3206 if (result
!= KERN_SUCCESS
) {
3207 if (new_mapping_established
) {
3209 * We have to get rid of the new mappings since we
3210 * won't make them available to the user.
3211 * Try and do that atomically, to minimize the risk
3212 * that someone else create new mappings that range.
3214 zap_new_map
= vm_map_create(PMAP_NULL
,
3217 map
->hdr
.entries_pageable
);
3218 vm_map_set_page_shift(zap_new_map
,
3219 VM_MAP_PAGE_SHIFT(map
));
3220 vm_map_disable_hole_optimization(zap_new_map
);
3226 (void) vm_map_delete(map
, *address
, *address
+ size
,
3227 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3228 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3231 if (zap_old_map
!= VM_MAP_NULL
&&
3232 zap_old_map
->hdr
.nentries
!= 0) {
3233 vm_map_entry_t entry1
, entry2
;
3236 * The new mapping failed. Attempt to restore
3237 * the old mappings, saved in the "zap_old_map".
3244 /* first check if the coast is still clear */
3245 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
3246 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
3247 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
3248 vm_map_lookup_entry(map
, end
, &entry2
) ||
3251 * Part of that range has already been
3252 * re-mapped: we can't restore the old
3255 vm_map_enter_restore_failures
++;
3258 * Transfer the saved map entries from
3259 * "zap_old_map" to the original "map",
3260 * inserting them all after "entry1".
3262 for (entry2
= vm_map_first_entry(zap_old_map
);
3263 entry2
!= vm_map_to_entry(zap_old_map
);
3264 entry2
= vm_map_first_entry(zap_old_map
)) {
3265 vm_map_size_t entry_size
;
3267 entry_size
= (entry2
->vme_end
-
3269 vm_map_store_entry_unlink(zap_old_map
,
3271 zap_old_map
->size
-= entry_size
;
3272 vm_map_store_entry_link(map
, entry1
, entry2
,
3273 VM_MAP_KERNEL_FLAGS_NONE
);
3274 map
->size
+= entry_size
;
3277 if (map
->wiring_required
) {
3279 * XXX TODO: we should rewire the
3283 vm_map_enter_restore_successes
++;
3289 * The caller is responsible for releasing the lock if it requested to
3290 * keep the map locked.
3292 if (map_locked
&& !keep_map_locked
) {
3297 * Get rid of the "zap_maps" and all the map entries that
3298 * they may still contain.
3300 if (zap_old_map
!= VM_MAP_NULL
) {
3301 vm_map_destroy(zap_old_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3302 zap_old_map
= VM_MAP_NULL
;
3304 if (zap_new_map
!= VM_MAP_NULL
) {
3305 vm_map_destroy(zap_new_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3306 zap_new_map
= VM_MAP_NULL
;
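/*
 * Hedged illustration: the comments in vm_map_enter() about "named" VM
 * objects refer to mappings backed by a memory-entry port, whose pager is
 * told via memory_object_map() / memory_object_last_unmap() when mappings
 * come and go.  A user-level example of creating such a named entry from an
 * existing buffer and mapping it a second time (sharing the same pages) is
 * sketched below; the helper name is an assumption for the example and the
 * block is not part of this file's build.
 */
#if 0   /* illustrative sketch only */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
demo_map_named_entry(mach_vm_address_t src, mach_vm_size_t size,
    mach_vm_address_t *out_addr)
{
	mach_port_t             entry = MACH_PORT_NULL;
	memory_object_size_t    entry_size = size;
	kern_return_t           kr;

	/* Wrap [src, src+size) in a named memory entry... */
	kr = mach_make_memory_entry_64(mach_task_self(), &entry_size, src,
	    VM_PROT_READ | VM_PROT_WRITE, &entry, MACH_PORT_NULL);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* ... and map that entry at a new address, without copying. */
	*out_addr = 0;
	kr = mach_vm_map(mach_task_self(), out_addr, size, 0,
	    VM_FLAGS_ANYWHERE, entry, 0, FALSE /* copy */,
	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE,
	    VM_INHERIT_DEFAULT);
	mach_port_deallocate(mach_task_self(), entry);
	return kr;
}
#endif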
3315 extern const struct memory_object_pager_ops fourk_pager_ops
;
3319 vm_map_offset_t
*address
, /* IN/OUT */
3321 vm_map_offset_t mask
,
3323 vm_map_kernel_flags_t vmk_flags
,
3326 vm_object_offset_t offset
,
3327 boolean_t needs_copy
,
3328 vm_prot_t cur_protection
,
3329 vm_prot_t max_protection
,
3330 vm_inherit_t inheritance
)
3332 vm_map_entry_t entry
, new_entry
;
3333 vm_map_offset_t start
, fourk_start
;
3334 vm_map_offset_t end
, fourk_end
;
3335 vm_map_size_t fourk_size
;
3336 kern_return_t result
= KERN_SUCCESS
;
3337 vm_map_t zap_old_map
= VM_MAP_NULL
;
3338 vm_map_t zap_new_map
= VM_MAP_NULL
;
3339 boolean_t map_locked
= FALSE
;
3340 boolean_t pmap_empty
= TRUE
;
3341 boolean_t new_mapping_established
= FALSE
;
3342 boolean_t keep_map_locked
= vmk_flags
.vmkf_keep_map_locked
;
3343 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
3344 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
3345 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
3346 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
3347 boolean_t is_submap
= vmk_flags
.vmkf_submap
;
3348 boolean_t permanent
= vmk_flags
.vmkf_permanent
;
3349 boolean_t no_copy_on_read
= vmk_flags
.vmkf_permanent
;
3350 boolean_t entry_for_jit
= vmk_flags
.vmkf_map_jit
;
3351 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3352 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
3353 vm_map_offset_t effective_min_offset
, effective_max_offset
;
3355 boolean_t clear_map_aligned
= FALSE
;
3356 memory_object_t fourk_mem_obj
;
3357 vm_object_t fourk_object
;
3358 vm_map_offset_t fourk_pager_offset
;
3359 int fourk_pager_index_start
, fourk_pager_index_num
;
3361 boolean_t fourk_copy
;
3362 vm_object_t copy_object
;
3363 vm_object_offset_t copy_offset
;
3365 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
3366 fourk_object
= VM_OBJECT_NULL
;
3368 if (superpage_size
) {
3369 return KERN_NOT_SUPPORTED
;
3372 if ((cur_protection
& VM_PROT_WRITE
) &&
3373 (cur_protection
& VM_PROT_EXECUTE
) &&
3374 #if !CONFIG_EMBEDDED
3375 map
!= kernel_map
&&
3376 cs_process_enforcement(NULL
) &&
3377 #endif /* !CONFIG_EMBEDDED */
3382 vm_prot_t
, cur_protection
);
3383 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3384 "turning off execute\n",
3386 (current_task()->bsd_info
3387 ? proc_name_address(current_task()->bsd_info
)
3390 cur_protection
&= ~VM_PROT_EXECUTE
;
3394 * If the task has requested executable lockdown,
3395 * deny any new executable mapping.
3397 if (map
->map_disallow_new_exec
== TRUE
) {
3398 if (cur_protection
& VM_PROT_EXECUTE
) {
3399 return KERN_PROTECTION_FAILURE
;
3404 return KERN_NOT_SUPPORTED
;
3406 if (vmk_flags
.vmkf_already
) {
3407 return KERN_NOT_SUPPORTED
;
3409 if (purgable
|| entry_for_jit
) {
3410 return KERN_NOT_SUPPORTED
;
3413 effective_min_offset
= map
->min_offset
;
3415 if (vmk_flags
.vmkf_beyond_max
) {
3416 return KERN_NOT_SUPPORTED
;
3418 effective_max_offset
= map
->max_offset
;
3422 (offset
& FOURK_PAGE_MASK
) != 0) {
3424 return KERN_INVALID_ARGUMENT
;
3427 #define RETURN(value) { result = value; goto BailOut; }
3429 assert(VM_MAP_PAGE_ALIGNED(*address
, FOURK_PAGE_MASK
));
3430 assert(VM_MAP_PAGE_ALIGNED(size
, FOURK_PAGE_MASK
));
3432 if (!anywhere
&& overwrite
) {
3433 return KERN_NOT_SUPPORTED
;
3435 if (!anywhere
&& overwrite
) {
3437 * Create a temporary VM map to hold the old mappings in the
3438 * affected area while we create the new one.
3439 * This avoids releasing the VM map lock in
3440 * vm_map_entry_delete() and allows atomicity
3441 * when we want to replace some mappings with a new one.
3442 * It also allows us to restore the old VM mappings if the
3443 * new mapping fails.
3445 zap_old_map
= vm_map_create(PMAP_NULL
,
3448 map
->hdr
.entries_pageable
);
3449 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
3450 vm_map_disable_hole_optimization(zap_old_map
);
3453 fourk_start
= *address
;
3455 fourk_end
= fourk_start
+ fourk_size
;
3457 start
= vm_map_trunc_page(*address
, VM_MAP_PAGE_MASK(map
));
3458 end
= vm_map_round_page(fourk_end
, VM_MAP_PAGE_MASK(map
));
3462 return KERN_NOT_SUPPORTED
;
3466 * the address doesn't itself violate
3467 * the mask requirement.
3472 if ((start
& mask
) != 0) {
3473 RETURN(KERN_NO_SPACE
);
3477 * ... the address is within bounds
3482 if ((start
< effective_min_offset
) ||
3483 (end
> effective_max_offset
) ||
3485 RETURN(KERN_INVALID_ADDRESS
);
3488 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
3490 * Fixed mapping and "overwrite" flag: attempt to
3491 * remove all existing mappings in the specified
3492 * address range, saving them in our "zap_old_map".
3494 (void) vm_map_delete(map
, start
, end
,
3495 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3496 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3501 * ... the starting address isn't allocated
3503 if (vm_map_lookup_entry(map
, start
, &entry
)) {
3504 vm_object_t cur_object
, shadow_object
;
3507 * We might already some 4K mappings
3508 * in a 16K page here.
3511 if (entry
->vme_end
- entry
->vme_start
3512 != SIXTEENK_PAGE_SIZE
) {
3513 RETURN(KERN_NO_SPACE
);
3515 if (entry
->is_sub_map
) {
3516 RETURN(KERN_NO_SPACE
);
3518 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
3519 RETURN(KERN_NO_SPACE
);
3522 /* go all the way down the shadow chain */
3523 cur_object
= VME_OBJECT(entry
);
3524 vm_object_lock(cur_object
);
3525 while (cur_object
->shadow
!= VM_OBJECT_NULL
) {
3526 shadow_object
= cur_object
->shadow
;
3527 vm_object_lock(shadow_object
);
3528 vm_object_unlock(cur_object
);
3529 cur_object
= shadow_object
;
3530 shadow_object
= VM_OBJECT_NULL
;
3532 if (cur_object
->internal
||
3533 cur_object
->pager
== NULL
) {
3534 vm_object_unlock(cur_object
);
3535 RETURN(KERN_NO_SPACE
);
3537 if (cur_object
->pager
->mo_pager_ops
3538 != &fourk_pager_ops
) {
3539 vm_object_unlock(cur_object
);
3540 RETURN(KERN_NO_SPACE
);
3542 fourk_object
= cur_object
;
3543 fourk_mem_obj
= fourk_object
->pager
;
3545 /* keep the "4K" object alive */
3546 vm_object_reference_locked(fourk_object
);
3547 vm_object_unlock(fourk_object
);
3549 /* merge permissions */
3550 entry
->protection
|= cur_protection
;
3551 entry
->max_protection
|= max_protection
;
3552 if ((entry
->protection
& (VM_PROT_WRITE
|
3553 VM_PROT_EXECUTE
)) ==
3554 (VM_PROT_WRITE
| VM_PROT_EXECUTE
) &&
3555 fourk_binary_compatibility_unsafe
&&
3556 fourk_binary_compatibility_allow_wx
) {
3557 /* write+execute: need to be "jit" */
3558 entry
->used_for_jit
= TRUE
;
3561 goto map_in_fourk_pager
;
3565 * ... the next region doesn't overlap the
3569 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
3570 (entry
->vme_next
->vme_start
< end
)) {
3571 RETURN(KERN_NO_SPACE
);
3577 * "start" and "end" should define the endpoints of the
3578 * available new range, and
3579 * "entry" should refer to the region before the new
3582 * the map should be locked.
3585 /* create a new "4K" pager */
3586 fourk_mem_obj
= fourk_pager_create();
3587 fourk_object
= fourk_pager_to_vm_object(fourk_mem_obj
);
3588 assert(fourk_object
);
3590 /* keep the "4" object alive */
3591 vm_object_reference(fourk_object
);
3593 /* create a "copy" object, to map the "4K" object copy-on-write */
3595 result
= vm_object_copy_strategically(fourk_object
,
3601 assert(result
== KERN_SUCCESS
);
3602 assert(copy_object
!= VM_OBJECT_NULL
);
3603 assert(copy_offset
== 0);
3605 /* take a reference on the copy object, for this mapping */
3606 vm_object_reference(copy_object
);
3608 /* map the "4K" pager's copy object */
3610 vm_map_entry_insert(map
, entry
,
3611 vm_map_trunc_page(start
,
3612 VM_MAP_PAGE_MASK(map
)),
3613 vm_map_round_page(end
,
3614 VM_MAP_PAGE_MASK(map
)),
3617 FALSE
, /* needs_copy */
3620 cur_protection
, max_protection
,
3621 VM_BEHAVIOR_DEFAULT
,
3636 #if VM_MAP_DEBUG_FOURK
3637 if (vm_map_debug_fourk
) {
3638 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3640 (uint64_t) entry
->vme_start
,
3641 (uint64_t) entry
->vme_end
,
3644 #endif /* VM_MAP_DEBUG_FOURK */
3646 new_mapping_established
= TRUE
;
3649 /* "map" the original "object" where it belongs in the "4K" pager */
3650 fourk_pager_offset
= (fourk_start
& SIXTEENK_PAGE_MASK
);
3651 fourk_pager_index_start
= (int) (fourk_pager_offset
/ FOURK_PAGE_SIZE
);
3652 if (fourk_size
> SIXTEENK_PAGE_SIZE
) {
3653 fourk_pager_index_num
= 4;
3655 fourk_pager_index_num
= (int) (fourk_size
/ FOURK_PAGE_SIZE
);
3657 if (fourk_pager_index_start
+ fourk_pager_index_num
> 4) {
3658 fourk_pager_index_num
= 4 - fourk_pager_index_start
;
3661 cur_idx
< fourk_pager_index_num
;
3663 vm_object_t old_object
;
3664 vm_object_offset_t old_offset
;
3666 kr
= fourk_pager_populate(fourk_mem_obj
,
3667 TRUE
, /* overwrite */
3668 fourk_pager_index_start
+ cur_idx
,
3672 (cur_idx
* FOURK_PAGE_SIZE
))
3676 #if VM_MAP_DEBUG_FOURK
3677 if (vm_map_debug_fourk
) {
3678 if (old_object
== (vm_object_t
) -1 &&
3679 old_offset
== (vm_object_offset_t
) -1) {
3680 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3681 "pager [%p:0x%llx] "
3683 "[object:%p,offset:0x%llx]\n",
3685 (uint64_t) entry
->vme_start
,
3686 (uint64_t) entry
->vme_end
,
3689 fourk_pager_index_start
+ cur_idx
,
3692 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3695 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3696 "pager [%p:0x%llx] "
3697 "populate[%d] [object:%p,offset:0x%llx] "
3698 "old [%p:0x%llx]\n",
3700 (uint64_t) entry
->vme_start
,
3701 (uint64_t) entry
->vme_end
,
3704 fourk_pager_index_start
+ cur_idx
,
3707 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3713 #endif /* VM_MAP_DEBUG_FOURK */
3715 assert(kr
== KERN_SUCCESS
);
3716 if (object
!= old_object
&&
3717 object
!= VM_OBJECT_NULL
&&
3718 object
!= (vm_object_t
) -1) {
3719 vm_object_reference(object
);
3721 if (object
!= old_object
&&
3722 old_object
!= VM_OBJECT_NULL
&&
3723 old_object
!= (vm_object_t
) -1) {
3724 vm_object_deallocate(old_object
);
3729 assert(map_locked
== TRUE
);
3731 if (fourk_object
!= VM_OBJECT_NULL
) {
3732 vm_object_deallocate(fourk_object
);
3733 fourk_object
= VM_OBJECT_NULL
;
3734 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
3737 if (result
== KERN_SUCCESS
) {
3738 vm_prot_t pager_prot
;
3739 memory_object_t pager
;
3743 !(vmk_flags
.vmkf_no_pmap_check
)) {
3744 assert(vm_map_pmap_is_empty(map
,
3751 * For "named" VM objects, let the pager know that the
3752 * memory object is being mapped. Some pagers need to keep
3753 * track of this, to know when they can reclaim the memory
3754 * object, for example.
3755 * VM calls memory_object_map() for each mapping (specifying
3756 * the protection of each mapping) and calls
3757 * memory_object_last_unmap() when all the mappings are gone.
3759 pager_prot
= max_protection
;
3762 * Copy-On-Write mapping: won't modify
3763 * the memory object.
3765 pager_prot
&= ~VM_PROT_WRITE
;
3768 object
!= VM_OBJECT_NULL
&&
3770 object
->pager
!= MEMORY_OBJECT_NULL
) {
3771 vm_object_lock(object
);
3772 pager
= object
->pager
;
3773 if (object
->named
&&
3774 pager
!= MEMORY_OBJECT_NULL
) {
3775 assert(object
->pager_ready
);
3776 vm_object_mapping_wait(object
, THREAD_UNINT
);
3777 vm_object_mapping_begin(object
);
3778 vm_object_unlock(object
);
3780 kr
= memory_object_map(pager
, pager_prot
);
3781 assert(kr
== KERN_SUCCESS
);
3783 vm_object_lock(object
);
3784 vm_object_mapping_end(object
);
3786 vm_object_unlock(object
);
3789 fourk_object
!= VM_OBJECT_NULL
&&
3790 fourk_object
->named
&&
3791 fourk_object
->pager
!= MEMORY_OBJECT_NULL
) {
3792 vm_object_lock(fourk_object
);
3793 pager
= fourk_object
->pager
;
3794 if (fourk_object
->named
&&
3795 pager
!= MEMORY_OBJECT_NULL
) {
3796 assert(fourk_object
->pager_ready
);
3797 vm_object_mapping_wait(fourk_object
,
3799 vm_object_mapping_begin(fourk_object
);
3800 vm_object_unlock(fourk_object
);
3802 kr
= memory_object_map(pager
, VM_PROT_READ
);
3803 assert(kr
== KERN_SUCCESS
);
3805 vm_object_lock(fourk_object
);
3806 vm_object_mapping_end(fourk_object
);
3808 vm_object_unlock(fourk_object
);
3812 assert(map_locked
== TRUE
);
3814 if (!keep_map_locked
) {
3820 * We can't hold the map lock if we enter this block.
3823 if (result
== KERN_SUCCESS
) {
3824 /* Wire down the new entry if the user
3825 * requested all new map entries be wired.
3827 if ((map
->wiring_required
) || (superpage_size
)) {
3828 assert(!keep_map_locked
);
3829 pmap_empty
= FALSE
; /* pmap won't be empty */
3830 kr
= vm_map_wire_kernel(map
, start
, end
,
3831 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
3838 if (result
!= KERN_SUCCESS
) {
3839 if (new_mapping_established
) {
3841 * We have to get rid of the new mappings since we
3842 * won't make them available to the user.
3843 * Try and do that atomically, to minimize the risk
3844 * that someone else create new mappings that range.
3846 zap_new_map
= vm_map_create(PMAP_NULL
,
3849 map
->hdr
.entries_pageable
);
3850 vm_map_set_page_shift(zap_new_map
,
3851 VM_MAP_PAGE_SHIFT(map
));
3852 vm_map_disable_hole_optimization(zap_new_map
);
3858 (void) vm_map_delete(map
, *address
, *address
+ size
,
3859 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3860 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3863 if (zap_old_map
!= VM_MAP_NULL
&&
3864 zap_old_map
->hdr
.nentries
!= 0) {
3865 vm_map_entry_t entry1
, entry2
;
3868 * The new mapping failed. Attempt to restore
3869 * the old mappings, saved in the "zap_old_map".
3876 /* first check if the coast is still clear */
3877 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
3878 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
3879 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
3880 vm_map_lookup_entry(map
, end
, &entry2
) ||
3883 * Part of that range has already been
3884 * re-mapped: we can't restore the old
3887 vm_map_enter_restore_failures
++;
3890 * Transfer the saved map entries from
3891 * "zap_old_map" to the original "map",
3892 * inserting them all after "entry1".
3894 for (entry2
= vm_map_first_entry(zap_old_map
);
3895 entry2
!= vm_map_to_entry(zap_old_map
);
3896 entry2
= vm_map_first_entry(zap_old_map
)) {
3897 vm_map_size_t entry_size
;
3899 entry_size
= (entry2
->vme_end
-
3901 vm_map_store_entry_unlink(zap_old_map
,
3903 zap_old_map
->size
-= entry_size
;
3904 vm_map_store_entry_link(map
, entry1
, entry2
,
3905 VM_MAP_KERNEL_FLAGS_NONE
);
3906 map
->size
+= entry_size
;
3909 if (map
->wiring_required
) {
3911 * XXX TODO: we should rewire the
3915 vm_map_enter_restore_successes
++;
3921 * The caller is responsible for releasing the lock if it requested to
3922 * keep the map locked.
3924 if (map_locked
&& !keep_map_locked
) {
3929 * Get rid of the "zap_maps" and all the map entries that
3930 * they may still contain.
3932 if (zap_old_map
!= VM_MAP_NULL
) {
3933 vm_map_destroy(zap_old_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3934 zap_old_map
= VM_MAP_NULL
;
3936 if (zap_new_map
!= VM_MAP_NULL
) {
3937 vm_map_destroy(zap_new_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3938 zap_new_map
= VM_MAP_NULL
;
3945 #endif /* __arm64__ */
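/*
 * Hedged illustration: vm_map_enter_fourk() above juggles two page sizes,
 * truncating and rounding addresses with FOURK_PAGE_MASK versus the map's
 * native (16K) mask.  The mask arithmetic in isolation is the classic
 * trunc/round idiom below; the mask constants and helper names are local demo
 * assumptions, and the block is not part of this file's build.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>

#define DEMO_FOURK_MASK     0x0fffULL   /* 4 KB - 1 */
#define DEMO_SIXTEENK_MASK  0x3fffULL   /* 16 KB - 1 */

static inline uint64_t
demo_trunc_page(uint64_t addr, uint64_t mask)
{
	return addr & ~mask;
}

static inline uint64_t
demo_round_page(uint64_t addr, uint64_t mask)
{
	return (addr + mask) & ~mask;
}
/* e.g. demo_trunc_page(0x5234, DEMO_SIXTEENK_MASK) == 0x4000,
 *      demo_round_page(0x5234, DEMO_FOURK_MASK)    == 0x6000. */
#endif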
3948 * Counters for the prefault optimization.
3950 int64_t vm_prefault_nb_pages
= 0;
3951 int64_t vm_prefault_nb_bailout
= 0;
3953 static kern_return_t
3954 vm_map_enter_mem_object_helper(
3955 vm_map_t target_map
,
3956 vm_map_offset_t
*address
,
3957 vm_map_size_t initial_size
,
3958 vm_map_offset_t mask
,
3960 vm_map_kernel_flags_t vmk_flags
,
3963 vm_object_offset_t offset
,
3965 vm_prot_t cur_protection
,
3966 vm_prot_t max_protection
,
3967 vm_inherit_t inheritance
,
3968 upl_page_list_ptr_t page_list
,
3969 unsigned int page_list_count
)
3971 vm_map_address_t map_addr
;
3972 vm_map_size_t map_size
;
3974 vm_object_size_t size
;
3975 kern_return_t result
;
3976 boolean_t mask_cur_protection
, mask_max_protection
;
3977 boolean_t kernel_prefault
, try_prefault
= (page_list_count
!= 0);
3978 vm_map_offset_t offset_in_mapping
= 0;
3980 boolean_t fourk
= vmk_flags
.vmkf_fourk
;
3981 #endif /* __arm64__ */
3983 assertf(vmk_flags
.__vmkf_unused
== 0, "vmk_flags unused=0x%x\n", vmk_flags
.__vmkf_unused
);
3985 mask_cur_protection
= cur_protection
& VM_PROT_IS_MASK
;
3986 mask_max_protection
= max_protection
& VM_PROT_IS_MASK
;
3987 cur_protection
&= ~VM_PROT_IS_MASK
;
3988 max_protection
&= ~VM_PROT_IS_MASK
;
3991 * Check arguments for validity
3993 if ((target_map
== VM_MAP_NULL
) ||
3994 (cur_protection
& ~VM_PROT_ALL
) ||
3995 (max_protection
& ~VM_PROT_ALL
) ||
3996 (inheritance
> VM_INHERIT_LAST_VALID
) ||
3997 (try_prefault
&& (copy
|| !page_list
)) ||
3998 initial_size
== 0) {
3999 return KERN_INVALID_ARGUMENT
;
4004 map_addr
= vm_map_trunc_page(*address
, FOURK_PAGE_MASK
);
4005 map_size
= vm_map_round_page(initial_size
, FOURK_PAGE_MASK
);
4007 #endif /* __arm64__ */
4009 map_addr
= vm_map_trunc_page(*address
,
4010 VM_MAP_PAGE_MASK(target_map
));
4011 map_size
= vm_map_round_page(initial_size
,
4012 VM_MAP_PAGE_MASK(target_map
));
4014 size
= vm_object_round_page(initial_size
);
4017 * Find the vm object (if any) corresponding to this port.
4019 if (!IP_VALID(port
)) {
4020 object
= VM_OBJECT_NULL
;
4023 } else if (ip_kotype(port
) == IKOT_NAMED_ENTRY
) {
4024 vm_named_entry_t named_entry
;
4026 named_entry
= (vm_named_entry_t
) port
->ip_kobject
;
4028 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4029 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4030 offset
+= named_entry
->data_offset
;
4033 /* a few checks to make sure user is obeying rules */
4035 if (offset
>= named_entry
->size
) {
4036 return KERN_INVALID_RIGHT
;
4038 size
= named_entry
->size
- offset
;
4040 if (mask_max_protection
) {
4041 max_protection
&= named_entry
->protection
;
4043 if (mask_cur_protection
) {
4044 cur_protection
&= named_entry
->protection
;
4046 if ((named_entry
->protection
& max_protection
) !=
4048 return KERN_INVALID_RIGHT
;
4050 if ((named_entry
->protection
& cur_protection
) !=
4052 return KERN_INVALID_RIGHT
;
4054 if (offset
+ size
< offset
) {
4056 return KERN_INVALID_ARGUMENT
;
4058 if (named_entry
->size
< (offset
+ initial_size
)) {
4059 return KERN_INVALID_ARGUMENT
;
4062 if (named_entry
->is_copy
) {
4063 /* for a vm_map_copy, we can only map it whole */
4064 if ((size
!= named_entry
->size
) &&
4065 (vm_map_round_page(size
,
4066 VM_MAP_PAGE_MASK(target_map
)) ==
4067 named_entry
->size
)) {
4068 /* XXX FBDP use the rounded size... */
4069 size
= vm_map_round_page(
4071 VM_MAP_PAGE_MASK(target_map
));
4074 if (!(flags
& VM_FLAGS_ANYWHERE
) &&
4076 size
!= named_entry
->size
)) {
4078 * XXX for a mapping at a "fixed" address,
4079 * we can't trim after mapping the whole
4080 * memory entry, so reject a request for a
4083 return KERN_INVALID_ARGUMENT
;
4087 /* the callers parameter offset is defined to be the */
4088 /* offset from beginning of named entry offset in object */
4089 offset
= offset
+ named_entry
->offset
;
4091 if (!VM_MAP_PAGE_ALIGNED(size
,
4092 VM_MAP_PAGE_MASK(target_map
))) {
4094 * Let's not map more than requested;
4095 * vm_map_enter() will handle this "not map-aligned"
4101 named_entry_lock(named_entry
);
4102 if (named_entry
->is_sub_map
) {
4105 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4106 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4107 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4110 submap
= named_entry
->backing
.map
;
4111 vm_map_lock(submap
);
4112 vm_map_reference(submap
);
4113 vm_map_unlock(submap
);
4114 named_entry_unlock(named_entry
);
4116 vmk_flags
.vmkf_submap
= TRUE
;
4118 result
= vm_map_enter(target_map
,
4125 (vm_object_t
)(uintptr_t) submap
,
4131 if (result
!= KERN_SUCCESS
) {
4132 vm_map_deallocate(submap
);
4135 * No need to lock "submap" just to check its
4136 * "mapped" flag: that flag is never reset
4137 * once it's been set and if we race, we'll
4138 * just end up setting it twice, which is OK.
4140 if (submap
->mapped_in_other_pmaps
== FALSE
&&
4141 vm_map_pmap(submap
) != PMAP_NULL
&&
4142 vm_map_pmap(submap
) !=
4143 vm_map_pmap(target_map
)) {
4145 * This submap is being mapped in a map
4146 * that uses a different pmap.
4147 * Set its "mapped_in_other_pmaps" flag
4148 * to indicate that we now need to
4149 * remove mappings from all pmaps rather
4150 * than just the submap's pmap.
4152 vm_map_lock(submap
);
4153 submap
->mapped_in_other_pmaps
= TRUE
;
4154 vm_map_unlock(submap
);
4156 *address
= map_addr
;
4159 } else if (named_entry
->is_copy
) {
4161 vm_map_copy_t copy_map
;
4162 vm_map_entry_t copy_entry
;
4163 vm_map_offset_t copy_addr
;
4165 if (flags
& ~(VM_FLAGS_FIXED
|
4167 VM_FLAGS_OVERWRITE
|
4168 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4169 VM_FLAGS_RETURN_DATA_ADDR
|
4170 VM_FLAGS_ALIAS_MASK
)) {
4171 named_entry_unlock(named_entry
);
4172 return KERN_INVALID_ARGUMENT
;
4175 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4176 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4177 offset_in_mapping
= offset
- vm_object_trunc_page(offset
);
4178 if (flags
& VM_FLAGS_RETURN_4K_DATA_ADDR
) {
4179 offset_in_mapping
&= ~((signed)(0xFFF));
4181 offset
= vm_object_trunc_page(offset
);
4182 map_size
= vm_object_round_page(offset
+ offset_in_mapping
+ initial_size
) - offset
;
4185 copy_map
= named_entry
->backing
.copy
;
4186 assert(copy_map
->type
== VM_MAP_COPY_ENTRY_LIST
);
4187 if (copy_map
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
4188 /* unsupported type; should not happen */
4189 printf("vm_map_enter_mem_object: "
4190 "memory_entry->backing.copy "
4191 "unsupported type 0x%x\n",
4193 named_entry_unlock(named_entry
);
4194 return KERN_INVALID_ARGUMENT
;
4197 /* reserve a contiguous range */
4198 kr
= vm_map_enter(target_map
,
4200 /* map whole mem entry, trim later: */
4203 flags
& (VM_FLAGS_ANYWHERE
|
4204 VM_FLAGS_OVERWRITE
|
4205 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4206 VM_FLAGS_RETURN_DATA_ADDR
),
4215 if (kr
!= KERN_SUCCESS
) {
4216 named_entry_unlock(named_entry
);
4220 copy_addr
= map_addr
;
4222 for (copy_entry
= vm_map_copy_first_entry(copy_map
);
4223 copy_entry
!= vm_map_copy_to_entry(copy_map
);
4224 copy_entry
= copy_entry
->vme_next
) {
4226 vm_map_kernel_flags_t vmk_remap_flags
;
4227 vm_map_t copy_submap
;
4228 vm_object_t copy_object
;
4229 vm_map_size_t copy_size
;
4230 vm_object_offset_t copy_offset
;
4234 vmk_remap_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
4236 copy_object
= VME_OBJECT(copy_entry
);
4237 copy_offset
= VME_OFFSET(copy_entry
);
4238 copy_size
= (copy_entry
->vme_end
-
4239 copy_entry
->vme_start
);
4240 VM_GET_FLAGS_ALIAS(flags
, copy_vm_alias
);
4241 if (copy_vm_alias
== 0) {
4243 * Caller does not want a specific
4244 * alias for this new mapping: use
4245 * the alias of the original mapping.
4247 copy_vm_alias
= VME_ALIAS(copy_entry
);
4251 if ((copy_addr
+ copy_size
) >
4253 named_entry
->size
/* XXX full size */)) {
4254 /* over-mapping too much !? */
4255 kr
= KERN_INVALID_ARGUMENT
;
4260 /* take a reference on the object */
4261 if (copy_entry
->is_sub_map
) {
4262 vmk_remap_flags
.vmkf_submap
= TRUE
;
4263 copy_submap
= VME_SUBMAP(copy_entry
);
4264 vm_map_lock(copy_submap
);
4265 vm_map_reference(copy_submap
);
4266 vm_map_unlock(copy_submap
);
4267 copy_object
= (vm_object_t
)(uintptr_t) copy_submap
;
4269 copy_object
!= VM_OBJECT_NULL
&&
4270 (copy_entry
->needs_copy
||
4271 copy_object
->shadowed
||
4272 (!copy_object
->true_share
&&
4273 !copy_entry
->is_shared
&&
4274 copy_object
->vo_size
> copy_size
))) {
4276 * We need to resolve our side of this
4277 * "symmetric" copy-on-write now; we
4278 * need a new object to map and share,
4279 * instead of the current one which
4280 * might still be shared with the
4283 * Note: A "vm_map_copy_t" does not
4284 * have a lock but we're protected by
4285 * the named entry's lock here.
4287 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4288 VME_OBJECT_SHADOW(copy_entry
, copy_size
);
4289 if (!copy_entry
->needs_copy
&&
4290 copy_entry
->protection
& VM_PROT_WRITE
) {
4293 prot
= copy_entry
->protection
& ~VM_PROT_WRITE
;
4294 vm_object_pmap_protect(copy_object
,
4302 copy_entry
->needs_copy
= FALSE
;
4303 copy_entry
->is_shared
= TRUE
;
4304 copy_object
= VME_OBJECT(copy_entry
);
4305 copy_offset
= VME_OFFSET(copy_entry
);
4306 vm_object_lock(copy_object
);
4307 vm_object_reference_locked(copy_object
);
4308 if (copy_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
4309 /* we're about to make a shared mapping of this object */
4310 copy_object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
4311 copy_object
->true_share
= TRUE
;
4313 vm_object_unlock(copy_object
);
4316 * We already have the right object
4319 copy_object
= VME_OBJECT(copy_entry
);
4320 vm_object_reference(copy_object
);
4323 /* over-map the object into destination */
4324 remap_flags
|= flags
;
4325 remap_flags
|= VM_FLAGS_FIXED
;
4326 remap_flags
|= VM_FLAGS_OVERWRITE
;
4327 remap_flags
&= ~VM_FLAGS_ANYWHERE
;
            if (!copy && !copy_entry->is_sub_map) {
                /*
                 * copy-on-write should have been
                 * resolved at this point, or we would
                 * end up sharing instead of copying.
                 */
                assert(!copy_entry->needs_copy);
            }
#if !CONFIG_EMBEDDED
            if (copy_entry->used_for_jit) {
                vmk_remap_flags.vmkf_map_jit = TRUE;
            }
#endif /* !CONFIG_EMBEDDED */
            kr = vm_map_enter(target_map,
                /* ... */
                (vm_map_offset_t) 0,
                /* ... */
                ((copy_object == NULL) ? FALSE : copy),
                /* ... */);
            if (kr != KERN_SUCCESS) {
                if (copy_entry->is_sub_map) {
                    vm_map_deallocate(copy_submap);
                } else {
                    vm_object_deallocate(copy_object);
                }
                /* ... */
                break;
            }
            copy_addr += copy_size;
        }

        if (kr == KERN_SUCCESS) {
            if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
                VM_FLAGS_RETURN_4K_DATA_ADDR)) {
                *address = map_addr + offset_in_mapping;
            } else {
                *address = map_addr;
            }
            /*
             * Trim in front, from 0 to "offset".
             */
            vm_map_remove(target_map,
                /* ... */
                VM_MAP_REMOVE_NO_FLAGS);
            if (offset + map_size < named_entry->size) {
                /*
                 * Trim in back, from
                 * "offset + map_size" to
                 * "named_entry->size".
                 */
                vm_map_remove(target_map,
                    /* ... */
                    VM_MAP_REMOVE_NO_FLAGS);
            }
        }
        named_entry_unlock(named_entry);

        if (kr != KERN_SUCCESS) {
            if (!(flags & VM_FLAGS_OVERWRITE)) {
                /* deallocate the contiguous range */
                (void) vm_deallocate(target_map,
                    /* ... */);
            }
        }

        return kr;
    } else {
        unsigned int    access;
        vm_prot_t       protections;
        unsigned int    wimg_mode;

        /* we are mapping a VM object */

        protections = named_entry->protection & VM_PROT_ALL;
        access = GET_MAP_MEM(named_entry->protection);

        if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
            VM_FLAGS_RETURN_4K_DATA_ADDR)) {
            offset_in_mapping = offset - vm_object_trunc_page(offset);
            if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
                offset_in_mapping &= ~((signed)(0xFFF));
            }
            offset = vm_object_trunc_page(offset);
            map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
        }

        object = named_entry->backing.object;
        assert(object != VM_OBJECT_NULL);
        vm_object_lock(object);
        named_entry_unlock(named_entry);

        vm_object_reference_locked(object);

        wimg_mode = object->wimg_bits;
        vm_prot_to_wimg(access, &wimg_mode);
        if (object->wimg_bits != wimg_mode) {
            vm_object_change_wimg_mode(object, wimg_mode);
        }

        vm_object_unlock(object);
    }
} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
    /*
     * JMM - This is temporary until we unify named entries
     * and raw memory objects.
     *
     * Detected fake ip_kotype for a memory object.  In
     * this case, the port isn't really a port at all, but
     * instead is just a raw memory object.
     */
    if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
        VM_FLAGS_RETURN_4K_DATA_ADDR)) {
        panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
    }

    object = memory_object_to_vm_object((memory_object_t)port);
    if (object == VM_OBJECT_NULL) {
        return KERN_INVALID_OBJECT;
    }
    vm_object_reference(object);

    /* wait for object (if any) to be ready */
    if (object != VM_OBJECT_NULL) {
        if (object == kernel_object) {
            printf("Warning: Attempt to map kernel object"
                " by a non-private kernel entity\n");
            return KERN_INVALID_OBJECT;
        }
        if (!object->pager_ready) {
            vm_object_lock(object);

            while (!object->pager_ready) {
                vm_object_wait(object,
                    VM_OBJECT_EVENT_PAGER_READY,
                    /* ... */);
                vm_object_lock(object);
            }
            vm_object_unlock(object);
        }
    }
} else {
    return KERN_INVALID_OBJECT;
}

if (object != VM_OBJECT_NULL &&
    object->pager != MEMORY_OBJECT_NULL &&
    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
    memory_object_t pager;
    vm_prot_t       pager_prot;
    kern_return_t   kr;

    /*
     * For "named" VM objects, let the pager know that the
     * memory object is being mapped.  Some pagers need to keep
     * track of this, to know when they can reclaim the memory
     * object, for example.
     * VM calls memory_object_map() for each mapping (specifying
     * the protection of each mapping) and calls
     * memory_object_last_unmap() when all the mappings are gone.
     */
    pager_prot = max_protection;
    if (copy) {
        /*
         * Copy-On-Write mapping: won't modify the
         * memory object.
         */
        pager_prot &= ~VM_PROT_WRITE;
    }
    vm_object_lock(object);
    pager = object->pager;
    if (object->named &&
        pager != MEMORY_OBJECT_NULL &&
        object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
        assert(object->pager_ready);
        vm_object_mapping_wait(object, THREAD_UNINT);
        vm_object_mapping_begin(object);
        vm_object_unlock(object);

        kr = memory_object_map(pager, pager_prot);
        assert(kr == KERN_SUCCESS);

        vm_object_lock(object);
        vm_object_mapping_end(object);
    }
    vm_object_unlock(object);
}

/*
 * Perform the copy if requested
 */
if (copy) {
    vm_object_t         new_object;
    vm_object_offset_t  new_offset;

    result = vm_object_copy_strategically(object, offset,
        /* ... */
        &new_object, &new_offset,
        /* ... */);

    if (result == KERN_MEMORY_RESTART_COPY) {
        boolean_t success;
        boolean_t src_needs_copy;

        /*
         * We currently ignore src_needs_copy.
         * This really is the issue of how to make
         * MEMORY_OBJECT_COPY_SYMMETRIC safe for
         * non-kernel users to use. Solution forthcoming.
         * In the meantime, since we don't allow non-kernel
         * memory managers to specify symmetric copy,
         * we won't run into problems here.
         */
        new_object = object;
        new_offset = offset;
        success = vm_object_copy_quickly(&new_object,
            /* ... */);
        result = KERN_SUCCESS;
    }
    /*
     * Throw away the reference to the
     * original object, as it won't be mapped.
     */
    vm_object_deallocate(object);

    if (result != KERN_SUCCESS) {
        return result;
    }

    object = new_object;
    offset = new_offset;
}

/*
 * If non-kernel users want to try to prefault pages, the mapping and prefault
 * needs to be atomic.
 */
kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);

#if __arm64__
if (fourk) {
    /* map this object in a "4K" pager */
    result = vm_map_enter_fourk(target_map,
        /* ... */
        (vm_map_offset_t) mask,
        /* ... */);
} else
#endif /* __arm64__ */
{
    result = vm_map_enter(target_map,
        &map_addr, map_size,
        (vm_map_offset_t)mask,
        /* ... */
        cur_protection, max_protection,
        /* ... */);
}
if (result != KERN_SUCCESS) {
    vm_object_deallocate(object);
}

/*
 * Try to prefault, and do not forget to release the vm map lock.
 */
if (result == KERN_SUCCESS && try_prefault) {
    mach_vm_address_t va = map_addr;
    kern_return_t kr = KERN_SUCCESS;
    unsigned int i = 0;
    int pmap_options;

    pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
    if (object->internal) {
        pmap_options |= PMAP_OPTIONS_INTERNAL;
    }

    for (i = 0; i < page_list_count; ++i) {
        if (!UPL_VALID_PAGE(page_list, i)) {
            if (kernel_prefault) {
                assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
                result = KERN_MEMORY_ERROR;
                break;
            }
        } else {
            /*
             * If this function call failed, we should stop
             * trying to optimize, other calls are likely
             * going to fail too.
             *
             * We are not gonna report an error for such
             * failure though. That's an optimization, not
             * something critical.
             */
            kr = pmap_enter_options(target_map->pmap,
                va, UPL_PHYS_PAGE(page_list, i),
                cur_protection, VM_PROT_NONE,
                0, TRUE, pmap_options, NULL);
            if (kr != KERN_SUCCESS) {
                OSIncrementAtomic64(&vm_prefault_nb_bailout);
                if (kernel_prefault) {
                    result = kr;
                }
                break;
            }
            OSIncrementAtomic64(&vm_prefault_nb_pages);
        }

        /* Next virtual address */
        va += PAGE_SIZE;
    }
    if (vmk_flags.vmkf_keep_map_locked) {
        vm_map_unlock(target_map);
    }
}

if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
    VM_FLAGS_RETURN_4K_DATA_ADDR)) {
    *address = map_addr + offset_in_mapping;
} else {
    *address = map_addr;
}

return result;
}
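/*
 * Illustrative example (not part of the original source): with
 * VM_FLAGS_RETURN_DATA_ADDR the caller gets back the address of the
 * requested data rather than the page-truncated mapping address.
 * Assuming 4 KB pages, asking for offset 0x1234 in the memory entry
 * when the mapping lands at map_addr 0x200000 yields
 *	*address = 0x200000 + (0x1234 - 0x1000) = 0x200234
 * whereas without the flag the caller would simply see 0x200000.
 */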
kern_return_t
vm_map_enter_mem_object(
    vm_map_t                target_map,
    vm_map_offset_t         *address,
    vm_map_size_t           initial_size,
    vm_map_offset_t         mask,
    /* ... */
    vm_map_kernel_flags_t   vmk_flags,
    /* ... */
    vm_object_offset_t      offset,
    /* ... */
    vm_prot_t               cur_protection,
    vm_prot_t               max_protection,
    vm_inherit_t            inheritance)
{
    kern_return_t ret;

    ret = vm_map_enter_mem_object_helper(target_map,
        /* ... */);

#if KASAN
    if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
        kasan_notify_address(*address, initial_size);
    }
#endif

    return ret;
}
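/*
 * Usage sketch (illustrative only; the elided middle arguments above are
 * abbreviated here as well): a kernel caller holding a memory-entry port
 * "mem_port" could establish a read-only, copy-on-write mapping anywhere
 * in "target_map" roughly like this:
 *
 *	vm_map_offset_t addr = 0;
 *	kr = vm_map_enter_mem_object(target_map, &addr, size,
 *	    (vm_map_offset_t)0,
 *	    ... flags, tag, mem_port, copy arguments ...,
 *	    0,
 *	    VM_PROT_READ, VM_PROT_READ, VM_INHERIT_DEFAULT);
 */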
kern_return_t
vm_map_enter_mem_object_prefault(
    vm_map_t                target_map,
    vm_map_offset_t         *address,
    vm_map_size_t           initial_size,
    vm_map_offset_t         mask,
    /* ... */
    vm_map_kernel_flags_t   vmk_flags,
    /* ... */
    vm_object_offset_t      offset,
    vm_prot_t               cur_protection,
    vm_prot_t               max_protection,
    upl_page_list_ptr_t     page_list,
    unsigned int            page_list_count)
{
    kern_return_t ret;

    ret = vm_map_enter_mem_object_helper(target_map,
        /* ... */);

#if KASAN
    if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
        kasan_notify_address(*address, initial_size);
    }
#endif

    return ret;
}
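/*
 * Illustrative note (not in the original source): the prefault variant
 * behaves like vm_map_enter_mem_object() but, on success, walks the
 * caller-supplied UPL page list and pre-populates the pmap so that the
 * first access to each listed page does not take a VM fault.  A hedged
 * caller sketch, with the middle arguments elided as above:
 *
 *	kr = vm_map_enter_mem_object_prefault(map, &addr, size, 0,
 *	    ... flags, port, offset, protections ...,
 *	    page_list, page_list_count);
 */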
kern_return_t
vm_map_enter_mem_object_control(
    vm_map_t                target_map,
    vm_map_offset_t         *address,
    vm_map_size_t           initial_size,
    vm_map_offset_t         mask,
    /* ... */
    vm_map_kernel_flags_t   vmk_flags,
    /* ... */
    memory_object_control_t control,
    vm_object_offset_t      offset,
    /* ... */
    vm_prot_t               cur_protection,
    vm_prot_t               max_protection,
    vm_inherit_t            inheritance)
{
    vm_map_address_t        map_addr;
    vm_map_size_t           map_size;
    vm_object_t             object;
    vm_object_size_t        size;
    kern_return_t           result;
    memory_object_t         pager;
    vm_prot_t               pager_prot;
    kern_return_t           kr;
#if __arm64__
    boolean_t               fourk = vmk_flags.vmkf_fourk;
#endif /* __arm64__ */

    /*
     * Check arguments for validity
     */
    if ((target_map == VM_MAP_NULL) ||
        (cur_protection & ~VM_PROT_ALL) ||
        (max_protection & ~VM_PROT_ALL) ||
        (inheritance > VM_INHERIT_LAST_VALID) ||
        initial_size == 0) {
        return KERN_INVALID_ARGUMENT;
    }

#if __arm64__
    if (fourk) {
        map_addr = vm_map_trunc_page(*address,
            /* ... */);
        map_size = vm_map_round_page(initial_size,
            /* ... */);
    } else
#endif /* __arm64__ */
    {
        map_addr = vm_map_trunc_page(*address,
            VM_MAP_PAGE_MASK(target_map));
        map_size = vm_map_round_page(initial_size,
            VM_MAP_PAGE_MASK(target_map));
    }
    size = vm_object_round_page(initial_size);

    object = memory_object_control_to_vm_object(control);

    if (object == VM_OBJECT_NULL) {
        return KERN_INVALID_OBJECT;
    }

    if (object == kernel_object) {
        printf("Warning: Attempt to map kernel object"
            " by a non-private kernel entity\n");
        return KERN_INVALID_OBJECT;
    }

    vm_object_lock(object);
    object->ref_count++;
    vm_object_res_reference(object);

    /*
     * For "named" VM objects, let the pager know that the
     * memory object is being mapped.  Some pagers need to keep
     * track of this, to know when they can reclaim the memory
     * object, for example.
     * VM calls memory_object_map() for each mapping (specifying
     * the protection of each mapping) and calls
     * memory_object_last_unmap() when all the mappings are gone.
     */
    pager_prot = max_protection;
    if (copy) {
        pager_prot &= ~VM_PROT_WRITE;
    }
    pager = object->pager;
    if (object->named &&
        pager != MEMORY_OBJECT_NULL &&
        object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
        assert(object->pager_ready);
        vm_object_mapping_wait(object, THREAD_UNINT);
        vm_object_mapping_begin(object);
        vm_object_unlock(object);

        kr = memory_object_map(pager, pager_prot);
        assert(kr == KERN_SUCCESS);

        vm_object_lock(object);
        vm_object_mapping_end(object);
    }
    vm_object_unlock(object);

    /*
     * Perform the copy if requested
     */
    if (copy) {
        vm_object_t         new_object;
        vm_object_offset_t  new_offset;

        result = vm_object_copy_strategically(object, offset, size,
            &new_object, &new_offset,
            /* ... */);

        if (result == KERN_MEMORY_RESTART_COPY) {
            boolean_t success;
            boolean_t src_needs_copy;

            /*
             * We currently ignore src_needs_copy.
             * This really is the issue of how to make
             * MEMORY_OBJECT_COPY_SYMMETRIC safe for
             * non-kernel users to use. Solution forthcoming.
             * In the meantime, since we don't allow non-kernel
             * memory managers to specify symmetric copy,
             * we won't run into problems here.
             */
            new_object = object;
            new_offset = offset;
            success = vm_object_copy_quickly(&new_object,
                /* ... */);
            result = KERN_SUCCESS;
        }
        /*
         * Throw away the reference to the
         * original object, as it won't be mapped.
         */
        vm_object_deallocate(object);

        if (result != KERN_SUCCESS) {
            return result;
        }

        object = new_object;
        offset = new_offset;
    }

#if __arm64__
    if (fourk) {
        result = vm_map_enter_fourk(target_map,
            /* ... */
            (vm_map_offset_t)mask,
            /* ... */
            cur_protection, max_protection,
            /* ... */);
    } else
#endif /* __arm64__ */
    {
        result = vm_map_enter(target_map,
            &map_addr, map_size,
            (vm_map_offset_t)mask,
            /* ... */
            cur_protection, max_protection,
            /* ... */);
    }
    if (result != KERN_SUCCESS) {
        vm_object_deallocate(object);
    }
    *address = map_addr;

    return result;
}
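/*
 * Illustrative note (not in the original source): this "control" variant
 * mirrors the port-based path above: it resolves the memory_object_control_t
 * to its VM object, notifies the pager through memory_object_map(),
 * optionally copies the object, and finally enters it with vm_map_enter().
 * A hedged caller sketch, with the elided arguments abbreviated:
 *
 *	vm_map_offset_t addr = 0;
 *	kr = vm_map_enter_mem_object_control(kernel_map, &addr, size, 0,
 *	    ... flags ..., control, 0, ... copy ...,
 *	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 */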
4968 extern pmap_paddr_t avail_start
, avail_end
;
4972 * Allocate memory in the specified map, with the caveat that
4973 * the memory is physically contiguous. This call may fail
4974 * if the system can't find sufficient contiguous memory.
4975 * This call may cause or lead to heart-stopping amounts of
4978 * Memory obtained from this call should be freed in the
4979 * normal way, viz., via vm_deallocate.
4984 vm_map_offset_t
*addr
,
4988 vm_object_t cpm_obj
;
4992 vm_map_offset_t va
, start
, end
, offset
;
4994 vm_map_offset_t prev_addr
= 0;
4995 #endif /* MACH_ASSERT */
4997 boolean_t anywhere
= ((VM_FLAGS_ANYWHERE
& flags
) != 0);
5000 VM_GET_FLAGS_ALIAS(flags
, tag
);
5004 return KERN_SUCCESS
;
5007 *addr
= vm_map_min(map
);
5009 *addr
= vm_map_trunc_page(*addr
,
5010 VM_MAP_PAGE_MASK(map
));
5012 size
= vm_map_round_page(size
,
5013 VM_MAP_PAGE_MASK(map
));
5016 * LP64todo - cpm_allocate should probably allow
5017 * allocations of >4GB, but not with the current
5018 * algorithm, so just cast down the size for now.
5020 if (size
> VM_MAX_ADDRESS
) {
5021 return KERN_RESOURCE_SHORTAGE
;
5023 if ((kr
= cpm_allocate(CAST_DOWN(vm_size_t
, size
),
5024 &pages
, 0, 0, TRUE
, flags
)) != KERN_SUCCESS
) {
5028 cpm_obj
= vm_object_allocate((vm_object_size_t
)size
);
5029 assert(cpm_obj
!= VM_OBJECT_NULL
);
5030 assert(cpm_obj
->internal
);
5031 assert(cpm_obj
->vo_size
== (vm_object_size_t
)size
);
5032 assert(cpm_obj
->can_persist
== FALSE
);
5033 assert(cpm_obj
->pager_created
== FALSE
);
5034 assert(cpm_obj
->pageout
== FALSE
);
5035 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
5038 * Insert pages into object.
5041 vm_object_lock(cpm_obj
);
5042 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
5044 pages
= NEXT_PAGE(m
);
5045 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
5047 assert(!m
->vmp_gobbled
);
5048 assert(!m
->vmp_wanted
);
5049 assert(!m
->vmp_pageout
);
5050 assert(!m
->vmp_tabled
);
5051 assert(VM_PAGE_WIRED(m
));
5052 assert(m
->vmp_busy
);
5053 assert(VM_PAGE_GET_PHYS_PAGE(m
) >= (avail_start
>> PAGE_SHIFT
) && VM_PAGE_GET_PHYS_PAGE(m
) <= (avail_end
>> PAGE_SHIFT
));
5055 m
->vmp_busy
= FALSE
;
5056 vm_page_insert(m
, cpm_obj
, offset
);
5058 assert(cpm_obj
->resident_page_count
== size
/ PAGE_SIZE
);
5059 vm_object_unlock(cpm_obj
);
5062 * Hang onto a reference on the object in case a
5063 * multi-threaded application for some reason decides
5064 * to deallocate the portion of the address space into
5065 * which we will insert this object.
5067 * Unfortunately, we must insert the object now before
5068 * we can talk to the pmap module about which addresses
5069 * must be wired down. Hence, the race with a multi-
5072 vm_object_reference(cpm_obj
);
5075 * Insert object into map.
5084 VM_MAP_KERNEL_FLAGS_NONE
,
5086 (vm_object_offset_t
)0,
5090 VM_INHERIT_DEFAULT
);
5092 if (kr
!= KERN_SUCCESS
) {
5094 * A CPM object doesn't have can_persist set,
5095 * so all we have to do is deallocate it to
5096 * free up these pages.
5098 assert(cpm_obj
->pager_created
== FALSE
);
5099 assert(cpm_obj
->can_persist
== FALSE
);
5100 assert(cpm_obj
->pageout
== FALSE
);
5101 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
5102 vm_object_deallocate(cpm_obj
); /* kill acquired ref */
5103 vm_object_deallocate(cpm_obj
); /* kill creation ref */
5107 * Inform the physical mapping system that the
5108 * range of addresses may not fault, so that
5109 * page tables and such can be locked down as well.
5113 pmap
= vm_map_pmap(map
);
5114 pmap_pageable(pmap
, start
, end
, FALSE
);
5117 * Enter each page into the pmap, to avoid faults.
5118 * Note that this loop could be coded more efficiently,
5119 * if the need arose, rather than looking up each page
5122 for (offset
= 0, va
= start
; offset
< size
;
5123 va
+= PAGE_SIZE
, offset
+= PAGE_SIZE
) {
5126 vm_object_lock(cpm_obj
);
5127 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
5128 assert(m
!= VM_PAGE_NULL
);
5130 vm_page_zero_fill(m
);
5132 type_of_fault
= DBG_ZERO_FILL_FAULT
;
5134 vm_fault_enter(m
, pmap
, va
, VM_PROT_ALL
, VM_PROT_WRITE
,
5136 FALSE
, /* change_wiring */
5137 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
5138 FALSE
, /* no_cache */
5139 FALSE
, /* cs_bypass */
5141 0, /* pmap_options */
5142 NULL
, /* need_retry */
5145 vm_object_unlock(cpm_obj
);
5150 * Verify ordering in address space.
5152 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
5153 vm_object_lock(cpm_obj
);
5154 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
5155 vm_object_unlock(cpm_obj
);
5156 if (m
== VM_PAGE_NULL
) {
5157 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5158 cpm_obj
, (uint64_t)offset
);
5160 assert(m
->vmp_tabled
);
5161 assert(!m
->vmp_busy
);
5162 assert(!m
->vmp_wanted
);
5163 assert(!m
->vmp_fictitious
);
5164 assert(!m
->vmp_private
);
5165 assert(!m
->vmp_absent
);
5166 assert(!m
->vmp_error
);
5167 assert(!m
->vmp_cleaning
);
5168 assert(!m
->vmp_laundry
);
5169 assert(!m
->vmp_precious
);
5170 assert(!m
->vmp_clustered
);
5172 if (VM_PAGE_GET_PHYS_PAGE(m
) != prev_addr
+ 1) {
5173 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5174 (uint64_t)start
, (uint64_t)end
, (uint64_t)va
);
5175 printf("obj %p off 0x%llx\n", cpm_obj
, (uint64_t)offset
);
5176 printf("m %p prev_address 0x%llx\n", m
, (uint64_t)prev_addr
);
5177 panic("vm_allocate_cpm: pages not contig!");
5180 prev_addr
= VM_PAGE_GET_PHYS_PAGE(m
);
5182 #endif /* MACH_ASSERT */
5184 vm_object_deallocate(cpm_obj
); /* kill extra ref */
/*
 *	Interface is defined in all cases, but unless the kernel
 *	is built explicitly for this option, the interface does
 *	not really do anything.
 */
kern_return_t
vm_map_enter_cpm(
    __unused vm_map_t        map,
    __unused vm_map_offset_t *addr,
    __unused vm_map_size_t   size,
    __unused int             flags)
{
    return KERN_FAILURE;
}
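/*
 * Illustrative note (not in the original source): on kernels built with the
 * CPM option the real implementation above returns a physically contiguous,
 * pre-entered region; on other kernels this stub simply fails.  A
 * hypothetical caller treats the result like any other allocation and
 * releases it with vm_deallocate():
 *
 *	vm_map_offset_t addr = 0;
 *	if (vm_map_enter_cpm(kernel_map, &addr, size, VM_FLAGS_ANYWHERE)
 *	    == KERN_SUCCESS) {
 *		... use [addr, addr + size) ...
 *		(void) vm_deallocate(kernel_map, addr, size);
 *	}
 */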
/* Not used without nested pmaps */
#ifndef NO_NESTED_PMAP
/*
 * Clip and unnest a portion of a nested submap mapping.
 */
static void
vm_map_clip_unnest(
    vm_map_t        map,
    vm_map_entry_t  entry,
    vm_map_offset_t start_unnest,
    vm_map_offset_t end_unnest)
{
    vm_map_offset_t old_start_unnest = start_unnest;
    vm_map_offset_t old_end_unnest = end_unnest;

    assert(entry->is_sub_map);
    assert(VME_SUBMAP(entry) != NULL);
    assert(entry->use_pmap);

    /*
     * Query the platform for the optimal unnest range.
     * DRK: There's some duplication of effort here, since
     * callers may have adjusted the range to some extent.  This
     * routine was introduced to support 1GiB subtree nesting
     * for x86 platforms, which can also nest on 2MiB boundaries
     * depending on size/alignment.
     */
    if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
        assert(VME_SUBMAP(entry)->is_nested_map);
        assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
        log_unnest_badness(map,
            old_start_unnest,
            old_end_unnest,
            VME_SUBMAP(entry)->is_nested_map,
            (entry->vme_start +
            (VME_SUBMAP(entry)->lowest_unnestable_start -
            VME_OFFSET(entry))));
    }

    if (entry->vme_start > start_unnest ||
        entry->vme_end < end_unnest) {
        panic("vm_map_clip_unnest(0x%llx,0x%llx): "
            "bad nested entry: start=0x%llx end=0x%llx\n",
            (long long)start_unnest, (long long)end_unnest,
            (long long)entry->vme_start, (long long)entry->vme_end);
    }

    if (start_unnest > entry->vme_start) {
        _vm_map_clip_start(&map->hdr,
            entry,
            start_unnest);
        if (map->holelistenabled) {
            vm_map_store_update_first_free(map, NULL, FALSE);
        } else {
            vm_map_store_update_first_free(map, map->first_free, FALSE);
        }
    }
    if (entry->vme_end > end_unnest) {
        _vm_map_clip_end(&map->hdr,
            entry,
            end_unnest);
        if (map->holelistenabled) {
            vm_map_store_update_first_free(map, NULL, FALSE);
        } else {
            vm_map_store_update_first_free(map, map->first_free, FALSE);
        }
    }

    pmap_unnest(map->pmap,
        entry->vme_start,
        entry->vme_end - entry->vme_start);
    if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
        /* clean up parent map/maps */
        vm_map_submap_pmap_clean(
            map, entry->vme_start,
            entry->vme_end,
            VME_SUBMAP(entry),
            VME_OFFSET(entry));
    }
    entry->use_pmap = FALSE;
    if ((map->pmap != kernel_pmap) &&
        (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
        VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
    }
}
#endif /* NO_NESTED_PMAP */
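/*
 * Illustrative note (not in the original source): vm_map_clip_unnest()
 * first widens [start_unnest, end_unnest) to whatever the pmap layer can
 * actually unnest, clips the submap entry so that exactly that range has
 * its own entry, then calls pmap_unnest() and clears use_pmap so the range
 * no longer shares the submap's page tables.  For example, unnesting a
 * single page of a shared region nested at 2 MiB granularity on x86 ends
 * up unnesting the whole surrounding 2 MiB block.
 */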
/*
 *	vm_map_clip_start:	[ internal use only ]
 *
 *	Asserts that the given entry begins at or after
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_start(
    vm_map_t        map,
    vm_map_entry_t  entry,
    vm_map_offset_t startaddr)
{
#ifndef NO_NESTED_PMAP
    if (entry->is_sub_map &&
        entry->use_pmap &&
        startaddr >= entry->vme_start) {
        vm_map_offset_t start_unnest, end_unnest;

        /*
         * Make sure "startaddr" is no longer in a nested range
         * before we clip.  Unnest only the minimum range the platform
         * can handle.
         * vm_map_clip_unnest may perform additional adjustments to
         * the range.
         */
        start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
        end_unnest = start_unnest + pmap_nesting_size_min;
        vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
    }
#endif /* NO_NESTED_PMAP */
    if (startaddr > entry->vme_start) {
        if (VME_OBJECT(entry) &&
            !entry->is_sub_map &&
            VME_OBJECT(entry)->phys_contiguous) {
            pmap_remove(map->pmap,
                (addr64_t)(entry->vme_start),
                (addr64_t)(entry->vme_end));
        }
        if (entry->vme_atomic) {
            panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
        }

        DTRACE_VM5(vm_map_clip_start,
            /* ... */
            vm_map_offset_t, entry->vme_start,
            vm_map_offset_t, entry->vme_end,
            vm_map_offset_t, startaddr,
            int, VME_ALIAS(entry));

        _vm_map_clip_start(&map->hdr, entry, startaddr);
        if (map->holelistenabled) {
            vm_map_store_update_first_free(map, NULL, FALSE);
        } else {
            vm_map_store_update_first_free(map, map->first_free, FALSE);
        }
    }
}
#define vm_map_copy_clip_start(copy, entry, startaddr) \
	MACRO_BEGIN \
	if ((startaddr) > (entry)->vme_start) \
	        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
	MACRO_END
/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_start(
    struct vm_map_header    *map_header,
    vm_map_entry_t          entry,
    vm_map_offset_t         start)
{
    vm_map_entry_t  new_entry;

    /*
     *	Split off the front portion --
     *	note that we must insert the new
     *	entry BEFORE this one, so that
     *	this entry has the specified starting
     *	address.
     */

    if (entry->map_aligned) {
        assert(VM_MAP_PAGE_ALIGNED(start,
            VM_MAP_HDR_PAGE_MASK(map_header)));
    }

    new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
    vm_map_entry_copy_full(new_entry, entry);

    new_entry->vme_end = start;
    assert(new_entry->vme_start < new_entry->vme_end);
    VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
    assert(start < entry->vme_end);
    entry->vme_start = start;

    _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);

    if (entry->is_sub_map) {
        vm_map_reference(VME_SUBMAP(new_entry));
    } else {
        vm_object_reference(VME_OBJECT(new_entry));
    }
}
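/*
 * Worked example (not in the original source): splitting an entry covering
 * [0x1000, 0x9000) with VME_OFFSET == 0 at start == 0x4000 leaves
 *	new_entry: [0x1000, 0x4000), offset 0x0     (linked in before)
 *	entry:     [0x4000, 0x9000), offset 0x3000
 * so both halves keep referring to the same pages of the backing object,
 * and an extra object/submap reference is taken for the new entry.
 */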
/*
 *	vm_map_clip_end:	[ internal use only ]
 *
 *	Asserts that the given entry ends at or before
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_end(
    vm_map_t        map,
    vm_map_entry_t  entry,
    vm_map_offset_t endaddr)
{
    if (endaddr > entry->vme_end) {
        /*
         * Within the scope of this clipping, limit "endaddr" to
         * the end of this map entry...
         */
        endaddr = entry->vme_end;
    }
#ifndef NO_NESTED_PMAP
    if (entry->is_sub_map && entry->use_pmap) {
        vm_map_offset_t start_unnest, end_unnest;

        /*
         * Make sure the range between the start of this entry and
         * the new "endaddr" is no longer nested before we clip.
         * Unnest only the minimum range the platform can handle.
         * vm_map_clip_unnest may perform additional adjustments to
         * the range.
         */
        start_unnest = entry->vme_start;
        end_unnest =
            (endaddr + pmap_nesting_size_min - 1) &
            ~(pmap_nesting_size_min - 1);
        vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
    }
#endif /* NO_NESTED_PMAP */
    if (endaddr < entry->vme_end) {
        if (VME_OBJECT(entry) &&
            !entry->is_sub_map &&
            VME_OBJECT(entry)->phys_contiguous) {
            pmap_remove(map->pmap,
                (addr64_t)(entry->vme_start),
                (addr64_t)(entry->vme_end));
        }
        if (entry->vme_atomic) {
            panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
        }

        DTRACE_VM5(vm_map_clip_end,
            /* ... */
            vm_map_offset_t, entry->vme_start,
            vm_map_offset_t, entry->vme_end,
            vm_map_offset_t, endaddr,
            int, VME_ALIAS(entry));

        _vm_map_clip_end(&map->hdr, entry, endaddr);
        if (map->holelistenabled) {
            vm_map_store_update_first_free(map, NULL, FALSE);
        } else {
            vm_map_store_update_first_free(map, map->first_free, FALSE);
        }
    }
}


#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) \
	        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
	MACRO_END
/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_end(
    struct vm_map_header    *map_header,
    vm_map_entry_t          entry,
    vm_map_offset_t         end)
{
    vm_map_entry_t  new_entry;

    /*
     *	Create a new entry and insert it
     *	AFTER the specified entry
     */

    if (entry->map_aligned) {
        assert(VM_MAP_PAGE_ALIGNED(end,
            VM_MAP_HDR_PAGE_MASK(map_header)));
    }

    new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
    vm_map_entry_copy_full(new_entry, entry);

    assert(entry->vme_start < end);
    new_entry->vme_start = entry->vme_end = end;
    VME_OFFSET_SET(new_entry,
        VME_OFFSET(new_entry) + (end - entry->vme_start));
    assert(new_entry->vme_start < new_entry->vme_end);

    _vm_map_store_entry_link(map_header, entry, new_entry);

    if (entry->is_sub_map) {
        vm_map_reference(VME_SUBMAP(new_entry));
    } else {
        vm_object_reference(VME_OBJECT(new_entry));
    }
}
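/*
 * Worked example (not in the original source): clipping an entry covering
 * [0x1000, 0x9000) with VME_OFFSET == 0 at end == 0x4000 leaves
 *	entry:     [0x1000, 0x4000), offset 0x0
 *	new_entry: [0x4000, 0x9000), offset 0x3000  (linked in after)
 * which is the mirror image of _vm_map_clip_start() above.
 */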
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define VM_MAP_RANGE_CHECK(map, start, end) \
	MACRO_BEGIN \
	if (start < vm_map_min(map)) \
	        start = vm_map_min(map); \
	if (end > vm_map_max(map)) \
	        end = vm_map_max(map); \
	MACRO_END
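/*
 * Illustrative expansion (not in the original source): the macro only
 * clamps the caller-supplied range to the map's valid span.  With
 * vm_map_min(map) == 0x1000 and vm_map_max(map) == 0xF000,
 * VM_MAP_RANGE_CHECK(map, start, end) turns (start = 0, end = 0x10000)
 * into (start = 0x1000, end = 0xF000); it does not report an error.
 */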
/*
 *	vm_map_range_check:	[ internal use only ]
 *
 *	Check that the region defined by the specified start and
 *	end addresses is wholly contained within a single map
 *	entry or set of adjacent map entries of the specified map,
 *	i.e. the specified region contains no unmapped space.
 *	If any or all of the region is unmapped, FALSE is returned.
 *	Otherwise, TRUE is returned and if the output argument 'entry'
 *	is not NULL it points to the map entry containing the start
 *	of the region.
 *
 *	The map is locked for reading on entry and is left locked.
 */
static boolean_t
vm_map_range_check(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_map_entry_t  *entry)
{
    vm_map_entry_t  cur;
    vm_map_offset_t prev;

    /*
     *	Basic sanity checks first
     */
    if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
        return FALSE;
    }

    /*
     *	Check first if the region starts within a valid
     *	mapping for the map.
     */
    if (!vm_map_lookup_entry(map, start, &cur)) {
        return FALSE;
    }

    /*
     *	Optimize for the case that the region is contained
     *	in a single map entry.
     */
    if (entry != (vm_map_entry_t *) NULL) {
        *entry = cur;
    }
    if (end <= cur->vme_end) {
        return TRUE;
    }

    /*
     *	If the region is not wholly contained within a
     *	single entry, walk the entries looking for holes.
     */
    prev = cur->vme_end;
    cur = cur->vme_next;
    while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
        if (end <= cur->vme_end) {
            return TRUE;
        }
        prev = cur->vme_end;
        cur = cur->vme_next;
    }
    return FALSE;
}
/*
 *	vm_map_submap:		[ kernel use only ]
 *
 *	Mark the given range as handled by a subordinate map.
 *
 *	This range must have been created with vm_map_find using
 *	the vm_submap_object, and no other operations may have been
 *	performed on this range prior to calling vm_map_submap.
 *
 *	Only a limited number of operations can be performed
 *	within this range after calling vm_map_submap:
 *		vm_fault
 *	[Don't try vm_map_copyin!]
 *
 *	To remove a submapping, one must first remove the
 *	range from the superior map, and then destroy the
 *	submap (if desired).  [Better yet, don't try it.]
 */
kern_return_t
vm_map_submap(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_map_t        submap,
    vm_map_offset_t offset,
#ifdef NO_NESTED_PMAP
    __unused
#endif /* NO_NESTED_PMAP */
    boolean_t       use_pmap)
{
    vm_map_entry_t  entry;
    kern_return_t   result = KERN_INVALID_ARGUMENT;
    vm_object_t     object;

    vm_map_lock(map);

    if (!vm_map_lookup_entry(map, start, &entry)) {
        entry = entry->vme_next;
    }

    if (entry == vm_map_to_entry(map) ||
        entry->is_sub_map) {
        vm_map_unlock(map);
        return KERN_INVALID_ARGUMENT;
    }

    vm_map_clip_start(map, entry, start);
    vm_map_clip_end(map, entry, end);

    if ((entry->vme_start == start) && (entry->vme_end == end) &&
        (!entry->is_sub_map) &&
        ((object = VME_OBJECT(entry)) == vm_submap_object) &&
        (object->resident_page_count == 0) &&
        (object->copy == VM_OBJECT_NULL) &&
        (object->shadow == VM_OBJECT_NULL) &&
        (!object->pager_created)) {
        VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
        VME_OBJECT_SET(entry, VM_OBJECT_NULL);
        vm_object_deallocate(object);
        entry->is_sub_map = TRUE;
        entry->use_pmap = FALSE;
        VME_SUBMAP_SET(entry, submap);
        vm_map_reference(submap);
        if (submap->mapped_in_other_pmaps == FALSE &&
            vm_map_pmap(submap) != PMAP_NULL &&
            vm_map_pmap(submap) != vm_map_pmap(map)) {
            /*
             * This submap is being mapped in a map
             * that uses a different pmap.
             * Set its "mapped_in_other_pmaps" flag
             * to indicate that we now need to
             * remove mappings from all pmaps rather
             * than just the submap's pmap.
             */
            submap->mapped_in_other_pmaps = TRUE;
        }

#ifndef NO_NESTED_PMAP
        if (use_pmap) {
            /* nest if platform code will allow */
            if (submap->pmap == NULL) {
                ledger_t ledger = map->pmap->ledger;
                submap->pmap = pmap_create_options(ledger,
                    (vm_map_size_t) 0, 0);
                if (submap->pmap == PMAP_NULL) {
                    vm_map_unlock(map);
                    return KERN_NO_SPACE;
                }
#if defined(__arm__) || defined(__arm64__)
                pmap_set_nested(submap->pmap);
#endif
            }
            result = pmap_nest(map->pmap,
                (VME_SUBMAP(entry))->pmap,
                /* ... */
                (uint64_t)(end - start));
            if (result) {
                panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
            }
            entry->use_pmap = TRUE;
        }
#else /* NO_NESTED_PMAP */
        pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
#endif /* NO_NESTED_PMAP */
        result = KERN_SUCCESS;
    }
    vm_map_unlock(map);

    return result;
}
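/*
 * Usage sketch (illustrative, not in the original source): the intended
 * sequence is to reserve the range with the shared vm_submap_object first
 * and only then install the submap, roughly:
 *
 *	kr = vm_map_find(...);    maps vm_submap_object over [start, end)
 *	kr = vm_map_submap(map, start, end, submap, 0, use_pmap);
 *
 * where use_pmap == TRUE additionally asks for nested page tables when the
 * platform supports them (the #ifndef NO_NESTED_PMAP path above).
 */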
5720 * Sets the protection of the specified address
5721 * region in the target map. If "set_max" is
5722 * specified, the maximum protection is to be set;
5723 * otherwise, only the current protection is affected.
5728 vm_map_offset_t start
,
5729 vm_map_offset_t end
,
5733 vm_map_entry_t current
;
5734 vm_map_offset_t prev
;
5735 vm_map_entry_t entry
;
5737 int pmap_options
= 0;
5740 if (new_prot
& VM_PROT_COPY
) {
5741 vm_map_offset_t new_start
;
5742 vm_prot_t cur_prot
, max_prot
;
5743 vm_map_kernel_flags_t kflags
;
5745 /* LP64todo - see below */
5746 if (start
>= map
->max_offset
) {
5747 return KERN_INVALID_ADDRESS
;
5750 #if VM_PROTECT_WX_FAIL
5751 if ((new_prot
& VM_PROT_EXECUTE
) &&
5752 map
!= kernel_map
&&
5753 cs_process_enforcement(NULL
)) {
5755 uint64_t, (uint64_t) start
,
5756 uint64_t, (uint64_t) end
,
5757 vm_prot_t
, new_prot
);
5758 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5760 (current_task()->bsd_info
5761 ? proc_name_address(current_task()->bsd_info
)
5764 return KERN_PROTECTION_FAILURE
;
5766 #endif /* VM_PROTECT_WX_FAIL */
5769 * Let vm_map_remap_extract() know that it will need to:
5770 * + make a copy of the mapping
5771 * + add VM_PROT_WRITE to the max protections
5772 * + remove any protections that are no longer allowed from the
5773 * max protections (to avoid any WRITE/EXECUTE conflict, for
5775 * Note that "max_prot" is an IN/OUT parameter only for this
5776 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5779 max_prot
= new_prot
& VM_PROT_ALL
;
5780 kflags
= VM_MAP_KERNEL_FLAGS_NONE
;
5781 kflags
.vmkf_remap_prot_copy
= TRUE
;
5782 kflags
.vmkf_overwrite_immutable
= TRUE
;
5784 kr
= vm_map_remap(map
,
5788 VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
,
5793 TRUE
, /* copy-on-write remapping! */
5796 VM_INHERIT_DEFAULT
);
5797 if (kr
!= KERN_SUCCESS
) {
5800 new_prot
&= ~VM_PROT_COPY
;
5805 /* LP64todo - remove this check when vm_map_commpage64()
5806 * no longer has to stuff in a map_entry for the commpage
5807 * above the map's max_offset.
5809 if (start
>= map
->max_offset
) {
5811 return KERN_INVALID_ADDRESS
;
5816 * Lookup the entry. If it doesn't start in a valid
5817 * entry, return an error.
5819 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
5821 return KERN_INVALID_ADDRESS
;
5824 if (entry
->superpage_size
&& (start
& (SUPERPAGE_SIZE
- 1))) { /* extend request to whole entry */
5825 start
= SUPERPAGE_ROUND_DOWN(start
);
5830 if (entry
->superpage_size
) {
5831 end
= SUPERPAGE_ROUND_UP(end
);
5835 * Make a first pass to check for protection and address
5840 prev
= current
->vme_start
;
5841 while ((current
!= vm_map_to_entry(map
)) &&
5842 (current
->vme_start
< end
)) {
5844 * If there is a hole, return an error.
5846 if (current
->vme_start
!= prev
) {
5848 return KERN_INVALID_ADDRESS
;
5851 new_max
= current
->max_protection
;
5852 if ((new_prot
& new_max
) != new_prot
) {
5854 return KERN_PROTECTION_FAILURE
;
5857 if ((new_prot
& VM_PROT_WRITE
) &&
5858 (new_prot
& VM_PROT_EXECUTE
) &&
5859 #if !CONFIG_EMBEDDED
5860 map
!= kernel_map
&&
5861 cs_process_enforcement(NULL
) &&
5862 #endif /* !CONFIG_EMBEDDED */
5863 !(current
->used_for_jit
)) {
5865 uint64_t, (uint64_t) current
->vme_start
,
5866 uint64_t, (uint64_t) current
->vme_end
,
5867 vm_prot_t
, new_prot
);
5868 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5870 (current_task()->bsd_info
5871 ? proc_name_address(current_task()->bsd_info
)
5874 new_prot
&= ~VM_PROT_EXECUTE
;
5875 #if VM_PROTECT_WX_FAIL
5877 return KERN_PROTECTION_FAILURE
;
5878 #endif /* VM_PROTECT_WX_FAIL */
5882 * If the task has requested executable lockdown,
5884 * - adding executable protections OR
5885 * - adding write protections to an existing executable mapping.
5887 if (map
->map_disallow_new_exec
== TRUE
) {
5888 if ((new_prot
& VM_PROT_EXECUTE
) ||
5889 ((current
->protection
& VM_PROT_EXECUTE
) && (new_prot
& VM_PROT_WRITE
))) {
5891 return KERN_PROTECTION_FAILURE
;
5895 prev
= current
->vme_end
;
5896 current
= current
->vme_next
;
5901 end
== vm_map_round_page(prev
, VM_MAP_PAGE_MASK(map
))) {
5902 vm_map_entry_t prev_entry
;
5904 prev_entry
= current
->vme_prev
;
5905 if (prev_entry
!= vm_map_to_entry(map
) &&
5906 !prev_entry
->map_aligned
&&
5907 (vm_map_round_page(prev_entry
->vme_end
,
5908 VM_MAP_PAGE_MASK(map
))
5911 * The last entry in our range is not "map-aligned"
5912 * but it would have reached all the way to "end"
5913 * if it had been map-aligned, so this is not really
5914 * a hole in the range and we can proceed.
5919 #endif /* __arm64__ */
5923 return KERN_INVALID_ADDRESS
;
5927 * Go back and fix up protections.
5928 * Clip to start here if the range starts within
5933 if (current
!= vm_map_to_entry(map
)) {
5934 /* clip and unnest if necessary */
5935 vm_map_clip_start(map
, current
, start
);
5938 while ((current
!= vm_map_to_entry(map
)) &&
5939 (current
->vme_start
< end
)) {
5942 vm_map_clip_end(map
, current
, end
);
5944 if (current
->is_sub_map
) {
5945 /* clipping did unnest if needed */
5946 assert(!current
->use_pmap
);
5949 old_prot
= current
->protection
;
5952 current
->max_protection
= new_prot
;
5953 current
->protection
= new_prot
& old_prot
;
5955 current
->protection
= new_prot
;
5959 * Update physical map if necessary.
5960 * If the request is to turn off write protection,
5961 * we won't do it for real (in pmap). This is because
5962 * it would cause copy-on-write to fail. We've already
5963 * set, the new protection in the map, so if a
5964 * write-protect fault occurred, it will be fixed up
5965 * properly, COW or not.
5967 if (current
->protection
!= old_prot
) {
5968 /* Look one level in we support nested pmaps */
5969 /* from mapped submaps which are direct entries */
5974 prot
= current
->protection
;
5975 if (current
->is_sub_map
|| (VME_OBJECT(current
) == NULL
) || (VME_OBJECT(current
) != compressor_object
)) {
5976 prot
&= ~VM_PROT_WRITE
;
5978 assert(!VME_OBJECT(current
)->code_signed
);
5979 assert(VME_OBJECT(current
)->copy_strategy
== MEMORY_OBJECT_COPY_NONE
);
5982 if (override_nx(map
, VME_ALIAS(current
)) && prot
) {
5983 prot
|= VM_PROT_EXECUTE
;
5986 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5987 if (!(old_prot
& VM_PROT_EXECUTE
) &&
5988 (prot
& VM_PROT_EXECUTE
) &&
5989 panic_on_unsigned_execute
&&
5990 (proc_selfcsflags() & CS_KILL
)) {
5991 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, old_prot
, prot
);
5993 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5995 if (pmap_has_prot_policy(prot
)) {
5996 if (current
->wired_count
) {
5997 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5998 map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, prot
, current
->wired_count
);
6001 /* If the pmap layer cares about this
6002 * protection type, force a fault for
6003 * each page so that vm_fault will
6004 * repopulate the page with the full
6005 * set of protections.
6008 * TODO: We don't seem to need this,
6009 * but this is due to an internal
6010 * implementation detail of
6011 * pmap_protect. Do we want to rely
6014 prot
= VM_PROT_NONE
;
6017 if (current
->is_sub_map
&& current
->use_pmap
) {
6018 pmap_protect(VME_SUBMAP(current
)->pmap
,
6023 if (prot
& VM_PROT_WRITE
) {
6024 if (VME_OBJECT(current
) == compressor_object
) {
6026 * For write requests on the
6027 * compressor, we will ask the
6028 * pmap layer to prevent us from
6029 * taking a write fault when we
6030 * attempt to access the mapping
6033 pmap_options
|= PMAP_OPTIONS_PROTECT_IMMEDIATE
;
6037 pmap_protect_options(map
->pmap
,
6045 current
= current
->vme_next
;
6049 while ((current
!= vm_map_to_entry(map
)) &&
6050 (current
->vme_start
<= end
)) {
6051 vm_map_simplify_entry(map
, current
);
6052 current
= current
->vme_next
;
6056 return KERN_SUCCESS
;
/*
 *	vm_map_inherit:
 *
 *	Sets the inheritance of the specified address
 *	range in the target map.  Inheritance
 *	affects how the map will be shared with
 *	child maps at the time of vm_map_fork.
 */
kern_return_t
vm_map_inherit(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_inherit_t    new_inheritance)
{
    vm_map_entry_t  entry;
    vm_map_entry_t  temp_entry;

    vm_map_lock(map);

    VM_MAP_RANGE_CHECK(map, start, end);

    if (vm_map_lookup_entry(map, start, &temp_entry)) {
        entry = temp_entry;
    } else {
        temp_entry = temp_entry->vme_next;
        entry = temp_entry;
    }

    /* first check entire range for submaps which can't support the */
    /* given inheritance. */
    while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
        if (entry->is_sub_map) {
            if (new_inheritance == VM_INHERIT_COPY) {
                vm_map_unlock(map);
                return KERN_INVALID_ARGUMENT;
            }
        }
        entry = entry->vme_next;
    }

    entry = temp_entry;
    if (entry != vm_map_to_entry(map)) {
        /* clip and unnest if necessary */
        vm_map_clip_start(map, entry, start);
    }

    while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
        vm_map_clip_end(map, entry, end);
        if (entry->is_sub_map) {
            /* clip did unnest if needed */
            assert(!entry->use_pmap);
        }

        entry->inheritance = new_inheritance;

        entry = entry->vme_next;
    }

    vm_map_unlock(map);
    return KERN_SUCCESS;
}
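/*
 * Usage sketch (illustrative, not in the original source): marking a range
 * VM_INHERIT_NONE keeps it out of any child map created by vm_map_fork():
 *
 *	kr = vm_map_inherit(map, start, start + size, VM_INHERIT_NONE);
 *
 * VM_INHERIT_COPY is rejected above for ranges backed by submaps.
 */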
/*
 * Update the accounting for the amount of wired memory in this map.  If the user has
 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
 */
static kern_return_t
add_wire_counts(
    vm_map_t        map,
    vm_map_entry_t  entry,
    boolean_t       user_wire)
{
    vm_map_size_t   size;

    if (user_wire) {
        unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

        /*
         * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
         * this map entry.
         */

        if (entry->user_wired_count == 0) {
            size = entry->vme_end - entry->vme_start;

            /*
             * Since this is the first time the user is wiring this map entry, check to see if we're
             * exceeding the user wire limits.  There is a per map limit which is the smaller of either
             * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value.  There is also
             * a system-wide limit on the amount of memory all users can wire.  If the user is over either
             * limit, then we fail.
             */

            if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
                size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
                return KERN_RESOURCE_SHORTAGE;
            }

            /*
             * The first time the user wires an entry, we also increment the wired_count and add this to
             * the total that has been wired in the map.
             */

            if (entry->wired_count >= MAX_WIRE_COUNT) {
                return KERN_FAILURE;
            }

            entry->wired_count++;
            map->user_wire_size += size;
        }

        if (entry->user_wired_count >= MAX_WIRE_COUNT) {
            return KERN_FAILURE;
        }

        entry->user_wired_count++;
    } else {
        /*
         * The kernel's wiring the memory.  Just bump the count and continue.
         */

        if (entry->wired_count >= MAX_WIRE_COUNT) {
            panic("vm_map_wire: too many wirings");
        }

        entry->wired_count++;
    }

    return KERN_SUCCESS;
}
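/*
 * Worked example (not in the original source): suppose a task already has
 * map->user_wire_size == 96 MB, its per-map limit works out to 100 MB, and
 * it asks to wire an 8 MB entry for the first time (user_wired_count == 0).
 * Since 8 MB + 96 MB exceeds 100 MB, the first check above fails with
 * KERN_RESOURCE_SHORTAGE.  A kernel-initiated wiring of the same entry
 * never consults those limits; it only bumps wired_count and panics if
 * MAX_WIRE_COUNT would be exceeded.
 */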
/*
 * Update the memory wiring accounting now that the given map entry is being unwired.
 */
static void
subtract_wire_counts(
    vm_map_t        map,
    vm_map_entry_t  entry,
    boolean_t       user_wire)
{
    if (user_wire) {
        /*
         * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
         */

        if (entry->user_wired_count == 1) {
            /*
             * We're removing the last user wire reference.  Decrement the wired_count and the total
             * user wired memory for this map.
             */

            assert(entry->wired_count >= 1);
            entry->wired_count--;
            map->user_wire_size -= entry->vme_end - entry->vme_start;
        }

        assert(entry->user_wired_count >= 1);
        entry->user_wired_count--;
    } else {
        /*
         * The kernel is unwiring the memory.  Just update the count.
         */

        assert(entry->wired_count >= 1);
        entry->wired_count--;
    }
}

int cs_executable_wire = 0;
6236 * Sets the pageability of the specified address range in the
6237 * target map as wired. Regions specified as not pageable require
6238 * locked-down physical memory and physical page maps. The
6239 * access_type variable indicates types of accesses that must not
6240 * generate page faults. This is checked against protection of
6241 * memory being locked-down.
6243 * The map must not be locked, but a reference must remain to the
6244 * map throughout the call.
6246 static kern_return_t
6249 vm_map_offset_t start
,
6250 vm_map_offset_t end
,
6251 vm_prot_t caller_prot
,
6253 boolean_t user_wire
,
6255 vm_map_offset_t pmap_addr
,
6256 ppnum_t
*physpage_p
)
6258 vm_map_entry_t entry
;
6259 vm_prot_t access_type
;
6260 struct vm_map_entry
*first_entry
, tmp_entry
;
6262 vm_map_offset_t s
, e
;
6264 boolean_t need_wakeup
;
6265 boolean_t main_map
= FALSE
;
6266 wait_interrupt_t interruptible_state
;
6267 thread_t cur_thread
;
6268 unsigned int last_timestamp
;
6270 boolean_t wire_and_extract
;
6272 access_type
= (caller_prot
& VM_PROT_ALL
);
6274 wire_and_extract
= FALSE
;
6275 if (physpage_p
!= NULL
) {
6277 * The caller wants the physical page number of the
6278 * wired page. We return only one physical page number
6279 * so this works for only one page at a time.
6281 if ((end
- start
) != PAGE_SIZE
) {
6282 return KERN_INVALID_ARGUMENT
;
6284 wire_and_extract
= TRUE
;
6289 if (map_pmap
== NULL
) {
6292 last_timestamp
= map
->timestamp
;
6294 VM_MAP_RANGE_CHECK(map
, start
, end
);
6295 assert(page_aligned(start
));
6296 assert(page_aligned(end
));
6297 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
6298 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
6300 /* We wired what the caller asked for, zero pages */
6302 return KERN_SUCCESS
;
6305 need_wakeup
= FALSE
;
6306 cur_thread
= current_thread();
6311 if (vm_map_lookup_entry(map
, s
, &first_entry
)) {
6312 entry
= first_entry
;
6314 * vm_map_clip_start will be done later.
6315 * We don't want to unnest any nested submaps here !
6318 /* Start address is not in map */
6319 rc
= KERN_INVALID_ADDRESS
;
6323 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
6325 * At this point, we have wired from "start" to "s".
6326 * We still need to wire from "s" to "end".
6328 * "entry" hasn't been clipped, so it could start before "s"
6329 * and/or end after "end".
6332 /* "e" is how far we want to wire in this entry */
6339 * If another thread is wiring/unwiring this entry then
6340 * block after informing other thread to wake us up.
6342 if (entry
->in_transition
) {
6343 wait_result_t wait_result
;
6346 * We have not clipped the entry. Make sure that
6347 * the start address is in range so that the lookup
6348 * below will succeed.
6349 * "s" is the current starting point: we've already
6350 * wired from "start" to "s" and we still have
6351 * to wire from "s" to "end".
6354 entry
->needs_wakeup
= TRUE
;
6357 * wake up anybody waiting on entries that we have
6361 vm_map_entry_wakeup(map
);
6362 need_wakeup
= FALSE
;
6365 * User wiring is interruptible
6367 wait_result
= vm_map_entry_wait(map
,
6368 (user_wire
) ? THREAD_ABORTSAFE
:
6370 if (user_wire
&& wait_result
== THREAD_INTERRUPTED
) {
6372 * undo the wirings we have done so far
6373 * We do not clear the needs_wakeup flag,
6374 * because we cannot tell if we were the
6382 * Cannot avoid a lookup here. reset timestamp.
6384 last_timestamp
= map
->timestamp
;
6387 * The entry could have been clipped, look it up again.
6388 * Worse that can happen is, it may not exist anymore.
6390 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
6392 * User: undo everything upto the previous
6393 * entry. let vm_map_unwire worry about
6394 * checking the validity of the range.
6399 entry
= first_entry
;
6403 if (entry
->is_sub_map
) {
6404 vm_map_offset_t sub_start
;
6405 vm_map_offset_t sub_end
;
6406 vm_map_offset_t local_start
;
6407 vm_map_offset_t local_end
;
6410 if (wire_and_extract
) {
6412 * Wiring would result in copy-on-write
6413 * which would not be compatible with
6414 * the sharing we have with the original
6415 * provider of this memory.
6417 rc
= KERN_INVALID_ARGUMENT
;
6421 vm_map_clip_start(map
, entry
, s
);
6422 vm_map_clip_end(map
, entry
, end
);
6424 sub_start
= VME_OFFSET(entry
);
6425 sub_end
= entry
->vme_end
;
6426 sub_end
+= VME_OFFSET(entry
) - entry
->vme_start
;
6428 local_end
= entry
->vme_end
;
6429 if (map_pmap
== NULL
) {
6431 vm_object_offset_t offset
;
6434 vm_map_entry_t local_entry
;
6435 vm_map_version_t version
;
6436 vm_map_t lookup_map
;
6438 if (entry
->use_pmap
) {
6439 pmap
= VME_SUBMAP(entry
)->pmap
;
6440 /* ppc implementation requires that */
6441 /* submaps pmap address ranges line */
6442 /* up with parent map */
6444 pmap_addr
= sub_start
;
6452 if (entry
->wired_count
) {
6453 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6458 * The map was not unlocked:
6459 * no need to goto re-lookup.
6460 * Just go directly to next entry.
6462 entry
= entry
->vme_next
;
6463 s
= entry
->vme_start
;
6467 /* call vm_map_lookup_locked to */
6468 /* cause any needs copy to be */
6470 local_start
= entry
->vme_start
;
6472 vm_map_lock_write_to_read(map
);
6473 if (vm_map_lookup_locked(
6474 &lookup_map
, local_start
,
6475 access_type
| VM_PROT_COPY
,
6476 OBJECT_LOCK_EXCLUSIVE
,
6478 &offset
, &prot
, &wired
,
6481 vm_map_unlock_read(lookup_map
);
6482 assert(map_pmap
== NULL
);
6483 vm_map_unwire(map
, start
,
6485 return KERN_FAILURE
;
6487 vm_object_unlock(object
);
6488 if (real_map
!= lookup_map
) {
6489 vm_map_unlock(real_map
);
6491 vm_map_unlock_read(lookup_map
);
6494 /* we unlocked, so must re-lookup */
6495 if (!vm_map_lookup_entry(map
,
6503 * entry could have been "simplified",
6506 entry
= local_entry
;
6507 assert(s
== local_start
);
6508 vm_map_clip_start(map
, entry
, s
);
6509 vm_map_clip_end(map
, entry
, end
);
6510 /* re-compute "e" */
6516 /* did we have a change of type? */
6517 if (!entry
->is_sub_map
) {
6518 last_timestamp
= map
->timestamp
;
6522 local_start
= entry
->vme_start
;
6526 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6530 entry
->in_transition
= TRUE
;
6533 rc
= vm_map_wire_nested(VME_SUBMAP(entry
),
6536 user_wire
, pmap
, pmap_addr
,
6541 * Find the entry again. It could have been clipped
6542 * after we unlocked the map.
6544 if (!vm_map_lookup_entry(map
, local_start
,
6546 panic("vm_map_wire: re-lookup failed");
6548 entry
= first_entry
;
6550 assert(local_start
== s
);
6551 /* re-compute "e" */
6557 last_timestamp
= map
->timestamp
;
6558 while ((entry
!= vm_map_to_entry(map
)) &&
6559 (entry
->vme_start
< e
)) {
6560 assert(entry
->in_transition
);
6561 entry
->in_transition
= FALSE
;
6562 if (entry
->needs_wakeup
) {
6563 entry
->needs_wakeup
= FALSE
;
6566 if (rc
!= KERN_SUCCESS
) {/* from vm_*_wire */
6567 subtract_wire_counts(map
, entry
, user_wire
);
6569 entry
= entry
->vme_next
;
6571 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6575 /* no need to relookup again */
6576 s
= entry
->vme_start
;
6581 * If this entry is already wired then increment
6582 * the appropriate wire reference count.
6584 if (entry
->wired_count
) {
6585 if ((entry
->protection
& access_type
) != access_type
) {
6586 /* found a protection problem */
6590 * We should always return an error
6591 * in this case but since we didn't
6592 * enforce it before, let's do
6593 * it only for the new "wire_and_extract"
6594 * code path for now...
6596 if (wire_and_extract
) {
6597 rc
= KERN_PROTECTION_FAILURE
;
6603 * entry is already wired down, get our reference
6604 * after clipping to our range.
6606 vm_map_clip_start(map
, entry
, s
);
6607 vm_map_clip_end(map
, entry
, end
);
6609 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6613 if (wire_and_extract
) {
6615 vm_object_offset_t offset
;
6619 * We don't have to "wire" the page again
6620 * but we still have to "extract" its
6621 * physical page number, after some sanity
6624 assert((entry
->vme_end
- entry
->vme_start
)
6626 assert(!entry
->needs_copy
);
6627 assert(!entry
->is_sub_map
);
6628 assert(VME_OBJECT(entry
));
6629 if (((entry
->vme_end
- entry
->vme_start
)
6631 entry
->needs_copy
||
6632 entry
->is_sub_map
||
6633 VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
6634 rc
= KERN_INVALID_ARGUMENT
;
6638 object
= VME_OBJECT(entry
);
6639 offset
= VME_OFFSET(entry
);
6640 /* need exclusive lock to update m->dirty */
6641 if (entry
->protection
& VM_PROT_WRITE
) {
6642 vm_object_lock(object
);
6644 vm_object_lock_shared(object
);
6646 m
= vm_page_lookup(object
, offset
);
6647 assert(m
!= VM_PAGE_NULL
);
6648 assert(VM_PAGE_WIRED(m
));
6649 if (m
!= VM_PAGE_NULL
&& VM_PAGE_WIRED(m
)) {
6650 *physpage_p
= VM_PAGE_GET_PHYS_PAGE(m
);
6651 if (entry
->protection
& VM_PROT_WRITE
) {
6652 vm_object_lock_assert_exclusive(
6654 m
->vmp_dirty
= TRUE
;
6657 /* not already wired !? */
6660 vm_object_unlock(object
);
6663 /* map was not unlocked: no need to relookup */
6664 entry
= entry
->vme_next
;
6665 s
= entry
->vme_start
;
6670 * Unwired entry or wire request transmitted via submap
6674 * Wiring would copy the pages to the shadow object.
6675 * The shadow object would not be code-signed so
6676 * attempting to execute code from these copied pages
6677 * would trigger a code-signing violation.
6680 if ((entry
->protection
& VM_PROT_EXECUTE
)
6681 #if !CONFIG_EMBEDDED
6683 map
!= kernel_map
&&
6684 cs_process_enforcement(NULL
)
6685 #endif /* !CONFIG_EMBEDDED */
6688 printf("pid %d[%s] wiring executable range from "
6689 "0x%llx to 0x%llx: rejected to preserve "
6692 (current_task()->bsd_info
6693 ? proc_name_address(current_task()->bsd_info
)
6695 (uint64_t) entry
->vme_start
,
6696 (uint64_t) entry
->vme_end
);
6697 #endif /* MACH_ASSERT */
6698 DTRACE_VM2(cs_executable_wire
,
6699 uint64_t, (uint64_t)entry
->vme_start
,
6700 uint64_t, (uint64_t)entry
->vme_end
);
6701 cs_executable_wire
++;
6702 rc
= KERN_PROTECTION_FAILURE
;
6707 * Perform actions of vm_map_lookup that need the write
6708 * lock on the map: create a shadow object for a
6709 * copy-on-write region, or an object for a zero-fill
6712 size
= entry
->vme_end
- entry
->vme_start
;
6714 * If wiring a copy-on-write page, we need to copy it now
6715 * even if we're only (currently) requesting read access.
6716 * This is aggressive, but once it's wired we can't move it.
6718 if (entry
->needs_copy
) {
6719 if (wire_and_extract
) {
6721 * We're supposed to share with the original
6722 * provider so should not be "needs_copy"
6724 rc
= KERN_INVALID_ARGUMENT
;
6728 VME_OBJECT_SHADOW(entry
, size
);
6729 entry
->needs_copy
= FALSE
;
6730 } else if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
6731 if (wire_and_extract
) {
6733 * We're supposed to share with the original
6734 * provider so should already have an object.
6736 rc
= KERN_INVALID_ARGUMENT
;
6739 VME_OBJECT_SET(entry
, vm_object_allocate(size
));
6740 VME_OFFSET_SET(entry
, (vm_object_offset_t
)0);
6741 assert(entry
->use_pmap
);
6744 vm_map_clip_start(map
, entry
, s
);
6745 vm_map_clip_end(map
, entry
, end
);
6747 /* re-compute "e" */
6754 * Check for holes and protection mismatch.
6755 * Holes: Next entry should be contiguous unless this
6756 * is the end of the region.
6757 * Protection: Access requested must be allowed, unless
6758 * wiring is by protection class
6760 if ((entry
->vme_end
< end
) &&
6761 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
6762 (entry
->vme_next
->vme_start
> entry
->vme_end
))) {
6764 rc
= KERN_INVALID_ADDRESS
;
6767 if ((entry
->protection
& access_type
) != access_type
) {
6768 /* found a protection problem */
6769 rc
= KERN_PROTECTION_FAILURE
;
6773 assert(entry
->wired_count
== 0 && entry
->user_wired_count
== 0);
6775 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6779 entry
->in_transition
= TRUE
;
6782 * This entry might get split once we unlock the map.
6783 * In vm_fault_wire(), we need the current range as
6784 * defined by this entry. In order for this to work
6785 * along with a simultaneous clip operation, we make a
6786 * temporary copy of this entry and use that for the
6787 * wiring. Note that the underlying objects do not
6788 * change during a clip.
6793 * The in_transition state guarantees that the entry
6794 * (or entries for this range, if split occurred) will be
6795 * there when the map lock is acquired for the second time.
6799 if (!user_wire
&& cur_thread
!= THREAD_NULL
) {
6800 interruptible_state
= thread_interrupt_level(THREAD_UNINT
);
6802 interruptible_state
= THREAD_UNINT
;
6806 rc
= vm_fault_wire(map
,
6807 &tmp_entry
, caller_prot
, tag
, map_pmap
, pmap_addr
,
6810 rc
= vm_fault_wire(map
,
6811 &tmp_entry
, caller_prot
, tag
, map
->pmap
,
6812 tmp_entry
.vme_start
,
6816 if (!user_wire
&& cur_thread
!= THREAD_NULL
) {
6817 thread_interrupt_level(interruptible_state
);
6822 if (last_timestamp
+ 1 != map
->timestamp
) {
6824 * Find the entry again. It could have been clipped
6825 * after we unlocked the map.
6827 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
6829 panic("vm_map_wire: re-lookup failed");
6832 entry
= first_entry
;
6835 last_timestamp
= map
->timestamp
;
6837 while ((entry
!= vm_map_to_entry(map
)) &&
6838 (entry
->vme_start
< tmp_entry
.vme_end
)) {
6839 assert(entry
->in_transition
);
6840 entry
->in_transition
= FALSE
;
6841 if (entry
->needs_wakeup
) {
6842 entry
->needs_wakeup
= FALSE
;
6845 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6846 subtract_wire_counts(map
, entry
, user_wire
);
6848 entry
= entry
->vme_next
;
6851 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6855 if ((entry
!= vm_map_to_entry(map
)) && /* we still have entries in the map */
6856 (tmp_entry
.vme_end
!= end
) && /* AND, we are not at the end of the requested range */
6857 (entry
->vme_start
!= tmp_entry
.vme_end
)) { /* AND, the next entry is not contiguous. */
6858 /* found a "new" hole */
6859 s
= tmp_entry
.vme_end
;
6860 rc
= KERN_INVALID_ADDRESS
;
6864 s
= entry
->vme_start
;
6865 } /* end while loop through map entries */
6868 if (rc
== KERN_SUCCESS
) {
6869 /* repair any damage we may have made to the VM map */
6870 vm_map_simplify_range(map
, start
, end
);
6876 * wake up anybody waiting on entries we wired.
6879 vm_map_entry_wakeup(map
);
6882 if (rc
!= KERN_SUCCESS
) {
6883 /* undo what has been wired so far */
6884 vm_map_unwire_nested(map
, start
, s
, user_wire
,
6885 map_pmap
, pmap_addr
);
kern_return_t
vm_map_wire_external(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_prot_t       caller_prot,
    boolean_t       user_wire)
{
    kern_return_t   kret;

    kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
        user_wire, (pmap_t)NULL, 0, NULL);
    return kret;
}

kern_return_t
vm_map_wire_kernel(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_prot_t       caller_prot,
    vm_tag_t        tag,
    boolean_t       user_wire)
{
    kern_return_t   kret;

    kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
        user_wire, (pmap_t)NULL, 0, NULL);
    return kret;
}

kern_return_t
vm_map_wire_and_extract_external(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_prot_t       caller_prot,
    boolean_t       user_wire,
    ppnum_t         *physpage_p)
{
    kern_return_t   kret;

    kret = vm_map_wire_nested(map,
        start,
        start + VM_MAP_PAGE_SIZE(map),
        caller_prot,
        vm_tag_bt(),
        user_wire,
        (pmap_t)NULL,
        0,
        physpage_p);
    if (kret != KERN_SUCCESS &&
        physpage_p != NULL) {
        *physpage_p = 0;
    }
    return kret;
}

kern_return_t
vm_map_wire_and_extract_kernel(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_prot_t       caller_prot,
    vm_tag_t        tag,
    boolean_t       user_wire,
    ppnum_t         *physpage_p)
{
    kern_return_t   kret;

    kret = vm_map_wire_nested(map,
        start,
        start + VM_MAP_PAGE_SIZE(map),
        caller_prot,
        tag,
        user_wire,
        (pmap_t)NULL,
        0,
        physpage_p);
    if (kret != KERN_SUCCESS &&
        physpage_p != NULL) {
        *physpage_p = 0;
    }
    return kret;
}
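/*
 * Usage sketch (illustrative, not in the original source): in-kernel callers
 * normally wire through vm_map_wire_kernel() with an explicit VM tag, e.g.
 *
 *	kr = vm_map_wire_kernel(map, addr, addr + size,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_NONE, FALSE);
 *
 * while the *_and_extract variants wire exactly one page
 * (start .. start + VM_MAP_PAGE_SIZE(map)) and also return its physical
 * page number through physpage_p.
 */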
6981 * Sets the pageability of the specified address range in the target
6982 * as pageable. Regions specified must have been wired previously.
6984 * The map must not be locked, but a reference must remain to the map
6985 * throughout the call.
6987 * Kernel will panic on failures. User unwire ignores holes and
6988 * unwired and intransition entries to avoid losing memory by leaving
static kern_return_t
vm_map_unwire_nested(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t user_wire,
	pmap_t map_pmap,
	vm_map_offset_t pmap_addr)
{
	vm_map_entry_t entry;
	struct vm_map_entry *first_entry, tmp_entry;
	boolean_t need_wakeup;
	boolean_t main_map = FALSE;
	unsigned int last_timestamp;

	vm_map_lock(map);
	if (map_pmap == NULL) {
		main_map = TRUE;
	}
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(page_aligned(start));
	assert(page_aligned(end));
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

	if (start == end) {
		/* We unwired what the caller asked for: zero pages */
		vm_map_unlock(map);
		return KERN_SUCCESS;
	}

	if (vm_map_lookup_entry(map, start, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested sub maps here !
		 */
	} else {
		if (!user_wire) {
			panic("vm_map_unwire: start not found");
		}
		/* Start address is not in map. */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	if (entry->superpage_size) {
		/* superpages are always wired */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	need_wakeup = FALSE;
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		if (entry->in_transition) {
			/*
			 * 1)
			 * Another thread is wiring down this entry.  Note
			 * that if it is not for the other thread we would
			 * be unwiring an unwired entry.  This is not
			 * permitted.  If we wait, we will be unwiring memory
			 * we did not wire.
			 *
			 * 2)
			 * Another thread is unwiring this entry.  We did not
			 * have a reference to it, because if we did, this
			 * entry will not be getting unwired now.
			 */
			if (!user_wire) {
				/*
				 * This could happen: there could be some
				 * overlapping vslock/vsunlock operations
				 * going on.
				 * We should probably just wait and retry,
				 * but then we have to be careful that this
				 * entry could get "simplified" after
				 * "in_transition" gets unset and before
				 * we re-lookup the entry, so we would
				 * have to re-clip the entry to avoid
				 * re-unwiring what we have already unwired...
				 * See vm_map_wire_nested().
				 *
				 * Or we could just ignore "in_transition"
				 * here and proceed to decrement the wired
				 * count(s) on this entry.  That should be fine
				 * as long as "wired_count" doesn't drop all
				 * the way to 0 (and we should panic if THAT
				 * happens).
				 */
				panic("vm_map_unwire: in_transition entry");
			}

			entry = entry->vme_next;
			continue;
		}

		if (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;
			pmap_t pmap;

			vm_map_clip_start(map, entry, start);
			vm_map_clip_end(map, entry, end);

			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end - entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			if (map_pmap == NULL) {
				if (entry->use_pmap) {
					pmap = VME_SUBMAP(entry)->pmap;
					pmap_addr = sub_start;
				} else {
					pmap = map->pmap;
					pmap_addr = start;
				}
				if (entry->wired_count == 0 ||
				    (user_wire && entry->user_wired_count == 0)) {
					if (!user_wire) {
						panic("vm_map_unwire: entry is unwired");
					}
					entry = entry->vme_next;
					continue;
				}

				/*
				 * Check for holes
				 * Holes: Next entry should be contiguous unless
				 * this is the end of the region.
				 */
				if (((entry->vme_end < end) &&
				    ((entry->vme_next == vm_map_to_entry(map)) ||
				    (entry->vme_next->vme_start
				    > entry->vme_end)))) {
					if (!user_wire) {
						panic("vm_map_unwire: non-contiguous region");
					}
					/*
					 * entry = entry->vme_next;
					 * continue;
					 */
				}

				subtract_wire_counts(map, entry, user_wire);

				if (entry->wired_count != 0) {
					entry = entry->vme_next;
					continue;
				}

				entry->in_transition = TRUE;
				tmp_entry = *entry;/* see comment in vm_map_wire() */

				/*
				 * We can unlock the map now.  The in_transition state
				 * guarantees existence of the entry.
				 */
				vm_map_unlock(map);
				vm_map_unwire_nested(VME_SUBMAP(entry),
				    sub_start, sub_end, user_wire, pmap, pmap_addr);
				vm_map_lock(map);

				if (last_timestamp + 1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 */
					if (!vm_map_lookup_entry(map,
					    tmp_entry.vme_start,
					    &first_entry)) {
						if (!user_wire) {
							panic("vm_map_unwire: re-lookup failed");
						}
						entry = first_entry->vme_next;
					} else {
						entry = first_entry;
					}
				}
				last_timestamp = map->timestamp;

				/*
				 * clear transition bit for all constituent entries
				 * that were in the original entry (saved in
				 * tmp_entry).  Also check for waiters.
				 */
				while ((entry != vm_map_to_entry(map)) &&
				    (entry->vme_start < tmp_entry.vme_end)) {
					assert(entry->in_transition);
					entry->in_transition = FALSE;
					if (entry->needs_wakeup) {
						entry->needs_wakeup = FALSE;
						need_wakeup = TRUE;
					}
					entry = entry->vme_next;
				}
				continue;
			} else {
				vm_map_unlock(map);
				vm_map_unwire_nested(VME_SUBMAP(entry),
				    sub_start, sub_end, user_wire, map_pmap,
				    pmap_addr);
				vm_map_lock(map);

				if (last_timestamp + 1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 */
					if (!vm_map_lookup_entry(map,
					    tmp_entry.vme_start,
					    &first_entry)) {
						if (!user_wire) {
							panic("vm_map_unwire: re-lookup failed");
						}
						entry = first_entry->vme_next;
					} else {
						entry = first_entry;
					}
				}
				last_timestamp = map->timestamp;
			}
			continue;
		}

		if ((entry->wired_count == 0) ||
		    (user_wire && entry->user_wired_count == 0)) {
			if (!user_wire) {
				panic("vm_map_unwire: entry is unwired");
			}
			entry = entry->vme_next;
			continue;
		}

		assert(entry->wired_count > 0 &&
		    (!user_wire || entry->user_wired_count > 0));

		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);

		/*
		 * Check for holes
		 * Holes: Next entry should be contiguous unless
		 * this is the end of the region.
		 */
		if (((entry->vme_end < end) &&
		    ((entry->vme_next == vm_map_to_entry(map)) ||
		    (entry->vme_next->vme_start > entry->vme_end)))) {
			if (!user_wire) {
				panic("vm_map_unwire: non-contiguous region");
			}
			entry = entry->vme_next;
			continue;
		}

		subtract_wire_counts(map, entry, user_wire);

		if (entry->wired_count != 0) {
			entry = entry->vme_next;
			continue;
		}

		if (entry->zero_wired_pages) {
			entry->zero_wired_pages = FALSE;
		}

		entry->in_transition = TRUE;
		tmp_entry = *entry;     /* see comment in vm_map_wire() */

		/*
		 * We can unlock the map now.  The in_transition state
		 * guarantees existence of the entry.
		 */
		vm_map_unlock(map);
		if (map_pmap) {
			vm_fault_unwire(map,
			    &tmp_entry, FALSE, map_pmap, pmap_addr);
		} else {
			vm_fault_unwire(map,
			    &tmp_entry, FALSE, map->pmap,
			    tmp_entry.vme_start);
		}
		vm_map_lock(map);

		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * Find the entry again.  It could have been clipped
			 * or deleted after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
			    &first_entry)) {
				if (!user_wire) {
					panic("vm_map_unwire: re-lookup failed");
				}
				entry = first_entry->vme_next;
			} else {
				entry = first_entry;
			}
		}
		last_timestamp = map->timestamp;

		/*
		 * clear transition bit for all constituent entries that
		 * were in the original entry (saved in tmp_entry).  Also
		 * check for waiters.
		 */
		while ((entry != vm_map_to_entry(map)) &&
		    (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			entry = entry->vme_next;
		}
	}

	/*
	 * We might have fragmented the address space when we wired this
	 * range of addresses.  Attempt to re-coalesce these VM map entries
	 * with their neighbors now that they're no longer wired.
	 * Under some circumstances, address space fragmentation can
	 * prevent VM object shadow chain collapsing, which can cause
	 * excessive memory usage.
	 */
	vm_map_simplify_range(map, start, end);

	vm_map_unlock(map);
	/*
	 * wake up anybody waiting on entries that we have unwired.
	 */
	if (need_wakeup) {
		vm_map_entry_wakeup(map);
	}
	return KERN_SUCCESS;
}

kern_return_t
vm_map_unwire(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t user_wire)
{
	return vm_map_unwire_nested(map, start, end,
	           user_wire, (pmap_t)NULL, 0);
}


/*
 *	vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
 */
static void
vm_map_entry_delete(
	vm_map_t map,
	vm_map_entry_t entry)
{
	vm_map_offset_t s, e;
	vm_object_t object;
	vm_map_t submap;

	s = entry->vme_start;
	e = entry->vme_end;
	assert(page_aligned(s));
	assert(page_aligned(e));
	if (entry->map_aligned == TRUE) {
		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
	}
	assert(entry->wired_count == 0);
	assert(entry->user_wired_count == 0);
	assert(!entry->permanent);

	if (entry->is_sub_map) {
		object = NULL;
		submap = VME_SUBMAP(entry);
	} else {
		submap = NULL;
		object = VME_OBJECT(entry);
	}

	vm_map_store_entry_unlink(map, entry);
	map->size -= e - s;

	vm_map_entry_dispose(map, entry);

	vm_map_unlock(map);
	/*
	 * Deallocate the object only after removing all
	 * pmap entries pointing to its pages.
	 */
	if (submap) {
		vm_map_deallocate(submap);
	} else {
		vm_object_deallocate(object);
	}
}
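/*
 * Illustrative sketch (not part of the original source): because
 * vm_map_entry_delete() drops the map lock before deallocating the
 * backing submap or object, a caller iterating over entries must
 * re-take the lock and re-lookup afterwards, roughly:
 *
 *	vm_map_lock(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		vm_map_entry_delete(map, entry);    returns with map unlocked
 *		vm_map_lock(map);
 *		(void) vm_map_lookup_entry(map, addr, &entry);
 *	}
 *	vm_map_unlock(map);
 *
 * "addr" here is a hypothetical placeholder for the example only.
 */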
void
vm_map_submap_pmap_clean(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_t sub_map,
	vm_map_offset_t offset)
{
	vm_map_offset_t submap_start;
	vm_map_offset_t submap_end;
	vm_map_size_t remove_size;
	vm_map_entry_t entry;

	submap_end = offset + (end - start);
	submap_start = offset;

	vm_map_lock_read(sub_map);
	if (vm_map_lookup_entry(sub_map, offset, &entry)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (offset > entry->vme_start) {
			remove_size -= offset - entry->vme_start;
		}

		if (submap_end < entry->vme_end) {
			remove_size -=
			    entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				start,
				start + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					(VME_OFFSET(entry) +
					offset -
					entry->vme_start),
					remove_size,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
				    (addr64_t)start,
				    (addr64_t)(start + remove_size));
			}
		}
	}

	entry = entry->vme_next;

	while ((entry != vm_map_to_entry(sub_map))
	    && (entry->vme_start < submap_end)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (submap_end < entry->vme_end) {
			remove_size -= entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				(start + entry->vme_start) - offset,
				((start + entry->vme_start) - offset) + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0 &&
			    VME_OBJECT(entry) != NULL) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					VME_OFFSET(entry),
					remove_size,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
				    (addr64_t)((start + entry->vme_start)
				    - offset),
				    (addr64_t)(((start + entry->vme_start)
				    - offset) + remove_size));
			}
		}
		entry = entry->vme_next;
	}
	vm_map_unlock_read(sub_map);
	return;
}
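/*
 * Worked example (illustrative, not part of the original source):
 * with start = 0x200000 and end = 0x240000 in the parent map and
 * offset = 0x10000 into the submap, the code above cleans the
 * submap range
 *
 *	submap_start = offset                 = 0x10000
 *	submap_end   = offset + (end - start) = 0x10000 + 0x40000 = 0x50000
 *
 * and each submap entry's removal size is clipped so that nothing
 * before "offset" or past "submap_end" is touched.
 */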
/*
 * virt_memory_guard_ast:
 *
 * Handle the AST callout for a virtual memory guard.
 * Raise an EXC_GUARD exception and terminate the task
 * if configured to do so.
 */
void
virt_memory_guard_ast(
	thread_t thread,
	mach_exception_data_type_t code,
	mach_exception_data_type_t subcode)
{
	task_t task = thread->task;
	assert(task != kernel_task);
	assert(task == current_task());
	uint32_t behavior;

	behavior = task->task_exc_guard;

	/* Is delivery enabled */
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return;
	}

	/* If only once, make sure we're that once */
	while (behavior & TASK_EXC_GUARD_VM_ONCE) {
		uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;

		if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
			break;
		}
		behavior = task->task_exc_guard;
		if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
			return;
		}
	}

	/* Raise exception via corpse fork or synchronously */
	if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
	    (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
		task_violated_guard(code, subcode, NULL);
	} else {
		task_exception_notify(EXC_GUARD, code, subcode);
	}

	/* Terminate the task if desired */
	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
		task_bsdtask_kill(current_task());
	}
}
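/*
 * Illustrative summary (sketch, not part of the original source): the
 * task_exc_guard bits combine roughly as follows for a VM guard
 * violation handled above.
 *
 *	DELIVER only        -> synchronous EXC_GUARD exception
 *	DELIVER | CORPSE    -> exception delivered via a corpse fork
 *	DELIVER | FATAL     -> synchronous exception, then the task is killed
 *	DELIVER | ONCE      -> as above, but DELIVER is atomically cleared
 *	                       so only the first violation is reported
 *	DELIVER clear       -> nothing is delivered
 */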
/*
 * vm_map_guard_exception:
 *
 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
 *
 * Right now, we do this when we find nothing mapped, or a
 * gap in the mapping when a user address space deallocate
 * was requested.  We report the address of the first gap found.
 */
static void
vm_map_guard_exception(
	vm_map_offset_t gap_start,
	unsigned reason)
{
	mach_exception_code_t code = 0;
	unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
	unsigned int target = 0; /* should we pass in pid associated with map? */
	mach_exception_data_type_t subcode = (uint64_t)gap_start;
	boolean_t fatal = FALSE;

	task_t task = current_task();

	/* Can't deliver exceptions to kernel task */
	if (task == kernel_task) {
		return;
	}

	EXC_GUARD_ENCODE_TYPE(code, guard_type);
	EXC_GUARD_ENCODE_FLAVOR(code, reason);
	EXC_GUARD_ENCODE_TARGET(code, target);

	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
		fatal = TRUE;
	}
	thread_guard_violation(current_thread(), code, subcode, fatal);
}

/*
 *	vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target map.
 *	Removes all user wirings.  Unwires one kernel wiring if
 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
 *
 *	This routine is called with map locked and leaves map locked.
 */
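/*
 * Illustrative sketch (not part of the original source): typical
 * internal callers combine the removal flags roughly like this.
 *
 *	Tear down a kernel mapping that holds one kernel wiring:
 *	    vm_map_remove(kernel_map, addr, addr + size,
 *	        VM_MAP_REMOVE_KUNWIRE);
 *
 *	Save the removed entries into a "zap" map instead of freeing
 *	them immediately (the zap map is later torn down with
 *	VM_MAP_REMOVE_NO_PMAP_CLEANUP, since the original pmap was
 *	already flushed here):
 *	    vm_map_delete(map, start, end,
 *	        VM_MAP_REMOVE_SAVE_ENTRIES, zap_map);
 *
 * "addr", "size", "start", "end" and "zap_map" are placeholders for
 * the example only.
 */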
static kern_return_t
vm_map_delete(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	int flags,
	vm_map_t zap_map)
{
	vm_map_entry_t entry, next;
	struct vm_map_entry *first_entry, tmp_entry;
	vm_map_offset_t s;
	vm_object_t object;
	boolean_t need_wakeup;
	unsigned int last_timestamp = ~0; /* unlikely value */
	int interruptible;
	vm_map_offset_t gap_start;
	__unused vm_map_offset_t save_start = start;
	__unused vm_map_offset_t save_end = end;
	const vm_map_offset_t FIND_GAP = 1;     /* a not page aligned value */
	const vm_map_offset_t GAPS_OK = 2;      /* a different not page aligned value */

	if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK)) {
		gap_start = FIND_GAP;
	} else {
		gap_start = GAPS_OK;
	}

	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
	    THREAD_ABORTSAFE : THREAD_UNINT;

	/*
	 * All our DMA I/O operations in IOKit are currently done by
	 * wiring through the map entries of the task requesting the I/O.
	 * Because of this, we must always wait for kernel wirings
	 * to go away on the entries before deleting them.
	 *
	 * Any caller who wants to actually remove a kernel wiring
	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
	 * properly remove one wiring instead of blasting through
	 * them all.
	 */
	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;

	while (1) {
		/*
		 * Find the start of the region, and clip it
		 */
		if (vm_map_lookup_entry(map, start, &first_entry)) {
			entry = first_entry;
			if (map == kalloc_map &&
			    (entry->vme_start != start ||
			    entry->vme_end != end)) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "mismatched entry %p [0x%llx:0x%llx]\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    entry,
				    (uint64_t)entry->vme_start,
				    (uint64_t)entry->vme_end);
			}

			/*
			 * If in a superpage, extend the range to include the start of the mapping.
			 */
			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
				start = SUPERPAGE_ROUND_DOWN(start);
				continue;
			}

			if (start == entry->vme_start) {
				/*
				 * No need to clip.  We don't want to cause
				 * any unnecessary unnesting in this case...
				 */
			} else {
				if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
				    entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(map))) {
					/*
					 * The entry will no longer be
					 * map-aligned after clipping
					 * and the caller said it's OK.
					 */
					entry->map_aligned = FALSE;
				}
				if (map == kalloc_map) {
					panic("vm_map_delete(%p,0x%llx,0x%llx):"
					    " clipping %p at 0x%llx\n",
					    map,
					    (uint64_t)start,
					    (uint64_t)end,
					    entry,
					    (uint64_t)start);
				}
				vm_map_clip_start(map, entry, start);
			}

			/*
			 * Fix the lookup hint now, rather than each
			 * time through the loop.
			 */
			SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
		} else {
			if (map->pmap == kernel_pmap &&
			    os_ref_get_count(&map->map_refcnt) != 0) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "no map entry at 0x%llx\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    (uint64_t)start);
			}
			entry = first_entry->vme_next;
			if (gap_start == FIND_GAP) {
				gap_start = start;
			}
		}
		break;
	}
	if (entry->superpage_size) {
		end = SUPERPAGE_ROUND_UP(end);
	}

	need_wakeup = FALSE;
	/*
	 * Step through all entries in this region
	 */
	s = entry->vme_start;
	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have deleted all the memory entries
		 * between "start" and "s".  We still need to delete
		 * all memory entries between "s" and "end".
		 * While we were blocked and the map was unlocked, some
		 * new memory entries could have been re-allocated between
		 * "start" and "s" and we don't want to mess with those.
		 * Some of those entries could even have been re-assembled
		 * with an entry after "s" (in vm_map_simplify_entry()), so
		 * we may have to vm_map_clip_start() again.
		 */

		if (entry->vme_start >= s) {
			/*
			 * This entry starts on or after "s"
			 * so no need to clip its start.
			 */
		} else {
			/*
			 * This entry has been re-assembled by a
			 * vm_map_simplify_entry().  We need to
			 * re-clip its start.
			 */
			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
			    entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(s,
			    VM_MAP_PAGE_MASK(map))) {
				/*
				 * The entry will no longer be map-aligned
				 * after clipping and the caller said it's OK.
				 */
				entry->map_aligned = FALSE;
			}
			if (map == kalloc_map) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "clipping %p at 0x%llx\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    entry,
				    (uint64_t)s);
			}
			vm_map_clip_start(map, entry, s);
		}
		if (entry->vme_end <= end) {
			/*
			 * This entry is going away completely, so no need
			 * to clip and possibly cause an unnecessary unnesting.
			 */
		} else {
			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
			    entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(end,
			    VM_MAP_PAGE_MASK(map))) {
				/*
				 * The entry will no longer be map-aligned
				 * after clipping and the caller said it's OK.
				 */
				entry->map_aligned = FALSE;
			}
			if (map == kalloc_map) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				    "clipping %p at 0x%llx\n",
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    entry,
				    (uint64_t)end);
			}
			vm_map_clip_end(map, entry, end);
		}

		if (entry->permanent) {
			if (map->pmap == kernel_pmap) {
				panic("%s(%p,0x%llx,0x%llx): "
				    "attempt to remove permanent "
				    "VM map entry "
				    "%p [0x%llx:0x%llx]\n",
				    __FUNCTION__,
				    map,
				    (uint64_t)start,
				    (uint64_t)end,
				    entry,
				    (uint64_t) entry->vme_start,
				    (uint64_t) entry->vme_end);
			} else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
//				printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
				entry->permanent = FALSE;
#if PMAP_CS
			} else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
				entry->permanent = FALSE;

				printf("%d[%s] %s(0x%llx,0x%llx): "
				    "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
				    "prot 0x%x/0x%x\n",
				    proc_selfpid(),
				    (current_task()->bsd_info
				    ? proc_name_address(current_task()->bsd_info)
				    : "?"),
				    __FUNCTION__,
				    (uint64_t) start,
				    (uint64_t) end,
				    (uint64_t)entry->vme_start,
				    (uint64_t)entry->vme_end,
				    entry->protection,
				    entry->max_protection);
#endif
			} else {
				if (vm_map_executable_immutable_verbose) {
					printf("%d[%s] %s(0x%llx,0x%llx): "
					    "permanent entry [0x%llx:0x%llx] "
					    "prot 0x%x/0x%x\n",
					    proc_selfpid(),
					    (current_task()->bsd_info
					    ? proc_name_address(current_task()->bsd_info)
					    : "?"),
					    __FUNCTION__,
					    (uint64_t) start,
					    (uint64_t) end,
					    (uint64_t)entry->vme_start,
					    (uint64_t)entry->vme_end,
					    entry->protection,
					    entry->max_protection);
				}
				/*
				 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
				 */
				DTRACE_VM5(vm_map_delete_permanent,
				    vm_map_offset_t, entry->vme_start,
				    vm_map_offset_t, entry->vme_end,
				    vm_prot_t, entry->protection,
				    vm_prot_t, entry->max_protection,
				    int, VME_ALIAS(entry));
			}
		}


		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * Another thread is wiring/unwiring this entry.
			 * Let the other thread know we are waiting.
			 */
			assert(s == entry->vme_start);
			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already unwired/deleted.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}

			wait_result = vm_map_entry_wait(map, interruptible);

			if (interruptible &&
			    wait_result == THREAD_INTERRUPTED) {
				/*
				 * We do not clear the needs_wakeup flag,
				 * since we cannot tell if we were the only one.
				 */
				return KERN_ABORTED;
			}

			/*
			 * The entry could have been clipped or it
			 * may not exist anymore.  Look it up again.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: use the next entry
				 */
				if (gap_start == FIND_GAP) {
					gap_start = s;
				}
				entry = first_entry->vme_next;
				s = entry->vme_start;
			} else {
				entry = first_entry;
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			last_timestamp = map->timestamp;
			continue;
		} /* end in_transition */

		if (entry->wired_count) {
			boolean_t user_wire;

			user_wire = entry->user_wired_count > 0;

			/*
			 * Remove a kernel wiring if requested
			 */
			if (flags & VM_MAP_REMOVE_KUNWIRE) {
				entry->wired_count--;
			}

			/*
			 * Remove all user wirings for proper accounting
			 */
			if (entry->user_wired_count > 0) {
				while (entry->user_wired_count) {
					subtract_wire_counts(map, entry, user_wire);
				}
			}

			if (entry->wired_count != 0) {
				assert(map != kernel_map);
				/*
				 * Cannot continue.  Typical case is when
				 * a user thread has physical io pending
				 * on this page.  Either wait for the
				 * kernel wiring to go away or return an
				 * error.
				 */
				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
					wait_result_t wait_result;

					assert(s == entry->vme_start);
					entry->needs_wakeup = TRUE;
					wait_result = vm_map_entry_wait(map,
					    interruptible);

					if (interruptible &&
					    wait_result == THREAD_INTERRUPTED) {
						/*
						 * We do not clear the
						 * needs_wakeup flag, since we
						 * cannot tell if we were the
						 * only one.
						 */
						return KERN_ABORTED;
					}

					/*
					 * The entry could have been clipped or
					 * it may not exist anymore.  Look it
					 * up again.
					 */
					if (!vm_map_lookup_entry(map, s,
					    &first_entry)) {
						assert(map != kernel_map);
						/*
						 * User: use the next entry
						 */
						if (gap_start == FIND_GAP) {
							gap_start = s;
						}
						entry = first_entry->vme_next;
						s = entry->vme_start;
					} else {
						entry = first_entry;
						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
					}
					last_timestamp = map->timestamp;
					continue;
				} else {
					return KERN_FAILURE;
				}
			}

			entry->in_transition = TRUE;
			/*
			 * copy current entry.  see comment in vm_map_wire()
			 */
			tmp_entry = *entry;
			assert(s == entry->vme_start);

			/*
			 * We can unlock the map now.  The in_transition
			 * state guarantees existence of the entry.
			 */
			vm_map_unlock(map);

			if (tmp_entry.is_sub_map) {
				vm_map_t sub_map;
				vm_map_offset_t sub_start, sub_end;
				pmap_t pmap;
				vm_map_offset_t pmap_addr;


				sub_map = VME_SUBMAP(&tmp_entry);
				sub_start = VME_OFFSET(&tmp_entry);
				sub_end = sub_start + (tmp_entry.vme_end -
				    tmp_entry.vme_start);
				if (tmp_entry.use_pmap) {
					pmap = sub_map->pmap;
					pmap_addr = tmp_entry.vme_start;
				} else {
					pmap = map->pmap;
					pmap_addr = tmp_entry.vme_start;
				}
				(void) vm_map_unwire_nested(sub_map,
				    sub_start, sub_end,
				    user_wire,
				    pmap, pmap_addr);
			} else {
				if (VME_OBJECT(&tmp_entry) == kernel_object) {
					pmap_protect_options(
						map->pmap,
						tmp_entry.vme_start,
						tmp_entry.vme_end,
						VM_PROT_NONE,
						PMAP_OPTIONS_REMOVE,
						NULL);
				}
				vm_fault_unwire(map, &tmp_entry,
				    VME_OBJECT(&tmp_entry) == kernel_object,
				    map->pmap, tmp_entry.vme_start);
			}

			vm_map_lock(map);

			if (last_timestamp + 1 != map->timestamp) {
				/*
				 * Find the entry again.  It could have
				 * been clipped after we unlocked the map.
				 */
				if (!vm_map_lookup_entry(map, s, &first_entry)) {
					assert((map != kernel_map) &&
					    (!entry->is_sub_map));
					if (gap_start == FIND_GAP) {
						gap_start = s;
					}
					first_entry = first_entry->vme_next;
					s = first_entry->vme_start;
				} else {
					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				}
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				first_entry = entry;
			}

			last_timestamp = map->timestamp;

			entry = first_entry;
			while ((entry != vm_map_to_entry(map)) &&
			    (entry->vme_start < tmp_entry.vme_end)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				entry = entry->vme_next;
			}
			/*
			 * We have unwired the entry(s).  Go back and
			 * delete them.
			 */
			entry = first_entry;
			continue;
		}

		/* entry is unwired */
		assert(entry->wired_count == 0);
		assert(entry->user_wired_count == 0);

		assert(s == entry->vme_start);

		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
			/*
			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
			 * vm_map_delete(), some map entries might have been
			 * transferred to a "zap_map", which doesn't have a
			 * pmap.  The original pmap has already been flushed
			 * in the vm_map_delete() call targeting the original
			 * map, but when we get to destroying the "zap_map",
			 * we don't have any pmap to flush, so let's just skip
			 * this part.
			 */
		} else if (entry->is_sub_map) {
			if (entry->use_pmap) {
#ifndef NO_NESTED_PMAP
				int pmap_flags;

				if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
					/*
					 * This is the final cleanup of the
					 * address space being terminated.
					 * No new mappings are expected and
					 * we don't really need to unnest the
					 * shared region (and lose the "global"
					 * pmap mappings, if applicable).
					 *
					 * Tell the pmap layer that we're
					 * "clean" wrt nesting.
					 */
					pmap_flags = PMAP_UNNEST_CLEAN;
				} else {
					/*
					 * We're unmapping part of the nested
					 * shared region, so we can't keep the
					 * nested pmap mappings for this range.
					 */
					pmap_flags = 0;
				}
				pmap_unnest_options(
					map->pmap,
					(addr64_t)entry->vme_start,
					entry->vme_end - entry->vme_start,
					pmap_flags);
#endif  /* NO_NESTED_PMAP */
				if (map->mapped_in_other_pmaps &&
				    os_ref_get_count(&map->map_refcnt) != 0) {
					/* clean up parent map/maps */
					vm_map_submap_pmap_clean(
						map, entry->vme_start,
						entry->vme_end,
						VME_SUBMAP(entry),
						VME_OFFSET(entry));
				}
			} else {
				vm_map_submap_pmap_clean(
					map, entry->vme_start, entry->vme_end,
					VME_SUBMAP(entry),
					VME_OFFSET(entry));
			}
		} else if (VME_OBJECT(entry) != kernel_object &&
		    VME_OBJECT(entry) != compressor_object) {
			object = VME_OBJECT(entry);
			if (map->mapped_in_other_pmaps &&
			    os_ref_get_count(&map->map_refcnt) != 0) {
				vm_object_pmap_protect_options(
					object, VME_OFFSET(entry),
					entry->vme_end - entry->vme_start,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
			    (map->pmap == kernel_pmap)) {
				/* Remove translations associated
				 * with this range unless the entry
				 * does not have an object, or
				 * it's the kernel map or a descendant
				 * since the platform could potentially
				 * create "backdoor" mappings invisible
				 * to the VM.  It is expected that
				 * objectless, non-kernel ranges
				 * do not have such VM invisible
				 * translations.
				 */
				pmap_remove_options(map->pmap,
				    (addr64_t)entry->vme_start,
				    (addr64_t)entry->vme_end,
				    PMAP_OPTIONS_REMOVE);
			}
		}

		if (entry->iokit_acct) {
			/* alternate accounting */
			DTRACE_VM4(vm_map_iokit_unmapped_region,
			    vm_map_t, map,
			    vm_map_offset_t, entry->vme_start,
			    vm_map_offset_t, entry->vme_end,
			    int, VME_ALIAS(entry));
			vm_map_iokit_unmapped_region(map,
			    (entry->vme_end -
			    entry->vme_start));
			entry->iokit_acct = FALSE;
			entry->use_pmap = FALSE;
		}

		/*
		 * All pmap mappings for this map entry must have been
		 * cleared by now.
		 */
		assert(vm_map_pmap_is_empty(map,
		    entry->vme_start,
		    entry->vme_end));

		next = entry->vme_next;

		if (map->pmap == kernel_pmap &&
		    os_ref_get_count(&map->map_refcnt) != 0 &&
		    entry->vme_end < end &&
		    (next == vm_map_to_entry(map) ||
		    next->vme_start != entry->vme_end)) {
			panic("vm_map_delete(%p,0x%llx,0x%llx): "
			    "hole after %p at 0x%llx\n",
			    map,
			    (uint64_t)start,
			    (uint64_t)end,
			    entry,
			    (uint64_t)entry->vme_end);
		}

		/*
		 * If the desired range didn't end with "entry", then there is a gap if
		 * we wrapped around to the start of the map or if "entry" and "next"
		 * aren't contiguous.
		 *
		 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
		 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
		 */
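		/*
		 * Worked example (illustrative, not part of the original
		 * source): with 4K hardware pages but a 16K map page size
		 * (mask 0x3fff), an entry may legitimately end at 0x100f000
		 * while the requested "end" is 0x1010000.  Rounding gives
		 * vm_map_round_page(0x100f000, 0x3fff) == 0x1010000, which
		 * is not below "end", so no spurious gap is reported; only
		 * an entry whose rounded end is still below "end" with a
		 * missing or non-contiguous "next" counts as a real gap.
		 */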
		if (gap_start == FIND_GAP &&
		    vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
		    (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
			gap_start = entry->vme_end;
		}
		s = next->vme_start;
		last_timestamp = map->timestamp;

		if (entry->permanent) {
			/*
			 * A permanent entry can not be removed, so leave it
			 * in place but remove all access permissions.
			 */
			entry->protection = VM_PROT_NONE;
			entry->max_protection = VM_PROT_NONE;
		} else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
		    zap_map != VM_MAP_NULL) {
			vm_map_size_t entry_size;
			/*
			 * The caller wants to save the affected VM map entries
			 * into the "zap_map".  The caller will take care of
			 * these entries.
			 */
			/* unlink the entry from "map" ... */
			vm_map_store_entry_unlink(map, entry);
			/* ... and add it to the end of the "zap_map" */
			vm_map_store_entry_link(zap_map,
			    vm_map_last_entry(zap_map),
			    entry,
			    VM_MAP_KERNEL_FLAGS_NONE);
			entry_size = entry->vme_end - entry->vme_start;
			map->size -= entry_size;
			zap_map->size += entry_size;
			/* we didn't unlock the map, so no timestamp increase */
		} else {
			vm_map_entry_delete(map, entry);
			/* vm_map_entry_delete unlocks the map */
			vm_map_lock(map);
		}

		entry = next;

		if (entry == vm_map_to_entry(map)) {
			break;
		}
		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * We are responsible for deleting everything
			 * from the given space.  If someone has interfered,
			 * we pick up where we left off.  Back fills should
			 * be all right for anyone, except map_delete, and
			 * we have to assume that the task has been fully
			 * disabled before we get here
			 */
			if (!vm_map_lookup_entry(map, s, &entry)) {
				entry = entry->vme_next;

				/*
				 * Nothing found for s.  If we weren't already done, then there is a gap.
				 */
				if (gap_start == FIND_GAP && s < end) {
					gap_start = s;
				}
				s = entry->vme_start;
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			/*
			 * others can not only allocate behind us, we can
			 * also see coalesce while we don't have the map lock
			 */
			if (entry == vm_map_to_entry(map)) {
				break;
			}
		}
		last_timestamp = map->timestamp;
	}

	if (map->wait_for_space) {
		thread_wakeup((event_t) map);
	}
	/*
	 * wake up anybody waiting on entries that we have already deleted.
	 */
	if (need_wakeup) {
		vm_map_entry_wakeup(map);
	}

	if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
		DTRACE_VM3(kern_vm_deallocate_gap,
		    vm_map_offset_t, gap_start,
		    vm_map_offset_t, save_start,
		    vm_map_offset_t, save_end);
		if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
			vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
		}
	}

	return KERN_SUCCESS;
}
/*
 *	vm_map_remove:
 *
 *	Remove the given address range from the target map.
 *	This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t flags)
{
	kern_return_t result;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	/*
	 * For the zone_map, the kernel controls the allocation/freeing of memory.
	 * Any free to the zone_map should be within the bounds of the map and
	 * should free up memory.  If the VM_MAP_RANGE_CHECK() silently converts a
	 * free to the zone_map into a no-op, there is a problem and we should
	 * panic.
	 */
	if ((map == zone_map) && (start == end)) {
		panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
	}
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	vm_map_unlock(map);

	return result;
}
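/*
 * Illustrative sketch (not part of the original source): a kernel
 * client releasing a previously allocated, kernel-wired VA range
 * might call the exported remove interface roughly like this
 * ("kaddr" and "ksize" are placeholders for the example only):
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_remove(kernel_map,
 *	    vm_map_trunc_page(kaddr, VM_MAP_PAGE_MASK(kernel_map)),
 *	    vm_map_round_page(kaddr + ksize, VM_MAP_PAGE_MASK(kernel_map)),
 *	    VM_MAP_REMOVE_KUNWIRE);
 *	assert(kr == KERN_SUCCESS);
 */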
/*
 *	vm_map_remove_locked:
 *
 *	Remove the given address range from the target locked map.
 *	This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove_locked(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t flags)
{
	kern_return_t result;

	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	return result;
}


/*
 *	Routine:	vm_map_copy_allocate
 *
 *	Description:
 *		Allocates and initializes a map copy object.
 */
static vm_map_copy_t
vm_map_copy_allocate(void)
{
	vm_map_copy_t new_copy;

	new_copy = zalloc(vm_map_copy_zone);
	bzero(new_copy, sizeof(*new_copy));
	new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
	vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
	vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
	return new_copy;
}
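/*
 * Illustrative sketch (not part of the original source): internal
 * callers that build an entry-list copy object by hand pair this
 * allocator with vm_map_copy_discard() on their error paths,
 * roughly ("failed" is a placeholder condition):
 *
 *	vm_map_copy_t copy;
 *
 *	copy = vm_map_copy_allocate();
 *	copy->type = VM_MAP_COPY_ENTRY_LIST;
 *	copy->offset = 0;
 *	copy->size = 0;
 *	... link entries into "copy" ...
 *	if (failed) {
 *		vm_map_copy_discard(copy);
 *	}
 */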
/*
 *	Routine:	vm_map_copy_discard
 *
 *	Description:
 *		Dispose of a map copy object (returned by
 *		vm_map_copyin).
 */
void
vm_map_copy_discard(
	vm_map_copy_t copy)
{
	if (copy == VM_MAP_COPY_NULL) {
		return;
	}

	switch (copy->type) {
	case VM_MAP_COPY_ENTRY_LIST:
		while (vm_map_copy_first_entry(copy) !=
		    vm_map_copy_to_entry(copy)) {
			vm_map_entry_t entry = vm_map_copy_first_entry(copy);

			vm_map_copy_entry_unlink(copy, entry);
			if (entry->is_sub_map) {
				vm_map_deallocate(VME_SUBMAP(entry));
			} else {
				vm_object_deallocate(VME_OBJECT(entry));
			}
			vm_map_copy_entry_dispose(copy, entry);
		}
		break;
	case VM_MAP_COPY_OBJECT:
		vm_object_deallocate(copy->cpy_object);
		break;
	case VM_MAP_COPY_KERNEL_BUFFER:

		/*
		 * The vm_map_copy_t and possibly the data buffer were
		 * allocated by a single call to kalloc(), i.e. the
		 * vm_map_copy_t was not allocated out of the zone.
		 */
		if (copy->size > msg_ool_size_small || copy->offset) {
			panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
			    (long long)copy->size, (long long)copy->offset);
		}
		kfree(copy, copy->size + cpy_kdata_hdr_sz);
		return;
	}
	zfree(vm_map_copy_zone, copy);
}

/*
 *	Routine:	vm_map_copy_copy
 *
 *	Description:
 *		Move the information in a map copy object to
 *		a new map copy object, leaving the old one
 *		empty.
 *
 *		This is used by kernel routines that need
 *		to look at out-of-line data (in copyin form)
 *		before deciding whether to return SUCCESS.
 *		If the routine returns FAILURE, the original
 *		copy object will be deallocated; therefore,
 *		these routines must make a copy of the copy
 *		object and leave the original empty so that
 *		deallocation will not fail.
 */
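/*
 * Illustrative sketch (not part of the original source): a kernel
 * routine inspecting copyin data before deciding whether to keep it
 * follows the pattern described above, roughly ("validate" is a
 * hypothetical helper):
 *
 *	vm_map_copy_t my_copy = vm_map_copy_copy(copy);
 *
 *	if (validate(my_copy) != KERN_SUCCESS) {
 *		vm_map_copy_discard(my_copy);
 *		return KERN_FAILURE;   the caller's deallocation of the
 *		                       now-empty "copy" destroys nothing
 *	}
 *	... keep using my_copy from here on ...
 */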
vm_map_copy_t
vm_map_copy_copy(
	vm_map_copy_t copy)
{
	vm_map_copy_t new_copy;

	if (copy == VM_MAP_COPY_NULL) {
		return VM_MAP_COPY_NULL;
	}

	/*
	 * Allocate a new copy object, and copy the information
	 * from the old one into it.
	 */

	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	*new_copy = *copy;

	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * The links in the entry chain must be
		 * changed to point to the new copy object.
		 */
		vm_map_copy_first_entry(copy)->vme_prev
		        = vm_map_copy_to_entry(new_copy);
		vm_map_copy_last_entry(copy)->vme_next
		        = vm_map_copy_to_entry(new_copy);
	}

	/*
	 * Change the old copy object into one that contains
	 * nothing to be deallocated.
	 */
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = VM_OBJECT_NULL;

	/*
	 * Return the new object.
	 */
	return new_copy;
}
static kern_return_t
vm_map_overwrite_submap_recurse(
	vm_map_t dst_map,
	vm_map_offset_t dst_addr,
	vm_map_size_t dst_size)
{
	vm_map_offset_t dst_end;
	vm_map_entry_t tmp_entry;
	vm_map_entry_t entry;
	kern_return_t result;
	boolean_t encountered_sub_map = FALSE;


	/*
	 * Verify that the destination is all writeable
	 * initially.  We have to trunc the destination
	 * address and round the copy size or we'll end up
	 * splitting entries in strange ways.
	 */

	dst_end = vm_map_round_page(dst_addr + dst_size,
	    VM_MAP_PAGE_MASK(dst_map));
	vm_map_lock(dst_map);

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}

	vm_map_clip_start(dst_map,
	    tmp_entry,
	    vm_map_trunc_page(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)));
	if (tmp_entry->is_sub_map) {
		/* clipping did unnest if needed */
		assert(!tmp_entry->use_pmap);
	}

	for (entry = tmp_entry;;) {
		vm_map_entry_t next;

		next = entry->vme_next;
		while (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			encountered_sub_map = TRUE;
			sub_start = VME_OFFSET(entry);

			if (entry->vme_end < dst_end) {
				sub_end = entry->vme_end;
			} else {
				sub_end = dst_end;
			}
			sub_end -= entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			vm_map_unlock(dst_map);

			result = vm_map_overwrite_submap_recurse(
				VME_SUBMAP(entry),
				sub_start,
				sub_end - sub_start);

			if (result != KERN_SUCCESS) {
				return result;
			}
			if (dst_end <= entry->vme_end) {
				return KERN_SUCCESS;
			}
			vm_map_lock(dst_map);
			if (!vm_map_lookup_entry(dst_map, local_end,
			    &tmp_entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			entry = tmp_entry;
			next = entry->vme_next;
		}

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * If the entry is in transition, we must wait
		 * for it to exit that state.  Anything could happen
		 * when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 * our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end) {
			vm_map_unlock(dst_map);
			return KERN_SUCCESS;
		}
		/*
		 * check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * Check for permanent objects in the destination.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		    (VME_OBJECT(entry)->true_share))) {
			if (encountered_sub_map) {
				vm_map_unlock(dst_map);
				return KERN_FAILURE;
			}
		}

		entry = next;
	}/* for */
	vm_map_unlock(dst_map);
	return KERN_SUCCESS;
}
/*
 *	Routine:	vm_map_copy_overwrite
 *
 *	Description:
 *		Copy the memory described by the map copy
 *		object (copy; returned by vm_map_copyin) onto
 *		the specified destination region (dst_map, dst_addr).
 *		The destination must be writeable.
 *
 *		Unlike vm_map_copyout, this routine actually
 *		writes over previously-mapped memory.  If the
 *		previous mapping was to a permanent (user-supplied)
 *		memory object, it is preserved.
 *
 *		The attributes (protection and inheritance) of the
 *		destination region are preserved.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 *
 *	Implementation notes:
 *		To overwrite aligned temporary virtual memory, it is
 *		sufficient to remove the previous mapping and insert
 *		the new copy.  This replacement is done either on
 *		the whole region (if no permanent virtual memory
 *		objects are embedded in the destination region) or
 *		in individual map entries.
 *
 *		To overwrite permanent virtual memory, it is necessary
 *		to copy each page, as the external memory management
 *		interface currently does not provide any optimizations.
 *
 *		Unaligned memory also has to be copied.  It is possible
 *		to use 'vm_trickery' to copy the aligned data.  This is
 *		not done but not hard to implement.
 *
 *		Once a page of permanent memory has been overwritten,
 *		it is impossible to interrupt this function; otherwise,
 *		the call would be neither atomic nor location-independent.
 *		The kernel-state portion of a user thread must be
 *		interruptible.
 *
 *		It may be expensive to forward all requests that might
 *		overwrite permanent memory (vm_write, vm_copy) to
 *		uninterruptible kernel threads.  This routine may be
 *		called by interruptible threads; however, success is
 *		not guaranteed -- if the request cannot be performed
 *		atomically and interruptibly, an error indication is
 *		returned.
 */
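/*
 * Illustrative sketch (not part of the original source): the typical
 * producer/consumer pairing for this routine is a vm_map_copyin()
 * followed by an overwrite of an existing writeable destination,
 * roughly ("src_map", "dst_map" and the addresses are placeholders):
 *
 *	vm_map_copy_t copy;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
 *		if (kr != KERN_SUCCESS) {
 *			vm_map_copy_discard(copy);   not consumed on failure
 *		}
 *	}
 */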
static kern_return_t
vm_map_copy_overwrite_nested(
	vm_map_t dst_map,
	vm_map_address_t dst_addr,
	vm_map_copy_t copy,
	boolean_t interruptible,
	pmap_t pmap,
	boolean_t discard_on_success)
{
	vm_map_offset_t dst_end;
	vm_map_entry_t tmp_entry;
	vm_map_entry_t entry;
	kern_return_t kr;
	boolean_t aligned = TRUE;
	boolean_t contains_permanent_objects = FALSE;
	boolean_t encountered_sub_map = FALSE;
	vm_map_offset_t base_addr;
	vm_map_size_t copy_size;
	vm_map_size_t total_size;


	/*
	 * Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		return KERN_SUCCESS;
	}

	/*
	 * Check for special kernel buffer allocated
	 * by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(
			dst_map, &dst_addr,
			copy, copy->size, TRUE, discard_on_success);
	}

	/*
	 * Only works for entry lists at the moment.  Will
	 * support page lists later.
	 */

	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);

	if (copy->size == 0) {
		if (discard_on_success) {
			vm_map_copy_discard(copy);
		}
		return KERN_SUCCESS;
	}

	/*
	 * Verify that the destination is all writeable
	 * initially.  We have to trunc the destination
	 * address and round the copy size or we'll end up
	 * splitting entries in strange ways.
	 */

	if (!VM_MAP_PAGE_ALIGNED(copy->size,
	    VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(copy->offset,
	    VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map))) {
		aligned = FALSE;
		dst_end = vm_map_round_page(dst_addr + copy->size,
		    VM_MAP_PAGE_MASK(dst_map));
	} else {
		dst_end = dst_addr + copy->size;
	}

	vm_map_lock(dst_map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (dst_addr >= dst_map->max_offset) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return KERN_INVALID_ADDRESS;
	}
	vm_map_clip_start(dst_map,
	    tmp_entry,
	    vm_map_trunc_page(dst_addr,
	    VM_MAP_PAGE_MASK(dst_map)));
	for (entry = tmp_entry;;) {
		vm_map_entry_t next = entry->vme_next;

		while (entry->is_sub_map) {
			vm_map_offset_t sub_start;
			vm_map_offset_t sub_end;
			vm_map_offset_t local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			local_end = entry->vme_end;
			if (!(entry->needs_copy)) {
				/* if needs_copy we are a COW submap */
				/* in such a case we just replace so */
				/* there is no need for the follow-  */
				/* ing check.                        */
				encountered_sub_map = TRUE;
				sub_start = VME_OFFSET(entry);

				if (entry->vme_end < dst_end) {
					sub_end = entry->vme_end;
				} else {
					sub_end = dst_end;
				}
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				vm_map_unlock(dst_map);

				kr = vm_map_overwrite_submap_recurse(
					VME_SUBMAP(entry),
					sub_start,
					sub_end - sub_start);
				if (kr != KERN_SUCCESS) {
					return kr;
				}
				vm_map_lock(dst_map);
			}

			if (dst_end <= entry->vme_end) {
				goto start_overwrite;
			}
			if (!vm_map_lookup_entry(dst_map, local_end,
			    &entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			next = entry->vme_next;
		}

		if (!(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * If the entry is in transition, we must wait
		 * for it to exit that state.  Anything could happen
		 * when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 * our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end) {
			break;
		}
		/*
		 * check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}


		/*
		 * Check for permanent objects in the destination.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		    (VME_OBJECT(entry)->true_share))) {
			contains_permanent_objects = TRUE;
		}

		entry = next;
	}/* for */

start_overwrite:
	/*
	 * If there are permanent objects in the destination, then
	 * the copy cannot be interrupted.
	 */

	if (interruptible && contains_permanent_objects) {
		vm_map_unlock(dst_map);
		return KERN_FAILURE;    /* XXX */
	}

	/*
	 *
	 * Make a second pass, overwriting the data
	 * At the beginning of each loop iteration,
	 * the next entry to be overwritten is "tmp_entry"
	 * (initially, the value returned from the lookup above),
	 * and the starting address expected in that entry
	 * is "start".
	 */

	total_size = copy->size;
	if (encountered_sub_map) {
		copy_size = 0;
		/* re-calculate tmp_entry since we've had the map */
		/* unlocked */
		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
	} else {
		copy_size = copy->size;
	}

	base_addr = dst_addr;
	while (TRUE) {
		/* deconstruct the copy object and do in parts */
		/* only in sub_map, interruptable case */
		vm_map_entry_t copy_entry;
		vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
		vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
		int nentries;
		int remaining_entries = 0;
		vm_map_offset_t new_offset = 0;

		for (entry = tmp_entry; copy_size == 0;) {
			vm_map_entry_t next;

			next = entry->vme_next;

			/* tmp_entry and base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpace tmp_entry, and the copy_size */
			/* may reflect the distance between them */
			/* if the current entry is found to be in transition */
			/* we will start over at the beginning or the last */
			/* encounter of a submap as dictated by base_addr */
			/* we will zero copy_size accordingly. */
			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				if (!vm_map_lookup_entry(dst_map, base_addr,
				    &tmp_entry)) {
					vm_map_unlock(dst_map);
					return KERN_INVALID_ADDRESS;
				}
				copy_size = 0;
				entry = tmp_entry;
				continue;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;
				vm_map_offset_t local_end;

				if (entry->needs_copy) {
					/* if this is a COW submap */
					/* just back the range with an */
					/* anonymous entry */
					if (entry->vme_end < dst_end) {
						sub_end = entry->vme_end;
					} else {
						sub_end = dst_end;
					}
					if (entry->vme_start < base_addr) {
						sub_start = base_addr;
					} else {
						sub_start = entry->vme_start;
					}
					vm_map_clip_end(
						dst_map, entry, sub_end);
					vm_map_clip_start(
						dst_map, entry, sub_start);
					assert(!entry->use_pmap);
					assert(!entry->iokit_acct);
					entry->use_pmap = TRUE;
					entry->is_sub_map = FALSE;
					vm_map_deallocate(
						VME_SUBMAP(entry));
					VME_OBJECT_SET(entry, VM_OBJECT_NULL);
					VME_OFFSET_SET(entry, 0);
					entry->is_shared = FALSE;
					entry->needs_copy = FALSE;
					entry->protection = VM_PROT_DEFAULT;
					entry->max_protection = VM_PROT_ALL;
					entry->wired_count = 0;
					entry->user_wired_count = 0;
					if (entry->inheritance
					    == VM_INHERIT_SHARE) {
						entry->inheritance = VM_INHERIT_COPY;
					}
					continue;
				}
				/* first take care of any non-sub_map */
				/* entries to send */
				if (base_addr < entry->vme_start) {
					/* stuff to send */
					copy_size =
					    entry->vme_start - base_addr;
					break;
				}
				sub_start = VME_OFFSET(entry);

				if (entry->vme_end < dst_end) {
					sub_end = entry->vme_end;
				} else {
					sub_end = dst_end;
				}
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				local_end = entry->vme_end;
				vm_map_unlock(dst_map);
				copy_size = sub_end - sub_start;

				/* adjust the copy object */
				if (total_size > copy_size) {
					vm_map_size_t local_size = 0;
					vm_map_size_t entry_size;

					nentries = 1;
					new_offset = copy->offset;
					copy_entry = vm_map_copy_first_entry(copy);
					while (copy_entry !=
					    vm_map_copy_to_entry(copy)) {
						entry_size = copy_entry->vme_end -
						    copy_entry->vme_start;
						if ((local_size < copy_size) &&
						    ((local_size + entry_size)
						    >= copy_size)) {
							vm_map_copy_clip_end(copy,
							    copy_entry,
							    copy_entry->vme_start +
							    (copy_size - local_size));
							entry_size = copy_entry->vme_end -
							    copy_entry->vme_start;
							local_size += entry_size;
							new_offset += entry_size;
						}
						if (local_size >= copy_size) {
							next_copy = copy_entry->vme_next;
							copy_entry->vme_next =
							    vm_map_copy_to_entry(copy);
							previous_prev =
							    copy->cpy_hdr.links.prev;
							copy->cpy_hdr.links.prev = copy_entry;
							copy->size = copy_size;
							remaining_entries =
							    copy->cpy_hdr.nentries;
							remaining_entries -= nentries;
							copy->cpy_hdr.nentries = nentries;
							break;
						} else {
							local_size += entry_size;
							new_offset += entry_size;
							nentries++;
						}
						copy_entry = copy_entry->vme_next;
					}
				}

				if ((entry->use_pmap) && (pmap == NULL)) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						VME_SUBMAP(entry)->pmap,
						TRUE);
				} else if (pmap != NULL) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible, pmap,
						TRUE);
				} else {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						dst_map->pmap,
						TRUE);
				}
				if (kr != KERN_SUCCESS) {
					if (next_copy != NULL) {
						copy->cpy_hdr.nentries +=
						    remaining_entries;
						copy->cpy_hdr.links.prev->vme_next =
						    next_copy;
						copy->cpy_hdr.links.prev
						    = previous_prev;
						copy->size = total_size;
					}
					return kr;
				}
				if (dst_end <= local_end) {
					return KERN_SUCCESS;
				}
				/* otherwise copy no longer exists, it was */
				/* destroyed after successful copy_overwrite */
				copy = vm_map_copy_allocate();
				copy->type = VM_MAP_COPY_ENTRY_LIST;
				copy->offset = new_offset;

				/*
				 * XXX FBDP
				 * this does not seem to deal with
				 * the VM map store (R&B tree)
				 */

				total_size -= copy_size;
				copy_size = 0;
				/* put back remainder of copy in container */
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries = remaining_entries;
					copy->cpy_hdr.links.next = next_copy;
					copy->cpy_hdr.links.prev = previous_prev;
					copy->size = total_size;
					next_copy->vme_prev =
					    vm_map_copy_to_entry(copy);
					next_copy = NULL;
				}
				base_addr = local_end;
				vm_map_lock(dst_map);
				if (!vm_map_lookup_entry(dst_map,
				    local_end, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return KERN_INVALID_ADDRESS;
				}
				entry = tmp_entry;
				continue;
			}
			if (dst_end <= entry->vme_end) {
				copy_size = dst_end - base_addr;
				break;
			}

			if ((next == vm_map_to_entry(dst_map)) ||
			    (next->vme_start != entry->vme_end)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}

			entry = next;
		}/* for */

		next_copy = NULL;
		nentries = 1;

		/* adjust the copy object */
		if (total_size > copy_size) {
			vm_map_size_t local_size = 0;
			vm_map_size_t entry_size;

			new_offset = copy->offset;
			copy_entry = vm_map_copy_first_entry(copy);
			while (copy_entry != vm_map_copy_to_entry(copy)) {
				entry_size = copy_entry->vme_end -
				    copy_entry->vme_start;
				if ((local_size < copy_size) &&
				    ((local_size + entry_size)
				    >= copy_size)) {
					vm_map_copy_clip_end(copy, copy_entry,
					    copy_entry->vme_start +
					    (copy_size - local_size));
					entry_size = copy_entry->vme_end -
					    copy_entry->vme_start;
					local_size += entry_size;
					new_offset += entry_size;
				}
				if (local_size >= copy_size) {
					next_copy = copy_entry->vme_next;
					copy_entry->vme_next =
					    vm_map_copy_to_entry(copy);
					previous_prev =
					    copy->cpy_hdr.links.prev;
					copy->cpy_hdr.links.prev = copy_entry;
					copy->size = copy_size;
					remaining_entries =
					    copy->cpy_hdr.nentries;
					remaining_entries -= nentries;
					copy->cpy_hdr.nentries = nentries;
					break;
				} else {
					local_size += entry_size;
					new_offset += entry_size;
					nentries++;
				}
				copy_entry = copy_entry->vme_next;
			}
		}

		if (aligned) {
			pmap_t local_pmap;

			if (pmap) {
				local_pmap = pmap;
			} else {
				local_pmap = dst_map->pmap;
			}

			if ((kr = vm_map_copy_overwrite_aligned(
				    dst_map, tmp_entry, copy,
				    base_addr, local_pmap)) != KERN_SUCCESS) {
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
					    remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
					    next_copy;
					copy->cpy_hdr.links.prev =
					    previous_prev;
					copy->size += copy_size;
				}
				vm_map_unlock(dst_map);
				return kr;
			}
			vm_map_unlock(dst_map);
		} else {
			/*
			 * Performance gain:
			 *
			 * if the copy and dst address are misaligned but the same
			 * offset within the page we can copy_not_aligned the
			 * misaligned parts and copy aligned the rest.  If they are
			 * aligned but len is unaligned we simply need to copy
			 * the end bit unaligned.  We'll need to split the misaligned
			 * bits of the region in this case !
			 */
			/* ALWAYS UNLOCKS THE dst_map MAP */
			kr = vm_map_copy_overwrite_unaligned(
				dst_map,
				tmp_entry,
				copy,
				base_addr,
				discard_on_success);
			if (kr != KERN_SUCCESS) {
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
					    remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
					    next_copy;
					copy->cpy_hdr.links.prev =
					    previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
		}
		total_size -= copy_size;
		if (total_size == 0) {
			break;
		}
		base_addr += copy_size;
		copy_size = 0;
		copy->offset = new_offset;
		if (next_copy != NULL) {
			copy->cpy_hdr.nentries = remaining_entries;
			copy->cpy_hdr.links.next = next_copy;
			copy->cpy_hdr.links.prev = previous_prev;
			next_copy->vme_prev = vm_map_copy_to_entry(copy);
			copy->size = total_size;
		}
		vm_map_lock(dst_map);
		while (TRUE) {
			if (!vm_map_lookup_entry(dst_map,
			    base_addr, &tmp_entry)) {
				vm_map_unlock(dst_map);
				return KERN_INVALID_ADDRESS;
			}
			if (tmp_entry->in_transition) {
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);
			} else {
				break;
			}
		}
		vm_map_clip_start(dst_map,
		    tmp_entry,
		    vm_map_trunc_page(base_addr,
		    VM_MAP_PAGE_MASK(dst_map)));
	}/* while */

	/*
	 * Throw away the vm_map_copy object
	 */
	if (discard_on_success) {
		vm_map_copy_discard(copy);
	}

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite */
kern_return_t
vm_map_copy_overwrite(
	vm_map_t dst_map,
	vm_map_offset_t dst_addr,
	vm_map_copy_t copy,
	boolean_t interruptible)
{
	vm_map_size_t head_size, tail_size;
	vm_map_copy_t head_copy, tail_copy;
	vm_map_offset_t head_addr, tail_addr;
	vm_map_entry_t entry;
	kern_return_t kr;
	vm_map_offset_t effective_page_mask, effective_page_size;

	head_size = 0;
	tail_size = 0;
	head_copy = NULL;
	tail_copy = NULL;
	head_addr = 0;
	tail_addr = 0;

	if (interruptible ||
	    copy == VM_MAP_COPY_NULL ||
	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * We can't split the "copy" map if we're interruptible
		 * or if we don't have a "copy" map...
		 */
blunt_copy:
		return vm_map_copy_overwrite_nested(dst_map,
		           dst_addr,
		           copy,
		           interruptible,
		           (pmap_t) NULL,
		           TRUE);
	}

	effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
	effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
	    effective_page_mask);
	effective_page_size = effective_page_mask + 1;

	if (copy->size < 3 * effective_page_size) {
		/*
		 * Too small to bother with optimizing...
		 */
		goto blunt_copy;
	}

	if ((dst_addr & effective_page_mask) !=
	    (copy->offset & effective_page_mask)) {
		/*
		 * Incompatible mis-alignment of source and destination...
		 */
		goto blunt_copy;
	}

	/*
	 * Proper alignment or identical mis-alignment at the beginning.
	 * Let's try and do a small unaligned copy first (if needed)
	 * and then an aligned copy for the rest.
	 */
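	/*
	 * Worked example (illustrative, not part of the original source):
	 * with a 16K effective page size (mask 0x3fff), copy->offset ending
	 * in 0x2200 and copy->size == 0x11000 (and dst_addr sharing the same
	 * 0x2200 offset within the page):
	 *
	 *	head_size = 0x4000 - 0x2200                   = 0x1e00
	 *	tail_size = (0x2200 + 0x11000) & 0x3fff       = 0x3200
	 *	middle    = 0x11000 - 0x1e00 - 0x3200         = 0xc000
	 *
	 * i.e. three fully aligned pages are left for the fast aligned copy
	 * between the small unaligned head and tail copies.
	 */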
	if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
		head_addr = dst_addr;
		head_size = (effective_page_size -
		    (copy->offset & effective_page_mask));
		head_size = MIN(head_size, copy->size);
	}
	if (!vm_map_page_aligned(copy->offset + copy->size,
	    effective_page_mask)) {
		/*
		 * Mis-alignment at the end.
		 * Do an aligned copy up to the last page and
		 * then an unaligned copy for the remaining bytes.
		 */
		tail_size = ((copy->offset + copy->size) &
		    effective_page_mask);
		tail_size = MIN(tail_size, copy->size);
		tail_addr = dst_addr + copy->size - tail_size;
		assert(tail_addr >= head_addr + head_size);
	}
	assert(head_size + tail_size <= copy->size);

	if (head_size + tail_size == copy->size) {
		/*
		 * It's all unaligned, no optimization possible...
		 */
		goto blunt_copy;
	}

	/*
	 * Can't optimize if there are any submaps in the
	 * destination due to the way we free the "copy" map
	 * progressively in vm_map_copy_overwrite_nested()
	 * in that case...
	 */
	vm_map_lock_read(dst_map);
	if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
		vm_map_unlock_read(dst_map);
		goto blunt_copy;
	}
	for (;
	    (entry != vm_map_copy_to_entry(copy) &&
	    entry->vme_start < dst_addr + copy->size);
	    entry = entry->vme_next) {
		if (entry->is_sub_map) {
			vm_map_unlock_read(dst_map);
			goto blunt_copy;
		}
	}
	vm_map_unlock_read(dst_map);

	if (head_size) {
		/*
		 * Unaligned copy of the first "head_size" bytes, to reach
		 * a page boundary.
		 */

		/*
		 * Extract "head_copy" out of "copy".
		 */
		head_copy = vm_map_copy_allocate();
		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
		head_copy->cpy_hdr.entries_pageable =
		    copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&head_copy->cpy_hdr);

		entry = vm_map_copy_first_entry(copy);
		if (entry->vme_end < copy->offset + head_size) {
			head_size = entry->vme_end - copy->offset;
		}

		head_copy->offset = copy->offset;
		head_copy->size = head_size;
		copy->offset += head_size;
		copy->size -= head_size;

		vm_map_copy_clip_end(copy, entry, copy->offset);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(head_copy,
		    vm_map_copy_to_entry(head_copy),
		    entry);

		/*
		 * Do the unaligned copy.
		 */
		kr = vm_map_copy_overwrite_nested(dst_map,
		    head_addr, head_copy, interruptible,
		    (pmap_t) NULL, FALSE);
		if (kr != KERN_SUCCESS) {
			goto done;
		}
	}

	if (tail_size) {
		/*
		 * Extract "tail_copy" out of "copy".
		 */
		tail_copy = vm_map_copy_allocate();
		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
		tail_copy->cpy_hdr.entries_pageable =
		    copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&tail_copy->cpy_hdr);

		tail_copy->offset = copy->offset + copy->size - tail_size;
		tail_copy->size = tail_size;

		copy->size -= tail_size;

		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(tail_copy,
		    vm_map_copy_last_entry(tail_copy),
		    entry);
	}

	/*
	 * Copy most (or possibly all) of the data.
	 */
	kr = vm_map_copy_overwrite_nested(dst_map,
	    dst_addr + head_size,
	    copy,
	    interruptible,
	    (pmap_t) NULL,
	    FALSE);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	if (tail_size) {
		kr = vm_map_copy_overwrite_nested(dst_map,
		    tail_addr,
		    tail_copy,
		    interruptible,
		    (pmap_t) NULL,
		    FALSE);
	}

done:
	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
	if (kr == KERN_SUCCESS) {
		/*
		 * Discard all the copy maps.
		 */
		if (head_copy) {
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		vm_map_copy_discard(copy);
		if (tail_copy) {
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	} else {
		/*
		 * Re-assemble the original copy map.
		 */
		if (head_copy) {
			entry = vm_map_copy_first_entry(head_copy);
			vm_map_copy_entry_unlink(head_copy, entry);
			vm_map_copy_entry_link(copy,
			    vm_map_copy_to_entry(copy),
			    entry);
			copy->offset -= head_size;
			copy->size += head_size;
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		if (tail_copy) {
			entry = vm_map_copy_last_entry(tail_copy);
			vm_map_copy_entry_unlink(tail_copy, entry);
			vm_map_copy_entry_link(copy,
			    vm_map_copy_last_entry(copy),
			    entry);
			copy->size += tail_size;
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	}
	return kr;
}
/*
 *	Routine: vm_map_copy_overwrite_unaligned	[internal use only]
 *
 *	Description:
 *	Physically copy unaligned data.
 *
 *	Implementation:
 *	Unaligned parts of pages have to be physically copied.  We use
 *	a modified form of vm_fault_copy (which understands non-aligned
 *	page offsets and sizes) to do the copy.  We attempt to copy as
 *	much memory in one go as possible; however, vm_fault_copy copies
 *	within one memory object, so we have to find the smallest of
 *	"amount left", "source object data size" and "target object data
 *	size".  With unaligned data we don't need to split regions, so the
 *	source (copy) object should be one map entry; the target range may
 *	be split over multiple map entries, however.  In any event we are
 *	pessimistic about these assumptions.
 *
 *	Assumptions:
 *	dst_map is locked on entry and is returned locked on success,
 *	unlocked on error.
 */
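/*
 * Illustrative sketch (not part of the build): the pass-size computation used
 * by the unaligned overwrite path -- each physical copy pass is bounded by the
 * smallest of "amount left", the bytes remaining in the current source (copy)
 * entry and the bytes remaining in the current destination entry.
 * Simplified and standalone; the names below are hypothetical.
 */
#if 0
#include <stdint.h>

static uint64_t
unaligned_pass_size(uint64_t amount_left,
    uint64_t src_entry_end, uint64_t src_cursor,
    uint64_t dst_entry_end, uint64_t dst_cursor)
{
	uint64_t src_size = src_entry_end - src_cursor; /* left in source entry */
	uint64_t dst_size = dst_entry_end - dst_cursor; /* left in dest entry */
	uint64_t copy_size = (dst_size < src_size) ? dst_size : src_size;

	if (copy_size > amount_left) {
		copy_size = amount_left;
	}
	return copy_size;
}
#endif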
9599 static kern_return_t
9600 vm_map_copy_overwrite_unaligned(
9602 vm_map_entry_t entry
,
9604 vm_map_offset_t start
,
9605 boolean_t discard_on_success
)
9607 vm_map_entry_t copy_entry
;
9608 vm_map_entry_t copy_entry_next
;
9609 vm_map_version_t version
;
9610 vm_object_t dst_object
;
9611 vm_object_offset_t dst_offset
;
9612 vm_object_offset_t src_offset
;
9613 vm_object_offset_t entry_offset
;
9614 vm_map_offset_t entry_end
;
9615 vm_map_size_t src_size
,
9619 kern_return_t kr
= KERN_SUCCESS
;
9622 copy_entry
= vm_map_copy_first_entry(copy
);
9624 vm_map_lock_write_to_read(dst_map
);
9626 src_offset
= copy
->offset
- vm_object_trunc_page(copy
->offset
);
9627 amount_left
= copy
->size
;
	/*
	 * Unaligned, so we never clipped this entry; we need the offset
	 * into the vm_object, not just the data.
	 */
9632 while (amount_left
> 0) {
9633 if (entry
== vm_map_to_entry(dst_map
)) {
9634 vm_map_unlock_read(dst_map
);
9635 return KERN_INVALID_ADDRESS
;
9638 /* "start" must be within the current map entry */
9639 assert((start
>= entry
->vme_start
) && (start
< entry
->vme_end
));
9641 dst_offset
= start
- entry
->vme_start
;
9643 dst_size
= entry
->vme_end
- start
;
9645 src_size
= copy_entry
->vme_end
-
9646 (copy_entry
->vme_start
+ src_offset
);
9648 if (dst_size
< src_size
) {
9650 * we can only copy dst_size bytes before
9651 * we have to get the next destination entry
9653 copy_size
= dst_size
;
9656 * we can only copy src_size bytes before
9657 * we have to get the next source copy entry
9659 copy_size
= src_size
;
9662 if (copy_size
> amount_left
) {
9663 copy_size
= amount_left
;
		/*
		 * Entry needs copy: create a shadow object for the
		 * copy-on-write region.
		 */
9669 if (entry
->needs_copy
&&
9670 ((entry
->protection
& VM_PROT_WRITE
) != 0)) {
9671 if (vm_map_lock_read_to_write(dst_map
)) {
9672 vm_map_lock_read(dst_map
);
9675 VME_OBJECT_SHADOW(entry
,
9676 (vm_map_size_t
)(entry
->vme_end
9677 - entry
->vme_start
));
9678 entry
->needs_copy
= FALSE
;
9679 vm_map_lock_write_to_read(dst_map
);
9681 dst_object
= VME_OBJECT(entry
);
9683 * unlike with the virtual (aligned) copy we're going
9684 * to fault on it therefore we need a target object.
9686 if (dst_object
== VM_OBJECT_NULL
) {
9687 if (vm_map_lock_read_to_write(dst_map
)) {
9688 vm_map_lock_read(dst_map
);
9691 dst_object
= vm_object_allocate((vm_map_size_t
)
9692 entry
->vme_end
- entry
->vme_start
);
9693 VME_OBJECT_SET(entry
, dst_object
);
9694 VME_OFFSET_SET(entry
, 0);
9695 assert(entry
->use_pmap
);
9696 vm_map_lock_write_to_read(dst_map
);
		/*
		 * Take an object reference and unlock the map.  The "entry"
		 * may disappear or change when the map is unlocked.
		 */
9702 vm_object_reference(dst_object
);
9703 version
.main_timestamp
= dst_map
->timestamp
;
9704 entry_offset
= VME_OFFSET(entry
);
9705 entry_end
= entry
->vme_end
;
9706 vm_map_unlock_read(dst_map
);
9708 * Copy as much as possible in one pass
9711 VME_OBJECT(copy_entry
),
9712 VME_OFFSET(copy_entry
) + src_offset
,
9715 entry_offset
+ dst_offset
,
9721 src_offset
+= copy_size
;
9722 amount_left
-= copy_size
;
9724 * Release the object reference
9726 vm_object_deallocate(dst_object
);
9728 * If a hard error occurred, return it now
9730 if (kr
!= KERN_SUCCESS
) {
9734 if ((copy_entry
->vme_start
+ src_offset
) == copy_entry
->vme_end
9735 || amount_left
== 0) {
9737 * all done with this copy entry, dispose.
9739 copy_entry_next
= copy_entry
->vme_next
;
9741 if (discard_on_success
) {
9742 vm_map_copy_entry_unlink(copy
, copy_entry
);
9743 assert(!copy_entry
->is_sub_map
);
9744 vm_object_deallocate(VME_OBJECT(copy_entry
));
9745 vm_map_copy_entry_dispose(copy
, copy_entry
);
9748 if (copy_entry_next
== vm_map_copy_to_entry(copy
) &&
9751 * not finished copying but run out of source
9753 return KERN_INVALID_ADDRESS
;
9756 copy_entry
= copy_entry_next
;
9761 if (amount_left
== 0) {
9762 return KERN_SUCCESS
;
9765 vm_map_lock_read(dst_map
);
9766 if (version
.main_timestamp
== dst_map
->timestamp
) {
9767 if (start
== entry_end
) {
9769 * destination region is split. Use the version
9770 * information to avoid a lookup in the normal
9773 entry
= entry
->vme_next
;
9775 * should be contiguous. Fail if we encounter
9776 * a hole in the destination.
9778 if (start
!= entry
->vme_start
) {
9779 vm_map_unlock_read(dst_map
);
9780 return KERN_INVALID_ADDRESS
;
		/*
		 * Map version check failed.
		 * We must look up the entry because somebody
		 * might have changed the map behind our backs.
		 */
9790 if (!vm_map_lookup_entry(dst_map
, start
, &entry
)) {
9791 vm_map_unlock_read(dst_map
);
9792 return KERN_INVALID_ADDRESS
;
9797 return KERN_SUCCESS
;
9798 }/* vm_map_copy_overwrite_unaligned */
/*
 *	Routine: vm_map_copy_overwrite_aligned	[internal use only]
 *
 *	Description:
 *	Does all the vm_trickery possible for whole pages.
 *
 *	Implementation:
 *	If there are no permanent objects in the destination,
 *	and the source and destination map entry zones match,
 *	and the destination map entry is not shared,
 *	then the map entries can be deleted and replaced
 *	with those from the copy.  The following code is the
 *	basic idea of what to do, but there are lots of annoying
 *	little details about getting protection and inheritance
 *	right.  Should add protection, inheritance, and sharing checks
 *	to the above pass and make sure that no wiring is involved.
 */
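/*
 * Illustrative sketch (not part of the build): the core idea of the aligned
 * overwrite path -- when the destination entry is temporary and unshared, the
 * copy can be performed by dropping the destination's backing object and
 * installing the source object, instead of copying any page contents.
 * Toy structures only; "ex_entry"/"ex_object" are hypothetical names.
 */
#if 0
#include <stdlib.h>

struct ex_object {
	int refcount;
	int internal;    /* anonymous memory? */
	int true_share;  /* really shared with someone else? */
};

struct ex_entry {
	struct ex_object *object;
	int is_shared;
};

/* Returns 1 if the cheap "swap the backing object" path can be used. */
static int
can_swap_objects(const struct ex_entry *dst)
{
	return !dst->is_shared &&
	    (dst->object == NULL ||
	    (dst->object->internal && !dst->object->true_share));
}

static void
aligned_overwrite(struct ex_entry *dst, struct ex_object *src_object)
{
	if (can_swap_objects(dst)) {
		if (dst->object != NULL && --dst->object->refcount == 0) {
			free(dst->object);       /* throw the old pages away */
		}
		src_object->refcount++;
		dst->object = src_object;        /* install the source data */
	}
	/* otherwise a physical (page-by-page) copy would be needed */
}
#endif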
9819 int vm_map_copy_overwrite_aligned_src_not_internal
= 0;
9820 int vm_map_copy_overwrite_aligned_src_not_symmetric
= 0;
9821 int vm_map_copy_overwrite_aligned_src_large
= 0;
9823 static kern_return_t
9824 vm_map_copy_overwrite_aligned(
9826 vm_map_entry_t tmp_entry
,
9828 vm_map_offset_t start
,
9829 __unused pmap_t pmap
)
9832 vm_map_entry_t copy_entry
;
9833 vm_map_size_t copy_size
;
9835 vm_map_entry_t entry
;
9837 while ((copy_entry
= vm_map_copy_first_entry(copy
))
9838 != vm_map_copy_to_entry(copy
)) {
9839 copy_size
= (copy_entry
->vme_end
- copy_entry
->vme_start
);
9842 if (entry
->is_sub_map
) {
9843 /* unnested when clipped earlier */
9844 assert(!entry
->use_pmap
);
9846 if (entry
== vm_map_to_entry(dst_map
)) {
9847 vm_map_unlock(dst_map
);
9848 return KERN_INVALID_ADDRESS
;
9850 size
= (entry
->vme_end
- entry
->vme_start
);
		/*
		 * Make sure that no holes popped up in the
		 * address map, and that the protection is
		 * still valid, in case the map was unlocked
		 * earlier.
		 */
9858 if ((entry
->vme_start
!= start
) || ((entry
->is_sub_map
)
9859 && !entry
->needs_copy
)) {
9860 vm_map_unlock(dst_map
);
9861 return KERN_INVALID_ADDRESS
;
9863 assert(entry
!= vm_map_to_entry(dst_map
));
9866 * Check protection again
9869 if (!(entry
->protection
& VM_PROT_WRITE
)) {
9870 vm_map_unlock(dst_map
);
9871 return KERN_PROTECTION_FAILURE
;
9875 * Adjust to source size first
9878 if (copy_size
< size
) {
9879 if (entry
->map_aligned
&&
9880 !VM_MAP_PAGE_ALIGNED(entry
->vme_start
+ copy_size
,
9881 VM_MAP_PAGE_MASK(dst_map
))) {
9882 /* no longer map-aligned */
9883 entry
->map_aligned
= FALSE
;
9885 vm_map_clip_end(dst_map
, entry
, entry
->vme_start
+ copy_size
);
9890 * Adjust to destination size
9893 if (size
< copy_size
) {
9894 vm_map_copy_clip_end(copy
, copy_entry
,
9895 copy_entry
->vme_start
+ size
);
9899 assert((entry
->vme_end
- entry
->vme_start
) == size
);
9900 assert((tmp_entry
->vme_end
- tmp_entry
->vme_start
) == size
);
9901 assert((copy_entry
->vme_end
- copy_entry
->vme_start
) == size
);
		/*
		 * If the destination contains temporary unshared memory,
		 * we can perform the copy by throwing it away and
		 * installing the source data.
		 */
9909 object
= VME_OBJECT(entry
);
9910 if ((!entry
->is_shared
&&
9911 ((object
== VM_OBJECT_NULL
) ||
9912 (object
->internal
&& !object
->true_share
))) ||
9913 entry
->needs_copy
) {
9914 vm_object_t old_object
= VME_OBJECT(entry
);
9915 vm_object_offset_t old_offset
= VME_OFFSET(entry
);
9916 vm_object_offset_t offset
;
9919 * Ensure that the source and destination aren't
9922 if (old_object
== VME_OBJECT(copy_entry
) &&
9923 old_offset
== VME_OFFSET(copy_entry
)) {
9924 vm_map_copy_entry_unlink(copy
, copy_entry
);
9925 vm_map_copy_entry_dispose(copy
, copy_entry
);
9927 if (old_object
!= VM_OBJECT_NULL
) {
9928 vm_object_deallocate(old_object
);
9931 start
= tmp_entry
->vme_end
;
9932 tmp_entry
= tmp_entry
->vme_next
;
9936 #if !CONFIG_EMBEDDED
9937 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9938 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
9939 if (VME_OBJECT(copy_entry
) != VM_OBJECT_NULL
&&
9940 VME_OBJECT(copy_entry
)->vo_size
>= __TRADEOFF1_OBJ_SIZE
&&
9941 copy_size
<= __TRADEOFF1_COPY_SIZE
) {
			/*
			 * Virtual vs. Physical copy tradeoff #1.
			 *
			 * Copying only a few pages out of a large
			 * object:  do a physical copy instead of
			 * a virtual copy, to avoid possibly keeping
			 * the entire large object alive because of
			 * those few copy-on-write pages.
			 */
9951 vm_map_copy_overwrite_aligned_src_large
++;
9954 #endif /* !CONFIG_EMBEDDED */
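			/*
			 * Illustrative sketch (not part of the build): the
			 * "virtual vs. physical copy" tradeoff above as a
			 * standalone predicate -- when only a small amount is
			 * copied out of a very large object, prefer a physical
			 * copy so the large object is not kept alive by a
			 * handful of copy-on-write pages.  The constants
			 * mirror the thresholds above; the name is hypothetical.
			 */
#if 0
#include <stdint.h>

#define EX_TRADEOFF1_OBJ_SIZE  (64ULL * 1024 * 1024) /* 64 MB */
#define EX_TRADEOFF1_COPY_SIZE (128ULL * 1024)       /* 128 KB */

static int
prefer_physical_copy(uint64_t src_object_size, uint64_t copy_size)
{
	return src_object_size >= EX_TRADEOFF1_OBJ_SIZE &&
	    copy_size <= EX_TRADEOFF1_COPY_SIZE;
}
#endif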
9956 if ((dst_map
->pmap
!= kernel_pmap
) &&
9957 (VME_ALIAS(entry
) >= VM_MEMORY_MALLOC
) &&
9958 (VME_ALIAS(entry
) <= VM_MEMORY_MALLOC_MEDIUM
)) {
9959 vm_object_t new_object
, new_shadow
;
9962 * We're about to map something over a mapping
9963 * established by malloc()...
9965 new_object
= VME_OBJECT(copy_entry
);
9966 if (new_object
!= VM_OBJECT_NULL
) {
9967 vm_object_lock_shared(new_object
);
9969 while (new_object
!= VM_OBJECT_NULL
&&
9970 #if !CONFIG_EMBEDDED
9971 !new_object
->true_share
&&
9972 new_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
9973 #endif /* !CONFIG_EMBEDDED */
9974 new_object
->internal
) {
9975 new_shadow
= new_object
->shadow
;
9976 if (new_shadow
== VM_OBJECT_NULL
) {
9979 vm_object_lock_shared(new_shadow
);
9980 vm_object_unlock(new_object
);
9981 new_object
= new_shadow
;
9983 if (new_object
!= VM_OBJECT_NULL
) {
9984 if (!new_object
->internal
) {
					/*
					 * The new mapping is backed
					 * by an external object.  We
					 * don't want malloc'ed memory
					 * to be replaced with such a
					 * non-anonymous mapping, so
					 * let's go off the optimized
					 * path...
					 */
9994 vm_map_copy_overwrite_aligned_src_not_internal
++;
9995 vm_object_unlock(new_object
);
9998 #if !CONFIG_EMBEDDED
9999 if (new_object
->true_share
||
10000 new_object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
					/*
					 * Same if there's a "true_share"
					 * object in the shadow chain, or
					 * an object with a non-default
					 * (SYMMETRIC) copy strategy.
					 */
10007 vm_map_copy_overwrite_aligned_src_not_symmetric
++;
10008 vm_object_unlock(new_object
);
10011 #endif /* !CONFIG_EMBEDDED */
10012 vm_object_unlock(new_object
);
				/*
				 * The new mapping is still backed by
				 * anonymous (internal) memory, so it's
				 * OK to substitute it for the original
				 * malloc() mapping.
				 */
10022 if (old_object
!= VM_OBJECT_NULL
) {
10023 if (entry
->is_sub_map
) {
10024 if (entry
->use_pmap
) {
10025 #ifndef NO_NESTED_PMAP
10026 pmap_unnest(dst_map
->pmap
,
10027 (addr64_t
)entry
->vme_start
,
10028 entry
->vme_end
- entry
->vme_start
);
10029 #endif /* NO_NESTED_PMAP */
10030 if (dst_map
->mapped_in_other_pmaps
) {
10031 /* clean up parent */
10033 vm_map_submap_pmap_clean(
10034 dst_map
, entry
->vme_start
,
10037 VME_OFFSET(entry
));
10040 vm_map_submap_pmap_clean(
10041 dst_map
, entry
->vme_start
,
10044 VME_OFFSET(entry
));
10046 vm_map_deallocate(VME_SUBMAP(entry
));
10048 if (dst_map
->mapped_in_other_pmaps
) {
10049 vm_object_pmap_protect_options(
10053 - entry
->vme_start
,
10057 PMAP_OPTIONS_REMOVE
);
10059 pmap_remove_options(
10061 (addr64_t
)(entry
->vme_start
),
10062 (addr64_t
)(entry
->vme_end
),
10063 PMAP_OPTIONS_REMOVE
);
10065 vm_object_deallocate(old_object
);
10069 if (entry
->iokit_acct
) {
10070 /* keep using iokit accounting */
10071 entry
->use_pmap
= FALSE
;
10073 /* use pmap accounting */
10074 entry
->use_pmap
= TRUE
;
10076 entry
->is_sub_map
= FALSE
;
10077 VME_OBJECT_SET(entry
, VME_OBJECT(copy_entry
));
10078 object
= VME_OBJECT(entry
);
10079 entry
->needs_copy
= copy_entry
->needs_copy
;
10080 entry
->wired_count
= 0;
10081 entry
->user_wired_count
= 0;
10082 offset
= VME_OFFSET(copy_entry
);
10083 VME_OFFSET_SET(entry
, offset
);
10085 vm_map_copy_entry_unlink(copy
, copy_entry
);
10086 vm_map_copy_entry_dispose(copy
, copy_entry
);
			/*
			 * We could try to push pages into the pmap at this point, BUT
			 * this optimization only saved on average 2 us per page if ALL
			 * the pages in the source were currently mapped
			 * and ALL the pages in the dest were touched.  If fewer
			 * than 2/3 of the pages were touched, this optimization
			 * actually cost more cycles.  It also puts a lot of pressure
			 * on the pmap layer w/r/t mapping structures.
			 */

			/*
			 * Set up for the next iteration.  The map
			 * has not been unlocked, so the next
			 * address should be at the end of this
			 * entry, and the next map entry should be
			 * the one following it.
			 */
10105 start
= tmp_entry
->vme_end
;
10106 tmp_entry
= tmp_entry
->vme_next
;
10108 vm_map_version_t version
;
10109 vm_object_t dst_object
;
10110 vm_object_offset_t dst_offset
;
10114 if (entry
->needs_copy
) {
10115 VME_OBJECT_SHADOW(entry
,
10117 entry
->vme_start
));
10118 entry
->needs_copy
= FALSE
;
10121 dst_object
= VME_OBJECT(entry
);
10122 dst_offset
= VME_OFFSET(entry
);
10125 * Take an object reference, and record
10126 * the map version information so that the
10127 * map can be safely unlocked.
10130 if (dst_object
== VM_OBJECT_NULL
) {
				/*
				 * We would usually have just taken the
				 * optimized path above if the destination
				 * object has not been allocated yet.  But we
				 * now disable that optimization if the copy
				 * entry's object is not backed by anonymous
				 * memory, to avoid replacing malloc'ed
				 * (i.e. re-usable) anonymous memory with a
				 * not-so-anonymous mapping.
				 * So we have to handle this case here and
				 * allocate a new VM object for this map entry.
				 */
10143 dst_object
= vm_object_allocate(
10144 entry
->vme_end
- entry
->vme_start
);
10146 VME_OBJECT_SET(entry
, dst_object
);
10147 VME_OFFSET_SET(entry
, dst_offset
);
10148 assert(entry
->use_pmap
);
10151 vm_object_reference(dst_object
);
10153 /* account for unlock bumping up timestamp */
10154 version
.main_timestamp
= dst_map
->timestamp
+ 1;
10156 vm_map_unlock(dst_map
);
10159 * Copy as much as possible in one pass
10164 VME_OBJECT(copy_entry
),
10165 VME_OFFSET(copy_entry
),
10174 * Release the object reference
10177 vm_object_deallocate(dst_object
);
10180 * If a hard error occurred, return it now
10183 if (r
!= KERN_SUCCESS
) {
10187 if (copy_size
!= 0) {
10189 * Dispose of the copied region
10192 vm_map_copy_clip_end(copy
, copy_entry
,
10193 copy_entry
->vme_start
+ copy_size
);
10194 vm_map_copy_entry_unlink(copy
, copy_entry
);
10195 vm_object_deallocate(VME_OBJECT(copy_entry
));
10196 vm_map_copy_entry_dispose(copy
, copy_entry
);
			/*
			 * Pick up in the destination map where we left off.
			 *
			 * Use the version information to avoid a lookup
			 * in the normal case.
			 */
10206 start
+= copy_size
;
10207 vm_map_lock(dst_map
);
10208 if (version
.main_timestamp
== dst_map
->timestamp
&&
10210 /* We can safely use saved tmp_entry value */
10212 if (tmp_entry
->map_aligned
&&
10213 !VM_MAP_PAGE_ALIGNED(
10215 VM_MAP_PAGE_MASK(dst_map
))) {
10216 /* no longer map-aligned */
10217 tmp_entry
->map_aligned
= FALSE
;
10219 vm_map_clip_end(dst_map
, tmp_entry
, start
);
10220 tmp_entry
= tmp_entry
->vme_next
;
10222 /* Must do lookup of tmp_entry */
10224 if (!vm_map_lookup_entry(dst_map
, start
, &tmp_entry
)) {
10225 vm_map_unlock(dst_map
);
10226 return KERN_INVALID_ADDRESS
;
10228 if (tmp_entry
->map_aligned
&&
10229 !VM_MAP_PAGE_ALIGNED(
10231 VM_MAP_PAGE_MASK(dst_map
))) {
10232 /* no longer map-aligned */
10233 tmp_entry
->map_aligned
= FALSE
;
10235 vm_map_clip_start(dst_map
, tmp_entry
, start
);
10240 return KERN_SUCCESS
;
10241 }/* vm_map_copy_overwrite_aligned */
/*
 *	Routine:	vm_map_copyin_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy in data to a kernel buffer from space in the
 *		source map.  The original space may be optionally
 *		deallocated.
 *
 *		If successful, returns a new copy object.
 */
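/*
 * Illustrative sketch (not part of the build): the shape of the small-copy
 * path -- a single allocation holds a header followed by the payload, and the
 * data is copied into it in one shot.  Standalone user-space approximation
 * (memcpy stands in for copyinmap); all names are hypothetical.
 */
#if 0
#include <stdlib.h>
#include <string.h>

struct ex_kbuf_copy {
	size_t size;
	char   data[];        /* payload follows the header */
};

static struct ex_kbuf_copy *
ex_copyin_small(const void *src, size_t len, size_t small_limit)
{
	struct ex_kbuf_copy *copy;

	if (len > small_limit) {
		return NULL;  /* too big: caller should use the virtual copy path */
	}
	copy = malloc(sizeof(*copy) + len);
	if (copy == NULL) {
		return NULL;
	}
	copy->size = len;
	memcpy(copy->data, src, len);
	return copy;
}
#endif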
10253 static kern_return_t
10254 vm_map_copyin_kernel_buffer(
10256 vm_map_offset_t src_addr
,
10258 boolean_t src_destroy
,
10259 vm_map_copy_t
*copy_result
)
10262 vm_map_copy_t copy
;
10263 vm_size_t kalloc_size
;
10265 if (len
> msg_ool_size_small
) {
10266 return KERN_INVALID_ARGUMENT
;
10269 kalloc_size
= (vm_size_t
)(cpy_kdata_hdr_sz
+ len
);
10271 copy
= (vm_map_copy_t
)kalloc(kalloc_size
);
10272 if (copy
== VM_MAP_COPY_NULL
) {
10273 return KERN_RESOURCE_SHORTAGE
;
10275 copy
->type
= VM_MAP_COPY_KERNEL_BUFFER
;
10279 kr
= copyinmap(src_map
, src_addr
, copy
->cpy_kdata
, (vm_size_t
)len
);
10280 if (kr
!= KERN_SUCCESS
) {
10281 kfree(copy
, kalloc_size
);
10285 (void) vm_map_remove(
10287 vm_map_trunc_page(src_addr
,
10288 VM_MAP_PAGE_MASK(src_map
)),
10289 vm_map_round_page(src_addr
+ len
,
10290 VM_MAP_PAGE_MASK(src_map
)),
10291 (VM_MAP_REMOVE_INTERRUPTIBLE
|
10292 VM_MAP_REMOVE_WAIT_FOR_KWIRE
|
10293 ((src_map
== kernel_map
) ? VM_MAP_REMOVE_KUNWIRE
: VM_MAP_REMOVE_NO_FLAGS
)));
10295 *copy_result
= copy
;
10296 return KERN_SUCCESS
;
/*
 *	Routine:	vm_map_copyout_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy out data from a kernel buffer into space in the
 *		destination map.  The space may be optionally dynamically
 *		allocated.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
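/*
 * Illustrative sketch (not part of the build): the copy-out counterpart of
 * the kernel-buffer path -- copy the payload out of the header+data buffer
 * into the destination, and consume the buffer only on success.
 * Standalone user-space approximation; all names are hypothetical.
 */
#if 0
#include <stdlib.h>
#include <string.h>

struct ex_kbuf_copy {
	size_t size;
	char   data[];
};

/* Returns 0 on success; on failure the copy buffer is left to the caller. */
static int
ex_copyout_small(void *dst, struct ex_kbuf_copy *copy, int consume_on_success)
{
	if (dst == NULL || copy == NULL) {
		return -1;
	}
	memcpy(dst, copy->data, copy->size);
	if (consume_on_success) {
		free(copy);       /* "consumes the copy object" on success */
	}
	return 0;
}
#endif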
10310 static int vm_map_copyout_kernel_buffer_failures
= 0;
10311 static kern_return_t
10312 vm_map_copyout_kernel_buffer(
10314 vm_map_address_t
*addr
, /* IN/OUT */
10315 vm_map_copy_t copy
,
10316 vm_map_size_t copy_size
,
10317 boolean_t overwrite
,
10318 boolean_t consume_on_success
)
10320 kern_return_t kr
= KERN_SUCCESS
;
10321 thread_t thread
= current_thread();
10323 assert(copy
->size
== copy_size
);
10326 * check for corrupted vm_map_copy structure
10328 if (copy_size
> msg_ool_size_small
|| copy
->offset
) {
10329 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10330 (long long)copy
->size
, (long long)copy
->offset
);
10335 * Allocate space in the target map for the data
10338 kr
= vm_map_enter(map
,
10340 vm_map_round_page(copy_size
,
10341 VM_MAP_PAGE_MASK(map
)),
10342 (vm_map_offset_t
) 0,
10344 VM_MAP_KERNEL_FLAGS_NONE
,
10345 VM_KERN_MEMORY_NONE
,
10347 (vm_object_offset_t
) 0,
10351 VM_INHERIT_DEFAULT
);
10352 if (kr
!= KERN_SUCCESS
) {
10356 if (map
->pmap
== kernel_pmap
) {
10357 kasan_notify_address(*addr
, copy
->size
);
10363 * Copyout the data from the kernel buffer to the target map.
10365 if (thread
->map
== map
) {
10367 * If the target map is the current map, just do
10370 assert((vm_size_t
)copy_size
== copy_size
);
10371 if (copyout(copy
->cpy_kdata
, *addr
, (vm_size_t
)copy_size
)) {
10372 kr
= KERN_INVALID_ADDRESS
;
10378 * If the target map is another map, assume the
10379 * target's address space identity for the duration
10382 vm_map_reference(map
);
10383 oldmap
= vm_map_switch(map
);
10385 assert((vm_size_t
)copy_size
== copy_size
);
10386 if (copyout(copy
->cpy_kdata
, *addr
, (vm_size_t
)copy_size
)) {
10387 vm_map_copyout_kernel_buffer_failures
++;
10388 kr
= KERN_INVALID_ADDRESS
;
10391 (void) vm_map_switch(oldmap
);
10392 vm_map_deallocate(map
);
10395 if (kr
!= KERN_SUCCESS
) {
10396 /* the copy failed, clean up */
10399 * Deallocate the space we allocated in the target map.
10401 (void) vm_map_remove(
10403 vm_map_trunc_page(*addr
,
10404 VM_MAP_PAGE_MASK(map
)),
10405 vm_map_round_page((*addr
+
10406 vm_map_round_page(copy_size
,
10407 VM_MAP_PAGE_MASK(map
))),
10408 VM_MAP_PAGE_MASK(map
)),
10409 VM_MAP_REMOVE_NO_FLAGS
);
	/* copy was successful, discard the copy structure */
10414 if (consume_on_success
) {
10415 kfree(copy
, copy_size
+ cpy_kdata_hdr_sz
);
/*
 *	Routine:	vm_map_copy_insert	[internal use only]
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *	Side effects:
 *		The copy chain is destroyed.
 */
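/*
 * Illustrative sketch (not part of the build): the list splice performed by
 * vm_map_copy_insert -- every entry is unlinked from the copy chain and
 * re-linked into the destination list right after "where", after which the
 * now-empty copy header is freed.  Toy singly-linked list; names hypothetical.
 */
#if 0
#include <stdlib.h>

struct ex_entry {
	struct ex_entry *next;
	int value;
};

struct ex_copy {
	struct ex_entry *head;   /* chain of copied entries */
};

static void
ex_copy_insert(struct ex_entry *where, struct ex_copy *copy)
{
	while (copy->head != NULL) {
		struct ex_entry *e = copy->head;

		copy->head = e->next;        /* unlink from the copy chain */
		e->next = where->next;       /* link into the list after "where" */
		where->next = e;
		where = e;                   /* keep the original order */
	}
	free(copy);                          /* the copy chain is destroyed */
}
#endif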
10432 vm_map_copy_insert(
10434 vm_map_entry_t after_where
,
10435 vm_map_copy_t copy
)
10437 vm_map_entry_t entry
;
10439 while (vm_map_copy_first_entry(copy
) != vm_map_copy_to_entry(copy
)) {
10440 entry
= vm_map_copy_first_entry(copy
);
10441 vm_map_copy_entry_unlink(copy
, entry
);
10442 vm_map_store_entry_link(map
, after_where
, entry
,
10443 VM_MAP_KERNEL_FLAGS_NONE
);
10444 after_where
= entry
;
10446 zfree(vm_map_copy_zone
, copy
);
10452 vm_map_entry_t where
,
10453 vm_map_copy_t copy
,
10454 vm_map_offset_t adjustment
,
10455 vm_prot_t cur_prot
,
10456 vm_prot_t max_prot
,
10457 vm_inherit_t inheritance
)
10459 vm_map_entry_t copy_entry
, new_entry
;
10461 for (copy_entry
= vm_map_copy_first_entry(copy
);
10462 copy_entry
!= vm_map_copy_to_entry(copy
);
10463 copy_entry
= copy_entry
->vme_next
) {
10464 /* get a new VM map entry for the map */
10465 new_entry
= vm_map_entry_create(map
,
10466 !map
->hdr
.entries_pageable
);
10467 /* copy the "copy entry" to the new entry */
10468 vm_map_entry_copy(new_entry
, copy_entry
);
10469 /* adjust "start" and "end" */
10470 new_entry
->vme_start
+= adjustment
;
10471 new_entry
->vme_end
+= adjustment
;
10472 /* clear some attributes */
10473 new_entry
->inheritance
= inheritance
;
10474 new_entry
->protection
= cur_prot
;
10475 new_entry
->max_protection
= max_prot
;
10476 new_entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
10477 /* take an extra reference on the entry's "object" */
10478 if (new_entry
->is_sub_map
) {
10479 assert(!new_entry
->use_pmap
); /* not nested */
10480 vm_map_lock(VME_SUBMAP(new_entry
));
10481 vm_map_reference(VME_SUBMAP(new_entry
));
10482 vm_map_unlock(VME_SUBMAP(new_entry
));
10484 vm_object_reference(VME_OBJECT(new_entry
));
10486 /* insert the new entry in the map */
10487 vm_map_store_entry_link(map
, where
, new_entry
,
10488 VM_MAP_KERNEL_FLAGS_NONE
);
10489 /* continue inserting the "copy entries" after the new entry */
/*
 * Returns true if *size matches (or is in the range of) copy->size.
 * Upon returning true, the *size field is updated with the actual size of the
 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types).
 */
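/*
 * Illustrative sketch (not part of the build): the size validation rule
 * described above -- kernel-buffer and object copies must match exactly,
 * while entry-list copies only need to fall between the requested size and
 * its page-rounded value.  Standalone; constants and names are hypothetical.
 */
#if 0
#include <stdint.h>

#define EX_PAGE_MASK 0xfffULL
#define EX_ROUND_PAGE(x) (((x) + EX_PAGE_MASK) & ~EX_PAGE_MASK)

enum ex_copy_type { EX_COPY_KERNEL_BUFFER, EX_COPY_OBJECT, EX_COPY_ENTRY_LIST };

static int
ex_copy_validate_size(enum ex_copy_type type, uint64_t copy_size, uint64_t *size)
{
	switch (type) {
	case EX_COPY_KERNEL_BUFFER:
	case EX_COPY_OBJECT:
		return *size == copy_size;
	case EX_COPY_ENTRY_LIST:
		/* page rounding may make the copy slightly larger than requested */
		if (copy_size >= *size && copy_size <= EX_ROUND_PAGE(*size)) {
			*size = copy_size;   /* report the actual size back */
			return 1;
		}
		return 0;
	}
	return 0;
}
#endif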
10501 vm_map_copy_validate_size(
10503 vm_map_copy_t copy
,
10504 vm_map_size_t
*size
)
10506 if (copy
== VM_MAP_COPY_NULL
) {
10509 vm_map_size_t copy_sz
= copy
->size
;
10510 vm_map_size_t sz
= *size
;
10511 switch (copy
->type
) {
10512 case VM_MAP_COPY_OBJECT
:
10513 case VM_MAP_COPY_KERNEL_BUFFER
:
10514 if (sz
== copy_sz
) {
10518 case VM_MAP_COPY_ENTRY_LIST
:
10520 * potential page-size rounding prevents us from exactly
10521 * validating this flavor of vm_map_copy, but we can at least
10522 * assert that it's within a range.
10524 if (copy_sz
>= sz
&&
10525 copy_sz
<= vm_map_round_page(sz
, VM_MAP_PAGE_MASK(dst_map
))) {
/*
 *	Routine:	vm_map_copyout_size
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.  Uses a prevalidated
 *		size for the copy object (vm_map_copy_validate_size).
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
10548 vm_map_copyout_size(
10550 vm_map_address_t
*dst_addr
, /* OUT */
10551 vm_map_copy_t copy
,
10552 vm_map_size_t copy_size
)
10554 return vm_map_copyout_internal(dst_map
, dst_addr
, copy
, copy_size
,
10555 TRUE
, /* consume_on_success */
10558 VM_INHERIT_DEFAULT
);
/*
 *	Routine:	vm_map_copyout
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
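/*
 * Illustrative sketch (not part of the build): the typical copyin/copyout
 * pairing from a caller's point of view -- a copy object made from one map is
 * handed to vm_map_copyout(), which picks a fresh address in the destination
 * map and consumes the copy on success.  Error handling is reduced to the
 * bare minimum; "ex_move_region" is a hypothetical helper name.
 */
#if 0
static kern_return_t
ex_move_region(vm_map_t src_map, vm_map_t dst_map,
    vm_map_address_t src_addr, vm_map_size_t len,
    vm_map_address_t *dst_addr)
{
	vm_map_copy_t copy;
	kern_return_t kr;

	/* capture the source region (without destroying it) */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* place it at a newly-allocated address in the destination map */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* not consumed on failure: the caller must discard it */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif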
10574 vm_map_address_t
*dst_addr
, /* OUT */
10575 vm_map_copy_t copy
)
10577 return vm_map_copyout_internal(dst_map
, dst_addr
, copy
, copy
? copy
->size
: 0,
10578 TRUE
, /* consume_on_success */
10581 VM_INHERIT_DEFAULT
);
10585 vm_map_copyout_internal(
10587 vm_map_address_t
*dst_addr
, /* OUT */
10588 vm_map_copy_t copy
,
10589 vm_map_size_t copy_size
,
10590 boolean_t consume_on_success
,
10591 vm_prot_t cur_protection
,
10592 vm_prot_t max_protection
,
10593 vm_inherit_t inheritance
)
10595 vm_map_size_t size
;
10596 vm_map_size_t adjustment
;
10597 vm_map_offset_t start
;
10598 vm_object_offset_t vm_copy_start
;
10599 vm_map_entry_t last
;
10600 vm_map_entry_t entry
;
10601 vm_map_entry_t hole_entry
;
10604 * Check for null copy object.
10607 if (copy
== VM_MAP_COPY_NULL
) {
10609 return KERN_SUCCESS
;
10612 if (copy
->size
!= copy_size
) {
10614 return KERN_FAILURE
;
10618 * Check for special copy object, created
10619 * by vm_map_copyin_object.
10622 if (copy
->type
== VM_MAP_COPY_OBJECT
) {
10623 vm_object_t object
= copy
->cpy_object
;
10625 vm_object_offset_t offset
;
10627 offset
= vm_object_trunc_page(copy
->offset
);
10628 size
= vm_map_round_page((copy_size
+
10629 (vm_map_size_t
)(copy
->offset
-
10631 VM_MAP_PAGE_MASK(dst_map
));
10633 kr
= vm_map_enter(dst_map
, dst_addr
, size
,
10634 (vm_map_offset_t
) 0, VM_FLAGS_ANYWHERE
,
10635 VM_MAP_KERNEL_FLAGS_NONE
,
10636 VM_KERN_MEMORY_NONE
,
10637 object
, offset
, FALSE
,
10638 VM_PROT_DEFAULT
, VM_PROT_ALL
,
10639 VM_INHERIT_DEFAULT
);
10640 if (kr
!= KERN_SUCCESS
) {
10643 /* Account for non-pagealigned copy object */
10644 *dst_addr
+= (vm_map_offset_t
)(copy
->offset
- offset
);
10645 if (consume_on_success
) {
10646 zfree(vm_map_copy_zone
, copy
);
10648 return KERN_SUCCESS
;
10652 * Check for special kernel buffer allocated
10653 * by new_ipc_kmsg_copyin.
10656 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
10657 return vm_map_copyout_kernel_buffer(dst_map
, dst_addr
,
10658 copy
, copy_size
, FALSE
,
10659 consume_on_success
);
10664 * Find space for the data
10667 vm_copy_start
= vm_map_trunc_page((vm_map_size_t
)copy
->offset
,
10668 VM_MAP_COPY_PAGE_MASK(copy
));
10669 size
= vm_map_round_page((vm_map_size_t
)copy
->offset
+ copy_size
,
10670 VM_MAP_COPY_PAGE_MASK(copy
))
10676 vm_map_lock(dst_map
);
10677 if (dst_map
->disable_vmentry_reuse
== TRUE
) {
10678 VM_MAP_HIGHEST_ENTRY(dst_map
, entry
, start
);
10681 if (dst_map
->holelistenabled
) {
10682 hole_entry
= CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
);
10684 if (hole_entry
== NULL
) {
10686 * No more space in the map?
10688 vm_map_unlock(dst_map
);
10689 return KERN_NO_SPACE
;
10693 start
= last
->vme_start
;
10695 assert(first_free_is_valid(dst_map
));
10696 start
= ((last
= dst_map
->first_free
) == vm_map_to_entry(dst_map
)) ?
10697 vm_map_min(dst_map
) : last
->vme_end
;
10699 start
= vm_map_round_page(start
,
10700 VM_MAP_PAGE_MASK(dst_map
));
10704 vm_map_entry_t next
= last
->vme_next
;
10705 vm_map_offset_t end
= start
+ size
;
10707 if ((end
> dst_map
->max_offset
) || (end
< start
)) {
10708 if (dst_map
->wait_for_space
) {
10709 if (size
<= (dst_map
->max_offset
- dst_map
->min_offset
)) {
10710 assert_wait((event_t
) dst_map
,
10711 THREAD_INTERRUPTIBLE
);
10712 vm_map_unlock(dst_map
);
10713 thread_block(THREAD_CONTINUE_NULL
);
10717 vm_map_unlock(dst_map
);
10718 return KERN_NO_SPACE
;
10721 if (dst_map
->holelistenabled
) {
10722 if (last
->vme_end
>= end
) {
10727 * If there are no more entries, we must win.
10731 * If there is another entry, it must be
10732 * after the end of the potential new region.
10735 if (next
== vm_map_to_entry(dst_map
)) {
10739 if (next
->vme_start
>= end
) {
10746 if (dst_map
->holelistenabled
) {
10747 if (last
== CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
)) {
10751 vm_map_unlock(dst_map
);
10752 return KERN_NO_SPACE
;
10754 start
= last
->vme_start
;
10756 start
= last
->vme_end
;
10758 start
= vm_map_round_page(start
,
10759 VM_MAP_PAGE_MASK(dst_map
));
10762 if (dst_map
->holelistenabled
) {
10763 if (vm_map_lookup_entry(dst_map
, last
->vme_start
, &last
)) {
10764 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last
, (unsigned long long)last
->vme_start
);
10769 adjustment
= start
- vm_copy_start
;
10770 if (!consume_on_success
) {
		/*
		 * We're not allowed to consume "copy", so we'll have to
		 * copy its map entries into the destination map below.
		 * No need to re-allocate map entries from the correct
		 * (pageable or not) zone, since we'll get new map entries
		 * during the transfer.
		 * We'll also adjust the map entries' "start" and "end"
		 * during the transfer, to keep "copy"'s entries consistent
		 * with its "offset".
		 */
10781 goto after_adjustments
;
10785 * Since we're going to just drop the map
10786 * entries from the copy into the destination
10787 * map, they must come from the same pool.
10790 if (copy
->cpy_hdr
.entries_pageable
!= dst_map
->hdr
.entries_pageable
) {
10792 * Mismatches occur when dealing with the default
10796 vm_map_entry_t next
, new;
10799 * Find the zone that the copies were allocated from
10802 entry
= vm_map_copy_first_entry(copy
);
10805 * Reinitialize the copy so that vm_map_copy_entry_link
10808 vm_map_store_copy_reset(copy
, entry
);
10809 copy
->cpy_hdr
.entries_pageable
= dst_map
->hdr
.entries_pageable
;
10814 while (entry
!= vm_map_copy_to_entry(copy
)) {
10815 new = vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
10816 vm_map_entry_copy_full(new, entry
);
10817 new->vme_no_copy_on_read
= FALSE
;
10818 assert(!new->iokit_acct
);
10819 if (new->is_sub_map
) {
10820 /* clr address space specifics */
10821 new->use_pmap
= FALSE
;
10823 vm_map_copy_entry_link(copy
,
10824 vm_map_copy_last_entry(copy
),
10826 next
= entry
->vme_next
;
10827 old_zone
= entry
->from_reserved_zone
? vm_map_entry_reserved_zone
: vm_map_entry_zone
;
10828 zfree(old_zone
, entry
);
10834 * Adjust the addresses in the copy chain, and
10835 * reset the region attributes.
10838 for (entry
= vm_map_copy_first_entry(copy
);
10839 entry
!= vm_map_copy_to_entry(copy
);
10840 entry
= entry
->vme_next
) {
10841 if (VM_MAP_PAGE_SHIFT(dst_map
) == PAGE_SHIFT
) {
10843 * We're injecting this copy entry into a map that
10844 * has the standard page alignment, so clear
10845 * "map_aligned" (which might have been inherited
10846 * from the original map entry).
10848 entry
->map_aligned
= FALSE
;
10851 entry
->vme_start
+= adjustment
;
10852 entry
->vme_end
+= adjustment
;
10854 if (entry
->map_aligned
) {
10855 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_start
,
10856 VM_MAP_PAGE_MASK(dst_map
)));
10857 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_end
,
10858 VM_MAP_PAGE_MASK(dst_map
)));
10861 entry
->inheritance
= VM_INHERIT_DEFAULT
;
10862 entry
->protection
= VM_PROT_DEFAULT
;
10863 entry
->max_protection
= VM_PROT_ALL
;
10864 entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
10867 * If the entry is now wired,
10868 * map the pages into the destination map.
10870 if (entry
->wired_count
!= 0) {
10871 vm_map_offset_t va
;
10872 vm_object_offset_t offset
;
10873 vm_object_t object
;
10877 object
= VME_OBJECT(entry
);
10878 offset
= VME_OFFSET(entry
);
10879 va
= entry
->vme_start
;
10881 pmap_pageable(dst_map
->pmap
,
10886 while (va
< entry
->vme_end
) {
10888 struct vm_object_fault_info fault_info
= {};
10891 * Look up the page in the object.
10892 * Assert that the page will be found in the
10895 * the object was newly created by
10896 * vm_object_copy_slowly, and has
10897 * copies of all of the pages from
10898 * the source object
10900 * the object was moved from the old
10901 * map entry; because the old map
10902 * entry was wired, all of the pages
10903 * were in the top-level object.
10904 * (XXX not true if we wire pages for
10907 vm_object_lock(object
);
10909 m
= vm_page_lookup(object
, offset
);
10910 if (m
== VM_PAGE_NULL
|| !VM_PAGE_WIRED(m
) ||
10912 panic("vm_map_copyout: wiring %p", m
);
10915 prot
= entry
->protection
;
10917 if (override_nx(dst_map
, VME_ALIAS(entry
)) &&
10919 prot
|= VM_PROT_EXECUTE
;
10922 type_of_fault
= DBG_CACHE_HIT_FAULT
;
10924 fault_info
.user_tag
= VME_ALIAS(entry
);
10925 fault_info
.pmap_options
= 0;
10926 if (entry
->iokit_acct
||
10927 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
10928 fault_info
.pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
10937 FALSE
, /* change_wiring */
10938 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
10940 NULL
, /* need_retry */
10943 vm_object_unlock(object
);
10945 offset
+= PAGE_SIZE_64
;
10954 * Correct the page alignment for the result
10957 *dst_addr
= start
+ (copy
->offset
- vm_copy_start
);
10960 kasan_notify_address(*dst_addr
, size
);
10964 * Update the hints and the map size
10967 if (consume_on_success
) {
10968 SAVE_HINT_MAP_WRITE(dst_map
, vm_map_copy_last_entry(copy
));
10970 SAVE_HINT_MAP_WRITE(dst_map
, last
);
10973 dst_map
->size
+= size
;
10979 if (consume_on_success
) {
10980 vm_map_copy_insert(dst_map
, last
, copy
);
10982 vm_map_copy_remap(dst_map
, last
, copy
, adjustment
,
10983 cur_protection
, max_protection
,
10987 vm_map_unlock(dst_map
);
10990 * XXX If wiring_required, call vm_map_pageable
10993 return KERN_SUCCESS
;
/*
 *	Routine:	vm_map_copyin
 *
 *	Description:
 *		See vm_map_copyin_common.  Exported via Unsupported.exports.
 */
11004 #undef vm_map_copyin
11009 vm_map_address_t src_addr
,
11011 boolean_t src_destroy
,
11012 vm_map_copy_t
*copy_result
) /* OUT */
11014 return vm_map_copyin_common(src_map
, src_addr
, len
, src_destroy
,
11015 FALSE
, copy_result
, FALSE
);
/*
 *	Routine:	vm_map_copyin_common
 *
 *	Description:
 *		Copy the specified region (src_addr, len) from the
 *		source address space (src_map), possibly removing
 *		the region from the source address space (src_destroy).
 *
 *	Returns:
 *		A vm_map_copy_t object (copy_result), suitable for
 *		insertion into another address space (using vm_map_copyout),
 *		copying over another address space region (using
 *		vm_map_copy_overwrite).  If the copy is unused, it
 *		should be destroyed (using vm_map_copy_discard).
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */
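/*
 * Illustrative sketch (not part of the build): what vm_map_copyin_common
 * below boils down to before calling vm_map_copyin_internal() -- the legacy
 * boolean arguments are folded into a flags word.  The helper name is
 * hypothetical; the flag names are the ones used in the code below.
 */
#if 0
static int
ex_copyin_flags(boolean_t src_destroy, boolean_t use_maxprot)
{
	int flags = 0;

	if (src_destroy) {
		flags |= VM_MAP_COPYIN_SRC_DESTROY;   /* remove the source range */
	}
	if (use_maxprot) {
		flags |= VM_MAP_COPYIN_USE_MAXPROT;   /* check max rather than current protection */
	}
	return flags;
}
#endif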
11037 typedef struct submap_map
{
11038 vm_map_t parent_map
;
11039 vm_map_offset_t base_start
;
11040 vm_map_offset_t base_end
;
11041 vm_map_size_t base_len
;
11042 struct submap_map
*next
;
11046 vm_map_copyin_common(
11048 vm_map_address_t src_addr
,
11050 boolean_t src_destroy
,
11051 __unused boolean_t src_volatile
,
11052 vm_map_copy_t
*copy_result
, /* OUT */
11053 boolean_t use_maxprot
)
11059 flags
|= VM_MAP_COPYIN_SRC_DESTROY
;
11062 flags
|= VM_MAP_COPYIN_USE_MAXPROT
;
11064 return vm_map_copyin_internal(src_map
,
11071 vm_map_copyin_internal(
11073 vm_map_address_t src_addr
,
11076 vm_map_copy_t
*copy_result
) /* OUT */
11078 vm_map_entry_t tmp_entry
; /* Result of last map lookup --
11079 * in multi-level lookup, this
11080 * entry contains the actual
11081 * vm_object/offset.
11083 vm_map_entry_t new_entry
= VM_MAP_ENTRY_NULL
; /* Map entry for copy */
11085 vm_map_offset_t src_start
; /* Start of current entry --
11086 * where copy is taking place now
11088 vm_map_offset_t src_end
; /* End of entire region to be
11090 vm_map_offset_t src_base
;
11091 vm_map_t base_map
= src_map
;
11092 boolean_t map_share
= FALSE
;
11093 submap_map_t
*parent_maps
= NULL
;
11095 vm_map_copy_t copy
; /* Resulting copy */
11096 vm_map_address_t copy_addr
;
11097 vm_map_size_t copy_size
;
11098 boolean_t src_destroy
;
11099 boolean_t use_maxprot
;
11100 boolean_t preserve_purgeable
;
11101 boolean_t entry_was_shared
;
11102 vm_map_entry_t saved_src_entry
;
11104 if (flags
& ~VM_MAP_COPYIN_ALL_FLAGS
) {
11105 return KERN_INVALID_ARGUMENT
;
11108 src_destroy
= (flags
& VM_MAP_COPYIN_SRC_DESTROY
) ? TRUE
: FALSE
;
11109 use_maxprot
= (flags
& VM_MAP_COPYIN_USE_MAXPROT
) ? TRUE
: FALSE
;
11110 preserve_purgeable
=
11111 (flags
& VM_MAP_COPYIN_PRESERVE_PURGEABLE
) ? TRUE
: FALSE
;
11114 * Check for copies of zero bytes.
11118 *copy_result
= VM_MAP_COPY_NULL
;
11119 return KERN_SUCCESS
;
11123 * Check that the end address doesn't overflow
11125 src_end
= src_addr
+ len
;
11126 if (src_end
< src_addr
) {
11127 return KERN_INVALID_ADDRESS
;
11131 * Compute (page aligned) start and end of region
11133 src_start
= vm_map_trunc_page(src_addr
,
11134 VM_MAP_PAGE_MASK(src_map
));
11135 src_end
= vm_map_round_page(src_end
,
11136 VM_MAP_PAGE_MASK(src_map
));
	/*
	 * If the copy is sufficiently small, use a kernel buffer instead
	 * of making a virtual copy.  The theory is that the cost of
	 * setting up VM (and taking C-O-W faults) dominates the copy costs
	 * for small regions.
	 */
11144 if ((len
< msg_ool_size_small
) &&
11146 !preserve_purgeable
&&
11147 !(flags
& VM_MAP_COPYIN_ENTRY_LIST
) &&
11149 * Since the "msg_ool_size_small" threshold was increased and
11150 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11151 * address space limits, we revert to doing a virtual copy if the
11152 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11153 * of the commpage would now fail when it used to work.
11155 (src_start
>= vm_map_min(src_map
) &&
11156 src_start
< vm_map_max(src_map
) &&
11157 src_end
>= vm_map_min(src_map
) &&
11158 src_end
< vm_map_max(src_map
))) {
11159 return vm_map_copyin_kernel_buffer(src_map
, src_addr
, len
,
11160 src_destroy
, copy_result
);
11164 * Allocate a header element for the list.
11166 * Use the start and end in the header to
11167 * remember the endpoints prior to rounding.
11170 copy
= vm_map_copy_allocate();
11171 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
11172 copy
->cpy_hdr
.entries_pageable
= TRUE
;
11174 copy
->cpy_hdr
.page_shift
= src_map
->hdr
.page_shift
;
11177 * The copy entries can be broken down for a variety of reasons,
11178 * so we can't guarantee that they will remain map-aligned...
11179 * Will need to adjust the first copy_entry's "vme_start" and
11180 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11181 * rather than the original map's alignment.
11183 copy
->cpy_hdr
.page_shift
= PAGE_SHIFT
;
11186 vm_map_store_init( &(copy
->cpy_hdr
));
11188 copy
->offset
= src_addr
;
11191 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11193 #define RETURN(x) \
11195 vm_map_unlock(src_map); \
11196 if(src_map != base_map) \
11197 vm_map_deallocate(src_map); \
11198 if (new_entry != VM_MAP_ENTRY_NULL) \
11199 vm_map_copy_entry_dispose(copy,new_entry); \
11200 vm_map_copy_discard(copy); \
11202 submap_map_t *_ptr; \
11204 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11205 parent_maps=parent_maps->next; \
11206 if (_ptr->parent_map != base_map) \
11207 vm_map_deallocate(_ptr->parent_map); \
11208 kfree(_ptr, sizeof(submap_map_t)); \
11215 * Find the beginning of the region.
11218 vm_map_lock(src_map
);
11221 * Lookup the original "src_addr" rather than the truncated
11222 * "src_start", in case "src_start" falls in a non-map-aligned
11223 * map entry *before* the map entry that contains "src_addr"...
11225 if (!vm_map_lookup_entry(src_map
, src_addr
, &tmp_entry
)) {
11226 RETURN(KERN_INVALID_ADDRESS
);
11228 if (!tmp_entry
->is_sub_map
) {
11230 * ... but clip to the map-rounded "src_start" rather than
11231 * "src_addr" to preserve map-alignment. We'll adjust the
11232 * first copy entry at the end, if needed.
11234 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11236 if (src_start
< tmp_entry
->vme_start
) {
11238 * Move "src_start" up to the start of the
11239 * first map entry to copy.
11241 src_start
= tmp_entry
->vme_start
;
11243 /* set for later submap fix-up */
11244 copy_addr
= src_start
;
11247 * Go through entries until we get to the end.
11251 vm_map_entry_t src_entry
= tmp_entry
; /* Top-level entry */
11252 vm_map_size_t src_size
; /* Size of source
11253 * map entry (in both
11257 vm_object_t src_object
; /* Object to copy */
11258 vm_object_offset_t src_offset
;
11260 boolean_t src_needs_copy
; /* Should source map
11261 * be made read-only
11262 * for copy-on-write?
11265 boolean_t new_entry_needs_copy
; /* Will new entry be COW? */
11267 boolean_t was_wired
; /* Was source wired? */
11268 vm_map_version_t version
; /* Version before locks
11269 * dropped to make copy
11271 kern_return_t result
; /* Return value from
11272 * copy_strategically.
11274 while (tmp_entry
->is_sub_map
) {
11275 vm_map_size_t submap_len
;
11278 ptr
= (submap_map_t
*)kalloc(sizeof(submap_map_t
));
11279 ptr
->next
= parent_maps
;
11281 ptr
->parent_map
= src_map
;
11282 ptr
->base_start
= src_start
;
11283 ptr
->base_end
= src_end
;
11284 submap_len
= tmp_entry
->vme_end
- src_start
;
11285 if (submap_len
> (src_end
- src_start
)) {
11286 submap_len
= src_end
- src_start
;
11288 ptr
->base_len
= submap_len
;
11290 src_start
-= tmp_entry
->vme_start
;
11291 src_start
+= VME_OFFSET(tmp_entry
);
11292 src_end
= src_start
+ submap_len
;
11293 src_map
= VME_SUBMAP(tmp_entry
);
11294 vm_map_lock(src_map
);
11295 /* keep an outstanding reference for all maps in */
11296 /* the parents tree except the base map */
11297 vm_map_reference(src_map
);
11298 vm_map_unlock(ptr
->parent_map
);
11299 if (!vm_map_lookup_entry(
11300 src_map
, src_start
, &tmp_entry
)) {
11301 RETURN(KERN_INVALID_ADDRESS
);
11304 if (!tmp_entry
->is_sub_map
) {
11305 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11307 src_entry
= tmp_entry
;
11309 /* we are now in the lowest level submap... */
11311 if ((VME_OBJECT(tmp_entry
) != VM_OBJECT_NULL
) &&
11312 (VME_OBJECT(tmp_entry
)->phys_contiguous
)) {
			/*
			 * This is not supported for now.  In the future we
			 * will need to detect the phys_contiguous condition
			 * and then upgrade copy_slowly to do a physical copy
			 * from the device-memory-based object.  We can
			 * piggy-back off of the "was wired" boolean to set up
			 * the proper handling.
			 */
11320 RETURN(KERN_PROTECTION_FAILURE
);
11323 * Create a new address map entry to hold the result.
11324 * Fill in the fields from the appropriate source entries.
11325 * We must unlock the source map to do this if we need
11326 * to allocate a map entry.
11328 if (new_entry
== VM_MAP_ENTRY_NULL
) {
11329 version
.main_timestamp
= src_map
->timestamp
;
11330 vm_map_unlock(src_map
);
11332 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11334 vm_map_lock(src_map
);
11335 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
11336 if (!vm_map_lookup_entry(src_map
, src_start
,
11338 RETURN(KERN_INVALID_ADDRESS
);
11340 if (!tmp_entry
->is_sub_map
) {
11341 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11343 continue; /* restart w/ new tmp_entry */
11348 * Verify that the region can be read.
11350 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
11352 (src_entry
->max_protection
& VM_PROT_READ
) == 0) {
11353 RETURN(KERN_PROTECTION_FAILURE
);
11357 * Clip against the endpoints of the entire region.
11360 vm_map_clip_end(src_map
, src_entry
, src_end
);
11362 src_size
= src_entry
->vme_end
- src_start
;
11363 src_object
= VME_OBJECT(src_entry
);
11364 src_offset
= VME_OFFSET(src_entry
);
11365 was_wired
= (src_entry
->wired_count
!= 0);
11367 vm_map_entry_copy(new_entry
, src_entry
);
11368 if (new_entry
->is_sub_map
) {
11369 /* clr address space specifics */
11370 new_entry
->use_pmap
= FALSE
;
11373 * We're dealing with a copy-on-write operation,
11374 * so the resulting mapping should not inherit the
11375 * original mapping's accounting settings.
11376 * "iokit_acct" should have been cleared in
11377 * vm_map_entry_copy().
11378 * "use_pmap" should be reset to its default (TRUE)
11379 * so that the new mapping gets accounted for in
11380 * the task's memory footprint.
11382 assert(!new_entry
->iokit_acct
);
11383 new_entry
->use_pmap
= TRUE
;
11387 * Attempt non-blocking copy-on-write optimizations.
		/*
		 * If we are destroying the source, and the object
		 * is internal, we could move the object reference
		 * from the source to the copy.  The copy is
		 * copy-on-write only if the source is.
		 * We make another reference to the object, because
		 * destroying the source entry will deallocate it.
		 *
		 * This memory transfer has to be atomic (to prevent
		 * the VM object from being shared or copied while
		 * it's being moved here), so we could only do this
		 * if we won't have to unlock the VM map until the
		 * original mapping has been fully removed.
		 */
11406 if ((src_object
== VM_OBJECT_NULL
||
11407 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
)) &&
11408 vm_object_copy_quickly(
11409 VME_OBJECT_PTR(new_entry
),
11413 &new_entry_needs_copy
)) {
11414 new_entry
->needs_copy
= new_entry_needs_copy
;
11417 * Handle copy-on-write obligations
11420 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
11423 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
11425 if (override_nx(src_map
, VME_ALIAS(src_entry
))
11427 prot
|= VM_PROT_EXECUTE
;
11430 vm_object_pmap_protect(
11434 (src_entry
->is_shared
?
11437 src_entry
->vme_start
,
11440 assert(tmp_entry
->wired_count
== 0);
11441 tmp_entry
->needs_copy
= TRUE
;
11445 * The map has never been unlocked, so it's safe
11446 * to move to the next entry rather than doing
11450 goto CopySuccessful
;
11453 entry_was_shared
= tmp_entry
->is_shared
;
11456 * Take an object reference, so that we may
11457 * release the map lock(s).
11460 assert(src_object
!= VM_OBJECT_NULL
);
11461 vm_object_reference(src_object
);
11464 * Record the timestamp for later verification.
11468 version
.main_timestamp
= src_map
->timestamp
;
11469 vm_map_unlock(src_map
); /* Increments timestamp once! */
11470 saved_src_entry
= src_entry
;
11471 tmp_entry
= VM_MAP_ENTRY_NULL
;
11472 src_entry
= VM_MAP_ENTRY_NULL
;
11480 vm_object_lock(src_object
);
11481 result
= vm_object_copy_slowly(
11486 VME_OBJECT_PTR(new_entry
));
11487 VME_OFFSET_SET(new_entry
, 0);
11488 new_entry
->needs_copy
= FALSE
;
11489 } else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
11490 (entry_was_shared
|| map_share
)) {
11491 vm_object_t new_object
;
11493 vm_object_lock_shared(src_object
);
11494 new_object
= vm_object_copy_delayed(
11499 if (new_object
== VM_OBJECT_NULL
) {
11503 VME_OBJECT_SET(new_entry
, new_object
);
11504 assert(new_entry
->wired_count
== 0);
11505 new_entry
->needs_copy
= TRUE
;
11506 assert(!new_entry
->iokit_acct
);
11507 assert(new_object
->purgable
== VM_PURGABLE_DENY
);
11508 assertf(new_entry
->use_pmap
, "src_map %p new_entry %p\n", src_map
, new_entry
);
11509 result
= KERN_SUCCESS
;
11511 vm_object_offset_t new_offset
;
11512 new_offset
= VME_OFFSET(new_entry
);
11513 result
= vm_object_copy_strategically(src_object
,
11516 VME_OBJECT_PTR(new_entry
),
11518 &new_entry_needs_copy
);
11519 if (new_offset
!= VME_OFFSET(new_entry
)) {
11520 VME_OFFSET_SET(new_entry
, new_offset
);
11523 new_entry
->needs_copy
= new_entry_needs_copy
;
11526 if (result
== KERN_SUCCESS
&&
11527 preserve_purgeable
&&
11528 src_object
->purgable
!= VM_PURGABLE_DENY
) {
11529 vm_object_t new_object
;
11531 new_object
= VME_OBJECT(new_entry
);
11532 assert(new_object
!= src_object
);
11533 vm_object_lock(new_object
);
11534 assert(new_object
->ref_count
== 1);
11535 assert(new_object
->shadow
== VM_OBJECT_NULL
);
11536 assert(new_object
->copy
== VM_OBJECT_NULL
);
11537 assert(new_object
->vo_owner
== NULL
);
11539 new_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
11540 new_object
->true_share
= TRUE
;
11541 /* start as non-volatile with no owner... */
11542 new_object
->purgable
= VM_PURGABLE_NONVOLATILE
;
11543 vm_purgeable_nonvolatile_enqueue(new_object
, NULL
);
11544 /* ... and move to src_object's purgeable state */
11545 if (src_object
->purgable
!= VM_PURGABLE_NONVOLATILE
) {
11547 state
= src_object
->purgable
;
11548 vm_object_purgable_control(
11550 VM_PURGABLE_SET_STATE_FROM_KERNEL
,
11553 vm_object_unlock(new_object
);
11554 new_object
= VM_OBJECT_NULL
;
11555 /* no pmap accounting for purgeable objects */
11556 new_entry
->use_pmap
= FALSE
;
11559 if (result
!= KERN_SUCCESS
&&
11560 result
!= KERN_MEMORY_RESTART_COPY
) {
11561 vm_map_lock(src_map
);
11566 * Throw away the extra reference
11569 vm_object_deallocate(src_object
);
11572 * Verify that the map has not substantially
11573 * changed while the copy was being made.
11576 vm_map_lock(src_map
);
11578 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
) {
11579 /* src_map hasn't changed: src_entry is still valid */
11580 src_entry
= saved_src_entry
;
11581 goto VerificationSuccessful
;
		/*
		 * Simple version comparison failed.
		 *
		 * Retry the lookup and verify that the
		 * same object/offset are still present.
		 *
		 * [Note: a memory manager that colludes with
		 * the calling task can detect that we have
		 * cheated.  While the map was unlocked, the
		 * mapping could have been changed and restored.]
		 */
11596 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
11597 if (result
!= KERN_MEMORY_RESTART_COPY
) {
11598 vm_object_deallocate(VME_OBJECT(new_entry
));
11599 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
11600 /* reset accounting state */
11601 new_entry
->iokit_acct
= FALSE
;
11602 new_entry
->use_pmap
= TRUE
;
11604 RETURN(KERN_INVALID_ADDRESS
);
11607 src_entry
= tmp_entry
;
11608 vm_map_clip_start(src_map
, src_entry
, src_start
);
11610 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
11612 ((src_entry
->max_protection
& VM_PROT_READ
) == 0)) {
11613 goto VerificationFailed
;
11616 if (src_entry
->vme_end
< new_entry
->vme_end
) {
11618 * This entry might have been shortened
11619 * (vm_map_clip_end) or been replaced with
11620 * an entry that ends closer to "src_start"
11622 * Adjust "new_entry" accordingly; copying
11623 * less memory would be correct but we also
11624 * redo the copy (see below) if the new entry
11625 * no longer points at the same object/offset.
11627 assert(VM_MAP_PAGE_ALIGNED(src_entry
->vme_end
,
11628 VM_MAP_COPY_PAGE_MASK(copy
)));
11629 new_entry
->vme_end
= src_entry
->vme_end
;
11630 src_size
= new_entry
->vme_end
- src_start
;
11631 } else if (src_entry
->vme_end
> new_entry
->vme_end
) {
11633 * This entry might have been extended
11634 * (vm_map_entry_simplify() or coalesce)
11635 * or been replaced with an entry that ends farther
11636 * from "src_start" than before.
11638 * We've called vm_object_copy_*() only on
11639 * the previous <start:end> range, so we can't
11640 * just extend new_entry. We have to re-do
11641 * the copy based on the new entry as if it was
11642 * pointing at a different object/offset (see
11643 * "Verification failed" below).
11647 if ((VME_OBJECT(src_entry
) != src_object
) ||
11648 (VME_OFFSET(src_entry
) != src_offset
) ||
11649 (src_entry
->vme_end
> new_entry
->vme_end
)) {
11651 * Verification failed.
11653 * Start over with this top-level entry.
11656 VerificationFailed
: ;
11658 vm_object_deallocate(VME_OBJECT(new_entry
));
11659 tmp_entry
= src_entry
;
11664 * Verification succeeded.
11667 VerificationSuccessful
:;
11669 if (result
== KERN_MEMORY_RESTART_COPY
) {
11680 * Link in the new copy entry.
11683 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
11687 * Determine whether the entire region
11690 src_base
= src_start
;
11691 src_start
= new_entry
->vme_end
;
11692 new_entry
= VM_MAP_ENTRY_NULL
;
11693 while ((src_start
>= src_end
) && (src_end
!= 0)) {
11696 if (src_map
== base_map
) {
11697 /* back to the top */
11702 assert(ptr
!= NULL
);
11703 parent_maps
= parent_maps
->next
;
11705 /* fix up the damage we did in that submap */
11706 vm_map_simplify_range(src_map
,
11710 vm_map_unlock(src_map
);
11711 vm_map_deallocate(src_map
);
11712 vm_map_lock(ptr
->parent_map
);
11713 src_map
= ptr
->parent_map
;
11714 src_base
= ptr
->base_start
;
11715 src_start
= ptr
->base_start
+ ptr
->base_len
;
11716 src_end
= ptr
->base_end
;
11717 if (!vm_map_lookup_entry(src_map
,
11720 (src_end
> src_start
)) {
11721 RETURN(KERN_INVALID_ADDRESS
);
11723 kfree(ptr
, sizeof(submap_map_t
));
11724 if (parent_maps
== NULL
) {
11727 src_entry
= tmp_entry
->vme_prev
;
11730 if ((VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) &&
11731 (src_start
>= src_addr
+ len
) &&
11732 (src_addr
+ len
!= 0)) {
			/*
			 * Stop copying now, even though we haven't reached
			 * "src_end".  We'll adjust the end of the last copy
			 * entry at the end, if needed.
			 *
			 * If src_map's alignment is different from the
			 * system's page-alignment, there could be
			 * extra non-map-aligned map entries between
			 * the original (non-rounded) "src_addr + len"
			 * and the rounded "src_end".
			 * We do not want to copy those map entries since
			 * they're not part of the copied range.
			 */
11749 if ((src_start
>= src_end
) && (src_end
!= 0)) {
11754 * Verify that there are no gaps in the region
11757 tmp_entry
= src_entry
->vme_next
;
11758 if ((tmp_entry
->vme_start
!= src_start
) ||
11759 (tmp_entry
== vm_map_to_entry(src_map
))) {
11760 RETURN(KERN_INVALID_ADDRESS
);
11765 * If the source should be destroyed, do it now, since the
11766 * copy was successful.
11769 (void) vm_map_delete(
11771 vm_map_trunc_page(src_addr
,
11772 VM_MAP_PAGE_MASK(src_map
)),
11774 ((src_map
== kernel_map
) ?
11775 VM_MAP_REMOVE_KUNWIRE
:
11776 VM_MAP_REMOVE_NO_FLAGS
),
11779 /* fix up the damage we did in the base map */
11780 vm_map_simplify_range(
11782 vm_map_trunc_page(src_addr
,
11783 VM_MAP_PAGE_MASK(src_map
)),
11784 vm_map_round_page(src_end
,
11785 VM_MAP_PAGE_MASK(src_map
)));
11788 vm_map_unlock(src_map
);
11789 tmp_entry
= VM_MAP_ENTRY_NULL
;
11791 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) {
11792 vm_map_offset_t original_start
, original_offset
, original_end
;
11794 assert(VM_MAP_COPY_PAGE_MASK(copy
) == PAGE_MASK
);
11796 /* adjust alignment of first copy_entry's "vme_start" */
11797 tmp_entry
= vm_map_copy_first_entry(copy
);
11798 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
11799 vm_map_offset_t adjustment
;
11801 original_start
= tmp_entry
->vme_start
;
11802 original_offset
= VME_OFFSET(tmp_entry
);
11804 /* map-align the start of the first copy entry... */
11805 adjustment
= (tmp_entry
->vme_start
-
11807 tmp_entry
->vme_start
,
11808 VM_MAP_PAGE_MASK(src_map
)));
11809 tmp_entry
->vme_start
-= adjustment
;
11810 VME_OFFSET_SET(tmp_entry
,
11811 VME_OFFSET(tmp_entry
) - adjustment
);
11812 copy_addr
-= adjustment
;
11813 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
11814 /* ... adjust for mis-aligned start of copy range */
11816 (vm_map_trunc_page(copy
->offset
,
11818 vm_map_trunc_page(copy
->offset
,
11819 VM_MAP_PAGE_MASK(src_map
)));
11821 assert(page_aligned(adjustment
));
11822 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
11823 tmp_entry
->vme_start
+= adjustment
;
11824 VME_OFFSET_SET(tmp_entry
,
11825 (VME_OFFSET(tmp_entry
) +
11827 copy_addr
+= adjustment
;
11828 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
11832 * Assert that the adjustments haven't exposed
11833 * more than was originally copied...
11835 assert(tmp_entry
->vme_start
>= original_start
);
11836 assert(VME_OFFSET(tmp_entry
) >= original_offset
);
			/*
			 * ... and that it did not adjust outside of
			 * a single 16K page.
			 */
11841 assert(vm_map_trunc_page(tmp_entry
->vme_start
,
11842 VM_MAP_PAGE_MASK(src_map
)) ==
11843 vm_map_trunc_page(original_start
,
11844 VM_MAP_PAGE_MASK(src_map
)));
11847 /* adjust alignment of last copy_entry's "vme_end" */
11848 tmp_entry
= vm_map_copy_last_entry(copy
);
11849 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
11850 vm_map_offset_t adjustment
;
11852 original_end
= tmp_entry
->vme_end
;
11854 /* map-align the end of the last copy entry... */
11855 tmp_entry
->vme_end
=
11856 vm_map_round_page(tmp_entry
->vme_end
,
11857 VM_MAP_PAGE_MASK(src_map
));
11858 /* ... adjust for mis-aligned end of copy range */
11860 (vm_map_round_page((copy
->offset
+
11862 VM_MAP_PAGE_MASK(src_map
)) -
11863 vm_map_round_page((copy
->offset
+
11867 assert(page_aligned(adjustment
));
11868 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
11869 tmp_entry
->vme_end
-= adjustment
;
11870 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
11874 * Assert that the adjustments haven't exposed
11875 * more than was originally copied...
11877 assert(tmp_entry
->vme_end
<= original_end
);
11879 * ... and that it did not adjust outside of a
11880 * a single 16K page.
11882 assert(vm_map_round_page(tmp_entry
->vme_end
,
11883 VM_MAP_PAGE_MASK(src_map
)) ==
11884 vm_map_round_page(original_end
,
11885 VM_MAP_PAGE_MASK(src_map
)));
11889 /* Fix-up start and end points in copy. This is necessary */
11890 /* when the various entries in the copy object were picked */
11891 /* up from different sub-maps */
11893 tmp_entry
= vm_map_copy_first_entry(copy
);
11894 copy_size
= 0; /* compute actual size */
11895 while (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
11896 assert(VM_MAP_PAGE_ALIGNED(
11897 copy_addr
+ (tmp_entry
->vme_end
-
11898 tmp_entry
->vme_start
),
11899 VM_MAP_COPY_PAGE_MASK(copy
)));
11900 assert(VM_MAP_PAGE_ALIGNED(
11902 VM_MAP_COPY_PAGE_MASK(copy
)));
11905 * The copy_entries will be injected directly into the
11906 * destination map and might not be "map aligned" there...
11908 tmp_entry
->map_aligned
= FALSE
;
11910 tmp_entry
->vme_end
= copy_addr
+
11911 (tmp_entry
->vme_end
- tmp_entry
->vme_start
);
11912 tmp_entry
->vme_start
= copy_addr
;
11913 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
11914 copy_addr
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
11915 copy_size
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
11916 tmp_entry
= (struct vm_map_entry
*)tmp_entry
->vme_next
;
11919 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
&&
11920 copy_size
< copy
->size
) {
11922 * The actual size of the VM map copy is smaller than what
11923 * was requested by the caller. This must be because some
11924 * PAGE_SIZE-sized pages are missing at the end of the last
11925 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11926 * The caller might not have been aware of those missing
11927 * pages and might not want to be aware of it, which is
11928 * fine as long as they don't try to access (and crash on)
11929 * those missing pages.
11930 * Let's adjust the size of the "copy", to avoid failing
11931 * in vm_map_copyout() or vm_map_copy_overwrite().
11933 assert(vm_map_round_page(copy_size
,
11934 VM_MAP_PAGE_MASK(src_map
)) ==
11935 vm_map_round_page(copy
->size
,
11936 VM_MAP_PAGE_MASK(src_map
)));
11937 copy
->size
= copy_size
;
11940 *copy_result
= copy
;
11941 return KERN_SUCCESS
;
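/*
 * Illustrative sketch (not part of the kernel build): the entry-list copy
 * assembled above is what backs out-of-line data for user-visible calls such
 * as mach_vm_read().  A minimal user-space caller might look like the
 * following; the function and buffer names are hypothetical and error
 * handling is reduced to the essentials.
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	kern_return_t
 *	read_remote(task_t task, mach_vm_address_t addr, mach_vm_size_t len)
 *	{
 *		vm_offset_t snapshot = 0;
 *		mach_msg_type_number_t snapshot_size = 0;
 *
 *		// The kernel materializes a vm_map_copy for [addr, addr+len)
 *		// and maps it into the caller's address space.
 *		kern_return_t kr = mach_vm_read(task, addr, len,
 *		    &snapshot, &snapshot_size);
 *		if (kr != KERN_SUCCESS) {
 *			return kr;
 *		}
 *		// ... use the data ...
 *		(void) mach_vm_deallocate(mach_task_self(),
 *		    (mach_vm_address_t)snapshot, snapshot_size);
 *		return KERN_SUCCESS;
 *	}
 */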
kern_return_t
vm_map_copy_extract(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_copy_t		*copy_result,	/* OUT */
	vm_prot_t		*cur_prot,	/* OUT */
	vm_prot_t		*max_prot)
{
	vm_map_offset_t	src_start, src_end;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/*
	 *	Check for copies of zero bytes.
	 */

	if (len == 0) {
		*copy_result = VM_MAP_COPY_NULL;
		return KERN_SUCCESS;
	}

	/*
	 *	Check that the end address doesn't overflow
	 */
	src_end = src_addr + len;
	if (src_end < src_addr) {
		return KERN_INVALID_ADDRESS;
	}

	/*
	 *	Compute (page aligned) start and end of region
	 */
	src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
	src_end = vm_map_round_page(src_end, PAGE_MASK);

	/*
	 *	Allocate a header element for the list.
	 *
	 *	Use the start and end in the header to
	 *	remember the endpoints prior to rounding.
	 */

	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->cpy_hdr.entries_pageable = TRUE;

	vm_map_store_init(&copy->cpy_hdr);

	copy->offset = 0;
	copy->size = len;

	kr = vm_map_remap_extract(src_map,
	    src_addr,
	    len,
	    FALSE,	/* copy */
	    &copy->cpy_hdr,
	    cur_prot,
	    max_prot,
	    VM_INHERIT_SHARE,
	    TRUE,	/* pageable */
	    FALSE,	/* same_map */
	    VM_MAP_KERNEL_FLAGS_NONE);
	if (kr != KERN_SUCCESS) {
		vm_map_copy_discard(copy);
		return kr;
	}

	*copy_result = copy;
	return KERN_SUCCESS;
}
/*
 *	vm_map_copyin_object:
 *
 *	Create a copy object from an object.
 *	Our caller donates an object reference.
 */

kern_return_t
vm_map_copyin_object(
	vm_object_t		object,
	vm_object_offset_t	offset,		/* offset of region in object */
	vm_object_size_t	size,		/* size of region in object */
	vm_map_copy_t		*copy_result)	/* OUT */
{
	vm_map_copy_t	copy;			/* Resulting copy */

	/*
	 *	We drop the object into a special copy object
	 *	that contains the object directly.
	 */

	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = object;
	copy->offset = offset;
	copy->size = size;

	*copy_result = copy;
	return KERN_SUCCESS;
}
static void
vm_map_fork_share(
	vm_map_t	old_map,
	vm_map_entry_t	old_entry,
	vm_map_t	new_map)
{
	vm_object_t	object;
	vm_map_entry_t	new_entry;

	/*
	 *	New sharing code.  New map entry
	 *	references original object.  Internal
	 *	objects use asynchronous copy algorithm for
	 *	future copies.  First make sure we have
	 *	the right object.  If we need a shadow,
	 *	or someone else already has one, then
	 *	make a new shadow and share it.
	 */

	object = VME_OBJECT(old_entry);
	if (old_entry->is_sub_map) {
		assert(old_entry->wired_count == 0);
#ifndef NO_NESTED_PMAP
		if (old_entry->use_pmap) {
			kern_return_t	result;

			result = pmap_nest(new_map->pmap,
			    (VME_SUBMAP(old_entry))->pmap,
			    (addr64_t)old_entry->vme_start,
			    (addr64_t)old_entry->vme_start,
			    (uint64_t)(old_entry->vme_end - old_entry->vme_start));
			if (result) {
				panic("vm_map_fork_share: pmap_nest failed!");
			}
		}
#endif	/* NO_NESTED_PMAP */
	} else if (object == VM_OBJECT_NULL) {
		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
		    old_entry->vme_start));
		VME_OFFSET_SET(old_entry, 0);
		VME_OBJECT_SET(old_entry, object);
		old_entry->use_pmap = TRUE;
//		assert(!old_entry->needs_copy);
	} else if (object->copy_strategy !=
	    MEMORY_OBJECT_COPY_SYMMETRIC) {
		/*
		 *	We are already using an asymmetric
		 *	copy, and therefore we already have
		 *	the right object.
		 */

		assert(!old_entry->needs_copy);
	} else if (old_entry->needs_copy ||	/* case 1 */
	    object->shadowed ||			/* case 2 */
	    (!object->true_share &&		/* case 3 */
	    !old_entry->is_shared &&
	    (object->vo_size >
	    (vm_map_size_t)(old_entry->vme_end -
	    old_entry->vme_start)))) {
		/*
		 *	We need to create a shadow.
		 *	There are three cases here.
		 *	In the first case, we need to
		 *	complete a deferred symmetrical
		 *	copy that we participated in.
		 *	In the second and third cases,
		 *	we need to create the shadow so
		 *	that changes that we make to the
		 *	object do not interfere with
		 *	any symmetrical copies which
		 *	have occurred (case 2) or which
		 *	might occur (case 3).
		 *
		 *	The first case is when we had
		 *	deferred shadow object creation
		 *	via the entry->needs_copy mechanism.
		 *	This mechanism only works when
		 *	only one entry points to the source
		 *	object, and we are about to create
		 *	a second entry pointing to the
		 *	same object.  The problem is that
		 *	there is no way of mapping from
		 *	an object to the entries pointing
		 *	to it.  (Deferred shadow creation
		 *	works with one entry because it occurs
		 *	at fault time, and we walk from the
		 *	entry to the object when handling
		 *	the fault.)
		 *
		 *	The second case is when the object
		 *	to be shared has already been copied
		 *	with a symmetric copy, but we point
		 *	directly to the object without
		 *	needs_copy set in our entry.  (This
		 *	can happen because different ranges
		 *	of an object can be pointed to by
		 *	different entries.  In particular,
		 *	a single entry pointing to an object
		 *	can be split by a call to vm_inherit,
		 *	which, combined with task_create, can
		 *	result in the different entries
		 *	having different needs_copy values.)
		 *	The shadowed flag in the object allows
		 *	us to detect this case.  The problem
		 *	with this case is that if this object
		 *	has or will have shadows, then we
		 *	must not perform an asymmetric copy
		 *	of this object, since such a copy
		 *	allows the object to be changed, which
		 *	will break the previous symmetrical
		 *	copies (which rely upon the object
		 *	not changing).  In a sense, the shadowed
		 *	flag says "don't change this object".
		 *	We fix this by creating a shadow
		 *	object for this object, and sharing
		 *	that.  This works because we are free
		 *	to change the shadow object (and thus
		 *	to use an asymmetric copy strategy);
		 *	this is also semantically correct,
		 *	since this object is temporary, and
		 *	therefore a copy of the object is
		 *	as good as the object itself.  (This
		 *	is not true for permanent objects,
		 *	since the pager needs to see changes,
		 *	which won't happen if the changes
		 *	are made to a copy.)
		 *
		 *	The third case is when the object
		 *	to be shared has parts sticking
		 *	outside of the entry we're working
		 *	with, and thus may in the future
		 *	be subject to a symmetrical copy.
		 *	(This is a preemptive version of
		 *	case 2.)
		 */
		VME_OBJECT_SHADOW(old_entry,
		    (vm_map_size_t) (old_entry->vme_end -
		    old_entry->vme_start));

		/*
		 *	If we're making a shadow for other than
		 *	copy on write reasons, then we have
		 *	to remove write permission.
		 */

		if (!old_entry->needs_copy &&
		    (old_entry->protection & VM_PROT_WRITE)) {
			vm_prot_t prot;

			assert(!pmap_has_prot_policy(old_entry->protection));

			prot = old_entry->protection & ~VM_PROT_WRITE;

			assert(!pmap_has_prot_policy(prot));

			if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
				prot |= VM_PROT_EXECUTE;
			}

			if (old_map->mapped_in_other_pmaps) {
				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					old_entry->vme_start),
					PMAP_NULL,
					old_entry->vme_start,
					prot);
			} else {
				pmap_protect(old_map->pmap,
				    old_entry->vme_start,
				    old_entry->vme_end,
				    prot);
			}
		}

		old_entry->needs_copy = FALSE;
		object = VME_OBJECT(old_entry);
	}

	/*
	 *	If object was using a symmetric copy strategy,
	 *	change its copy strategy to the default
	 *	asymmetric copy strategy, which is copy_delay
	 *	in the non-norma case and copy_call in the
	 *	norma case.  Bump the reference count for the
	 *	new entry.
	 */

	if (old_entry->is_sub_map) {
		vm_map_lock(VME_SUBMAP(old_entry));
		vm_map_reference(VME_SUBMAP(old_entry));
		vm_map_unlock(VME_SUBMAP(old_entry));
	} else {
		vm_object_lock(object);
		vm_object_reference_locked(object);
		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		}
		vm_object_unlock(object);
	}

	/*
	 *	Clone the entry, using object ref from above.
	 *	Mark both entries as shared.
	 */

	new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
	                                                  * map or descendants */
	vm_map_entry_copy(new_entry, old_entry);
	old_entry->is_shared = TRUE;
	new_entry->is_shared = TRUE;

	/*
	 * We're dealing with a shared mapping, so the resulting mapping
	 * should inherit some of the original mapping's accounting settings.
	 * "iokit_acct" should have been cleared in vm_map_entry_copy().
	 * "use_pmap" should stay the same as before (if it hasn't been reset
	 * to TRUE when we cleared "iokit_acct").
	 */
	assert(!new_entry->iokit_acct);

	/*
	 *	If the old entry's inheritance is VM_INHERIT_NONE,
	 *	the new entry is for a corpse fork; remove the
	 *	write permission from the new entry.
	 */
	if (old_entry->inheritance == VM_INHERIT_NONE) {
		new_entry->protection &= ~VM_PROT_WRITE;
		new_entry->max_protection &= ~VM_PROT_WRITE;
	}

	/*
	 *	Insert the entry into the new map -- we
	 *	know we're inserting at the end of the new
	 *	map.
	 */

	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
	    VM_MAP_KERNEL_FLAGS_NONE);

	/*
	 *	Update the physical map
	 */

	if (old_entry->is_sub_map) {
		/* Bill Angell pmap support goes here */
	} else {
		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
		    old_entry->vme_end - old_entry->vme_start,
		    old_entry->vme_start);
	}
}
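/*
 * Schematic illustration of the shadow case handled above (a sketch, not a
 * literal dump of kernel state): before the fork-time shadow, the parent's
 * and the child's entries would both point at the same symmetric-copy object;
 * after VME_OBJECT_SHADOW(), writes land in the new shadow object while the
 * original object stays unchanged for any earlier symmetric copies.
 *
 *	before:   parent entry ----\
 *	                            +--> object O (COPY_SYMMETRIC)
 *	          child  entry ----/
 *
 *	after:    parent entry ---> shadow S ---> O
 *	          child  entry ---> shadow S ---> O   (S switched to COPY_DELAY)
 */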
static boolean_t
vm_map_fork_copy(
	vm_map_t	old_map,
	vm_map_entry_t	*old_entry_p,
	vm_map_t	new_map,
	int		vm_map_copyin_flags)
{
	vm_map_entry_t old_entry = *old_entry_p;
	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
	vm_map_offset_t start = old_entry->vme_start;
	vm_map_copy_t copy;
	vm_map_entry_t last = vm_map_last_entry(new_map);

	vm_map_unlock(old_map);
	/*
	 *	Use maxprot version of copyin because we
	 *	care about whether this memory can ever
	 *	be accessed, not just whether it's accessible
	 *	right now.
	 */
	vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
	if (vm_map_copyin_internal(old_map, start, entry_size,
	    vm_map_copyin_flags, &copy)
	    != KERN_SUCCESS) {
		/*
		 *	The map might have changed while it
		 *	was unlocked, check it again.  Skip
		 *	any blank space or permanently
		 *	unreadable region.
		 */
		vm_map_lock(old_map);
		if (!vm_map_lookup_entry(old_map, start, &last) ||
		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
			last = last->vme_next;
		}
		*old_entry_p = last;

		/*
		 * XXX	For some error returns, want to
		 * XXX	skip to the next element.  Note
		 *	that INVALID_ADDRESS and
		 *	PROTECTION_FAILURE are handled above.
		 */

		return FALSE;
	}

	/*
	 *	Insert the copy into the new map
	 */
	vm_map_copy_insert(new_map, last, copy);

	/*
	 *	Pick up the traversal at the end of
	 *	the copied region.
	 */

	vm_map_lock(old_map);
	start += entry_size;
	if (!vm_map_lookup_entry(old_map, start, &last)) {
		last = last->vme_next;
	} else {
		if (last->vme_start == start) {
			/*
			 * No need to clip here and we don't
			 * want to cause any unnecessary
			 * unnesting...
			 */
		} else {
			vm_map_clip_start(old_map, last, start);
		}
	}
	*old_entry_p = last;

	return TRUE;
}
/*
 *	vm_map_fork:
 *
 *	Create and return a new map based on the old
 *	map, according to the inheritance values on the
 *	regions in that map and the options.
 *
 *	The source map must not be locked.
 */
vm_map_t
vm_map_fork(
	ledger_t	ledger,
	vm_map_t	old_map,
	int		options)
{
	pmap_t		new_pmap;
	vm_map_t	new_map;
	vm_map_entry_t	old_entry;
	vm_map_size_t	new_size = 0, entry_size;
	vm_map_entry_t	new_entry;
	boolean_t	src_needs_copy;
	boolean_t	new_entry_needs_copy;
	boolean_t	pmap_is64bit;
	int		vm_map_copyin_flags;
	vm_inherit_t	old_entry_inheritance;
	int		map_create_options;
	kern_return_t	footprint_collect_kr;

	if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
	    VM_MAP_FORK_PRESERVE_PURGEABLE |
	    VM_MAP_FORK_CORPSE_FOOTPRINT)) {
		/* unsupported option */
		return VM_MAP_NULL;
	}

	pmap_is64bit =
#if defined(__i386__) || defined(__x86_64__)
	    old_map->pmap->pm_task_map != TASK_MAP_32BIT;
#elif defined(__arm64__)
	    old_map->pmap->max == MACH_VM_MAX_ADDRESS;
#elif defined(__arm__)
	    FALSE;
#else
#error	Unknown architecture.
#endif

	unsigned int pmap_flags = 0;
	pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
#if defined(HAS_APPLE_PAC)
	pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
#endif
	new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);

	vm_map_reference_swap(old_map);
	vm_map_lock(old_map);

	map_create_options = 0;
	if (old_map->hdr.entries_pageable) {
		map_create_options |= VM_MAP_CREATE_PAGEABLE;
	}
	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
		footprint_collect_kr = KERN_SUCCESS;
	}
	new_map = vm_map_create_options(new_pmap,
	    old_map->min_offset,
	    old_map->max_offset,
	    map_create_options);
	vm_map_lock(new_map);
	vm_commit_pagezero_status(new_map);
	/* inherit the parent map's page size */
	vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
	for (
		old_entry = vm_map_first_entry(old_map);
		old_entry != vm_map_to_entry(old_map);
		) {
		entry_size = old_entry->vme_end - old_entry->vme_start;

		old_entry_inheritance = old_entry->inheritance;
		/*
		 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
		 * share VM_INHERIT_NONE entries that are not backed by a
		 * device pager.
		 */
		if (old_entry_inheritance == VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
		    !(!old_entry->is_sub_map &&
		    VME_OBJECT(old_entry) != NULL &&
		    VME_OBJECT(old_entry)->pager != NULL &&
		    is_device_pager_ops(
			    VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
			old_entry_inheritance = VM_INHERIT_SHARE;
		}

		if (old_entry_inheritance != VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
		    footprint_collect_kr == KERN_SUCCESS) {
			/*
			 * The corpse won't have old_map->pmap to query
			 * footprint information, so collect that data now
			 * and store it in new_map->vmmap_corpse_footprint
			 * for later autopsy.
			 */
			footprint_collect_kr =
			    vm_map_corpse_footprint_collect(old_map,
			    old_entry,
			    new_map);
		}

		switch (old_entry_inheritance) {
		case VM_INHERIT_NONE:
			break;

		case VM_INHERIT_SHARE:
			vm_map_fork_share(old_map, old_entry, new_map);
			new_size += entry_size;
			break;

		case VM_INHERIT_COPY:

			/*
			 *	Inline the copy_quickly case;
			 *	upon failure, fall back on call
			 *	to vm_map_fork_copy.
			 */

			if (old_entry->is_sub_map) {
				break;
			}
			if ((old_entry->wired_count != 0) ||
			    ((VME_OBJECT(old_entry) != NULL) &&
			    (VME_OBJECT(old_entry)->true_share))) {
				goto slow_vm_map_fork_copy;
			}

			new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
			vm_map_entry_copy(new_entry, old_entry);
			if (new_entry->is_sub_map) {
				/* clear address space specifics */
				new_entry->use_pmap = FALSE;
			} else {
				/*
				 * We're dealing with a copy-on-write operation,
				 * so the resulting mapping should not inherit
				 * the original mapping's accounting settings.
				 * "iokit_acct" should have been cleared in
				 * vm_map_entry_copy().
				 * "use_pmap" should be reset to its default
				 * (TRUE) so that the new mapping gets
				 * accounted for in the task's memory footprint.
				 */
				assert(!new_entry->iokit_acct);
				new_entry->use_pmap = TRUE;
			}

			if (!vm_object_copy_quickly(
				    VME_OBJECT_PTR(new_entry),
				    VME_OFFSET(old_entry),
				    (old_entry->vme_end -
				    old_entry->vme_start),
				    &src_needs_copy,
				    &new_entry_needs_copy)) {
				vm_map_entry_dispose(new_map, new_entry);
				goto slow_vm_map_fork_copy;
			}

			/*
			 *	Handle copy-on-write obligations
			 */

			if (src_needs_copy && !old_entry->needs_copy) {
				vm_prot_t prot;

				assert(!pmap_has_prot_policy(old_entry->protection));

				prot = old_entry->protection & ~VM_PROT_WRITE;

				if (override_nx(old_map, VME_ALIAS(old_entry))
				    && prot) {
					prot |= VM_PROT_EXECUTE;
				}

				assert(!pmap_has_prot_policy(prot));

				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					old_entry->vme_start),
					((old_entry->is_shared
					|| old_map->mapped_in_other_pmaps)
					? PMAP_NULL :
					old_map->pmap),
					old_entry->vme_start,
					prot);

				assert(old_entry->wired_count == 0);
				old_entry->needs_copy = TRUE;
			}
			new_entry->needs_copy = new_entry_needs_copy;

			/*
			 *	Insert the entry at the end
			 *	of the map.
			 */

			vm_map_store_entry_link(new_map,
			    vm_map_last_entry(new_map),
			    new_entry,
			    VM_MAP_KERNEL_FLAGS_NONE);
			new_size += entry_size;
			break;

slow_vm_map_fork_copy:
			vm_map_copyin_flags = 0;
			if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
				vm_map_copyin_flags |=
				    VM_MAP_COPYIN_PRESERVE_PURGEABLE;
			}
			if (vm_map_fork_copy(old_map,
			    &old_entry,
			    new_map,
			    vm_map_copyin_flags)) {
				new_size += entry_size;
			}
			continue;
		}
		old_entry = old_entry->vme_next;
	}

#if defined(__arm64__)
	pmap_insert_sharedpage(new_map->pmap);
#endif

	new_map->size = new_size;

	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		vm_map_corpse_footprint_collect_done(new_map);
	}

	vm_map_unlock(new_map);
	vm_map_unlock(old_map);
	vm_map_deallocate(old_map);

	return new_map;
}
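/*
 * Illustrative sketch (not part of the kernel build): the per-entry
 * inheritance values consumed by vm_map_fork() above are set from user space
 * with minherit(2) / mach_vm_inherit().  A VM_INHERIT_SHARE region is handled
 * by vm_map_fork_share(), VM_INHERIT_COPY by the copy paths, and
 * VM_INHERIT_NONE is left out of the child entirely.  The helper name and
 * buffer below are hypothetical.
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	static kern_return_t
 *	share_with_children(void *buf, size_t len)
 *	{
 *		// After this call, a subsequent fork() gives the child a
 *		// mapping of the same pages instead of a copy-on-write copy.
 *		return mach_vm_inherit(mach_task_self(),
 *		    (mach_vm_address_t)(uintptr_t)buf,
 *		    (mach_vm_size_t)len,
 *		    VM_INHERIT_SHARE);
 *	}
 */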
/*
 * vm_map_exec:
 *
 *	Setup the "new_map" with the proper execution environment according
 *	to the type of executable (platform, 64bit, chroot environment).
 *	Map the comm page and shared region, etc...
 */
kern_return_t
vm_map_exec(
	vm_map_t	new_map,
	task_t		task,
	boolean_t	is64bit,
	void		*fsroot,
	cpu_type_t	cpu,
	cpu_subtype_t	cpu_subtype)
{
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
		(void *)VM_KERNEL_ADDRPERM(current_task()),
		(void *)VM_KERNEL_ADDRPERM(new_map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu,
		cpu_subtype));
	(void) vm_commpage_enter(new_map, task, is64bit);
	(void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
		(void *)VM_KERNEL_ADDRPERM(current_task()),
		(void *)VM_KERNEL_ADDRPERM(new_map),
		(void *)VM_KERNEL_ADDRPERM(task),
		(void *)VM_KERNEL_ADDRPERM(fsroot),
		cpu,
		cpu_subtype));
	return KERN_SUCCESS;
}
/*
 *	vm_map_lookup_locked:
 *
 *	Finds the VM object, offset, and
 *	protection for a given virtual address in the
 *	specified map, assuming a page fault of the
 *	type specified.
 *
 *	Returns the (object, offset, protection) for
 *	this address, whether it is wired down, and whether
 *	this map has the only reference to the data in question.
 *	In order to later verify this lookup, a "version"
 *	is returned.
 *
 *	The map MUST be locked by the caller and WILL be
 *	locked on exit.  In order to guarantee the
 *	existence of the returned object, it is returned
 *	locked.
 *
 *	If a lookup is requested with "write protection"
 *	specified, the map may be changed to perform virtual
 *	copying operations, although the data referenced will
 *	remain the same.
 */
kern_return_t
vm_map_lookup_locked(
	vm_map_t		*var_map,	/* IN/OUT */
	vm_map_offset_t		vaddr,
	vm_prot_t		fault_type,
	int			object_lock_type,
	vm_map_version_t	*out_version,	/* OUT */
	vm_object_t		*object,	/* OUT */
	vm_object_offset_t	*offset,	/* OUT */
	vm_prot_t		*out_prot,	/* OUT */
	boolean_t		*wired,		/* OUT */
	vm_object_fault_info_t	fault_info,	/* OUT */
	vm_map_t		*real_map)
{
	vm_map_entry_t		entry;
	vm_map_t		map = *var_map;
	vm_map_t		old_map = *var_map;
	vm_map_t		cow_sub_map_parent = VM_MAP_NULL;
	vm_map_offset_t		cow_parent_vaddr = 0;
	vm_map_offset_t		old_start = 0;
	vm_map_offset_t		old_end = 0;
	vm_prot_t		prot;
	boolean_t		mask_protections;
	boolean_t		force_copy;
	vm_prot_t		original_fault_type;

	/*
	 * VM_PROT_MASK means that the caller wants us to use "fault_type"
	 * as a mask against the mapping's actual protections, not as an
	 * absolute value.
	 */
	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
	force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
	fault_type &= VM_PROT_ALL;
	original_fault_type = fault_type;

	*real_map = map;

RetryLookup:
	fault_type = original_fault_type;

	/*
	 *	If the map has an interesting hint, try it before calling
	 *	full blown lookup routine.
	 */
	entry = map->hint;

	if ((entry == vm_map_to_entry(map)) ||
	    (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
		vm_map_entry_t	tmp_entry;

		/*
		 *	Entry was either not a valid hint, or the vaddr
		 *	was not contained in the entry, so do a full lookup.
		 */
		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
			if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
				vm_map_unlock(cow_sub_map_parent);
			}
			if ((*real_map != map)
			    && (*real_map != cow_sub_map_parent)) {
				vm_map_unlock(*real_map);
			}
			return KERN_INVALID_ADDRESS;
		}

		entry = tmp_entry;
	}
	if (map == old_map) {
		old_start = entry->vme_start;
		old_end = entry->vme_end;
	}

	/*
	 *	Handle submaps.  Drop lock on upper map, submap is
	 *	returned locked.
	 */

submap_recurse:
	if (entry->is_sub_map) {
		vm_map_offset_t		local_vaddr;
		vm_map_offset_t		end_delta;
		vm_map_offset_t		start_delta;
		vm_map_entry_t		submap_entry;
		vm_prot_t		subentry_protection;
		vm_prot_t		subentry_max_protection;
		boolean_t		subentry_no_copy_on_read;
		boolean_t		mapped_needs_copy = FALSE;

		local_vaddr = vaddr;

		if ((entry->use_pmap &&
		    !((fault_type & VM_PROT_WRITE) ||
		    force_copy))) {
			/* if real_map equals map we unlock below */
			if ((*real_map != map) &&
			    (*real_map != cow_sub_map_parent)) {
				vm_map_unlock(*real_map);
			}
			*real_map = VME_SUBMAP(entry);
		}

		if (entry->needs_copy &&
		    ((fault_type & VM_PROT_WRITE) ||
		    force_copy)) {
			if (!mapped_needs_copy) {
				if (vm_map_lock_read_to_write(map)) {
					vm_map_lock_read(map);
					*real_map = map;
					goto RetryLookup;
				}
				vm_map_lock_read(VME_SUBMAP(entry));
				*var_map = VME_SUBMAP(entry);
				cow_sub_map_parent = map;
				/* reset base to map before cow object */
				/* this is the map which will accept */
				/* the new cow object */
				old_start = entry->vme_start;
				old_end = entry->vme_end;
				cow_parent_vaddr = vaddr;
				mapped_needs_copy = TRUE;
			} else {
				vm_map_lock_read(VME_SUBMAP(entry));
				*var_map = VME_SUBMAP(entry);
				if ((cow_sub_map_parent != map) &&
				    (*real_map != map)) {
					vm_map_unlock(map);
				}
			}
		} else {
			vm_map_lock_read(VME_SUBMAP(entry));
			*var_map = VME_SUBMAP(entry);
			/* leave map locked if it is a target */
			/* cow sub_map above otherwise, just */
			/* follow the maps down to the object */
			/* here we unlock knowing we are not */
			/* revisiting the map. */
			if ((*real_map != map) && (map != cow_sub_map_parent)) {
				vm_map_unlock_read(map);
			}
		}

		map = *var_map;

		/* calculate the offset in the submap for vaddr */
		local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);

RetrySubMap:
		if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
			if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
				vm_map_unlock(cow_sub_map_parent);
			}
			if ((*real_map != map)
			    && (*real_map != cow_sub_map_parent)) {
				vm_map_unlock(*real_map);
			}
			*real_map = map;
			return KERN_INVALID_ADDRESS;
		}

		/* find the attenuated shadow of the underlying object */
		/* on our target map */

		/* in english the submap object may extend beyond the     */
		/* region mapped by the entry or, may only fill a portion */
		/* of it.  For our purposes, we only care if the object   */
		/* doesn't fill.  In this case the area which will        */
		/* ultimately be clipped in the top map will only need    */
		/* to be as big as the portion of the underlying entry    */
		/* which is mapped */
		start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
		    submap_entry->vme_start - VME_OFFSET(entry) : 0;

		end_delta =
		    (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
		    submap_entry->vme_end ?
		    0 : (VME_OFFSET(entry) +
		    (old_end - old_start))
		    - submap_entry->vme_end;

		old_start += start_delta;
		old_end -= end_delta;

		if (submap_entry->is_sub_map) {
			entry = submap_entry;
			vaddr = local_vaddr;
			goto submap_recurse;
		}

		if (((fault_type & VM_PROT_WRITE) ||
		    force_copy)
		    && cow_sub_map_parent) {
			vm_object_t	sub_object, copy_object;
			vm_object_offset_t copy_offset;
			vm_map_offset_t	local_start;
			vm_map_offset_t	local_end;
			boolean_t	copied_slowly = FALSE;

			if (vm_map_lock_read_to_write(map)) {
				vm_map_lock_read(map);
				old_start -= start_delta;
				old_end += end_delta;
				goto RetrySubMap;
			}

			sub_object = VME_OBJECT(submap_entry);
			if (sub_object == VM_OBJECT_NULL) {
				sub_object =
				    vm_object_allocate(
					(vm_map_size_t)
					(submap_entry->vme_end -
					submap_entry->vme_start));
				VME_OBJECT_SET(submap_entry, sub_object);
				VME_OFFSET_SET(submap_entry, 0);
				assert(!submap_entry->is_sub_map);
				assert(submap_entry->use_pmap);
			}
			local_start = local_vaddr -
			    (cow_parent_vaddr - old_start);
			local_end = local_vaddr +
			    (old_end - cow_parent_vaddr);
			vm_map_clip_start(map, submap_entry, local_start);
			vm_map_clip_end(map, submap_entry, local_end);
			if (submap_entry->is_sub_map) {
				/* unnesting was done when clipping */
				assert(!submap_entry->use_pmap);
			}

			/* This is the COW case, let's connect */
			/* an entry in our space to the underlying */
			/* object in the submap, bypassing the */
			/* submap. */

			if (submap_entry->wired_count != 0 ||
			    (sub_object->copy_strategy ==
			    MEMORY_OBJECT_COPY_NONE)) {
				vm_object_lock(sub_object);
				vm_object_copy_slowly(sub_object,
				    VME_OFFSET(submap_entry),
				    (submap_entry->vme_end -
				    submap_entry->vme_start),
				    FALSE,
				    &copy_object);
				copied_slowly = TRUE;
			} else {
				/* set up shadow object */
				copy_object = sub_object;
				vm_object_lock(sub_object);
				vm_object_reference_locked(sub_object);
				sub_object->shadowed = TRUE;
				vm_object_unlock(sub_object);

				assert(submap_entry->wired_count == 0);
				submap_entry->needs_copy = TRUE;

				prot = submap_entry->protection;
				assert(!pmap_has_prot_policy(prot));
				prot = prot & ~VM_PROT_WRITE;
				assert(!pmap_has_prot_policy(prot));

				if (override_nx(old_map,
				    VME_ALIAS(submap_entry))
				    && prot) {
					prot |= VM_PROT_EXECUTE;
				}

				vm_object_pmap_protect(
					sub_object,
					VME_OFFSET(submap_entry),
					submap_entry->vme_end -
					submap_entry->vme_start,
					(submap_entry->is_shared
					|| map->mapped_in_other_pmaps) ?
					PMAP_NULL : map->pmap,
					submap_entry->vme_start,
					prot);
			}

			/*
			 * Adjust the fault offset to the submap entry.
			 */
			copy_offset = (local_vaddr -
			    submap_entry->vme_start +
			    VME_OFFSET(submap_entry));

			/* This works differently than the */
			/* normal submap case. We go back  */
			/* to the parent of the cow map and*/
			/* clip out the target portion of  */
			/* the sub_map, substituting the   */
			/* new copy object,                */

			subentry_protection = submap_entry->protection;
			subentry_max_protection = submap_entry->max_protection;
			subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
			vm_map_unlock(map);
			submap_entry = NULL; /* not valid after map unlock */

			local_start = old_start;
			local_end = old_end;
			map = cow_sub_map_parent;
			*var_map = cow_sub_map_parent;
			vaddr = cow_parent_vaddr;
			cow_sub_map_parent = NULL;

			if (!vm_map_lookup_entry(map,
			    vaddr, &entry)) {
				vm_object_deallocate(
					copy_object);
				vm_map_lock_write_to_read(map);
				return KERN_INVALID_ADDRESS;
			}

			/* clip out the portion of space */
			/* mapped by the sub map which   */
			/* corresponds to the underlying */
			/* object */

			/*
			 * Clip (and unnest) the smallest nested chunk
			 * possible around the faulting address...
			 */
			local_start = vaddr & ~(pmap_nesting_size_min - 1);
			local_end = local_start + pmap_nesting_size_min;
			/*
			 * ... but don't go beyond the "old_start" to "old_end"
			 * range, to avoid spanning over another VM region
			 * with a possibly different VM object and/or offset.
			 */
			if (local_start < old_start) {
				local_start = old_start;
			}
			if (local_end > old_end) {
				local_end = old_end;
			}
			/*
			 * Adjust copy_offset to the start of the range.
			 */
			copy_offset -= (vaddr - local_start);

			vm_map_clip_start(map, entry, local_start);
			vm_map_clip_end(map, entry, local_end);
			if (entry->is_sub_map) {
				/* unnesting was done when clipping */
				assert(!entry->use_pmap);
			}

			/* substitute copy object for */
			/* shared map entry           */
			vm_map_deallocate(VME_SUBMAP(entry));
			assert(!entry->iokit_acct);
			entry->is_sub_map = FALSE;
			entry->use_pmap = TRUE;
			VME_OBJECT_SET(entry, copy_object);

			/* propagate the submap entry's protections */
			if (entry->protection != VM_PROT_READ) {
				/*
				 * Someone has already altered the top entry's
				 * protections via vm_protect(VM_PROT_COPY).
				 * Respect these new values and ignore the
				 * submap entry's protections.
				 */
			} else {
				/*
				 * Regular copy-on-write: propagate the submap
				 * entry's protections to the top map entry.
				 */
				entry->protection |= subentry_protection;
			}
			entry->max_protection |= subentry_max_protection;
			/* propagate no_copy_on_read */
			entry->vme_no_copy_on_read = subentry_no_copy_on_read;

			if ((entry->protection & VM_PROT_WRITE) &&
			    (entry->protection & VM_PROT_EXECUTE) &&
#if !CONFIG_EMBEDDED
			    map != kernel_map &&
			    cs_process_enforcement(NULL) &&
#endif /* !CONFIG_EMBEDDED */
			    !(entry->used_for_jit)) {
				DTRACE_VM3(cs_wx,
				    uint64_t, (uint64_t)entry->vme_start,
				    uint64_t, (uint64_t)entry->vme_end,
				    vm_prot_t, entry->protection);
				printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
				    proc_selfpid(),
				    (current_task()->bsd_info
				    ? proc_name_address(current_task()->bsd_info)
				    : "?"),
				    __FUNCTION__);
				entry->protection &= ~VM_PROT_EXECUTE;
			}

			if (copied_slowly) {
				VME_OFFSET_SET(entry, local_start - old_start);
				entry->needs_copy = FALSE;
				entry->is_shared = FALSE;
			} else {
				VME_OFFSET_SET(entry, copy_offset);
				assert(entry->wired_count == 0);
				entry->needs_copy = TRUE;
				if (entry->inheritance == VM_INHERIT_SHARE) {
					entry->inheritance = VM_INHERIT_COPY;
				}
				if (map != old_map) {
					entry->is_shared = TRUE;
				}
			}
			if (entry->inheritance == VM_INHERIT_SHARE) {
				entry->inheritance = VM_INHERIT_COPY;
			}

			vm_map_lock_write_to_read(map);
		} else {
			if ((cow_sub_map_parent)
			    && (cow_sub_map_parent != *real_map)
			    && (cow_sub_map_parent != map)) {
				vm_map_unlock(cow_sub_map_parent);
			}
			entry = submap_entry;
			vaddr = local_vaddr;
		}
	}

	/*
	 *	Check whether this task is allowed to have
	 *	this page.
	 */

	prot = entry->protection;

	if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
		/*
		 * HACK -- if not a stack, then allow execution
		 */
		prot |= VM_PROT_EXECUTE;
	}

	if (mask_protections) {
		fault_type &= prot;
		if (fault_type == VM_PROT_NONE) {
			goto protection_failure;
		}
	}
	if (((fault_type & prot) != fault_type)
#if __arm64__
	    /* prefetch abort in execute-only page */
	    && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
#endif
	    ) {
protection_failure:
		if (*real_map != map) {
			vm_map_unlock(*real_map);
		}
		*real_map = map;

		if ((fault_type & VM_PROT_EXECUTE) && prot) {
			log_stack_execution_failure((addr64_t)vaddr, prot);
		}

		DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
		return KERN_PROTECTION_FAILURE;
	}

	/*
	 *	If this page is not pageable, we have to get
	 *	it for all possible accesses.
	 */

	*wired = (entry->wired_count != 0);
	if (*wired) {
		fault_type = prot;
	}

	/*
	 *	If the entry was copy-on-write, we either ...
	 */

	if (entry->needs_copy) {
		/*
		 *	If we want to write the page, we may as well
		 *	handle that now since we've got the map locked.
		 *
		 *	If we don't need to write the page, we just
		 *	demote the permissions allowed.
		 */

		if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
			/*
			 *	Make a new object, and place it in the
			 *	object chain.  Note that no new references
			 *	have appeared -- one just moved from the
			 *	map to the new object.
			 */

			if (vm_map_lock_read_to_write(map)) {
				vm_map_lock_read(map);
				goto RetryLookup;
			}

			if (VME_OBJECT(entry)->shadowed == FALSE) {
				vm_object_lock(VME_OBJECT(entry));
				VME_OBJECT(entry)->shadowed = TRUE;
				vm_object_unlock(VME_OBJECT(entry));
			}
			VME_OBJECT_SHADOW(entry,
			    (vm_map_size_t) (entry->vme_end -
			    entry->vme_start));
			entry->needs_copy = FALSE;

			vm_map_lock_write_to_read(map);
		}
		if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
			/*
			 *	We're attempting to read a copy-on-write
			 *	page -- don't allow writes.
			 */

			prot &= (~VM_PROT_WRITE);
		}
	}

	/*
	 *	Create an object if necessary.
	 */
	if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
		if (vm_map_lock_read_to_write(map)) {
			vm_map_lock_read(map);
			goto RetryLookup;
		}

		VME_OBJECT_SET(entry,
		    vm_object_allocate(
			    (vm_map_size_t)(entry->vme_end -
			    entry->vme_start)));
		VME_OFFSET_SET(entry, 0);
		assert(entry->use_pmap);
		vm_map_lock_write_to_read(map);
	}

	/*
	 *	Return the object/offset from this entry.  If the entry
	 *	was copy-on-write or empty, it has been fixed up.  Also
	 *	return the protection.
	 */

	*offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
	*object = VME_OBJECT(entry);

	KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0);

	if (fault_info) {
		fault_info->interruptible = THREAD_UNINT; /* for now... */
		/* ... the caller will change "interruptible" if needed */
		fault_info->cluster_size = 0;
		fault_info->user_tag = VME_ALIAS(entry);
		fault_info->pmap_options = 0;
		if (entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap)) {
			fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
		}
		fault_info->behavior = entry->behavior;
		fault_info->lo_offset = VME_OFFSET(entry);
		fault_info->hi_offset =
		    (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
		fault_info->no_cache = entry->no_cache;
		fault_info->stealth = FALSE;
		fault_info->io_sync = FALSE;
		if (entry->used_for_jit ||
		    entry->vme_resilient_codesign) {
			fault_info->cs_bypass = TRUE;
		} else {
			fault_info->cs_bypass = FALSE;
		}
		fault_info->pmap_cs_associated = FALSE;
#if CONFIG_PMAP_CS
		if (entry->pmap_cs_associated) {
			/*
			 * The pmap layer will validate this page
			 * before allowing it to be executed from.
			 */
			fault_info->pmap_cs_associated = TRUE;
		}
#endif /* CONFIG_PMAP_CS */
		fault_info->mark_zf_absent = FALSE;
		fault_info->batch_pmap_op = FALSE;
		fault_info->resilient_media = entry->vme_resilient_media;
		fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
	}

	/*
	 *	Lock the object to prevent it from disappearing
	 */
	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
		vm_object_lock(*object);
	} else {
		vm_object_lock_shared(*object);
	}

	/*
	 *	Save the version number
	 */

	out_version->main_timestamp = map->timestamp;

	return KERN_SUCCESS;
}
/*
 *	vm_map_verify:
 *
 *	Verifies that the map in question has not changed
 *	since the given version.  The map has to be locked
 *	("shared" mode is fine) before calling this function
 *	and it will be returned locked too.
 */
boolean_t
vm_map_verify(
	vm_map_t		map,
	vm_map_version_t	*version)	/* REF */
{
	boolean_t	result;

	vm_map_lock_assert_held(map);
	result = (map->timestamp == version->main_timestamp);

	return result;
}
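/*
 * Schematic use of the version returned by vm_map_lookup_locked() and checked
 * by vm_map_verify() -- a sketch of the fault-path pattern, not a literal
 * excerpt; declarations of the other locals (object, offset, prot, wired,
 * fault_info, real_map) are omitted and assumed:
 *
 *	vm_map_version_t version;
 *
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset,
 *	    &prot, &wired, &fault_info, &real_map);
 *	// ... drop the map lock, do the (possibly blocking) paging work ...
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed while it was unlocked: redo the lookup
 *	}
 */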
/*
 *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
 *	Goes away after regular vm_region_recurse function migrates to
 *	64 bits
 *	vm_region_recurse: A form of vm_region which follows the
 *	submaps in a target map
 *
 */

kern_return_t
vm_map_region_recurse_64(
	vm_map_t			map,
	vm_map_offset_t			*address,	/* IN/OUT */
	vm_map_size_t			*size,		/* OUT */
	natural_t			*nesting_depth,	/* IN/OUT */
	vm_region_submap_info_64_t	submap_info,	/* IN/OUT */
	mach_msg_type_number_t		*count)		/* IN/OUT */
{
	mach_msg_type_number_t		original_count;
	vm_region_extended_info_data_t	extended;
	vm_map_entry_t			tmp_entry;
	vm_map_offset_t			user_address;
	unsigned int			user_max_depth;

	/*
	 * "curr_entry" is the VM map entry preceding or including the
	 * address we're looking for.
	 * "curr_map" is the map or sub-map containing "curr_entry".
	 * "curr_address" is the equivalent of the top map's "user_address"
	 * in the current map.
	 * "curr_offset" is the cumulated offset of "curr_map" in the
	 * target task's address space.
	 * "curr_depth" is the depth of "curr_map" in the chain of
	 * sub-maps.
	 *
	 * "curr_max_below" and "curr_max_above" limit the range (around
	 * "curr_address") we should take into account in the current (sub)map.
	 * They limit the range to what's visible through the map entries
	 * we've traversed from the top map to the current map.
	 */
	vm_map_entry_t			curr_entry;
	vm_map_address_t		curr_address;
	vm_map_offset_t			curr_offset;
	vm_map_t			curr_map;
	unsigned int			curr_depth;
	vm_map_offset_t			curr_max_below, curr_max_above;
	vm_map_offset_t			curr_skip;

	/*
	 * "next_" is the same as "curr_" but for the VM region immediately
	 * after the address we're looking for.  We need to keep track of this
	 * too because we want to return info about that region if the
	 * address we're looking for is not mapped.
	 */
	vm_map_entry_t			next_entry;
	vm_map_offset_t			next_offset;
	vm_map_offset_t			next_address;
	vm_map_t			next_map;
	unsigned int			next_depth;
	vm_map_offset_t			next_max_below, next_max_above;
	vm_map_offset_t			next_skip;

	boolean_t			look_for_pages;
	vm_region_submap_short_info_64_t short_info;
	boolean_t			do_region_footprint;

	if (map == VM_MAP_NULL) {
		/* no address space to work on */
		return KERN_INVALID_ARGUMENT;
	}

	if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
		/*
		 * "info" structure is not big enough and
		 * would overflow
		 */
		return KERN_INVALID_ARGUMENT;
	}

	do_region_footprint = task_self_region_footprint();
	original_count = *count;

	if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
		*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
		look_for_pages = FALSE;
		short_info = (vm_region_submap_short_info_64_t) submap_info;
		submap_info = NULL;
	} else {
		look_for_pages = TRUE;
		*count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
		short_info = NULL;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			*count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
		}
		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
			*count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
		}
	}

	user_address = *address;
	user_max_depth = *nesting_depth;

	if (not_in_kdp) {
		vm_map_lock_read(map);
	}

recurse_again:
	curr_entry = NULL;
	curr_map = map;
	curr_address = user_address;
	curr_offset = 0;
	curr_skip = 0;
	curr_depth = 0;
	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
	curr_max_below = curr_address;

	next_entry = NULL;
	next_map = NULL;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_above = (vm_map_offset_t) -1;
	next_max_below = (vm_map_offset_t) -1;

	for (;;) {
		if (vm_map_lookup_entry(curr_map,
		    curr_address,
		    &tmp_entry)) {
			/* tmp_entry contains the address we're looking for */
			curr_entry = tmp_entry;
		} else {
			vm_map_offset_t skip;
			/*
			 * The address is not mapped.  "tmp_entry" is the
			 * map entry preceding the address.  We want the next
			 * one, if it exists.
			 */
			curr_entry = tmp_entry->vme_next;

			if (curr_entry == vm_map_to_entry(curr_map) ||
			    (curr_entry->vme_start >=
			    curr_address + curr_max_above)) {
				/* no next entry at this level: stop looking */
				if (not_in_kdp) {
					vm_map_unlock_read(curr_map);
				}
				curr_entry = NULL;
				curr_map = NULL;
				curr_skip = 0;
				curr_offset = 0;
				curr_depth = 0;
				curr_max_above = 0;
				curr_max_below = 0;
				break;
			}

			/* adjust current address and offset */
			skip = curr_entry->vme_start - curr_address;
			curr_address = curr_entry->vme_start;
			curr_skip += skip;
			curr_offset += skip;
			curr_max_above -= skip;
			curr_max_below = 0;
		}

		/*
		 * Is the next entry at this level closer to the address (or
		 * deeper in the submap chain) than the one we had
		 * so far ?
		 */
		tmp_entry = curr_entry->vme_next;
		if (tmp_entry == vm_map_to_entry(curr_map)) {
			/* no next entry at this level */
		} else if (tmp_entry->vme_start >=
		    curr_address + curr_max_above) {
			/*
			 * tmp_entry is beyond the scope of what we mapped of
			 * this submap in the upper level: ignore it.
			 */
		} else if ((next_entry == NULL) ||
		    (tmp_entry->vme_start + curr_offset <=
		    next_entry->vme_start + next_offset)) {
			/*
			 * We didn't have a "next_entry" or this one is
			 * closer to the address we're looking for:
			 * use this "tmp_entry" as the new "next_entry".
			 */
			if (next_entry != NULL) {
				/* unlock the last "next_map" */
				if (next_map != curr_map && not_in_kdp) {
					vm_map_unlock_read(next_map);
				}
			}
			next_entry = tmp_entry;
			next_map = curr_map;
			next_depth = curr_depth;
			next_address = next_entry->vme_start;
			next_skip = curr_skip;
			next_skip += (next_address - curr_address);
			next_offset = curr_offset;
			next_offset += (next_address - curr_address);
			next_max_above = MIN(next_max_above, curr_max_above);
			next_max_above = MIN(next_max_above,
			    next_entry->vme_end - next_address);
			next_max_below = MIN(next_max_below, curr_max_below);
			next_max_below = MIN(next_max_below,
			    next_address - next_entry->vme_start);
		}

		/*
		 * "curr_max_{above,below}" allow us to keep track of the
		 * portion of the submap that is actually mapped at this level:
		 * the rest of that submap is irrelevant to us, since it's not
		 * mapped here.
		 * The relevant portion of the map starts at
		 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
		 */
		curr_max_above = MIN(curr_max_above,
		    curr_entry->vme_end - curr_address);
		curr_max_below = MIN(curr_max_below,
		    curr_address - curr_entry->vme_start);

		if (!curr_entry->is_sub_map ||
		    curr_depth >= user_max_depth) {
			/*
			 * We hit a leaf map or we reached the maximum depth
			 * we could, so stop looking.  Keep the current map
			 * locked.
			 */
			break;
		}

		/*
		 * Get down to the next submap level.
		 */

		/*
		 * Lock the next level and unlock the current level,
		 * unless we need to keep it locked to access the "next_entry"
		 * later.
		 */
		if (not_in_kdp) {
			vm_map_lock_read(VME_SUBMAP(curr_entry));
		}
		if (curr_map == next_map) {
			/* keep "next_map" locked in case we need it */
		} else {
			/* release this map */
			if (not_in_kdp) {
				vm_map_unlock_read(curr_map);
			}
		}

		/*
		 * Adjust the offset.  "curr_entry" maps the submap
		 * at relative address "curr_entry->vme_start" in the
		 * curr_map but skips the first "VME_OFFSET(curr_entry)"
		 * bytes of the submap.
		 * "curr_offset" always represents the offset of a virtual
		 * address in the curr_map relative to the absolute address
		 * space (i.e. the top-level VM map).
		 */
		curr_offset +=
		    (VME_OFFSET(curr_entry) - curr_entry->vme_start);
		curr_address = user_address + curr_offset;
		/* switch to the submap */
		curr_map = VME_SUBMAP(curr_entry);
		curr_depth++;
		curr_entry = NULL;
	}

// LP64todo: all the current tools are 32bit, obviously never worked for 64b
// so probably should be a real 32b ID vs. ptr.
// Current users just check for equality

	if (curr_entry == NULL) {
		/* no VM region contains the address... */

		if (do_region_footprint &&	/* we want footprint numbers */
		    next_entry == NULL &&	/* & there are no more regions */
		    /* & we haven't already provided our fake region: */
		    user_address <= vm_map_last_entry(map)->vme_end) {
			ledger_amount_t ledger_resident, ledger_compressed;

			/*
			 * Add a fake memory region to account for
			 * purgeable and/or ledger-tagged memory that
			 * counts towards this task's memory footprint,
			 * i.e. the resident/compressed pages of non-volatile
			 * objects owned by that task.
			 */
			task_ledgers_footprint(map->pmap->ledger,
			    &ledger_resident,
			    &ledger_compressed);
			if (ledger_resident + ledger_compressed == 0) {
				/* no purgeable memory usage to report */
				return KERN_INVALID_ADDRESS;
			}
			/* fake region to show nonvolatile footprint */
			if (look_for_pages) {
				submap_info->protection = VM_PROT_DEFAULT;
				submap_info->max_protection = VM_PROT_DEFAULT;
				submap_info->inheritance = VM_INHERIT_DEFAULT;
				submap_info->offset = 0;
				submap_info->user_tag = -1;
				submap_info->pages_resident = (unsigned int) (ledger_resident / PAGE_SIZE);
				submap_info->pages_shared_now_private = 0;
				submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / PAGE_SIZE);
				submap_info->pages_dirtied = submap_info->pages_resident;
				submap_info->ref_count = 1;
				submap_info->shadow_depth = 0;
				submap_info->external_pager = 0;
				submap_info->share_mode = SM_PRIVATE;
				submap_info->is_submap = 0;
				submap_info->behavior = VM_BEHAVIOR_DEFAULT;
				submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
				submap_info->user_wired_count = 0;
				submap_info->pages_reusable = 0;
			} else {
				short_info->user_tag = -1;
				short_info->offset = 0;
				short_info->protection = VM_PROT_DEFAULT;
				short_info->inheritance = VM_INHERIT_DEFAULT;
				short_info->max_protection = VM_PROT_DEFAULT;
				short_info->behavior = VM_BEHAVIOR_DEFAULT;
				short_info->user_wired_count = 0;
				short_info->is_submap = 0;
				short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
				short_info->external_pager = 0;
				short_info->shadow_depth = 0;
				short_info->share_mode = SM_PRIVATE;
				short_info->ref_count = 1;
			}
			*nesting_depth = 0;
			*size = (vm_map_size_t) (ledger_resident + ledger_compressed);
//			*address = user_address;
			*address = vm_map_last_entry(map)->vme_end;
			return KERN_SUCCESS;
		}

		if (next_entry == NULL) {
			/* ... and no VM region follows it either */
			return KERN_INVALID_ADDRESS;
		}
		/* ... gather info about the next VM region */
		curr_entry = next_entry;
		curr_map = next_map;	/* still locked ... */
		curr_address = next_address;
		curr_skip = next_skip;
		curr_offset = next_offset;
		curr_depth = next_depth;
		curr_max_above = next_max_above;
		curr_max_below = next_max_below;
	} else {
		/* we won't need "next_entry" after all */
		if (next_entry != NULL) {
			/* release "next_map" */
			if (next_map != curr_map && not_in_kdp) {
				vm_map_unlock_read(next_map);
			}
		}
	}
	next_entry = NULL;
	next_map = NULL;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_below = -1;
	next_max_above = -1;

	if (curr_entry->is_sub_map &&
	    curr_depth < user_max_depth) {
		/*
		 * We're not as deep as we could be:  we must have
		 * gone back up after not finding anything mapped
		 * below the original top-level map entry's.
		 * Let's move "curr_address" forward and recurse again.
		 */
		user_address = curr_address;
		goto recurse_again;
	}

	*nesting_depth = curr_depth;
	*size = curr_max_above + curr_max_below;
	*address = user_address + curr_skip - curr_max_below;

// LP64todo: all the current tools are 32bit, obviously never worked for 64b
// so probably should be a real 32b ID vs. ptr.
// Current users just check for equality
#define INFO_MAKE_OBJECT_ID(p)	((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))

	if (look_for_pages) {
		submap_info->user_tag = VME_ALIAS(curr_entry);
		submap_info->offset = VME_OFFSET(curr_entry);
		submap_info->protection = curr_entry->protection;
		submap_info->inheritance = curr_entry->inheritance;
		submap_info->max_protection = curr_entry->max_protection;
		submap_info->behavior = curr_entry->behavior;
		submap_info->user_wired_count = curr_entry->user_wired_count;
		submap_info->is_submap = curr_entry->is_sub_map;
		submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
	} else {
		short_info->user_tag = VME_ALIAS(curr_entry);
		short_info->offset = VME_OFFSET(curr_entry);
		short_info->protection = curr_entry->protection;
		short_info->inheritance = curr_entry->inheritance;
		short_info->max_protection = curr_entry->max_protection;
		short_info->behavior = curr_entry->behavior;
		short_info->user_wired_count = curr_entry->user_wired_count;
		short_info->is_submap = curr_entry->is_sub_map;
		short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
	}

	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.pages_reusable = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;
	extended.share_mode = SM_EMPTY;
	extended.ref_count = 0;

	if (not_in_kdp) {
		if (!curr_entry->is_sub_map) {
			vm_map_offset_t range_start, range_end;
			range_start = MAX((curr_address - curr_max_below),
			    curr_entry->vme_start);
			range_end = MIN((curr_address + curr_max_above),
			    curr_entry->vme_end);
			vm_map_region_walk(curr_map,
			    range_start,
			    curr_entry,
			    (VME_OFFSET(curr_entry) +
			    (range_start -
			    curr_entry->vme_start)),
			    range_end - range_start,
			    &extended,
			    look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
			if (extended.external_pager &&
			    extended.ref_count == 2 &&
			    extended.share_mode == SM_SHARED) {
				extended.share_mode = SM_PRIVATE;
			}
		} else {
			if (curr_entry->use_pmap) {
				extended.share_mode = SM_TRUESHARED;
			} else {
				extended.share_mode = SM_PRIVATE;
			}
			extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
		}
	}

	if (look_for_pages) {
		submap_info->pages_resident = extended.pages_resident;
		submap_info->pages_swapped_out = extended.pages_swapped_out;
		submap_info->pages_shared_now_private =
		    extended.pages_shared_now_private;
		submap_info->pages_dirtied = extended.pages_dirtied;
		submap_info->external_pager = extended.external_pager;
		submap_info->shadow_depth = extended.shadow_depth;
		submap_info->share_mode = extended.share_mode;
		submap_info->ref_count = extended.ref_count;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			submap_info->pages_reusable = extended.pages_reusable;
		}
		if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
			submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
		}
	} else {
		short_info->external_pager = extended.external_pager;
		short_info->shadow_depth = extended.shadow_depth;
		short_info->share_mode = extended.share_mode;
		short_info->ref_count = extended.ref_count;
	}

	if (not_in_kdp) {
		vm_map_unlock_read(curr_map);
	}

	return KERN_SUCCESS;
}
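/*
 * Illustrative sketch (not part of the kernel build): vm_map_region_recurse_64
 * is reached from user space through mach_vm_region_recurse().  A minimal
 * walker over a task's regions, submaps included, might look like this; the
 * function name is hypothetical.
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *	#include <stdio.h>
 *
 *	static void
 *	dump_regions(task_t task)
 *	{
 *		mach_vm_address_t addr = 0;
 *		mach_vm_size_t size = 0;
 *		natural_t depth = 0;
 *
 *		for (;;) {
 *			vm_region_submap_info_data_64_t info;
 *			mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *
 *			if (mach_vm_region_recurse(task, &addr, &size, &depth,
 *			    (vm_region_recurse_info_t)&info, &count) != KERN_SUCCESS) {
 *				break;
 *			}
 *			if (info.is_submap) {
 *				depth++;        // descend into the submap
 *				continue;
 *			}
 *			printf("0x%llx-0x%llx prot=%d depth=%u\n",
 *			    addr, addr + size, info.protection, depth);
 *			addr += size;           // move to the next region
 *		}
 *	}
 */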
/*
 *	vm_map_region:
 *
 *	User call to obtain information about a region in
 *	a task's address map. Currently, only one flavor is
 *	supported.
 *
 *	XXX The reserved and behavior fields cannot be filled
 *	in until the vm merge from the IK is completed, and
 *	vm_reserve is implemented.
 */
kern_return_t
vm_map_region(
	vm_map_t                map,
	vm_map_offset_t         *address,               /* IN/OUT */
	vm_map_size_t           *size,                  /* OUT */
	vm_region_flavor_t      flavor,                 /* IN */
	vm_region_info_t        info,                   /* OUT */
	mach_msg_type_number_t  *count,                 /* IN/OUT */
	mach_port_t             *object_name)           /* OUT */
{
	vm_map_entry_t          tmp_entry;
	vm_map_entry_t          entry;
	vm_map_offset_t         start;

	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	switch (flavor) {
	case VM_REGION_BASIC_INFO:
		/* legacy for old 32-bit objects info */
	{
		vm_region_basic_info_t  basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = (uint32_t)VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}

	case VM_REGION_BASIC_INFO_64:
	{
		vm_region_basic_info_64_t       basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
			return KERN_INVALID_ARGUMENT;
		}

		basic = (vm_region_basic_info_64_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT_64;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) {
			*object_name = IP_NULL;
		}
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	case VM_REGION_EXTENDED_INFO:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}
	/*fallthru*/
	case VM_REGION_EXTENDED_INFO__legacy:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
			return KERN_INVALID_ARGUMENT;
		}

		{
			vm_region_extended_info_t       extended;
			mach_msg_type_number_t original_count;

			extended = (vm_region_extended_info_t) info;

			vm_map_lock_read(map);

			start = *address;
			if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
				if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
					vm_map_unlock_read(map);
					return KERN_INVALID_ADDRESS;
				}
			} else {
				entry = tmp_entry;
			}
			start = entry->vme_start;

			extended->protection = entry->protection;
			extended->user_tag = VME_ALIAS(entry);
			extended->pages_resident = 0;
			extended->pages_swapped_out = 0;
			extended->pages_shared_now_private = 0;
			extended->pages_dirtied = 0;
			extended->external_pager = 0;
			extended->shadow_depth = 0;

			original_count = *count;
			if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
				*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
			} else {
				extended->pages_reusable = 0;
				*count = VM_REGION_EXTENDED_INFO_COUNT;
			}

			vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);

			if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
				extended->share_mode = SM_PRIVATE;
			}

			if (object_name) {
				*object_name = IP_NULL;
			}
			*address = start;
			*size = (entry->vme_end - start);

			vm_map_unlock_read(map);
			return KERN_SUCCESS;
		}
	case VM_REGION_TOP_INFO:
	{
		vm_region_top_info_t    top;

		if (*count < VM_REGION_TOP_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		top = (vm_region_top_info_t) info;
		*count = VM_REGION_TOP_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		} else {
			entry = tmp_entry;
		}
		start = entry->vme_start;

		top->private_pages_resident = 0;
		top->shared_pages_resident = 0;

		vm_map_region_top_walk(entry, top);

		if (object_name) {
			*object_name = IP_NULL;
		}
		*address = start;
		*size = (entry->vme_end - start);

		vm_map_unlock_read(map);
		return KERN_SUCCESS;
	}
	default:
		return KERN_INVALID_ARGUMENT;
	}
}
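
/*
 * The VM_REGION_* flavors handled above are reached from user space via
 * mach_vm_region().  The sketch below (hypothetical helper name
 * "dump_regions", not part of this file) shows the typical calling
 * convention for VM_REGION_BASIC_INFO_64 against the caller's own task.
 */
#if 0   /* user-space usage sketch only */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
dump_regions(void)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t size = 0;

	for (;;) {
		vm_region_basic_info_data_64_t info;
		mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
		mach_port_t object_name = MACH_PORT_NULL; /* always returned as IP_NULL */
		kern_return_t kr;

		kr = mach_vm_region(mach_task_self(), &addr, &size,
		    VM_REGION_BASIC_INFO_64,
		    (vm_region_info_t)&info, &count, &object_name);
		if (kr != KERN_SUCCESS) {
			break;  /* KERN_INVALID_ADDRESS once past the last entry */
		}
		printf("0x%llx-0x%llx prot=%d/%d shared=%d\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    info.protection, info.max_protection, (int)info.shared);
		addr += size;   /* advance to look up the next region */
	}
}
#endif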
#define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
	MIN((entry_size),                                               \
	    ((obj)->all_reusable ?                                      \
	    (obj)->wired_page_count :                                   \
	    (obj)->resident_page_count - (obj)->reusable_page_count))
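
/*
 * Worked example of OBJ_RESIDENT_COUNT() with made-up counts (illustration
 * only): for an object with resident_page_count = 60, reusable_page_count = 25
 * and all_reusable == FALSE mapped by a 100-page entry,
 *     OBJ_RESIDENT_COUNT(obj, 100) == MIN(100, 60 - 25) == 35;
 * if the same object were all_reusable with wired_page_count = 5,
 *     OBJ_RESIDENT_COUNT(obj, 100) == MIN(100, 5) == 5,
 * i.e. only wired pages are still charged once everything is reusable.
 */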
void
vm_map_region_top_walk(
	vm_map_entry_t          entry,
	vm_region_top_info_t    top)
{
	if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
		top->share_mode = SM_EMPTY;
		top->ref_count = 0;
		top->obj_id = 0;
		return;
	}

	{
		struct vm_object *obj, *tmp_obj;
		int             ref_count;
		uint32_t        entry_size;

		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);

		obj = VME_OBJECT(entry);

		vm_object_lock(obj);

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}

		assert(obj->reusable_page_count <= obj->resident_page_count);

		if (ref_count == 1) {
			top->private_pages_resident =
			    OBJ_RESIDENT_COUNT(obj, entry_size);
		} else {
			top->shared_pages_resident =
			    OBJ_RESIDENT_COUNT(obj, entry_size);
		}
		top->ref_count = ref_count;
		top->share_mode = SM_COW;

		while ((tmp_obj = obj->shadow)) {
			vm_object_lock(tmp_obj);
			vm_object_unlock(obj);
			obj = tmp_obj;

			if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
				ref_count--;
			}

			assert(obj->reusable_page_count <= obj->resident_page_count);
			top->shared_pages_resident +=
			    OBJ_RESIDENT_COUNT(obj, entry_size);
			top->ref_count += ref_count - 1;
		}

		if (entry->superpage_size) {
			top->share_mode = SM_LARGE_PAGE;
			top->shared_pages_resident = 0;
			top->private_pages_resident = entry_size;
		} else if (entry->needs_copy) {
			top->share_mode = SM_COW;
			top->shared_pages_resident =
			    OBJ_RESIDENT_COUNT(obj, entry_size);
		} else {
			if (ref_count == 1 ||
			    (ref_count == 2 && obj->named)) {
				top->share_mode = SM_PRIVATE;
				top->private_pages_resident =
				    OBJ_RESIDENT_COUNT(obj,
				    entry_size);
			} else {
				top->share_mode = SM_SHARED;
				top->shared_pages_resident =
				    OBJ_RESIDENT_COUNT(obj,
				    entry_size);
			}
		}
		top->ref_count = ref_count;

		/* XXX K64: obj_id will be truncated */
		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);

		vm_object_unlock(obj);
	}
}
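
/*
 * The "top" walk above backs the VM_REGION_TOP_INFO flavor.  A minimal
 * user-space sketch (hypothetical helper name, caller's own task):
 */
#if 0   /* user-space usage sketch only */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
query_top_info(mach_vm_address_t *addr, mach_vm_size_t *size,
    vm_region_top_info_data_t *top)
{
	mach_msg_type_number_t count = VM_REGION_TOP_INFO_COUNT;
	mach_port_t object_name = MACH_PORT_NULL;

	/* Fills private/shared resident page counts, share_mode and obj_id. */
	return mach_vm_region(mach_task_self(), addr, size,
	           VM_REGION_TOP_INFO, (vm_region_info_t)top,
	           &count, &object_name);
}
#endif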
void
vm_map_region_walk(
	vm_map_t                map,
	vm_map_offset_t         va,
	vm_map_entry_t          entry,
	vm_object_offset_t      offset,
	vm_object_size_t        range,
	vm_region_extended_info_t extended,
	boolean_t               look_for_pages,
	mach_msg_type_number_t  count)
{
	struct vm_object *obj, *tmp_obj;
	vm_map_offset_t last_offset;
	int             i;
	int             ref_count;
	struct vm_object *shadow_object;
	int             shadow_depth;
	boolean_t       do_region_footprint;

	do_region_footprint = task_self_region_footprint();

	if ((VME_OBJECT(entry) == 0) ||
	    (entry->is_sub_map) ||
	    (VME_OBJECT(entry)->phys_contiguous &&
	    !entry->superpage_size)) {
		extended->share_mode = SM_EMPTY;
		extended->ref_count = 0;
		return;
	}

	if (entry->superpage_size) {
		extended->shadow_depth = 0;
		extended->share_mode = SM_LARGE_PAGE;
		extended->ref_count = 1;
		extended->external_pager = 0;
		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
		extended->shadow_depth = 0;
		return;
	}

	obj = VME_OBJECT(entry);

	vm_object_lock(obj);

	if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
		ref_count--;
	}

	if (look_for_pages) {
		for (last_offset = offset + range;
		    offset < last_offset;
		    offset += PAGE_SIZE_64, va += PAGE_SIZE) {
			if (do_region_footprint) {
				int disp;

				disp = 0;
				if (map->has_corpse_footprint) {
					/*
					 * Query the page info data we saved
					 * while forking the corpse.
					 */
					vm_map_corpse_footprint_query_page_info(
						map,
						va,
						&disp);
				} else {
					/*
					 * Query the pmap.
					 */
					pmap_query_page_info(map->pmap,
					    va,
					    &disp);
				}
				if (disp & PMAP_QUERY_PAGE_PRESENT) {
					if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
						extended->pages_resident++;
					}
					if (disp & PMAP_QUERY_PAGE_REUSABLE) {
						extended->pages_reusable++;
					} else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
					    (disp & PMAP_QUERY_PAGE_ALTACCT)) {
						/* alternate accounting */
					} else {
						extended->pages_dirtied++;
					}
				} else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
					if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
						/* alternate accounting */
					} else {
						extended->pages_swapped_out++;
					}
				}
				/* deal with alternate accounting */
				if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
				    /* && not tagged as no-footprint? */
				    VM_OBJECT_OWNER(obj) != NULL &&
				    VM_OBJECT_OWNER(obj)->map == map) {
					if ((((va
					    - entry->vme_start
					    + VME_OFFSET(entry))
					    / PAGE_SIZE) <
					    (obj->resident_page_count +
					    vm_compressor_pager_get_count(obj->pager)))) {
						/*
						 * Non-volatile purgeable object owned
						 * by this task: report the first
						 * "#resident + #compressed" pages as
						 * "resident" (to show that they
						 * contribute to the footprint) but not
						 * "dirty" (to avoid double-counting
						 * with the fake "non-volatile" region
						 * we'll report at the end of the
						 * address space to account for all
						 * (mapped or not) non-volatile memory
						 * owned by this task.
						 */
						extended->pages_resident++;
					}
				} else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
				    obj->purgable == VM_PURGABLE_EMPTY) &&
				    /* && not tagged as no-footprint? */
				    VM_OBJECT_OWNER(obj) != NULL &&
				    VM_OBJECT_OWNER(obj)->map == map) {
					if ((((va
					    - entry->vme_start
					    + VME_OFFSET(entry))
					    / PAGE_SIZE) <
					    obj->wired_page_count)) {
						/*
						 * Volatile|empty purgeable object owned
						 * by this task: report the first
						 * "#wired" pages as "resident" (to
						 * show that they contribute to the
						 * footprint) but not "dirty" (to avoid
						 * double-counting with the fake
						 * "non-volatile" region we'll report
						 * at the end of the address space to
						 * account for all (mapped or not)
						 * non-volatile memory owned by this
						 * task.
						 */
						extended->pages_resident++;
					}
				} else if (obj->purgable != VM_PURGABLE_DENY) {
					/*
					 * Pages from purgeable objects
					 * will be reported as dirty
					 * appropriately in an extra
					 * fake memory region at the end of
					 * the address space.
					 */
				} else if (entry->iokit_acct) {
					/*
					 * IOKit mappings are considered
					 * as fully dirty for footprint's
					 * sake.
					 */
					extended->pages_dirtied++;
				}
				continue;
			}

			vm_map_region_look_for_page(map, va, obj,
			    offset, ref_count,
			    0, extended, count);
		}

		if (do_region_footprint) {
			goto collect_object_info;
		}
	} else {
collect_object_info:
		shadow_object = obj->shadow;
		shadow_depth = 0;

		if (!(obj->internal)) {
			extended->external_pager = 1;
		}

		if (shadow_object != VM_OBJECT_NULL) {
			vm_object_lock(shadow_object);
			for (;
			    shadow_object != VM_OBJECT_NULL;
			    shadow_depth++) {
				vm_object_t     next_shadow;

				if (!(shadow_object->internal)) {
					extended->external_pager = 1;
				}

				next_shadow = shadow_object->shadow;
				if (next_shadow) {
					vm_object_lock(next_shadow);
				}
				vm_object_unlock(shadow_object);
				shadow_object = next_shadow;
			}
		}
		extended->shadow_depth = shadow_depth;
	}

	if (extended->shadow_depth || entry->needs_copy) {
		extended->share_mode = SM_COW;
	} else {
		if (ref_count == 1) {
			extended->share_mode = SM_PRIVATE;
		} else {
			if (obj->true_share) {
				extended->share_mode = SM_TRUESHARED;
			} else {
				extended->share_mode = SM_SHARED;
			}
		}
	}

	extended->ref_count = ref_count - extended->shadow_depth;

	for (i = 0; i < extended->shadow_depth; i++) {
		if ((tmp_obj = obj->shadow) == 0) {
			break;
		}
		vm_object_lock(tmp_obj);
		vm_object_unlock(obj);

		if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
			ref_count--;
		}

		extended->ref_count += ref_count;
		obj = tmp_obj;
	}
	vm_object_unlock(obj);

	if (extended->share_mode == SM_SHARED) {
		vm_map_entry_t  cur;
		vm_map_entry_t  last;
		int             my_refs;

		obj = VME_OBJECT(entry);
		last = vm_map_to_entry(map);
		my_refs = 0;

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
			ref_count--;
		}
		for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
			my_refs += vm_map_region_count_obj_refs(cur, obj);
		}

		if (my_refs == ref_count) {
			extended->share_mode = SM_PRIVATE_ALIASED;
		} else if (my_refs > 1) {
			extended->share_mode = SM_SHARED_ALIASED;
		}
	}
}
/* object is locked on entry and locked on return */

static void
vm_map_region_look_for_page(
	__unused vm_map_t               map,
	__unused vm_map_offset_t        va,
	vm_object_t                     object,
	vm_object_offset_t              offset,
	int                             max_refcnt,
	unsigned short                  depth,
	vm_region_extended_info_t       extended,
	mach_msg_type_number_t          count)
{
	vm_page_t       p;
	vm_object_t     shadow;
	int             ref_count;
	vm_object_t     caller_object;

	shadow = object->shadow;
	caller_object = object;

	while (TRUE) {
		if (!(object->internal)) {
			extended->external_pager = 1;
		}

		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			if (shadow && (max_refcnt == 1)) {
				extended->pages_shared_now_private++;
			}

			if (!p->vmp_fictitious &&
			    (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
				extended->pages_dirtied++;
			} else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
				if (p->vmp_reusable || object->all_reusable) {
					extended->pages_reusable++;
				}
			}

			extended->pages_resident++;

			if (object != caller_object) {
				vm_object_unlock(object);
			}

			return;
		}
		if (object->internal &&
		    object->alive &&
		    !object->terminating &&
		    object->pager_ready) {
			if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
			    == VM_EXTERNAL_STATE_EXISTS) {
				/* the pager has that page */
				extended->pages_swapped_out++;
				if (object != caller_object) {
					vm_object_unlock(object);
				}
				return;
			}
		}

		if (shadow) {
			vm_object_lock(shadow);

			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
				ref_count--;
			}

			if (++depth > extended->shadow_depth) {
				extended->shadow_depth = depth;
			}

			if (ref_count > max_refcnt) {
				max_refcnt = ref_count;
			}

			if (object != caller_object) {
				vm_object_unlock(object);
			}

			offset = offset + object->vo_shadow_offset;
			object = shadow;
			shadow = object->shadow;
			continue;
		}
		if (object != caller_object) {
			vm_object_unlock(object);
		}
		break;
	}
}
static int
vm_map_region_count_obj_refs(
	vm_map_entry_t  entry,
	vm_object_t     object)
{
	int             ref_count;
	vm_object_t     chk_obj;
	vm_object_t     tmp_obj;

	if (VME_OBJECT(entry) == 0) {
		return 0;
	}

	if (entry->is_sub_map) {
		return 0;
	} else {
		ref_count = 0;

		chk_obj = VME_OBJECT(entry);
		vm_object_lock(chk_obj);

		while (chk_obj) {
			if (chk_obj == object) {
				ref_count++;
			}
			tmp_obj = chk_obj->shadow;
			if (tmp_obj) {
				vm_object_lock(tmp_obj);
			}
			vm_object_unlock(chk_obj);

			chk_obj = tmp_obj;
		}
	}
	return ref_count;
}
/*
 *	Routine:	vm_map_simplify
 *
 *	Description:
 *		Attempt to simplify the map representation in
 *		the vicinity of the given starting address.
 *	Note:
 *		This routine is intended primarily to keep the
 *		kernel maps more compact -- they generally don't
 *		benefit from the "expand a map entry" technology
 *		at allocation time because the adjacent entry
 *		is often wired down.
 */
void
vm_map_simplify_entry(
	vm_map_t        map,
	vm_map_entry_t  this_entry)
{
	vm_map_entry_t  prev_entry;

	counter(c_vm_map_simplify_entry_called++);

	prev_entry = this_entry->vme_prev;

	if ((this_entry != vm_map_to_entry(map)) &&
	    (prev_entry != vm_map_to_entry(map)) &&

	    (prev_entry->vme_end == this_entry->vme_start) &&

	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
	    (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
	    ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
	    prev_entry->vme_start))
	    == VME_OFFSET(this_entry)) &&

	    (prev_entry->behavior == this_entry->behavior) &&
	    (prev_entry->needs_copy == this_entry->needs_copy) &&
	    (prev_entry->protection == this_entry->protection) &&
	    (prev_entry->max_protection == this_entry->max_protection) &&
	    (prev_entry->inheritance == this_entry->inheritance) &&
	    (prev_entry->use_pmap == this_entry->use_pmap) &&
	    (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
	    (prev_entry->no_cache == this_entry->no_cache) &&
	    (prev_entry->permanent == this_entry->permanent) &&
	    (prev_entry->map_aligned == this_entry->map_aligned) &&
	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
	    (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
	    /* from_reserved_zone: OK if that field doesn't match */
	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
	    (prev_entry->vme_resilient_codesign ==
	    this_entry->vme_resilient_codesign) &&
	    (prev_entry->vme_resilient_media ==
	    this_entry->vme_resilient_media) &&
	    (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&

	    (prev_entry->wired_count == this_entry->wired_count) &&
	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&

	    ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
	    (prev_entry->in_transition == FALSE) &&
	    (this_entry->in_transition == FALSE) &&
	    (prev_entry->needs_wakeup == FALSE) &&
	    (this_entry->needs_wakeup == FALSE) &&
	    (prev_entry->is_shared == FALSE) &&
	    (this_entry->is_shared == FALSE) &&
	    (prev_entry->superpage_size == FALSE) &&
	    (this_entry->superpage_size == FALSE)
	    ) {
		vm_map_store_entry_unlink(map, prev_entry);
		assert(prev_entry->vme_start < this_entry->vme_end);
		if (prev_entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
			    VM_MAP_PAGE_MASK(map)));
		}
		this_entry->vme_start = prev_entry->vme_start;
		VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));

		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, this_entry, TRUE);
		}

		if (prev_entry->is_sub_map) {
			vm_map_deallocate(VME_SUBMAP(prev_entry));
		} else {
			vm_object_deallocate(VME_OBJECT(prev_entry));
		}
		vm_map_entry_dispose(map, prev_entry);
		SAVE_HINT_MAP_WRITE(map, this_entry);
		counter(c_vm_map_simplified++);
	}
}

void
vm_map_simplify(
	vm_map_t        map,
	vm_map_offset_t start)
{
	vm_map_entry_t  this_entry;

	vm_map_lock(map);
	if (vm_map_lookup_entry(map, start, &this_entry)) {
		vm_map_simplify_entry(map, this_entry);
		vm_map_simplify_entry(map, this_entry->vme_next);
	}
	counter(c_vm_map_simplify_called++);
	vm_map_unlock(map);
}
void
vm_map_simplify_range(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;

	/*
	 * The map should be locked (for "write") by the caller.
	 */

	if (start >= end) {
		/* invalid address range */
		return;
	}

	start = vm_map_trunc_page(start,
	    VM_MAP_PAGE_MASK(map));
	end = vm_map_round_page(end,
	    VM_MAP_PAGE_MASK(map));

	if (!vm_map_lookup_entry(map, start, &entry)) {
		/* "start" is not mapped and "entry" ends before "start" */
		if (entry == vm_map_to_entry(map)) {
			/* start with first entry in the map */
			entry = vm_map_first_entry(map);
		} else {
			/* start with next entry */
			entry = entry->vme_next;
		}
	}

	while (entry != vm_map_to_entry(map) &&
	    entry->vme_start <= end) {
		/* try and coalesce "entry" with its previous entry */
		vm_map_simplify_entry(map, entry);
		entry = entry->vme_next;
	}
}
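
/*
 * Illustration (made-up addresses) of what the checks in
 * vm_map_simplify_entry() allow: two neighbouring entries
 *
 *     [0x1000, 0x3000) object A, offset 0x0,    prot rw-, wired 0
 *     [0x3000, 0x5000) object A, offset 0x2000, prot rw-, wired 0
 *
 * are contiguous in both address and object offset and agree on every
 * compared attribute, so the first entry is unlinked and the second grows
 * to [0x1000, 0x5000).  If any compared attribute differed (protection,
 * alias, wiring, ...), both entries would be left untouched.
 */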
/*
 *	Routine:	vm_map_machine_attribute
 *	Purpose:
 *		Provide machine-specific attributes to mappings,
 *		such as cachability etc. for machines that provide
 *		them.  NUMA architectures and machines with big/strange
 *		caches will use this.
 *	Note:
 *		Responsibilities for locking and checking are handled here,
 *		everything else in the pmap module. If any non-volatile
 *		information must be kept, the pmap module should handle
 *		it itself. [This assumes that attributes do not
 *		need to be inherited, which seems ok to me]
 */
kern_return_t
vm_map_machine_attribute(
	vm_map_t                        map,
	vm_map_offset_t                 start,
	vm_map_offset_t                 end,
	vm_machine_attribute_t          attribute,
	vm_machine_attribute_val_t*     value)          /* IN/OUT */
{
	kern_return_t   ret;
	vm_map_size_t   sync_size;
	vm_map_entry_t  entry;

	if (start < vm_map_min(map) || end > vm_map_max(map)) {
		return KERN_INVALID_ADDRESS;
	}

	/* Figure how much memory we need to flush (in page increments) */
	sync_size = end - start;

	vm_map_lock(map);

	if (attribute != MATTR_CACHE) {
		/* If we don't have to find physical addresses, we */
		/* don't have to do an explicit traversal here.    */
		ret = pmap_attribute(map->pmap, start, end - start,
		    attribute, value);
		vm_map_unlock(map);
		return ret;
	}

	ret = KERN_SUCCESS;             /* Assume it all worked */

	while (sync_size) {
		if (vm_map_lookup_entry(map, start, &entry)) {
			vm_map_size_t   sub_size;
			if ((entry->vme_end - start) > sync_size) {
				sub_size = sync_size;
				sync_size = 0;
			} else {
				sub_size = entry->vme_end - start;
				sync_size -= sub_size;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t sub_start;
				vm_map_offset_t sub_end;

				sub_start = (start - entry->vme_start)
				    + VME_OFFSET(entry);
				sub_end = sub_start + sub_size;
				vm_map_machine_attribute(
					VME_SUBMAP(entry),
					sub_start,
					sub_end,
					attribute, value);
			} else {
				if (VME_OBJECT(entry)) {
					vm_page_t               m;
					vm_object_t             object;
					vm_object_t             base_object;
					vm_object_t             last_object;
					vm_object_offset_t      offset;
					vm_object_offset_t      base_offset;
					vm_map_size_t           range;
					range = sub_size;
					offset = (start - entry->vme_start)
					    + VME_OFFSET(entry);
					base_offset = offset;
					object = VME_OBJECT(entry);
					base_object = object;
					last_object = NULL;

					vm_object_lock(object);

					while (range) {
						m = vm_page_lookup(
							object, offset);

						if (m && !m->vmp_fictitious) {
							ret =
							    pmap_attribute_cache_sync(
								VM_PAGE_GET_PHYS_PAGE(m),
								PAGE_SIZE,
								attribute, value);
						} else if (object->shadow) {
							offset = offset + object->vo_shadow_offset;
							last_object = object;
							object = object->shadow;
							vm_object_lock(last_object->shadow);
							vm_object_unlock(last_object);
							continue;
						}
						range -= PAGE_SIZE;

						if (base_object != object) {
							vm_object_unlock(object);
							vm_object_lock(base_object);
							object = base_object;
						}
						/* Bump to the next page */
						base_offset += PAGE_SIZE;
						offset = base_offset;
					}
					vm_object_unlock(object);
				}
			}
		} else {
			vm_map_unlock(map);
			return KERN_FAILURE;
		}
	}

	vm_map_unlock(map);

	return ret;
}
/*
 *	vm_map_behavior_set:
 *
 *	Sets the paging reference behavior of the specified address
 *	range in the target map.  Paging reference behavior affects
 *	how pagein operations resulting from faults on the map will be
 *	clustered.
 */
kern_return_t
vm_map_behavior_set(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_behavior_t   new_behavior)
{
	vm_map_entry_t  entry;
	vm_map_entry_t  temp_entry;

	if (start > end ||
	    start < vm_map_min(map) ||
	    end > vm_map_max(map)) {
		return KERN_NO_SPACE;
	}

	switch (new_behavior) {
	/*
	 * This first block of behaviors all set a persistent state on the specified
	 * memory range.  All we have to do here is to record the desired behavior
	 * in the vm_map_entry_t's.
	 */

	case VM_BEHAVIOR_DEFAULT:
	case VM_BEHAVIOR_RANDOM:
	case VM_BEHAVIOR_SEQUENTIAL:
	case VM_BEHAVIOR_RSEQNTL:
	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
		vm_map_lock(map);

		/*
		 * The entire address range must be valid for the map.
		 * Note that vm_map_range_check() does a
		 * vm_map_lookup_entry() internally and returns the
		 * entry containing the start of the address range if
		 * the entire range is valid.
		 */
		if (vm_map_range_check(map, start, end, &temp_entry)) {
			entry = temp_entry;
			vm_map_clip_start(map, entry, start);
		} else {
			vm_map_unlock(map);
			return KERN_INVALID_ADDRESS;
		}

		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
			vm_map_clip_end(map, entry, end);
			if (entry->is_sub_map) {
				assert(!entry->use_pmap);
			}

			if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
				entry->zero_wired_pages = TRUE;
			} else {
				entry->behavior = new_behavior;
			}
			entry = entry->vme_next;
		}

		vm_map_unlock(map);
		break;

	/*
	 * The rest of these are different from the above in that they cause
	 * an immediate action to take place as opposed to setting a behavior that
	 * affects future actions.
	 */

	case VM_BEHAVIOR_WILLNEED:
		return vm_map_willneed(map, start, end);

	case VM_BEHAVIOR_DONTNEED:
		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_FREE:
		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

	case VM_BEHAVIOR_REUSABLE:
		return vm_map_reusable_pages(map, start, end);

	case VM_BEHAVIOR_REUSE:
		return vm_map_reuse_pages(map, start, end);

	case VM_BEHAVIOR_CAN_REUSE:
		return vm_map_can_reuse(map, start, end);

#if MACH_ASSERT
	case VM_BEHAVIOR_PAGEOUT:
		return vm_map_pageout(map, start, end);
#endif /* MACH_ASSERT */

	default:
		return KERN_INVALID_ARGUMENT;
	}

	return KERN_SUCCESS;
}
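
/*
 * User space reaches vm_map_behavior_set() through mach_vm_behavior_set()
 * (and indirectly through madvise(2)).  A minimal sketch, assuming "addr"
 * and "len" describe an existing mapping in the caller's task:
 */
#if 0   /* user-space usage sketch only */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
hint_sequential(mach_vm_address_t addr, mach_vm_size_t len)
{
	/* Persistent hint: future faults in [addr, addr+len) cluster forward. */
	return mach_vm_behavior_set(mach_task_self(), addr, len,
	           VM_BEHAVIOR_SEQUENTIAL);
}
#endif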
/*
 * Internals for madvise(MADV_WILLNEED) system call.
 *
 * The implementation is to do:-
 * a) read-ahead if the mapping corresponds to a mapped regular file
 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
 */

static kern_return_t
vm_map_willneed(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end
	)
{
	vm_map_entry_t                  entry;
	vm_object_t                     object;
	memory_object_t                 pager;
	struct vm_object_fault_info     fault_info = {};
	kern_return_t                   kr;
	vm_object_size_t                len;
	vm_object_offset_t              offset;

	fault_info.interruptible = THREAD_UNINT;        /* ignored value */
	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
	fault_info.stealth = TRUE;

	/*
	 * The MADV_WILLNEED operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && start < end;) {
		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.  After that, the offset will always be zero to
		 * correspond to the beginning of the current vm_map_entry.
		 */
		offset = (start - entry->vme_start) + VME_OFFSET(entry);

		/*
		 * Set the length so we don't go beyond the end of the
		 * map_entry or beyond the end of the range we were given.
		 * This range could span also multiple map entries all of which
		 * map different files, so make sure we only do the right amount
		 * of I/O for each object.  Note that it's possible for there
		 * to be multiple map entries all referring to the same object
		 * but with different page permissions, but it's not worth
		 * trying to optimize that case.
		 */
		len = MIN(entry->vme_end - start, end - start);

		if ((vm_size_t) len != len) {
			/* 32-bit overflow */
			len = (vm_size_t) (0 - PAGE_SIZE);
		}
		fault_info.cluster_size = (vm_size_t) len;
		fault_info.lo_offset = offset;
		fault_info.hi_offset = offset + len;
		fault_info.user_tag = VME_ALIAS(entry);
		fault_info.pmap_options = 0;
		if (entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap)) {
			fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
		}

		/*
		 * If the entry is a submap OR there's no read permission
		 * to this mapping, then just skip it.
		 */
		if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
			entry = entry->vme_next;
			start = entry->vme_start;
			continue;
		}

		object = VME_OBJECT(entry);

		if (object == NULL ||
		    (object && object->internal)) {
			/*
			 * Memory range backed by anonymous memory.
			 */
			vm_size_t region_size = 0, effective_page_size = 0;
			vm_map_offset_t addr = 0, effective_page_mask = 0;

			region_size = len;
			addr = start;

			effective_page_mask = MAX(vm_map_page_mask(current_map()), PAGE_MASK);
			effective_page_size = effective_page_mask + 1;

			vm_map_unlock_read(map);

			while (region_size) {
				vm_pre_fault(
					vm_map_trunc_page(addr, effective_page_mask),
					VM_PROT_READ | VM_PROT_WRITE);

				region_size -= effective_page_size;
				addr += effective_page_size;
			}
		} else {
			/*
			 * Find the file object backing this map entry.  If there is
			 * none, then we simply ignore the "will need" advice for this
			 * entry and go on to the next one.
			 */
			if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
				entry = entry->vme_next;
				start = entry->vme_start;
				continue;
			}

			vm_object_paging_begin(object);
			pager = object->pager;
			vm_object_unlock(object);

			/*
			 * The data_request() could take a long time, so let's
			 * release the map lock to avoid blocking other threads.
			 */
			vm_map_unlock_read(map);

			/*
			 * Get the data from the object asynchronously.
			 *
			 * Note that memory_object_data_request() places limits on the
			 * amount of I/O it will do.  Regardless of the len we
			 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
			 * silently truncates the len to that size.  This isn't
			 * necessarily bad since madvise shouldn't really be used to
			 * page in unlimited amounts of data.  Other Unix variants
			 * limit the willneed case as well.  If this turns out to be an
			 * issue for developers, then we can always adjust the policy
			 * here and still be backwards compatible since this is all
			 * just "advice".
			 */
			kr = memory_object_data_request(
				pager,
				offset + object->paging_offset,
				0,      /* ignored */
				VM_PROT_READ,
				(memory_object_fault_info_t)&fault_info);

			vm_object_lock(object);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			/*
			 * If we couldn't do the I/O for some reason, just give up on
			 * the madvise.  We still return success to the user since
			 * madvise isn't supposed to fail when the advice can't be
			 * taken.
			 */

			if (kr != KERN_SUCCESS) {
				return KERN_SUCCESS;
			}
		}

		start += len;
		if (start >= end) {
			/* done */
			return KERN_SUCCESS;
		}

		/* look up next entry */
		vm_map_lock_read(map);
		if (!vm_map_lookup_entry(map, start, &entry)) {
			/*
			 * There's a new hole in the address range.
			 */
			vm_map_unlock_read(map);
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
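
/*
 * vm_map_willneed() is what ultimately services madvise(MADV_WILLNEED).
 * A minimal user-space sketch of the calling side (helper and path names
 * are made up):
 */
#if 0   /* user-space usage sketch only */
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

static void *
map_and_prefetch(const char *path, size_t *lenp)
{
	int fd = open(path, O_RDONLY);
	struct stat st;
	void *p;

	if (fd < 0) {
		return NULL;
	}
	if (fstat(fd, &st) != 0) {
		close(fd);
		return NULL;
	}
	p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	if (p == MAP_FAILED) {
		return NULL;
	}
	/* Ask for asynchronous read-ahead; failing to prefetch is not fatal. */
	(void)madvise(p, (size_t)st.st_size, MADV_WILLNEED);
	*lenp = (size_t)st.st_size;
	return p;
}
#endif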
static boolean_t
vm_map_entry_is_reusable(
	vm_map_entry_t entry)
{
	/* Only user map entries */

	vm_object_t object;

	if (entry->is_sub_map) {
		return FALSE;
	}

	switch (VME_ALIAS(entry)) {
	case VM_MEMORY_MALLOC:
	case VM_MEMORY_MALLOC_SMALL:
	case VM_MEMORY_MALLOC_LARGE:
	case VM_MEMORY_REALLOC:
	case VM_MEMORY_MALLOC_TINY:
	case VM_MEMORY_MALLOC_LARGE_REUSABLE:
	case VM_MEMORY_MALLOC_LARGE_REUSED:
		/*
		 * This is a malloc() memory region: check if it's still
		 * in its original state and can be re-used for more
		 * malloc() allocations.
		 */
		break;
	default:
		/*
		 * Not a malloc() memory region: let the caller decide if
		 * it's reusable.
		 */
		return TRUE;
	}

	if (/*entry->is_shared ||*/
	    entry->is_sub_map ||
	    entry->in_transition ||
	    entry->protection != VM_PROT_DEFAULT ||
	    entry->max_protection != VM_PROT_ALL ||
	    entry->inheritance != VM_INHERIT_DEFAULT ||
	    entry->no_cache ||
	    entry->permanent ||
	    entry->superpage_size != FALSE ||
	    entry->zero_wired_pages ||
	    entry->wired_count != 0 ||
	    entry->user_wired_count != 0) {
		return FALSE;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		return TRUE;
	}
	if (
#if 0
		/*
		 * Let's proceed even if the VM object is potentially
		 * shared.
		 * We check for this later when processing the actual
		 * VM pages, so the contents will be safe if shared.
		 *
		 * But we can still mark this memory region as "reusable" to
		 * acknowledge that the caller did let us know that the memory
		 * could be re-used and should not be penalized for holding
		 * on to it.  This allows its "resident size" to not include
		 * the reusable range.
		 */
		object->ref_count == 1 &&
#endif
		object->wired_page_count == 0 &&
		object->copy == VM_OBJECT_NULL &&
		object->shadow == VM_OBJECT_NULL &&
		object->internal &&
		object->purgable == VM_PURGABLE_DENY &&
		object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
		!object->true_share &&
		object->wimg_bits == VM_WIMG_USE_DEFAULT &&
		!object->code_signed) {
		return TRUE;
	}
	return FALSE;
}
static kern_return_t
vm_map_reuse_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t          entry;
	vm_object_t             object;
	vm_object_offset_t      start_offset, end_offset;

	/*
	 * The MADV_REUSE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reuse_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reuse_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
		} else {
			start_offset = 0;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object != VM_OBJECT_NULL) {
			vm_object_lock(object);
			vm_object_reuse_pages(object, start_offset, end_offset,
			    TRUE);
			vm_object_unlock(object);
		}

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reuse_pages_success++;
	return KERN_SUCCESS;
}
static kern_return_t
vm_map_reusable_pages(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t          entry;
	vm_object_t             object;
	vm_object_offset_t      start_offset, end_offset;
	vm_map_offset_t         pmap_offset;

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.reusable_pages_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		int kill_pages = 0;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_INVALID_ADDRESS;
		}

		if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
			/* not writable: can't discard contents */
			vm_map_unlock_read(map);
			vm_page_stats_reusable.reusable_nonwritable++;
			vm_page_stats_reusable.reusable_pages_failure++;
			return KERN_PROTECTION_FAILURE;
		}

		/*
		 * The first time through, the start address could be anywhere
		 * within the vm_map_entry we found.  So adjust the offset to
		 * correspond.
		 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
			pmap_offset = start;
		} else {
			start_offset = 0;
			pmap_offset = entry->vme_start;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += VME_OFFSET(entry);
		end_offset += VME_OFFSET(entry);

		assert(!entry->is_sub_map);
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}

		vm_object_lock(object);
		if (((object->ref_count == 1) ||
		    (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
		    object->copy == VM_OBJECT_NULL)) &&
		    object->shadow == VM_OBJECT_NULL &&
		    /*
		     * "iokit_acct" entries are billed for their virtual size
		     * (rather than for their resident pages only), so they
		     * wouldn't benefit from making pages reusable, and it
		     * would be hard to keep track of pages that are both
		     * "iokit_acct" and "reusable" in the pmap stats and
		     * ledgers.
		     */
		    !(entry->iokit_acct ||
		    (!entry->is_sub_map && !entry->use_pmap))) {
			if (object->ref_count != 1) {
				vm_page_stats_reusable.reusable_shared++;
			}
			kill_pages = 1;
		} else {
			kill_pages = -1;
		}
		if (kill_pages != -1) {
			vm_object_deactivate_pages(object,
			    start_offset,
			    end_offset - start_offset,
			    kill_pages,
			    TRUE /*reusable_pages*/,
			    map->pmap,
			    pmap_offset);
		} else {
			vm_page_stats_reusable.reusable_pages_shared++;
		}
		vm_object_unlock(object);

		if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
		    VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reusable_pages_success++;
	return KERN_SUCCESS;
}
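
/*
 * The reusable/reuse pair above backs Darwin's MADV_FREE_REUSABLE and
 * MADV_FREE_REUSE advice, which malloc-style allocators use to donate and
 * reclaim page contents without unmapping.  A hedged sketch (buffer name
 * and size are arbitrary):
 */
#if 0   /* user-space usage sketch only */
#include <sys/mman.h>

static void
recycle_buffer(void *buf, size_t size)
{
	/*
	 * Contents may be discarded and the pages stop counting against
	 * the task's resident/footprint size...
	 */
	(void)madvise(buf, size, MADV_FREE_REUSABLE);

	/*
	 * ...until the allocator wants the range back, at which point it
	 * tells the VM it is reusing the range before touching the memory.
	 */
	(void)madvise(buf, size, MADV_FREE_REUSE);
}
#endif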
static kern_return_t
vm_map_can_reuse(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);
	assert(map->pmap != kernel_pmap);       /* protect alias access */

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.can_reuse_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (!vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.can_reuse_failure++;
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.can_reuse_success++;
	return KERN_SUCCESS;
}
#if MACH_ASSERT
static kern_return_t
vm_map_pageout(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;

	/*
	 * The MADV_PAGEOUT operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		vm_object_t     object;

		/*
		 * Sanity check on the VM map entry.
		 */
		if (entry->is_sub_map) {
			vm_map_t submap;
			vm_map_offset_t submap_start;
			vm_map_offset_t submap_end;
			vm_map_entry_t submap_entry;

			submap = VME_SUBMAP(entry);
			submap_start = VME_OFFSET(entry);
			submap_end = submap_start + (entry->vme_end -
			    entry->vme_start);

			vm_map_lock_read(submap);

			if (!vm_map_range_check(submap,
			    submap_start,
			    submap_end,
			    &submap_entry)) {
				vm_map_unlock_read(submap);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			object = VME_OBJECT(submap_entry);
			if (submap_entry->is_sub_map ||
			    object == VM_OBJECT_NULL ||
			    !object->internal) {
				vm_map_unlock_read(submap);
				continue;
			}

			vm_object_pageout(object);

			vm_map_unlock_read(submap);
			submap = VM_MAP_NULL;
			submap_entry = VM_MAP_ENTRY_NULL;
			continue;
		}

		object = VME_OBJECT(entry);
		if (entry->is_sub_map ||
		    object == VM_OBJECT_NULL ||
		    !object->internal) {
			continue;
		}

		vm_object_pageout(object);
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
/*
 *	Routine:	vm_map_entry_insert
 *
 *	Description:	This routine inserts a new vm_entry in a locked map.
 */
vm_map_entry_t
vm_map_entry_insert(
	vm_map_t                map,
	vm_map_entry_t          insp_entry,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_object_t             object,
	vm_object_offset_t      offset,
	boolean_t               needs_copy,
	boolean_t               is_shared,
	boolean_t               in_transition,
	vm_prot_t               cur_protection,
	vm_prot_t               max_protection,
	vm_behavior_t           behavior,
	vm_inherit_t            inheritance,
	unsigned                wired_count,
	boolean_t               no_cache,
	boolean_t               permanent,
	boolean_t               no_copy_on_read,
	unsigned int            superpage_size,
	boolean_t               clear_map_aligned,
	boolean_t               is_submap,
	boolean_t               used_for_jit,
	int                     alias)
{
	vm_map_entry_t  new_entry;

	assert(insp_entry != (vm_map_entry_t)0);
	vm_map_lock_assert_exclusive(map);

#if DEVELOPMENT || DEBUG
	vm_object_offset_t end_offset = 0;
	assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
#endif /* DEVELOPMENT || DEBUG */

	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);

	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}
	if (clear_map_aligned &&
	    (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
	    !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
		new_entry->map_aligned = FALSE;
	}

	new_entry->vme_start = start;
	new_entry->vme_end = end;
	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	if (new_entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
		    VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
		    VM_MAP_PAGE_MASK(map)));
	}
	assert(new_entry->vme_start < new_entry->vme_end);

	VME_OBJECT_SET(new_entry, object);
	VME_OFFSET_SET(new_entry, offset);
	new_entry->is_shared = is_shared;
	new_entry->is_sub_map = is_submap;
	new_entry->needs_copy = needs_copy;
	new_entry->in_transition = in_transition;
	new_entry->needs_wakeup = FALSE;
	new_entry->inheritance = inheritance;
	new_entry->protection = cur_protection;
	new_entry->max_protection = max_protection;
	new_entry->behavior = behavior;
	new_entry->wired_count = wired_count;
	new_entry->user_wired_count = 0;
	if (is_submap) {
		/*
		 * submap: "use_pmap" means "nested".
		 * default: false.
		 */
		new_entry->use_pmap = FALSE;
	} else {
		/*
		 * object: "use_pmap" means "use pmap accounting" for footprint.
		 * default: true.
		 */
		new_entry->use_pmap = TRUE;
	}
	VME_ALIAS_SET(new_entry, alias);
	new_entry->zero_wired_pages = FALSE;
	new_entry->no_cache = no_cache;
	new_entry->permanent = permanent;
	if (superpage_size) {
		new_entry->superpage_size = TRUE;
	} else {
		new_entry->superpage_size = FALSE;
	}
	if (used_for_jit) {
#if CONFIG_EMBEDDED
		if (!(map->jit_entry_exists))
#endif /* CONFIG_EMBEDDED */
		{
			new_entry->used_for_jit = TRUE;
			map->jit_entry_exists = TRUE;
		}
	} else {
		new_entry->used_for_jit = FALSE;
	}
	new_entry->pmap_cs_associated = FALSE;
	new_entry->iokit_acct = FALSE;
	new_entry->vme_resilient_codesign = FALSE;
	new_entry->vme_resilient_media = FALSE;
	new_entry->vme_atomic = FALSE;
	new_entry->vme_no_copy_on_read = no_copy_on_read;

	/*
	 *	Insert the new entry into the list.
	 */

	vm_map_store_entry_link(map, insp_entry, new_entry,
	    VM_MAP_KERNEL_FLAGS_NONE);
	map->size += end - start;

	/*
	 *	Update the free space hint and the lookup hint.
	 */

	SAVE_HINT_MAP_WRITE(map, new_entry);
	return new_entry;
}
15692 * Routine: vm_map_remap_extract
15694 * Descritpion: This routine returns a vm_entry list from a map.
15696 static kern_return_t
15697 vm_map_remap_extract(
15699 vm_map_offset_t addr
,
15700 vm_map_size_t size
,
15702 struct vm_map_header
*map_header
,
15703 vm_prot_t
*cur_protection
,
15704 vm_prot_t
*max_protection
,
15705 /* What, no behavior? */
15706 vm_inherit_t inheritance
,
15707 boolean_t pageable
,
15708 boolean_t same_map
,
15709 vm_map_kernel_flags_t vmk_flags
)
15711 kern_return_t result
;
15712 vm_map_size_t mapped_size
;
15713 vm_map_size_t tmp_size
;
15714 vm_map_entry_t src_entry
; /* result of last map lookup */
15715 vm_map_entry_t new_entry
;
15716 vm_object_offset_t offset
;
15717 vm_map_offset_t map_address
;
15718 vm_map_offset_t src_start
; /* start of entry to map */
15719 vm_map_offset_t src_end
; /* end of region to be mapped */
15720 vm_object_t object
;
15721 vm_map_version_t version
;
15722 boolean_t src_needs_copy
;
15723 boolean_t new_entry_needs_copy
;
15724 vm_map_entry_t saved_src_entry
;
15725 boolean_t src_entry_was_wired
;
15726 vm_prot_t max_prot_for_prot_copy
;
15728 assert(map
!= VM_MAP_NULL
);
15730 assert(size
== vm_map_round_page(size
, PAGE_MASK
));
15731 assert(inheritance
== VM_INHERIT_NONE
||
15732 inheritance
== VM_INHERIT_COPY
||
15733 inheritance
== VM_INHERIT_SHARE
);
15736 * Compute start and end of region.
15738 src_start
= vm_map_trunc_page(addr
, PAGE_MASK
);
15739 src_end
= vm_map_round_page(src_start
+ size
, PAGE_MASK
);
15743 * Initialize map_header.
15745 map_header
->links
.next
= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
15746 map_header
->links
.prev
= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
15747 map_header
->nentries
= 0;
15748 map_header
->entries_pageable
= pageable
;
15749 map_header
->page_shift
= PAGE_SHIFT
;
15751 vm_map_store_init( map_header
);
15753 if (copy
&& vmk_flags
.vmkf_remap_prot_copy
) {
15754 max_prot_for_prot_copy
= *max_protection
& VM_PROT_ALL
;
15756 max_prot_for_prot_copy
= VM_PROT_NONE
;
15758 *cur_protection
= VM_PROT_ALL
;
15759 *max_protection
= VM_PROT_ALL
;
15763 result
= KERN_SUCCESS
;
15766 * The specified source virtual space might correspond to
15767 * multiple map entries, need to loop on them.
15770 while (mapped_size
!= size
) {
15771 vm_map_size_t entry_size
;
15774 * Find the beginning of the region.
15776 if (!vm_map_lookup_entry(map
, src_start
, &src_entry
)) {
15777 result
= KERN_INVALID_ADDRESS
;
15781 if (src_start
< src_entry
->vme_start
||
15782 (mapped_size
&& src_start
!= src_entry
->vme_start
)) {
15783 result
= KERN_INVALID_ADDRESS
;
15787 tmp_size
= size
- mapped_size
;
15788 if (src_end
> src_entry
->vme_end
) {
15789 tmp_size
-= (src_end
- src_entry
->vme_end
);
15792 entry_size
= (vm_map_size_t
)(src_entry
->vme_end
-
15793 src_entry
->vme_start
);
15795 if (src_entry
->is_sub_map
) {
15796 vm_map_reference(VME_SUBMAP(src_entry
));
15797 object
= VM_OBJECT_NULL
;
15799 object
= VME_OBJECT(src_entry
);
15800 if (src_entry
->iokit_acct
) {
15802 * This entry uses "IOKit accounting".
15804 } else if (object
!= VM_OBJECT_NULL
&&
15805 (object
->purgable
!= VM_PURGABLE_DENY
||
15806 object
->vo_ledger_tag
!= VM_LEDGER_TAG_NONE
)) {
15808 * Purgeable objects have their own accounting:
15809 * no pmap accounting for them.
15811 assertf(!src_entry
->use_pmap
,
15812 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15815 (uint64_t)src_entry
->vme_start
,
15816 (uint64_t)src_entry
->vme_end
,
15817 src_entry
->protection
,
15818 src_entry
->max_protection
,
15819 VME_ALIAS(src_entry
));
15822 * Not IOKit or purgeable:
15823 * must be accounted by pmap stats.
15825 assertf(src_entry
->use_pmap
,
15826 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15829 (uint64_t)src_entry
->vme_start
,
15830 (uint64_t)src_entry
->vme_end
,
15831 src_entry
->protection
,
15832 src_entry
->max_protection
,
15833 VME_ALIAS(src_entry
));
15836 if (object
== VM_OBJECT_NULL
) {
15837 object
= vm_object_allocate(entry_size
);
15838 VME_OFFSET_SET(src_entry
, 0);
15839 VME_OBJECT_SET(src_entry
, object
);
15840 assert(src_entry
->use_pmap
);
15841 } else if (object
->copy_strategy
!=
15842 MEMORY_OBJECT_COPY_SYMMETRIC
) {
15844 * We are already using an asymmetric
15845 * copy, and therefore we already have
15846 * the right object.
15848 assert(!src_entry
->needs_copy
);
15849 } else if (src_entry
->needs_copy
|| object
->shadowed
||
15850 (object
->internal
&& !object
->true_share
&&
15851 !src_entry
->is_shared
&&
15852 object
->vo_size
> entry_size
)) {
15853 VME_OBJECT_SHADOW(src_entry
, entry_size
);
15854 assert(src_entry
->use_pmap
);
15856 if (!src_entry
->needs_copy
&&
15857 (src_entry
->protection
& VM_PROT_WRITE
)) {
15860 assert(!pmap_has_prot_policy(src_entry
->protection
));
15862 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
15864 if (override_nx(map
,
15865 VME_ALIAS(src_entry
))
15867 prot
|= VM_PROT_EXECUTE
;
15870 assert(!pmap_has_prot_policy(prot
));
15872 if (map
->mapped_in_other_pmaps
) {
15873 vm_object_pmap_protect(
15874 VME_OBJECT(src_entry
),
15875 VME_OFFSET(src_entry
),
15878 src_entry
->vme_start
,
15881 pmap_protect(vm_map_pmap(map
),
15882 src_entry
->vme_start
,
15883 src_entry
->vme_end
,
15888 object
= VME_OBJECT(src_entry
);
15889 src_entry
->needs_copy
= FALSE
;
15893 vm_object_lock(object
);
15894 vm_object_reference_locked(object
); /* object ref. for new entry */
15895 if (object
->copy_strategy
==
15896 MEMORY_OBJECT_COPY_SYMMETRIC
) {
15897 object
->copy_strategy
=
15898 MEMORY_OBJECT_COPY_DELAY
;
15900 vm_object_unlock(object
);
15903 offset
= (VME_OFFSET(src_entry
) +
15904 (src_start
- src_entry
->vme_start
));
15906 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
15907 vm_map_entry_copy(new_entry
, src_entry
);
15908 if (new_entry
->is_sub_map
) {
15909 /* clr address space specifics */
15910 new_entry
->use_pmap
= FALSE
;
15913 * We're dealing with a copy-on-write operation,
15914 * so the resulting mapping should not inherit the
15915 * original mapping's accounting settings.
15916 * "use_pmap" should be reset to its default (TRUE)
15917 * so that the new mapping gets accounted for in
15918 * the task's memory footprint.
15920 new_entry
->use_pmap
= TRUE
;
15922 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15923 assert(!new_entry
->iokit_acct
);
15925 new_entry
->map_aligned
= FALSE
;
15927 new_entry
->vme_start
= map_address
;
15928 new_entry
->vme_end
= map_address
+ tmp_size
;
15929 assert(new_entry
->vme_start
< new_entry
->vme_end
);
15930 if (copy
&& vmk_flags
.vmkf_remap_prot_copy
) {
15932 * Remapping for vm_map_protect(VM_PROT_COPY)
15933 * to convert a read-only mapping into a
15934 * copy-on-write version of itself but
15935 * with write access:
15936 * keep the original inheritance and add
15937 * VM_PROT_WRITE to the max protection.
15939 new_entry
->inheritance
= src_entry
->inheritance
;
15940 new_entry
->protection
&= max_prot_for_prot_copy
;
15941 new_entry
->max_protection
|= VM_PROT_WRITE
;
15943 new_entry
->inheritance
= inheritance
;
15945 VME_OFFSET_SET(new_entry
, offset
);
15948 * The new region has to be copied now if required.
15952 if (src_entry
->used_for_jit
== TRUE
) {
15955 #if CONFIG_EMBEDDED
15957 * Cannot allow an entry describing a JIT
15958 * region to be shared across address spaces.
15960 result
= KERN_INVALID_ARGUMENT
;
15962 #endif /* CONFIG_EMBEDDED */
15966 src_entry
->is_shared
= TRUE
;
15967 new_entry
->is_shared
= TRUE
;
15968 if (!(new_entry
->is_sub_map
)) {
15969 new_entry
->needs_copy
= FALSE
;
15971 } else if (src_entry
->is_sub_map
) {
15972 /* make this a COW sub_map if not already */
15973 assert(new_entry
->wired_count
== 0);
15974 new_entry
->needs_copy
= TRUE
;
15975 object
= VM_OBJECT_NULL
;
15976 } else if (src_entry
->wired_count
== 0 &&
15977 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry
),
15978 VME_OFFSET(new_entry
),
15979 (new_entry
->vme_end
-
15980 new_entry
->vme_start
),
15982 &new_entry_needs_copy
)) {
15983 new_entry
->needs_copy
= new_entry_needs_copy
;
15984 new_entry
->is_shared
= FALSE
;
15985 assertf(new_entry
->use_pmap
, "map %p new_entry %p\n", map
, new_entry
);
15988 * Handle copy_on_write semantics.
15990 if (src_needs_copy
&& !src_entry
->needs_copy
) {
15993 assert(!pmap_has_prot_policy(src_entry
->protection
));
15995 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
15997 if (override_nx(map
,
15998 VME_ALIAS(src_entry
))
16000 prot
|= VM_PROT_EXECUTE
;
16003 assert(!pmap_has_prot_policy(prot
));
16005 vm_object_pmap_protect(object
,
16008 ((src_entry
->is_shared
16009 || map
->mapped_in_other_pmaps
) ?
16010 PMAP_NULL
: map
->pmap
),
16011 src_entry
->vme_start
,
16014 assert(src_entry
->wired_count
== 0);
16015 src_entry
->needs_copy
= TRUE
;
16018 * Throw away the old object reference of the new entry.
16020 vm_object_deallocate(object
);
16022 new_entry
->is_shared
= FALSE
;
16023 assertf(new_entry
->use_pmap
, "map %p new_entry %p\n", map
, new_entry
);
16025 src_entry_was_wired
= (src_entry
->wired_count
> 0);
16026 saved_src_entry
= src_entry
;
16027 src_entry
= VM_MAP_ENTRY_NULL
;
16030 * The map can be safely unlocked since we
16031 * already hold a reference on the object.
16033 * Record the timestamp of the map for later
16034 * verification, and unlock the map.
16036 version
.main_timestamp
= map
->timestamp
;
16037 vm_map_unlock(map
); /* Increments timestamp once! */
16040 * Perform the copy.
16042 if (src_entry_was_wired
> 0) {
16043 vm_object_lock(object
);
16044 result
= vm_object_copy_slowly(
16047 (new_entry
->vme_end
-
16048 new_entry
->vme_start
),
16050 VME_OBJECT_PTR(new_entry
));
16052 VME_OFFSET_SET(new_entry
, 0);
16053 new_entry
->needs_copy
= FALSE
;
16055 vm_object_offset_t new_offset
;
16057 new_offset
= VME_OFFSET(new_entry
);
16058 result
= vm_object_copy_strategically(
16061 (new_entry
->vme_end
-
16062 new_entry
->vme_start
),
16063 VME_OBJECT_PTR(new_entry
),
16065 &new_entry_needs_copy
);
16066 if (new_offset
!= VME_OFFSET(new_entry
)) {
16067 VME_OFFSET_SET(new_entry
, new_offset
);
16070 new_entry
->needs_copy
= new_entry_needs_copy
;
16074 * Throw away the old object reference of the new entry.
16076 vm_object_deallocate(object
);
16078 if (result
!= KERN_SUCCESS
&&
16079 result
!= KERN_MEMORY_RESTART_COPY
) {
16080 _vm_map_entry_dispose(map_header
, new_entry
);
16086 * Verify that the map has not substantially
16087 * changed while the copy was being made.
16091 if (version
.main_timestamp
+ 1 != map
->timestamp
) {
16093 * Simple version comparison failed.
16095 * Retry the lookup and verify that the
16096 * same object/offset are still present.
16098 saved_src_entry
= VM_MAP_ENTRY_NULL
;
16099 vm_object_deallocate(VME_OBJECT(new_entry
));
16100 _vm_map_entry_dispose(map_header
, new_entry
);
16101 if (result
== KERN_MEMORY_RESTART_COPY
) {
16102 result
= KERN_SUCCESS
;
16106 /* map hasn't changed: src_entry is still valid */
16107 src_entry
= saved_src_entry
;
16108 saved_src_entry
= VM_MAP_ENTRY_NULL
;
16110 if (result
== KERN_MEMORY_RESTART_COPY
) {
16111 vm_object_reference(object
);
16116 _vm_map_store_entry_link(map_header
,
16117 map_header
->links
.prev
, new_entry
);
16119 /*Protections for submap mapping are irrelevant here*/
16120 if (!src_entry
->is_sub_map
) {
16121 *cur_protection
&= src_entry
->protection
;
16122 *max_protection
&= src_entry
->max_protection
;
16124 map_address
+= tmp_size
;
16125 mapped_size
+= tmp_size
;
16126 src_start
+= tmp_size
;
16129 vm_map_unlock(map
);
16130 if (result
!= KERN_SUCCESS
) {
16132 * Free all allocated elements.
16134 for (src_entry
= map_header
->links
.next
;
16135 src_entry
!= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
16136 src_entry
= new_entry
) {
16137 new_entry
= src_entry
->vme_next
;
16138 _vm_map_store_entry_unlink(map_header
, src_entry
);
16139 if (src_entry
->is_sub_map
) {
16140 vm_map_deallocate(VME_SUBMAP(src_entry
));
16142 vm_object_deallocate(VME_OBJECT(src_entry
));
16144 _vm_map_entry_dispose(map_header
, src_entry
);
16151 * Routine: vm_remap
16153 * Map portion of a task's address space.
16154 * Mapped region must not overlap more than
16155 * one vm memory object. Protections and
16156 * inheritance attributes remain the same
16157 * as in the original task and are out parameters.
16158 * Source and Target task can be identical
16159 * Other attributes are identical as for vm_map()
kern_return_t
vm_map_remap(
	vm_map_t		target_map,
	vm_map_address_t	*address,
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	/* ... */
	vm_map_kernel_flags_t	vmk_flags,
	/* ... */
	vm_map_offset_t		memory_address,
	/* ... */
	vm_prot_t		*cur_protection,
	vm_prot_t		*max_protection,
	vm_inherit_t		inheritance)
{
	kern_return_t		result;
	vm_map_entry_t		entry;
	vm_map_entry_t		insp_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t		new_entry;
	struct vm_map_header	map_header;
	vm_map_offset_t		offset_in_mapping;

	if (target_map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	switch (inheritance) {
	case VM_INHERIT_NONE:
	case VM_INHERIT_COPY:
	case VM_INHERIT_SHARE:
		if (size != 0 && src_map != VM_MAP_NULL) {
			break;
		}
		/* ... */
	default:
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * If the user is requesting that we return the address of the
	 * first byte of the data (rather than the base of the page),
	 * then we use different rounding semantics: specifically,
	 * we assume that (memory_address, size) describes a region
	 * all of whose pages we must cover, rather than a base to be truncated
	 * down and a size to be added to that base.  So we figure out
	 * the highest page that the requested region includes and make
	 * sure that the size will cover it.
	 *
	 * The key example we're worried about it is of the form:
	 *
	 *	memory_address = 0x1ff0, size = 0x20
	 *
	 * With the old semantics, we round down the memory_address to 0x1000
	 * and round up the size to 0x1000, resulting in our covering *only*
	 * page 0x1000.  With the new semantics, we'd realize that the region covers
	 * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
	 * 0x1000 and page 0x2000 in the region we remap.
	 */
	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
		offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
		size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
	} else {
		size = vm_map_round_page(size, PAGE_MASK);
	}
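	/*
	 * Illustrative arithmetic (not part of the original source), assuming
	 * 4KB pages (PAGE_MASK == 0xfff) and the example in the comment above:
	 *
	 *	memory_address = 0x1ff0, size = 0x20
	 *	vm_map_trunc_page(0x1ff0, 0xfff)           == 0x1000
	 *	offset_in_mapping = 0x1ff0 - 0x1000        == 0xff0
	 *	vm_map_round_page(0x1ff0 + 0x20 - 0x1000, 0xfff)
	 *	    = vm_map_round_page(0x1010, 0xfff)     == 0x2000
	 *
	 * so the remapped range spans the two pages at 0x1000 and 0x2000, and
	 * the caller's address is later adjusted by offset_in_mapping (0xff0).
	 */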
	if (size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	if (flags & VM_FLAGS_RESILIENT_MEDIA) {
		/* must be copy-on-write to be "media resilient" */
		if (!copy) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	result = vm_map_remap_extract(src_map, memory_address,
	    size, copy, &map_header,
	    /* ... */
	    target_map->hdr.entries_pageable,
	    src_map == target_map,
	    /* ... */);

	if (result != KERN_SUCCESS) {
		return result;
	}

	/*
	 * Allocate/check a range of free virtual address
	 * space for the target
	 */
	*address = vm_map_trunc_page(*address,
	    VM_MAP_PAGE_MASK(target_map));
	vm_map_lock(target_map);
	result = vm_map_remap_range_allocate(target_map, address, size,
	    mask, flags, vmk_flags, tag,
	    /* ... */);

	for (entry = map_header.links.next;
	    entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
	    entry = new_entry) {
		new_entry = entry->vme_next;
		_vm_map_store_entry_unlink(&map_header, entry);
		if (result == KERN_SUCCESS) {
			if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
				/* no codesigning -> read-only access */
				entry->max_protection = VM_PROT_READ;
				entry->protection = VM_PROT_READ;
				entry->vme_resilient_codesign = TRUE;
			}
			entry->vme_start += *address;
			entry->vme_end += *address;
			assert(!entry->map_aligned);
			if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
			    !entry->is_sub_map &&
			    (VME_OBJECT(entry) == VM_OBJECT_NULL ||
			    VME_OBJECT(entry)->internal)) {
				entry->vme_resilient_media = TRUE;
			}
			vm_map_store_entry_link(target_map, insp_entry, entry,
			    /* ... */);
			insp_entry = entry;
		} else {
			if (!entry->is_sub_map) {
				vm_object_deallocate(VME_OBJECT(entry));
			} else {
				vm_map_deallocate(VME_SUBMAP(entry));
			}
			_vm_map_entry_dispose(&map_header, entry);
		}
	}

	if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
		*cur_protection = VM_PROT_READ;
		*max_protection = VM_PROT_READ;
	}

	if (target_map->disable_vmentry_reuse == TRUE) {
		assert(!target_map->is_nested_map);
		if (target_map->highest_entry_end < insp_entry->vme_end) {
			target_map->highest_entry_end = insp_entry->vme_end;
		}
	}

	if (result == KERN_SUCCESS) {
		target_map->size += size;
		SAVE_HINT_MAP_WRITE(target_map, insp_entry);

		if (*max_protection & VM_PROT_EXECUTE) {
			vm_map_address_t region_start = 0, region_size = 0;
			struct pmap_cs_code_directory *region_cd = NULL;
			vm_map_address_t base = 0;
			struct pmap_cs_lookup_results results = {};
			vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
			vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);

			pmap_cs_lookup(src_map->pmap, memory_address, &results);
			region_size = results.region_size;
			region_start = results.region_start;
			region_cd = results.region_cd_entry;
			base = results.base;

			if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
				*cur_protection = VM_PROT_READ;
				*max_protection = VM_PROT_READ;
				printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
				    "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
				    page_addr, page_addr + assoc_size, *address,
				    region_start, region_size,
				    region_cd != NULL ? "not " : "" // Don't leak kernel slide
				    /* ... */);
			}
		}
	}

	vm_map_unlock(target_map);

	if (result == KERN_SUCCESS && target_map->wiring_required) {
		result = vm_map_wire_kernel(target_map, *address,
		    *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
		    /* ... */);
	}

	/*
	 * If requested, return the address of the data pointed to by the
	 * request, rather than the base of the resulting page.
	 */
	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
		*address += offset_in_mapping;
	}
/*
 *	Routine:	vm_map_remap_range_allocate
 *
 *	Description:
 *		Allocate a range in the specified virtual address map.
 *		returns the address and the map entry just before the allocated
 *		range
 *
 *	Map must be locked.
 */

static kern_return_t
vm_map_remap_range_allocate(
	vm_map_t		map,
	vm_map_address_t	*address,	/* IN/OUT */
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_kernel_flags_t	vmk_flags,
	__unused vm_tag_t	tag,
	vm_map_entry_t		*map_entry)	/* OUT */
{
	vm_map_entry_t	entry;
	vm_map_offset_t	start;
	vm_map_offset_t	end;
	vm_map_offset_t	desired_empty_end;
	kern_return_t	kr;
	vm_map_entry_t	hole_entry;

	if (flags & VM_FLAGS_ANYWHERE) {
		if (flags & VM_FLAGS_RANDOM_ADDR) {
			/*
			 * Get a random start address.
			 */
			kr = vm_map_random_address_for_size(map, address, size);
			if (kr != KERN_SUCCESS) {
				return kr;
			}
		}

		/*
		 * Calculate the first possible address.
		 */
		if (start < map->min_offset) {
			start = map->min_offset;
		}
		if (start > map->max_offset) {
			return KERN_NO_SPACE;
		}

		/*
		 * Look for the first possible address;
		 * if there's already something at this
		 * address, we have to start after it.
		 */
		if (map->disable_vmentry_reuse == TRUE) {
			VM_MAP_HIGHEST_ENTRY(map, entry, start);
		} else {
			if (map->holelistenabled) {
				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

				if (hole_entry == NULL) {
					/*
					 * No more space in the map?
					 */
					return KERN_NO_SPACE;
				}

				boolean_t found_hole = FALSE;
				do {
					if (hole_entry->vme_start >= start) {
						start = hole_entry->vme_start;
						found_hole = TRUE;
						break;
					}

					if (hole_entry->vme_end > start) {
						found_hole = TRUE;
						break;
					}
					hole_entry = hole_entry->vme_next;
				} while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));

				if (found_hole == FALSE) {
					return KERN_NO_SPACE;
				}

				entry = hole_entry;
			} else {
				assert(first_free_is_valid(map));
				if (start == map->min_offset) {
					if ((entry = map->first_free) != vm_map_to_entry(map)) {
						start = entry->vme_end;
					}
				} else {
					vm_map_entry_t tmp_entry;
					if (vm_map_lookup_entry(map, start, &tmp_entry)) {
						start = tmp_entry->vme_end;
					}
					entry = tmp_entry;
				}
				start = vm_map_round_page(start,
				    VM_MAP_PAGE_MASK(map));
			}
		}

		/*
		 * In any case, the "entry" always precedes
		 * the proposed new region throughout the loop:
		 */
		while (TRUE) {
			vm_map_entry_t	next;

			/*
			 * Find the end of the proposed new region.
			 * Be sure we didn't go beyond the end, or
			 * wrap around the address.
			 */
			end = ((start + mask) & ~mask);
			end = vm_map_round_page(end,
			    VM_MAP_PAGE_MASK(map));
16491 return KERN_NO_SPACE
;
16496 /* We want an entire page of empty space, but don't increase the allocation size. */
16497 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
16499 if ((desired_empty_end
> map
->max_offset
) || (desired_empty_end
< start
)) {
16500 if (map
->wait_for_space
) {
16501 if (size
<= (map
->max_offset
-
16502 map
->min_offset
)) {
16503 assert_wait((event_t
) map
, THREAD_INTERRUPTIBLE
);
16504 vm_map_unlock(map
);
16505 thread_block(THREAD_CONTINUE_NULL
);
16511 return KERN_NO_SPACE
;
16514 next
= entry
->vme_next
;
16516 if (map
->holelistenabled
) {
16517 if (entry
->vme_end
>= desired_empty_end
) {
16522 * If there are no more entries, we must win.
16526 * If there is another entry, it must be
16527 * after the end of the potential new region.
16530 if (next
== vm_map_to_entry(map
)) {
16534 if (next
->vme_start
>= desired_empty_end
) {
16540 * Didn't fit -- move to the next entry.
16545 if (map
->holelistenabled
) {
16546 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
16550 return KERN_NO_SPACE
;
16552 start
= entry
->vme_start
;
16554 start
= entry
->vme_end
;
16558 if (map
->holelistenabled
) {
16559 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
16560 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
16566 vm_map_entry_t temp_entry
;
16570 * the address doesn't itself violate
16571 * the mask requirement.
16574 if ((start
& mask
) != 0) {
16575 return KERN_NO_SPACE
;
16580 * ... the address is within bounds
16583 end
= start
+ size
;
16585 if ((start
< map
->min_offset
) ||
16586 (end
> map
->max_offset
) ||
16588 return KERN_INVALID_ADDRESS
;
16592 * If we're asked to overwrite whatever was mapped in that
16593 * range, first deallocate that range.
16595 if (flags
& VM_FLAGS_OVERWRITE
) {
16597 int remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
| VM_MAP_REMOVE_NO_MAP_ALIGN
;
16600 * We use a "zap_map" to avoid having to unlock
16601 * the "map" in vm_map_delete(), which would compromise
16602 * the atomicity of the "deallocate" and then "remap"
16605 zap_map
= vm_map_create(PMAP_NULL
,
16608 map
->hdr
.entries_pageable
);
16609 if (zap_map
== VM_MAP_NULL
) {
16610 return KERN_RESOURCE_SHORTAGE
;
16612 vm_map_set_page_shift(zap_map
, VM_MAP_PAGE_SHIFT(map
));
16613 vm_map_disable_hole_optimization(zap_map
);
16615 if (vmk_flags
.vmkf_overwrite_immutable
) {
16616 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
16618 kr
= vm_map_delete(map
, start
, end
,
16621 if (kr
== KERN_SUCCESS
) {
16622 vm_map_destroy(zap_map
,
16623 VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
16624 zap_map
= VM_MAP_NULL
;
16629 * ... the starting address isn't allocated
16632 if (vm_map_lookup_entry(map
, start
, &temp_entry
)) {
16633 return KERN_NO_SPACE
;
16636 entry
= temp_entry
;
16639 * ... the next region doesn't overlap the
16643 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
16644 (entry
->vme_next
->vme_start
< end
)) {
16645 return KERN_NO_SPACE
;
16648 *map_entry
= entry
;
16649 return KERN_SUCCESS
;
/*
 *	Routine:	vm_map_switch
 *
 *	Description:
 *		Set the address map for the current thread to the specified map
 */
vm_map_t
vm_map_switch(
	vm_map_t	map)
{
	int		mycpu;
	thread_t	thread = current_thread();
	vm_map_t	oldmap = thread->map;

	mp_disable_preemption();
	mycpu = cpu_number();

	/*
	 * Deactivate the current map and activate the requested map
	 */
	PMAP_SWITCH_USER(thread, map, mycpu);

	mp_enable_preemption();
	return oldmap;
}
/*
 *	Routine:	vm_map_write_user
 *
 *	Description:
 *		Copy out data from a kernel space into space in the
 *		destination map.  The space must already exist in the
 *		destination map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault.  i.e. kernel mode user
 *		threads.
 */
kern_return_t
vm_map_write_user(
	vm_map_t		map,
	void			*src_p,
	vm_map_address_t	dst_addr,
	vm_size_t		size)
{
	kern_return_t	kr = KERN_SUCCESS;

	if (current_map() == map) {
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t	oldmap;

		/* take on the identity of the target map while doing */
		/* the copy */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
/*
 *	Routine:	vm_map_read_user
 *
 *	Description:
 *		Copy in data from a user space source map into the
 *		kernel map.  The space must already exist in the
 *		kernel map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault.  i.e. kernel mode user
 *		threads.
 */
kern_return_t
vm_map_read_user(
	vm_map_t		map,
	vm_map_address_t	src_addr,
	void			*dst_p,
	vm_size_t		size)
{
	kern_return_t	kr = KERN_SUCCESS;

	if (current_map() == map) {
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t	oldmap;

		/* take on the identity of the target map while doing */
		/* the copy */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
/*
 *	vm_map_check_protection:
 *
 *	Assert that the target map allows the specified
 *	privilege on the entire address region given.
 *	The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
    vm_map_offset_t end, vm_prot_t protection)
{
	vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	vm_map_lock(map);

	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
		vm_map_unlock(map);
		return FALSE;
	}

	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		vm_map_unlock(map);
		return FALSE;
	}

	entry = tmp_entry;

	while (start < end) {
		if (entry == vm_map_to_entry(map)) {
			vm_map_unlock(map);
			return FALSE;
		}

		/*
		 * No holes allowed!
		 */
		if (start < entry->vme_start) {
			vm_map_unlock(map);
			return FALSE;
		}

		/*
		 * Check protection associated with entry.
		 */
		if ((entry->protection & protection) != protection) {
			vm_map_unlock(map);
			return FALSE;
		}

		/* go to next entry */
		start = entry->vme_end;
		entry = entry->vme_next;
	}
	vm_map_unlock(map);
	return TRUE;
}
kern_return_t
vm_map_purgable_control(
	vm_map_t		map,
	vm_map_offset_t		address,
	vm_purgable_t		control,
	int			*state)
{
	vm_map_entry_t		entry;
	vm_object_t		object;
	kern_return_t		kr;
	boolean_t		was_nonvolatile;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control != VM_PURGABLE_SET_STATE &&
	    control != VM_PURGABLE_GET_STATE &&
	    control != VM_PURGABLE_PURGE_ALL &&
	    control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (control == VM_PURGABLE_PURGE_ALL) {
		vm_purgeable_object_purge_all();
		return KERN_SUCCESS;
	}

	if ((control == VM_PURGABLE_SET_STATE ||
	    control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
	    ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->protection & VM_PROT_WRITE) == 0) {
		/*
		 * Can't apply purgable controls to something you can't write.
		 */
		vm_map_unlock_read(map);
		return KERN_PROTECTION_FAILURE;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL ||
	    object->purgable == VM_PURGABLE_DENY) {
		/*
		 * Object must already be present and be purgeable.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

	if (VME_OFFSET(entry) != 0 ||
	    entry->vme_end - entry->vme_start != object->vo_size) {
		/*
		 * Can only apply purgable controls to the whole (existing)
		 * object at once.
		 */
		vm_map_unlock_read(map);
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	assert(!entry->is_sub_map);
	assert(!entry->use_pmap); /* purgeable has its own accounting */

	vm_map_unlock_read(map);

	was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);

	kr = vm_object_purgable_control(object, control, state);

	if (was_nonvolatile &&
	    object->purgable != VM_PURGABLE_NONVOLATILE &&
	    map->pmap == kernel_pmap) {
		object->vo_purgeable_volatilizer = kernel_task;
	}

	vm_object_unlock(object);

	return kr;
}
kern_return_t
vm_map_page_query_internal(
	vm_map_t	target_map,
	vm_map_offset_t	offset,
	int		*disposition,
	int		*ref_count)
{
	kern_return_t			kr;
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count;

	count = VM_PAGE_INFO_BASIC_COUNT;
	kr = vm_map_page_info(target_map,
	    offset,
	    VM_PAGE_INFO_BASIC,
	    (vm_page_info_t) &info,
	    &count);
	if (kr == KERN_SUCCESS) {
		*disposition = info.disposition;
		*ref_count = info.ref_count;
	} else {
		*disposition = 0;
		*ref_count = 0;
	}

	return kr;
}

kern_return_t
vm_map_page_info(
	vm_map_t		map,
	vm_map_offset_t		offset,
	vm_page_info_flavor_t	flavor,
	vm_page_info_t		info,
	mach_msg_type_number_t	*count)
{
	return vm_map_page_range_info_internal(map,
	           offset, /* start of range */
	           (offset + 1), /* this will get rounded in the call to the page boundary */
	           flavor,
	           info,
	           count);
}
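/*
 * Illustrative note (not part of the original source): because the end of the
 * range is passed as (offset + 1) and then rounded up to a page boundary, a
 * single vm_map_page_info() call always describes exactly the one page that
 * contains "offset".  A hypothetical caller querying one page:
 *
 *	vm_page_info_basic_data_t info;
 *	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
 *	kr = vm_map_page_info(map, addr, VM_PAGE_INFO_BASIC,
 *	    (vm_page_info_t)&info, &count);
 *
 * mirrors what vm_map_page_query_internal() does above.
 */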
kern_return_t
vm_map_page_range_info_internal(
	vm_map_t		map,
	vm_map_offset_t		start_offset,
	vm_map_offset_t		end_offset,
	vm_page_info_flavor_t	flavor,
	vm_page_info_t		info,
	mach_msg_type_number_t	*count)
{
	vm_map_entry_t		map_entry = VM_MAP_ENTRY_NULL;
	vm_object_t		object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
	vm_page_t		m = VM_PAGE_NULL;
	kern_return_t		retval = KERN_SUCCESS;
	int			disposition = 0;
	int			ref_count = 0;
	int			depth = 0, info_idx = 0;
	vm_page_info_basic_t	basic_info = 0;
	vm_map_offset_t		offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
	vm_map_offset_t		start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
	boolean_t		do_region_footprint;
	ledger_amount_t		ledger_resident, ledger_compressed;

	switch (flavor) {
	case VM_PAGE_INFO_BASIC:
		if (*count != VM_PAGE_INFO_BASIC_COUNT) {
			/*
			 * The "vm_page_info_basic_data" structure was not
			 * properly padded, so allow the size to be off by
			 * one to maintain backwards binary compatibility...
			 */
			if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
				return KERN_INVALID_ARGUMENT;
			}
		}
		break;
	default:
		return KERN_INVALID_ARGUMENT;
	}

	do_region_footprint = task_self_region_footprint();

	info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
	retval = KERN_SUCCESS;

	offset_in_page = start_offset & PAGE_MASK;
	start = vm_map_trunc_page(start_offset, PAGE_MASK);
	end = vm_map_round_page(end_offset, PAGE_MASK);

	if (end < start) {
		return KERN_INVALID_ARGUMENT;
	}

	assert((end - start) <= MAX_PAGE_RANGE_QUERY);

	vm_map_lock_read(map);

	task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
	for (curr_s_offset = start; curr_s_offset < end;) {
		/*
		 * New lookup needs reset of these variables.
		 */
		curr_object = object = VM_OBJECT_NULL;
		offset_in_object = 0;
		ref_count = 0;
		depth = 0;

		if (do_region_footprint &&
		    curr_s_offset >= vm_map_last_entry(map)->vme_end) {
			/*
			 * Request for "footprint" info about a page beyond
			 * the end of address space: this must be for
			 * the fake region vm_map_region_recurse_64()
			 * reported to account for non-volatile purgeable
			 * memory owned by this task.
			 */
			disposition = 0;

			if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
			    (unsigned) ledger_compressed) {
				/*
				 * We haven't reported all the "non-volatile
				 * compressed" pages yet, so report this fake
				 * page as "compressed".
				 */
				disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
			} else {
				/*
				 * We've reported all the non-volatile
				 * compressed pages but not all the non-volatile
				 * pages, so report this fake page as
				 * "resident dirty".
				 */
				disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
				disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
				disposition |= VM_PAGE_QUERY_PAGE_REF;
			}
			switch (flavor) {
			case VM_PAGE_INFO_BASIC:
				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
				basic_info->disposition = disposition;
				basic_info->ref_count = 1;
				basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
				basic_info->offset = 0;
				basic_info->depth = 0;

				info_idx++;
				break;
			}
			curr_s_offset += PAGE_SIZE;
			continue;
		}
		/*
		 * First, find the map entry covering "curr_s_offset", going down
		 * submaps if necessary.
		 */
		if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
			/* no entry -> no object -> no page */

			if (curr_s_offset < vm_map_min(map)) {
				/*
				 * Illegal address that falls below map min.
				 */
				curr_e_offset = MIN(end, vm_map_min(map));
			} else if (curr_s_offset >= vm_map_max(map)) {
				/*
				 * Illegal address that falls on/after map max.
				 */
				curr_e_offset = end;
			} else if (map_entry == vm_map_to_entry(map)) {
				if (map_entry->vme_next == vm_map_to_entry(map)) {
					curr_e_offset = MIN(map->max_offset, end);
				} else {
					/*
					 * Hole at start of the map.
					 */
					curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
				}
			} else {
				if (map_entry->vme_next == vm_map_to_entry(map)) {
					/*
					 * Hole at the end of the map.
					 */
					curr_e_offset = MIN(map->max_offset, end);
				} else {
					curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
				}
			}

			assert(curr_e_offset >= curr_s_offset);

			uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;

			void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));

			bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));

			curr_s_offset = curr_e_offset;

			info_idx += num_pages;

			continue;
		}

		/* compute offset from this map entry's start */
		offset_in_object = curr_s_offset - map_entry->vme_start;

		/* compute offset into this map entry's object (or submap) */
		offset_in_object += VME_OFFSET(map_entry);

		if (map_entry->is_sub_map) {
			vm_map_t sub_map = VM_MAP_NULL;
			vm_page_info_t submap_info = 0;
			vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;

			range_len = MIN(map_entry->vme_end, end) - curr_s_offset;

			submap_s_offset = offset_in_object;
			submap_e_offset = submap_s_offset + range_len;

			sub_map = VME_SUBMAP(map_entry);

			vm_map_reference(sub_map);
			vm_map_unlock_read(map);

			submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));

			retval = vm_map_page_range_info_internal(sub_map,
			    submap_s_offset,
			    submap_e_offset,
			    VM_PAGE_INFO_BASIC,
			    (vm_page_info_t) submap_info,
			    count);

			assert(retval == KERN_SUCCESS);

			vm_map_lock_read(map);
			vm_map_deallocate(sub_map);

			/* Move the "info" index by the number of pages we inspected.*/
			info_idx += range_len >> PAGE_SHIFT;

			/* Move our current offset by the size of the range we inspected.*/
			curr_s_offset += range_len;

			continue;
		}

		object = VME_OBJECT(map_entry);
		if (object == VM_OBJECT_NULL) {
			/*
			 * We don't have an object here and, hence,
			 * no pages to inspect. We'll fill up the
			 * info structure appropriately.
			 */
			curr_e_offset = MIN(map_entry->vme_end, end);

			uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;

			void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));

			bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));

			curr_s_offset = curr_e_offset;

			info_idx += num_pages;

			continue;
		}
		if (do_region_footprint) {
			int pmap_disp;

			pmap_disp = 0;
			if (map->has_corpse_footprint) {
				/*
				 * Query the page info data we saved
				 * while forking the corpse.
				 */
				vm_map_corpse_footprint_query_page_info(
					/* ... */);
			} else {
				pmap_query_page_info(map->pmap,
				    /* ... */);
			}

			if (object->purgable == VM_PURGABLE_NONVOLATILE &&
			    /* && not tagged as no-footprint? */
			    VM_OBJECT_OWNER(object) != NULL &&
			    VM_OBJECT_OWNER(object)->map == map) {
				if ((((curr_s_offset
				    - map_entry->vme_start
				    + VME_OFFSET(map_entry))
				    / PAGE_SIZE) <
				    (object->resident_page_count +
				    vm_compressor_pager_get_count(object->pager)))) {
					/*
					 * Non-volatile purgeable object owned
					 * by this task: report the first
					 * "#resident + #compressed" pages as
					 * "resident" (to show that they
					 * contribute to the footprint) but not
					 * "dirty" (to avoid double-counting
					 * with the fake "non-volatile" region
					 * we'll report at the end of the
					 * address space to account for all
					 * (mapped or not) non-volatile memory
					 * owned by this task.
					 */
					disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
				}
			} else if ((object->purgable == VM_PURGABLE_VOLATILE ||
			    object->purgable == VM_PURGABLE_EMPTY) &&
			    /* && not tagged as no-footprint? */
			    VM_OBJECT_OWNER(object) != NULL &&
			    VM_OBJECT_OWNER(object)->map == map) {
				if ((((curr_s_offset
				    - map_entry->vme_start
				    + VME_OFFSET(map_entry))
				    / PAGE_SIZE) <
				    object->wired_page_count)) {
					/*
					 * Volatile|empty purgeable object owned
					 * by this task: report the first
					 * "#wired" pages as "resident" (to
					 * show that they contribute to the
					 * footprint) but not "dirty" (to avoid
					 * double-counting with the fake
					 * "non-volatile" region we'll report
					 * at the end of the address space to
					 * account for all (mapped or not)
					 * non-volatile memory owned by this
					 * task.
					 */
					disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
				}
			} else if (map_entry->iokit_acct &&
			    object->internal &&
			    object->purgable == VM_PURGABLE_DENY) {
				/*
				 * Non-purgeable IOKit memory: phys_footprint
				 * includes the entire virtual mapping.
				 */
				assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
				disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
				disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
			} else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
			    PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
				/* alternate accounting */
#if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
				if (map->pmap->footprint_was_suspended ||
				    /*
				     * XXX corpse does not know if original
				     * pmap had its footprint suspended...
				     */
				    map->has_corpse_footprint) {
					/*
					 * The assertion below can fail if dyld
					 * suspended footprint accounting
					 * while doing some adjustments to
					 * this page; the mapping would say
					 * "use pmap accounting" but the page
					 * would be marked "alternate
					 * accounting".
					 */
				} else
#endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
				assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
			} else {
				if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
					assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
					disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
					disposition |= VM_PAGE_QUERY_PAGE_REF;
					if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
						disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
					} else {
						disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
					}
					if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
						disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
					}
				} else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
					assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
					disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
				}
			}

			switch (flavor) {
			case VM_PAGE_INFO_BASIC:
				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
				basic_info->disposition = disposition;
				basic_info->ref_count = 1;
				basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
				basic_info->offset = 0;
				basic_info->depth = 0;

				info_idx++;
				break;
			}

			disposition = 0;
			curr_s_offset += PAGE_SIZE;
			continue;
		}
		vm_object_reference(object);
		/*
		 * Shared mode -- so we can allow other readers
		 * to grab the lock too.
		 */
		vm_object_lock_shared(object);

		curr_e_offset = MIN(map_entry->vme_end, end);

		vm_map_unlock_read(map);

		map_entry = NULL; /* map is unlocked, the entry is no longer valid. */

		curr_object = object;

		for (; curr_s_offset < curr_e_offset;) {
			if (object == curr_object) {
				ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
			} else {
				ref_count = curr_object->ref_count;
			}

			curr_offset_in_object = offset_in_object;

			for (;;) {
				m = vm_page_lookup(curr_object, curr_offset_in_object);

				if (m != VM_PAGE_NULL) {
					disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
					break;
				} else {
					if (curr_object->internal &&
					    curr_object->alive &&
					    !curr_object->terminating &&
					    curr_object->pager_ready) {
						if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
						    == VM_EXTERNAL_STATE_EXISTS) {
							/* the pager has that page */
							disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
							break;
						}
					}

					/*
					 * Go down the VM object shadow chain until we find the page
					 * we're looking for.
					 */
					if (curr_object->shadow != VM_OBJECT_NULL) {
						vm_object_t shadow = VM_OBJECT_NULL;

						curr_offset_in_object += curr_object->vo_shadow_offset;
						shadow = curr_object->shadow;

						vm_object_lock_shared(shadow);
						vm_object_unlock(curr_object);

						curr_object = shadow;
						depth++;
					} else {
						break;
					}
				}
			}

			/* The ref_count is not strictly accurate, it measures the number   */
			/* of entities holding a ref on the object, they may not be mapping */
			/* the object or may not be mapping the section holding the         */
			/* target page but it's still a ballpark number and though an over- */
			/* count, it picks up the copy-on-write cases                        */

			/* We could also get a picture of page sharing from pmap_attributes */
			/* but this would undercount as only faulted-in mappings would      */
			/* show up.                                                          */

			if ((curr_object == object) && curr_object->shadow) {
				disposition |= VM_PAGE_QUERY_PAGE_COPIED;
			}

			if (!curr_object->internal) {
				disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
			}

			if (m != VM_PAGE_NULL) {
				if (m->vmp_fictitious) {
					disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
				} else {
					if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
						disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
					}

					if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
						disposition |= VM_PAGE_QUERY_PAGE_REF;
					}

					if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
						disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
					}

					if (m->vmp_cs_validated) {
						disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
					}
					if (m->vmp_cs_tainted) {
						disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
					}
					if (m->vmp_cs_nx) {
						disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
					}
					if (m->vmp_reusable || curr_object->all_reusable) {
						disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
					}
				}
			}

			switch (flavor) {
			case VM_PAGE_INFO_BASIC:
				basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
				basic_info->disposition = disposition;
				basic_info->ref_count = ref_count;
				basic_info->object_id = (vm_object_id_t) (uintptr_t)
				    VM_KERNEL_ADDRPERM(curr_object);
				basic_info->offset =
				    (memory_object_offset_t) curr_offset_in_object + offset_in_page;
				basic_info->depth = depth;

				info_idx++;
				break;
			}

			disposition = 0;
			offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.

			/*
			 * Move to next offset in the range and in our object.
			 */
			curr_s_offset += PAGE_SIZE;
			offset_in_object += PAGE_SIZE;
			curr_offset_in_object = offset_in_object;

			if (curr_object != object) {
				vm_object_unlock(curr_object);

				curr_object = object;

				vm_object_lock_shared(curr_object);
			} else {
				vm_object_lock_yield_shared(curr_object);
			}
		}

		vm_object_unlock(curr_object);
		vm_object_deallocate(curr_object);

		vm_map_lock_read(map);
	}

	vm_map_unlock_read(map);

	return retval;
}
/*
 *	Routine:	vm_map_msync
 *
 *	Description:
 *		Synchronises the memory range specified with its backing store
 *		image by either flushing or cleaning the contents to the appropriate
 *		memory manager engaging in a memory object synchronize dialog with
 *		the manager.  The client doesn't return until the manager issues
 *		m_o_s_completed message.  MIG Magically converts user task parameter
 *		to the task's address map.
 *
 *		interpretation of sync_flags
 *		VM_SYNC_INVALIDATE	- discard pages, only return precious
 *					  pages to manager.
 *
 *		VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
 *					- discard pages, write dirty or precious
 *					  pages back to memory manager.
 *
 *		VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
 *					- write dirty or precious pages back to
 *					  the memory manager.
 *
 *		VM_SYNC_CONTIGUOUS	- does everything normally, but if there
 *					  is a hole in the region, and we would
 *					  have returned KERN_SUCCESS, return
 *					  KERN_INVALID_ADDRESS instead.
 *
 *	NOTE
 *		The memory object attributes have not yet been implemented, this
 *		function will have to deal with the invalidate attribute
 *
 *	RETURNS
 *		KERN_INVALID_TASK		Bad task parameter
 *		KERN_INVALID_ARGUMENT		both sync and async were specified.
 *		KERN_SUCCESS			The usual.
 *		KERN_INVALID_ADDRESS		There was a hole in the region.
 */
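/*
 * Illustrative mapping (not part of the original source): the BSD msync(2)
 * path builds sync_flags roughly along these lines before reaching this
 * routine (a sketch, not the exact bsd/kern code):
 *
 *	vm_sync_t sync_flags = 0;
 *	if (flags & MS_ASYNC)      sync_flags |= VM_SYNC_ASYNCHRONOUS;
 *	else                       sync_flags |= VM_SYNC_SYNCHRONOUS;
 *	if (flags & MS_INVALIDATE) sync_flags |= VM_SYNC_INVALIDATE;
 *	sync_flags |= VM_SYNC_CONTIGUOUS;	// report holes as an error
 *
 * Passing both VM_SYNC_SYNCHRONOUS and VM_SYNC_ASYNCHRONOUS is rejected with
 * KERN_INVALID_ARGUMENT, as documented above.
 */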
kern_return_t
vm_map_msync(
	vm_map_t		map,
	vm_map_address_t	address,
	vm_map_size_t		size,
	vm_sync_t		sync_flags)
{
	vm_map_entry_t		entry;
	vm_map_size_t		amount_left;
	vm_object_offset_t	offset;
	boolean_t		do_sync_req;
	boolean_t		had_hole = FALSE;
	vm_map_offset_t		pmap_offset;

	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
	    (sync_flags & VM_SYNC_SYNCHRONOUS)) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * align address and size on page boundaries
	 */
	size = (vm_map_round_page(address + size,
	    VM_MAP_PAGE_MASK(map)) -
	    vm_map_trunc_page(address,
	    VM_MAP_PAGE_MASK(map)));
	address = vm_map_trunc_page(address,
	    VM_MAP_PAGE_MASK(map));

	if (map == VM_MAP_NULL) {
		return KERN_INVALID_TASK;
	}

	if (size == 0) {
		return KERN_SUCCESS;
	}

	amount_left = size;

	while (amount_left > 0) {
		vm_object_size_t	flush_size;
		vm_object_t		object;

		vm_map_lock(map);
		if (!vm_map_lookup_entry(map,
		    address,
		    &entry)) {
			vm_map_size_t	skip;

			/*
			 * hole in the address map.
			 */
			had_hole = TRUE;

			if (sync_flags & VM_SYNC_KILLPAGES) {
				/*
				 * For VM_SYNC_KILLPAGES, there should be
				 * no holes in the range, since we couldn't
				 * prevent someone else from allocating in
				 * that hole and we wouldn't want to "kill"
				 * their pages.
				 */
				vm_map_unlock(map);
				break;
			}

			/*
			 * Check for empty map.
			 */
			if (entry == vm_map_to_entry(map) &&
			    entry->vme_next == entry) {
				vm_map_unlock(map);
				break;
			}
			/*
			 * Check that we don't wrap and that
			 * we have at least one real map entry.
			 */
			if ((map->hdr.nentries == 0) ||
			    (entry->vme_next->vme_start < address)) {
				vm_map_unlock(map);
				break;
			}
			/*
			 * Move up to the next entry if needed
			 */
			skip = (entry->vme_next->vme_start - address);
			if (skip >= amount_left) {
				amount_left = 0;
			} else {
				amount_left -= skip;
			}
			address = entry->vme_next->vme_start;
			vm_map_unlock(map);
			continue;
		}

		offset = address - entry->vme_start;
		pmap_offset = address;
		/*
		 * do we have more to flush than is contained in this
		 * entry ?
		 */
		if (amount_left + entry->vme_start + offset > entry->vme_end) {
			flush_size = entry->vme_end -
			    (entry->vme_start + offset);
		} else {
			flush_size = amount_left;
		}
		amount_left -= flush_size;
		address += flush_size;

		if (entry->is_sub_map == TRUE) {
			vm_map_t	local_map;
			vm_map_offset_t	local_offset;

			local_map = VME_SUBMAP(entry);
			local_offset = VME_OFFSET(entry);
			vm_map_reference(local_map);
			vm_map_unlock(map);
			if (vm_map_msync(
				    local_map,
				    local_offset,
				    flush_size,
				    sync_flags) == KERN_INVALID_ADDRESS) {
				had_hole = TRUE;
			}
			vm_map_deallocate(local_map);
			continue;
		}
		object = VME_OBJECT(entry);

		/*
		 * We can't sync this object if the object has not been
		 * created yet.
		 */
		if (object == VM_OBJECT_NULL) {
			vm_map_unlock(map);
			continue;
		}
		offset += VME_OFFSET(entry);

		vm_object_lock(object);

		if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
			int kill_pages = 0;
			boolean_t reusable_pages = FALSE;

			if (sync_flags & VM_SYNC_KILLPAGES) {
				if (((object->ref_count == 1) ||
				    ((object->copy_strategy !=
				    MEMORY_OBJECT_COPY_SYMMETRIC) &&
				    (object->copy == VM_OBJECT_NULL))) &&
				    (object->shadow == VM_OBJECT_NULL)) {
					if (object->ref_count != 1) {
						vm_page_stats_reusable.free_shared++;
					}
					kill_pages = 1;
				} else {
					kill_pages = -1;
				}
			}
			if (kill_pages != -1) {
				vm_object_deactivate_pages(
					/* ... */
					(vm_object_size_t) flush_size,
					/* ... */);
			}
			vm_object_unlock(object);
			vm_map_unlock(map);
			continue;
		}
		/*
		 * We can't sync this object if there isn't a pager.
		 * Don't bother to sync internal objects, since there can't
		 * be any "permanent" storage for these objects anyway.
		 */
		if ((object->pager == MEMORY_OBJECT_NULL) ||
		    (object->internal) || (object->private)) {
			vm_object_unlock(object);
			vm_map_unlock(map);
			continue;
		}
		/*
		 * keep reference on the object until syncing is done
		 */
		vm_object_reference_locked(object);
		vm_object_unlock(object);

		vm_map_unlock(map);

		do_sync_req = vm_object_sync(object,
		    /* ... */
		    sync_flags & VM_SYNC_INVALIDATE,
		    ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
		    (sync_flags & VM_SYNC_ASYNCHRONOUS)),
		    sync_flags & VM_SYNC_SYNCHRONOUS);

		if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
			/*
			 * clear out the clustering and read-ahead hints
			 */
			vm_object_lock(object);

			object->pages_created = 0;
			object->pages_used = 0;
			object->sequential = 0;
			object->last_alloc = 0;

			vm_object_unlock(object);
		}
		vm_object_deallocate(object);
	} /* while */

	/* for proper msync() behaviour */
	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
		return KERN_INVALID_ADDRESS;
	}

	return KERN_SUCCESS;
}
/*
 *	Routine:	convert_port_entry_to_map
 *	Purpose:
 *		Convert from a port specifying an entry or a task
 *		to a map. Doesn't consume the port ref; produces a map ref,
 *		which may be null.  Unlike convert_port_to_map, the
 *		port may be task or a named entry backed.
 *	Conditions:
 *		Nothing locked.
 */
vm_map_t
convert_port_entry_to_map(
	ipc_port_t	port)
{
	vm_map_t		map;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
		while (TRUE) {
			ip_lock(port);
			if (ip_active(port) && (ip_kotype(port)
			    == IKOT_NAMED_ENTRY)) {
				named_entry =
				    (vm_named_entry_t)port->ip_kobject;
				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
					ip_unlock(port);

					try_failed_count++;
					mutex_pause(try_failed_count);
					continue;
				}
				named_entry->ref_count++;
				lck_mtx_unlock(&(named_entry)->Lock);
				ip_unlock(port);
				if ((named_entry->is_sub_map) &&
				    (named_entry->protection
				    & VM_PROT_WRITE)) {
					map = named_entry->backing.map;
				} else {
					mach_destroy_memory_entry(port);
					return VM_MAP_NULL;
				}
				vm_map_reference_swap(map);
				mach_destroy_memory_entry(port);
				break;
			} else {
				return VM_MAP_NULL;
			}
		}
	} else {
		map = convert_port_to_map(port);
	}

	return map;
}
/*
 *	Routine:	convert_port_entry_to_object
 *	Purpose:
 *		Convert from a port specifying a named entry to an
 *		object. Doesn't consume the port ref; produces a map ref,
 *		which may be null.
 *	Conditions:
 *		Nothing locked.
 */
vm_object_t
convert_port_entry_to_object(
	ipc_port_t	port)
{
	vm_object_t		object = VM_OBJECT_NULL;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if (IP_VALID(port) &&
	    (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
		/* ... */
		if (ip_active(port) &&
		    (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
			named_entry = (vm_named_entry_t)port->ip_kobject;
			if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
				/* ... */
				try_failed_count++;
				mutex_pause(try_failed_count);
				/* ... retry ... */
			}
			named_entry->ref_count++;
			lck_mtx_unlock(&(named_entry)->Lock);
			/* ... */
			if (!(named_entry->is_sub_map) &&
			    !(named_entry->is_copy) &&
			    (named_entry->protection & VM_PROT_WRITE)) {
				object = named_entry->backing.object;
				vm_object_reference(object);
			}
			mach_destroy_memory_entry(port);
		}
	}

	return object;
}
/*
 * Export routines to other components for the things we access locally through
 * macros.
 */
#undef current_map
vm_map_t
current_map(void)
{
	return current_map_fast();
}

/*
 *	vm_map_reference:
 *
 *	Most code internal to the osfmk will go through a
 *	macro defining this.  This is always here for the
 *	use of other kernel components.
 */
#undef vm_map_reference
void
vm_map_reference(
	vm_map_t	map)
{
	if (map == VM_MAP_NULL) {
		return;
	}

	lck_mtx_lock(&map->s_lock);
	assert(map->res_count > 0);
	assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
	os_ref_retain_locked(&map->map_refcnt);
	lck_mtx_unlock(&map->s_lock);
}
/*
 *	vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(
	vm_map_t	map)
{
	os_ref_count_t	ref;

	if (map == VM_MAP_NULL) {
		return;
	}

	lck_mtx_lock(&map->s_lock);
	ref = os_ref_release_locked(&map->map_refcnt);
	if (ref > 0) {
		vm_map_res_deallocate(map);
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	assert(os_ref_get_count(&map->map_refcnt) == 0);
	lck_mtx_unlock(&map->s_lock);

	/*
	 * The map residence count isn't decremented here because
	 * the vm_map_delete below will traverse the entire map,
	 * deleting entries, and the residence counts on objects
	 * and sharing maps will go away then.
	 */
	vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
}
void
vm_map_disable_NX(vm_map_t map)
{
	if (map == NULL) {
		return;
	}
	if (map->pmap == NULL) {
		return;
	}

	pmap_disable_NX(map->pmap);
}

void
vm_map_disallow_data_exec(vm_map_t map)
{
	if (map == NULL) {
		return;
	}

	map->map_disallow_data_exec = TRUE;
}
/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
 * more descriptive.
 */
void
vm_map_set_32bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}

void
vm_map_set_64bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
}
/*
 * Expand the maximum size of an existing map to the maximum supported.
 */
void
vm_map_set_jumbo(vm_map_t map)
{
#if defined (__arm64__)
	vm_map_set_max_addr(map, ~0);
#else /* arm64 */
	(void) map;
#endif
}

/*
 * This map has a JIT entitlement
 */
void
vm_map_set_jit_entitled(vm_map_t map)
{
#if defined (__arm64__)
	pmap_set_jit_entitled(map->pmap);
#else /* arm64 */
	(void) map;
#endif
}
/*
 * Expand the maximum size of an existing map.
 */
void
vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
{
#if defined(__arm64__)
	vm_map_offset_t max_supported_offset = 0;
	vm_map_offset_t old_max_offset = map->max_offset;
	max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);

	new_max_offset = trunc_page(new_max_offset);

	/* The address space cannot be shrunk using this routine. */
	if (old_max_offset >= new_max_offset) {
		return;
	}

	if (max_supported_offset < new_max_offset) {
		new_max_offset = max_supported_offset;
	}

	map->max_offset = new_max_offset;

	if (map->holes_list->prev->vme_end == old_max_offset) {
		/*
		 * There is already a hole at the end of the map; simply make it bigger.
		 */
		map->holes_list->prev->vme_end = map->max_offset;
	} else {
		/*
		 * There is no hole at the end, so we need to create a new hole
		 * for the new empty space we're creating.
		 */
		struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
		new_hole->start = old_max_offset;
		new_hole->end = map->max_offset;
		new_hole->prev = map->holes_list->prev;
		new_hole->next = (struct vm_map_entry *)map->holes_list;
		map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
		map->holes_list->prev = (struct vm_map_entry *)new_hole;
	}
#else
	(void)map;
	(void)new_max_offset;
#endif
}
vm_map_offset_t
vm_compute_max_offset(boolean_t is64)
{
#if defined(__arm__) || defined(__arm64__)
	return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}
void
vm_map_get_max_aslr_slide_section(
	vm_map_t	map __unused,
	int64_t		*max_sections,
	int64_t		*section_size)
{
#if defined(__arm64__)
	/* ... */
	*section_size = ARM_TT_TWIG_SIZE;
	/* ... */
#endif
}

vm_map_offset_t
vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
	 * limited embedded address space; this is also meant to minimize pmap
	 * memory usage on 16KB page systems.
	 */
	return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}

vm_map_offset_t
vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* We limit the loader slide to 4MB, in order to ensure at least 8 bits
	 * of independent entropy on 16KB page systems.
	 */
	return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
#else
	return 1 << (vm_map_is_64bit(map) ? 16 : 8);
#endif
}
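/*
 * Illustrative arithmetic (not part of the original source): on a 16KB-page
 * arm64 system, VM_MAP_PAGE_SHIFT(map) == 14, so the expressions above give
 *
 *	1 << (24 - 14) == 1024 pages * 16KB == 16MB of slide
 *	1 << (22 - 14) ==  256 pages * 16KB ==  4MB for the loader
 *
 * and with 4KB pages (shift 12), 4096 * 4KB == 16MB and 1024 * 4KB == 4MB,
 * so the byte limits are the same regardless of page size.
 */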
boolean_t
vm_map_is_64bit(
	vm_map_t map)
{
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}

boolean_t
vm_map_has_hard_pagezero(
	vm_map_t	map,
	vm_map_offset_t	pagezero_size)
{
	/*
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	return map->min_offset >= pagezero_size;
}
/*
 * Raise a VM map's maximum offset.
 */
kern_return_t
vm_map_raise_max_offset(
	vm_map_t	map,
	vm_map_offset_t	new_max_offset)
{
	kern_return_t	ret;

	vm_map_lock(map);
	ret = KERN_INVALID_ADDRESS;

	if (new_max_offset >= map->max_offset) {
		if (!vm_map_is_64bit(map)) {
			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		} else {
			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		}
	}

	vm_map_unlock(map);
	return ret;
}
/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t	map,
	vm_map_offset_t	new_min_offset)
{
	vm_map_entry_t	first_entry;

	new_min_offset = vm_map_round_page(new_min_offset,
	    VM_MAP_PAGE_MASK(map));

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	if (new_min_offset >= map->max_offset) {
		/* can't go beyond the end of the address space */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	assert(map->holes_list);
	map->holes_list->start = new_min_offset;
	assert(new_min_offset < map->holes_list->end);

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
 * don't have to reach over to the BSD data structures.
 */

void
vm_map_set_user_wire_limit(vm_map_t	map,
    vm_size_t	limit)
{
	map->user_wire_limit = limit;
}
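/*
 * Illustrative note (not part of the original source): the BSD side keeps
 * this value in sync with RLIMIT_MEMLOCK, so a setrlimit(RLIMIT_MEMLOCK, ...)
 * ends up doing, roughly:
 *
 *	vm_map_set_user_wire_limit(current_map(), (vm_size_t)rlim_cur);
 *
 * after which the Mach VM wiring path checks requests against the
 * map->user_wire_limit stored here, without reaching into BSD structures.
 */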
void
vm_map_switch_protect(vm_map_t	map,
    boolean_t	val)
{
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}

/*
 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
 * bump both counters.
 */
void
vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}

void
vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t
vm_map_sign(vm_map_t map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object,
		    start - entry->vme_start + VME_OFFSET(entry));
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->vmp_busy ||
		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		m->vmp_cs_validated = TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->vmp_wpmapped = FALSE;
		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif /* CONFIG_DYNAMIC_CODE_SIGNING */
kern_return_t
vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
{
	vm_map_entry_t	entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t	next_entry;
	kern_return_t	kr = KERN_SUCCESS;
	vm_map_t	zap_map;

	vm_map_lock(map);

	/*
	 * We use a "zap_map" to avoid having to unlock
	 * the "map" in vm_map_delete().
	 */
	zap_map = vm_map_create(PMAP_NULL,
	    /* ... */
	    map->hdr.entries_pageable);

	if (zap_map == VM_MAP_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	vm_map_set_page_shift(zap_map,
	    VM_MAP_PAGE_SHIFT(map));
	vm_map_disable_hole_optimization(zap_map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = next_entry) {
		next_entry = entry->vme_next;

		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    (VME_OBJECT(entry)->internal == TRUE) &&
		    (VME_OBJECT(entry)->ref_count == 1)) {
			*reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
			*reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);

			(void)vm_map_delete(map,
			    /* ... */
			    VM_MAP_REMOVE_SAVE_ENTRIES,
			    /* ... */);
		}
	}

	vm_map_unlock(map);

	/*
	 * Get rid of the "zap_maps" and all the map entries that
	 * they may still contain.
	 */
	if (zap_map != VM_MAP_NULL) {
		vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_map = VM_MAP_NULL;
	}

	return kr;
}
#if DEVELOPMENT || DEBUG

int
vm_map_disconnect_page_mappings(
	vm_map_t	map,
	boolean_t	do_unnest)
{
	vm_map_entry_t	entry;
	int		page_count = 0;

	if (do_unnest == TRUE) {
#ifndef NO_NESTED_PMAP
		vm_map_lock(map);

		for (entry = vm_map_first_entry(map);
		    entry != vm_map_to_entry(map);
		    entry = entry->vme_next) {
			if (entry->is_sub_map && entry->use_pmap) {
				/*
				 * Make sure the range between the start of this entry and
				 * the end of this entry is no longer nested, so that
				 * we will only remove mappings from the pmap in use by this
				 * process.
				 */
				vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
			}
		}
		vm_map_unlock(map);
#endif
	}
	vm_map_lock_read(map);

	page_count = map->pmap->stats.resident_count;

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
		    (VME_OBJECT(entry)->phys_contiguous))) {
			continue;
		}
		if (entry->is_sub_map) {
			assert(!entry->use_pmap);
		}

		pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
	}
	vm_map_unlock_read(map);

	return page_count;
}

#endif /* DEVELOPMENT || DEBUG */
18510 int c_freezer_swapout_page_count
;
18511 int c_freezer_compression_count
= 0;
18512 AbsoluteTime c_freezer_last_yield_ts
= 0;
18514 extern unsigned int memorystatus_freeze_private_shared_pages_ratio
;
18515 extern unsigned int memorystatus_freeze_shared_mb_per_process_max
;
kern_return_t
vm_map_freeze(
	task_t          task,
	unsigned int    *purgeable_count,
	unsigned int    *wired_count,
	unsigned int    *clean_count,
	unsigned int    *dirty_count,
	unsigned int    dirty_budget,
	unsigned int    *shared_count,
	int             *freezer_error_code,
	boolean_t       eval_only)
{
	vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
	kern_return_t   kr = KERN_SUCCESS;
	boolean_t       evaluation_phase = TRUE;
	vm_object_t     cur_shared_object = NULL;
	int             cur_shared_obj_ref_cnt = 0;
	unsigned int    dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;

	*purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;

	/*
	 * We need the exclusive lock here so that we can
	 * block any page faults or lookups while we are
	 * in the middle of freezing this vm map.
	 */
	vm_map_t map = task->map;

	vm_map_lock(map);

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
		if (vm_compressor_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
		}

		if (vm_swap_low_on_space()) {
			*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
		}

		kr = KERN_NO_SPACE;
		goto done;
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
		/*
		 * In-memory compressor backing the freezer. No disk.
		 * So no need to do the evaluation phase.
		 */
		evaluation_phase = FALSE;

		if (eval_only == TRUE) {
			/*
			 * We don't support 'eval_only' mode
			 * in this non-swap config.
			 */
			*freezer_error_code = FREEZER_ERROR_GENERIC;
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		c_freezer_compression_count = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);
	}
again:

	for (entry2 = vm_map_first_entry(map);
	    entry2 != vm_map_to_entry(map);
	    entry2 = entry2->vme_next) {
		vm_object_t     src_object = VME_OBJECT(entry2);

		if (src_object &&
		    !entry2->is_sub_map &&
		    !src_object->phys_contiguous) {
			/* If eligible, scan the entry, moving eligible pages over to our parent object */

			if (src_object->internal == TRUE) {
				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					/*
					 * We skip purgeable objects during evaluation phase only.
					 * If we decide to freeze this process, we'll explicitly
					 * purge these objects before we go around again with
					 * 'evaluation_phase' set to FALSE.
					 */

					if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
						/*
						 * We want to purge objects that may not belong to this task but are mapped
						 * in this task alone. Since we already purged this task's purgeable memory
						 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
						 * on this task's purgeable objects. Hence the check for only volatile objects.
						 */
						if (evaluation_phase == FALSE &&
						    (src_object->purgable == VM_PURGABLE_VOLATILE) &&
						    (src_object->ref_count == 1)) {
							vm_object_lock(src_object);
							vm_object_purge(src_object, 0);
							vm_object_unlock(src_object);
						}
						continue;
					}

					/*
					 * Pages belonging to this object could be swapped to disk.
					 * Make sure it's not a shared object because we could end
					 * up just bringing it back in again.
					 *
					 * We try to optimize somewhat by checking for objects that are mapped
					 * more than once within our own map. But we don't do full searches,
					 * we just look at the entries following our current entry.
					 */

					if (src_object->ref_count > 1) {
						if (src_object != cur_shared_object) {
							obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
							dirty_shared_count += obj_pages_snapshot;

							cur_shared_object = src_object;
							cur_shared_obj_ref_cnt = 1;
							continue;
						} else {
							cur_shared_obj_ref_cnt++;
							if (src_object->ref_count == cur_shared_obj_ref_cnt) {
								/*
								 * Fall through to below and treat this object as private.
								 * So deduct its pages from our shared total and add it to the
								 * private total.
								 */

								dirty_shared_count -= obj_pages_snapshot;
								dirty_private_count += obj_pages_snapshot;
							} else {
								continue;
							}
						}
					}

					if (src_object->ref_count == 1) {
						dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
					}

					if (evaluation_phase == TRUE) {
						continue;
					}
				}

				uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
				*wired_count += src_object->wired_page_count;

				if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
					if (vm_compressor_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
					}

					if (vm_swap_low_on_space()) {
						*freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
					}

					kr = KERN_NO_SPACE;
					break;
				}
				if (paged_out_count >= dirty_budget) {
					break;
				}
				dirty_budget -= paged_out_count;
			}
		}
	}

	if (evaluation_phase) {
		unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;

		if (dirty_shared_count > shared_pages_threshold) {
			*freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
			kr = KERN_FAILURE;
			goto done;
		}

		if (dirty_shared_count &&
		    ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
			*freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
			kr = KERN_FAILURE;
			goto done;
		}

		evaluation_phase = FALSE;
		dirty_shared_count = dirty_private_count = 0;

		c_freezer_compression_count = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);

		if (eval_only) {
			kr = KERN_SUCCESS;
			goto done;
		}

		vm_purgeable_purge_task_owned(task);

		goto again;
	} else {
		kr = KERN_SUCCESS;
		*shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
	}

done:
	vm_map_unlock(map);

	if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
		vm_object_compressed_freezer_done();

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * reset the counter tracking the # of swapped compressed pages
			 * because we are now done with this freeze session and task.
			 */

			*dirty_count = c_freezer_swapout_page_count; //used to track pageouts
			c_freezer_swapout_page_count = 0;
		}
	}
	return kr;
}
/*
 * vm_map_entry_should_cow_for_true_share:
 *
 * Determines if the map entry should be clipped and setup for copy-on-write
 * to avoid applying "true_share" to a large VM object when only a subset is
 * targeted.
 *
 * For now, we target only the map entries created for the Objective C
 * Garbage Collector, which initially have the following properties:
 *	- alias == VM_MEMORY_MALLOC
 * 	- wired_count == 0
 * 	- !needs_copy
 * and a VM object with:
 * 	- internal
 * 	- copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
 * 	- !true_share
 * 	- vo_size == ANON_CHUNK_SIZE
 *
 * Only non-kernel map entries.
 */
boolean_t
vm_map_entry_should_cow_for_true_share(
	vm_map_entry_t  entry)
{
	vm_object_t     object;

	if (entry->is_sub_map) {
		/* entry does not point at a VM object */
		return FALSE;
	}

	if (entry->needs_copy) {
		/* already set for copy_on_write: done! */
		return FALSE;
	}

	if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
	    VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
		/* not a malloc heap or Obj-C Garbage Collector heap */
		return FALSE;
	}

	if (entry->wired_count) {
		/* wired: can't change the map entry... */
		vm_counters.should_cow_but_wired++;
		return FALSE;
	}

	object = VME_OBJECT(entry);

	if (object == VM_OBJECT_NULL) {
		/* no object yet... */
		return FALSE;
	}

	if (!object->internal) {
		/* not an internal object */
		return FALSE;
	}

	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
		/* not the default copy strategy */
		return FALSE;
	}

	if (object->true_share) {
		/* already true_share: too late to avoid it */
		return FALSE;
	}

	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
	    object->vo_size != ANON_CHUNK_SIZE) {
		/* ... not an object created for the ObjC Garbage Collector */
		return FALSE;
	}

	if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
	    object->vo_size != 2048 * 4096) {
		/* ... not a "MALLOC_SMALL" heap */
		return FALSE;
	}

	/*
	 * All the criteria match: we have a large object being targeted for "true_share".
	 * To limit the adverse side-effects linked with "true_share", tell the caller to
	 * try and avoid setting up the entire object for "true_share" by clipping the
	 * targeted range and setting it up for copy-on-write.
	 */
	return TRUE;
}
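
/*
 * Illustrative sketch (not part of the original source): callers of the
 * predicate above typically clip the entry around the range being touched
 * and force copy-on-write so that only that clipped chunk can end up
 * "true_share".  Simplified; the locking and shadow-object setup that the
 * real callers perform are omitted, and the function name is hypothetical.
 */
#if 0 /* example only, never compiled */
static void
example_cow_instead_of_true_share(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_map_offset_t fault_addr)
{
	if (vm_map_entry_should_cow_for_true_share(entry)) {
		/* narrow the entry to the page-aligned range around the fault */
		vm_map_clip_start(map, entry,
		    vm_map_trunc_page(fault_addr, VM_MAP_PAGE_MASK(map)));
		vm_map_clip_end(map, entry,
		    vm_map_round_page(fault_addr + PAGE_SIZE,
		    VM_MAP_PAGE_MASK(map)));
		/* copy-on-write for this clipped chunk only */
		entry->needs_copy = TRUE;
	}
}
#endif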
vm_map_offset_t
vm_map_round_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_ROUND_PAGE(offset, mask);
}

vm_map_offset_t
vm_map_trunc_page_mask(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return VM_MAP_TRUNC_PAGE(offset, mask);
}

boolean_t
vm_map_page_aligned(
	vm_map_offset_t offset,
	vm_map_offset_t mask)
{
	return ((offset) & mask) == 0;
}

int
vm_map_page_shift(
	vm_map_t map)
{
	return VM_MAP_PAGE_SHIFT(map);
}

int
vm_map_page_size(
	vm_map_t map)
{
	return VM_MAP_PAGE_SIZE(map);
}

vm_map_offset_t
vm_map_page_mask(
	vm_map_t map)
{
	return VM_MAP_PAGE_MASK(map);
}
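
/*
 * Illustrative sketch (not part of the original source): the page-mask
 * helpers above in concrete numbers, using a 16KB page mask (0x3FFF).
 * The function name is hypothetical.
 */
#if 0 /* example only, never compiled */
static void
example_page_mask_rounding(void)
{
	vm_map_offset_t mask16k = 0x3FFF;       /* 16KB page */

	assert(vm_map_trunc_page_mask(0x5432, mask16k) == 0x4000);
	assert(vm_map_round_page_mask(0x5432, mask16k) == 0x8000);
	assert(vm_map_page_aligned(0x8000, mask16k));
	assert(!vm_map_page_aligned(0x5432, mask16k));
}
#endif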
kern_return_t
vm_map_set_page_shift(
	vm_map_t        map,
	int             pageshift)
{
	if (map->hdr.nentries != 0) {
		/* too late to change page size */
		return KERN_FAILURE;
	}

	map->hdr.page_shift = pageshift;

	return KERN_SUCCESS;
}
kern_return_t
vm_map_query_volatile(
	vm_map_t        map,
	mach_vm_size_t  *volatile_virtual_size_p,
	mach_vm_size_t  *volatile_resident_size_p,
	mach_vm_size_t  *volatile_compressed_size_p,
	mach_vm_size_t  *volatile_pmap_size_p,
	mach_vm_size_t  *volatile_compressed_pmap_size_p)
{
	mach_vm_size_t  volatile_virtual_size;
	mach_vm_size_t  volatile_resident_count;
	mach_vm_size_t  volatile_compressed_count;
	mach_vm_size_t  volatile_pmap_count;
	mach_vm_size_t  volatile_compressed_pmap_count;
	mach_vm_size_t  resident_count;
	vm_map_entry_t  entry;
	vm_object_t     object;

	/* map should be locked by caller */

	volatile_virtual_size = 0;
	volatile_resident_count = 0;
	volatile_compressed_count = 0;
	volatile_pmap_count = 0;
	volatile_compressed_pmap_count = 0;

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;

		if (entry->is_sub_map) {
			continue;
		}
		if (!(entry->protection & VM_PROT_WRITE)) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}
		if (object->purgable != VM_PURGABLE_VOLATILE &&
		    object->purgable != VM_PURGABLE_EMPTY) {
			continue;
		}
		if (VME_OFFSET(entry)) {
			/*
			 * If the map entry has been split and the object now
			 * appears several times in the VM map, we don't want
			 * to count the object's resident_page_count more than
			 * once. We count it only for the first one, starting
			 * at offset 0 and ignore the other VM map entries.
			 */
			continue;
		}
		resident_count = object->resident_page_count;
		if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
			resident_count = 0;
		} else {
			resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
		}

		volatile_virtual_size += entry->vme_end - entry->vme_start;
		volatile_resident_count += resident_count;
		if (object->pager) {
			volatile_compressed_count +=
			    vm_compressor_pager_get_count(object->pager);
		}
		pmap_compressed_bytes = 0;
		pmap_resident_bytes =
		    pmap_query_resident(map->pmap,
		    entry->vme_start,
		    entry->vme_end,
		    &pmap_compressed_bytes);
		volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
		volatile_compressed_pmap_count += (pmap_compressed_bytes
		    / PAGE_SIZE);
	}

	/* map is still locked on return */

	*volatile_virtual_size_p = volatile_virtual_size;
	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
	*volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
	*volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;

	return KERN_SUCCESS;
}
void
vm_map_sizes(vm_map_t map,
    vm_map_size_t * psize,
    vm_map_size_t * pfree,
    vm_map_size_t * plargest_free)
{
	vm_map_entry_t  entry;
	vm_map_offset_t prev;
	vm_map_size_t   free, total_free, largest_free;
	boolean_t       end;

	if (!map) {
		*psize = *pfree = *plargest_free = 0;
		return;
	}

	total_free = largest_free = 0;

	vm_map_lock_read(map);
	if (psize) {
		*psize = map->max_offset - map->min_offset;
	}

	prev = map->min_offset;
	for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
		end = (entry == vm_map_to_entry(map));

		if (end) {
			free = entry->vme_end - prev;
		} else {
			free = entry->vme_start - prev;
		}

		total_free += free;
		if (free > largest_free) {
			largest_free = free;
		}

		if (end) {
			break;
		}
		prev = entry->vme_end;
	}
	vm_map_unlock_read(map);
	if (pfree) {
		*pfree = total_free;
	}
	if (plargest_free) {
		*plargest_free = largest_free;
	}
}
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);
int
vm_map_shadow_max(
	vm_map_t map)
{
	int             shadows, shadows_max;
	vm_map_entry_t  entry;
	vm_object_t     object, next_object;

	if (map == NULL) {
		return 0;
	}

	shadows_max = 0;

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		if (entry->is_sub_map) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}
		vm_object_lock_shared(object);
		for (shadows = 0;
		    object->shadow != NULL;
		    shadows++, object = next_object) {
			next_object = object->shadow;
			vm_object_lock_shared(next_object);
			vm_object_unlock(object);
		}
		vm_object_unlock(object);
		if (shadows > shadows_max) {
			shadows_max = shadows;
		}
	}

	vm_map_unlock_read(map);

	return shadows_max;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
void
vm_commit_pagezero_status(vm_map_t lmap)
{
	pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
}
#if !CONFIG_EMBEDDED
void
vm_map_set_high_start(
	vm_map_t        map,
	vm_map_offset_t high_start)
{
	map->vmmap_high_start = high_start;
}
#endif /* !CONFIG_EMBEDDED */
#if PMAP_CS
kern_return_t
vm_map_entry_cs_associate(
	vm_map_t                map,
	vm_map_entry_t          entry,
	vm_map_kernel_flags_t   vmk_flags)
{
	vm_object_t cs_object, cs_shadow;
	vm_object_offset_t cs_offset;
	void *cs_blobs;
	struct vnode *cs_vnode;
	kern_return_t cs_ret;

	if (map->pmap == NULL ||
	    entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
	    VME_OBJECT(entry) == VM_OBJECT_NULL ||
	    !(entry->protection & VM_PROT_EXECUTE)) {
		return KERN_SUCCESS;
	}

	vm_map_lock_assert_exclusive(map);

	if (entry->used_for_jit) {
		cs_ret = pmap_cs_associate(map->pmap,
		    PMAP_CS_ASSOCIATE_JIT,
		    entry->vme_start,
		    entry->vme_end - entry->vme_start);
		goto done;
	}

	if (vmk_flags.vmkf_remap_prot_copy) {
		cs_ret = pmap_cs_associate(map->pmap,
		    PMAP_CS_ASSOCIATE_COW,
		    entry->vme_start,
		    entry->vme_end - entry->vme_start);
		goto done;
	}

	vm_object_lock_shared(VME_OBJECT(entry));
	cs_offset = VME_OFFSET(entry);
	for (cs_object = VME_OBJECT(entry);
	    (cs_object != VM_OBJECT_NULL &&
	    !cs_object->code_signed);
	    cs_object = cs_shadow) {
		cs_shadow = cs_object->shadow;
		if (cs_shadow != VM_OBJECT_NULL) {
			cs_offset += cs_object->vo_shadow_offset;
			vm_object_lock_shared(cs_shadow);
		}
		vm_object_unlock(cs_object);
	}
	if (cs_object == VM_OBJECT_NULL) {
		return KERN_SUCCESS;
	}

	cs_offset += cs_object->paging_offset;
	cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
	cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
	    &cs_blobs);
	assert(cs_ret == KERN_SUCCESS);
	cs_ret = cs_associate_blob_with_mapping(map->pmap,
	    entry->vme_start,
	    (entry->vme_end - entry->vme_start),
	    cs_offset,
	    cs_blobs);
	vm_object_unlock(cs_object);
	cs_object = VM_OBJECT_NULL;

done:
	if (cs_ret == KERN_SUCCESS) {
		DTRACE_VM2(vm_map_entry_cs_associate_success,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end);
		if (vm_map_executable_immutable) {
			/*
			 * Prevent this executable
			 * mapping from being unmapped
			 * or modified.
			 */
			entry->permanent = TRUE;
		}
		/*
		 * pmap says it will validate the
		 * code-signing validity of pages
		 * faulted in via this mapping, so
		 * this map entry should be marked so
		 * that vm_fault() bypasses code-signing
		 * validation for faults coming through
		 * this mapping.
		 */
		entry->pmap_cs_associated = TRUE;
	} else if (cs_ret == KERN_NOT_SUPPORTED) {
		/*
		 * pmap won't check the code-signing
		 * validity of pages faulted in via
		 * this mapping, so VM should keep
		 * doing it.
		 */
		DTRACE_VM3(vm_map_entry_cs_associate_off,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end,
		    int, cs_ret);
	} else {
		/*
		 * A real error: do not allow
		 * execution in this mapping.
		 */
		DTRACE_VM3(vm_map_entry_cs_associate_failure,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end,
		    int, cs_ret);
		entry->protection &= ~VM_PROT_EXECUTE;
		entry->max_protection &= ~VM_PROT_EXECUTE;
	}

	return cs_ret;
}
#endif /* PMAP_CS */
/*
 * FORKED CORPSE FOOTPRINT
 *
 * A forked corpse gets a copy of the original VM map but its pmap is mostly
 * empty since it never ran and never got to fault in any pages.
 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
 * a forked corpse would therefore return very little information.
 *
 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
 * to vm_map_fork() to collect footprint information from the original VM map
 * and its pmap, and store it in the forked corpse's VM map.  That information
 * is stored in place of the VM map's "hole list" since we'll never need to
 * lookup for holes in the corpse's map.
 *
 * The corpse's footprint info looks like this:
 *
 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
 * as follows:
 *
 *                     +---------------------------------------+
 *            header-> | cf_size                               |
 *                     +-------------------+-------------------+
 *                     | cf_last_region    | cf_last_zeroes    |
 *                     +-------------------+-------------------+
 *           region1-> | cfr_vaddr                             |
 *                     +-------------------+-------------------+
 *                     | cfr_num_pages     | d0 | d1 | d2 | d3 |
 *                     +---------------------------------------+
 *                     | d4 | d5 | ...                         |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +-------------------+-------------------+
 *                     | dy | dz | na | na | cfr_vaddr...      | <-region2
 *                     +-------------------+-------------------+
 *                     | cfr_vaddr (ctd)   | cfr_num_pages     |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +---------------------------------------+
 *       last region-> | cfr_vaddr                             |
 *                     +---------------------------------------+
 *                     + cfr_num_pages     | d0 | d1 | d2 | d3 |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +---------------------------------------+
 *                     | dx | dy | dz | na | na | na | na | na |
 *                     +---------------------------------------+
 *
 * where:
 *	cf_size:	total size of the buffer (rounded to page size)
 *	cf_last_region:	offset in the buffer of the last "region" sub-header
 *	cf_last_zeroes:	number of trailing "zero" dispositions at the end
 *			of the last region
 *	cfr_vaddr:	virtual address of the start of the covered "region"
 *	cfr_num_pages:	number of pages in the covered "region"
 *	d*:		disposition of the page at that virtual address
 * Regions in the buffer are word-aligned.
 *
 * We estimate the size of the buffer based on the number of memory regions
 * and the virtual size of the address space.  While copying each memory region
 * during vm_map_fork(), we also collect the footprint info for that region
 * and store it in the buffer, packing it as much as possible (coalescing
 * contiguous memory regions to avoid having too many region headers and
 * avoiding long streaks of "zero" page dispositions by splitting footprint
 * "regions"), so the number of regions in the footprint buffer might not match
 * the number of memory regions in the address space.
 *
 * We also have to copy the original task's "nonvolatile" ledgers since that's
 * part of the footprint and will need to be reported to any tool asking for
 * the footprint information of the forked corpse.
 */
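
/*
 * Illustrative sketch (not part of the original source): the header and
 * region layouts implied by the diagram above and by the code below.  The
 * real definitions live in vm_map.h and may differ in detail (for instance,
 * the "last zeroes" counter and the lookup hint used further down may share
 * storage); the struct names here are hypothetical.
 */
#if 0 /* example only, never compiled */
struct example_corpse_footprint_header {
	vm_size_t       cf_size;        /* total size of the buffer */
	uint32_t        cf_last_region; /* offset of the last region sub-header */
	uint32_t        cf_last_zeroes; /* trailing "zero" dispositions (collection) */
	uint32_t        cf_hint_region; /* last-hit region offset (lookup) */
};
struct example_corpse_footprint_region {
	vm_map_offset_t cfr_vaddr;      /* start of the covered region */
	uint32_t        cfr_num_pages;  /* number of pages covered */
	unsigned char   cfr_disposition[0]; /* one disposition byte per page */
};
#endif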
uint64_t vm_map_corpse_footprint_count = 0;
uint64_t vm_map_corpse_footprint_size_avg = 0;
uint64_t vm_map_corpse_footprint_size_max = 0;
uint64_t vm_map_corpse_footprint_full = 0;
uint64_t vm_map_corpse_footprint_no_buf = 0;
/*
 * vm_map_corpse_footprint_new_region:
 *	closes the current footprint "region" and creates a new one
 *
 * Returns NULL if there's not enough space in the buffer for a new region.
 */
static struct vm_map_corpse_footprint_region *
vm_map_corpse_footprint_new_region(
	struct vm_map_corpse_footprint_header *footprint_header)
{
	uintptr_t       footprint_edge;
	uint32_t        new_region_offset;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;

	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);
	footprint_region = ((struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region));
	assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
	    footprint_edge);

	/* get rid of trailing zeroes in the last region */
	assert(footprint_region->cfr_num_pages >=
	    footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -=
	    footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* reuse this region if it's now empty */
	if (footprint_region->cfr_num_pages == 0) {
		return footprint_region;
	}

	/* compute offset of new region */
	new_region_offset = footprint_header->cf_last_region;
	new_region_offset += sizeof(*footprint_region);
	new_region_offset += footprint_region->cfr_num_pages;
	new_region_offset = roundup(new_region_offset, sizeof(int));

	/* check if we're going over the edge */
	if (((uintptr_t)footprint_header +
	    new_region_offset +
	    sizeof(*footprint_region)) >=
	    footprint_edge) {
		/* over the edge: no new region */
		return NULL;
	}

	/* adjust offset of last region in header */
	footprint_header->cf_last_region = new_region_offset;

	new_footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);
	new_footprint_region->cfr_vaddr = 0;
	new_footprint_region->cfr_num_pages = 0;
	/* caller needs to initialize new region */

	return new_footprint_region;
}
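
/*
 * Illustrative sketch (not part of the original source): how the offset of
 * the next region is derived above, with hypothetical numbers.  Assuming the
 * previous region sub-header sits at offset 0x20, the sub-header itself is
 * 12 bytes and 13 page dispositions were stored, the next region starts at
 * the next word boundary past the dispositions.
 */
#if 0 /* example only, never compiled */
static void
example_new_region_offset(void)
{
	uint32_t new_region_offset = 0x20;      /* previous region sub-header */

	new_region_offset += 12;                /* skip the sub-header */
	new_region_offset += 13;                /* skip the 13 disposition bytes */
	new_region_offset = roundup(new_region_offset, sizeof(int));
	assert(new_region_offset == 0x3c);      /* padded to a word boundary */
}
#endif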
/*
 * vm_map_corpse_footprint_collect:
 *	collect footprint information for "old_entry" in "old_map" and
 *	stores it in "new_map"'s vmmap_footprint_info.
 */
kern_return_t
vm_map_corpse_footprint_collect(
	vm_map_t        old_map,
	vm_map_entry_t  old_entry,
	vm_map_t        new_map)
{
	vm_map_offset_t va;
	int             disp;
	kern_return_t   kr;
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	struct vm_map_corpse_footprint_region *new_footprint_region;
	unsigned char   *next_disp_p;
	uintptr_t       footprint_edge;
	uint32_t        num_pages_tmp;

	va = old_entry->vme_start;

	vm_map_lock_assert_exclusive(old_map);
	vm_map_lock_assert_exclusive(new_map);

	assert(new_map->has_corpse_footprint);
	assert(!old_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    old_map->has_corpse_footprint) {
		/*
		 * This can only transfer footprint info from a
		 * map with a live pmap to a map with a corpse footprint.
		 */
		return KERN_NOT_SUPPORTED;
	}

	if (new_map->vmmap_corpse_footprint == NULL) {
		vm_offset_t     buf;
		vm_size_t       buf_size;

		buf_size = (sizeof(*footprint_header) +
		    (old_map->hdr.nentries
		    *
		    (sizeof(*footprint_region) +
		    +3)) /* potential alignment for each region */
		    +
		    ((old_map->size / PAGE_SIZE)
		    *
		    sizeof(char)));             /* disposition for each page */
//		printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
		buf_size = round_page(buf_size);

		/* limit buffer to 1 page to validate overflow detection */
//		buf_size = PAGE_SIZE;

		/* limit size to a somewhat sane amount */
#if CONFIG_EMBEDDED
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (256*1024)      /* 256KB */
#else /* CONFIG_EMBEDDED */
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE   (8*1024*1024)   /* 8MB */
#endif /* CONFIG_EMBEDDED */
		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
		}

		/*
		 * Allocate the pageable buffer (with a trailing guard page).
		 * It will be zero-filled on demand.
		 */
		kr = kernel_memory_allocate(kernel_map,
		    &buf,
		    (buf_size
		    + PAGE_SIZE),                       /* trailing guard page */
		    0,                                  /* mask */
		    KMA_PAGEABLE | KMA_GUARD_LAST,
		    VM_KERN_MEMORY_DIAG);
		if (kr != KERN_SUCCESS) {
			vm_map_corpse_footprint_no_buf++;
			return kr;
		}

		/* initialize header and 1st region */
		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
		new_map->vmmap_corpse_footprint = footprint_header;

		footprint_header->cf_size = buf_size;
		footprint_header->cf_last_region =
		    sizeof(*footprint_header);
		footprint_header->cf_last_zeroes = 0;

		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
		footprint_region->cfr_vaddr = 0;
		footprint_region->cfr_num_pages = 0;
	} else {
		/* retrieve header and last region */
		footprint_header = (struct vm_map_corpse_footprint_header *)
		    new_map->vmmap_corpse_footprint;
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header +
		    footprint_header->cf_last_region);
	}
	footprint_edge = ((uintptr_t)footprint_header +
	    footprint_header->cf_size);

	if ((footprint_region->cfr_vaddr +
	    (((vm_map_offset_t)footprint_region->cfr_num_pages) *
	    PAGE_SIZE))
	    != old_entry->vme_start) {
		uint64_t num_pages_delta;
		uint32_t region_offset_delta;

		/*
		 * Not the next contiguous virtual address:
		 * start a new region or store "zero" dispositions for
		 * the missing pages?
		 */
		/* size of gap in actual page dispositions */
		num_pages_delta = (((old_entry->vme_start -
		    footprint_region->cfr_vaddr) / PAGE_SIZE)
		    - footprint_region->cfr_num_pages);
		/* size of gap as a new footprint region header */
		region_offset_delta =
		    (sizeof(*footprint_region) +
		    roundup((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes),
		    sizeof(int)) -
		    (footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes));
//		printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
		if (region_offset_delta < num_pages_delta ||
		    os_add3_overflow(footprint_region->cfr_num_pages,
		    (uint32_t) num_pages_delta,
		    1,
		    &num_pages_tmp)) {
			/*
			 * Storing data for this gap would take more space
			 * than inserting a new footprint region header:
			 * let's start a new region and save space. If it's a
			 * tie, let's avoid using a new region, since that
			 * would require more region hops to find the right
			 * range during lookups.
			 *
			 * If the current region's cfr_num_pages would overflow
			 * if we added "zero" page dispositions for the gap,
			 * no choice but to start a new region.
			 */
//			printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(footprint_header);
			/* check that we're not going over the edge */
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			/* initialize new region as empty */
			footprint_region->cfr_vaddr = old_entry->vme_start;
			footprint_region->cfr_num_pages = 0;
		} else {
			/*
			 * Store "zero" page dispositions for the missing
			 * pages.
			 */
//			printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
			for (; num_pages_delta > 0; num_pages_delta--) {
				next_disp_p =
				    ((unsigned char *) footprint_region +
				    sizeof(*footprint_region) +
				    footprint_region->cfr_num_pages);
				/* check that we're not going over the edge */
				if ((uintptr_t)next_disp_p >= footprint_edge) {
					goto over_the_edge;
				}
				/* store "zero" disposition for this gap page */
				footprint_region->cfr_num_pages++;
				*next_disp_p = (unsigned char) 0;
				footprint_header->cf_last_zeroes++;
			}
		}
	}

	for (va = old_entry->vme_start;
	    va < old_entry->vme_end;
	    va += PAGE_SIZE) {
		vm_object_t     object;

		object = VME_OBJECT(old_entry);
		if (!old_entry->is_sub_map &&
		    old_entry->iokit_acct &&
		    object != VM_OBJECT_NULL &&
		    object->internal &&
		    object->purgable == VM_PURGABLE_DENY) {
			/*
			 * Non-purgeable IOKit memory: phys_footprint
			 * includes the entire virtual mapping.
			 * Since the forked corpse's VM map entry will not
			 * have "iokit_acct", pretend that this page's
			 * disposition is "present & internal", so that it
			 * shows up in the forked corpse's footprint.
			 */
			disp = (PMAP_QUERY_PAGE_PRESENT |
			    PMAP_QUERY_PAGE_INTERNAL);
		} else {
			disp = 0;
			pmap_query_page_info(old_map->pmap,
			    va,
			    &disp);
		}

//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);

		if (disp == 0 && footprint_region->cfr_num_pages == 0) {
			/*
			 * Ignore "zero" dispositions at start of
			 * region: just move start of region.
			 */
			footprint_region->cfr_vaddr += PAGE_SIZE;
			continue;
		}

		/* would region's cfr_num_pages overflow? */
		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
		    &num_pages_tmp)) {
			/* overflow: create a new region */
			new_footprint_region =
			    vm_map_corpse_footprint_new_region(
				footprint_header);
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			footprint_region->cfr_vaddr = va;
			footprint_region->cfr_num_pages = 0;
		}

		next_disp_p = ((unsigned char *)footprint_region +
		    sizeof(*footprint_region) +
		    footprint_region->cfr_num_pages);
		/* check that we're not going over the edge */
		if ((uintptr_t)next_disp_p >= footprint_edge) {
			goto over_the_edge;
		}
		/* store this dispostion */
		*next_disp_p = (unsigned char) disp;
		footprint_region->cfr_num_pages++;

		if (disp != 0) {
			/* non-zero disp: break the current zero streak */
			footprint_header->cf_last_zeroes = 0;
			/* done */
			continue;
		}

		/* zero disp: add to the current streak of zeroes */
		footprint_header->cf_last_zeroes++;
		if ((footprint_header->cf_last_zeroes +
		    roundup((footprint_region->cfr_num_pages -
		    footprint_header->cf_last_zeroes) &
		    (sizeof(int) - 1),
		    sizeof(int))) <
		    (sizeof(*footprint_header))) {
			/*
			 * There are not enough trailing "zero" dispositions
			 * (+ the extra padding we would need for the previous
			 * region); creating a new region would not save space
			 * at this point, so let's keep this "zero" disposition
			 * in this region and reconsider later.
			 */
			continue;
		}
		/*
		 * Create a new region to avoid having too many consecutive
		 * "zero" dispositions.
		 */
		new_footprint_region =
		    vm_map_corpse_footprint_new_region(footprint_header);
		if (new_footprint_region == NULL) {
			goto over_the_edge;
		}
		footprint_region = new_footprint_region;
		/* initialize the new region as empty ... */
		footprint_region->cfr_num_pages = 0;
		/* ... and skip this "zero" disp */
		footprint_region->cfr_vaddr = va + PAGE_SIZE;
	}

	return KERN_SUCCESS;

over_the_edge:
//	printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
	vm_map_corpse_footprint_full++;
	return KERN_RESOURCE_SHORTAGE;
}
/*
 * vm_map_corpse_footprint_collect_done:
 *	completes the footprint collection by getting rid of any remaining
 *	trailing "zero" dispositions and trimming the unused part of the
 *	kernel buffer
 */
void
vm_map_corpse_footprint_collect_done(
	vm_map_t        new_map)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	vm_size_t       buf_size, actual_size;
	kern_return_t   kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
	    new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header +
	    footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	actual_size = (vm_size_t)(footprint_header->cf_last_region +
	    sizeof(*footprint_region) +
	    footprint_region->cfr_num_pages);

//	printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	vm_map_corpse_footprint_size_avg =
	    (((vm_map_corpse_footprint_size_avg *
	    vm_map_corpse_footprint_count) +
	    actual_size) /
	    (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		kr = vm_deallocate(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size +
		    PAGE_SIZE),                         /* trailing guard page */
		    (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
		    "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
		kr = vm_protect(kernel_map,
		    ((vm_address_t)footprint_header +
		    actual_size),
		    PAGE_SIZE,
		    FALSE,                              /* set_maximum */
		    VM_PROT_NONE);                      /* new guard page */
		assertf(kr == KERN_SUCCESS,
		    "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
		    footprint_header,
		    (uint64_t) buf_size,
		    (uint64_t) actual_size,
		    kr);
	}

	footprint_header->cf_size = actual_size;
}
/*
 * vm_map_corpse_footprint_query_page_info:
 *	retrieves the disposition of the page at virtual address "vaddr"
 *	in the forked corpse's VM map
 *
 * This is the equivalent of pmap_query_page_info() for a forked corpse.
 */
kern_return_t
vm_map_corpse_footprint_query_page_info(
	vm_map_t        map,
	vm_map_offset_t va,
	int             *disp)
{
	struct vm_map_corpse_footprint_header *footprint_header;
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t        footprint_region_offset;
	vm_map_offset_t region_start, region_end;
	int             disp_idx;
	kern_return_t   kr;

	if (!map->has_corpse_footprint) {
		*disp = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		*disp = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

lookup_again:
	if (footprint_region_offset < sizeof(*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
	    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
	    PAGE_SIZE));
	if (va < region_start &&
	    footprint_region_offset != sizeof(*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof(*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof(*footprint_header);
		goto lookup_again;
	}

	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof(*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += footprint_region->cfr_num_pages;
		/* align to next word boundary */
		footprint_region_offset =
		    roundup(footprint_region_offset,
		    sizeof(int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
		    ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		    PAGE_SIZE));
	}
	if (va < region_start || va >= region_end) {
		/* page not found */
		*disp = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
	*disp = (int) (footprint_region->cfr_disposition[disp_idx]);

	kr = KERN_SUCCESS;
done:
//	if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
	    vm_map_t, map,
	    vm_map_offset_t, va,
	    int, *disp,
	    kern_return_t, kr);

	return kr;
}
void
vm_map_corpse_footprint_destroy(
	vm_map_t        map)
{
	if (map->has_corpse_footprint &&
	    map->vmmap_corpse_footprint != 0) {
		struct vm_map_corpse_footprint_header *footprint_header;
		vm_size_t buf_size;
		kern_return_t kr;

		footprint_header = map->vmmap_corpse_footprint;
		buf_size = footprint_header->cf_size;
		kr = vm_deallocate(kernel_map,
		    (vm_offset_t) map->vmmap_corpse_footprint,
		    ((vm_size_t) buf_size
		    + PAGE_SIZE));                      /* trailing guard page */
		assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
		map->vmmap_corpse_footprint = 0;
		map->has_corpse_footprint = FALSE;
	}
}
/*
 * vm_map_copy_footprint_ledgers:
 *	copies any ledger that's relevant to the memory footprint of "old_task"
 *	into the forked corpse's task ("new_task")
 */
void
vm_map_copy_footprint_ledgers(
	task_t  old_task,
	task_t  new_task)
{
	vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
}
/*
 * vm_map_copy_ledger:
 *	copy a single ledger from "old_task" to "new_task"
 */
void
vm_map_copy_ledger(
	task_t  old_task,
	task_t  new_task,
	int     ledger_entry)
{
	ledger_amount_t old_balance, new_balance, delta;

	assert(new_task->map->has_corpse_footprint);
	if (!new_task->map->has_corpse_footprint) {
		return;
	}

	/* turn off sanity checks for the ledger we're about to mess with */
	ledger_disable_panic_on_negative(new_task->ledger,
	    ledger_entry);

	/* adjust "new_task" to match "old_task" */
	ledger_get_balance(old_task->ledger,
	    ledger_entry,
	    &old_balance);
	ledger_get_balance(new_task->ledger,
	    ledger_entry,
	    &new_balance);
	if (new_balance == old_balance) {
		/* new == old: done */
	} else if (new_balance > old_balance) {
		/* new > old ==> new -= new - old */
		delta = new_balance - old_balance;
		ledger_debit(new_task->ledger,
		    ledger_entry,
		    delta);
	} else {
		/* new < old ==> new += old - new */
		delta = old_balance - new_balance;
		ledger_credit(new_task->ledger,
		    ledger_entry,
		    delta);
	}
}
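
/*
 * Illustrative sketch (not part of the original source): the balance
 * matching above in plain numbers.  If old_task's ledger reads 300 units
 * and new_task's reads 120, the corpse is credited 180; if the corpse were
 * higher instead, the excess would be debited.  The function name is
 * hypothetical.
 */
#if 0 /* example only, never compiled */
static ledger_amount_t
example_ledger_delta(
	ledger_amount_t old_balance,
	ledger_amount_t new_balance)
{
	if (new_balance > old_balance) {
		return new_balance - old_balance;       /* amount to debit */
	} else if (new_balance < old_balance) {
		return old_balance - new_balance;       /* e.g. 300 - 120 = 180 to credit */
	}
	return 0;                                       /* already in sync */
}
#endif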
extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

#define LEDGER_DRIFT(__LEDGER)                          \
	int             __LEDGER##_over;                \
	ledger_amount_t __LEDGER##_over_total;          \
	ledger_amount_t __LEDGER##_over_max;            \
	int             __LEDGER##_under;               \
	ledger_amount_t __LEDGER##_under_total;         \
	ledger_amount_t __LEDGER##_under_max

struct {
	uint64_t        num_pmaps_checked;

	LEDGER_DRIFT(phys_footprint);
	LEDGER_DRIFT(internal);
	LEDGER_DRIFT(internal_compressed);
	LEDGER_DRIFT(iokit_mapped);
	LEDGER_DRIFT(alternate_accounting);
	LEDGER_DRIFT(alternate_accounting_compressed);
	LEDGER_DRIFT(page_table);
	LEDGER_DRIFT(purgeable_volatile);
	LEDGER_DRIFT(purgeable_nonvolatile);
	LEDGER_DRIFT(purgeable_volatile_compressed);
	LEDGER_DRIFT(purgeable_nonvolatile_compressed);
	LEDGER_DRIFT(tagged_nofootprint);
	LEDGER_DRIFT(tagged_footprint);
	LEDGER_DRIFT(tagged_nofootprint_compressed);
	LEDGER_DRIFT(tagged_footprint_compressed);
	LEDGER_DRIFT(network_volatile);
	LEDGER_DRIFT(network_nonvolatile);
	LEDGER_DRIFT(network_volatile_compressed);
	LEDGER_DRIFT(network_nonvolatile_compressed);
	LEDGER_DRIFT(media_nofootprint);
	LEDGER_DRIFT(media_footprint);
	LEDGER_DRIFT(media_nofootprint_compressed);
	LEDGER_DRIFT(media_footprint_compressed);
	LEDGER_DRIFT(graphics_nofootprint);
	LEDGER_DRIFT(graphics_footprint);
	LEDGER_DRIFT(graphics_nofootprint_compressed);
	LEDGER_DRIFT(graphics_footprint_compressed);
	LEDGER_DRIFT(neural_nofootprint);
	LEDGER_DRIFT(neural_footprint);
	LEDGER_DRIFT(neural_nofootprint_compressed);
	LEDGER_DRIFT(neural_footprint_compressed);
} pmap_ledgers_drift;

void
vm_map_pmap_check_ledgers(
	pmap_t          pmap,
	ledger_t        ledger,
	int             pid,
	char            *procname)
{
	ledger_amount_t bal;
	boolean_t       do_panic;

	do_panic = FALSE;

	pmap_ledgers_drift.num_pmaps_checked++;

#define LEDGER_CHECK_BALANCE(__LEDGER)                                  \
MACRO_BEGIN                                                             \
	int panic_on_negative = TRUE;                                   \
	ledger_get_balance(ledger,                                      \
	    task_ledgers.__LEDGER,                                      \
	    &bal);                                                      \
	ledger_get_panic_on_negative(ledger,                            \
	    task_ledgers.__LEDGER,                                      \
	    &panic_on_negative);                                        \
	if (bal != 0) {                                                 \
	        if (panic_on_negative ||                                \
	            (pmap_ledgers_panic &&                              \
	            pmap_ledgers_panic_leeway > 0 &&                    \
	            (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||   \
	            bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
	                do_panic = TRUE;                                \
	        }                                                       \
	        printf("LEDGER BALANCE proc %d (%s) "                   \
	            "\"%s\" = %lld\n",                                  \
	            pid, procname, #__LEDGER, bal);                     \
	        if (bal > 0) {                                          \
	                pmap_ledgers_drift.__LEDGER##_over++;           \
	                pmap_ledgers_drift.__LEDGER##_over_total += bal; \
	                if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
	                        pmap_ledgers_drift.__LEDGER##_over_max = bal; \
	                }                                               \
	        } else if (bal < 0) {                                   \
	                pmap_ledgers_drift.__LEDGER##_under++;          \
	                pmap_ledgers_drift.__LEDGER##_under_total += bal; \
	                if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
	                        pmap_ledgers_drift.__LEDGER##_under_max = bal; \
	                }                                               \
	        }                                                       \
	}                                                               \
MACRO_END

	LEDGER_CHECK_BALANCE(phys_footprint);
	LEDGER_CHECK_BALANCE(internal);
	LEDGER_CHECK_BALANCE(internal_compressed);
	LEDGER_CHECK_BALANCE(iokit_mapped);
	LEDGER_CHECK_BALANCE(alternate_accounting);
	LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
	LEDGER_CHECK_BALANCE(page_table);
	LEDGER_CHECK_BALANCE(purgeable_volatile);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
	LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
	LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(tagged_nofootprint);
	LEDGER_CHECK_BALANCE(tagged_footprint);
	LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
	LEDGER_CHECK_BALANCE(network_volatile);
	LEDGER_CHECK_BALANCE(network_nonvolatile);
	LEDGER_CHECK_BALANCE(network_volatile_compressed);
	LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
	LEDGER_CHECK_BALANCE(media_nofootprint);
	LEDGER_CHECK_BALANCE(media_footprint);
	LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(media_footprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_nofootprint);
	LEDGER_CHECK_BALANCE(graphics_footprint);
	LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
	LEDGER_CHECK_BALANCE(neural_nofootprint);
	LEDGER_CHECK_BALANCE(neural_footprint);
	LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
	LEDGER_CHECK_BALANCE(neural_footprint_compressed);

	if (do_panic) {
		if (pmap_ledgers_panic) {
			panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		} else {
			printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
			    pmap, pid, procname);
		}
	}
}
#endif /* MACH_ASSERT */