2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
63 * Virtual memory mapping module.
66 #include <task_swapper.h>
67 #include <mach_assert.h>
69 #include <vm/vm_options.h>
71 #include <libkern/OSAtomic.h>
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
111 #include <vm/vm_protos.h>
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
115 #include <san/kasan.h>
117 #include <sys/codesign.h>
118 #include <libkern/section_keywords.h>
119 #if DEVELOPMENT || DEBUG
120 extern int proc_selfcsflags(void);
122 extern int panic_on_unsigned_execute
;
123 #endif /* CONFIG_EMBEDDED */
124 #endif /* DEVELOPMENT || DEBUG */
127 extern const int fourk_binary_compatibility_unsafe
;
128 extern const int fourk_binary_compatibility_allow_wx
;
129 #endif /* __arm64__ */
130 extern int proc_selfpid(void);
131 extern char *proc_name_address(void *p
);
133 #if VM_MAP_DEBUG_APPLE_PROTECT
134 int vm_map_debug_apple_protect
= 0;
135 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
136 #if VM_MAP_DEBUG_FOURK
137 int vm_map_debug_fourk
= 0;
138 #endif /* VM_MAP_DEBUG_FOURK */
140 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable
= 1;
141 int vm_map_executable_immutable_verbose
= 0;
143 os_refgrp_decl(static, map_refgrp
, "vm_map", NULL
);
145 extern u_int32_t
random(void); /* from <libkern/libkern.h> */
146 /* Internal prototypes
149 static void vm_map_simplify_range(
151 vm_map_offset_t start
,
152 vm_map_offset_t end
); /* forward */
154 static boolean_t
vm_map_range_check(
156 vm_map_offset_t start
,
158 vm_map_entry_t
*entry
);
160 static vm_map_entry_t
_vm_map_entry_create(
161 struct vm_map_header
*map_header
, boolean_t map_locked
);
163 static void _vm_map_entry_dispose(
164 struct vm_map_header
*map_header
,
165 vm_map_entry_t entry
);
167 static void vm_map_pmap_enter(
169 vm_map_offset_t addr
,
170 vm_map_offset_t end_addr
,
172 vm_object_offset_t offset
,
173 vm_prot_t protection
);
175 static void _vm_map_clip_end(
176 struct vm_map_header
*map_header
,
177 vm_map_entry_t entry
,
178 vm_map_offset_t end
);
180 static void _vm_map_clip_start(
181 struct vm_map_header
*map_header
,
182 vm_map_entry_t entry
,
183 vm_map_offset_t start
);
185 static void vm_map_entry_delete(
187 vm_map_entry_t entry
);
189 static kern_return_t
vm_map_delete(
191 vm_map_offset_t start
,
196 static void vm_map_copy_insert(
198 vm_map_entry_t after_where
,
201 static kern_return_t
vm_map_copy_overwrite_unaligned(
203 vm_map_entry_t entry
,
205 vm_map_address_t start
,
206 boolean_t discard_on_success
);
208 static kern_return_t
vm_map_copy_overwrite_aligned(
210 vm_map_entry_t tmp_entry
,
212 vm_map_offset_t start
,
215 static kern_return_t
vm_map_copyin_kernel_buffer(
217 vm_map_address_t src_addr
,
219 boolean_t src_destroy
,
220 vm_map_copy_t
*copy_result
); /* OUT */
222 static kern_return_t
vm_map_copyout_kernel_buffer(
224 vm_map_address_t
*addr
, /* IN/OUT */
226 vm_map_size_t copy_size
,
228 boolean_t consume_on_success
);
230 static void vm_map_fork_share(
232 vm_map_entry_t old_entry
,
235 static boolean_t
vm_map_fork_copy(
237 vm_map_entry_t
*old_entry_p
,
239 int vm_map_copyin_flags
);
241 static kern_return_t
vm_map_wire_nested(
243 vm_map_offset_t start
,
245 vm_prot_t caller_prot
,
249 vm_map_offset_t pmap_addr
,
250 ppnum_t
*physpage_p
);
252 static kern_return_t
vm_map_unwire_nested(
254 vm_map_offset_t start
,
258 vm_map_offset_t pmap_addr
);
260 static kern_return_t
vm_map_overwrite_submap_recurse(
262 vm_map_offset_t dst_addr
,
263 vm_map_size_t dst_size
);
265 static kern_return_t
vm_map_copy_overwrite_nested(
267 vm_map_offset_t dst_addr
,
269 boolean_t interruptible
,
271 boolean_t discard_on_success
);
273 static kern_return_t
vm_map_remap_extract(
275 vm_map_offset_t addr
,
278 struct vm_map_header
*map_header
,
279 vm_prot_t
*cur_protection
,
280 vm_prot_t
*max_protection
,
281 vm_inherit_t inheritance
,
284 vm_map_kernel_flags_t vmk_flags
);
286 static kern_return_t
vm_map_remap_range_allocate(
288 vm_map_address_t
*address
,
290 vm_map_offset_t mask
,
292 vm_map_kernel_flags_t vmk_flags
,
294 vm_map_entry_t
*map_entry
);
296 static void vm_map_region_look_for_page(
300 vm_object_offset_t offset
,
303 vm_region_extended_info_t extended
,
304 mach_msg_type_number_t count
);
306 static int vm_map_region_count_obj_refs(
307 vm_map_entry_t entry
,
311 static kern_return_t
vm_map_willneed(
313 vm_map_offset_t start
,
314 vm_map_offset_t end
);
316 static kern_return_t
vm_map_reuse_pages(
318 vm_map_offset_t start
,
319 vm_map_offset_t end
);
321 static kern_return_t
vm_map_reusable_pages(
323 vm_map_offset_t start
,
324 vm_map_offset_t end
);
326 static kern_return_t
vm_map_can_reuse(
328 vm_map_offset_t start
,
329 vm_map_offset_t end
);
332 static kern_return_t
vm_map_pageout(
334 vm_map_offset_t start
,
335 vm_map_offset_t end
);
336 #endif /* MACH_ASSERT */
338 static void vm_map_corpse_footprint_destroy(
341 pid_t
find_largest_process_vm_map_entries(void);
344 * Macros to copy a vm_map_entry. We must be careful to correctly
345 * manage the wired page count. vm_map_entry_copy() creates a new
346 * map entry to the same memory - the wired count in the new entry
347 * must be set to zero. vm_map_entry_copy_full() creates a new
348 * entry that is identical to the old entry. This preserves the
349 * wire count; it's used for map splitting and zone changing in
356 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
357 * But for security reasons on embedded platforms, we don't want the
358 * new mapping to be "used for jit", so we always reset the flag here.
359 * Same for "pmap_cs_associated".
361 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
363 (NEW)->used_for_jit = FALSE; \
364 (NEW)->pmap_cs_associated = FALSE; \
367 #else /* CONFIG_EMBEDDED */
370 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
371 * On macOS, the new mapping can be "used for jit".
373 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
375 assert((NEW)->used_for_jit == (OLD)->used_for_jit); \
376 assert((NEW)->pmap_cs_associated == FALSE); \
379 #endif /* CONFIG_EMBEDDED */
381 #define vm_map_entry_copy(NEW, OLD) \
383 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
385 (NEW)->is_shared = FALSE; \
386 (NEW)->needs_wakeup = FALSE; \
387 (NEW)->in_transition = FALSE; \
388 (NEW)->wired_count = 0; \
389 (NEW)->user_wired_count = 0; \
390 (NEW)->permanent = FALSE; \
391 VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD)); \
392 (NEW)->from_reserved_zone = _vmec_reserved; \
393 if ((NEW)->iokit_acct) { \
394 assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
395 (NEW)->iokit_acct = FALSE; \
396 (NEW)->use_pmap = TRUE; \
398 (NEW)->vme_resilient_codesign = FALSE; \
399 (NEW)->vme_resilient_media = FALSE; \
400 (NEW)->vme_atomic = FALSE; \
401 (NEW)->vme_no_copy_on_read = FALSE; \
404 #define vm_map_entry_copy_full(NEW, OLD) \
406 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
408 (NEW)->from_reserved_zone = _vmecf_reserved; \
412 * Normal lock_read_to_write() returns FALSE/0 on failure.
413 * These functions evaluate to zero on success and non-zero value on failure.
415 __attribute__((always_inline
))
417 vm_map_lock_read_to_write(vm_map_t map
)
419 if (lck_rw_lock_shared_to_exclusive(&(map
)->lock
)) {
420 DTRACE_VM(vm_map_lock_upgrade
);
426 __attribute__((always_inline
))
428 vm_map_try_lock(vm_map_t map
)
430 if (lck_rw_try_lock_exclusive(&(map
)->lock
)) {
431 DTRACE_VM(vm_map_lock_w
);
437 __attribute__((always_inline
))
439 vm_map_try_lock_read(vm_map_t map
)
441 if (lck_rw_try_lock_shared(&(map
)->lock
)) {
442 DTRACE_VM(vm_map_lock_r
);
449 * Decide if we want to allow processes to execute from their data or stack areas.
450 * override_nx() returns true if we do. Data/stack execution can be enabled independently
451 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
452 * or allow_stack_exec to enable data execution for that type of data area for that particular
453 * ABI (or both by or'ing the flags together). These are initialized in the architecture
454 * specific pmap files since the default behavior varies according to architecture. The
455 * main reason it varies is because of the need to provide binary compatibility with old
456 * applications that were written before these restrictions came into being. In the old
457 * days, an app could execute anything it could read, but this has slowly been tightened
458 * up over time. The default behavior is:
460 * 32-bit PPC apps may execute from both stack and data areas
461 * 32-bit Intel apps may execute from data areas but not stack
462 * 64-bit PPC/Intel apps may not execute from either data or stack
464 * An application on any architecture may override these defaults by explicitly
465 * adding PROT_EXEC permission to the page in question with the mprotect(2)
466 * system call. This code here just determines what happens when an app tries to
467 * execute from a page that lacks execute permission.
469 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
470 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
471 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
472 * execution from data areas for a particular binary even if the arch normally permits it. As
473 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
474 * to support some complicated use cases, notably browsers with out-of-process plugins that
475 * are not all NX-safe.
478 extern int allow_data_exec
, allow_stack_exec
;
481 override_nx(vm_map_t map
, uint32_t user_tag
) /* map unused on arm */
485 if (map
->pmap
== kernel_pmap
) {
490 * Determine if the app is running in 32 or 64 bit mode.
493 if (vm_map_is_64bit(map
)) {
494 current_abi
= VM_ABI_64
;
496 current_abi
= VM_ABI_32
;
500 * Determine if we should allow the execution based on whether it's a
501 * stack or data area and the current architecture.
504 if (user_tag
== VM_MEMORY_STACK
) {
505 return allow_stack_exec
& current_abi
;
508 return (allow_data_exec
& current_abi
) && (map
->map_disallow_data_exec
== FALSE
);
513 * Virtual memory maps provide for the mapping, protection,
514 * and sharing of virtual memory objects. In addition,
515 * this module provides for an efficient virtual copy of
516 * memory from one map to another.
518 * Synchronization is required prior to most operations.
520 * Maps consist of an ordered doubly-linked list of simple
521 * entries; a single hint is used to speed up lookups.
523 * Sharing maps have been deleted from this version of Mach.
524 * All shared objects are now mapped directly into the respective
525 * maps. This requires a change in the copy on write strategy;
526 * the asymmetric (delayed) strategy is used for shared temporary
527 * objects instead of the symmetric (shadow) strategy. All maps
528 * are now "top level" maps (either task map, kernel map or submap
529 * of the kernel map).
531 * Since portions of maps are specified by start/end addresses,
532 * which may not align with existing map entries, all
533 * routines merely "clip" entries to these start/end values.
534 * [That is, an entry is split into two, bordering at a
535 * start or end value.] Note that these clippings may not
536 * always be necessary (as the two resulting entries are then
537 * not changed); however, the clipping is done for convenience.
538 * No attempt is currently made to "glue back together" two
541 * The symmetric (shadow) copy strategy implements virtual copy
542 * by copying VM object references from one map to
543 * another, and then marking both regions as copy-on-write.
544 * It is important to note that only one writeable reference
545 * to a VM object region exists in any map when this strategy
546 * is used -- this means that shadow object creation can be
547 * delayed until a write operation occurs. The asymmetric (delayed)
548 * strategy allows multiple maps to have writeable references to
549 * the same region of a vm object, and hence cannot delay creating
550 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
551 * Copying of permanent objects is completely different; see
552 * vm_object_copy_strategically() in vm_object.c.
555 static zone_t vm_map_zone
; /* zone for vm_map structures */
556 zone_t vm_map_entry_zone
; /* zone for vm_map_entry structures */
557 static zone_t vm_map_entry_reserved_zone
; /* zone with reserve for non-blocking allocations */
558 static zone_t vm_map_copy_zone
; /* zone for vm_map_copy structures */
559 zone_t vm_map_holes_zone
; /* zone for vm map holes (vm_map_links) structures */
563 * Placeholder object for submap operations. This object is dropped
564 * into the range by a call to vm_map_find, and removed when
565 * vm_map_submap creates the submap.
568 vm_object_t vm_submap_object
;
/*
 * Bootstrap buffers used to seed the map-related zones.
 * Each pointer/size pair is filled in from pmap_steal_memory() in
 * vm_map_steal_memory(); the buffers are later handed to zcram() to
 * populate vm_map_zone (map_data), vm_map_entry_reserved_zone
 * (kentry_data) and vm_map_holes_zone (map_holes_data).
 */
570 static void *map_data
;
571 static vm_size_t map_data_size
;
572 static void *kentry_data
;
573 static vm_size_t kentry_data_size
;
574 static void *map_holes_data
;
575 static vm_size_t map_holes_data_size
;
578 #define NO_COALESCE_LIMIT 0
580 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
583 /* Skip acquiring locks if we're in the midst of a kernel core dump */
584 unsigned int not_in_kdp
= 1;
586 unsigned int vm_map_set_cache_attr_count
= 0;
589 vm_map_set_cache_attr(
593 vm_map_entry_t map_entry
;
595 kern_return_t kr
= KERN_SUCCESS
;
597 vm_map_lock_read(map
);
599 if (!vm_map_lookup_entry(map
, va
, &map_entry
) ||
600 map_entry
->is_sub_map
) {
602 * that memory is not properly mapped
604 kr
= KERN_INVALID_ARGUMENT
;
607 object
= VME_OBJECT(map_entry
);
609 if (object
== VM_OBJECT_NULL
) {
611 * there should be a VM object here at this point
613 kr
= KERN_INVALID_ARGUMENT
;
616 vm_object_lock(object
);
617 object
->set_cache_attr
= TRUE
;
618 vm_object_unlock(object
);
620 vm_map_set_cache_attr_count
++;
622 vm_map_unlock_read(map
);
628 #if CONFIG_CODE_DECRYPTION
630 * vm_map_apple_protected:
631 * This remaps the requested part of the object with an object backed by
632 * the decrypting pager.
633 * crypt_info contains entry points and session data for the crypt module.
634 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
635 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
638 vm_map_apple_protected(
640 vm_map_offset_t start
,
642 vm_object_offset_t crypto_backing_offset
,
643 struct pager_crypt_info
*crypt_info
)
645 boolean_t map_locked
;
647 vm_map_entry_t map_entry
;
648 struct vm_map_entry tmp_entry
;
649 memory_object_t unprotected_mem_obj
;
650 vm_object_t protected_object
;
651 vm_map_offset_t map_addr
;
652 vm_map_offset_t start_aligned
, end_aligned
;
653 vm_object_offset_t crypto_start
, crypto_end
;
655 vm_map_kernel_flags_t vmk_flags
;
658 vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
661 unprotected_mem_obj
= MEMORY_OBJECT_NULL
;
663 start_aligned
= vm_map_trunc_page(start
, PAGE_MASK_64
);
664 end_aligned
= vm_map_round_page(end
, PAGE_MASK_64
);
665 start_aligned
= vm_map_trunc_page(start_aligned
, VM_MAP_PAGE_MASK(map
));
666 end_aligned
= vm_map_round_page(end_aligned
, VM_MAP_PAGE_MASK(map
));
670 * "start" and "end" might be 4K-aligned but not 16K-aligned,
671 * so we might have to loop and establish up to 3 mappings:
673 * + the first 16K-page, which might overlap with the previous
674 * 4K-aligned mapping,
676 * + the last 16K-page, which might overlap with the next
677 * 4K-aligned mapping.
678 * Each of these mappings might be backed by a vnode pager (if
679 * properly page-aligned) or a "fourk_pager", itself backed by a
680 * vnode pager (if 4K-aligned but not page-aligned).
682 #endif /* __arm64__ */
684 map_addr
= start_aligned
;
685 for (map_addr
= start_aligned
;
687 map_addr
= tmp_entry
.vme_end
) {
691 /* lookup the protected VM object */
692 if (!vm_map_lookup_entry(map
,
695 map_entry
->is_sub_map
||
696 VME_OBJECT(map_entry
) == VM_OBJECT_NULL
||
697 !(map_entry
->protection
& VM_PROT_EXECUTE
)) {
698 /* that memory is not properly mapped */
699 kr
= KERN_INVALID_ARGUMENT
;
703 /* get the protected object to be decrypted */
704 protected_object
= VME_OBJECT(map_entry
);
705 if (protected_object
== VM_OBJECT_NULL
) {
706 /* there should be a VM object here at this point */
707 kr
= KERN_INVALID_ARGUMENT
;
710 /* ensure protected object stays alive while map is unlocked */
711 vm_object_reference(protected_object
);
713 /* limit the map entry to the area we want to cover */
714 vm_map_clip_start(map
, map_entry
, start_aligned
);
715 vm_map_clip_end(map
, map_entry
, end_aligned
);
717 tmp_entry
= *map_entry
;
718 map_entry
= VM_MAP_ENTRY_NULL
; /* not valid after unlocking map */
723 * This map entry might be only partially encrypted
724 * (if not fully "page-aligned").
727 crypto_end
= tmp_entry
.vme_end
- tmp_entry
.vme_start
;
728 if (tmp_entry
.vme_start
< start
) {
729 if (tmp_entry
.vme_start
!= start_aligned
) {
730 kr
= KERN_INVALID_ADDRESS
;
732 crypto_start
+= (start
- tmp_entry
.vme_start
);
734 if (tmp_entry
.vme_end
> end
) {
735 if (tmp_entry
.vme_end
!= end_aligned
) {
736 kr
= KERN_INVALID_ADDRESS
;
738 crypto_end
-= (tmp_entry
.vme_end
- end
);
742 * This "extra backing offset" is needed to get the decryption
743 * routine to use the right key. It adjusts for the possibly
744 * relative offset of an interposed "4K" pager...
746 if (crypto_backing_offset
== (vm_object_offset_t
) -1) {
747 crypto_backing_offset
= VME_OFFSET(&tmp_entry
);
751 * Lookup (and create if necessary) the protected memory object
752 * matching that VM object.
753 * If successful, this also grabs a reference on the memory object,
754 * to guarantee that it doesn't go away before we get a chance to map
757 unprotected_mem_obj
= apple_protect_pager_setup(
759 VME_OFFSET(&tmp_entry
),
760 crypto_backing_offset
,
765 /* release extra ref on protected object */
766 vm_object_deallocate(protected_object
);
768 if (unprotected_mem_obj
== NULL
) {
773 vm_flags
= VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
;
774 /* can overwrite an immutable mapping */
775 vmk_flags
.vmkf_overwrite_immutable
= TRUE
;
777 if (tmp_entry
.used_for_jit
&&
778 (VM_MAP_PAGE_SHIFT(map
) != FOURK_PAGE_SHIFT
||
779 PAGE_SHIFT
!= FOURK_PAGE_SHIFT
) &&
780 fourk_binary_compatibility_unsafe
&&
781 fourk_binary_compatibility_allow_wx
) {
782 printf("** FOURK_COMPAT [%d]: "
783 "allowing write+execute at 0x%llx\n",
784 proc_selfpid(), tmp_entry
.vme_start
);
785 vmk_flags
.vmkf_map_jit
= TRUE
;
787 #endif /* __arm64__ */
789 /* map this memory object in place of the current one */
790 map_addr
= tmp_entry
.vme_start
;
791 kr
= vm_map_enter_mem_object(map
,
794 tmp_entry
.vme_start
),
795 (mach_vm_offset_t
) 0,
799 (ipc_port_t
)(uintptr_t) unprotected_mem_obj
,
802 tmp_entry
.protection
,
803 tmp_entry
.max_protection
,
804 tmp_entry
.inheritance
);
805 assertf(kr
== KERN_SUCCESS
,
807 assertf(map_addr
== tmp_entry
.vme_start
,
808 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
810 (uint64_t) tmp_entry
.vme_start
,
813 #if VM_MAP_DEBUG_APPLE_PROTECT
814 if (vm_map_debug_apple_protect
) {
815 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
816 " backing:[object:%p,offset:0x%llx,"
817 "crypto_backing_offset:0x%llx,"
818 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
821 (uint64_t) (map_addr
+ (tmp_entry
.vme_end
-
822 tmp_entry
.vme_start
)),
825 VME_OFFSET(&tmp_entry
),
826 crypto_backing_offset
,
830 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
833 * Release the reference obtained by
834 * apple_protect_pager_setup().
835 * The mapping (if it succeeded) is now holding a reference on
838 memory_object_deallocate(unprotected_mem_obj
);
839 unprotected_mem_obj
= MEMORY_OBJECT_NULL
;
841 /* continue with next map entry */
842 crypto_backing_offset
+= (tmp_entry
.vme_end
-
843 tmp_entry
.vme_start
);
844 crypto_backing_offset
-= crypto_start
;
854 #endif /* CONFIG_CODE_DECRYPTION */
/*
 * Lock group and lock attribute objects set up by the initialization
 * code later in this file: every attribute object is given default
 * settings, the group is registered under the name "vm_map", and the
 * debug setting is cleared on vm_map_lck_rw_attr only.
 */
857 lck_grp_t vm_map_lck_grp
;
858 lck_grp_attr_t vm_map_lck_grp_attr
;
859 lck_attr_t vm_map_lck_attr
;
860 lck_attr_t vm_map_lck_rw_attr
;
863 int malloc_no_cow
= 1;
864 #define VM_PROTECT_WX_FAIL 0
865 #else /* CONFIG_EMBEDDED */
866 int malloc_no_cow
= 0;
867 #define VM_PROTECT_WX_FAIL 1
868 #endif /* CONFIG_EMBEDDED */
869 uint64_t vm_memory_malloc_no_cow_mask
= 0ULL;
871 int vm_check_map_sanity
= 0;
877 * Initialize the vm_map module. Must be called before
878 * any other vm_map routines.
880 * Map and entry structures are allocated from zones -- we must
881 * initialize those zones.
883 * There are three zones of interest:
885 * vm_map_zone: used to allocate maps.
886 * vm_map_entry_zone: used to allocate map entries.
887 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
889 * The kernel allocates map entries from a special zone that is initially
890 * "crammed" with memory. It would be difficult (perhaps impossible) for
891 * the kernel to allocate more memory to an entry zone when it became
892 * empty since the very act of allocating memory implies the creation
899 vm_size_t entry_zone_alloc_size
;
900 const char *mez_name
= "VM map entries";
902 vm_map_zone
= zinit((vm_map_size_t
) sizeof(struct _vm_map
), 40 * 1024,
904 zone_change(vm_map_zone
, Z_NOENCRYPT
, TRUE
);
905 #if defined(__LP64__)
906 entry_zone_alloc_size
= PAGE_SIZE
* 5;
908 entry_zone_alloc_size
= PAGE_SIZE
* 6;
910 vm_map_entry_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_entry
),
911 1024 * 1024, entry_zone_alloc_size
,
913 zone_change(vm_map_entry_zone
, Z_NOENCRYPT
, TRUE
);
914 zone_change(vm_map_entry_zone
, Z_NOCALLOUT
, TRUE
);
915 zone_change(vm_map_entry_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
917 vm_map_entry_reserved_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_entry
),
918 kentry_data_size
* 64, kentry_data_size
,
919 "Reserved VM map entries");
920 zone_change(vm_map_entry_reserved_zone
, Z_NOENCRYPT
, TRUE
);
921 /* Don't quarantine because we always need elements available */
922 zone_change(vm_map_entry_reserved_zone
, Z_KASAN_QUARANTINE
, FALSE
);
924 vm_map_copy_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_copy
),
925 16 * 1024, PAGE_SIZE
, "VM map copies");
926 zone_change(vm_map_copy_zone
, Z_NOENCRYPT
, TRUE
);
928 vm_map_holes_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_links
),
929 16 * 1024, PAGE_SIZE
, "VM map holes");
930 zone_change(vm_map_holes_zone
, Z_NOENCRYPT
, TRUE
);
933 * Cram the map and kentry zones with initial data.
934 * Set reserved_zone non-collectible to aid zone_gc().
936 zone_change(vm_map_zone
, Z_COLLECT
, FALSE
);
937 zone_change(vm_map_zone
, Z_FOREIGN
, TRUE
);
938 zone_change(vm_map_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
940 zone_change(vm_map_entry_reserved_zone
, Z_COLLECT
, FALSE
);
941 zone_change(vm_map_entry_reserved_zone
, Z_EXPAND
, FALSE
);
942 zone_change(vm_map_entry_reserved_zone
, Z_FOREIGN
, TRUE
);
943 zone_change(vm_map_entry_reserved_zone
, Z_NOCALLOUT
, TRUE
);
944 zone_change(vm_map_entry_reserved_zone
, Z_CALLERACCT
, FALSE
); /* don't charge caller */
945 zone_change(vm_map_copy_zone
, Z_CALLERACCT
, FALSE
); /* don't charge caller */
946 zone_change(vm_map_entry_reserved_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
948 zone_change(vm_map_holes_zone
, Z_COLLECT
, TRUE
);
949 zone_change(vm_map_holes_zone
, Z_EXPAND
, TRUE
);
950 zone_change(vm_map_holes_zone
, Z_FOREIGN
, TRUE
);
951 zone_change(vm_map_holes_zone
, Z_NOCALLOUT
, TRUE
);
952 zone_change(vm_map_holes_zone
, Z_CALLERACCT
, TRUE
);
953 zone_change(vm_map_holes_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
956 * Add the stolen memory to zones, adjust zone size and stolen counts.
957 * zcram only up to the maximum number of pages for each zone chunk.
959 zcram(vm_map_zone
, (vm_offset_t
)map_data
, map_data_size
);
961 const vm_size_t stride
= ZONE_CHUNK_MAXPAGES
* PAGE_SIZE
;
962 for (vm_offset_t off
= 0; off
< kentry_data_size
; off
+= stride
) {
963 zcram(vm_map_entry_reserved_zone
,
964 (vm_offset_t
)kentry_data
+ off
,
965 MIN(kentry_data_size
- off
, stride
));
967 for (vm_offset_t off
= 0; off
< map_holes_data_size
; off
+= stride
) {
968 zcram(vm_map_holes_zone
,
969 (vm_offset_t
)map_holes_data
+ off
,
970 MIN(map_holes_data_size
- off
, stride
));
974 * Since these are covered by zones, remove them from stolen page accounting.
976 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size
) + atop_64(kentry_data_size
) + atop_64(map_holes_data_size
));
978 lck_grp_attr_setdefault(&vm_map_lck_grp_attr
);
979 lck_grp_init(&vm_map_lck_grp
, "vm_map", &vm_map_lck_grp_attr
);
980 lck_attr_setdefault(&vm_map_lck_attr
);
982 lck_attr_setdefault(&vm_map_lck_rw_attr
);
983 lck_attr_cleardebug(&vm_map_lck_rw_attr
);
985 #if VM_MAP_DEBUG_APPLE_PROTECT
986 PE_parse_boot_argn("vm_map_debug_apple_protect",
987 &vm_map_debug_apple_protect
,
988 sizeof(vm_map_debug_apple_protect
));
989 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
990 #if VM_MAP_DEBUG_APPLE_FOURK
991 PE_parse_boot_argn("vm_map_debug_fourk",
993 sizeof(vm_map_debug_fourk
));
994 #endif /* VM_MAP_DEBUG_FOURK */
995 PE_parse_boot_argn("vm_map_executable_immutable",
996 &vm_map_executable_immutable
,
997 sizeof(vm_map_executable_immutable
));
998 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
999 &vm_map_executable_immutable_verbose
,
1000 sizeof(vm_map_executable_immutable_verbose
));
1002 PE_parse_boot_argn("malloc_no_cow",
1004 sizeof(malloc_no_cow
));
1005 if (malloc_no_cow
) {
1006 vm_memory_malloc_no_cow_mask
= 0ULL;
1007 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC
;
1008 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_SMALL
;
1009 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_MEDIUM
;
1010 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_LARGE
;
1011 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1012 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1013 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_TINY
;
1014 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE
;
1015 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED
;
1016 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_NANO
;
1017 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1018 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1019 &vm_memory_malloc_no_cow_mask
,
1020 sizeof(vm_memory_malloc_no_cow_mask
));
1024 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity
, sizeof(vm_check_map_sanity
));
1025 if (vm_check_map_sanity
) {
1026 kprintf("VM sanity checking enabled\n");
1028 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1034 vm_map_steal_memory(
1037 uint32_t kentry_initial_pages
;
1039 map_data_size
= round_page(10 * sizeof(struct _vm_map
));
1040 map_data
= pmap_steal_memory(map_data_size
);
1043 * kentry_initial_pages corresponds to the number of kernel map entries
1044 * required during bootstrap until the asynchronous replenishment
1045 * scheme is activated and/or entries are available from the general
1048 #if defined(__LP64__)
1049 kentry_initial_pages
= 10;
1051 kentry_initial_pages
= 6;
1055 /* If using the guard allocator, reserve more memory for the kernel
1056 * reserved map entry pool.
1058 if (gzalloc_enabled()) {
1059 kentry_initial_pages
*= 1024;
1063 kentry_data_size
= kentry_initial_pages
* PAGE_SIZE
;
1064 kentry_data
= pmap_steal_memory(kentry_data_size
);
1066 map_holes_data_size
= kentry_data_size
;
1067 map_holes_data
= pmap_steal_memory(map_holes_data_size
);
1070 boolean_t vm_map_supports_hole_optimization
= FALSE
;
1073 vm_kernel_reserved_entry_init(void)
1075 zone_prio_refill_configure(vm_map_entry_reserved_zone
, (6 * PAGE_SIZE
) / sizeof(struct vm_map_entry
));
1078 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1080 zone_prio_refill_configure(vm_map_holes_zone
, (6 * PAGE_SIZE
) / sizeof(struct vm_map_links
));
1081 vm_map_supports_hole_optimization
= TRUE
;
1085 vm_map_disable_hole_optimization(vm_map_t map
)
1087 vm_map_entry_t head_entry
, hole_entry
, next_hole_entry
;
1089 if (map
->holelistenabled
) {
1090 head_entry
= hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
1092 while (hole_entry
!= NULL
) {
1093 next_hole_entry
= hole_entry
->vme_next
;
1095 hole_entry
->vme_next
= NULL
;
1096 hole_entry
->vme_prev
= NULL
;
1097 zfree(vm_map_holes_zone
, hole_entry
);
1099 if (next_hole_entry
== head_entry
) {
1102 hole_entry
= next_hole_entry
;
1106 map
->holes_list
= NULL
;
1107 map
->holelistenabled
= FALSE
;
1109 map
->first_free
= vm_map_first_entry(map
);
1110 SAVE_HINT_HOLE_WRITE(map
, NULL
);
1115 vm_kernel_map_is_kernel(vm_map_t map
)
1117 return map
->pmap
== kernel_pmap
;
1123 * Creates and returns a new empty VM map with
1124 * the given physical map structure, and having
1125 * the given lower and upper address bounds.
1131 vm_map_offset_t min
,
1132 vm_map_offset_t max
,
1139 options
|= VM_MAP_CREATE_PAGEABLE
;
1141 return vm_map_create_options(pmap
, min
, max
, options
);
1145 vm_map_create_options(
1147 vm_map_offset_t min
,
1148 vm_map_offset_t max
,
1152 struct vm_map_links
*hole_entry
= NULL
;
1154 if (options
& ~(VM_MAP_CREATE_ALL_OPTIONS
)) {
1155 /* unknown option */
1159 result
= (vm_map_t
) zalloc(vm_map_zone
);
1160 if (result
== VM_MAP_NULL
) {
1161 panic("vm_map_create");
1164 vm_map_first_entry(result
) = vm_map_to_entry(result
);
1165 vm_map_last_entry(result
) = vm_map_to_entry(result
);
1166 result
->hdr
.nentries
= 0;
1167 if (options
& VM_MAP_CREATE_PAGEABLE
) {
1168 result
->hdr
.entries_pageable
= TRUE
;
1170 result
->hdr
.entries_pageable
= FALSE
;
1173 vm_map_store_init( &(result
->hdr
));
1175 result
->hdr
.page_shift
= PAGE_SHIFT
;
1178 result
->user_wire_limit
= MACH_VM_MAX_ADDRESS
; /* default limit is unlimited */
1179 result
->user_wire_size
= 0;
1180 #if !CONFIG_EMBEDDED
1181 result
->vmmap_high_start
= 0;
1183 os_ref_init_count(&result
->map_refcnt
, &map_refgrp
, 1);
1185 result
->res_count
= 1;
1186 result
->sw_state
= MAP_SW_IN
;
1187 #endif /* TASK_SWAPPER */
1188 result
->pmap
= pmap
;
1189 result
->min_offset
= min
;
1190 result
->max_offset
= max
;
1191 result
->wiring_required
= FALSE
;
1192 result
->no_zero_fill
= FALSE
;
1193 result
->mapped_in_other_pmaps
= FALSE
;
1194 result
->wait_for_space
= FALSE
;
1195 result
->switch_protect
= FALSE
;
1196 result
->disable_vmentry_reuse
= FALSE
;
1197 result
->map_disallow_data_exec
= FALSE
;
1198 result
->is_nested_map
= FALSE
;
1199 result
->map_disallow_new_exec
= FALSE
;
1200 result
->terminated
= FALSE
;
1201 result
->highest_entry_end
= 0;
1202 result
->first_free
= vm_map_to_entry(result
);
1203 result
->hint
= vm_map_to_entry(result
);
1204 result
->jit_entry_exists
= FALSE
;
1206 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1207 if (options
& VM_MAP_CREATE_CORPSE_FOOTPRINT
) {
1208 result
->has_corpse_footprint
= TRUE
;
1209 result
->holelistenabled
= FALSE
;
1210 result
->vmmap_corpse_footprint
= NULL
;
1212 result
->has_corpse_footprint
= FALSE
;
1213 if (vm_map_supports_hole_optimization
) {
1214 hole_entry
= zalloc(vm_map_holes_zone
);
1216 hole_entry
->start
= min
;
1217 #if defined(__arm__) || defined(__arm64__)
1218 hole_entry
->end
= result
->max_offset
;
1220 hole_entry
->end
= (max
> (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
) ? max
: (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
;
1222 result
->holes_list
= result
->hole_hint
= hole_entry
;
1223 hole_entry
->prev
= hole_entry
->next
= CAST_TO_VM_MAP_ENTRY(hole_entry
);
1224 result
->holelistenabled
= TRUE
;
1226 result
->holelistenabled
= FALSE
;
1230 vm_map_lock_init(result
);
1231 lck_mtx_init_ext(&result
->s_lock
, &result
->s_lock_ext
, &vm_map_lck_grp
, &vm_map_lck_attr
);
1237 * vm_map_entry_create: [ internal use only ]
1239 * Allocates a VM map entry for insertion in the
1240 * given map (or map copy). No fields are filled.
1242 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1244 #define vm_map_copy_entry_create(copy, map_locked) \
1245 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1246 unsigned reserved_zalloc_count
, nonreserved_zalloc_count
;
1248 static vm_map_entry_t
1249 _vm_map_entry_create(
1250 struct vm_map_header
*map_header
, boolean_t __unused map_locked
)
1253 vm_map_entry_t entry
;
1255 zone
= vm_map_entry_zone
;
1257 assert(map_header
->entries_pageable
? !map_locked
: TRUE
);
1259 if (map_header
->entries_pageable
) {
1260 entry
= (vm_map_entry_t
) zalloc(zone
);
1262 entry
= (vm_map_entry_t
) zalloc_canblock(zone
, FALSE
);
1264 if (entry
== VM_MAP_ENTRY_NULL
) {
1265 zone
= vm_map_entry_reserved_zone
;
1266 entry
= (vm_map_entry_t
) zalloc(zone
);
1267 OSAddAtomic(1, &reserved_zalloc_count
);
1269 OSAddAtomic(1, &nonreserved_zalloc_count
);
1273 if (entry
== VM_MAP_ENTRY_NULL
) {
1274 panic("vm_map_entry_create");
1276 entry
->from_reserved_zone
= (zone
== vm_map_entry_reserved_zone
);
1278 vm_map_store_update((vm_map_t
) NULL
, entry
, VM_MAP_ENTRY_CREATE
);
1279 #if MAP_ENTRY_CREATION_DEBUG
1280 entry
->vme_creation_maphdr
= map_header
;
1281 backtrace(&entry
->vme_creation_bt
[0],
1282 (sizeof(entry
->vme_creation_bt
) / sizeof(uintptr_t)), NULL
);
1288 * vm_map_entry_dispose: [ internal use only ]
1290 * Inverse of vm_map_entry_create.
1292 * write map lock held so no need to
1293 * do anything special to ensure correctness
/* Dispose of an entry belonging to a map: forwards the map's header. */
#define vm_map_entry_dispose(map, entry)                        \
	_vm_map_entry_dispose(&(map)->hdr, (entry))

/*
 * Dispose of an entry belonging to a map copy: forwards the copy's header.
 * The first parameter must be named "copy" so that the expansion refers to
 * the macro argument and not to whatever variable named "copy" happens to
 * be in scope at the call site.
 */
#define vm_map_copy_entry_dispose(copy, entry)                  \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1303 _vm_map_entry_dispose(
1304 struct vm_map_header
*map_header
,
1305 vm_map_entry_t entry
)
1309 if (map_header
->entries_pageable
|| !(entry
->from_reserved_zone
)) {
1310 zone
= vm_map_entry_zone
;
1312 zone
= vm_map_entry_reserved_zone
;
1315 if (!map_header
->entries_pageable
) {
1316 if (zone
== vm_map_entry_zone
) {
1317 OSAddAtomic(-1, &nonreserved_zalloc_count
);
1319 OSAddAtomic(-1, &reserved_zalloc_count
);
1327 static boolean_t first_free_check
= FALSE
;
1329 first_free_is_valid(
1332 if (!first_free_check
) {
1336 return first_free_is_valid_store( map
);
1338 #endif /* MACH_ASSERT */
1341 #define vm_map_copy_entry_link(copy, after_where, entry) \
1342 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1344 #define vm_map_copy_entry_unlink(copy, entry) \
1345 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1347 #if MACH_ASSERT && TASK_SWAPPER
1349 * vm_map_res_reference:
1351 * Adds another valid residence count to the given map.
1353 * Map is locked so this function can be called from
1358 vm_map_res_reference(vm_map_t map
)
1360 /* assert map is locked */
1361 assert(map
->res_count
>= 0);
1362 assert(os_ref_get_count(&map
->map_refcnt
) >= map
->res_count
);
1363 if (map
->res_count
== 0) {
1364 lck_mtx_unlock(&map
->s_lock
);
1367 lck_mtx_lock(&map
->s_lock
);
1376 * vm_map_reference_swap:
1378 * Adds valid reference and residence counts to the given map.
1380 * The map may not be in memory (i.e. zero residence count).
1384 vm_map_reference_swap(vm_map_t map
)
1386 assert(map
!= VM_MAP_NULL
);
1387 lck_mtx_lock(&map
->s_lock
);
1388 assert(map
->res_count
>= 0);
1389 assert(os_ref_get_count(&map
->map_refcnt
) >= map
->res_count
);
1390 os_ref_retain_locked(&map
->map_refcnt
);
1391 vm_map_res_reference(map
);
1392 lck_mtx_unlock(&map
->s_lock
);
1396 * vm_map_res_deallocate:
1398 * Decrement residence count on a map; possibly causing swapout.
1400 * The map must be in memory (i.e. non-zero residence count).
1402 * The map is locked, so this function is callable from vm_map_deallocate.
1406 vm_map_res_deallocate(vm_map_t map
)
1408 assert(map
->res_count
> 0);
1409 if (--map
->res_count
== 0) {
1410 lck_mtx_unlock(&map
->s_lock
);
1412 vm_map_swapout(map
);
1414 lck_mtx_lock(&map
->s_lock
);
1416 assert(os_ref_get_count(&map
->map_refcnt
) >= map
->res_count
);
1418 #endif /* MACH_ASSERT && TASK_SWAPPER */
1423 * Actually destroy a map.
1432 /* final cleanup: no need to unnest shared region */
1433 flags
|= VM_MAP_REMOVE_NO_UNNESTING
;
1434 /* final cleanup: ok to remove immutable mappings */
1435 flags
|= VM_MAP_REMOVE_IMMUTABLE
;
1436 /* final cleanup: allow gaps in range */
1437 flags
|= VM_MAP_REMOVE_GAPS_OK
;
1439 /* clean up regular map entries */
1440 (void) vm_map_delete(map
, map
->min_offset
, map
->max_offset
,
1441 flags
, VM_MAP_NULL
);
1442 /* clean up leftover special mappings (commpage, etc...) */
1443 #if !defined(__arm__) && !defined(__arm64__)
1444 (void) vm_map_delete(map
, 0x0, 0xFFFFFFFFFFFFF000ULL
,
1445 flags
, VM_MAP_NULL
);
1446 #endif /* !__arm__ && !__arm64__ */
1448 vm_map_disable_hole_optimization(map
);
1449 vm_map_corpse_footprint_destroy(map
);
1453 assert(map
->hdr
.nentries
== 0);
1456 pmap_destroy(map
->pmap
);
1459 if (vm_map_lck_attr
.lck_attr_val
& LCK_ATTR_DEBUG
) {
1461 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1462 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1463 * structure or kalloc'ed via lck_mtx_init.
1464 * An example is s_lock_ext within struct _vm_map.
1466 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1467 * can add another tag to detect embedded vs alloc'ed indirect external
1468 * mutexes but that'll be additional checks in the lock path and require
1469 * updating dependencies for the old vs new tag.
1471 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1472 * just when lock debugging is ON, we choose to forego explicitly destroying
1473 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1474 * count on vm_map_lck_grp, which has no serious side-effect.
1477 lck_rw_destroy(&(map
)->lock
, &vm_map_lck_grp
);
1478 lck_mtx_destroy(&(map
)->s_lock
, &vm_map_lck_grp
);
1481 zfree(vm_map_zone
, map
);
1485 * Returns pid of the task with the largest number of VM map entries.
1486 * Used in the zone-map-exhaustion jetsam path.
1489 find_largest_process_vm_map_entries(void)
1491 pid_t victim_pid
= -1;
1492 int max_vm_map_entries
= 0;
1493 task_t task
= TASK_NULL
;
1494 queue_head_t
*task_list
= &tasks
;
1496 lck_mtx_lock(&tasks_threads_lock
);
1497 queue_iterate(task_list
, task
, task_t
, tasks
) {
1498 if (task
== kernel_task
|| !task
->active
) {
1502 vm_map_t task_map
= task
->map
;
1503 if (task_map
!= VM_MAP_NULL
) {
1504 int task_vm_map_entries
= task_map
->hdr
.nentries
;
1505 if (task_vm_map_entries
> max_vm_map_entries
) {
1506 max_vm_map_entries
= task_vm_map_entries
;
1507 victim_pid
= pid_from_task(task
);
1511 lck_mtx_unlock(&tasks_threads_lock
);
1513 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid
, max_vm_map_entries
);
1519 * vm_map_swapin/vm_map_swapout
1521 * Swap a map in and out, either referencing or releasing its resources.
1522 * These functions are internal use only; however, they must be exported
1523 * because they may be called from macros, which are exported.
1525 * In the case of swapout, there could be races on the residence count,
1526 * so if the residence count is up, we return, assuming that a
1527 * vm_map_deallocate() call in the near future will bring us back.
1530 * -- We use the map write lock for synchronization among races.
1531 * -- The map write lock, and not the simple s_lock, protects the
1532 * swap state of the map.
1533 * -- If a map entry is a share map, then we hold both locks, in
1534 * hierarchical order.
1536 * Synchronization Notes:
1537 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1538 * will block on the map lock and proceed when swapout is through.
1539 * 2) A vm_map_reference() call at this time is illegal, and will
1540 * cause a panic. vm_map_reference() is only allowed on resident
1541 * maps, since it refuses to block.
1542 * 3) A vm_map_swapin() call during a swapin will block, and
1543 * proceed when the first swapin is done, turning into a nop.
1544 * This is the reason the res_count is not incremented until
1545 * after the swapin is complete.
1546 * 4) There is a timing hole after the checks of the res_count, before
1547 * the map lock is taken, during which a swapin may get the lock
1548 * before a swapout about to happen. If this happens, the swapin
1549 * will detect the state and increment the reference count, causing
1550 * the swapout to be a nop, thereby delaying it until a later
1551 * vm_map_deallocate. If the swapout gets the lock first, then
1552 * the swapin will simply block until the swapout is done, and
1555 * Because vm_map_swapin() is potentially an expensive operation, it
1556 * should be used with caution.
1559 * 1) A map with a residence count of zero is either swapped, or
1561 * 2) A map with a non-zero residence count is either resident,
1562 * or being swapped in.
1565 int vm_map_swap_enable
= 1;
1568 vm_map_swapin(vm_map_t map
)
1570 vm_map_entry_t entry
;
1572 if (!vm_map_swap_enable
) { /* debug */
1578 * First deal with various races.
1580 if (map
->sw_state
== MAP_SW_IN
) {
1582 * we raced with swapout and won. Returning will incr.
1583 * the res_count, turning the swapout into a nop.
1589 * The residence count must be zero. If we raced with another
1590 * swapin, the state would have been IN; if we raced with a
1591 * swapout (after another competing swapin), we must have lost
1592 * the race to get here (see above comment), in which case
1593 * res_count is still 0.
1595 assert(map
->res_count
== 0);
1598 * There are no intermediate states of a map going out or
1599 * coming in, since the map is locked during the transition.
1601 assert(map
->sw_state
== MAP_SW_OUT
);
1604 * We now operate upon each map entry. If the entry is a sub-
1605 * or share-map, we call vm_map_res_reference upon it.
1606 * If the entry is an object, we call vm_object_res_reference
1607 * (this may iterate through the shadow chain).
1608 * Note that we hold the map locked the entire time,
1609 * even if we get back here via a recursive call in
1610 * vm_map_res_reference.
1612 entry
= vm_map_first_entry(map
);
1614 while (entry
!= vm_map_to_entry(map
)) {
1615 if (VME_OBJECT(entry
) != VM_OBJECT_NULL
) {
1616 if (entry
->is_sub_map
) {
1617 vm_map_t lmap
= VME_SUBMAP(entry
);
1618 lck_mtx_lock(&lmap
->s_lock
);
1619 vm_map_res_reference(lmap
);
1620 lck_mtx_unlock(&lmap
->s_lock
);
1622 vm_object_t object
= VME_OBEJCT(entry
);
1623 vm_object_lock(object
);
1625 * This call may iterate through the
1628 vm_object_res_reference(object
);
1629 vm_object_unlock(object
);
1632 entry
= entry
->vme_next
;
1634 assert(map
->sw_state
== MAP_SW_OUT
);
1635 map
->sw_state
= MAP_SW_IN
;
1639 vm_map_swapout(vm_map_t map
)
1641 vm_map_entry_t entry
;
1645 * First deal with various races.
1646 * If we raced with a swapin and lost, the residence count
1647 * will have been incremented to 1, and we simply return.
1649 lck_mtx_lock(&map
->s_lock
);
1650 if (map
->res_count
!= 0) {
1651 lck_mtx_unlock(&map
->s_lock
);
1654 lck_mtx_unlock(&map
->s_lock
);
1657 * There are no intermediate states of a map going out or
1658 * coming in, since the map is locked during the transition.
1660 assert(map
->sw_state
== MAP_SW_IN
);
1662 if (!vm_map_swap_enable
) {
1667 * We now operate upon each map entry. If the entry is a sub-
1668 * or share-map, we call vm_map_res_deallocate upon it.
1669 * If the entry is an object, we call vm_object_res_deallocate
1670 * (this may iterate through the shadow chain).
1671 * Note that we hold the map locked the entire time,
1672 * even if we get back here via a recursive call in
1673 * vm_map_res_deallocate.
1675 entry
= vm_map_first_entry(map
);
1677 while (entry
!= vm_map_to_entry(map
)) {
1678 if (VME_OBJECT(entry
) != VM_OBJECT_NULL
) {
1679 if (entry
->is_sub_map
) {
1680 vm_map_t lmap
= VME_SUBMAP(entry
);
1681 lck_mtx_lock(&lmap
->s_lock
);
1682 vm_map_res_deallocate(lmap
);
1683 lck_mtx_unlock(&lmap
->s_lock
);
1685 vm_object_t object
= VME_OBJECT(entry
);
1686 vm_object_lock(object
);
1688 * This call may take a long time,
1689 * since it could actively push
1690 * out pages (if we implement it
1693 vm_object_res_deallocate(object
);
1694 vm_object_unlock(object
);
1697 entry
= entry
->vme_next
;
1699 assert(map
->sw_state
== MAP_SW_IN
);
1700 map
->sw_state
= MAP_SW_OUT
;
1703 #endif /* TASK_SWAPPER */
1706 * vm_map_lookup_entry: [ internal use only ]
1708 * Calls into the vm map store layer to find the map
1709 * entry containing (or immediately preceding) the
1710 * specified address in the given map; the entry is returned
1711 * in the "entry" parameter. The boolean
1712 * result indicates whether the address is
1713 * actually contained in the map.
1716 vm_map_lookup_entry(
1718 vm_map_offset_t address
,
1719 vm_map_entry_t
*entry
) /* OUT */
1721 return vm_map_store_lookup_entry( map
, address
, entry
);
1725 * Routine: vm_map_find_space
1727 * Allocate a range in the specified virtual address map,
1728 * returning the entry allocated for that range.
1729 * Used by kmem_alloc, etc.
1731 * The map must NOT be locked. It will be returned locked
1732 * on KERN_SUCCESS, unlocked on failure.
1734 * If an entry is allocated, the object/offset fields
1735 * are initialized to zero.
1740 vm_map_offset_t
*address
, /* OUT */
1742 vm_map_offset_t mask
,
1744 vm_map_kernel_flags_t vmk_flags
,
1746 vm_map_entry_t
*o_entry
) /* OUT */
1748 vm_map_entry_t entry
, new_entry
;
1749 vm_map_offset_t start
;
1750 vm_map_offset_t end
;
1751 vm_map_entry_t hole_entry
;
1755 return KERN_INVALID_ARGUMENT
;
1758 if (vmk_flags
.vmkf_guard_after
) {
1759 /* account for the back guard page in the size */
1760 size
+= VM_MAP_PAGE_SIZE(map
);
1763 new_entry
= vm_map_entry_create(map
, FALSE
);
1766 * Look for the first possible address; if there's already
1767 * something at this address, we have to start after it.
1772 if (map
->disable_vmentry_reuse
== TRUE
) {
1773 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
1775 if (map
->holelistenabled
) {
1776 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
1778 if (hole_entry
== NULL
) {
1780 * No more space in the map?
1782 vm_map_entry_dispose(map
, new_entry
);
1784 return KERN_NO_SPACE
;
1788 start
= entry
->vme_start
;
1790 assert(first_free_is_valid(map
));
1791 if ((entry
= map
->first_free
) == vm_map_to_entry(map
)) {
1792 start
= map
->min_offset
;
1794 start
= entry
->vme_end
;
1800 * In any case, the "entry" always precedes
1801 * the proposed new region throughout the loop:
1805 vm_map_entry_t next
;
1808 * Find the end of the proposed new region.
1809 * Be sure we didn't go beyond the end, or
1810 * wrap around the address.
1813 if (vmk_flags
.vmkf_guard_before
) {
1814 /* reserve space for the front guard page */
1815 start
+= VM_MAP_PAGE_SIZE(map
);
1817 end
= ((start
+ mask
) & ~mask
);
1820 vm_map_entry_dispose(map
, new_entry
);
1822 return KERN_NO_SPACE
;
1825 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
1827 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
1829 if ((end
> map
->max_offset
) || (end
< start
)) {
1830 vm_map_entry_dispose(map
, new_entry
);
1832 return KERN_NO_SPACE
;
1835 next
= entry
->vme_next
;
1837 if (map
->holelistenabled
) {
1838 if (entry
->vme_end
>= end
) {
1843 * If there are no more entries, we must win.
1847 * If there is another entry, it must be
1848 * after the end of the potential new region.
1851 if (next
== vm_map_to_entry(map
)) {
1855 if (next
->vme_start
>= end
) {
1861 * Didn't fit -- move to the next entry.
1866 if (map
->holelistenabled
) {
1867 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
1871 vm_map_entry_dispose(map
, new_entry
);
1873 return KERN_NO_SPACE
;
1875 start
= entry
->vme_start
;
1877 start
= entry
->vme_end
;
1881 if (map
->holelistenabled
) {
1882 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
1883 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
1889 * "start" and "end" should define the endpoints of the
1890 * available new range, and
1891 * "entry" should refer to the region before the new
1894 * the map should be locked.
1897 if (vmk_flags
.vmkf_guard_before
) {
1898 /* go back for the front guard page */
1899 start
-= VM_MAP_PAGE_SIZE(map
);
1903 assert(start
< end
);
1904 new_entry
->vme_start
= start
;
1905 new_entry
->vme_end
= end
;
1906 assert(page_aligned(new_entry
->vme_start
));
1907 assert(page_aligned(new_entry
->vme_end
));
1908 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_start
,
1909 VM_MAP_PAGE_MASK(map
)));
1910 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_end
,
1911 VM_MAP_PAGE_MASK(map
)));
1913 new_entry
->is_shared
= FALSE
;
1914 new_entry
->is_sub_map
= FALSE
;
1915 new_entry
->use_pmap
= TRUE
;
1916 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
1917 VME_OFFSET_SET(new_entry
, (vm_object_offset_t
) 0);
1919 new_entry
->needs_copy
= FALSE
;
1921 new_entry
->inheritance
= VM_INHERIT_DEFAULT
;
1922 new_entry
->protection
= VM_PROT_DEFAULT
;
1923 new_entry
->max_protection
= VM_PROT_ALL
;
1924 new_entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
1925 new_entry
->wired_count
= 0;
1926 new_entry
->user_wired_count
= 0;
1928 new_entry
->in_transition
= FALSE
;
1929 new_entry
->needs_wakeup
= FALSE
;
1930 new_entry
->no_cache
= FALSE
;
1931 new_entry
->permanent
= FALSE
;
1932 new_entry
->superpage_size
= FALSE
;
1933 if (VM_MAP_PAGE_SHIFT(map
) != PAGE_SHIFT
) {
1934 new_entry
->map_aligned
= TRUE
;
1936 new_entry
->map_aligned
= FALSE
;
1939 new_entry
->used_for_jit
= FALSE
;
1940 new_entry
->pmap_cs_associated
= FALSE
;
1941 new_entry
->zero_wired_pages
= FALSE
;
1942 new_entry
->iokit_acct
= FALSE
;
1943 new_entry
->vme_resilient_codesign
= FALSE
;
1944 new_entry
->vme_resilient_media
= FALSE
;
1945 if (vmk_flags
.vmkf_atomic_entry
) {
1946 new_entry
->vme_atomic
= TRUE
;
1948 new_entry
->vme_atomic
= FALSE
;
1951 VME_ALIAS_SET(new_entry
, tag
);
1954 * Insert the new entry into the list
1957 vm_map_store_entry_link(map
, entry
, new_entry
, VM_MAP_KERNEL_FLAGS_NONE
);
1962 * Update the lookup hint
1964 SAVE_HINT_MAP_WRITE(map
, new_entry
);
1966 *o_entry
= new_entry
;
1967 return KERN_SUCCESS
;
1970 int vm_map_pmap_enter_print
= FALSE
;
1971 int vm_map_pmap_enter_enable
= FALSE
;
1974 * Routine: vm_map_pmap_enter [internal only]
1977 * Force pages from the specified object to be entered into
1978 * the pmap at the specified address if they are present.
1979 * As soon as a page is not found in the object, the scan ends.
1984 * In/out conditions:
1985 * The source map should not be locked on entry.
1987 __unused
static void
1990 vm_map_offset_t addr
,
1991 vm_map_offset_t end_addr
,
1993 vm_object_offset_t offset
,
1994 vm_prot_t protection
)
1998 struct vm_object_fault_info fault_info
= {};
2000 if (map
->pmap
== 0) {
2004 while (addr
< end_addr
) {
2010 * From vm_map_enter(), we come into this function without the map
2011 * lock held or the object lock held.
2012 * We haven't taken a reference on the object either.
2013 * We should do a proper lookup on the map to make sure
2014 * that things are sane before we go locking objects that
2015 * could have been deallocated from under us.
2018 vm_object_lock(object
);
2020 m
= vm_page_lookup(object
, offset
);
2022 if (m
== VM_PAGE_NULL
|| m
->vmp_busy
|| m
->vmp_fictitious
||
2023 (m
->vmp_unusual
&& (m
->vmp_error
|| m
->vmp_restart
|| m
->vmp_absent
))) {
2024 vm_object_unlock(object
);
2028 if (vm_map_pmap_enter_print
) {
2029 printf("vm_map_pmap_enter:");
2030 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2031 map
, (unsigned long long)addr
, object
, (unsigned long long)offset
);
2033 type_of_fault
= DBG_CACHE_HIT_FAULT
;
2034 kr
= vm_fault_enter(m
, map
->pmap
,
2035 addr
, protection
, protection
,
2037 FALSE
, /* change_wiring */
2038 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
2040 NULL
, /* need_retry */
2043 vm_object_unlock(object
);
2045 offset
+= PAGE_SIZE_64
;
2050 boolean_t
vm_map_pmap_is_empty(
2052 vm_map_offset_t start
,
2053 vm_map_offset_t end
);
2055 vm_map_pmap_is_empty(
2057 vm_map_offset_t start
,
2058 vm_map_offset_t end
)
2060 #ifdef MACHINE_PMAP_IS_EMPTY
2061 return pmap_is_empty(map
->pmap
, start
, end
);
2062 #else /* MACHINE_PMAP_IS_EMPTY */
2063 vm_map_offset_t offset
;
2066 if (map
->pmap
== NULL
) {
2070 for (offset
= start
;
2072 offset
+= PAGE_SIZE
) {
2073 phys_page
= pmap_find_phys(map
->pmap
, offset
);
2075 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2076 "page %d at 0x%llx\n",
2077 map
, (long long)start
, (long long)end
,
2078 phys_page
, (long long)offset
);
2083 #endif /* MACHINE_PMAP_IS_EMPTY */
2086 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2088 vm_map_random_address_for_size(
2090 vm_map_offset_t
*address
,
2093 kern_return_t kr
= KERN_SUCCESS
;
2095 vm_map_offset_t random_addr
= 0;
2096 vm_map_offset_t hole_end
;
2098 vm_map_entry_t next_entry
= VM_MAP_ENTRY_NULL
;
2099 vm_map_entry_t prev_entry
= VM_MAP_ENTRY_NULL
;
2100 vm_map_size_t vm_hole_size
= 0;
2101 vm_map_size_t addr_space_size
;
2103 addr_space_size
= vm_map_max(map
) - vm_map_min(map
);
2105 assert(page_aligned(size
));
2107 while (tries
< MAX_TRIES_TO_GET_RANDOM_ADDRESS
) {
2108 random_addr
= ((vm_map_offset_t
)random()) << PAGE_SHIFT
;
2109 random_addr
= vm_map_trunc_page(
2110 vm_map_min(map
) + (random_addr
% addr_space_size
),
2111 VM_MAP_PAGE_MASK(map
));
2113 if (vm_map_lookup_entry(map
, random_addr
, &prev_entry
) == FALSE
) {
2114 if (prev_entry
== vm_map_to_entry(map
)) {
2115 next_entry
= vm_map_first_entry(map
);
2117 next_entry
= prev_entry
->vme_next
;
2119 if (next_entry
== vm_map_to_entry(map
)) {
2120 hole_end
= vm_map_max(map
);
2122 hole_end
= next_entry
->vme_start
;
2124 vm_hole_size
= hole_end
- random_addr
;
2125 if (vm_hole_size
>= size
) {
2126 *address
= random_addr
;
2133 if (tries
== MAX_TRIES_TO_GET_RANDOM_ADDRESS
) {
2140 vm_memory_malloc_no_cow(
2143 uint64_t alias_mask
;
2149 alias_mask
= 1ULL << alias
;
2150 if (alias_mask
& vm_memory_malloc_no_cow_mask
) {
2157 * Routine: vm_map_enter
2160 * Allocate a range in the specified virtual address map.
2161 * The resulting range will refer to memory defined by
2162 * the given memory object and offset into that object.
2164 * Arguments are as defined in the vm_map call.
2166 int _map_enter_debug
= 0;
2167 static unsigned int vm_map_enter_restore_successes
= 0;
2168 static unsigned int vm_map_enter_restore_failures
= 0;
2172 vm_map_offset_t
*address
, /* IN/OUT */
2174 vm_map_offset_t mask
,
2176 vm_map_kernel_flags_t vmk_flags
,
2179 vm_object_offset_t offset
,
2180 boolean_t needs_copy
,
2181 vm_prot_t cur_protection
,
2182 vm_prot_t max_protection
,
2183 vm_inherit_t inheritance
)
2185 vm_map_entry_t entry
, new_entry
;
2186 vm_map_offset_t start
, tmp_start
, tmp_offset
;
2187 vm_map_offset_t end
, tmp_end
;
2188 vm_map_offset_t tmp2_start
, tmp2_end
;
2189 vm_map_offset_t desired_empty_end
;
2190 vm_map_offset_t step
;
2191 kern_return_t result
= KERN_SUCCESS
;
2192 vm_map_t zap_old_map
= VM_MAP_NULL
;
2193 vm_map_t zap_new_map
= VM_MAP_NULL
;
2194 boolean_t map_locked
= FALSE
;
2195 boolean_t pmap_empty
= TRUE
;
2196 boolean_t new_mapping_established
= FALSE
;
2197 boolean_t keep_map_locked
= vmk_flags
.vmkf_keep_map_locked
;
2198 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
2199 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
2200 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
2201 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
2202 boolean_t is_submap
= vmk_flags
.vmkf_submap
;
2203 boolean_t permanent
= vmk_flags
.vmkf_permanent
;
2204 boolean_t no_copy_on_read
= vmk_flags
.vmkf_no_copy_on_read
;
2205 boolean_t entry_for_jit
= vmk_flags
.vmkf_map_jit
;
2206 boolean_t iokit_acct
= vmk_flags
.vmkf_iokit_acct
;
2207 boolean_t resilient_codesign
= ((flags
& VM_FLAGS_RESILIENT_CODESIGN
) != 0);
2208 boolean_t resilient_media
= ((flags
& VM_FLAGS_RESILIENT_MEDIA
) != 0);
2209 boolean_t random_address
= ((flags
& VM_FLAGS_RANDOM_ADDR
) != 0);
2210 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
2211 vm_tag_t user_alias
;
2212 vm_map_offset_t effective_min_offset
, effective_max_offset
;
2214 boolean_t clear_map_aligned
= FALSE
;
2215 vm_map_entry_t hole_entry
;
2216 vm_map_size_t chunk_size
= 0;
2218 assertf(vmk_flags
.__vmkf_unused
== 0, "vmk_flags unused=0x%x\n", vmk_flags
.__vmkf_unused
);
2220 if (flags
& VM_FLAGS_4GB_CHUNK
) {
2221 #if defined(__LP64__)
2222 chunk_size
= (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2223 #else /* __LP64__ */
2224 chunk_size
= ANON_CHUNK_SIZE
;
2225 #endif /* __LP64__ */
2227 chunk_size
= ANON_CHUNK_SIZE
;
2230 if (superpage_size
) {
2231 switch (superpage_size
) {
2233 * Note that the current implementation only supports
2234 * a single size for superpages, SUPERPAGE_SIZE, per
2235 * architecture. As soon as more sizes are supposed
2236 * to be supported, SUPERPAGE_SIZE has to be replaced
2237 * with a lookup of the size depending on superpage_size.
2240 case SUPERPAGE_SIZE_ANY
:
2241 /* handle it like 2 MB and round up to page size */
2242 size
= (size
+ 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2243 case SUPERPAGE_SIZE_2MB
:
2247 return KERN_INVALID_ARGUMENT
;
2249 mask
= SUPERPAGE_SIZE
- 1;
2250 if (size
& (SUPERPAGE_SIZE
- 1)) {
2251 return KERN_INVALID_ARGUMENT
;
2253 inheritance
= VM_INHERIT_NONE
; /* fork() children won't inherit superpages */
2257 if ((cur_protection
& VM_PROT_WRITE
) &&
2258 (cur_protection
& VM_PROT_EXECUTE
) &&
2259 #if !CONFIG_EMBEDDED
2260 map
!= kernel_map
&&
2261 (cs_process_global_enforcement() ||
2262 (vmk_flags
.vmkf_cs_enforcement_override
2263 ? vmk_flags
.vmkf_cs_enforcement
2264 : cs_process_enforcement(NULL
))) &&
2265 #endif /* !CONFIG_EMBEDDED */
2270 vm_prot_t
, cur_protection
);
2271 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2272 #if VM_PROTECT_WX_FAIL
2274 #else /* VM_PROTECT_WX_FAIL */
2275 "turning off execute\n",
2276 #endif /* VM_PROTECT_WX_FAIL */
2278 (current_task()->bsd_info
2279 ? proc_name_address(current_task()->bsd_info
)
2282 cur_protection
&= ~VM_PROT_EXECUTE
;
2283 #if VM_PROTECT_WX_FAIL
2284 return KERN_PROTECTION_FAILURE
;
2285 #endif /* VM_PROTECT_WX_FAIL */
2289 * If the task has requested executable lockdown,
2290 * deny any new executable mapping.
2292 if (map
->map_disallow_new_exec
== TRUE
) {
2293 if (cur_protection
& VM_PROT_EXECUTE
) {
2294 return KERN_PROTECTION_FAILURE
;
2298 if (resilient_codesign
) {
2300 if ((cur_protection
& (VM_PROT_WRITE
| VM_PROT_EXECUTE
)) ||
2301 (max_protection
& (VM_PROT_WRITE
| VM_PROT_EXECUTE
))) {
2302 return KERN_PROTECTION_FAILURE
;
2306 if (resilient_media
) {
2308 // assert(!needs_copy);
2309 if (object
!= VM_OBJECT_NULL
&&
2310 !object
->internal
) {
2312 * This mapping is directly backed by an external
2313 * memory manager (e.g. a vnode pager for a file):
2314 * we would not have any safe place to inject
2315 * a zero-filled page if an actual page is not
2316 * available, without possibly impacting the actual
2317 * contents of the mapped object (e.g. the file),
2318 * so we can't provide any media resiliency here.
2320 return KERN_INVALID_ARGUMENT
;
2326 /* submaps can not be purgeable */
2327 return KERN_INVALID_ARGUMENT
;
2329 if (object
== VM_OBJECT_NULL
) {
2330 /* submaps can not be created lazily */
2331 return KERN_INVALID_ARGUMENT
;
2334 if (vmk_flags
.vmkf_already
) {
2336 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2337 * is already present. For it to be meaningful, the requested
2338 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2339 * we shouldn't try and remove what was mapped there first
2340 * (!VM_FLAGS_OVERWRITE).
2342 if ((flags
& VM_FLAGS_ANYWHERE
) ||
2343 (flags
& VM_FLAGS_OVERWRITE
)) {
2344 return KERN_INVALID_ARGUMENT
;
2348 effective_min_offset
= map
->min_offset
;
2350 if (vmk_flags
.vmkf_beyond_max
) {
2352 * Allow an insertion beyond the map's max offset.
2354 #if !defined(__arm__) && !defined(__arm64__)
2355 if (vm_map_is_64bit(map
)) {
2356 effective_max_offset
= 0xFFFFFFFFFFFFF000ULL
;
2358 #endif /* __arm__ */
2359 effective_max_offset
= 0x00000000FFFFF000ULL
;
2361 #if !defined(CONFIG_EMBEDDED)
2362 if (__improbable(vmk_flags
.vmkf_32bit_map_va
)) {
2363 effective_max_offset
= MIN(map
->max_offset
, 0x00000000FFFFF000ULL
);
2365 effective_max_offset
= map
->max_offset
;
2368 effective_max_offset
= map
->max_offset
;
2373 (offset
& PAGE_MASK_64
) != 0) {
2375 return KERN_INVALID_ARGUMENT
;
2378 if (map
->pmap
== kernel_pmap
) {
2379 user_alias
= VM_KERN_MEMORY_NONE
;
2384 if (user_alias
== VM_MEMORY_MALLOC_MEDIUM
) {
2385 chunk_size
= MALLOC_MEDIUM_CHUNK_SIZE
;
2388 #define RETURN(value) { result = value; goto BailOut; }
2390 assert(page_aligned(*address
));
2391 assert(page_aligned(size
));
2393 if (!VM_MAP_PAGE_ALIGNED(size
, VM_MAP_PAGE_MASK(map
))) {
2395 * In most cases, the caller rounds the size up to the
2397 * If we get a size that is explicitly not map-aligned here,
2398 * we'll have to respect the caller's wish and mark the
2399 * mapping as "not map-aligned" to avoid tripping the
2400 * map alignment checks later.
2402 clear_map_aligned
= TRUE
;
2405 !VM_MAP_PAGE_ALIGNED(*address
, VM_MAP_PAGE_MASK(map
))) {
2407 * We've been asked to map at a fixed address and that
2408 * address is not aligned to the map's specific alignment.
2409 * The caller should know what it's doing (i.e. most likely
2410 * mapping some fragmented copy map, transferring memory from
2411 * a VM map with a different alignment), so clear map_aligned
2412 * for this new VM map entry and proceed.
2414 clear_map_aligned
= TRUE
;
2418 * Only zero-fill objects are allowed to be purgable.
2419 * LP64todo - limit purgable objects to 32-bits for now
2423 (object
!= VM_OBJECT_NULL
&&
2424 (object
->vo_size
!= size
||
2425 object
->purgable
== VM_PURGABLE_DENY
))
2426 || size
> ANON_MAX_SIZE
)) { /* LP64todo: remove when dp capable */
2427 return KERN_INVALID_ARGUMENT
;
2430 if (!anywhere
&& overwrite
) {
2432 * Create a temporary VM map to hold the old mappings in the
2433 * affected area while we create the new one.
2434 * This avoids releasing the VM map lock in
2435 * vm_map_entry_delete() and allows atomicity
2436 * when we want to replace some mappings with a new one.
2437 * It also allows us to restore the old VM mappings if the
2438 * new mapping fails.
2440 zap_old_map
= vm_map_create(PMAP_NULL
,
2443 map
->hdr
.entries_pageable
);
2444 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
2445 vm_map_disable_hole_optimization(zap_old_map
);
2456 if (entry_for_jit
) {
2458 if (map
->jit_entry_exists
) {
2459 result
= KERN_INVALID_ARGUMENT
;
2462 random_address
= TRUE
;
2463 #endif /* CONFIG_EMBEDDED */
2466 if (random_address
) {
2468 * Get a random start address.
2470 result
= vm_map_random_address_for_size(map
, address
, size
);
2471 if (result
!= KERN_SUCCESS
) {
2476 #if !CONFIG_EMBEDDED
2477 else if ((start
== 0 || start
== vm_map_min(map
)) &&
2478 !map
->disable_vmentry_reuse
&&
2479 map
->vmmap_high_start
!= 0) {
2480 start
= map
->vmmap_high_start
;
2486 * Calculate the first possible address.
2489 if (start
< effective_min_offset
) {
2490 start
= effective_min_offset
;
2492 if (start
> effective_max_offset
) {
2493 RETURN(KERN_NO_SPACE
);
2497 * Look for the first possible address;
2498 * if there's already something at this
2499 * address, we have to start after it.
2502 if (map
->disable_vmentry_reuse
== TRUE
) {
2503 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
2505 if (map
->holelistenabled
) {
2506 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
2508 if (hole_entry
== NULL
) {
2510 * No more space in the map?
2512 result
= KERN_NO_SPACE
;
2515 boolean_t found_hole
= FALSE
;
2518 if (hole_entry
->vme_start
>= start
) {
2519 start
= hole_entry
->vme_start
;
2524 if (hole_entry
->vme_end
> start
) {
2528 hole_entry
= hole_entry
->vme_next
;
2529 } while (hole_entry
!= CAST_TO_VM_MAP_ENTRY(map
->holes_list
));
2531 if (found_hole
== FALSE
) {
2532 result
= KERN_NO_SPACE
;
2539 start
+= PAGE_SIZE_64
;
2543 assert(first_free_is_valid(map
));
2545 entry
= map
->first_free
;
2547 if (entry
== vm_map_to_entry(map
)) {
2550 if (entry
->vme_next
== vm_map_to_entry(map
)) {
2552 * Hole at the end of the map.
2556 if (start
< (entry
->vme_next
)->vme_start
) {
2557 start
= entry
->vme_end
;
2558 start
= vm_map_round_page(start
,
2559 VM_MAP_PAGE_MASK(map
));
2562 * Need to do a lookup.
2569 if (entry
== NULL
) {
2570 vm_map_entry_t tmp_entry
;
2571 if (vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
2572 assert(!entry_for_jit
);
2573 start
= tmp_entry
->vme_end
;
2574 start
= vm_map_round_page(start
,
2575 VM_MAP_PAGE_MASK(map
));
2583 * In any case, the "entry" always precedes
2584 * the proposed new region throughout the
2589 vm_map_entry_t next
;
2592 * Find the end of the proposed new region.
2593 * Be sure we didn't go beyond the end, or
2594 * wrap around the address.
2597 end
= ((start
+ mask
) & ~mask
);
2598 end
= vm_map_round_page(end
,
2599 VM_MAP_PAGE_MASK(map
));
2601 RETURN(KERN_NO_SPACE
);
2604 assert(VM_MAP_PAGE_ALIGNED(start
,
2605 VM_MAP_PAGE_MASK(map
)));
2608 /* We want an entire page of empty space, but don't increase the allocation size. */
2609 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
2611 if ((desired_empty_end
> effective_max_offset
) || (desired_empty_end
< start
)) {
2612 if (map
->wait_for_space
) {
2613 assert(!keep_map_locked
);
2614 if (size
<= (effective_max_offset
-
2615 effective_min_offset
)) {
2616 assert_wait((event_t
)map
,
2620 thread_block(THREAD_CONTINUE_NULL
);
2624 RETURN(KERN_NO_SPACE
);
2627 next
= entry
->vme_next
;
2629 if (map
->holelistenabled
) {
2630 if (entry
->vme_end
>= desired_empty_end
) {
2635 * If there are no more entries, we must win.
2639 * If there is another entry, it must be
2640 * after the end of the potential new region.
2643 if (next
== vm_map_to_entry(map
)) {
2647 if (next
->vme_start
>= desired_empty_end
) {
2653 * Didn't fit -- move to the next entry.
2658 if (map
->holelistenabled
) {
2659 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
2663 result
= KERN_NO_SPACE
;
2666 start
= entry
->vme_start
;
2668 start
= entry
->vme_end
;
2671 start
= vm_map_round_page(start
,
2672 VM_MAP_PAGE_MASK(map
));
2675 if (map
->holelistenabled
) {
2676 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
2677 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
2682 assert(VM_MAP_PAGE_ALIGNED(*address
,
2683 VM_MAP_PAGE_MASK(map
)));
2687 * the address doesn't itself violate
2688 * the mask requirement.
2693 if ((start
& mask
) != 0) {
2694 RETURN(KERN_NO_SPACE
);
2698 * ... the address is within bounds
2703 if ((start
< effective_min_offset
) ||
2704 (end
> effective_max_offset
) ||
2706 RETURN(KERN_INVALID_ADDRESS
);
2709 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
2712 * Fixed mapping and "overwrite" flag: attempt to
2713 * remove all existing mappings in the specified
2714 * address range, saving them in our "zap_old_map".
2716 remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
;
2717 remove_flags
|= VM_MAP_REMOVE_NO_MAP_ALIGN
;
2718 if (vmk_flags
.vmkf_overwrite_immutable
) {
2719 /* we can overwrite immutable mappings */
2720 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
2722 (void) vm_map_delete(map
, start
, end
,
2728 * ... the starting address isn't allocated
2731 if (vm_map_lookup_entry(map
, start
, &entry
)) {
2732 if (!(vmk_flags
.vmkf_already
)) {
2733 RETURN(KERN_NO_SPACE
);
2736 * Check if what's already there is what we want.
2739 tmp_offset
= offset
;
2740 if (entry
->vme_start
< start
) {
2741 tmp_start
-= start
- entry
->vme_start
;
2742 tmp_offset
-= start
- entry
->vme_start
;
2744 for (; entry
->vme_start
< end
;
2745 entry
= entry
->vme_next
) {
2747 * Check if the mapping's attributes
2748 * match the existing map entry.
2750 if (entry
== vm_map_to_entry(map
) ||
2751 entry
->vme_start
!= tmp_start
||
2752 entry
->is_sub_map
!= is_submap
||
2753 VME_OFFSET(entry
) != tmp_offset
||
2754 entry
->needs_copy
!= needs_copy
||
2755 entry
->protection
!= cur_protection
||
2756 entry
->max_protection
!= max_protection
||
2757 entry
->inheritance
!= inheritance
||
2758 entry
->iokit_acct
!= iokit_acct
||
2759 VME_ALIAS(entry
) != alias
) {
2760 /* not the same mapping ! */
2761 RETURN(KERN_NO_SPACE
);
2764 * Check if the same object is being mapped.
2767 if (VME_SUBMAP(entry
) !=
2768 (vm_map_t
) object
) {
2769 /* not the same submap */
2770 RETURN(KERN_NO_SPACE
);
2773 if (VME_OBJECT(entry
) != object
) {
2774 /* not the same VM object... */
2777 obj2
= VME_OBJECT(entry
);
2778 if ((obj2
== VM_OBJECT_NULL
||
2780 (object
== VM_OBJECT_NULL
||
2781 object
->internal
)) {
2788 RETURN(KERN_NO_SPACE
);
2793 tmp_offset
+= entry
->vme_end
- entry
->vme_start
;
2794 tmp_start
+= entry
->vme_end
- entry
->vme_start
;
2795 if (entry
->vme_end
>= end
) {
2796 /* reached the end of our mapping */
2800 /* it all matches: let's use what's already there ! */
2801 RETURN(KERN_MEMORY_PRESENT
);
2805 * ... the next region doesn't overlap the
2809 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
2810 (entry
->vme_next
->vme_start
< end
)) {
2811 RETURN(KERN_NO_SPACE
);
2817 * "start" and "end" should define the endpoints of the
2818 * available new range, and
2819 * "entry" should refer to the region before the new
2822 * the map should be locked.
2826 * See whether we can avoid creating a new entry (and object) by
2827 * extending one of our neighbors. [So far, we only attempt to
2828 * extend from below.] Note that we can never extend/join
2829 * purgable objects because they need to remain distinct
2830 * entities in order to implement their "volatile object"
2836 vm_memory_malloc_no_cow(user_alias
)) {
2837 if (object
== VM_OBJECT_NULL
) {
2838 object
= vm_object_allocate(size
);
2839 object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
2840 object
->true_share
= FALSE
;
2843 object
->purgable
= VM_PURGABLE_NONVOLATILE
;
2844 if (map
->pmap
== kernel_pmap
) {
2846 * Purgeable mappings made in a kernel
2847 * map are "owned" by the kernel itself
2848 * rather than the current user task
2849 * because they're likely to be used by
2850 * more than this user task (see
2851 * execargs_purgeable_allocate(), for
2854 owner
= kernel_task
;
2856 owner
= current_task();
2858 assert(object
->vo_owner
== NULL
);
2859 assert(object
->resident_page_count
== 0);
2860 assert(object
->wired_page_count
== 0);
2861 vm_object_lock(object
);
2862 vm_purgeable_nonvolatile_enqueue(object
, owner
);
2863 vm_object_unlock(object
);
2865 offset
= (vm_object_offset_t
)0;
2867 } else if ((is_submap
== FALSE
) &&
2868 (object
== VM_OBJECT_NULL
) &&
2869 (entry
!= vm_map_to_entry(map
)) &&
2870 (entry
->vme_end
== start
) &&
2871 (!entry
->is_shared
) &&
2872 (!entry
->is_sub_map
) &&
2873 (!entry
->in_transition
) &&
2874 (!entry
->needs_wakeup
) &&
2875 (entry
->behavior
== VM_BEHAVIOR_DEFAULT
) &&
2876 (entry
->protection
== cur_protection
) &&
2877 (entry
->max_protection
== max_protection
) &&
2878 (entry
->inheritance
== inheritance
) &&
2879 ((user_alias
== VM_MEMORY_REALLOC
) ||
2880 (VME_ALIAS(entry
) == alias
)) &&
2881 (entry
->no_cache
== no_cache
) &&
2882 (entry
->permanent
== permanent
) &&
2883 /* no coalescing for immutable executable mappings */
2884 !((entry
->protection
& VM_PROT_EXECUTE
) &&
2885 entry
->permanent
) &&
2886 (!entry
->superpage_size
&& !superpage_size
) &&
2888 * No coalescing if not map-aligned, to avoid propagating
2889 * that condition any further than needed:
2891 (!entry
->map_aligned
|| !clear_map_aligned
) &&
2892 (!entry
->zero_wired_pages
) &&
2893 (!entry
->used_for_jit
&& !entry_for_jit
) &&
2894 (!entry
->pmap_cs_associated
) &&
2895 (entry
->iokit_acct
== iokit_acct
) &&
2896 (!entry
->vme_resilient_codesign
) &&
2897 (!entry
->vme_resilient_media
) &&
2898 (!entry
->vme_atomic
) &&
2899 (entry
->vme_no_copy_on_read
== no_copy_on_read
) &&
2901 ((entry
->vme_end
- entry
->vme_start
) + size
<=
2902 (user_alias
== VM_MEMORY_REALLOC
?
2904 NO_COALESCE_LIMIT
)) &&
2906 (entry
->wired_count
== 0)) { /* implies user_wired_count == 0 */
2907 if (vm_object_coalesce(VME_OBJECT(entry
),
2910 (vm_object_offset_t
) 0,
2911 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
),
2912 (vm_map_size_t
)(end
- entry
->vme_end
))) {
2914 * Coalesced the two objects - can extend
2915 * the previous map entry to include the
2918 map
->size
+= (end
- entry
->vme_end
);
2919 assert(entry
->vme_start
< end
);
2920 assert(VM_MAP_PAGE_ALIGNED(end
,
2921 VM_MAP_PAGE_MASK(map
)));
2922 if (__improbable(vm_debug_events
)) {
2923 DTRACE_VM5(map_entry_extend
, vm_map_t
, map
, vm_map_entry_t
, entry
, vm_address_t
, entry
->vme_start
, vm_address_t
, entry
->vme_end
, vm_address_t
, end
);
2925 entry
->vme_end
= end
;
2926 if (map
->holelistenabled
) {
2927 vm_map_store_update_first_free(map
, entry
, TRUE
);
2929 vm_map_store_update_first_free(map
, map
->first_free
, TRUE
);
2931 new_mapping_established
= TRUE
;
2932 RETURN(KERN_SUCCESS
);
2936 step
= superpage_size
? SUPERPAGE_SIZE
: (end
- start
);
2939 for (tmp2_start
= start
; tmp2_start
< end
; tmp2_start
+= step
) {
2940 tmp2_end
= tmp2_start
+ step
;
2942 * Create a new entry
2945 * The reserved "page zero" in each process's address space can
2946 * be arbitrarily large. Splitting it into separate objects and
2947 * therefore different VM map entries serves no purpose and just
2948 * slows down operations on the VM map, so let's not split the
2949 * allocation into chunks if the max protection is NONE. That
2950 * memory should never be accessible, so it will never get to the
2953 tmp_start
= tmp2_start
;
2954 if (object
== VM_OBJECT_NULL
&&
2955 size
> chunk_size
&&
2956 max_protection
!= VM_PROT_NONE
&&
2957 superpage_size
== 0) {
2958 tmp_end
= tmp_start
+ chunk_size
;
2963 new_entry
= vm_map_entry_insert(
2964 map
, entry
, tmp_start
, tmp_end
,
2965 object
, offset
, needs_copy
,
2967 cur_protection
, max_protection
,
2968 VM_BEHAVIOR_DEFAULT
,
2969 (entry_for_jit
)? VM_INHERIT_NONE
: inheritance
,
2980 assert((object
!= kernel_object
) || (VM_KERN_MEMORY_NONE
!= alias
));
2982 if (resilient_codesign
&&
2983 !((cur_protection
| max_protection
) &
2984 (VM_PROT_WRITE
| VM_PROT_EXECUTE
))) {
2985 new_entry
->vme_resilient_codesign
= TRUE
;
2988 if (resilient_media
&&
2989 (object
== VM_OBJECT_NULL
||
2990 object
->internal
)) {
2991 new_entry
->vme_resilient_media
= TRUE
;
2994 assert(!new_entry
->iokit_acct
);
2996 object
!= VM_OBJECT_NULL
&&
2997 (object
->purgable
!= VM_PURGABLE_DENY
||
2998 object
->vo_ledger_tag
)) {
2999 assert(new_entry
->use_pmap
);
3000 assert(!new_entry
->iokit_acct
);
3002 * Turn off pmap accounting since
3003 * purgeable (or tagged) objects have their
3006 new_entry
->use_pmap
= FALSE
;
3007 } else if (!is_submap
&&
3009 object
!= VM_OBJECT_NULL
&&
3011 /* alternate accounting */
3012 assert(!new_entry
->iokit_acct
);
3013 assert(new_entry
->use_pmap
);
3014 new_entry
->iokit_acct
= TRUE
;
3015 new_entry
->use_pmap
= FALSE
;
3017 vm_map_iokit_mapped_region
,
3019 vm_map_offset_t
, new_entry
->vme_start
,
3020 vm_map_offset_t
, new_entry
->vme_end
,
3021 int, VME_ALIAS(new_entry
));
3022 vm_map_iokit_mapped_region(
3024 (new_entry
->vme_end
-
3025 new_entry
->vme_start
));
3026 } else if (!is_submap
) {
3027 assert(!new_entry
->iokit_acct
);
3028 assert(new_entry
->use_pmap
);
3033 boolean_t submap_is_64bit
;
3036 assert(new_entry
->is_sub_map
);
3037 assert(!new_entry
->use_pmap
);
3038 assert(!new_entry
->iokit_acct
);
3039 submap
= (vm_map_t
) object
;
3040 submap_is_64bit
= vm_map_is_64bit(submap
);
3041 use_pmap
= vmk_flags
.vmkf_nested_pmap
;
3042 #ifndef NO_NESTED_PMAP
3043 if (use_pmap
&& submap
->pmap
== NULL
) {
3044 ledger_t ledger
= map
->pmap
->ledger
;
3045 /* we need a sub pmap to nest... */
3046 submap
->pmap
= pmap_create_options(ledger
, 0,
3047 submap_is_64bit
? PMAP_CREATE_64BIT
: 0);
3048 if (submap
->pmap
== NULL
) {
3049 /* let's proceed without nesting... */
3051 #if defined(__arm__) || defined(__arm64__)
3053 pmap_set_nested(submap
->pmap
);
3057 if (use_pmap
&& submap
->pmap
!= NULL
) {
3058 kr
= pmap_nest(map
->pmap
,
3062 tmp_end
- tmp_start
);
3063 if (kr
!= KERN_SUCCESS
) {
3064 printf("vm_map_enter: "
3065 "pmap_nest(0x%llx,0x%llx) "
3067 (long long)tmp_start
,
3071 /* we're now nested ! */
3072 new_entry
->use_pmap
= TRUE
;
3076 #endif /* NO_NESTED_PMAP */
3080 if (superpage_size
) {
3082 vm_object_t sp_object
;
3083 vm_object_offset_t sp_offset
;
3085 VME_OFFSET_SET(entry
, 0);
3087 /* allocate one superpage */
3088 kr
= cpm_allocate(SUPERPAGE_SIZE
, &pages
, 0, SUPERPAGE_NBASEPAGES
- 1, TRUE
, 0);
3089 if (kr
!= KERN_SUCCESS
) {
3090 /* deallocate whole range... */
3091 new_mapping_established
= TRUE
;
3092 /* ... but only up to "tmp_end" */
3093 size
-= end
- tmp_end
;
3097 /* create one vm_object per superpage */
3098 sp_object
= vm_object_allocate((vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
3099 sp_object
->phys_contiguous
= TRUE
;
3100 sp_object
->vo_shadow_offset
= (vm_object_offset_t
)VM_PAGE_GET_PHYS_PAGE(pages
) * PAGE_SIZE
;
3101 VME_OBJECT_SET(entry
, sp_object
);
3102 assert(entry
->use_pmap
);
3104 /* enter the base pages into the object */
3105 vm_object_lock(sp_object
);
3107 sp_offset
< SUPERPAGE_SIZE
;
3108 sp_offset
+= PAGE_SIZE
) {
3110 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m
));
3111 pages
= NEXT_PAGE(m
);
3112 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
3113 vm_page_insert_wired(m
, sp_object
, sp_offset
, VM_KERN_MEMORY_OSFMK
);
3115 vm_object_unlock(sp_object
);
3117 } while (tmp_end
!= tmp2_end
&&
3118 (tmp_start
= tmp_end
) &&
3119 (tmp_end
= (tmp2_end
- tmp_end
> chunk_size
) ?
3120 tmp_end
+ chunk_size
: tmp2_end
));
3123 new_mapping_established
= TRUE
;
3126 assert(map_locked
== TRUE
);
3128 if (result
== KERN_SUCCESS
) {
3129 vm_prot_t pager_prot
;
3130 memory_object_t pager
;
3134 !(vmk_flags
.vmkf_no_pmap_check
)) {
3135 assert(vm_map_pmap_is_empty(map
,
3142 * For "named" VM objects, let the pager know that the
3143 * memory object is being mapped. Some pagers need to keep
3144 * track of this, to know when they can reclaim the memory
3145 * object, for example.
3146 * VM calls memory_object_map() for each mapping (specifying
3147 * the protection of each mapping) and calls
3148 * memory_object_last_unmap() when all the mappings are gone.
3150 pager_prot
= max_protection
;
3153 * Copy-On-Write mapping: won't modify
3154 * the memory object.
3156 pager_prot
&= ~VM_PROT_WRITE
;
3159 object
!= VM_OBJECT_NULL
&&
3161 object
->pager
!= MEMORY_OBJECT_NULL
) {
3162 vm_object_lock(object
);
3163 pager
= object
->pager
;
3164 if (object
->named
&&
3165 pager
!= MEMORY_OBJECT_NULL
) {
3166 assert(object
->pager_ready
);
3167 vm_object_mapping_wait(object
, THREAD_UNINT
);
3168 vm_object_mapping_begin(object
);
3169 vm_object_unlock(object
);
3171 kr
= memory_object_map(pager
, pager_prot
);
3172 assert(kr
== KERN_SUCCESS
);
3174 vm_object_lock(object
);
3175 vm_object_mapping_end(object
);
3177 vm_object_unlock(object
);
3181 assert(map_locked
== TRUE
);
3183 if (!keep_map_locked
) {
3189 * We can't hold the map lock if we enter this block.
3192 if (result
== KERN_SUCCESS
) {
3193 /* Wire down the new entry if the user
3194 * requested all new map entries be wired.
3196 if ((map
->wiring_required
) || (superpage_size
)) {
3197 assert(!keep_map_locked
);
3198 pmap_empty
= FALSE
; /* pmap won't be empty */
3199 kr
= vm_map_wire_kernel(map
, start
, end
,
3200 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
3207 if (result
!= KERN_SUCCESS
) {
3208 if (new_mapping_established
) {
3210 * We have to get rid of the new mappings since we
3211 * won't make them available to the user.
3212 * Try and do that atomically, to minimize the risk
3213 * that someone else creates new mappings in that range.
3215 zap_new_map
= vm_map_create(PMAP_NULL
,
3218 map
->hdr
.entries_pageable
);
3219 vm_map_set_page_shift(zap_new_map
,
3220 VM_MAP_PAGE_SHIFT(map
));
3221 vm_map_disable_hole_optimization(zap_new_map
);
3227 (void) vm_map_delete(map
, *address
, *address
+ size
,
3228 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3229 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3232 if (zap_old_map
!= VM_MAP_NULL
&&
3233 zap_old_map
->hdr
.nentries
!= 0) {
3234 vm_map_entry_t entry1
, entry2
;
3237 * The new mapping failed. Attempt to restore
3238 * the old mappings, saved in the "zap_old_map".
3245 /* first check if the coast is still clear */
3246 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
3247 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
3248 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
3249 vm_map_lookup_entry(map
, end
, &entry2
) ||
3252 * Part of that range has already been
3253 * re-mapped: we can't restore the old
3256 vm_map_enter_restore_failures
++;
3259 * Transfer the saved map entries from
3260 * "zap_old_map" to the original "map",
3261 * inserting them all after "entry1".
3263 for (entry2
= vm_map_first_entry(zap_old_map
);
3264 entry2
!= vm_map_to_entry(zap_old_map
);
3265 entry2
= vm_map_first_entry(zap_old_map
)) {
3266 vm_map_size_t entry_size
;
3268 entry_size
= (entry2
->vme_end
-
3270 vm_map_store_entry_unlink(zap_old_map
,
3272 zap_old_map
->size
-= entry_size
;
3273 vm_map_store_entry_link(map
, entry1
, entry2
,
3274 VM_MAP_KERNEL_FLAGS_NONE
);
3275 map
->size
+= entry_size
;
3278 if (map
->wiring_required
) {
3280 * XXX TODO: we should rewire the
3284 vm_map_enter_restore_successes
++;
3290 * The caller is responsible for releasing the lock if it requested to
3291 * keep the map locked.
3293 if (map_locked
&& !keep_map_locked
) {
3298 * Get rid of the "zap_maps" and all the map entries that
3299 * they may still contain.
3301 if (zap_old_map
!= VM_MAP_NULL
) {
3302 vm_map_destroy(zap_old_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3303 zap_old_map
= VM_MAP_NULL
;
3305 if (zap_new_map
!= VM_MAP_NULL
) {
3306 vm_map_destroy(zap_new_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3307 zap_new_map
= VM_MAP_NULL
;
/*
 * Pager operations table of the "4K" pager, defined outside this section.
 * It is compared against a VM object's pager->mo_pager_ops further down to
 * tell whether an existing mapping is already backed by a "4K" pager.
 */
3316 extern const struct memory_object_pager_ops fourk_pager_ops
;
3320 vm_map_offset_t
*address
, /* IN/OUT */
3322 vm_map_offset_t mask
,
3324 vm_map_kernel_flags_t vmk_flags
,
3327 vm_object_offset_t offset
,
3328 boolean_t needs_copy
,
3329 vm_prot_t cur_protection
,
3330 vm_prot_t max_protection
,
3331 vm_inherit_t inheritance
)
3333 vm_map_entry_t entry
, new_entry
;
3334 vm_map_offset_t start
, fourk_start
;
3335 vm_map_offset_t end
, fourk_end
;
3336 vm_map_size_t fourk_size
;
3337 kern_return_t result
= KERN_SUCCESS
;
3338 vm_map_t zap_old_map
= VM_MAP_NULL
;
3339 vm_map_t zap_new_map
= VM_MAP_NULL
;
3340 boolean_t map_locked
= FALSE
;
3341 boolean_t pmap_empty
= TRUE
;
3342 boolean_t new_mapping_established
= FALSE
;
3343 boolean_t keep_map_locked
= vmk_flags
.vmkf_keep_map_locked
;
3344 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
3345 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
3346 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
3347 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
3348 boolean_t is_submap
= vmk_flags
.vmkf_submap
;
3349 boolean_t permanent
= vmk_flags
.vmkf_permanent
;
3350 boolean_t no_copy_on_read
= vmk_flags
.vmkf_permanent
;
3351 boolean_t entry_for_jit
= vmk_flags
.vmkf_map_jit
;
3352 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3353 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
3354 vm_map_offset_t effective_min_offset
, effective_max_offset
;
3356 boolean_t clear_map_aligned
= FALSE
;
3357 memory_object_t fourk_mem_obj
;
3358 vm_object_t fourk_object
;
3359 vm_map_offset_t fourk_pager_offset
;
3360 int fourk_pager_index_start
, fourk_pager_index_num
;
3362 boolean_t fourk_copy
;
3363 vm_object_t copy_object
;
3364 vm_object_offset_t copy_offset
;
3366 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
3367 fourk_object
= VM_OBJECT_NULL
;
3369 if (superpage_size
) {
3370 return KERN_NOT_SUPPORTED
;
3373 if ((cur_protection
& VM_PROT_WRITE
) &&
3374 (cur_protection
& VM_PROT_EXECUTE
) &&
3375 #if !CONFIG_EMBEDDED
3376 map
!= kernel_map
&&
3377 cs_process_enforcement(NULL
) &&
3378 #endif /* !CONFIG_EMBEDDED */
3383 vm_prot_t
, cur_protection
);
3384 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3385 "turning off execute\n",
3387 (current_task()->bsd_info
3388 ? proc_name_address(current_task()->bsd_info
)
3391 cur_protection
&= ~VM_PROT_EXECUTE
;
3395 * If the task has requested executable lockdown,
3396 * deny any new executable mapping.
3398 if (map
->map_disallow_new_exec
== TRUE
) {
3399 if (cur_protection
& VM_PROT_EXECUTE
) {
3400 return KERN_PROTECTION_FAILURE
;
3405 return KERN_NOT_SUPPORTED
;
3407 if (vmk_flags
.vmkf_already
) {
3408 return KERN_NOT_SUPPORTED
;
3410 if (purgable
|| entry_for_jit
) {
3411 return KERN_NOT_SUPPORTED
;
3414 effective_min_offset
= map
->min_offset
;
3416 if (vmk_flags
.vmkf_beyond_max
) {
3417 return KERN_NOT_SUPPORTED
;
3419 effective_max_offset
= map
->max_offset
;
3423 (offset
& FOURK_PAGE_MASK
) != 0) {
3425 return KERN_INVALID_ARGUMENT
;
3428 #define RETURN(value) { result = value; goto BailOut; }
3430 assert(VM_MAP_PAGE_ALIGNED(*address
, FOURK_PAGE_MASK
));
3431 assert(VM_MAP_PAGE_ALIGNED(size
, FOURK_PAGE_MASK
));
3433 if (!anywhere
&& overwrite
) {
3434 return KERN_NOT_SUPPORTED
;
3436 if (!anywhere
&& overwrite
) {
3438 * Create a temporary VM map to hold the old mappings in the
3439 * affected area while we create the new one.
3440 * This avoids releasing the VM map lock in
3441 * vm_map_entry_delete() and allows atomicity
3442 * when we want to replace some mappings with a new one.
3443 * It also allows us to restore the old VM mappings if the
3444 * new mapping fails.
3446 zap_old_map
= vm_map_create(PMAP_NULL
,
3449 map
->hdr
.entries_pageable
);
3450 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
3451 vm_map_disable_hole_optimization(zap_old_map
);
3454 fourk_start
= *address
;
3456 fourk_end
= fourk_start
+ fourk_size
;
3458 start
= vm_map_trunc_page(*address
, VM_MAP_PAGE_MASK(map
));
3459 end
= vm_map_round_page(fourk_end
, VM_MAP_PAGE_MASK(map
));
3463 return KERN_NOT_SUPPORTED
;
3467 * the address doesn't itself violate
3468 * the mask requirement.
3473 if ((start
& mask
) != 0) {
3474 RETURN(KERN_NO_SPACE
);
3478 * ... the address is within bounds
3483 if ((start
< effective_min_offset
) ||
3484 (end
> effective_max_offset
) ||
3486 RETURN(KERN_INVALID_ADDRESS
);
3489 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
3491 * Fixed mapping and "overwrite" flag: attempt to
3492 * remove all existing mappings in the specified
3493 * address range, saving them in our "zap_old_map".
3495 (void) vm_map_delete(map
, start
, end
,
3496 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3497 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3502 * ... the starting address isn't allocated
3504 if (vm_map_lookup_entry(map
, start
, &entry
)) {
3505 vm_object_t cur_object
, shadow_object
;
3508 * We might already have some 4K mappings
3509 * in a 16K page here.
3512 if (entry
->vme_end
- entry
->vme_start
3513 != SIXTEENK_PAGE_SIZE
) {
3514 RETURN(KERN_NO_SPACE
);
3516 if (entry
->is_sub_map
) {
3517 RETURN(KERN_NO_SPACE
);
3519 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
3520 RETURN(KERN_NO_SPACE
);
3523 /* go all the way down the shadow chain */
3524 cur_object
= VME_OBJECT(entry
);
3525 vm_object_lock(cur_object
);
3526 while (cur_object
->shadow
!= VM_OBJECT_NULL
) {
3527 shadow_object
= cur_object
->shadow
;
3528 vm_object_lock(shadow_object
);
3529 vm_object_unlock(cur_object
);
3530 cur_object
= shadow_object
;
3531 shadow_object
= VM_OBJECT_NULL
;
3533 if (cur_object
->internal
||
3534 cur_object
->pager
== NULL
) {
3535 vm_object_unlock(cur_object
);
3536 RETURN(KERN_NO_SPACE
);
3538 if (cur_object
->pager
->mo_pager_ops
3539 != &fourk_pager_ops
) {
3540 vm_object_unlock(cur_object
);
3541 RETURN(KERN_NO_SPACE
);
3543 fourk_object
= cur_object
;
3544 fourk_mem_obj
= fourk_object
->pager
;
3546 /* keep the "4K" object alive */
3547 vm_object_reference_locked(fourk_object
);
3548 vm_object_unlock(fourk_object
);
3550 /* merge permissions */
3551 entry
->protection
|= cur_protection
;
3552 entry
->max_protection
|= max_protection
;
3553 if ((entry
->protection
& (VM_PROT_WRITE
|
3554 VM_PROT_EXECUTE
)) ==
3555 (VM_PROT_WRITE
| VM_PROT_EXECUTE
) &&
3556 fourk_binary_compatibility_unsafe
&&
3557 fourk_binary_compatibility_allow_wx
) {
3558 /* write+execute: need to be "jit" */
3559 entry
->used_for_jit
= TRUE
;
3562 goto map_in_fourk_pager
;
3566 * ... the next region doesn't overlap the
3570 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
3571 (entry
->vme_next
->vme_start
< end
)) {
3572 RETURN(KERN_NO_SPACE
);
3578 * "start" and "end" should define the endpoints of the
3579 * available new range, and
3580 * "entry" should refer to the region before the new
3583 * the map should be locked.
3586 /* create a new "4K" pager */
3587 fourk_mem_obj
= fourk_pager_create();
3588 fourk_object
= fourk_pager_to_vm_object(fourk_mem_obj
);
3589 assert(fourk_object
);
3591 /* keep the "4K" object alive */
3592 vm_object_reference(fourk_object
);
3594 /* create a "copy" object, to map the "4K" object copy-on-write */
3596 result
= vm_object_copy_strategically(fourk_object
,
3602 assert(result
== KERN_SUCCESS
);
3603 assert(copy_object
!= VM_OBJECT_NULL
);
3604 assert(copy_offset
== 0);
3606 /* take a reference on the copy object, for this mapping */
3607 vm_object_reference(copy_object
);
3609 /* map the "4K" pager's copy object */
3611 vm_map_entry_insert(map
, entry
,
3612 vm_map_trunc_page(start
,
3613 VM_MAP_PAGE_MASK(map
)),
3614 vm_map_round_page(end
,
3615 VM_MAP_PAGE_MASK(map
)),
3618 FALSE
, /* needs_copy */
3621 cur_protection
, max_protection
,
3622 VM_BEHAVIOR_DEFAULT
,
3637 #if VM_MAP_DEBUG_FOURK
3638 if (vm_map_debug_fourk
) {
3639 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3641 (uint64_t) entry
->vme_start
,
3642 (uint64_t) entry
->vme_end
,
3645 #endif /* VM_MAP_DEBUG_FOURK */
3647 new_mapping_established
= TRUE
;
3650 /* "map" the original "object" where it belongs in the "4K" pager */
3651 fourk_pager_offset
= (fourk_start
& SIXTEENK_PAGE_MASK
);
3652 fourk_pager_index_start
= (int) (fourk_pager_offset
/ FOURK_PAGE_SIZE
);
3653 if (fourk_size
> SIXTEENK_PAGE_SIZE
) {
3654 fourk_pager_index_num
= 4;
3656 fourk_pager_index_num
= (int) (fourk_size
/ FOURK_PAGE_SIZE
);
3658 if (fourk_pager_index_start
+ fourk_pager_index_num
> 4) {
3659 fourk_pager_index_num
= 4 - fourk_pager_index_start
;
3662 cur_idx
< fourk_pager_index_num
;
3664 vm_object_t old_object
;
3665 vm_object_offset_t old_offset
;
3667 kr
= fourk_pager_populate(fourk_mem_obj
,
3668 TRUE
, /* overwrite */
3669 fourk_pager_index_start
+ cur_idx
,
3673 (cur_idx
* FOURK_PAGE_SIZE
))
3677 #if VM_MAP_DEBUG_FOURK
3678 if (vm_map_debug_fourk
) {
3679 if (old_object
== (vm_object_t
) -1 &&
3680 old_offset
== (vm_object_offset_t
) -1) {
3681 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3682 "pager [%p:0x%llx] "
3684 "[object:%p,offset:0x%llx]\n",
3686 (uint64_t) entry
->vme_start
,
3687 (uint64_t) entry
->vme_end
,
3690 fourk_pager_index_start
+ cur_idx
,
3693 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3696 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3697 "pager [%p:0x%llx] "
3698 "populate[%d] [object:%p,offset:0x%llx] "
3699 "old [%p:0x%llx]\n",
3701 (uint64_t) entry
->vme_start
,
3702 (uint64_t) entry
->vme_end
,
3705 fourk_pager_index_start
+ cur_idx
,
3708 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3714 #endif /* VM_MAP_DEBUG_FOURK */
3716 assert(kr
== KERN_SUCCESS
);
3717 if (object
!= old_object
&&
3718 object
!= VM_OBJECT_NULL
&&
3719 object
!= (vm_object_t
) -1) {
3720 vm_object_reference(object
);
3722 if (object
!= old_object
&&
3723 old_object
!= VM_OBJECT_NULL
&&
3724 old_object
!= (vm_object_t
) -1) {
3725 vm_object_deallocate(old_object
);
3730 assert(map_locked
== TRUE
);
3732 if (fourk_object
!= VM_OBJECT_NULL
) {
3733 vm_object_deallocate(fourk_object
);
3734 fourk_object
= VM_OBJECT_NULL
;
3735 fourk_mem_obj
= MEMORY_OBJECT_NULL
;
3738 if (result
== KERN_SUCCESS
) {
3739 vm_prot_t pager_prot
;
3740 memory_object_t pager
;
3744 !(vmk_flags
.vmkf_no_pmap_check
)) {
3745 assert(vm_map_pmap_is_empty(map
,
3752 * For "named" VM objects, let the pager know that the
3753 * memory object is being mapped. Some pagers need to keep
3754 * track of this, to know when they can reclaim the memory
3755 * object, for example.
3756 * VM calls memory_object_map() for each mapping (specifying
3757 * the protection of each mapping) and calls
3758 * memory_object_last_unmap() when all the mappings are gone.
3760 pager_prot
= max_protection
;
3763 * Copy-On-Write mapping: won't modify
3764 * the memory object.
3766 pager_prot
&= ~VM_PROT_WRITE
;
3769 object
!= VM_OBJECT_NULL
&&
3771 object
->pager
!= MEMORY_OBJECT_NULL
) {
3772 vm_object_lock(object
);
3773 pager
= object
->pager
;
3774 if (object
->named
&&
3775 pager
!= MEMORY_OBJECT_NULL
) {
3776 assert(object
->pager_ready
);
3777 vm_object_mapping_wait(object
, THREAD_UNINT
);
3778 vm_object_mapping_begin(object
);
3779 vm_object_unlock(object
);
3781 kr
= memory_object_map(pager
, pager_prot
);
3782 assert(kr
== KERN_SUCCESS
);
3784 vm_object_lock(object
);
3785 vm_object_mapping_end(object
);
3787 vm_object_unlock(object
);
3790 fourk_object
!= VM_OBJECT_NULL
&&
3791 fourk_object
->named
&&
3792 fourk_object
->pager
!= MEMORY_OBJECT_NULL
) {
3793 vm_object_lock(fourk_object
);
3794 pager
= fourk_object
->pager
;
3795 if (fourk_object
->named
&&
3796 pager
!= MEMORY_OBJECT_NULL
) {
3797 assert(fourk_object
->pager_ready
);
3798 vm_object_mapping_wait(fourk_object
,
3800 vm_object_mapping_begin(fourk_object
);
3801 vm_object_unlock(fourk_object
);
3803 kr
= memory_object_map(pager
, VM_PROT_READ
);
3804 assert(kr
== KERN_SUCCESS
);
3806 vm_object_lock(fourk_object
);
3807 vm_object_mapping_end(fourk_object
);
3809 vm_object_unlock(fourk_object
);
3813 assert(map_locked
== TRUE
);
3815 if (!keep_map_locked
) {
3821 * We can't hold the map lock if we enter this block.
3824 if (result
== KERN_SUCCESS
) {
3825 /* Wire down the new entry if the user
3826 * requested all new map entries be wired.
3828 if ((map
->wiring_required
) || (superpage_size
)) {
3829 assert(!keep_map_locked
);
3830 pmap_empty
= FALSE
; /* pmap won't be empty */
3831 kr
= vm_map_wire_kernel(map
, start
, end
,
3832 new_entry
->protection
, VM_KERN_MEMORY_MLOCK
,
3839 if (result
!= KERN_SUCCESS
) {
3840 if (new_mapping_established
) {
3842 * We have to get rid of the new mappings since we
3843 * won't make them available to the user.
3844 * Try to do that atomically, to minimize the risk
3845 * that someone else creates new mappings in that range.
3847 zap_new_map
= vm_map_create(PMAP_NULL
,
3850 map
->hdr
.entries_pageable
);
3851 vm_map_set_page_shift(zap_new_map
,
3852 VM_MAP_PAGE_SHIFT(map
));
3853 vm_map_disable_hole_optimization(zap_new_map
);
3859 (void) vm_map_delete(map
, *address
, *address
+ size
,
3860 (VM_MAP_REMOVE_SAVE_ENTRIES
|
3861 VM_MAP_REMOVE_NO_MAP_ALIGN
),
3864 if (zap_old_map
!= VM_MAP_NULL
&&
3865 zap_old_map
->hdr
.nentries
!= 0) {
3866 vm_map_entry_t entry1
, entry2
;
3869 * The new mapping failed. Attempt to restore
3870 * the old mappings, saved in the "zap_old_map".
3877 /* first check if the coast is still clear */
3878 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
3879 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
3880 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
3881 vm_map_lookup_entry(map
, end
, &entry2
) ||
3884 * Part of that range has already been
3885 * re-mapped: we can't restore the old
3888 vm_map_enter_restore_failures
++;
3891 * Transfer the saved map entries from
3892 * "zap_old_map" to the original "map",
3893 * inserting them all after "entry1".
3895 for (entry2
= vm_map_first_entry(zap_old_map
);
3896 entry2
!= vm_map_to_entry(zap_old_map
);
3897 entry2
= vm_map_first_entry(zap_old_map
)) {
3898 vm_map_size_t entry_size
;
3900 entry_size
= (entry2
->vme_end
-
3902 vm_map_store_entry_unlink(zap_old_map
,
3904 zap_old_map
->size
-= entry_size
;
3905 vm_map_store_entry_link(map
, entry1
, entry2
,
3906 VM_MAP_KERNEL_FLAGS_NONE
);
3907 map
->size
+= entry_size
;
3910 if (map
->wiring_required
) {
3912 * XXX TODO: we should rewire the
3916 vm_map_enter_restore_successes
++;
3922 * The caller is responsible for releasing the lock if it requested to
3923 * keep the map locked.
3925 if (map_locked
&& !keep_map_locked
) {
3930 * Get rid of the "zap_maps" and all the map entries that
3931 * they may still contain.
3933 if (zap_old_map
!= VM_MAP_NULL
) {
3934 vm_map_destroy(zap_old_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3935 zap_old_map
= VM_MAP_NULL
;
3937 if (zap_new_map
!= VM_MAP_NULL
) {
3938 vm_map_destroy(zap_new_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
3939 zap_new_map
= VM_MAP_NULL
;
3946 #endif /* __arm64__ */
3949 * Counters for the prefault optimization.
/*
 * vm_prefault_nb_pages: bumped with OSIncrementAtomic64() in the prefault
 * loop of vm_map_enter_mem_object_helper(), after the pmap_enter_options()
 * failure check. NOTE(review): presumably the number of pages successfully
 * prefaulted -- confirm against the complete loop body.
 */
3951 int64_t vm_prefault_nb_pages
= 0;
/*
 * vm_prefault_nb_bailout: bumped with OSIncrementAtomic64() when
 * pmap_enter_options() returns something other than KERN_SUCCESS during
 * prefault.
 */
3952 int64_t vm_prefault_nb_bailout
= 0;
/*
 * vm_map_enter_mem_object_helper:
 *
 * Shared implementation called by vm_map_enter_mem_object() and
 * vm_map_enter_mem_object_prefault(). Maps the memory designated by
 * a port into "target_map" and reports the resulting address through
 * "*address".
 *
 * Steps visible in this listing:
 *  - VM_PROT_IS_MASK is stripped from cur_protection/max_protection,
 *    after recording whether each one carried it;
 *  - KERN_INVALID_ARGUMENT is returned for a null target map,
 *    protection bits outside VM_PROT_ALL, an inheritance value above
 *    VM_INHERIT_LAST_VALID, a prefault request combined with "copy" or
 *    without a page list, or a zero initial_size;
 *  - the address is truncated and the size rounded to a page mask
 *    (FOURK_PAGE_MASK or VM_MAP_PAGE_MASK(target_map));
 *  - the port is resolved: an invalid port gives VM_OBJECT_NULL, an
 *    IKOT_NAMED_ENTRY port is handled as a submap, a vm_map_copy or a
 *    VM object, an IKOT_MEMORY_OBJECT port is converted with
 *    memory_object_to_vm_object();
 *  - for a "named" object with a pager, memory_object_map() is called
 *    between vm_object_mapping_begin() and vm_object_mapping_end();
 *  - a copy is made with vm_object_copy_strategically(), falling back
 *    to vm_object_copy_quickly() on KERN_MEMORY_RESTART_COPY;
 *  - the object is entered with vm_map_enter() or vm_map_enter_fourk();
 *    the object reference is dropped if that fails;
 *  - when page_list_count is non-zero, pages from page_list are
 *    entered with pmap_enter_options(); the map lock is released
 *    afterwards if vmkf_keep_map_locked was set;
 *  - *address is set to map_addr, plus offset_in_mapping when
 *    VM_FLAGS_RETURN_DATA_ADDR / VM_FLAGS_RETURN_4K_DATA_ADDR is set.
 *
 * NOTE(review): this listing omits some source lines (several
 * parameters, closing braces, "else" branches and preprocessor
 * conditions are not shown), so exact nesting and the conditions
 * guarding the 4K paths must be confirmed against the complete file.
 */
3954 static kern_return_t
3955 vm_map_enter_mem_object_helper(
3956 vm_map_t target_map
,
3957 vm_map_offset_t
*address
,
3958 vm_map_size_t initial_size
,
3959 vm_map_offset_t mask
,
3961 vm_map_kernel_flags_t vmk_flags
,
3964 vm_object_offset_t offset
,
3966 vm_prot_t cur_protection
,
3967 vm_prot_t max_protection
,
3968 vm_inherit_t inheritance
,
3969 upl_page_list_ptr_t page_list
,
3970 unsigned int page_list_count
)
3972 vm_map_address_t map_addr
;
3973 vm_map_size_t map_size
;
3975 vm_object_size_t size
;
3976 kern_return_t result
;
3977 boolean_t mask_cur_protection
, mask_max_protection
;
3978 boolean_t kernel_prefault
, try_prefault
= (page_list_count
!= 0);
3979 vm_map_offset_t offset_in_mapping
= 0;
3981 boolean_t fourk
= vmk_flags
.vmkf_fourk
;
3982 #endif /* __arm64__ */
3984 assertf(vmk_flags
.__vmkf_unused
== 0, "vmk_flags unused=0x%x\n", vmk_flags
.__vmkf_unused
);
/* Record whether each protection argument carried VM_PROT_IS_MASK, then clear that bit. */
3986 mask_cur_protection
= cur_protection
& VM_PROT_IS_MASK
;
3987 mask_max_protection
= max_protection
& VM_PROT_IS_MASK
;
3988 cur_protection
&= ~VM_PROT_IS_MASK
;
3989 max_protection
&= ~VM_PROT_IS_MASK
;
3992 * Check arguments for validity
3994 if ((target_map
== VM_MAP_NULL
) ||
3995 (cur_protection
& ~VM_PROT_ALL
) ||
3996 (max_protection
& ~VM_PROT_ALL
) ||
3997 (inheritance
> VM_INHERIT_LAST_VALID
) ||
3998 (try_prefault
&& (copy
|| !page_list
)) ||
3999 initial_size
== 0) {
4000 return KERN_INVALID_ARGUMENT
;
4005 map_addr
= vm_map_trunc_page(*address
, FOURK_PAGE_MASK
);
4006 map_size
= vm_map_round_page(initial_size
, FOURK_PAGE_MASK
);
4008 #endif /* __arm64__ */
4010 map_addr
= vm_map_trunc_page(*address
,
4011 VM_MAP_PAGE_MASK(target_map
));
4012 map_size
= vm_map_round_page(initial_size
,
4013 VM_MAP_PAGE_MASK(target_map
));
4015 size
= vm_object_round_page(initial_size
);
4018 * Find the vm object (if any) corresponding to this port.
/* Case 1: invalid port -> no backing object (VM_OBJECT_NULL). */
4020 if (!IP_VALID(port
)) {
4021 object
= VM_OBJECT_NULL
;
/* Case 2: the port is a named entry (IKOT_NAMED_ENTRY). */
4024 } else if (ip_kotype(port
) == IKOT_NAMED_ENTRY
) {
4025 vm_named_entry_t named_entry
;
4027 named_entry
= (vm_named_entry_t
) ip_get_kobject(port
);
4029 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4030 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4031 offset
+= named_entry
->data_offset
;
4034 /* a few checks to make sure user is obeying rules */
4036 if (offset
>= named_entry
->size
) {
4037 return KERN_INVALID_RIGHT
;
4039 size
= named_entry
->size
- offset
;
4041 if (mask_max_protection
) {
4042 max_protection
&= named_entry
->protection
;
4044 if (mask_cur_protection
) {
4045 cur_protection
&= named_entry
->protection
;
4047 if ((named_entry
->protection
& max_protection
) !=
4049 return KERN_INVALID_RIGHT
;
4051 if ((named_entry
->protection
& cur_protection
) !=
4053 return KERN_INVALID_RIGHT
;
4055 if (offset
+ size
< offset
) {
4057 return KERN_INVALID_ARGUMENT
;
4059 if (named_entry
->size
< (offset
+ initial_size
)) {
4060 return KERN_INVALID_ARGUMENT
;
4063 if (named_entry
->is_copy
) {
4064 /* for a vm_map_copy, we can only map it whole */
4065 if ((size
!= named_entry
->size
) &&
4066 (vm_map_round_page(size
,
4067 VM_MAP_PAGE_MASK(target_map
)) ==
4068 named_entry
->size
)) {
4069 /* XXX FBDP use the rounded size... */
4070 size
= vm_map_round_page(
4072 VM_MAP_PAGE_MASK(target_map
));
4075 if (!(flags
& VM_FLAGS_ANYWHERE
) &&
4077 size
!= named_entry
->size
)) {
4079 * XXX for a mapping at a "fixed" address,
4080 * we can't trim after mapping the whole
4081 * memory entry, so reject a request for a
4084 return KERN_INVALID_ARGUMENT
;
4088 /* the callers parameter offset is defined to be the */
4089 /* offset from beginning of named entry offset in object */
4090 offset
= offset
+ named_entry
->offset
;
4092 if (!VM_MAP_PAGE_ALIGNED(size
,
4093 VM_MAP_PAGE_MASK(target_map
))) {
4095 * Let's not map more than requested;
4096 * vm_map_enter() will handle this "not map-aligned"
4102 named_entry_lock(named_entry
);
/* Named entry backed by a submap: take a map reference and enter the submap itself. */
4103 if (named_entry
->is_sub_map
) {
4106 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4107 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4108 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4111 submap
= named_entry
->backing
.map
;
4112 vm_map_lock(submap
);
4113 vm_map_reference(submap
);
4114 vm_map_unlock(submap
);
4115 named_entry_unlock(named_entry
);
4117 vmk_flags
.vmkf_submap
= TRUE
;
4119 result
= vm_map_enter(target_map
,
4126 (vm_object_t
)(uintptr_t) submap
,
4132 if (result
!= KERN_SUCCESS
) {
4133 vm_map_deallocate(submap
);
4136 * No need to lock "submap" just to check its
4137 * "mapped" flag: that flag is never reset
4138 * once it's been set and if we race, we'll
4139 * just end up setting it twice, which is OK.
4141 if (submap
->mapped_in_other_pmaps
== FALSE
&&
4142 vm_map_pmap(submap
) != PMAP_NULL
&&
4143 vm_map_pmap(submap
) !=
4144 vm_map_pmap(target_map
)) {
4146 * This submap is being mapped in a map
4147 * that uses a different pmap.
4148 * Set its "mapped_in_other_pmaps" flag
4149 * to indicate that we now need to
4150 * remove mappings from all pmaps rather
4151 * than just the submap's pmap.
4153 vm_map_lock(submap
);
4154 submap
->mapped_in_other_pmaps
= TRUE
;
4155 vm_map_unlock(submap
);
4157 *address
= map_addr
;
/* Named entry backed by a vm_map_copy: reserve a range, then map each copy entry into it. */
4160 } else if (named_entry
->is_copy
) {
4162 vm_map_copy_t copy_map
;
4163 vm_map_entry_t copy_entry
;
4164 vm_map_offset_t copy_addr
;
4166 if (flags
& ~(VM_FLAGS_FIXED
|
4168 VM_FLAGS_OVERWRITE
|
4169 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4170 VM_FLAGS_RETURN_DATA_ADDR
|
4171 VM_FLAGS_ALIAS_MASK
)) {
4172 named_entry_unlock(named_entry
);
4173 return KERN_INVALID_ARGUMENT
;
4176 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4177 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4178 offset_in_mapping
= offset
- vm_object_trunc_page(offset
);
4179 if (flags
& VM_FLAGS_RETURN_4K_DATA_ADDR
) {
4180 offset_in_mapping
&= ~((signed)(0xFFF));
4182 offset
= vm_object_trunc_page(offset
);
4183 map_size
= vm_object_round_page(offset
+ offset_in_mapping
+ initial_size
) - offset
;
4186 copy_map
= named_entry
->backing
.copy
;
4187 assert(copy_map
->type
== VM_MAP_COPY_ENTRY_LIST
);
4188 if (copy_map
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
4189 /* unsupported type; should not happen */
4190 printf("vm_map_enter_mem_object: "
4191 "memory_entry->backing.copy "
4192 "unsupported type 0x%x\n",
4194 named_entry_unlock(named_entry
);
4195 return KERN_INVALID_ARGUMENT
;
4198 /* reserve a contiguous range */
4199 kr
= vm_map_enter(target_map
,
4201 /* map whole mem entry, trim later: */
4204 flags
& (VM_FLAGS_ANYWHERE
|
4205 VM_FLAGS_OVERWRITE
|
4206 VM_FLAGS_RETURN_4K_DATA_ADDR
|
4207 VM_FLAGS_RETURN_DATA_ADDR
),
4216 if (kr
!= KERN_SUCCESS
) {
4217 named_entry_unlock(named_entry
);
4221 copy_addr
= map_addr
;
4223 for (copy_entry
= vm_map_copy_first_entry(copy_map
);
4224 copy_entry
!= vm_map_copy_to_entry(copy_map
);
4225 copy_entry
= copy_entry
->vme_next
) {
4227 vm_map_kernel_flags_t vmk_remap_flags
;
4228 vm_map_t copy_submap
;
4229 vm_object_t copy_object
;
4230 vm_map_size_t copy_size
;
4231 vm_object_offset_t copy_offset
;
4235 vmk_remap_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
4237 copy_object
= VME_OBJECT(copy_entry
);
4238 copy_offset
= VME_OFFSET(copy_entry
);
4239 copy_size
= (copy_entry
->vme_end
-
4240 copy_entry
->vme_start
);
4241 VM_GET_FLAGS_ALIAS(flags
, copy_vm_alias
);
4242 if (copy_vm_alias
== 0) {
4244 * Caller does not want a specific
4245 * alias for this new mapping: use
4246 * the alias of the original mapping.
4248 copy_vm_alias
= VME_ALIAS(copy_entry
);
4252 if ((copy_addr
+ copy_size
) >
4254 named_entry
->size
/* XXX full size */)) {
4255 /* over-mapping too much !? */
4256 kr
= KERN_INVALID_ARGUMENT
;
4261 /* take a reference on the object */
4262 if (copy_entry
->is_sub_map
) {
4263 vmk_remap_flags
.vmkf_submap
= TRUE
;
4264 copy_submap
= VME_SUBMAP(copy_entry
);
4265 vm_map_lock(copy_submap
);
4266 vm_map_reference(copy_submap
);
4267 vm_map_unlock(copy_submap
);
4268 copy_object
= (vm_object_t
)(uintptr_t) copy_submap
;
4270 copy_object
!= VM_OBJECT_NULL
&&
4271 (copy_entry
->needs_copy
||
4272 copy_object
->shadowed
||
4273 (!copy_object
->true_share
&&
4274 !copy_entry
->is_shared
&&
4275 copy_object
->vo_size
> copy_size
))) {
4277 * We need to resolve our side of this
4278 * "symmetric" copy-on-write now; we
4279 * need a new object to map and share,
4280 * instead of the current one which
4281 * might still be shared with the
4284 * Note: A "vm_map_copy_t" does not
4285 * have a lock but we're protected by
4286 * the named entry's lock here.
4288 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4289 VME_OBJECT_SHADOW(copy_entry
, copy_size
);
4290 if (!copy_entry
->needs_copy
&&
4291 copy_entry
->protection
& VM_PROT_WRITE
) {
4294 prot
= copy_entry
->protection
& ~VM_PROT_WRITE
;
4295 vm_object_pmap_protect(copy_object
,
4303 copy_entry
->needs_copy
= FALSE
;
4304 copy_entry
->is_shared
= TRUE
;
4305 copy_object
= VME_OBJECT(copy_entry
);
4306 copy_offset
= VME_OFFSET(copy_entry
);
4307 vm_object_lock(copy_object
);
4308 vm_object_reference_locked(copy_object
);
4309 if (copy_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
4310 /* we're about to make a shared mapping of this object */
4311 copy_object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
4312 copy_object
->true_share
= TRUE
;
4314 vm_object_unlock(copy_object
);
4317 * We already have the right object
4320 copy_object
= VME_OBJECT(copy_entry
);
4321 vm_object_reference(copy_object
);
4324 /* over-map the object into destination */
4325 remap_flags
|= flags
;
4326 remap_flags
|= VM_FLAGS_FIXED
;
4327 remap_flags
|= VM_FLAGS_OVERWRITE
;
4328 remap_flags
&= ~VM_FLAGS_ANYWHERE
;
4329 if (!copy
&& !copy_entry
->is_sub_map
) {
4331 * copy-on-write should have been
4332 * resolved at this point, or we would
4333 * end up sharing instead of copying.
4335 assert(!copy_entry
->needs_copy
);
4337 #if !CONFIG_EMBEDDED
4338 if (copy_entry
->used_for_jit
) {
4339 vmk_remap_flags
.vmkf_map_jit
= TRUE
;
4341 #endif /* !CONFIG_EMBEDDED */
4342 kr
= vm_map_enter(target_map
,
4345 (vm_map_offset_t
) 0,
4351 ((copy_object
== NULL
) ? FALSE
: copy
),
4355 if (kr
!= KERN_SUCCESS
) {
4356 if (copy_entry
->is_sub_map
) {
4357 vm_map_deallocate(copy_submap
);
4359 vm_object_deallocate(copy_object
);
4366 copy_addr
+= copy_size
;
4369 if (kr
== KERN_SUCCESS
) {
4370 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4371 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4372 *address
= map_addr
+ offset_in_mapping
;
4374 *address
= map_addr
;
4379 * Trim in front, from 0 to "offset".
4381 vm_map_remove(target_map
,
4384 VM_MAP_REMOVE_NO_FLAGS
);
4387 if (offset
+ map_size
< named_entry
->size
) {
4389 * Trim in back, from
4390 * "offset + map_size" to
4391 * "named_entry->size".
4393 vm_map_remove(target_map
,
4398 VM_MAP_REMOVE_NO_FLAGS
);
4401 named_entry_unlock(named_entry
);
4403 if (kr
!= KERN_SUCCESS
) {
4404 if (!(flags
& VM_FLAGS_OVERWRITE
)) {
4405 /* deallocate the contiguous range */
4406 (void) vm_deallocate(target_map
,
4414 unsigned int access
;
4415 vm_prot_t protections
;
4416 unsigned int wimg_mode
;
4418 /* we are mapping a VM object */
4420 protections
= named_entry
->protection
& VM_PROT_ALL
;
4421 access
= GET_MAP_MEM(named_entry
->protection
);
4423 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4424 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4425 offset_in_mapping
= offset
- vm_object_trunc_page(offset
);
4426 if (flags
& VM_FLAGS_RETURN_4K_DATA_ADDR
) {
4427 offset_in_mapping
&= ~((signed)(0xFFF));
4429 offset
= vm_object_trunc_page(offset
);
4430 map_size
= vm_object_round_page(offset
+ offset_in_mapping
+ initial_size
) - offset
;
4433 object
= named_entry
->backing
.object
;
4434 assert(object
!= VM_OBJECT_NULL
);
4435 vm_object_lock(object
);
4436 named_entry_unlock(named_entry
);
4438 vm_object_reference_locked(object
);
4440 wimg_mode
= object
->wimg_bits
;
4441 vm_prot_to_wimg(access
, &wimg_mode
);
4442 if (object
->wimg_bits
!= wimg_mode
) {
4443 vm_object_change_wimg_mode(object
, wimg_mode
);
4446 vm_object_unlock(object
);
/* Case 3: the port is a raw memory object (IKOT_MEMORY_OBJECT). */
4448 } else if (ip_kotype(port
) == IKOT_MEMORY_OBJECT
) {
4450 * JMM - This is temporary until we unify named entries
4451 * and raw memory objects.
4453 * Detected fake ip_kotype for a memory object. In
4454 * this case, the port isn't really a port at all, but
4455 * instead is just a raw memory object.
4457 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4458 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4459 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4462 object
= memory_object_to_vm_object((memory_object_t
)port
);
4463 if (object
== VM_OBJECT_NULL
) {
4464 return KERN_INVALID_OBJECT
;
4466 vm_object_reference(object
);
4468 /* wait for object (if any) to be ready */
4469 if (object
!= VM_OBJECT_NULL
) {
4470 if (object
== kernel_object
) {
4471 printf("Warning: Attempt to map kernel object"
4472 " by a non-private kernel entity\n");
4473 return KERN_INVALID_OBJECT
;
4475 if (!object
->pager_ready
) {
4476 vm_object_lock(object
);
4478 while (!object
->pager_ready
) {
4479 vm_object_wait(object
,
4480 VM_OBJECT_EVENT_PAGER_READY
,
4482 vm_object_lock(object
);
4484 vm_object_unlock(object
);
4488 return KERN_INVALID_OBJECT
;
4491 if (object
!= VM_OBJECT_NULL
&&
4493 object
->pager
!= MEMORY_OBJECT_NULL
&&
4494 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
4495 memory_object_t pager
;
4496 vm_prot_t pager_prot
;
4500 * For "named" VM objects, let the pager know that the
4501 * memory object is being mapped. Some pagers need to keep
4502 * track of this, to know when they can reclaim the memory
4503 * object, for example.
4504 * VM calls memory_object_map() for each mapping (specifying
4505 * the protection of each mapping) and calls
4506 * memory_object_last_unmap() when all the mappings are gone.
4508 pager_prot
= max_protection
;
4511 * Copy-On-Write mapping: won't modify the
4514 pager_prot
&= ~VM_PROT_WRITE
;
4516 vm_object_lock(object
);
4517 pager
= object
->pager
;
4518 if (object
->named
&&
4519 pager
!= MEMORY_OBJECT_NULL
&&
4520 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
4521 assert(object
->pager_ready
);
4522 vm_object_mapping_wait(object
, THREAD_UNINT
);
4523 vm_object_mapping_begin(object
);
4524 vm_object_unlock(object
);
4526 kr
= memory_object_map(pager
, pager_prot
);
4527 assert(kr
== KERN_SUCCESS
);
4529 vm_object_lock(object
);
4530 vm_object_mapping_end(object
);
4532 vm_object_unlock(object
);
4536 * Perform the copy if requested
4540 vm_object_t new_object
;
4541 vm_object_offset_t new_offset
;
4543 result
= vm_object_copy_strategically(object
, offset
,
4545 &new_object
, &new_offset
,
4549 if (result
== KERN_MEMORY_RESTART_COPY
) {
4551 boolean_t src_needs_copy
;
4555 * We currently ignore src_needs_copy.
4556 * This really is the issue of how to make
4557 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4558 * non-kernel users to use. Solution forthcoming.
4559 * In the meantime, since we don't allow non-kernel
4560 * memory managers to specify symmetric copy,
4561 * we won't run into problems here.
4563 new_object
= object
;
4564 new_offset
= offset
;
4565 success
= vm_object_copy_quickly(&new_object
,
4571 result
= KERN_SUCCESS
;
4574 * Throw away the reference to the
4575 * original object, as it won't be mapped.
4578 vm_object_deallocate(object
);
4580 if (result
!= KERN_SUCCESS
) {
4584 object
= new_object
;
4585 offset
= new_offset
;
4589 * If non-kernel users want to try to prefault pages, the mapping and prefault
4590 * needs to be atomic.
4592 kernel_prefault
= (try_prefault
&& vm_kernel_map_is_kernel(target_map
));
4593 vmk_flags
.vmkf_keep_map_locked
= (try_prefault
&& !kernel_prefault
);
4597 /* map this object in a "4K" pager */
4598 result
= vm_map_enter_fourk(target_map
,
4601 (vm_map_offset_t
) mask
,
4612 #endif /* __arm64__ */
4614 result
= vm_map_enter(target_map
,
4615 &map_addr
, map_size
,
4616 (vm_map_offset_t
)mask
,
4622 cur_protection
, max_protection
,
4625 if (result
!= KERN_SUCCESS
) {
4626 vm_object_deallocate(object
);
4630 * Try to prefault, and do not forget to release the vm map lock.
4632 if (result
== KERN_SUCCESS
&& try_prefault
) {
4633 mach_vm_address_t va
= map_addr
;
4634 kern_return_t kr
= KERN_SUCCESS
;
4638 pmap_options
= kernel_prefault
? 0 : PMAP_OPTIONS_NOWAIT
;
4639 if (object
->internal
) {
4640 pmap_options
|= PMAP_OPTIONS_INTERNAL
;
/* Walk page_list and enter each valid page into target_map->pmap with pmap_enter_options(). */
4643 for (i
= 0; i
< page_list_count
; ++i
) {
4644 if (!UPL_VALID_PAGE(page_list
, i
)) {
4645 if (kernel_prefault
) {
4646 assertf(FALSE
, "kernel_prefault && !UPL_VALID_PAGE");
4647 result
= KERN_MEMORY_ERROR
;
4652 * If this function call failed, we should stop
4653 * trying to optimize, other calls are likely
4654 * going to fail too.
4656 * We are not gonna report an error for such
4657 * failure though. That's an optimization, not
4658 * something critical.
4660 kr
= pmap_enter_options(target_map
->pmap
,
4661 va
, UPL_PHYS_PAGE(page_list
, i
),
4662 cur_protection
, VM_PROT_NONE
,
4663 0, TRUE
, pmap_options
, NULL
);
4664 if (kr
!= KERN_SUCCESS
) {
4665 OSIncrementAtomic64(&vm_prefault_nb_bailout
);
4666 if (kernel_prefault
) {
4671 OSIncrementAtomic64(&vm_prefault_nb_pages
);
4674 /* Next virtual address */
/* vmkf_keep_map_locked was set above for a non-kernel prefault; release the map lock now. */
4677 if (vmk_flags
.vmkf_keep_map_locked
) {
4678 vm_map_unlock(target_map
);
4682 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4683 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4684 *address
= map_addr
+ offset_in_mapping
;
4686 *address
= map_addr
;
/*
 * vm_map_enter_mem_object:
 *
 * Wrapper that forwards to vm_map_enter_mem_object_helper() and keeps
 * its return value in "ret". When the call succeeds, "address" is
 * non-NULL and the target map's pmap is kernel_pmap, the mapped range
 * (*address, initial_size) is reported via kasan_notify_address().
 *
 * NOTE(review): the full argument list of the helper call, and any
 * preprocessor guard around the kasan notification, are not shown in
 * this listing -- confirm against the complete file.
 */
4692 vm_map_enter_mem_object(
4693 vm_map_t target_map
,
4694 vm_map_offset_t
*address
,
4695 vm_map_size_t initial_size
,
4696 vm_map_offset_t mask
,
4698 vm_map_kernel_flags_t vmk_flags
,
4701 vm_object_offset_t offset
,
4703 vm_prot_t cur_protection
,
4704 vm_prot_t max_protection
,
4705 vm_inherit_t inheritance
)
4709 ret
= vm_map_enter_mem_object_helper(target_map
,
4726 if (ret
== KERN_SUCCESS
&& address
&& target_map
->pmap
== kernel_pmap
) {
4727 kasan_notify_address(*address
, initial_size
);
/*
 * vm_map_enter_mem_object_prefault:
 *
 * Variant of the memory-object mapping entry point that also takes a
 * UPL page list ("page_list" / "page_list_count"). It forwards to
 * vm_map_enter_mem_object_helper() and keeps the result in "ret".
 * When the call succeeds, "address" is non-NULL and the target map's
 * pmap is kernel_pmap, the mapped range (*address, initial_size) is
 * reported via kasan_notify_address().
 *
 * NOTE(review): the full argument list of the helper call is not shown
 * in this listing; presumably the page list is passed through so the
 * helper can prefault those pages -- confirm against the complete file.
 */
4735 vm_map_enter_mem_object_prefault(
4736 vm_map_t target_map
,
4737 vm_map_offset_t
*address
,
4738 vm_map_size_t initial_size
,
4739 vm_map_offset_t mask
,
4741 vm_map_kernel_flags_t vmk_flags
,
4744 vm_object_offset_t offset
,
4745 vm_prot_t cur_protection
,
4746 vm_prot_t max_protection
,
4747 upl_page_list_ptr_t page_list
,
4748 unsigned int page_list_count
)
4752 ret
= vm_map_enter_mem_object_helper(target_map
,
4769 if (ret
== KERN_SUCCESS
&& address
&& target_map
->pmap
== kernel_pmap
) {
4770 kasan_notify_address(*address
, initial_size
);
/*
 * vm_map_enter_mem_object_control:
 *
 * Maps the VM object behind a memory object control handle into
 * "target_map".
 *
 * Steps visible in this listing:
 *  - KERN_INVALID_ARGUMENT for a null target map, protection bits
 *    outside VM_PROT_ALL, inheritance above VM_INHERIT_LAST_VALID, or a
 *    zero initial_size;
 *  - address truncated / size rounded to a page mask;
 *  - the object is obtained with memory_object_control_to_vm_object();
 *    KERN_INVALID_OBJECT is returned if it is VM_OBJECT_NULL or
 *    kernel_object;
 *  - a reference is taken under the object lock;
 *  - for a "named" object with a pager and a copy strategy other than
 *    MEMORY_OBJECT_COPY_NONE, memory_object_map() is called between
 *    vm_object_mapping_begin() and vm_object_mapping_end();
 *  - a copy is made with vm_object_copy_strategically(), falling back
 *    to vm_object_copy_quickly() on KERN_MEMORY_RESTART_COPY;
 *  - the object is entered with vm_map_enter() or vm_map_enter_fourk();
 *    the object reference is dropped if that fails;
 *  - *address is set to map_addr.
 *
 * NOTE(review): several parameters, braces, "else" branches and
 * preprocessor conditions are missing from this listing; confirm
 * nesting and the conditions guarding the copy and 4K paths against
 * the complete file.
 */
4779 vm_map_enter_mem_object_control(
4780 vm_map_t target_map
,
4781 vm_map_offset_t
*address
,
4782 vm_map_size_t initial_size
,
4783 vm_map_offset_t mask
,
4785 vm_map_kernel_flags_t vmk_flags
,
4787 memory_object_control_t control
,
4788 vm_object_offset_t offset
,
4790 vm_prot_t cur_protection
,
4791 vm_prot_t max_protection
,
4792 vm_inherit_t inheritance
)
4794 vm_map_address_t map_addr
;
4795 vm_map_size_t map_size
;
4797 vm_object_size_t size
;
4798 kern_return_t result
;
4799 memory_object_t pager
;
4800 vm_prot_t pager_prot
;
4803 boolean_t fourk
= vmk_flags
.vmkf_fourk
;
4804 #endif /* __arm64__ */
4807 * Check arguments for validity
4809 if ((target_map
== VM_MAP_NULL
) ||
4810 (cur_protection
& ~VM_PROT_ALL
) ||
4811 (max_protection
& ~VM_PROT_ALL
) ||
4812 (inheritance
> VM_INHERIT_LAST_VALID
) ||
4813 initial_size
== 0) {
4814 return KERN_INVALID_ARGUMENT
;
4819 map_addr
= vm_map_trunc_page(*address
,
4821 map_size
= vm_map_round_page(initial_size
,
4824 #endif /* __arm64__ */
4826 map_addr
= vm_map_trunc_page(*address
,
4827 VM_MAP_PAGE_MASK(target_map
));
4828 map_size
= vm_map_round_page(initial_size
,
4829 VM_MAP_PAGE_MASK(target_map
));
4831 size
= vm_object_round_page(initial_size
);
4833 object
= memory_object_control_to_vm_object(control
);
4835 if (object
== VM_OBJECT_NULL
) {
4836 return KERN_INVALID_OBJECT
;
4839 if (object
== kernel_object
) {
4840 printf("Warning: Attempt to map kernel object"
4841 " by a non-private kernel entity\n");
4842 return KERN_INVALID_OBJECT
;
/* Take a reference on the object while holding its lock. */
4845 vm_object_lock(object
);
4846 object
->ref_count
++;
4847 vm_object_res_reference(object
);
4850 * For "named" VM objects, let the pager know that the
4851 * memory object is being mapped. Some pagers need to keep
4852 * track of this, to know when they can reclaim the memory
4853 * object, for example.
4854 * VM calls memory_object_map() for each mapping (specifying
4855 * the protection of each mapping) and calls
4856 * memory_object_last_unmap() when all the mappings are gone.
4858 pager_prot
= max_protection
;
4860 pager_prot
&= ~VM_PROT_WRITE
;
4862 pager
= object
->pager
;
4863 if (object
->named
&&
4864 pager
!= MEMORY_OBJECT_NULL
&&
4865 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
4866 assert(object
->pager_ready
);
4867 vm_object_mapping_wait(object
, THREAD_UNINT
);
4868 vm_object_mapping_begin(object
);
4869 vm_object_unlock(object
);
4871 kr
= memory_object_map(pager
, pager_prot
);
4872 assert(kr
== KERN_SUCCESS
);
4874 vm_object_lock(object
);
4875 vm_object_mapping_end(object
);
4877 vm_object_unlock(object
);
4880 * Perform the copy if requested
4884 vm_object_t new_object
;
4885 vm_object_offset_t new_offset
;
4887 result
= vm_object_copy_strategically(object
, offset
, size
,
4888 &new_object
, &new_offset
,
4892 if (result
== KERN_MEMORY_RESTART_COPY
) {
4894 boolean_t src_needs_copy
;
4898 * We currently ignore src_needs_copy.
4899 * This really is the issue of how to make
4900 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4901 * non-kernel users to use. Solution forthcoming.
4902 * In the meantime, since we don't allow non-kernel
4903 * memory managers to specify symmetric copy,
4904 * we won't run into problems here.
4906 new_object
= object
;
4907 new_offset
= offset
;
4908 success
= vm_object_copy_quickly(&new_object
,
4913 result
= KERN_SUCCESS
;
4916 * Throw away the reference to the
4917 * original object, as it won't be mapped.
4920 vm_object_deallocate(object
);
4922 if (result
!= KERN_SUCCESS
) {
4926 object
= new_object
;
4927 offset
= new_offset
;
4932 result
= vm_map_enter_fourk(target_map
,
4935 (vm_map_offset_t
)mask
,
4941 cur_protection
, max_protection
,
4944 #endif /* __arm64__ */
4946 result
= vm_map_enter(target_map
,
4947 &map_addr
, map_size
,
4948 (vm_map_offset_t
)mask
,
4954 cur_protection
, max_protection
,
4957 if (result
!= KERN_SUCCESS
) {
4958 vm_object_deallocate(object
);
4960 *address
= map_addr
;
/*
 * Bounds defined elsewhere. In this file they are shifted right by
 * PAGE_SHIFT and compared against VM_PAGE_GET_PHYS_PAGE() in an assert
 * on each page returned by cpm_allocate().
 */
4969 extern pmap_paddr_t avail_start
, avail_end
;
/*
 * Contiguous-physical-memory (CPM) mapping routine.
 *
 * Steps visible in this listing:
 *  - "anywhere" is derived from VM_FLAGS_ANYWHERE and the tag is taken
 *    from "flags" with VM_GET_FLAGS_ALIAS();
 *  - *addr is set to vm_map_min(map) or truncated to the map page
 *    mask, and "size" is rounded up to the map page mask;
 *  - a size above VM_MAX_ADDRESS yields KERN_RESOURCE_SHORTAGE;
 *  - pages are obtained with cpm_allocate() and inserted one by one
 *    into a freshly allocated VM object ("cpm_obj");
 *  - an extra object reference is taken before the object is inserted
 *    into the map; if that insertion fails both references are dropped;
 *  - pmap_pageable() is called on [start, end) with FALSE, then each
 *    page is zero-filled and entered with vm_fault_enter();
 *  - under MACH_ASSERT, page state and physical contiguity are checked,
 *    with a panic if a page is missing or the pages are not contiguous;
 *  - the extra object reference is dropped at the end.
 *
 * NOTE(review): the function name line, some parameters, the early
 * return condition preceding "return KERN_SUCCESS", and the map
 * insertion call itself are missing from this listing; confirm against
 * the complete file.
 */
4973 * Allocate memory in the specified map, with the caveat that
4974 * the memory is physically contiguous. This call may fail
4975 * if the system can't find sufficient contiguous memory.
4976 * This call may cause or lead to heart-stopping amounts of
4979 * Memory obtained from this call should be freed in the
4980 * normal way, viz., via vm_deallocate.
4985 vm_map_offset_t
*addr
,
4989 vm_object_t cpm_obj
;
4993 vm_map_offset_t va
, start
, end
, offset
;
4995 vm_map_offset_t prev_addr
= 0;
4996 #endif /* MACH_ASSERT */
4998 boolean_t anywhere
= ((VM_FLAGS_ANYWHERE
& flags
) != 0);
5001 VM_GET_FLAGS_ALIAS(flags
, tag
);
5005 return KERN_SUCCESS
;
5008 *addr
= vm_map_min(map
);
5010 *addr
= vm_map_trunc_page(*addr
,
5011 VM_MAP_PAGE_MASK(map
));
5013 size
= vm_map_round_page(size
,
5014 VM_MAP_PAGE_MASK(map
));
5017 * LP64todo - cpm_allocate should probably allow
5018 * allocations of >4GB, but not with the current
5019 * algorithm, so just cast down the size for now.
5021 if (size
> VM_MAX_ADDRESS
) {
5022 return KERN_RESOURCE_SHORTAGE
;
5024 if ((kr
= cpm_allocate(CAST_DOWN(vm_size_t
, size
),
5025 &pages
, 0, 0, TRUE
, flags
)) != KERN_SUCCESS
) {
5029 cpm_obj
= vm_object_allocate((vm_object_size_t
)size
);
5030 assert(cpm_obj
!= VM_OBJECT_NULL
);
5031 assert(cpm_obj
->internal
);
5032 assert(cpm_obj
->vo_size
== (vm_object_size_t
)size
);
5033 assert(cpm_obj
->can_persist
== FALSE
);
5034 assert(cpm_obj
->pager_created
== FALSE
);
5035 assert(cpm_obj
->pageout
== FALSE
);
5036 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
5039 * Insert pages into object.
5042 vm_object_lock(cpm_obj
);
5043 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
5045 pages
= NEXT_PAGE(m
);
5046 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
5048 assert(!m
->vmp_gobbled
);
5049 assert(!m
->vmp_wanted
);
5050 assert(!m
->vmp_pageout
);
5051 assert(!m
->vmp_tabled
);
5052 assert(VM_PAGE_WIRED(m
));
5053 assert(m
->vmp_busy
);
5054 assert(VM_PAGE_GET_PHYS_PAGE(m
) >= (avail_start
>> PAGE_SHIFT
) && VM_PAGE_GET_PHYS_PAGE(m
) <= (avail_end
>> PAGE_SHIFT
));
5056 m
->vmp_busy
= FALSE
;
5057 vm_page_insert(m
, cpm_obj
, offset
);
5059 assert(cpm_obj
->resident_page_count
== size
/ PAGE_SIZE
);
5060 vm_object_unlock(cpm_obj
);
5063 * Hang onto a reference on the object in case a
5064 * multi-threaded application for some reason decides
5065 * to deallocate the portion of the address space into
5066 * which we will insert this object.
5068 * Unfortunately, we must insert the object now before
5069 * we can talk to the pmap module about which addresses
5070 * must be wired down. Hence, the race with a multi-
5073 vm_object_reference(cpm_obj
);
5076 * Insert object into map.
5085 VM_MAP_KERNEL_FLAGS_NONE
,
5087 (vm_object_offset_t
)0,
5091 VM_INHERIT_DEFAULT
);
5093 if (kr
!= KERN_SUCCESS
) {
5095 * A CPM object doesn't have can_persist set,
5096 * so all we have to do is deallocate it to
5097 * free up these pages.
5099 assert(cpm_obj
->pager_created
== FALSE
);
5100 assert(cpm_obj
->can_persist
== FALSE
);
5101 assert(cpm_obj
->pageout
== FALSE
);
5102 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
5103 vm_object_deallocate(cpm_obj
); /* kill acquired ref */
5104 vm_object_deallocate(cpm_obj
); /* kill creation ref */
5108 * Inform the physical mapping system that the
5109 * range of addresses may not fault, so that
5110 * page tables and such can be locked down as well.
5114 pmap
= vm_map_pmap(map
);
5115 pmap_pageable(pmap
, start
, end
, FALSE
);
5118 * Enter each page into the pmap, to avoid faults.
5119 * Note that this loop could be coded more efficiently,
5120 * if the need arose, rather than looking up each page
5123 for (offset
= 0, va
= start
; offset
< size
;
5124 va
+= PAGE_SIZE
, offset
+= PAGE_SIZE
) {
5127 vm_object_lock(cpm_obj
);
5128 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
5129 assert(m
!= VM_PAGE_NULL
);
5131 vm_page_zero_fill(m
);
5133 type_of_fault
= DBG_ZERO_FILL_FAULT
;
5135 vm_fault_enter(m
, pmap
, va
, VM_PROT_ALL
, VM_PROT_WRITE
,
5137 FALSE
, /* change_wiring */
5138 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
5139 FALSE
, /* no_cache */
5140 FALSE
, /* cs_bypass */
5142 0, /* pmap_options */
5143 NULL
, /* need_retry */
5146 vm_object_unlock(cpm_obj
);
5151 * Verify ordering in address space.
5153 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
5154 vm_object_lock(cpm_obj
);
5155 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
5156 vm_object_unlock(cpm_obj
);
5157 if (m
== VM_PAGE_NULL
) {
5158 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5159 cpm_obj
, (uint64_t)offset
);
5161 assert(m
->vmp_tabled
);
5162 assert(!m
->vmp_busy
);
5163 assert(!m
->vmp_wanted
);
5164 assert(!m
->vmp_fictitious
);
5165 assert(!m
->vmp_private
);
5166 assert(!m
->vmp_absent
);
5167 assert(!m
->vmp_error
);
5168 assert(!m
->vmp_cleaning
);
5169 assert(!m
->vmp_laundry
);
5170 assert(!m
->vmp_precious
);
5171 assert(!m
->vmp_clustered
);
5173 if (VM_PAGE_GET_PHYS_PAGE(m
) != prev_addr
+ 1) {
5174 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5175 (uint64_t)start
, (uint64_t)end
, (uint64_t)va
);
5176 printf("obj %p off 0x%llx\n", cpm_obj
, (uint64_t)offset
);
5177 printf("m %p prev_address 0x%llx\n", m
, (uint64_t)prev_addr
);
5178 panic("vm_allocate_cpm: pages not contig!");
5181 prev_addr
= VM_PAGE_GET_PHYS_PAGE(m
);
5183 #endif /* MACH_ASSERT */
5185 vm_object_deallocate(cpm_obj
); /* kill extra ref */
5194 * Interface is defined in all cases, but unless the kernel
5195 * is built explicitly for this option, the interface does
5201 __unused vm_map_t map
,
5202 __unused vm_map_offset_t
*addr
,
5203 __unused vm_map_size_t size
,
5206 return KERN_FAILURE
;
5210 /* Not used without nested pmaps */
5211 #ifndef NO_NESTED_PMAP
5213 * Clip and unnest a portion of a nested submap mapping.
5220 vm_map_entry_t entry
,
5221 vm_map_offset_t start_unnest
,
5222 vm_map_offset_t end_unnest
)
5224 vm_map_offset_t old_start_unnest
= start_unnest
;
5225 vm_map_offset_t old_end_unnest
= end_unnest
;
5227 assert(entry
->is_sub_map
);
5228 assert(VME_SUBMAP(entry
) != NULL
);
5229 assert(entry
->use_pmap
);
5232 * Query the platform for the optimal unnest range.
5233 * DRK: There's some duplication of effort here, since
5234 * callers may have adjusted the range to some extent. This
5235 * routine was introduced to support 1GiB subtree nesting
5236 * for x86 platforms, which can also nest on 2MiB boundaries
5237 * depending on size/alignment.
5239 if (pmap_adjust_unnest_parameters(map
->pmap
, &start_unnest
, &end_unnest
)) {
5240 assert(VME_SUBMAP(entry
)->is_nested_map
);
5241 assert(!VME_SUBMAP(entry
)->disable_vmentry_reuse
);
5242 log_unnest_badness(map
,
5245 VME_SUBMAP(entry
)->is_nested_map
,
5247 VME_SUBMAP(entry
)->lowest_unnestable_start
-
5248 VME_OFFSET(entry
)));
5251 if (entry
->vme_start
> start_unnest
||
5252 entry
->vme_end
< end_unnest
) {
5253 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5254 "bad nested entry: start=0x%llx end=0x%llx\n",
5255 (long long)start_unnest
, (long long)end_unnest
,
5256 (long long)entry
->vme_start
, (long long)entry
->vme_end
);
5259 if (start_unnest
> entry
->vme_start
) {
5260 _vm_map_clip_start(&map
->hdr
,
5263 if (map
->holelistenabled
) {
5264 vm_map_store_update_first_free(map
, NULL
, FALSE
);
5266 vm_map_store_update_first_free(map
, map
->first_free
, FALSE
);
5269 if (entry
->vme_end
> end_unnest
) {
5270 _vm_map_clip_end(&map
->hdr
,
5273 if (map
->holelistenabled
) {
5274 vm_map_store_update_first_free(map
, NULL
, FALSE
);
5276 vm_map_store_update_first_free(map
, map
->first_free
, FALSE
);
5280 pmap_unnest(map
->pmap
,
5282 entry
->vme_end
- entry
->vme_start
);
5283 if ((map
->mapped_in_other_pmaps
) && os_ref_get_count(&map
->map_refcnt
) != 0) {
5284 /* clean up parent map/maps */
5285 vm_map_submap_pmap_clean(
5286 map
, entry
->vme_start
,
5291 entry
->use_pmap
= FALSE
;
5292 if ((map
->pmap
!= kernel_pmap
) &&
5293 (VME_ALIAS(entry
) == VM_MEMORY_SHARED_PMAP
)) {
5294 VME_ALIAS_SET(entry
, VM_MEMORY_UNSHARED_PMAP
);
5297 #endif /* NO_NESTED_PMAP */
5300 * vm_map_clip_start: [ internal use only ]
5302 * Asserts that the given entry begins at or after
5303 * the specified address; if necessary,
5304 * it splits the entry into two.
5309 vm_map_entry_t entry
,
5310 vm_map_offset_t startaddr
)
5312 #ifndef NO_NESTED_PMAP
5313 if (entry
->is_sub_map
&&
5315 startaddr
>= entry
->vme_start
) {
5316 vm_map_offset_t start_unnest
, end_unnest
;
5319 * Make sure "startaddr" is no longer in a nested range
5320 * before we clip. Unnest only the minimum range the platform
5322 * vm_map_clip_unnest may perform additional adjustments to
5325 start_unnest
= startaddr
& ~(pmap_nesting_size_min
- 1);
5326 end_unnest
= start_unnest
+ pmap_nesting_size_min
;
5327 vm_map_clip_unnest(map
, entry
, start_unnest
, end_unnest
);
5329 #endif /* NO_NESTED_PMAP */
5330 if (startaddr
> entry
->vme_start
) {
5331 if (VME_OBJECT(entry
) &&
5332 !entry
->is_sub_map
&&
5333 VME_OBJECT(entry
)->phys_contiguous
) {
5334 pmap_remove(map
->pmap
,
5335 (addr64_t
)(entry
->vme_start
),
5336 (addr64_t
)(entry
->vme_end
));
5338 if (entry
->vme_atomic
) {
5339 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map
, entry
);
5345 vm_map_offset_t
, entry
->vme_start
,
5346 vm_map_offset_t
, entry
->vme_end
,
5347 vm_map_offset_t
, startaddr
,
5348 int, VME_ALIAS(entry
));
5350 _vm_map_clip_start(&map
->hdr
, entry
, startaddr
);
5351 if (map
->holelistenabled
) {
5352 vm_map_store_update_first_free(map
, NULL
, FALSE
);
5354 vm_map_store_update_first_free(map
, map
->first_free
, FALSE
);
5360 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5362 if ((startaddr) > (entry)->vme_start) \
5363 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5367 * This routine is called only when it is known that
5368 * the entry must be split.
5372 struct vm_map_header
*map_header
,
5373 vm_map_entry_t entry
,
5374 vm_map_offset_t start
)
5376 vm_map_entry_t new_entry
;
5379 * Split off the front portion --
5380 * note that we must insert the new
5381 * entry BEFORE this one, so that
5382 * this entry has the specified starting
5386 if (entry
->map_aligned
) {
5387 assert(VM_MAP_PAGE_ALIGNED(start
,
5388 VM_MAP_HDR_PAGE_MASK(map_header
)));
5391 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
5392 vm_map_entry_copy_full(new_entry
, entry
);
5394 new_entry
->vme_end
= start
;
5395 assert(new_entry
->vme_start
< new_entry
->vme_end
);
5396 VME_OFFSET_SET(entry
, VME_OFFSET(entry
) + (start
- entry
->vme_start
));
5397 assert(start
< entry
->vme_end
);
5398 entry
->vme_start
= start
;
5400 _vm_map_store_entry_link(map_header
, entry
->vme_prev
, new_entry
);
5402 if (entry
->is_sub_map
) {
5403 vm_map_reference(VME_SUBMAP(new_entry
));
5405 vm_object_reference(VME_OBJECT(new_entry
));
5411 * vm_map_clip_end: [ internal use only ]
5413 * Asserts that the given entry ends at or before
5414 * the specified address; if necessary,
5415 * it splits the entry into two.
5420 vm_map_entry_t entry
,
5421 vm_map_offset_t endaddr
)
5423 if (endaddr
> entry
->vme_end
) {
5425 * Within the scope of this clipping, limit "endaddr" to
5426 * the end of this map entry...
5428 endaddr
= entry
->vme_end
;
5430 #ifndef NO_NESTED_PMAP
5431 if (entry
->is_sub_map
&& entry
->use_pmap
) {
5432 vm_map_offset_t start_unnest
, end_unnest
;
5435 * Make sure the range between the start of this entry and
5436 * the new "endaddr" is no longer nested before we clip.
5437 * Unnest only the minimum range the platform can handle.
5438 * vm_map_clip_unnest may perform additional adjustments to
5441 start_unnest
= entry
->vme_start
;
5443 (endaddr
+ pmap_nesting_size_min
- 1) &
5444 ~(pmap_nesting_size_min
- 1);
5445 vm_map_clip_unnest(map
, entry
, start_unnest
, end_unnest
);
5447 #endif /* NO_NESTED_PMAP */
5448 if (endaddr
< entry
->vme_end
) {
5449 if (VME_OBJECT(entry
) &&
5450 !entry
->is_sub_map
&&
5451 VME_OBJECT(entry
)->phys_contiguous
) {
5452 pmap_remove(map
->pmap
,
5453 (addr64_t
)(entry
->vme_start
),
5454 (addr64_t
)(entry
->vme_end
));
5456 if (entry
->vme_atomic
) {
5457 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map
, entry
);
5462 vm_map_offset_t
, entry
->vme_start
,
5463 vm_map_offset_t
, entry
->vme_end
,
5464 vm_map_offset_t
, endaddr
,
5465 int, VME_ALIAS(entry
));
5467 _vm_map_clip_end(&map
->hdr
, entry
, endaddr
);
5468 if (map
->holelistenabled
) {
5469 vm_map_store_update_first_free(map
, NULL
, FALSE
);
5471 vm_map_store_update_first_free(map
, map
->first_free
, FALSE
);
5477 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5479 if ((endaddr) < (entry)->vme_end) \
5480 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5484 * This routine is called only when it is known that
5485 * the entry must be split.
5489 struct vm_map_header
*map_header
,
5490 vm_map_entry_t entry
,
5491 vm_map_offset_t end
)
5493 vm_map_entry_t new_entry
;
5496 * Create a new entry and insert it
5497 * AFTER the specified entry
5500 if (entry
->map_aligned
) {
5501 assert(VM_MAP_PAGE_ALIGNED(end
,
5502 VM_MAP_HDR_PAGE_MASK(map_header
)));
5505 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
5506 vm_map_entry_copy_full(new_entry
, entry
);
5508 assert(entry
->vme_start
< end
);
5509 new_entry
->vme_start
= entry
->vme_end
= end
;
5510 VME_OFFSET_SET(new_entry
,
5511 VME_OFFSET(new_entry
) + (end
- entry
->vme_start
));
5512 assert(new_entry
->vme_start
< new_entry
->vme_end
);
5514 _vm_map_store_entry_link(map_header
, entry
, new_entry
);
5516 if (entry
->is_sub_map
) {
5517 vm_map_reference(VME_SUBMAP(new_entry
));
5519 vm_object_reference(VME_OBJECT(new_entry
));
5525 * VM_MAP_RANGE_CHECK: [ internal use only ]
5527 * Asserts that the starting and ending region
5528 * addresses fall within the valid range of the map.
5530 #define VM_MAP_RANGE_CHECK(map, start, end) \
5532 if (start < vm_map_min(map)) \
5533 start = vm_map_min(map); \
5534 if (end > vm_map_max(map)) \
5535 end = vm_map_max(map); \
5541 * vm_map_range_check: [ internal use only ]
5543 * Check that the region defined by the specified start and
5544 * end addresses is wholly contained within a single map
5545 * entry or set of adjacent map entries of the specified map,
5546 * i.e. the specified region contains no unmapped space.
5547 * If any or all of the region is unmapped, FALSE is returned.
5548 * Otherwise, TRUE is returned and if the output argument 'entry'
5549 * is not NULL it points to the map entry containing the start
5552 * The map is locked for reading on entry and is left locked.
5557 vm_map_offset_t start
,
5558 vm_map_offset_t end
,
5559 vm_map_entry_t
*entry
)
5562 vm_map_offset_t prev
;
5565 * Basic sanity checks first
5567 if (start
< vm_map_min(map
) || end
> vm_map_max(map
) || start
> end
) {
5572 * Check first if the region starts within a valid
5573 * mapping for the map.
5575 if (!vm_map_lookup_entry(map
, start
, &cur
)) {
5580 * Optimize for the case that the region is contained
5581 * in a single map entry.
5583 if (entry
!= (vm_map_entry_t
*) NULL
) {
5586 if (end
<= cur
->vme_end
) {
5591 * If the region is not wholly contained within a
5592 * single entry, walk the entries looking for holes.
5594 prev
= cur
->vme_end
;
5595 cur
= cur
->vme_next
;
5596 while ((cur
!= vm_map_to_entry(map
)) && (prev
== cur
->vme_start
)) {
5597 if (end
<= cur
->vme_end
) {
5600 prev
= cur
->vme_end
;
5601 cur
= cur
->vme_next
;
5607 * vm_map_submap: [ kernel use only ]
5609 * Mark the given range as handled by a subordinate map.
5611 * This range must have been created with vm_map_find using
5612 * the vm_submap_object, and no other operations may have been
5613 * performed on this range prior to calling vm_map_submap.
5615 * Only a limited number of operations can be performed
5616 * within this range after calling vm_map_submap:
5618 * [Don't try vm_map_copyin!]
5620 * To remove a submapping, one must first remove the
5621 * range from the superior map, and then destroy the
5622 * submap (if desired). [Better yet, don't try it.]
5627 vm_map_offset_t start
,
5628 vm_map_offset_t end
,
5630 vm_map_offset_t offset
,
5631 #ifdef NO_NESTED_PMAP
5633 #endif /* NO_NESTED_PMAP */
5636 vm_map_entry_t entry
;
5637 kern_return_t result
= KERN_INVALID_ARGUMENT
;
5642 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
5643 entry
= entry
->vme_next
;
5646 if (entry
== vm_map_to_entry(map
) ||
5647 entry
->is_sub_map
) {
5649 return KERN_INVALID_ARGUMENT
;
5652 vm_map_clip_start(map
, entry
, start
);
5653 vm_map_clip_end(map
, entry
, end
);
5655 if ((entry
->vme_start
== start
) && (entry
->vme_end
== end
) &&
5656 (!entry
->is_sub_map
) &&
5657 ((object
= VME_OBJECT(entry
)) == vm_submap_object
) &&
5658 (object
->resident_page_count
== 0) &&
5659 (object
->copy
== VM_OBJECT_NULL
) &&
5660 (object
->shadow
== VM_OBJECT_NULL
) &&
5661 (!object
->pager_created
)) {
5662 VME_OFFSET_SET(entry
, (vm_object_offset_t
)offset
);
5663 VME_OBJECT_SET(entry
, VM_OBJECT_NULL
);
5664 vm_object_deallocate(object
);
5665 entry
->is_sub_map
= TRUE
;
5666 entry
->use_pmap
= FALSE
;
5667 VME_SUBMAP_SET(entry
, submap
);
5668 vm_map_reference(submap
);
5669 if (submap
->mapped_in_other_pmaps
== FALSE
&&
5670 vm_map_pmap(submap
) != PMAP_NULL
&&
5671 vm_map_pmap(submap
) != vm_map_pmap(map
)) {
5673 * This submap is being mapped in a map
5674 * that uses a different pmap.
5675 * Set its "mapped_in_other_pmaps" flag
5676 * to indicate that we now need to
5677 * remove mappings from all pmaps rather
5678 * than just the submap's pmap.
5680 submap
->mapped_in_other_pmaps
= TRUE
;
5683 #ifndef NO_NESTED_PMAP
5685 /* nest if platform code will allow */
5686 if (submap
->pmap
== NULL
) {
5687 ledger_t ledger
= map
->pmap
->ledger
;
5688 submap
->pmap
= pmap_create_options(ledger
,
5689 (vm_map_size_t
) 0, 0);
5690 if (submap
->pmap
== PMAP_NULL
) {
5692 return KERN_NO_SPACE
;
5694 #if defined(__arm__) || defined(__arm64__)
5695 pmap_set_nested(submap
->pmap
);
5698 result
= pmap_nest(map
->pmap
,
5699 (VME_SUBMAP(entry
))->pmap
,
5702 (uint64_t)(end
- start
));
5704 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result
);
5706 entry
->use_pmap
= TRUE
;
5708 #else /* NO_NESTED_PMAP */
5709 pmap_remove(map
->pmap
, (addr64_t
)start
, (addr64_t
)end
);
5710 #endif /* NO_NESTED_PMAP */
5711 result
= KERN_SUCCESS
;
5721 * Sets the protection of the specified address
5722 * region in the target map. If "set_max" is
5723 * specified, the maximum protection is to be set;
5724 * otherwise, only the current protection is affected.
5729 vm_map_offset_t start
,
5730 vm_map_offset_t end
,
5734 vm_map_entry_t current
;
5735 vm_map_offset_t prev
;
5736 vm_map_entry_t entry
;
5738 int pmap_options
= 0;
5741 if (new_prot
& VM_PROT_COPY
) {
5742 vm_map_offset_t new_start
;
5743 vm_prot_t cur_prot
, max_prot
;
5744 vm_map_kernel_flags_t kflags
;
5746 /* LP64todo - see below */
5747 if (start
>= map
->max_offset
) {
5748 return KERN_INVALID_ADDRESS
;
5751 #if VM_PROTECT_WX_FAIL
5752 if ((new_prot
& VM_PROT_EXECUTE
) &&
5753 map
!= kernel_map
&&
5754 cs_process_enforcement(NULL
)) {
5756 uint64_t, (uint64_t) start
,
5757 uint64_t, (uint64_t) end
,
5758 vm_prot_t
, new_prot
);
5759 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5761 (current_task()->bsd_info
5762 ? proc_name_address(current_task()->bsd_info
)
5765 return KERN_PROTECTION_FAILURE
;
5767 #endif /* VM_PROTECT_WX_FAIL */
5770 * Let vm_map_remap_extract() know that it will need to:
5771 * + make a copy of the mapping
5772 * + add VM_PROT_WRITE to the max protections
5773 * + remove any protections that are no longer allowed from the
5774 * max protections (to avoid any WRITE/EXECUTE conflict, for
5776 * Note that "max_prot" is an IN/OUT parameter only for this
5777 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5780 max_prot
= new_prot
& VM_PROT_ALL
;
5781 kflags
= VM_MAP_KERNEL_FLAGS_NONE
;
5782 kflags
.vmkf_remap_prot_copy
= TRUE
;
5783 kflags
.vmkf_overwrite_immutable
= TRUE
;
5785 kr
= vm_map_remap(map
,
5789 VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
,
5794 TRUE
, /* copy-on-write remapping! */
5797 VM_INHERIT_DEFAULT
);
5798 if (kr
!= KERN_SUCCESS
) {
5801 new_prot
&= ~VM_PROT_COPY
;
5806 /* LP64todo - remove this check when vm_map_commpage64()
5807 * no longer has to stuff in a map_entry for the commpage
5808 * above the map's max_offset.
5810 if (start
>= map
->max_offset
) {
5812 return KERN_INVALID_ADDRESS
;
5817 * Lookup the entry. If it doesn't start in a valid
5818 * entry, return an error.
5820 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
5822 return KERN_INVALID_ADDRESS
;
5825 if (entry
->superpage_size
&& (start
& (SUPERPAGE_SIZE
- 1))) { /* extend request to whole entry */
5826 start
= SUPERPAGE_ROUND_DOWN(start
);
5831 if (entry
->superpage_size
) {
5832 end
= SUPERPAGE_ROUND_UP(end
);
5836 * Make a first pass to check for protection and address
5841 prev
= current
->vme_start
;
5842 while ((current
!= vm_map_to_entry(map
)) &&
5843 (current
->vme_start
< end
)) {
5845 * If there is a hole, return an error.
5847 if (current
->vme_start
!= prev
) {
5849 return KERN_INVALID_ADDRESS
;
5852 new_max
= current
->max_protection
;
5853 if ((new_prot
& new_max
) != new_prot
) {
5855 return KERN_PROTECTION_FAILURE
;
5858 if ((new_prot
& VM_PROT_WRITE
) &&
5859 (new_prot
& VM_PROT_EXECUTE
) &&
5860 #if !CONFIG_EMBEDDED
5861 map
!= kernel_map
&&
5862 cs_process_enforcement(NULL
) &&
5863 #endif /* !CONFIG_EMBEDDED */
5864 !(current
->used_for_jit
)) {
5866 uint64_t, (uint64_t) current
->vme_start
,
5867 uint64_t, (uint64_t) current
->vme_end
,
5868 vm_prot_t
, new_prot
);
5869 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5871 (current_task()->bsd_info
5872 ? proc_name_address(current_task()->bsd_info
)
5875 new_prot
&= ~VM_PROT_EXECUTE
;
5876 #if VM_PROTECT_WX_FAIL
5878 return KERN_PROTECTION_FAILURE
;
5879 #endif /* VM_PROTECT_WX_FAIL */
5883 * If the task has requested executable lockdown,
5885 * - adding executable protections OR
5886 * - adding write protections to an existing executable mapping.
5888 if (map
->map_disallow_new_exec
== TRUE
) {
5889 if ((new_prot
& VM_PROT_EXECUTE
) ||
5890 ((current
->protection
& VM_PROT_EXECUTE
) && (new_prot
& VM_PROT_WRITE
))) {
5892 return KERN_PROTECTION_FAILURE
;
5896 prev
= current
->vme_end
;
5897 current
= current
->vme_next
;
5902 end
== vm_map_round_page(prev
, VM_MAP_PAGE_MASK(map
))) {
5903 vm_map_entry_t prev_entry
;
5905 prev_entry
= current
->vme_prev
;
5906 if (prev_entry
!= vm_map_to_entry(map
) &&
5907 !prev_entry
->map_aligned
&&
5908 (vm_map_round_page(prev_entry
->vme_end
,
5909 VM_MAP_PAGE_MASK(map
))
5912 * The last entry in our range is not "map-aligned"
5913 * but it would have reached all the way to "end"
5914 * if it had been map-aligned, so this is not really
5915 * a hole in the range and we can proceed.
5920 #endif /* __arm64__ */
5924 return KERN_INVALID_ADDRESS
;
5928 * Go back and fix up protections.
5929 * Clip to start here if the range starts within
5934 if (current
!= vm_map_to_entry(map
)) {
5935 /* clip and unnest if necessary */
5936 vm_map_clip_start(map
, current
, start
);
5939 while ((current
!= vm_map_to_entry(map
)) &&
5940 (current
->vme_start
< end
)) {
5943 vm_map_clip_end(map
, current
, end
);
5945 if (current
->is_sub_map
) {
5946 /* clipping did unnest if needed */
5947 assert(!current
->use_pmap
);
5950 old_prot
= current
->protection
;
5953 current
->max_protection
= new_prot
;
5954 current
->protection
= new_prot
& old_prot
;
5956 current
->protection
= new_prot
;
5960 * Update physical map if necessary.
5961 * If the request is to turn off write protection,
5962 * we won't do it for real (in pmap). This is because
5963 * it would cause copy-on-write to fail. We've already
5964 * set the new protection in the map, so if a
5965 * write-protect fault occurred, it will be fixed up
5966 * properly, COW or not.
5968 if (current
->protection
!= old_prot
) {
5969 /* Look one level in we support nested pmaps */
5970 /* from mapped submaps which are direct entries */
5975 prot
= current
->protection
;
5976 if (current
->is_sub_map
|| (VME_OBJECT(current
) == NULL
) || (VME_OBJECT(current
) != compressor_object
)) {
5977 prot
&= ~VM_PROT_WRITE
;
5979 assert(!VME_OBJECT(current
)->code_signed
);
5980 assert(VME_OBJECT(current
)->copy_strategy
== MEMORY_OBJECT_COPY_NONE
);
5983 if (override_nx(map
, VME_ALIAS(current
)) && prot
) {
5984 prot
|= VM_PROT_EXECUTE
;
5987 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5988 if (!(old_prot
& VM_PROT_EXECUTE
) &&
5989 (prot
& VM_PROT_EXECUTE
) &&
5990 panic_on_unsigned_execute
&&
5991 (proc_selfcsflags() & CS_KILL
)) {
5992 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, old_prot
, prot
);
5994 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5996 if (pmap_has_prot_policy(prot
)) {
5997 if (current
->wired_count
) {
5998 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5999 map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, prot
, current
->wired_count
);
6002 /* If the pmap layer cares about this
6003 * protection type, force a fault for
6004 * each page so that vm_fault will
6005 * repopulate the page with the full
6006 * set of protections.
6009 * TODO: We don't seem to need this,
6010 * but this is due to an internal
6011 * implementation detail of
6012 * pmap_protect. Do we want to rely
6015 prot
= VM_PROT_NONE
;
6018 if (current
->is_sub_map
&& current
->use_pmap
) {
6019 pmap_protect(VME_SUBMAP(current
)->pmap
,
6024 if (prot
& VM_PROT_WRITE
) {
6025 if (VME_OBJECT(current
) == compressor_object
) {
6027 * For write requests on the
6028 * compressor, we wil ask the
6029 * pmap layer to prevent us from
6030 * taking a write fault when we
6031 * attempt to access the mapping
6034 pmap_options
|= PMAP_OPTIONS_PROTECT_IMMEDIATE
;
6038 pmap_protect_options(map
->pmap
,
6046 current
= current
->vme_next
;
6050 while ((current
!= vm_map_to_entry(map
)) &&
6051 (current
->vme_start
<= end
)) {
6052 vm_map_simplify_entry(map
, current
);
6053 current
= current
->vme_next
;
6057 return KERN_SUCCESS
;
6063 * Sets the inheritance of the specified address
6064 * range in the target map. Inheritance
6065 * affects how the map will be shared with
6066 * child maps at the time of vm_map_fork.
6071 vm_map_offset_t start
,
6072 vm_map_offset_t end
,
6073 vm_inherit_t new_inheritance
)
6075 vm_map_entry_t entry
;
6076 vm_map_entry_t temp_entry
;
6080 VM_MAP_RANGE_CHECK(map
, start
, end
);
6082 if (vm_map_lookup_entry(map
, start
, &temp_entry
)) {
6085 temp_entry
= temp_entry
->vme_next
;
6089 /* first check entire range for submaps which can't support the */
6090 /* given inheritance. */
6091 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
6092 if (entry
->is_sub_map
) {
6093 if (new_inheritance
== VM_INHERIT_COPY
) {
6095 return KERN_INVALID_ARGUMENT
;
6099 entry
= entry
->vme_next
;
6103 if (entry
!= vm_map_to_entry(map
)) {
6104 /* clip and unnest if necessary */
6105 vm_map_clip_start(map
, entry
, start
);
6108 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
6109 vm_map_clip_end(map
, entry
, end
);
6110 if (entry
->is_sub_map
) {
6111 /* clip did unnest if needed */
6112 assert(!entry
->use_pmap
);
6115 entry
->inheritance
= new_inheritance
;
6117 entry
= entry
->vme_next
;
6121 return KERN_SUCCESS
;
6125 * Update the accounting for the amount of wired memory in this map. If the user has
6126 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6129 static kern_return_t
6132 vm_map_entry_t entry
,
6133 boolean_t user_wire
)
6138 unsigned int total_wire_count
= vm_page_wire_count
+ vm_lopage_free_count
;
6141 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6145 if (entry
->user_wired_count
== 0) {
6146 size
= entry
->vme_end
- entry
->vme_start
;
6149 * Since this is the first time the user is wiring this map entry, check to see if we're
6150 * exceeding the user wire limits. There is a per map limit which is the smaller of either
6151 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value. There is also
6152 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6153 * limit, then we fail.
6156 if (size
+ map
->user_wire_size
> MIN(map
->user_wire_limit
, vm_per_task_user_wire_limit
) ||
6157 size
+ ptoa_64(total_wire_count
) > vm_global_user_wire_limit
) {
6158 return KERN_RESOURCE_SHORTAGE
;
6162 * The first time the user wires an entry, we also increment the wired_count and add this to
6163 * the total that has been wired in the map.
6166 if (entry
->wired_count
>= MAX_WIRE_COUNT
) {
6167 return KERN_FAILURE
;
6170 entry
->wired_count
++;
6171 map
->user_wire_size
+= size
;
6174 if (entry
->user_wired_count
>= MAX_WIRE_COUNT
) {
6175 return KERN_FAILURE
;
6178 entry
->user_wired_count
++;
6181 * The kernel's wiring the memory. Just bump the count and continue.
6184 if (entry
->wired_count
>= MAX_WIRE_COUNT
) {
6185 panic("vm_map_wire: too many wirings");
6188 entry
->wired_count
++;
6191 return KERN_SUCCESS
;
6195 * Update the memory wiring accounting now that the given map entry is being unwired.
6199 subtract_wire_counts(
6201 vm_map_entry_t entry
,
6202 boolean_t user_wire
)
6206 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6209 if (entry
->user_wired_count
== 1) {
6211 * We're removing the last user wire reference. Decrement the wired_count and the total
6212 * user wired memory for this map.
6215 assert(entry
->wired_count
>= 1);
6216 entry
->wired_count
--;
6217 map
->user_wire_size
-= entry
->vme_end
- entry
->vme_start
;
6220 assert(entry
->user_wired_count
>= 1);
6221 entry
->user_wired_count
--;
6224 * The kernel is unwiring the memory. Just update the count.
6227 assert(entry
->wired_count
>= 1);
6228 entry
->wired_count
--;
6232 int cs_executable_wire
= 0;
6237 * Sets the pageability of the specified address range in the
6238 * target map as wired. Regions specified as not pageable require
6239 * locked-down physical memory and physical page maps. The
6240 * access_type variable indicates types of accesses that must not
6241 * generate page faults. This is checked against protection of
6242 * memory being locked-down.
6244 * The map must not be locked, but a reference must remain to the
6245 * map throughout the call.
6247 static kern_return_t
6250 vm_map_offset_t start
,
6251 vm_map_offset_t end
,
6252 vm_prot_t caller_prot
,
6254 boolean_t user_wire
,
6256 vm_map_offset_t pmap_addr
,
6257 ppnum_t
*physpage_p
)
6259 vm_map_entry_t entry
;
6260 vm_prot_t access_type
;
6261 struct vm_map_entry
*first_entry
, tmp_entry
;
6263 vm_map_offset_t s
, e
;
6265 boolean_t need_wakeup
;
6266 boolean_t main_map
= FALSE
;
6267 wait_interrupt_t interruptible_state
;
6268 thread_t cur_thread
;
6269 unsigned int last_timestamp
;
6271 boolean_t wire_and_extract
;
6273 access_type
= (caller_prot
& VM_PROT_ALL
);
6275 wire_and_extract
= FALSE
;
6276 if (physpage_p
!= NULL
) {
6278 * The caller wants the physical page number of the
6279 * wired page. We return only one physical page number
6280 * so this works for only one page at a time.
6282 if ((end
- start
) != PAGE_SIZE
) {
6283 return KERN_INVALID_ARGUMENT
;
6285 wire_and_extract
= TRUE
;
6290 if (map_pmap
== NULL
) {
6293 last_timestamp
= map
->timestamp
;
6295 VM_MAP_RANGE_CHECK(map
, start
, end
);
6296 assert(page_aligned(start
));
6297 assert(page_aligned(end
));
6298 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
6299 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
6301 /* We wired what the caller asked for, zero pages */
6303 return KERN_SUCCESS
;
6306 need_wakeup
= FALSE
;
6307 cur_thread
= current_thread();
6312 if (vm_map_lookup_entry(map
, s
, &first_entry
)) {
6313 entry
= first_entry
;
6315 * vm_map_clip_start will be done later.
6316 * We don't want to unnest any nested submaps here !
6319 /* Start address is not in map */
6320 rc
= KERN_INVALID_ADDRESS
;
6324 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
6326 * At this point, we have wired from "start" to "s".
6327 * We still need to wire from "s" to "end".
6329 * "entry" hasn't been clipped, so it could start before "s"
6330 * and/or end after "end".
6333 /* "e" is how far we want to wire in this entry */
6340 * If another thread is wiring/unwiring this entry then
6341 * block after informing other thread to wake us up.
6343 if (entry
->in_transition
) {
6344 wait_result_t wait_result
;
6347 * We have not clipped the entry. Make sure that
6348 * the start address is in range so that the lookup
6349 * below will succeed.
6350 * "s" is the current starting point: we've already
6351 * wired from "start" to "s" and we still have
6352 * to wire from "s" to "end".
6355 entry
->needs_wakeup
= TRUE
;
6358 * wake up anybody waiting on entries that we have
6362 vm_map_entry_wakeup(map
);
6363 need_wakeup
= FALSE
;
6366 * User wiring is interruptible
6368 wait_result
= vm_map_entry_wait(map
,
6369 (user_wire
) ? THREAD_ABORTSAFE
:
6371 if (user_wire
&& wait_result
== THREAD_INTERRUPTED
) {
6373 * undo the wirings we have done so far
6374 * We do not clear the needs_wakeup flag,
6375 * because we cannot tell if we were the
6383 * Cannot avoid a lookup here. reset timestamp.
6385 last_timestamp
= map
->timestamp
;
6388 * The entry could have been clipped, look it up again.
6389 * The worst that can happen is that it may not exist anymore.
6391 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
6393 * User: undo everything up to the previous
6394 * entry. let vm_map_unwire worry about
6395 * checking the validity of the range.
6400 entry
= first_entry
;
6404 if (entry
->is_sub_map
) {
6405 vm_map_offset_t sub_start
;
6406 vm_map_offset_t sub_end
;
6407 vm_map_offset_t local_start
;
6408 vm_map_offset_t local_end
;
6411 if (wire_and_extract
) {
6413 * Wiring would result in copy-on-write
6414 * which would not be compatible with
6415 * the sharing we have with the original
6416 * provider of this memory.
6418 rc
= KERN_INVALID_ARGUMENT
;
6422 vm_map_clip_start(map
, entry
, s
);
6423 vm_map_clip_end(map
, entry
, end
);
6425 sub_start
= VME_OFFSET(entry
);
6426 sub_end
= entry
->vme_end
;
6427 sub_end
+= VME_OFFSET(entry
) - entry
->vme_start
;
6429 local_end
= entry
->vme_end
;
6430 if (map_pmap
== NULL
) {
6432 vm_object_offset_t offset
;
6435 vm_map_entry_t local_entry
;
6436 vm_map_version_t version
;
6437 vm_map_t lookup_map
;
6439 if (entry
->use_pmap
) {
6440 pmap
= VME_SUBMAP(entry
)->pmap
;
6441 /* ppc implementation requires that */
6442 /* submaps pmap address ranges line */
6443 /* up with parent map */
6445 pmap_addr
= sub_start
;
6453 if (entry
->wired_count
) {
6454 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6459 * The map was not unlocked:
6460 * no need to goto re-lookup.
6461 * Just go directly to next entry.
6463 entry
= entry
->vme_next
;
6464 s
= entry
->vme_start
;
6468 /* call vm_map_lookup_locked to */
6469 /* cause any needs copy to be */
6471 local_start
= entry
->vme_start
;
6473 vm_map_lock_write_to_read(map
);
6474 if (vm_map_lookup_locked(
6475 &lookup_map
, local_start
,
6476 access_type
| VM_PROT_COPY
,
6477 OBJECT_LOCK_EXCLUSIVE
,
6479 &offset
, &prot
, &wired
,
6482 vm_map_unlock_read(lookup_map
);
6483 assert(map_pmap
== NULL
);
6484 vm_map_unwire(map
, start
,
6486 return KERN_FAILURE
;
6488 vm_object_unlock(object
);
6489 if (real_map
!= lookup_map
) {
6490 vm_map_unlock(real_map
);
6492 vm_map_unlock_read(lookup_map
);
6495 /* we unlocked, so must re-lookup */
6496 if (!vm_map_lookup_entry(map
,
6504 * entry could have been "simplified",
6507 entry
= local_entry
;
6508 assert(s
== local_start
);
6509 vm_map_clip_start(map
, entry
, s
);
6510 vm_map_clip_end(map
, entry
, end
);
6511 /* re-compute "e" */
6517 /* did we have a change of type? */
6518 if (!entry
->is_sub_map
) {
6519 last_timestamp
= map
->timestamp
;
6523 local_start
= entry
->vme_start
;
6527 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6531 entry
->in_transition
= TRUE
;
6534 rc
= vm_map_wire_nested(VME_SUBMAP(entry
),
6537 user_wire
, pmap
, pmap_addr
,
6542 * Find the entry again. It could have been clipped
6543 * after we unlocked the map.
6545 if (!vm_map_lookup_entry(map
, local_start
,
6547 panic("vm_map_wire: re-lookup failed");
6549 entry
= first_entry
;
6551 assert(local_start
== s
);
6552 /* re-compute "e" */
6558 last_timestamp
= map
->timestamp
;
6559 while ((entry
!= vm_map_to_entry(map
)) &&
6560 (entry
->vme_start
< e
)) {
6561 assert(entry
->in_transition
);
6562 entry
->in_transition
= FALSE
;
6563 if (entry
->needs_wakeup
) {
6564 entry
->needs_wakeup
= FALSE
;
6567 if (rc
!= KERN_SUCCESS
) {/* from vm_*_wire */
6568 subtract_wire_counts(map
, entry
, user_wire
);
6570 entry
= entry
->vme_next
;
6572 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6576 /* no need to relookup again */
6577 s
= entry
->vme_start
;
6582 * If this entry is already wired then increment
6583 * the appropriate wire reference count.
6585 if (entry
->wired_count
) {
6586 if ((entry
->protection
& access_type
) != access_type
) {
6587 /* found a protection problem */
6591 * We should always return an error
6592 * in this case but since we didn't
6593 * enforce it before, let's do
6594 * it only for the new "wire_and_extract"
6595 * code path for now...
6597 if (wire_and_extract
) {
6598 rc
= KERN_PROTECTION_FAILURE
;
6604 * entry is already wired down, get our reference
6605 * after clipping to our range.
6607 vm_map_clip_start(map
, entry
, s
);
6608 vm_map_clip_end(map
, entry
, end
);
6610 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6614 if (wire_and_extract
) {
6616 vm_object_offset_t offset
;
6620 * We don't have to "wire" the page again
6621 * but we still have to "extract" its
6622 * physical page number, after some sanity
6625 assert((entry
->vme_end
- entry
->vme_start
)
6627 assert(!entry
->needs_copy
);
6628 assert(!entry
->is_sub_map
);
6629 assert(VME_OBJECT(entry
));
6630 if (((entry
->vme_end
- entry
->vme_start
)
6632 entry
->needs_copy
||
6633 entry
->is_sub_map
||
6634 VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
6635 rc
= KERN_INVALID_ARGUMENT
;
6639 object
= VME_OBJECT(entry
);
6640 offset
= VME_OFFSET(entry
);
6641 /* need exclusive lock to update m->dirty */
6642 if (entry
->protection
& VM_PROT_WRITE
) {
6643 vm_object_lock(object
);
6645 vm_object_lock_shared(object
);
6647 m
= vm_page_lookup(object
, offset
);
6648 assert(m
!= VM_PAGE_NULL
);
6649 assert(VM_PAGE_WIRED(m
));
6650 if (m
!= VM_PAGE_NULL
&& VM_PAGE_WIRED(m
)) {
6651 *physpage_p
= VM_PAGE_GET_PHYS_PAGE(m
);
6652 if (entry
->protection
& VM_PROT_WRITE
) {
6653 vm_object_lock_assert_exclusive(
6655 m
->vmp_dirty
= TRUE
;
6658 /* not already wired !? */
6661 vm_object_unlock(object
);
6664 /* map was not unlocked: no need to relookup */
6665 entry
= entry
->vme_next
;
6666 s
= entry
->vme_start
;
6671 * Unwired entry or wire request transmitted via submap
6675 * Wiring would copy the pages to the shadow object.
6676 * The shadow object would not be code-signed so
6677 * attempting to execute code from these copied pages
6678 * would trigger a code-signing violation.
6681 if ((entry
->protection
& VM_PROT_EXECUTE
)
6682 #if !CONFIG_EMBEDDED
6684 map
!= kernel_map
&&
6685 cs_process_enforcement(NULL
)
6686 #endif /* !CONFIG_EMBEDDED */
6689 printf("pid %d[%s] wiring executable range from "
6690 "0x%llx to 0x%llx: rejected to preserve "
6693 (current_task()->bsd_info
6694 ? proc_name_address(current_task()->bsd_info
)
6696 (uint64_t) entry
->vme_start
,
6697 (uint64_t) entry
->vme_end
);
6698 #endif /* MACH_ASSERT */
6699 DTRACE_VM2(cs_executable_wire
,
6700 uint64_t, (uint64_t)entry
->vme_start
,
6701 uint64_t, (uint64_t)entry
->vme_end
);
6702 cs_executable_wire
++;
6703 rc
= KERN_PROTECTION_FAILURE
;
6708 * Perform actions of vm_map_lookup that need the write
6709 * lock on the map: create a shadow object for a
6710 * copy-on-write region, or an object for a zero-fill
6713 size
= entry
->vme_end
- entry
->vme_start
;
6715 * If wiring a copy-on-write page, we need to copy it now
6716 * even if we're only (currently) requesting read access.
6717 * This is aggressive, but once it's wired we can't move it.
6719 if (entry
->needs_copy
) {
6720 if (wire_and_extract
) {
6722 * We're supposed to share with the original
6723 * provider so should not be "needs_copy"
6725 rc
= KERN_INVALID_ARGUMENT
;
6729 VME_OBJECT_SHADOW(entry
, size
);
6730 entry
->needs_copy
= FALSE
;
6731 } else if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
6732 if (wire_and_extract
) {
6734 * We're supposed to share with the original
6735 * provider so should already have an object.
6737 rc
= KERN_INVALID_ARGUMENT
;
6740 VME_OBJECT_SET(entry
, vm_object_allocate(size
));
6741 VME_OFFSET_SET(entry
, (vm_object_offset_t
)0);
6742 assert(entry
->use_pmap
);
6745 vm_map_clip_start(map
, entry
, s
);
6746 vm_map_clip_end(map
, entry
, end
);
6748 /* re-compute "e" */
6755 * Check for holes and protection mismatch.
6756 * Holes: Next entry should be contiguous unless this
6757 * is the end of the region.
6758 * Protection: Access requested must be allowed, unless
6759 * wiring is by protection class
6761 if ((entry
->vme_end
< end
) &&
6762 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
6763 (entry
->vme_next
->vme_start
> entry
->vme_end
))) {
6765 rc
= KERN_INVALID_ADDRESS
;
6768 if ((entry
->protection
& access_type
) != access_type
) {
6769 /* found a protection problem */
6770 rc
= KERN_PROTECTION_FAILURE
;
6774 assert(entry
->wired_count
== 0 && entry
->user_wired_count
== 0);
6776 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
) {
6780 entry
->in_transition
= TRUE
;
6783 * This entry might get split once we unlock the map.
6784 * In vm_fault_wire(), we need the current range as
6785 * defined by this entry. In order for this to work
6786 * along with a simultaneous clip operation, we make a
6787 * temporary copy of this entry and use that for the
6788 * wiring. Note that the underlying objects do not
6789 * change during a clip.
6794 * The in_transition state guarantees that the entry
6795 * (or entries for this range, if split occurred) will be
6796 * there when the map lock is acquired for the second time.
6800 if (!user_wire
&& cur_thread
!= THREAD_NULL
) {
6801 interruptible_state
= thread_interrupt_level(THREAD_UNINT
);
6803 interruptible_state
= THREAD_UNINT
;
6807 rc
= vm_fault_wire(map
,
6808 &tmp_entry
, caller_prot
, tag
, map_pmap
, pmap_addr
,
6811 rc
= vm_fault_wire(map
,
6812 &tmp_entry
, caller_prot
, tag
, map
->pmap
,
6813 tmp_entry
.vme_start
,
6817 if (!user_wire
&& cur_thread
!= THREAD_NULL
) {
6818 thread_interrupt_level(interruptible_state
);
6823 if (last_timestamp
+ 1 != map
->timestamp
) {
6825 * Find the entry again. It could have been clipped
6826 * after we unlocked the map.
6828 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
6830 panic("vm_map_wire: re-lookup failed");
6833 entry
= first_entry
;
6836 last_timestamp
= map
->timestamp
;
6838 while ((entry
!= vm_map_to_entry(map
)) &&
6839 (entry
->vme_start
< tmp_entry
.vme_end
)) {
6840 assert(entry
->in_transition
);
6841 entry
->in_transition
= FALSE
;
6842 if (entry
->needs_wakeup
) {
6843 entry
->needs_wakeup
= FALSE
;
6846 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6847 subtract_wire_counts(map
, entry
, user_wire
);
6849 entry
= entry
->vme_next
;
6852 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6856 if ((entry
!= vm_map_to_entry(map
)) && /* we still have entries in the map */
6857 (tmp_entry
.vme_end
!= end
) && /* AND, we are not at the end of the requested range */
6858 (entry
->vme_start
!= tmp_entry
.vme_end
)) { /* AND, the next entry is not contiguous. */
6859 /* found a "new" hole */
6860 s
= tmp_entry
.vme_end
;
6861 rc
= KERN_INVALID_ADDRESS
;
6865 s
= entry
->vme_start
;
6866 } /* end while loop through map entries */
6869 if (rc
== KERN_SUCCESS
) {
6870 /* repair any damage we may have made to the VM map */
6871 vm_map_simplify_range(map
, start
, end
);
6877 * wake up anybody waiting on entries we wired.
6880 vm_map_entry_wakeup(map
);
6883 if (rc
!= KERN_SUCCESS
) {
6884 /* undo what has been wired so far */
6885 vm_map_unwire_nested(map
, start
, s
, user_wire
,
6886 map_pmap
, pmap_addr
);
6896 vm_map_wire_external(
6898 vm_map_offset_t start
,
6899 vm_map_offset_t end
,
6900 vm_prot_t caller_prot
,
6901 boolean_t user_wire
)
6905 kret
= vm_map_wire_nested(map
, start
, end
, caller_prot
, vm_tag_bt(),
6906 user_wire
, (pmap_t
)NULL
, 0, NULL
);
6913 vm_map_offset_t start
,
6914 vm_map_offset_t end
,
6915 vm_prot_t caller_prot
,
6917 boolean_t user_wire
)
6921 kret
= vm_map_wire_nested(map
, start
, end
, caller_prot
, tag
,
6922 user_wire
, (pmap_t
)NULL
, 0, NULL
);
6927 vm_map_wire_and_extract_external(
6929 vm_map_offset_t start
,
6930 vm_prot_t caller_prot
,
6931 boolean_t user_wire
,
6932 ppnum_t
*physpage_p
)
6936 kret
= vm_map_wire_nested(map
,
6938 start
+ VM_MAP_PAGE_SIZE(map
),
6945 if (kret
!= KERN_SUCCESS
&&
6946 physpage_p
!= NULL
) {
6953 vm_map_wire_and_extract_kernel(
6955 vm_map_offset_t start
,
6956 vm_prot_t caller_prot
,
6958 boolean_t user_wire
,
6959 ppnum_t
*physpage_p
)
6963 kret
= vm_map_wire_nested(map
,
6965 start
+ VM_MAP_PAGE_SIZE(map
),
6972 if (kret
!= KERN_SUCCESS
&&
6973 physpage_p
!= NULL
) {
6982 * Sets the pageability of the specified address range in the target
6983 * as pageable. Regions specified must have been wired previously.
6985 * The map must not be locked, but a reference must remain to the map
6986 * throughout the call.
6988 * Kernel will panic on failures. User unwire ignores holes and
6989 * unwired and intransition entries to avoid losing memory by leaving
6992 static kern_return_t
6993 vm_map_unwire_nested(
6995 vm_map_offset_t start
,
6996 vm_map_offset_t end
,
6997 boolean_t user_wire
,
6999 vm_map_offset_t pmap_addr
)
7001 vm_map_entry_t entry
;
7002 struct vm_map_entry
*first_entry
, tmp_entry
;
7003 boolean_t need_wakeup
;
7004 boolean_t main_map
= FALSE
;
7005 unsigned int last_timestamp
;
7008 if (map_pmap
== NULL
) {
7011 last_timestamp
= map
->timestamp
;
7013 VM_MAP_RANGE_CHECK(map
, start
, end
);
7014 assert(page_aligned(start
));
7015 assert(page_aligned(end
));
7016 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
7017 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
7020 /* We unwired what the caller asked for: zero pages */
7022 return KERN_SUCCESS
;
7025 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
7026 entry
= first_entry
;
7028 * vm_map_clip_start will be done later.
7029 * We don't want to unnest any nested sub maps here !
7033 panic("vm_map_unwire: start not found");
7035 /* Start address is not in map. */
7037 return KERN_INVALID_ADDRESS
;
7040 if (entry
->superpage_size
) {
7041 /* superpages are always wired */
7043 return KERN_INVALID_ADDRESS
;
7046 need_wakeup
= FALSE
;
7047 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
7048 if (entry
->in_transition
) {
7051 * Another thread is wiring down this entry. Note
7052 * that if it is not for the other thread we would
7053 * be unwiring an unwired entry. This is not
7054 * permitted. If we wait, we will be unwiring memory
7058 * Another thread is unwiring this entry. We did not
7059 * have a reference to it, because if we did, this
7060 * entry will not be getting unwired now.
7065 * This could happen: there could be some
7066 * overlapping vslock/vsunlock operations
7068 * We should probably just wait and retry,
7069 * but then we have to be careful that this
7070 * entry could get "simplified" after
7071 * "in_transition" gets unset and before
7072 * we re-lookup the entry, so we would
7073 * have to re-clip the entry to avoid
7074 * re-unwiring what we have already unwired...
7075 * See vm_map_wire_nested().
7077 * Or we could just ignore "in_transition"
7078 * here and proceed to decrement the wired
7079 * count(s) on this entry. That should be fine
7080 * as long as "wired_count" doesn't drop all
7081 * the way to 0 (and we should panic if THAT
7084 panic("vm_map_unwire: in_transition entry");
7087 entry
= entry
->vme_next
;
7091 if (entry
->is_sub_map
) {
7092 vm_map_offset_t sub_start
;
7093 vm_map_offset_t sub_end
;
7094 vm_map_offset_t local_end
;
7097 vm_map_clip_start(map
, entry
, start
);
7098 vm_map_clip_end(map
, entry
, end
);
7100 sub_start
= VME_OFFSET(entry
);
7101 sub_end
= entry
->vme_end
- entry
->vme_start
;
7102 sub_end
+= VME_OFFSET(entry
);
7103 local_end
= entry
->vme_end
;
7104 if (map_pmap
== NULL
) {
7105 if (entry
->use_pmap
) {
7106 pmap
= VME_SUBMAP(entry
)->pmap
;
7107 pmap_addr
= sub_start
;
7112 if (entry
->wired_count
== 0 ||
7113 (user_wire
&& entry
->user_wired_count
== 0)) {
7115 panic("vm_map_unwire: entry is unwired");
7117 entry
= entry
->vme_next
;
7123 * Holes: Next entry should be contiguous unless
7124 * this is the end of the region.
7126 if (((entry
->vme_end
< end
) &&
7127 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7128 (entry
->vme_next
->vme_start
7129 > entry
->vme_end
)))) {
7131 panic("vm_map_unwire: non-contiguous region");
7134 * entry = entry->vme_next;
7139 subtract_wire_counts(map
, entry
, user_wire
);
7141 if (entry
->wired_count
!= 0) {
7142 entry
= entry
->vme_next
;
7146 entry
->in_transition
= TRUE
;
7147 tmp_entry
= *entry
;/* see comment in vm_map_wire() */
7150 * We can unlock the map now. The in_transition state
7151 * guarantees existence of the entry.
7154 vm_map_unwire_nested(VME_SUBMAP(entry
),
7155 sub_start
, sub_end
, user_wire
, pmap
, pmap_addr
);
7158 if (last_timestamp
+ 1 != map
->timestamp
) {
7160 * Find the entry again. It could have been
7161 * clipped or deleted after we unlocked the map.
7163 if (!vm_map_lookup_entry(map
,
7164 tmp_entry
.vme_start
,
7167 panic("vm_map_unwire: re-lookup failed");
7169 entry
= first_entry
->vme_next
;
7171 entry
= first_entry
;
7174 last_timestamp
= map
->timestamp
;
7177 * clear transition bit for all constituent entries
7178 * that were in the original entry (saved in
7179 * tmp_entry). Also check for waiters.
7181 while ((entry
!= vm_map_to_entry(map
)) &&
7182 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7183 assert(entry
->in_transition
);
7184 entry
->in_transition
= FALSE
;
7185 if (entry
->needs_wakeup
) {
7186 entry
->needs_wakeup
= FALSE
;
7189 entry
= entry
->vme_next
;
7194 vm_map_unwire_nested(VME_SUBMAP(entry
),
7195 sub_start
, sub_end
, user_wire
, map_pmap
,
7199 if (last_timestamp
+ 1 != map
->timestamp
) {
7201 * Find the entry again. It could have been
7202 * clipped or deleted after we unlocked the map.
7204 if (!vm_map_lookup_entry(map
,
7205 tmp_entry
.vme_start
,
7208 panic("vm_map_unwire: re-lookup failed");
7210 entry
= first_entry
->vme_next
;
7212 entry
= first_entry
;
7215 last_timestamp
= map
->timestamp
;
7220 if ((entry
->wired_count
== 0) ||
7221 (user_wire
&& entry
->user_wired_count
== 0)) {
7223 panic("vm_map_unwire: entry is unwired");
7226 entry
= entry
->vme_next
;
7230 assert(entry
->wired_count
> 0 &&
7231 (!user_wire
|| entry
->user_wired_count
> 0));
7233 vm_map_clip_start(map
, entry
, start
);
7234 vm_map_clip_end(map
, entry
, end
);
7238 * Holes: Next entry should be contiguous unless
7239 * this is the end of the region.
7241 if (((entry
->vme_end
< end
) &&
7242 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
7243 (entry
->vme_next
->vme_start
> entry
->vme_end
)))) {
7245 panic("vm_map_unwire: non-contiguous region");
7247 entry
= entry
->vme_next
;
7251 subtract_wire_counts(map
, entry
, user_wire
);
7253 if (entry
->wired_count
!= 0) {
7254 entry
= entry
->vme_next
;
7258 if (entry
->zero_wired_pages
) {
7259 entry
->zero_wired_pages
= FALSE
;
7262 entry
->in_transition
= TRUE
;
7263 tmp_entry
= *entry
; /* see comment in vm_map_wire() */
7266 * We can unlock the map now. The in_transition state
7267 * guarantees existence of the entry.
7271 vm_fault_unwire(map
,
7272 &tmp_entry
, FALSE
, map_pmap
, pmap_addr
);
7274 vm_fault_unwire(map
,
7275 &tmp_entry
, FALSE
, map
->pmap
,
7276 tmp_entry
.vme_start
);
7280 if (last_timestamp
+ 1 != map
->timestamp
) {
7282 * Find the entry again. It could have been clipped
7283 * or deleted after we unlocked the map.
7285 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
7288 panic("vm_map_unwire: re-lookup failed");
7290 entry
= first_entry
->vme_next
;
7292 entry
= first_entry
;
7295 last_timestamp
= map
->timestamp
;
7298 * clear transition bit for all constituent entries that
7299 * were in the original entry (saved in tmp_entry). Also
7300 * check for waiters.
7302 while ((entry
!= vm_map_to_entry(map
)) &&
7303 (entry
->vme_start
< tmp_entry
.vme_end
)) {
7304 assert(entry
->in_transition
);
7305 entry
->in_transition
= FALSE
;
7306 if (entry
->needs_wakeup
) {
7307 entry
->needs_wakeup
= FALSE
;
7310 entry
= entry
->vme_next
;
7315 * We might have fragmented the address space when we wired this
7316 * range of addresses. Attempt to re-coalesce these VM map entries
7317 * with their neighbors now that they're no longer wired.
7318 * Under some circumstances, address space fragmentation can
7319 * prevent VM object shadow chain collapsing, which can cause
7322 vm_map_simplify_range(map
, start
, end
);
7326 * wake up anybody waiting on entries that we have unwired.
7329 vm_map_entry_wakeup(map
);
7331 return KERN_SUCCESS
;
7337 vm_map_offset_t start
,
7338 vm_map_offset_t end
,
7339 boolean_t user_wire
)
7341 return vm_map_unwire_nested(map
, start
, end
,
7342 user_wire
, (pmap_t
)NULL
, 0);
7347 * vm_map_entry_delete: [ internal use only ]
7349 * Deallocate the given entry from the target map.
7352 vm_map_entry_delete(
7354 vm_map_entry_t entry
)
7356 vm_map_offset_t s
, e
;
7360 s
= entry
->vme_start
;
7362 assert(page_aligned(s
));
7363 assert(page_aligned(e
));
7364 if (entry
->map_aligned
== TRUE
) {
7365 assert(VM_MAP_PAGE_ALIGNED(s
, VM_MAP_PAGE_MASK(map
)));
7366 assert(VM_MAP_PAGE_ALIGNED(e
, VM_MAP_PAGE_MASK(map
)));
7368 assert(entry
->wired_count
== 0);
7369 assert(entry
->user_wired_count
== 0);
7370 assert(!entry
->permanent
);
7372 if (entry
->is_sub_map
) {
7374 submap
= VME_SUBMAP(entry
);
7377 object
= VME_OBJECT(entry
);
7380 vm_map_store_entry_unlink(map
, entry
);
7383 vm_map_entry_dispose(map
, entry
);
7387 * Deallocate the object only after removing all
7388 * pmap entries pointing to its pages.
7391 vm_map_deallocate(submap
);
7393 vm_object_deallocate(object
);
7398 vm_map_submap_pmap_clean(
7400 vm_map_offset_t start
,
7401 vm_map_offset_t end
,
7403 vm_map_offset_t offset
)
7405 vm_map_offset_t submap_start
;
7406 vm_map_offset_t submap_end
;
7407 vm_map_size_t remove_size
;
7408 vm_map_entry_t entry
;
7410 submap_end
= offset
+ (end
- start
);
7411 submap_start
= offset
;
7413 vm_map_lock_read(sub_map
);
7414 if (vm_map_lookup_entry(sub_map
, offset
, &entry
)) {
7415 remove_size
= (entry
->vme_end
- entry
->vme_start
);
7416 if (offset
> entry
->vme_start
) {
7417 remove_size
-= offset
- entry
->vme_start
;
7421 if (submap_end
< entry
->vme_end
) {
7423 entry
->vme_end
- submap_end
;
7425 if (entry
->is_sub_map
) {
7426 vm_map_submap_pmap_clean(
7429 start
+ remove_size
,
7433 if (map
->mapped_in_other_pmaps
&&
7434 os_ref_get_count(&map
->map_refcnt
) != 0 &&
7435 VME_OBJECT(entry
) != NULL
) {
7436 vm_object_pmap_protect_options(
7438 (VME_OFFSET(entry
) +
7445 PMAP_OPTIONS_REMOVE
);
7447 pmap_remove(map
->pmap
,
7449 (addr64_t
)(start
+ remove_size
));
7454 entry
= entry
->vme_next
;
7456 while ((entry
!= vm_map_to_entry(sub_map
))
7457 && (entry
->vme_start
< submap_end
)) {
7458 remove_size
= (entry
->vme_end
- entry
->vme_start
);
7459 if (submap_end
< entry
->vme_end
) {
7460 remove_size
-= entry
->vme_end
- submap_end
;
7462 if (entry
->is_sub_map
) {
7463 vm_map_submap_pmap_clean(
7465 (start
+ entry
->vme_start
) - offset
,
7466 ((start
+ entry
->vme_start
) - offset
) + remove_size
,
7470 if (map
->mapped_in_other_pmaps
&&
7471 os_ref_get_count(&map
->map_refcnt
) != 0 &&
7472 VME_OBJECT(entry
) != NULL
) {
7473 vm_object_pmap_protect_options(
7480 PMAP_OPTIONS_REMOVE
);
7482 pmap_remove(map
->pmap
,
7483 (addr64_t
)((start
+ entry
->vme_start
)
7485 (addr64_t
)(((start
+ entry
->vme_start
)
7486 - offset
) + remove_size
));
7489 entry
= entry
->vme_next
;
7491 vm_map_unlock_read(sub_map
);
7496 * virt_memory_guard_ast:
7498 * Handle the AST callout for a virtual memory guard.
7499 * Raise an EXC_GUARD exception and terminate the task
7500 * if configured to do so.
7503 virt_memory_guard_ast(
7505 mach_exception_data_type_t code
,
7506 mach_exception_data_type_t subcode
)
7508 task_t task
= thread
->task
;
7509 assert(task
!= kernel_task
);
7510 assert(task
== current_task());
7513 behavior
= task
->task_exc_guard
;
7515 /* Is delivery enabled */
7516 if ((behavior
& TASK_EXC_GUARD_VM_DELIVER
) == 0) {
7520 /* If only once, make sure we're that once */
7521 while (behavior
& TASK_EXC_GUARD_VM_ONCE
) {
7522 uint32_t new_behavior
= behavior
& ~TASK_EXC_GUARD_VM_DELIVER
;
7524 if (OSCompareAndSwap(behavior
, new_behavior
, &task
->task_exc_guard
)) {
7527 behavior
= task
->task_exc_guard
;
7528 if ((behavior
& TASK_EXC_GUARD_VM_DELIVER
) == 0) {
7533 /* Raise exception via corpse fork or synchronously */
7534 if ((task
->task_exc_guard
& TASK_EXC_GUARD_VM_CORPSE
) &&
7535 (task
->task_exc_guard
& TASK_EXC_GUARD_VM_FATAL
) == 0) {
7536 task_violated_guard(code
, subcode
, NULL
);
7538 task_exception_notify(EXC_GUARD
, code
, subcode
);
7541 /* Terminate the task if desired */
7542 if (task
->task_exc_guard
& TASK_EXC_GUARD_VM_FATAL
) {
7543 task_bsdtask_kill(current_task());
7548 * vm_map_guard_exception:
7550 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7552 * Right now, we do this when we find nothing mapped, or a
7553 * gap in the mapping when a user address space deallocate
7554 * was requested. We report the address of the first gap found.
7557 vm_map_guard_exception(
7558 vm_map_offset_t gap_start
,
7561 mach_exception_code_t code
= 0;
7562 unsigned int guard_type
= GUARD_TYPE_VIRT_MEMORY
;
7563 unsigned int target
= 0; /* should we pass in pid associated with map? */
7564 mach_exception_data_type_t subcode
= (uint64_t)gap_start
;
7565 boolean_t fatal
= FALSE
;
7567 task_t task
= current_task();
7569 /* Can't deliver exceptions to kernel task */
7570 if (task
== kernel_task
) {
7574 EXC_GUARD_ENCODE_TYPE(code
, guard_type
);
7575 EXC_GUARD_ENCODE_FLAVOR(code
, reason
);
7576 EXC_GUARD_ENCODE_TARGET(code
, target
);
7578 if (task
->task_exc_guard
& TASK_EXC_GUARD_VM_FATAL
) {
7581 thread_guard_violation(current_thread(), code
, subcode
, fatal
);
7585 * vm_map_delete: [ internal use only ]
7587 * Deallocates the given address range from the target map.
7588 * Removes all user wirings. Unwires one kernel wiring if
7589 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7590 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7591 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7593 * This routine is called with map locked and leaves map locked.
7595 static kern_return_t
7598 vm_map_offset_t start
,
7599 vm_map_offset_t end
,
7603 vm_map_entry_t entry
, next
;
7604 struct vm_map_entry
*first_entry
, tmp_entry
;
7607 boolean_t need_wakeup
;
7608 unsigned int last_timestamp
= ~0; /* unlikely value */
7610 vm_map_offset_t gap_start
;
7611 __unused vm_map_offset_t save_start
= start
;
7612 __unused vm_map_offset_t save_end
= end
;
7613 const vm_map_offset_t FIND_GAP
= 1; /* a not page aligned value */
7614 const vm_map_offset_t GAPS_OK
= 2; /* a different not page aligned value */
7616 if (map
!= kernel_map
&& !(flags
& VM_MAP_REMOVE_GAPS_OK
) && !map
->terminated
) {
7617 gap_start
= FIND_GAP
;
7619 gap_start
= GAPS_OK
;
7622 interruptible
= (flags
& VM_MAP_REMOVE_INTERRUPTIBLE
) ?
7623 THREAD_ABORTSAFE
: THREAD_UNINT
;
7626 * All our DMA I/O operations in IOKit are currently done by
7627 * wiring through the map entries of the task requesting the I/O.
7628 * Because of this, we must always wait for kernel wirings
7629 * to go away on the entries before deleting them.
7631 * Any caller who wants to actually remove a kernel wiring
7632 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7633 * properly remove one wiring instead of blasting through
7636 flags
|= VM_MAP_REMOVE_WAIT_FOR_KWIRE
;
7640 * Find the start of the region, and clip it
7642 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
7643 entry
= first_entry
;
7644 if (map
== kalloc_map
&&
7645 (entry
->vme_start
!= start
||
7646 entry
->vme_end
!= end
)) {
7647 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7648 "mismatched entry %p [0x%llx:0x%llx]\n",
7653 (uint64_t)entry
->vme_start
,
7654 (uint64_t)entry
->vme_end
);
7658 * If in a superpage, extend the range to include the start of the mapping.
7660 if (entry
->superpage_size
&& (start
& ~SUPERPAGE_MASK
)) {
7661 start
= SUPERPAGE_ROUND_DOWN(start
);
7665 if (start
== entry
->vme_start
) {
7667 * No need to clip. We don't want to cause
7668 * any unnecessary unnesting in this case...
7671 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
7672 entry
->map_aligned
&&
7673 !VM_MAP_PAGE_ALIGNED(
7675 VM_MAP_PAGE_MASK(map
))) {
7677 * The entry will no longer be
7678 * map-aligned after clipping
7679 * and the caller said it's OK.
7681 entry
->map_aligned
= FALSE
;
7683 if (map
== kalloc_map
) {
7684 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7685 " clipping %p at 0x%llx\n",
7692 vm_map_clip_start(map
, entry
, start
);
7696 * Fix the lookup hint now, rather than each
7697 * time through the loop.
7699 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
7701 if (map
->pmap
== kernel_pmap
&&
7702 os_ref_get_count(&map
->map_refcnt
) != 0) {
7703 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7704 "no map entry at 0x%llx\n",
7710 entry
= first_entry
->vme_next
;
7711 if (gap_start
== FIND_GAP
) {
7717 if (entry
->superpage_size
) {
7718 end
= SUPERPAGE_ROUND_UP(end
);
7721 need_wakeup
= FALSE
;
7723 * Step through all entries in this region
7725 s
= entry
->vme_start
;
7726 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
7728 * At this point, we have deleted all the memory entries
7729 * between "start" and "s". We still need to delete
7730 * all memory entries between "s" and "end".
7731 * While we were blocked and the map was unlocked, some
7732 * new memory entries could have been re-allocated between
7733 * "start" and "s" and we don't want to mess with those.
7734 * Some of those entries could even have been re-assembled
7735 * with an entry after "s" (in vm_map_simplify_entry()), so
7736 * we may have to vm_map_clip_start() again.
7739 if (entry
->vme_start
>= s
) {
7741 * This entry starts on or after "s"
7742 * so no need to clip its start.
7746 * This entry has been re-assembled by a
7747 * vm_map_simplify_entry(). We need to
7748 * re-clip its start.
7750 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
7751 entry
->map_aligned
&&
7752 !VM_MAP_PAGE_ALIGNED(s
,
7753 VM_MAP_PAGE_MASK(map
))) {
7755 * The entry will no longer be map-aligned
7756 * after clipping and the caller said it's OK.
7758 entry
->map_aligned
= FALSE
;
7760 if (map
== kalloc_map
) {
7761 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7762 "clipping %p at 0x%llx\n",
7769 vm_map_clip_start(map
, entry
, s
);
7771 if (entry
->vme_end
<= end
) {
7773 * This entry is going away completely, so no need
7774 * to clip and possibly cause an unnecessary unnesting.
7777 if ((flags
& VM_MAP_REMOVE_NO_MAP_ALIGN
) &&
7778 entry
->map_aligned
&&
7779 !VM_MAP_PAGE_ALIGNED(end
,
7780 VM_MAP_PAGE_MASK(map
))) {
7782 * The entry will no longer be map-aligned
7783 * after clipping and the caller said it's OK.
7785 entry
->map_aligned
= FALSE
;
7787 if (map
== kalloc_map
) {
7788 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7789 "clipping %p at 0x%llx\n",
7796 vm_map_clip_end(map
, entry
, end
);
7799 if (entry
->permanent
) {
7800 if (map
->pmap
== kernel_pmap
) {
7801 panic("%s(%p,0x%llx,0x%llx): "
7802 "attempt to remove permanent "
7804 "%p [0x%llx:0x%llx]\n",
7810 (uint64_t) entry
->vme_start
,
7811 (uint64_t) entry
->vme_end
);
7812 } else if (flags
& VM_MAP_REMOVE_IMMUTABLE
) {
7813 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7814 entry
->permanent
= FALSE
;
7816 } else if ((entry
->protection
& VM_PROT_EXECUTE
) && !pmap_cs_enforced(map
->pmap
)) {
7817 entry
->permanent
= FALSE
;
7819 printf("%d[%s] %s(0x%llx,0x%llx): "
7820 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7823 (current_task()->bsd_info
7824 ? proc_name_address(current_task()->bsd_info
)
7829 (uint64_t)entry
->vme_start
,
7830 (uint64_t)entry
->vme_end
,
7832 entry
->max_protection
);
7835 if (vm_map_executable_immutable_verbose
) {
7836 printf("%d[%s] %s(0x%llx,0x%llx): "
7837 "permanent entry [0x%llx:0x%llx] "
7840 (current_task()->bsd_info
7841 ? proc_name_address(current_task()->bsd_info
)
7846 (uint64_t)entry
->vme_start
,
7847 (uint64_t)entry
->vme_end
,
7849 entry
->max_protection
);
7852 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7854 DTRACE_VM5(vm_map_delete_permanent
,
7855 vm_map_offset_t
, entry
->vme_start
,
7856 vm_map_offset_t
, entry
->vme_end
,
7857 vm_prot_t
, entry
->protection
,
7858 vm_prot_t
, entry
->max_protection
,
7859 int, VME_ALIAS(entry
));
7864 if (entry
->in_transition
) {
7865 wait_result_t wait_result
;
7868 * Another thread is wiring/unwiring this entry.
7869 * Let the other thread know we are waiting.
7871 assert(s
== entry
->vme_start
);
7872 entry
->needs_wakeup
= TRUE
;
7875 * wake up anybody waiting on entries that we have
7876 * already unwired/deleted.
7879 vm_map_entry_wakeup(map
);
7880 need_wakeup
= FALSE
;
7883 wait_result
= vm_map_entry_wait(map
, interruptible
);
7885 if (interruptible
&&
7886 wait_result
== THREAD_INTERRUPTED
) {
7888 * We do not clear the needs_wakeup flag,
7889 * since we cannot tell if we were the only one.
7891 return KERN_ABORTED
;
7895 * The entry could have been clipped or it
7896 * may not exist anymore. Look it up again.
7898 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
7900 * User: use the next entry
7902 if (gap_start
== FIND_GAP
) {
7905 entry
= first_entry
->vme_next
;
7906 s
= entry
->vme_start
;
7908 entry
= first_entry
;
7909 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
7911 last_timestamp
= map
->timestamp
;
7913 } /* end in_transition */
7915 if (entry
->wired_count
) {
7916 boolean_t user_wire
;
7918 user_wire
= entry
->user_wired_count
> 0;
7921 * Remove a kernel wiring if requested
7923 if (flags
& VM_MAP_REMOVE_KUNWIRE
) {
7924 entry
->wired_count
--;
7928 * Remove all user wirings for proper accounting
7930 if (entry
->user_wired_count
> 0) {
7931 while (entry
->user_wired_count
) {
7932 subtract_wire_counts(map
, entry
, user_wire
);
7936 if (entry
->wired_count
!= 0) {
7937 assert(map
!= kernel_map
);
7939 * Cannot continue. Typical case is when
7940 * a user thread has physical io pending on
7941 * on this page. Either wait for the
7942 * kernel wiring to go away or return an
7945 if (flags
& VM_MAP_REMOVE_WAIT_FOR_KWIRE
) {
7946 wait_result_t wait_result
;
7948 assert(s
== entry
->vme_start
);
7949 entry
->needs_wakeup
= TRUE
;
7950 wait_result
= vm_map_entry_wait(map
,
7953 if (interruptible
&&
7954 wait_result
== THREAD_INTERRUPTED
) {
7956 * We do not clear the
7957 * needs_wakeup flag, since we
7958 * cannot tell if we were the
7961 return KERN_ABORTED
;
7965 * The entry could have been clipped or
7966 * it may not exist anymore. Look it
7969 if (!vm_map_lookup_entry(map
, s
,
7971 assert(map
!= kernel_map
);
7973 * User: use the next entry
7975 if (gap_start
== FIND_GAP
) {
7978 entry
= first_entry
->vme_next
;
7979 s
= entry
->vme_start
;
7981 entry
= first_entry
;
7982 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
7984 last_timestamp
= map
->timestamp
;
7987 return KERN_FAILURE
;
7991 entry
->in_transition
= TRUE
;
7993 * copy current entry. see comment in vm_map_wire()
7996 assert(s
== entry
->vme_start
);
7999 * We can unlock the map now. The in_transition
8000 * state guarentees existance of the entry.
8004 if (tmp_entry
.is_sub_map
) {
8006 vm_map_offset_t sub_start
, sub_end
;
8008 vm_map_offset_t pmap_addr
;
8011 sub_map
= VME_SUBMAP(&tmp_entry
);
8012 sub_start
= VME_OFFSET(&tmp_entry
);
8013 sub_end
= sub_start
+ (tmp_entry
.vme_end
-
8014 tmp_entry
.vme_start
);
8015 if (tmp_entry
.use_pmap
) {
8016 pmap
= sub_map
->pmap
;
8017 pmap_addr
= tmp_entry
.vme_start
;
8020 pmap_addr
= tmp_entry
.vme_start
;
8022 (void) vm_map_unwire_nested(sub_map
,
8027 if (VME_OBJECT(&tmp_entry
) == kernel_object
) {
8028 pmap_protect_options(
8030 tmp_entry
.vme_start
,
8033 PMAP_OPTIONS_REMOVE
,
8036 vm_fault_unwire(map
, &tmp_entry
,
8037 VME_OBJECT(&tmp_entry
) == kernel_object
,
8038 map
->pmap
, tmp_entry
.vme_start
);
8043 if (last_timestamp
+ 1 != map
->timestamp
) {
8045 * Find the entry again. It could have
8046 * been clipped after we unlocked the map.
8048 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
8049 assert((map
!= kernel_map
) &&
8050 (!entry
->is_sub_map
));
8051 if (gap_start
== FIND_GAP
) {
8054 first_entry
= first_entry
->vme_next
;
8055 s
= first_entry
->vme_start
;
8057 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8060 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8061 first_entry
= entry
;
8064 last_timestamp
= map
->timestamp
;
8066 entry
= first_entry
;
8067 while ((entry
!= vm_map_to_entry(map
)) &&
8068 (entry
->vme_start
< tmp_entry
.vme_end
)) {
8069 assert(entry
->in_transition
);
8070 entry
->in_transition
= FALSE
;
8071 if (entry
->needs_wakeup
) {
8072 entry
->needs_wakeup
= FALSE
;
8075 entry
= entry
->vme_next
;
8078 * We have unwired the entry(s). Go back and
8081 entry
= first_entry
;
8085 /* entry is unwired */
8086 assert(entry
->wired_count
== 0);
8087 assert(entry
->user_wired_count
== 0);
8089 assert(s
== entry
->vme_start
);
8091 if (flags
& VM_MAP_REMOVE_NO_PMAP_CLEANUP
) {
8093 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8094 * vm_map_delete(), some map entries might have been
8095 * transferred to a "zap_map", which doesn't have a
8096 * pmap. The original pmap has already been flushed
8097 * in the vm_map_delete() call targeting the original
8098 * map, but when we get to destroying the "zap_map",
8099 * we don't have any pmap to flush, so let's just skip
8102 } else if (entry
->is_sub_map
) {
8103 if (entry
->use_pmap
) {
8104 #ifndef NO_NESTED_PMAP
8107 if (flags
& VM_MAP_REMOVE_NO_UNNESTING
) {
8109 * This is the final cleanup of the
8110 * address space being terminated.
8111 * No new mappings are expected and
8112 * we don't really need to unnest the
8113 * shared region (and lose the "global"
8114 * pmap mappings, if applicable).
8116 * Tell the pmap layer that we're
8117 * "clean" wrt nesting.
8119 pmap_flags
= PMAP_UNNEST_CLEAN
;
8122 * We're unmapping part of the nested
8123 * shared region, so we can't keep the
8128 pmap_unnest_options(
8130 (addr64_t
)entry
->vme_start
,
8131 entry
->vme_end
- entry
->vme_start
,
8133 #endif /* NO_NESTED_PMAP */
8134 if (map
->mapped_in_other_pmaps
&&
8135 os_ref_get_count(&map
->map_refcnt
) != 0) {
8136 /* clean up parent map/maps */
8137 vm_map_submap_pmap_clean(
8138 map
, entry
->vme_start
,
8144 vm_map_submap_pmap_clean(
8145 map
, entry
->vme_start
, entry
->vme_end
,
8149 } else if (VME_OBJECT(entry
) != kernel_object
&&
8150 VME_OBJECT(entry
) != compressor_object
) {
8151 object
= VME_OBJECT(entry
);
8152 if (map
->mapped_in_other_pmaps
&&
8153 os_ref_get_count(&map
->map_refcnt
) != 0) {
8154 vm_object_pmap_protect_options(
8155 object
, VME_OFFSET(entry
),
8156 entry
->vme_end
- entry
->vme_start
,
8160 PMAP_OPTIONS_REMOVE
);
8161 } else if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) ||
8162 (map
->pmap
== kernel_pmap
)) {
8163 /* Remove translations associated
8164 * with this range unless the entry
8165 * does not have an object, or
8166 * it's the kernel map or a descendant
8167 * since the platform could potentially
8168 * create "backdoor" mappings invisible
8169 * to the VM. It is expected that
8170 * objectless, non-kernel ranges
8171 * do not have such VM invisible
8174 pmap_remove_options(map
->pmap
,
8175 (addr64_t
)entry
->vme_start
,
8176 (addr64_t
)entry
->vme_end
,
8177 PMAP_OPTIONS_REMOVE
);
8181 if (entry
->iokit_acct
) {
8182 /* alternate accounting */
8183 DTRACE_VM4(vm_map_iokit_unmapped_region
,
8185 vm_map_offset_t
, entry
->vme_start
,
8186 vm_map_offset_t
, entry
->vme_end
,
8187 int, VME_ALIAS(entry
));
8188 vm_map_iokit_unmapped_region(map
,
8191 entry
->iokit_acct
= FALSE
;
8192 entry
->use_pmap
= FALSE
;
8196 * All pmap mappings for this map entry must have been
8200 assert(vm_map_pmap_is_empty(map
,
8205 next
= entry
->vme_next
;
8207 if (map
->pmap
== kernel_pmap
&&
8208 os_ref_get_count(&map
->map_refcnt
) != 0 &&
8209 entry
->vme_end
< end
&&
8210 (next
== vm_map_to_entry(map
) ||
8211 next
->vme_start
!= entry
->vme_end
)) {
8212 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8213 "hole after %p at 0x%llx\n",
8218 (uint64_t)entry
->vme_end
);
8222 * If the desired range didn't end with "entry", then there is a gap if
8223 * we wrapped around to the start of the map or if "entry" and "next"
8224 * aren't contiguous.
8226 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8227 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8229 if (gap_start
== FIND_GAP
&&
8230 vm_map_round_page(entry
->vme_end
, VM_MAP_PAGE_MASK(map
)) < end
&&
8231 (next
== vm_map_to_entry(map
) || entry
->vme_end
!= next
->vme_start
)) {
8232 gap_start
= entry
->vme_end
;
8234 s
= next
->vme_start
;
8235 last_timestamp
= map
->timestamp
;
8237 if (entry
->permanent
) {
8239 * A permanent entry can not be removed, so leave it
8240 * in place but remove all access permissions.
8242 entry
->protection
= VM_PROT_NONE
;
8243 entry
->max_protection
= VM_PROT_NONE
;
8244 } else if ((flags
& VM_MAP_REMOVE_SAVE_ENTRIES
) &&
8245 zap_map
!= VM_MAP_NULL
) {
8246 vm_map_size_t entry_size
;
8248 * The caller wants to save the affected VM map entries
8249 * into the "zap_map". The caller will take care of
8252 /* unlink the entry from "map" ... */
8253 vm_map_store_entry_unlink(map
, entry
);
8254 /* ... and add it to the end of the "zap_map" */
8255 vm_map_store_entry_link(zap_map
,
8256 vm_map_last_entry(zap_map
),
8258 VM_MAP_KERNEL_FLAGS_NONE
);
8259 entry_size
= entry
->vme_end
- entry
->vme_start
;
8260 map
->size
-= entry_size
;
8261 zap_map
->size
+= entry_size
;
8262 /* we didn't unlock the map, so no timestamp increase */
8265 vm_map_entry_delete(map
, entry
);
8266 /* vm_map_entry_delete unlocks the map */
8272 if (entry
== vm_map_to_entry(map
)) {
8275 if (last_timestamp
+ 1 != map
->timestamp
) {
8277 * We are responsible for deleting everything
8278 * from the given space. If someone has interfered,
8279 * we pick up where we left off. Back fills should
8280 * be all right for anyone, except map_delete, and
8281 * we have to assume that the task has been fully
8282 * disabled before we get here
8284 if (!vm_map_lookup_entry(map
, s
, &entry
)) {
8285 entry
= entry
->vme_next
;
8288 * Nothing found for s. If we weren't already done, then there is a gap.
8290 if (gap_start
== FIND_GAP
&& s
< end
) {
8293 s
= entry
->vme_start
;
8295 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
8298 * others can not only allocate behind us, we can
8299 * also see coalesce while we don't have the map lock
8301 if (entry
== vm_map_to_entry(map
)) {
8305 last_timestamp
= map
->timestamp
;
8308 if (map
->wait_for_space
) {
8309 thread_wakeup((event_t
) map
);
8312 * wake up anybody waiting on entries that we have already deleted.
8315 vm_map_entry_wakeup(map
);
8318 if (gap_start
!= FIND_GAP
&& gap_start
!= GAPS_OK
) {
8319 DTRACE_VM3(kern_vm_deallocate_gap
,
8320 vm_map_offset_t
, gap_start
,
8321 vm_map_offset_t
, save_start
,
8322 vm_map_offset_t
, save_end
);
8323 if (!(flags
& VM_MAP_REMOVE_GAPS_OK
)) {
8324 vm_map_guard_exception(gap_start
, kGUARD_EXC_DEALLOC_GAP
);
8328 return KERN_SUCCESS
;
8335 * Clean out a task's map.
8342 map
->terminated
= TRUE
;
8345 return vm_map_remove(map
,
8351 * + remove immutable mappings
8352 * + allow gaps in range
8354 (VM_MAP_REMOVE_NO_UNNESTING
|
8355 VM_MAP_REMOVE_IMMUTABLE
|
8356 VM_MAP_REMOVE_GAPS_OK
));
8362 * Remove the given address range from the target map.
8363 * This is the exported form of vm_map_delete.
8368 vm_map_offset_t start
,
8369 vm_map_offset_t end
,
8372 kern_return_t result
;
8375 VM_MAP_RANGE_CHECK(map
, start
, end
);
8377 * For the zone_map, the kernel controls the allocation/freeing of memory.
8378 * Any free to the zone_map should be within the bounds of the map and
8379 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8380 * free to the zone_map into a no-op, there is a problem and we should
8383 if ((map
== zone_map
) && (start
== end
)) {
8384 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start
);
8386 result
= vm_map_delete(map
, start
, end
, flags
, VM_MAP_NULL
);
8393 * vm_map_remove_locked:
8395 * Remove the given address range from the target locked map.
8396 * This is the exported form of vm_map_delete.
8399 vm_map_remove_locked(
8401 vm_map_offset_t start
,
8402 vm_map_offset_t end
,
8405 kern_return_t result
;
8407 VM_MAP_RANGE_CHECK(map
, start
, end
);
8408 result
= vm_map_delete(map
, start
, end
, flags
, VM_MAP_NULL
);
8414 * Routine: vm_map_copy_allocate
8417 * Allocates and initializes a map copy object.
8419 static vm_map_copy_t
8420 vm_map_copy_allocate(void)
8422 vm_map_copy_t new_copy
;
8424 new_copy
= zalloc(vm_map_copy_zone
);
8425 bzero(new_copy
, sizeof(*new_copy
));
8426 new_copy
->c_u
.hdr
.rb_head_store
.rbh_root
= (void*)(int)SKIP_RB_TREE
;
8427 vm_map_copy_first_entry(new_copy
) = vm_map_copy_to_entry(new_copy
);
8428 vm_map_copy_last_entry(new_copy
) = vm_map_copy_to_entry(new_copy
);
8433 * Routine: vm_map_copy_discard
8436 * Dispose of a map copy object (returned by
8440 vm_map_copy_discard(
8443 if (copy
== VM_MAP_COPY_NULL
) {
8447 switch (copy
->type
) {
8448 case VM_MAP_COPY_ENTRY_LIST
:
8449 while (vm_map_copy_first_entry(copy
) !=
8450 vm_map_copy_to_entry(copy
)) {
8451 vm_map_entry_t entry
= vm_map_copy_first_entry(copy
);
8453 vm_map_copy_entry_unlink(copy
, entry
);
8454 if (entry
->is_sub_map
) {
8455 vm_map_deallocate(VME_SUBMAP(entry
));
8457 vm_object_deallocate(VME_OBJECT(entry
));
8459 vm_map_copy_entry_dispose(copy
, entry
);
8462 case VM_MAP_COPY_OBJECT
:
8463 vm_object_deallocate(copy
->cpy_object
);
8465 case VM_MAP_COPY_KERNEL_BUFFER
:
8468 * The vm_map_copy_t and possibly the data buffer were
8469 * allocated by a single call to kalloc(), i.e. the
8470 * vm_map_copy_t was not allocated out of the zone.
8472 if (copy
->size
> msg_ool_size_small
|| copy
->offset
) {
8473 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8474 (long long)copy
->size
, (long long)copy
->offset
);
8476 kfree(copy
, copy
->size
+ cpy_kdata_hdr_sz
);
8479 zfree(vm_map_copy_zone
, copy
);
8483 * Routine: vm_map_copy_copy
8486 * Move the information in a map copy object to
8487 * a new map copy object, leaving the old one
8490 * This is used by kernel routines that need
8491 * to look at out-of-line data (in copyin form)
8492 * before deciding whether to return SUCCESS.
8493 * If the routine returns FAILURE, the original
8494 * copy object will be deallocated; therefore,
8495 * these routines must make a copy of the copy
8496 * object and leave the original empty so that
8497 * deallocation will not fail.
8503 vm_map_copy_t new_copy
;
8505 if (copy
== VM_MAP_COPY_NULL
) {
8506 return VM_MAP_COPY_NULL
;
8510 * Allocate a new copy object, and copy the information
8511 * from the old one into it.
8514 new_copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
8517 if (copy
->type
== VM_MAP_COPY_ENTRY_LIST
) {
8519 * The links in the entry chain must be
8520 * changed to point to the new copy object.
8522 vm_map_copy_first_entry(copy
)->vme_prev
8523 = vm_map_copy_to_entry(new_copy
);
8524 vm_map_copy_last_entry(copy
)->vme_next
8525 = vm_map_copy_to_entry(new_copy
);
8529 * Change the old copy object into one that contains
8530 * nothing to be deallocated.
8532 copy
->type
= VM_MAP_COPY_OBJECT
;
8533 copy
->cpy_object
= VM_OBJECT_NULL
;
8536 * Return the new object.
8541 static kern_return_t
8542 vm_map_overwrite_submap_recurse(
8544 vm_map_offset_t dst_addr
,
8545 vm_map_size_t dst_size
)
8547 vm_map_offset_t dst_end
;
8548 vm_map_entry_t tmp_entry
;
8549 vm_map_entry_t entry
;
8550 kern_return_t result
;
8551 boolean_t encountered_sub_map
= FALSE
;
8556 * Verify that the destination is all writeable
8557 * initially. We have to trunc the destination
8558 * address and round the copy size or we'll end up
8559 * splitting entries in strange ways.
8562 dst_end
= vm_map_round_page(dst_addr
+ dst_size
,
8563 VM_MAP_PAGE_MASK(dst_map
));
8564 vm_map_lock(dst_map
);
8567 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
8568 vm_map_unlock(dst_map
);
8569 return KERN_INVALID_ADDRESS
;
8572 vm_map_clip_start(dst_map
,
8574 vm_map_trunc_page(dst_addr
,
8575 VM_MAP_PAGE_MASK(dst_map
)));
8576 if (tmp_entry
->is_sub_map
) {
8577 /* clipping did unnest if needed */
8578 assert(!tmp_entry
->use_pmap
);
8581 for (entry
= tmp_entry
;;) {
8582 vm_map_entry_t next
;
8584 next
= entry
->vme_next
;
8585 while (entry
->is_sub_map
) {
8586 vm_map_offset_t sub_start
;
8587 vm_map_offset_t sub_end
;
8588 vm_map_offset_t local_end
;
8590 if (entry
->in_transition
) {
8592 * Say that we are waiting, and wait for entry.
8594 entry
->needs_wakeup
= TRUE
;
8595 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8600 encountered_sub_map
= TRUE
;
8601 sub_start
= VME_OFFSET(entry
);
8603 if (entry
->vme_end
< dst_end
) {
8604 sub_end
= entry
->vme_end
;
8608 sub_end
-= entry
->vme_start
;
8609 sub_end
+= VME_OFFSET(entry
);
8610 local_end
= entry
->vme_end
;
8611 vm_map_unlock(dst_map
);
8613 result
= vm_map_overwrite_submap_recurse(
8616 sub_end
- sub_start
);
8618 if (result
!= KERN_SUCCESS
) {
8621 if (dst_end
<= entry
->vme_end
) {
8622 return KERN_SUCCESS
;
8624 vm_map_lock(dst_map
);
8625 if (!vm_map_lookup_entry(dst_map
, local_end
,
8627 vm_map_unlock(dst_map
);
8628 return KERN_INVALID_ADDRESS
;
8631 next
= entry
->vme_next
;
8634 if (!(entry
->protection
& VM_PROT_WRITE
)) {
8635 vm_map_unlock(dst_map
);
8636 return KERN_PROTECTION_FAILURE
;
8640 * If the entry is in transition, we must wait
8641 * for it to exit that state. Anything could happen
8642 * when we unlock the map, so start over.
8644 if (entry
->in_transition
) {
8646 * Say that we are waiting, and wait for entry.
8648 entry
->needs_wakeup
= TRUE
;
8649 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8655 * our range is contained completely within this map entry
8657 if (dst_end
<= entry
->vme_end
) {
8658 vm_map_unlock(dst_map
);
8659 return KERN_SUCCESS
;
8662 * check that range specified is contiguous region
8664 if ((next
== vm_map_to_entry(dst_map
)) ||
8665 (next
->vme_start
!= entry
->vme_end
)) {
8666 vm_map_unlock(dst_map
);
8667 return KERN_INVALID_ADDRESS
;
8671 * Check for permanent objects in the destination.
8673 if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) &&
8674 ((!VME_OBJECT(entry
)->internal
) ||
8675 (VME_OBJECT(entry
)->true_share
))) {
8676 if (encountered_sub_map
) {
8677 vm_map_unlock(dst_map
);
8678 return KERN_FAILURE
;
8685 vm_map_unlock(dst_map
);
8686 return KERN_SUCCESS
;
8690 * Routine: vm_map_copy_overwrite
8693 * Copy the memory described by the map copy
8694 * object (copy; returned by vm_map_copyin) onto
8695 * the specified destination region (dst_map, dst_addr).
8696 * The destination must be writeable.
8698 * Unlike vm_map_copyout, this routine actually
8699 * writes over previously-mapped memory. If the
8700 * previous mapping was to a permanent (user-supplied)
8701 * memory object, it is preserved.
8703 * The attributes (protection and inheritance) of the
8704 * destination region are preserved.
8706 * If successful, consumes the copy object.
8707 * Otherwise, the caller is responsible for it.
8709 * Implementation notes:
8710 * To overwrite aligned temporary virtual memory, it is
8711 * sufficient to remove the previous mapping and insert
8712 * the new copy. This replacement is done either on
8713 * the whole region (if no permanent virtual memory
8714 * objects are embedded in the destination region) or
8715 * in individual map entries.
8717 * To overwrite permanent virtual memory , it is necessary
8718 * to copy each page, as the external memory management
8719 * interface currently does not provide any optimizations.
8721 * Unaligned memory also has to be copied. It is possible
8722 * to use 'vm_trickery' to copy the aligned data. This is
8723 * not done but not hard to implement.
8725 * Once a page of permanent memory has been overwritten,
8726 * it is impossible to interrupt this function; otherwise,
8727 * the call would be neither atomic nor location-independent.
8728 * The kernel-state portion of a user thread must be
8731 * It may be expensive to forward all requests that might
8732 * overwrite permanent memory (vm_write, vm_copy) to
8733 * uninterruptible kernel threads. This routine may be
8734 * called by interruptible threads; however, success is
8735 * not guaranteed -- if the request cannot be performed
8736 * atomically and interruptibly, an error indication is
8740 static kern_return_t
8741 vm_map_copy_overwrite_nested(
8743 vm_map_address_t dst_addr
,
8745 boolean_t interruptible
,
8747 boolean_t discard_on_success
)
8749 vm_map_offset_t dst_end
;
8750 vm_map_entry_t tmp_entry
;
8751 vm_map_entry_t entry
;
8753 boolean_t aligned
= TRUE
;
8754 boolean_t contains_permanent_objects
= FALSE
;
8755 boolean_t encountered_sub_map
= FALSE
;
8756 vm_map_offset_t base_addr
;
8757 vm_map_size_t copy_size
;
8758 vm_map_size_t total_size
;
8762 * Check for null copy object.
8765 if (copy
== VM_MAP_COPY_NULL
) {
8766 return KERN_SUCCESS
;
8770 * Check for special kernel buffer allocated
8771 * by new_ipc_kmsg_copyin.
8774 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
8775 return vm_map_copyout_kernel_buffer(
8777 copy
, copy
->size
, TRUE
, discard_on_success
);
8781 * Only works for entry lists at the moment. Will
8782 * support page lists later.
8785 assert(copy
->type
== VM_MAP_COPY_ENTRY_LIST
);
8787 if (copy
->size
== 0) {
8788 if (discard_on_success
) {
8789 vm_map_copy_discard(copy
);
8791 return KERN_SUCCESS
;
8795 * Verify that the destination is all writeable
8796 * initially. We have to trunc the destination
8797 * address and round the copy size or we'll end up
8798 * splitting entries in strange ways.
8801 if (!VM_MAP_PAGE_ALIGNED(copy
->size
,
8802 VM_MAP_PAGE_MASK(dst_map
)) ||
8803 !VM_MAP_PAGE_ALIGNED(copy
->offset
,
8804 VM_MAP_PAGE_MASK(dst_map
)) ||
8805 !VM_MAP_PAGE_ALIGNED(dst_addr
,
8806 VM_MAP_PAGE_MASK(dst_map
))) {
8808 dst_end
= vm_map_round_page(dst_addr
+ copy
->size
,
8809 VM_MAP_PAGE_MASK(dst_map
));
8811 dst_end
= dst_addr
+ copy
->size
;
8814 vm_map_lock(dst_map
);
8816 /* LP64todo - remove this check when vm_map_commpage64()
8817 * no longer has to stuff in a map_entry for the commpage
8818 * above the map's max_offset.
8820 if (dst_addr
>= dst_map
->max_offset
) {
8821 vm_map_unlock(dst_map
);
8822 return KERN_INVALID_ADDRESS
;
8826 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
8827 vm_map_unlock(dst_map
);
8828 return KERN_INVALID_ADDRESS
;
8830 vm_map_clip_start(dst_map
,
8832 vm_map_trunc_page(dst_addr
,
8833 VM_MAP_PAGE_MASK(dst_map
)));
8834 for (entry
= tmp_entry
;;) {
8835 vm_map_entry_t next
= entry
->vme_next
;
8837 while (entry
->is_sub_map
) {
8838 vm_map_offset_t sub_start
;
8839 vm_map_offset_t sub_end
;
8840 vm_map_offset_t local_end
;
8842 if (entry
->in_transition
) {
8844 * Say that we are waiting, and wait for entry.
8846 entry
->needs_wakeup
= TRUE
;
8847 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8852 local_end
= entry
->vme_end
;
8853 if (!(entry
->needs_copy
)) {
8854 /* if needs_copy we are a COW submap */
8855 /* in such a case we just replace so */
8856 /* there is no need for the follow- */
8858 encountered_sub_map
= TRUE
;
8859 sub_start
= VME_OFFSET(entry
);
8861 if (entry
->vme_end
< dst_end
) {
8862 sub_end
= entry
->vme_end
;
8866 sub_end
-= entry
->vme_start
;
8867 sub_end
+= VME_OFFSET(entry
);
8868 vm_map_unlock(dst_map
);
8870 kr
= vm_map_overwrite_submap_recurse(
8873 sub_end
- sub_start
);
8874 if (kr
!= KERN_SUCCESS
) {
8877 vm_map_lock(dst_map
);
8880 if (dst_end
<= entry
->vme_end
) {
8881 goto start_overwrite
;
8883 if (!vm_map_lookup_entry(dst_map
, local_end
,
8885 vm_map_unlock(dst_map
);
8886 return KERN_INVALID_ADDRESS
;
8888 next
= entry
->vme_next
;
8891 if (!(entry
->protection
& VM_PROT_WRITE
)) {
8892 vm_map_unlock(dst_map
);
8893 return KERN_PROTECTION_FAILURE
;
8897 * If the entry is in transition, we must wait
8898 * for it to exit that state. Anything could happen
8899 * when we unlock the map, so start over.
8901 if (entry
->in_transition
) {
8903 * Say that we are waiting, and wait for entry.
8905 entry
->needs_wakeup
= TRUE
;
8906 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
8912 * our range is contained completely within this map entry
8914 if (dst_end
<= entry
->vme_end
) {
8918 * check that range specified is contiguous region
8920 if ((next
== vm_map_to_entry(dst_map
)) ||
8921 (next
->vme_start
!= entry
->vme_end
)) {
8922 vm_map_unlock(dst_map
);
8923 return KERN_INVALID_ADDRESS
;
8928 * Check for permanent objects in the destination.
8930 if ((VME_OBJECT(entry
) != VM_OBJECT_NULL
) &&
8931 ((!VME_OBJECT(entry
)->internal
) ||
8932 (VME_OBJECT(entry
)->true_share
))) {
8933 contains_permanent_objects
= TRUE
;
8941 * If there are permanent objects in the destination, then
8942 * the copy cannot be interrupted.
8945 if (interruptible
&& contains_permanent_objects
) {
8946 vm_map_unlock(dst_map
);
8947 return KERN_FAILURE
; /* XXX */
8952 * Make a second pass, overwriting the data
8953 * At the beginning of each loop iteration,
8954 * the next entry to be overwritten is "tmp_entry"
8955 * (initially, the value returned from the lookup above),
8956 * and the starting address expected in that entry
8960 total_size
= copy
->size
;
8961 if (encountered_sub_map
) {
8963 /* re-calculate tmp_entry since we've had the map */
8965 if (!vm_map_lookup_entry( dst_map
, dst_addr
, &tmp_entry
)) {
8966 vm_map_unlock(dst_map
);
8967 return KERN_INVALID_ADDRESS
;
8970 copy_size
= copy
->size
;
8973 base_addr
= dst_addr
;
8975 /* deconstruct the copy object and do in parts */
8976 /* only in sub_map, interruptable case */
8977 vm_map_entry_t copy_entry
;
8978 vm_map_entry_t previous_prev
= VM_MAP_ENTRY_NULL
;
8979 vm_map_entry_t next_copy
= VM_MAP_ENTRY_NULL
;
8981 int remaining_entries
= 0;
8982 vm_map_offset_t new_offset
= 0;
8984 for (entry
= tmp_entry
; copy_size
== 0;) {
8985 vm_map_entry_t next
;
8987 next
= entry
->vme_next
;
8989 /* tmp_entry and base address are moved along */
8990 /* each time we encounter a sub-map. Otherwise */
8991 /* entry can outpase tmp_entry, and the copy_size */
8992 /* may reflect the distance between them */
8993 /* if the current entry is found to be in transition */
8994 /* we will start over at the beginning or the last */
8995 /* encounter of a submap as dictated by base_addr */
8996 /* we will zero copy_size accordingly. */
8997 if (entry
->in_transition
) {
8999 * Say that we are waiting, and wait for entry.
9001 entry
->needs_wakeup
= TRUE
;
9002 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9004 if (!vm_map_lookup_entry(dst_map
, base_addr
,
9006 vm_map_unlock(dst_map
);
9007 return KERN_INVALID_ADDRESS
;
9013 if (entry
->is_sub_map
) {
9014 vm_map_offset_t sub_start
;
9015 vm_map_offset_t sub_end
;
9016 vm_map_offset_t local_end
;
9018 if (entry
->needs_copy
) {
9019 /* if this is a COW submap */
9020 /* just back the range with a */
9021 /* anonymous entry */
9022 if (entry
->vme_end
< dst_end
) {
9023 sub_end
= entry
->vme_end
;
9027 if (entry
->vme_start
< base_addr
) {
9028 sub_start
= base_addr
;
9030 sub_start
= entry
->vme_start
;
9033 dst_map
, entry
, sub_end
);
9035 dst_map
, entry
, sub_start
);
9036 assert(!entry
->use_pmap
);
9037 assert(!entry
->iokit_acct
);
9038 entry
->use_pmap
= TRUE
;
9039 entry
->is_sub_map
= FALSE
;
9042 VME_OBJECT_SET(entry
, VM_OBJECT_NULL
);
9043 VME_OFFSET_SET(entry
, 0);
9044 entry
->is_shared
= FALSE
;
9045 entry
->needs_copy
= FALSE
;
9046 entry
->protection
= VM_PROT_DEFAULT
;
9047 entry
->max_protection
= VM_PROT_ALL
;
9048 entry
->wired_count
= 0;
9049 entry
->user_wired_count
= 0;
9050 if (entry
->inheritance
9051 == VM_INHERIT_SHARE
) {
9052 entry
->inheritance
= VM_INHERIT_COPY
;
9056 /* first take care of any non-sub_map */
9057 /* entries to send */
9058 if (base_addr
< entry
->vme_start
) {
9061 entry
->vme_start
- base_addr
;
9064 sub_start
= VME_OFFSET(entry
);
9066 if (entry
->vme_end
< dst_end
) {
9067 sub_end
= entry
->vme_end
;
9071 sub_end
-= entry
->vme_start
;
9072 sub_end
+= VME_OFFSET(entry
);
9073 local_end
= entry
->vme_end
;
9074 vm_map_unlock(dst_map
);
9075 copy_size
= sub_end
- sub_start
;
9077 /* adjust the copy object */
9078 if (total_size
> copy_size
) {
9079 vm_map_size_t local_size
= 0;
9080 vm_map_size_t entry_size
;
9083 new_offset
= copy
->offset
;
9084 copy_entry
= vm_map_copy_first_entry(copy
);
9085 while (copy_entry
!=
9086 vm_map_copy_to_entry(copy
)) {
9087 entry_size
= copy_entry
->vme_end
-
9088 copy_entry
->vme_start
;
9089 if ((local_size
< copy_size
) &&
9090 ((local_size
+ entry_size
)
9092 vm_map_copy_clip_end(copy
,
9094 copy_entry
->vme_start
+
9095 (copy_size
- local_size
));
9096 entry_size
= copy_entry
->vme_end
-
9097 copy_entry
->vme_start
;
9098 local_size
+= entry_size
;
9099 new_offset
+= entry_size
;
9101 if (local_size
>= copy_size
) {
9102 next_copy
= copy_entry
->vme_next
;
9103 copy_entry
->vme_next
=
9104 vm_map_copy_to_entry(copy
);
9106 copy
->cpy_hdr
.links
.prev
;
9107 copy
->cpy_hdr
.links
.prev
= copy_entry
;
9108 copy
->size
= copy_size
;
9110 copy
->cpy_hdr
.nentries
;
9111 remaining_entries
-= nentries
;
9112 copy
->cpy_hdr
.nentries
= nentries
;
9115 local_size
+= entry_size
;
9116 new_offset
+= entry_size
;
9119 copy_entry
= copy_entry
->vme_next
;
9123 if ((entry
->use_pmap
) && (pmap
== NULL
)) {
9124 kr
= vm_map_copy_overwrite_nested(
9129 VME_SUBMAP(entry
)->pmap
,
9131 } else if (pmap
!= NULL
) {
9132 kr
= vm_map_copy_overwrite_nested(
9136 interruptible
, pmap
,
9139 kr
= vm_map_copy_overwrite_nested(
9147 if (kr
!= KERN_SUCCESS
) {
9148 if (next_copy
!= NULL
) {
9149 copy
->cpy_hdr
.nentries
+=
9151 copy
->cpy_hdr
.links
.prev
->vme_next
=
9153 copy
->cpy_hdr
.links
.prev
9155 copy
->size
= total_size
;
9159 if (dst_end
<= local_end
) {
9160 return KERN_SUCCESS
;
9162 /* otherwise copy no longer exists, it was */
9163 /* destroyed after successful copy_overwrite */
9164 copy
= vm_map_copy_allocate();
9165 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9166 copy
->offset
= new_offset
;
9170 * this does not seem to deal with
9171 * the VM map store (R&B tree)
9174 total_size
-= copy_size
;
9176 /* put back remainder of copy in container */
9177 if (next_copy
!= NULL
) {
9178 copy
->cpy_hdr
.nentries
= remaining_entries
;
9179 copy
->cpy_hdr
.links
.next
= next_copy
;
9180 copy
->cpy_hdr
.links
.prev
= previous_prev
;
9181 copy
->size
= total_size
;
9182 next_copy
->vme_prev
=
9183 vm_map_copy_to_entry(copy
);
9186 base_addr
= local_end
;
9187 vm_map_lock(dst_map
);
9188 if (!vm_map_lookup_entry(dst_map
,
9189 local_end
, &tmp_entry
)) {
9190 vm_map_unlock(dst_map
);
9191 return KERN_INVALID_ADDRESS
;
9196 if (dst_end
<= entry
->vme_end
) {
9197 copy_size
= dst_end
- base_addr
;
9201 if ((next
== vm_map_to_entry(dst_map
)) ||
9202 (next
->vme_start
!= entry
->vme_end
)) {
9203 vm_map_unlock(dst_map
);
9204 return KERN_INVALID_ADDRESS
;
9213 /* adjust the copy object */
9214 if (total_size
> copy_size
) {
9215 vm_map_size_t local_size
= 0;
9216 vm_map_size_t entry_size
;
9218 new_offset
= copy
->offset
;
9219 copy_entry
= vm_map_copy_first_entry(copy
);
9220 while (copy_entry
!= vm_map_copy_to_entry(copy
)) {
9221 entry_size
= copy_entry
->vme_end
-
9222 copy_entry
->vme_start
;
9223 if ((local_size
< copy_size
) &&
9224 ((local_size
+ entry_size
)
9226 vm_map_copy_clip_end(copy
, copy_entry
,
9227 copy_entry
->vme_start
+
9228 (copy_size
- local_size
));
9229 entry_size
= copy_entry
->vme_end
-
9230 copy_entry
->vme_start
;
9231 local_size
+= entry_size
;
9232 new_offset
+= entry_size
;
9234 if (local_size
>= copy_size
) {
9235 next_copy
= copy_entry
->vme_next
;
9236 copy_entry
->vme_next
=
9237 vm_map_copy_to_entry(copy
);
9239 copy
->cpy_hdr
.links
.prev
;
9240 copy
->cpy_hdr
.links
.prev
= copy_entry
;
9241 copy
->size
= copy_size
;
9243 copy
->cpy_hdr
.nentries
;
9244 remaining_entries
-= nentries
;
9245 copy
->cpy_hdr
.nentries
= nentries
;
9248 local_size
+= entry_size
;
9249 new_offset
+= entry_size
;
9252 copy_entry
= copy_entry
->vme_next
;
9262 local_pmap
= dst_map
->pmap
;
9265 if ((kr
= vm_map_copy_overwrite_aligned(
9266 dst_map
, tmp_entry
, copy
,
9267 base_addr
, local_pmap
)) != KERN_SUCCESS
) {
9268 if (next_copy
!= NULL
) {
9269 copy
->cpy_hdr
.nentries
+=
9271 copy
->cpy_hdr
.links
.prev
->vme_next
=
9273 copy
->cpy_hdr
.links
.prev
=
9275 copy
->size
+= copy_size
;
9279 vm_map_unlock(dst_map
);
9284 * if the copy and dst address are misaligned but the same
9285 * offset within the page we can copy_not_aligned the
9286 * misaligned parts and copy aligned the rest. If they are
9287 * aligned but len is unaligned we simply need to copy
9288 * the end bit unaligned. We'll need to split the misaligned
9289 * bits of the region in this case !
9291 /* ALWAYS UNLOCKS THE dst_map MAP */
9292 kr
= vm_map_copy_overwrite_unaligned(
9297 discard_on_success
);
9298 if (kr
!= KERN_SUCCESS
) {
9299 if (next_copy
!= NULL
) {
9300 copy
->cpy_hdr
.nentries
+=
9302 copy
->cpy_hdr
.links
.prev
->vme_next
=
9304 copy
->cpy_hdr
.links
.prev
=
9306 copy
->size
+= copy_size
;
9311 total_size
-= copy_size
;
9312 if (total_size
== 0) {
9315 base_addr
+= copy_size
;
9317 copy
->offset
= new_offset
;
9318 if (next_copy
!= NULL
) {
9319 copy
->cpy_hdr
.nentries
= remaining_entries
;
9320 copy
->cpy_hdr
.links
.next
= next_copy
;
9321 copy
->cpy_hdr
.links
.prev
= previous_prev
;
9322 next_copy
->vme_prev
= vm_map_copy_to_entry(copy
);
9323 copy
->size
= total_size
;
9325 vm_map_lock(dst_map
);
9327 if (!vm_map_lookup_entry(dst_map
,
9328 base_addr
, &tmp_entry
)) {
9329 vm_map_unlock(dst_map
);
9330 return KERN_INVALID_ADDRESS
;
9332 if (tmp_entry
->in_transition
) {
9333 entry
->needs_wakeup
= TRUE
;
9334 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
9339 vm_map_clip_start(dst_map
,
9341 vm_map_trunc_page(base_addr
,
9342 VM_MAP_PAGE_MASK(dst_map
)));
9348 * Throw away the vm_map_copy object
9350 if (discard_on_success
) {
9351 vm_map_copy_discard(copy
);
9354 return KERN_SUCCESS
;
9355 }/* vm_map_copy_overwrite */
/*
 * vm_map_copy_overwrite: overwrite the destination range that starts at
 * "dst_addr" with the contents of a vm_map_copy, trying to split the work
 * into an unaligned head, an aligned middle and an unaligned tail.
 *
 * NOTE(review): this excerpt is lossy -- the return type, some parameters,
 * braces and several else/goto/return statements are not visible, so the
 * notes below describe only what is shown.
 *
 * Visible flow:
 *  - an interruptible request, a null copy, or a copy that is not an
 *    entry list is handed to vm_map_copy_overwrite_nested();
 *  - an effective page mask/size is derived from the destination map's
 *    page mask, PAGE_MASK and the copy's page mask;
 *  - head_size / tail_size are the unaligned byte counts at the start
 *    and at the end of the range;
 *  - the destination entries are walked under a read lock, testing
 *    is_sub_map on each;
 *  - the first and last copy entries are moved into head_copy and
 *    tail_copy, and vm_map_copy_overwrite_nested() is called for the pieces;
 *  - when kr == KERN_SUCCESS the copy objects are discarded; the
 *    "re-assemble" code links the head and tail entries back into "copy".
 */
9358 vm_map_copy_overwrite(
9360 vm_map_offset_t dst_addr
,
9362 boolean_t interruptible
)
9364 vm_map_size_t head_size
, tail_size
;
9365 vm_map_copy_t head_copy
, tail_copy
;
9366 vm_map_offset_t head_addr
, tail_addr
;
9367 vm_map_entry_t entry
;
9369 vm_map_offset_t effective_page_mask
, effective_page_size
;
/* Cases that cannot use the head/middle/tail split go to the nested routine. */
9378 if (interruptible
||
9379 copy
== VM_MAP_COPY_NULL
||
9380 copy
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
9382 * We can't split the "copy" map if we're interruptible
9383 * or if we don't have a "copy" map...
9386 return vm_map_copy_overwrite_nested(dst_map
,
/* Pick the largest of the map's page mask, PAGE_MASK and the copy's page mask. */
9394 effective_page_mask
= MAX(VM_MAP_PAGE_MASK(dst_map
), PAGE_MASK
);
9395 effective_page_mask
= MAX(VM_MAP_COPY_PAGE_MASK(copy
),
9396 effective_page_mask
);
9397 effective_page_size
= effective_page_mask
+ 1;
9399 if (copy
->size
< 3 * effective_page_size
) {
9401 * Too small to bother with optimizing...
9406 if ((dst_addr
& effective_page_mask
) !=
9407 (copy
->offset
& effective_page_mask
)) {
9409 * Incompatible mis-alignment of source and destination...
9415 * Proper alignment or identical mis-alignment at the beginning.
9416 * Let's try and do a small unaligned copy first (if needed)
9417 * and then an aligned copy for the rest.
9419 if (!vm_map_page_aligned(dst_addr
, effective_page_mask
)) {
9420 head_addr
= dst_addr
;
9421 head_size
= (effective_page_size
-
9422 (copy
->offset
& effective_page_mask
));
9423 head_size
= MIN(head_size
, copy
->size
);
9425 if (!vm_map_page_aligned(copy
->offset
+ copy
->size
,
9426 effective_page_mask
)) {
9428 * Mis-alignment at the end.
9429 * Do an aligned copy up to the last page and
9430 * then an unaligned copy for the remaining bytes.
9432 tail_size
= ((copy
->offset
+ copy
->size
) &
9433 effective_page_mask
);
9434 tail_size
= MIN(tail_size
, copy
->size
);
9435 tail_addr
= dst_addr
+ copy
->size
- tail_size
;
9436 assert(tail_addr
>= head_addr
+ head_size
);
9438 assert(head_size
+ tail_size
<= copy
->size
);
9440 if (head_size
+ tail_size
== copy
->size
) {
9442 * It's all unaligned, no optimization possible...
9448 * Can't optimize if there are any submaps in the
9449 * destination due to the way we free the "copy" map
9450 * progressively in vm_map_copy_overwrite_nested()
9453 vm_map_lock_read(dst_map
);
9454 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &entry
)) {
9455 vm_map_unlock_read(dst_map
);
9459 (entry
!= vm_map_copy_to_entry(copy
) &&
9460 entry
->vme_start
< dst_addr
+ copy
->size
);
9461 entry
= entry
->vme_next
) {
9462 if (entry
->is_sub_map
) {
9463 vm_map_unlock_read(dst_map
);
9467 vm_map_unlock_read(dst_map
);
9471 * Unaligned copy of the first "head_size" bytes, to reach
9476 * Extract "head_copy" out of "copy".
9478 head_copy
= vm_map_copy_allocate();
9479 head_copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9480 head_copy
->cpy_hdr
.entries_pageable
=
9481 copy
->cpy_hdr
.entries_pageable
;
9482 vm_map_store_init(&head_copy
->cpy_hdr
);
/* Shrink head_size if the first copy entry ends before the computed head does. */
9484 entry
= vm_map_copy_first_entry(copy
);
9485 if (entry
->vme_end
< copy
->offset
+ head_size
) {
9486 head_size
= entry
->vme_end
- copy
->offset
;
9489 head_copy
->offset
= copy
->offset
;
9490 head_copy
->size
= head_size
;
9491 copy
->offset
+= head_size
;
9492 copy
->size
-= head_size
;
9494 vm_map_copy_clip_end(copy
, entry
, copy
->offset
);
9495 vm_map_copy_entry_unlink(copy
, entry
);
9496 vm_map_copy_entry_link(head_copy
,
9497 vm_map_copy_to_entry(head_copy
),
9501 * Do the unaligned copy.
9503 kr
= vm_map_copy_overwrite_nested(dst_map
,
9509 if (kr
!= KERN_SUCCESS
) {
9516 * Extract "tail_copy" out of "copy".
9518 tail_copy
= vm_map_copy_allocate();
9519 tail_copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
9520 tail_copy
->cpy_hdr
.entries_pageable
=
9521 copy
->cpy_hdr
.entries_pageable
;
9522 vm_map_store_init(&tail_copy
->cpy_hdr
);
9524 tail_copy
->offset
= copy
->offset
+ copy
->size
- tail_size
;
9525 tail_copy
->size
= tail_size
;
9527 copy
->size
-= tail_size
;
9529 entry
= vm_map_copy_last_entry(copy
);
9530 vm_map_copy_clip_start(copy
, entry
, tail_copy
->offset
);
9531 entry
= vm_map_copy_last_entry(copy
);
9532 vm_map_copy_entry_unlink(copy
, entry
);
9533 vm_map_copy_entry_link(tail_copy
,
9534 vm_map_copy_last_entry(tail_copy
),
9539 * Copy most (or possibly all) of the data.
9541 kr
= vm_map_copy_overwrite_nested(dst_map
,
9542 dst_addr
+ head_size
,
9547 if (kr
!= KERN_SUCCESS
) {
9552 kr
= vm_map_copy_overwrite_nested(dst_map
,
9561 assert(copy
->type
== VM_MAP_COPY_ENTRY_LIST
);
9562 if (kr
== KERN_SUCCESS
) {
9564 * Discard all the copy maps.
9567 vm_map_copy_discard(head_copy
);
9570 vm_map_copy_discard(copy
);
9572 vm_map_copy_discard(tail_copy
);
9577 * Re-assemble the original copy map.
/* Give the head entry back to "copy" and restore its offset and size. */
9580 entry
= vm_map_copy_first_entry(head_copy
);
9581 vm_map_copy_entry_unlink(head_copy
, entry
);
9582 vm_map_copy_entry_link(copy
,
9583 vm_map_copy_to_entry(copy
),
9585 copy
->offset
-= head_size
;
9586 copy
->size
+= head_size
;
9587 vm_map_copy_discard(head_copy
);
/* Likewise link the tail entry back at the end of "copy" and restore its size. */
9591 entry
= vm_map_copy_last_entry(tail_copy
);
9592 vm_map_copy_entry_unlink(tail_copy
, entry
);
9593 vm_map_copy_entry_link(copy
,
9594 vm_map_copy_last_entry(copy
),
9596 copy
->size
+= tail_size
;
9597 vm_map_copy_discard(tail_copy
);
9606 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9609 * Physically copy unaligned data
9612 * Unaligned parts of pages have to be physically copied. We use
9613 * a modified form of vm_fault_copy (which understands non-aligned
9614 * page offsets and sizes) to do the copy. We attempt to copy as
9615 * much memory in one go as possible, however vm_fault_copy copies
9616 * within 1 memory object so we have to find the smaller of "amount left",
9617 * "source object data size" and "target object data size". With
9618 * unaligned data we don't need to split regions, therefore the source
9619 * (copy) object should be one map entry, the target range may be split
9620 * over multiple map entries however. In any event we are pessimistic
9621 * about these assumptions.
9624 * dst_map is locked on entry and is returned locked on success,
9625 * unlocked on error.
/*
 * vm_map_copy_overwrite_unaligned: copy "copy" over the destination
 * starting at "start" in map entry "entry", piece by piece.
 *
 * NOTE(review): this excerpt is lossy -- some parameters, braces, the
 * copy call itself and several goto/return statements are not visible;
 * the notes below describe only what is shown.
 *
 * Each loop iteration, while amount_left > 0:
 *  - returns KERN_INVALID_ADDRESS if "entry" is the map's header entry;
 *  - sizes the piece from dst_size (room left in the destination entry),
 *    src_size (data left in the copy entry) and amount_left;
 *  - shadows the destination entry when it is needs_copy and writable,
 *    and allocates a destination object when there is none;
 *  - takes an object reference, records the map timestamp, the entry's
 *    offset and end, and drops the read lock;
 *  - afterwards advances src_offset / amount_left, releases the object
 *    reference, and moves on to the next copy entry when the current one
 *    is exhausted (disposing of it if discard_on_success);
 *  - re-takes the read lock and either reuses "entry" (timestamp
 *    unchanged) or looks the entry up again.
 */
9628 static kern_return_t
9629 vm_map_copy_overwrite_unaligned(
9631 vm_map_entry_t entry
,
9633 vm_map_offset_t start
,
9634 boolean_t discard_on_success
)
9636 vm_map_entry_t copy_entry
;
9637 vm_map_entry_t copy_entry_next
;
9638 vm_map_version_t version
;
9639 vm_object_t dst_object
;
9640 vm_object_offset_t dst_offset
;
9641 vm_object_offset_t src_offset
;
9642 vm_object_offset_t entry_offset
;
9643 vm_map_offset_t entry_end
;
9644 vm_map_size_t src_size
,
9648 kern_return_t kr
= KERN_SUCCESS
;
9651 copy_entry
= vm_map_copy_first_entry(copy
);
/* Downgrade the map lock; the loop below releases it with unlock_read. */
9653 vm_map_lock_write_to_read(dst_map
);
/* src_offset starts as the sub-page part of copy->offset. */
9655 src_offset
= copy
->offset
- vm_object_trunc_page(copy
->offset
);
9656 amount_left
= copy
->size
;
9658 * unaligned so we never clipped this entry, we need the offset into
9659 * the vm_object not just the data.
9661 while (amount_left
> 0) {
9662 if (entry
== vm_map_to_entry(dst_map
)) {
9663 vm_map_unlock_read(dst_map
);
9664 return KERN_INVALID_ADDRESS
;
9667 /* "start" must be within the current map entry */
9668 assert((start
>= entry
->vme_start
) && (start
< entry
->vme_end
));
9670 dst_offset
= start
- entry
->vme_start
;
9672 dst_size
= entry
->vme_end
- start
;
9674 src_size
= copy_entry
->vme_end
-
9675 (copy_entry
->vme_start
+ src_offset
);
9677 if (dst_size
< src_size
) {
9679 * we can only copy dst_size bytes before
9680 * we have to get the next destination entry
9682 copy_size
= dst_size
;
9685 * we can only copy src_size bytes before
9686 * we have to get the next source copy entry
9688 copy_size
= src_size
;
/* Never copy more than what is still outstanding. */
9691 if (copy_size
> amount_left
) {
9692 copy_size
= amount_left
;
9695 * Entry needs copy, create a shadow shadow object for
9696 * Copy on write region.
9698 if (entry
->needs_copy
&&
9699 ((entry
->protection
& VM_PROT_WRITE
) != 0)) {
/* On a non-zero return from the upgrade the read lock is taken again; what follows is not visible in this excerpt. */
9700 if (vm_map_lock_read_to_write(dst_map
)) {
9701 vm_map_lock_read(dst_map
);
9704 VME_OBJECT_SHADOW(entry
,
9705 (vm_map_size_t
)(entry
->vme_end
9706 - entry
->vme_start
));
9707 entry
->needs_copy
= FALSE
;
9708 vm_map_lock_write_to_read(dst_map
);
9710 dst_object
= VME_OBJECT(entry
);
9712 * unlike with the virtual (aligned) copy we're going
9713 * to fault on it therefore we need a target object.
9715 if (dst_object
== VM_OBJECT_NULL
) {
9716 if (vm_map_lock_read_to_write(dst_map
)) {
9717 vm_map_lock_read(dst_map
);
9720 dst_object
= vm_object_allocate((vm_map_size_t
)
9721 entry
->vme_end
- entry
->vme_start
);
9722 VME_OBJECT_SET(entry
, dst_object
);
9723 VME_OFFSET_SET(entry
, 0);
9724 assert(entry
->use_pmap
);
9725 vm_map_lock_write_to_read(dst_map
);
9728 * Take an object reference and unlock map. The "entry" may
9729 * disappear or change when the map is unlocked.
9731 vm_object_reference(dst_object
);
/* Snapshot the timestamp and the entry fields needed after the unlock. */
9732 version
.main_timestamp
= dst_map
->timestamp
;
9733 entry_offset
= VME_OFFSET(entry
);
9734 entry_end
= entry
->vme_end
;
9735 vm_map_unlock_read(dst_map
);
9737 * Copy as much as possible in one pass
9740 VME_OBJECT(copy_entry
),
9741 VME_OFFSET(copy_entry
) + src_offset
,
9744 entry_offset
+ dst_offset
,
9750 src_offset
+= copy_size
;
9751 amount_left
-= copy_size
;
9753 * Release the object reference
9755 vm_object_deallocate(dst_object
);
9757 * If a hard error occurred, return it now
9759 if (kr
!= KERN_SUCCESS
) {
9763 if ((copy_entry
->vme_start
+ src_offset
) == copy_entry
->vme_end
9764 || amount_left
== 0) {
9766 * all done with this copy entry, dispose.
9768 copy_entry_next
= copy_entry
->vme_next
;
9770 if (discard_on_success
) {
9771 vm_map_copy_entry_unlink(copy
, copy_entry
);
9772 assert(!copy_entry
->is_sub_map
);
9773 vm_object_deallocate(VME_OBJECT(copy_entry
));
9774 vm_map_copy_entry_dispose(copy
, copy_entry
);
9777 if (copy_entry_next
== vm_map_copy_to_entry(copy
) &&
9780 * not finished copying but run out of source
9782 return KERN_INVALID_ADDRESS
;
9785 copy_entry
= copy_entry_next
;
9790 if (amount_left
== 0) {
9791 return KERN_SUCCESS
;
/* Re-take the read lock and check whether the map changed while it was unlocked. */
9794 vm_map_lock_read(dst_map
);
9795 if (version
.main_timestamp
== dst_map
->timestamp
) {
9796 if (start
== entry_end
) {
9798 * destination region is split. Use the version
9799 * information to avoid a lookup in the normal
9802 entry
= entry
->vme_next
;
9804 * should be contiguous. Fail if we encounter
9805 * a hole in the destination.
9807 if (start
!= entry
->vme_start
) {
9808 vm_map_unlock_read(dst_map
);
9809 return KERN_INVALID_ADDRESS
;
9814 * Map version check failed.
9815 * we must lookup the entry because somebody
9816 * might have changed the map behind our backs.
9819 if (!vm_map_lookup_entry(dst_map
, start
, &entry
)) {
9820 vm_map_unlock_read(dst_map
);
9821 return KERN_INVALID_ADDRESS
;
9826 return KERN_SUCCESS
;
9827 }/* vm_map_copy_overwrite_unaligned */
9830 * Routine: vm_map_copy_overwrite_aligned [internal use only]
9833 * Does all the vm_trickery possible for whole pages.
9837 * If there are no permanent objects in the destination,
9838 * and the source and destination map entry zones match,
9839 * and the destination map entry is not shared,
9840 * then the map entries can be deleted and replaced
9841 * with those from the copy. The following code is the
9842 * basic idea of what to do, but there are lots of annoying
9843 * little details about getting protection and inheritance
9844 * right. Should add protection, inheritance, and sharing checks
9845 * to the above pass and make sure that no wiring is involved.
/*
 * Counters bumped by vm_map_copy_overwrite_aligned() when it declines the
 * entry-substitution optimization:
 *  - src_not_internal:  the source shadow chain reached a non-internal object;
 *  - src_not_symmetric: a source object is true_share or does not use
 *                       MEMORY_OBJECT_COPY_SYMMETRIC;
 *  - src_large:         a small copy out of a large source object.
 */
9848 int vm_map_copy_overwrite_aligned_src_not_internal
= 0;
9849 int vm_map_copy_overwrite_aligned_src_not_symmetric
= 0;
9850 int vm_map_copy_overwrite_aligned_src_large
= 0;
/*
 * vm_map_copy_overwrite_aligned: overwrite page-aligned destination
 * entries, starting with "tmp_entry" at address "start", with the entries
 * of "copy".
 *
 * NOTE(review): this excerpt is lossy -- some parameters, braces,
 * else/goto statements and the data-copy call are not visible; the notes
 * below describe only what is shown.
 *
 * For each copy entry (taken from the front of the copy's list):
 *  - the destination entry is validated (not the map header, starts at
 *    "start", writable) and the two entries are clipped to a common size;
 *  - if the destination is unshared temporary memory (or needs_copy), the
 *    destination entry may simply adopt the copy entry's object and offset
 *    after the old mappings are removed; several checks (large source
 *    object, malloc-tagged destination backed by non-anonymous or
 *    non-symmetric source) opt out of that path and bump a counter;
 *  - otherwise a destination object is obtained, the map is unlocked
 *    around the copy, the copied part of the copy entry is disposed of,
 *    and tmp_entry is re-established using the map timestamp or a lookup.
 */
9852 static kern_return_t
9853 vm_map_copy_overwrite_aligned(
9855 vm_map_entry_t tmp_entry
,
9857 vm_map_offset_t start
,
9858 __unused pmap_t pmap
)
9861 vm_map_entry_t copy_entry
;
9862 vm_map_size_t copy_size
;
9864 vm_map_entry_t entry
;
/* Loop until the copy's entry list is empty; every path below unlinks the entry it handled. */
9866 while ((copy_entry
= vm_map_copy_first_entry(copy
))
9867 != vm_map_copy_to_entry(copy
)) {
9868 copy_size
= (copy_entry
->vme_end
- copy_entry
->vme_start
);
9871 if (entry
->is_sub_map
) {
9872 /* unnested when clipped earlier */
9873 assert(!entry
->use_pmap
);
9875 if (entry
== vm_map_to_entry(dst_map
)) {
9876 vm_map_unlock(dst_map
);
9877 return KERN_INVALID_ADDRESS
;
9879 size
= (entry
->vme_end
- entry
->vme_start
);
9881 * Make sure that no holes popped up in the
9882 * address map, and that the protection is
9883 * still valid, in case the map was unlocked
9887 if ((entry
->vme_start
!= start
) || ((entry
->is_sub_map
)
9888 && !entry
->needs_copy
)) {
9889 vm_map_unlock(dst_map
);
9890 return KERN_INVALID_ADDRESS
;
9892 assert(entry
!= vm_map_to_entry(dst_map
));
9895 * Check protection again
9898 if (!(entry
->protection
& VM_PROT_WRITE
)) {
9899 vm_map_unlock(dst_map
);
9900 return KERN_PROTECTION_FAILURE
;
9904 * Adjust to source size first
9907 if (copy_size
< size
) {
9908 if (entry
->map_aligned
&&
9909 !VM_MAP_PAGE_ALIGNED(entry
->vme_start
+ copy_size
,
9910 VM_MAP_PAGE_MASK(dst_map
))) {
9911 /* no longer map-aligned */
9912 entry
->map_aligned
= FALSE
;
9914 vm_map_clip_end(dst_map
, entry
, entry
->vme_start
+ copy_size
);
9919 * Adjust to destination size
9922 if (size
< copy_size
) {
9923 vm_map_copy_clip_end(copy
, copy_entry
,
9924 copy_entry
->vme_start
+ size
);
/* After clipping, all three entries must cover exactly "size" bytes. */
9928 assert((entry
->vme_end
- entry
->vme_start
) == size
);
9929 assert((tmp_entry
->vme_end
- tmp_entry
->vme_start
) == size
);
9930 assert((copy_entry
->vme_end
- copy_entry
->vme_start
) == size
);
9933 * If the destination contains temporary unshared memory,
9934 * we can perform the copy by throwing it away and
9935 * installing the source data.
9938 object
= VME_OBJECT(entry
);
9939 if ((!entry
->is_shared
&&
9940 ((object
== VM_OBJECT_NULL
) ||
9941 (object
->internal
&& !object
->true_share
))) ||
9942 entry
->needs_copy
) {
9943 vm_object_t old_object
= VME_OBJECT(entry
);
9944 vm_object_offset_t old_offset
= VME_OFFSET(entry
);
9945 vm_object_offset_t offset
;
9948 * Ensure that the source and destination aren't
/* Same object and offset on both sides: drop the copy entry and advance. */
9951 if (old_object
== VME_OBJECT(copy_entry
) &&
9952 old_offset
== VME_OFFSET(copy_entry
)) {
9953 vm_map_copy_entry_unlink(copy
, copy_entry
);
9954 vm_map_copy_entry_dispose(copy
, copy_entry
);
9956 if (old_object
!= VM_OBJECT_NULL
) {
9957 vm_object_deallocate(old_object
);
9960 start
= tmp_entry
->vme_end
;
9961 tmp_entry
= tmp_entry
->vme_next
;
9965 #if !CONFIG_EMBEDDED
9966 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9967 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
9968 if (VME_OBJECT(copy_entry
) != VM_OBJECT_NULL
&&
9969 VME_OBJECT(copy_entry
)->vo_size
>= __TRADEOFF1_OBJ_SIZE
&&
9970 copy_size
<= __TRADEOFF1_COPY_SIZE
) {
9972 * Virtual vs. Physical copy tradeoff #1.
9974 * Copying only a few pages out of a large
9975 * object: do a physical copy instead of
9976 * a virtual copy, to avoid possibly keeping
9977 * the entire large object alive because of
9978 * those few copy-on-write pages.
9980 vm_map_copy_overwrite_aligned_src_large
++;
9983 #endif /* !CONFIG_EMBEDDED */
/* Destination in a non-kernel map whose alias falls in the VM_MEMORY_MALLOC..VM_MEMORY_MALLOC_MEDIUM range. */
9985 if ((dst_map
->pmap
!= kernel_pmap
) &&
9986 (VME_ALIAS(entry
) >= VM_MEMORY_MALLOC
) &&
9987 (VME_ALIAS(entry
) <= VM_MEMORY_MALLOC_MEDIUM
)) {
9988 vm_object_t new_object
, new_shadow
;
9991 * We're about to map something over a mapping
9992 * established by malloc()...
9994 new_object
= VME_OBJECT(copy_entry
);
9995 if (new_object
!= VM_OBJECT_NULL
) {
9996 vm_object_lock_shared(new_object
);
/* Walk down the shadow chain, locking the shadow before unlocking the current object. */
9998 while (new_object
!= VM_OBJECT_NULL
&&
9999 #if !CONFIG_EMBEDDED
10000 !new_object
->true_share
&&
10001 new_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
10002 #endif /* !CONFIG_EMBEDDED */
10003 new_object
->internal
) {
10004 new_shadow
= new_object
->shadow
;
10005 if (new_shadow
== VM_OBJECT_NULL
) {
10008 vm_object_lock_shared(new_shadow
);
10009 vm_object_unlock(new_object
);
10010 new_object
= new_shadow
;
10012 if (new_object
!= VM_OBJECT_NULL
) {
10013 if (!new_object
->internal
) {
10015 * The new mapping is backed
10016 * by an external object. We
10017 * don't want malloc'ed memory
10018 * to be replaced with such a
10019 * non-anonymous mapping, so
10020 * let's go off the optimized
10023 vm_map_copy_overwrite_aligned_src_not_internal
++;
10024 vm_object_unlock(new_object
);
10027 #if !CONFIG_EMBEDDED
10028 if (new_object
->true_share
||
10029 new_object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
10031 * Same if there's a "true_share"
10032 * object in the shadow chain, or
10033 * an object with a non-default
10034 * (SYMMETRIC) copy strategy.
10036 vm_map_copy_overwrite_aligned_src_not_symmetric
++;
10037 vm_object_unlock(new_object
);
10040 #endif /* !CONFIG_EMBEDDED */
10041 vm_object_unlock(new_object
);
10044 * The new mapping is still backed by
10045 * anonymous (internal) memory, so it's
10046 * OK to substitute it for the original
10047 * malloc() mapping.
/* Tear down the old backing (submap or object) and its pmap mappings before installing the new object. */
10051 if (old_object
!= VM_OBJECT_NULL
) {
10052 if (entry
->is_sub_map
) {
10053 if (entry
->use_pmap
) {
10054 #ifndef NO_NESTED_PMAP
10055 pmap_unnest(dst_map
->pmap
,
10056 (addr64_t
)entry
->vme_start
,
10057 entry
->vme_end
- entry
->vme_start
);
10058 #endif /* NO_NESTED_PMAP */
10059 if (dst_map
->mapped_in_other_pmaps
) {
10060 /* clean up parent */
10062 vm_map_submap_pmap_clean(
10063 dst_map
, entry
->vme_start
,
10066 VME_OFFSET(entry
));
10069 vm_map_submap_pmap_clean(
10070 dst_map
, entry
->vme_start
,
10073 VME_OFFSET(entry
));
10075 vm_map_deallocate(VME_SUBMAP(entry
));
10077 if (dst_map
->mapped_in_other_pmaps
) {
10078 vm_object_pmap_protect_options(
10082 - entry
->vme_start
,
10086 PMAP_OPTIONS_REMOVE
);
10088 pmap_remove_options(
10090 (addr64_t
)(entry
->vme_start
),
10091 (addr64_t
)(entry
->vme_end
),
10092 PMAP_OPTIONS_REMOVE
);
10094 vm_object_deallocate(old_object
);
10098 if (entry
->iokit_acct
) {
10099 /* keep using iokit accounting */
10100 entry
->use_pmap
= FALSE
;
10102 /* use pmap accounting */
10103 entry
->use_pmap
= TRUE
;
/* The destination entry takes over the copy entry's object, needs_copy flag and offset; wiring counts are reset. */
10105 entry
->is_sub_map
= FALSE
;
10106 VME_OBJECT_SET(entry
, VME_OBJECT(copy_entry
));
10107 object
= VME_OBJECT(entry
);
10108 entry
->needs_copy
= copy_entry
->needs_copy
;
10109 entry
->wired_count
= 0;
10110 entry
->user_wired_count
= 0;
10111 offset
= VME_OFFSET(copy_entry
);
10112 VME_OFFSET_SET(entry
, offset
);
10114 vm_map_copy_entry_unlink(copy
, copy_entry
);
10115 vm_map_copy_entry_dispose(copy
, copy_entry
);
10118 * we could try to push pages into the pmap at this point, BUT
10119 * this optimization only saved on average 2 us per page if ALL
10120 * the pages in the source were currently mapped
10121 * and ALL the pages in the dest were touched, if there were fewer
10122 * than 2/3 of the pages touched, this optimization actually cost more cycles
10123 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
10127 * Set up for the next iteration. The map
10128 * has not been unlocked, so the next
10129 * address should be at the end of this
10130 * entry, and the next map entry should be
10131 * the one following it.
10134 start
= tmp_entry
->vme_end
;
10135 tmp_entry
= tmp_entry
->vme_next
;
/* Non-substitution path (the branch keyword is not visible in this excerpt): data is copied into the destination object with the map unlocked. */
10137 vm_map_version_t version
;
10138 vm_object_t dst_object
;
10139 vm_object_offset_t dst_offset
;
10143 if (entry
->needs_copy
) {
10144 VME_OBJECT_SHADOW(entry
,
10146 entry
->vme_start
));
10147 entry
->needs_copy
= FALSE
;
10150 dst_object
= VME_OBJECT(entry
);
10151 dst_offset
= VME_OFFSET(entry
);
10154 * Take an object reference, and record
10155 * the map version information so that the
10156 * map can be safely unlocked.
10159 if (dst_object
== VM_OBJECT_NULL
) {
10161 * We would usually have just taken the
10162 * optimized path above if the destination
10163 * object has not been allocated yet. But we
10164 * now disable that optimization if the copy
10165 * entry's object is not backed by anonymous
10166 * memory to avoid replacing malloc'ed
10167 * (i.e. re-usable) anonymous memory with a
10168 * not-so-anonymous mapping.
10169 * So we have to handle this case here and
10170 * allocate a new VM object for this map entry.
10172 dst_object
= vm_object_allocate(
10173 entry
->vme_end
- entry
->vme_start
);
10175 VME_OBJECT_SET(entry
, dst_object
);
10176 VME_OFFSET_SET(entry
, dst_offset
);
10177 assert(entry
->use_pmap
);
10180 vm_object_reference(dst_object
);
10182 /* account for unlock bumping up timestamp */
10183 version
.main_timestamp
= dst_map
->timestamp
+ 1;
10185 vm_map_unlock(dst_map
);
10188 * Copy as much as possible in one pass
10193 VME_OBJECT(copy_entry
),
10194 VME_OFFSET(copy_entry
),
10203 * Release the object reference
10206 vm_object_deallocate(dst_object
);
10209 * If a hard error occurred, return it now
10212 if (r
!= KERN_SUCCESS
) {
10216 if (copy_size
!= 0) {
10218 * Dispose of the copied region
10221 vm_map_copy_clip_end(copy
, copy_entry
,
10222 copy_entry
->vme_start
+ copy_size
);
10223 vm_map_copy_entry_unlink(copy
, copy_entry
);
10224 vm_object_deallocate(VME_OBJECT(copy_entry
));
10225 vm_map_copy_entry_dispose(copy
, copy_entry
);
10229 * Pick up in the destination map where we left off.
10231 * Use the version information to avoid a lookup
10232 * in the normal case.
10235 start
+= copy_size
;
10236 vm_map_lock(dst_map
);
10237 if (version
.main_timestamp
== dst_map
->timestamp
&&
10239 /* We can safely use saved tmp_entry value */
10241 if (tmp_entry
->map_aligned
&&
10242 !VM_MAP_PAGE_ALIGNED(
10244 VM_MAP_PAGE_MASK(dst_map
))) {
10245 /* no longer map-aligned */
10246 tmp_entry
->map_aligned
= FALSE
;
10248 vm_map_clip_end(dst_map
, tmp_entry
, start
);
10249 tmp_entry
= tmp_entry
->vme_next
;
10251 /* Must do lookup of tmp_entry */
10253 if (!vm_map_lookup_entry(dst_map
, start
, &tmp_entry
)) {
10254 vm_map_unlock(dst_map
);
10255 return KERN_INVALID_ADDRESS
;
10257 if (tmp_entry
->map_aligned
&&
10258 !VM_MAP_PAGE_ALIGNED(
10260 VM_MAP_PAGE_MASK(dst_map
))) {
10261 /* no longer map-aligned */
10262 tmp_entry
->map_aligned
= FALSE
;
10264 vm_map_clip_start(dst_map
, tmp_entry
, start
);
10269 return KERN_SUCCESS
;
10270 }/* vm_map_copy_overwrite_aligned */
10273 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10276 * Copy in data to a kernel buffer from space in the
10277 * source map. The original space may be optionally
10280 * If successful, returns a new copy object.
/*
 * vm_map_copyin_kernel_buffer: build a VM_MAP_COPY_KERNEL_BUFFER copy
 * object holding "len" bytes read from src_addr in src_map.
 *
 * NOTE(review): this excerpt is lossy -- some parameters, braces and a
 * few statements are not visible; the notes describe only what is shown.
 *
 *  - returns KERN_INVALID_ARGUMENT when len > msg_ool_size_small;
 *  - allocates cpy_kdata_hdr_sz + len bytes with kalloc() and returns
 *    KERN_RESOURCE_SHORTAGE when that fails;
 *  - fills copy->cpy_kdata with copyinmap(); the buffer is kfree()d when
 *    copyinmap() fails;
 *  - stores the copy in *copy_result and returns KERN_SUCCESS.
 */
10282 static kern_return_t
10283 vm_map_copyin_kernel_buffer(
10285 vm_map_offset_t src_addr
,
10287 boolean_t src_destroy
,
10288 vm_map_copy_t
*copy_result
)
10291 vm_map_copy_t copy
;
10292 vm_size_t kalloc_size
;
/* Only requests up to msg_ool_size_small are accepted here. */
10294 if (len
> msg_ool_size_small
) {
10295 return KERN_INVALID_ARGUMENT
;
/* One allocation holds the copy header followed by the data. */
10298 kalloc_size
= (vm_size_t
)(cpy_kdata_hdr_sz
+ len
);
10300 copy
= (vm_map_copy_t
)kalloc(kalloc_size
);
10301 if (copy
== VM_MAP_COPY_NULL
) {
10302 return KERN_RESOURCE_SHORTAGE
;
10304 copy
->type
= VM_MAP_COPY_KERNEL_BUFFER
;
10308 kr
= copyinmap(src_map
, src_addr
, copy
->cpy_kdata
, (vm_size_t
)len
);
10309 if (kr
!= KERN_SUCCESS
) {
10310 kfree(copy
, kalloc_size
);
/* Removes the page-rounded source range; the guarding condition is not visible here (presumably src_destroy -- confirm). */
10314 (void) vm_map_remove(
10316 vm_map_trunc_page(src_addr
,
10317 VM_MAP_PAGE_MASK(src_map
)),
10318 vm_map_round_page(src_addr
+ len
,
10319 VM_MAP_PAGE_MASK(src_map
)),
10320 (VM_MAP_REMOVE_INTERRUPTIBLE
|
10321 VM_MAP_REMOVE_WAIT_FOR_KWIRE
|
10322 ((src_map
== kernel_map
) ? VM_MAP_REMOVE_KUNWIRE
: VM_MAP_REMOVE_NO_FLAGS
)));
10324 *copy_result
= copy
;
10325 return KERN_SUCCESS
;
10329 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10332 * Copy out data from a kernel buffer into space in the
10333 * destination map. The space may be optionally dynamically
10336 * If successful, consumes the copy object.
10337 * Otherwise, the caller is responsible for it.
/*
 * vm_map_copyout_kernel_buffer: copy the bytes held in a kernel-buffer
 * vm_map_copy out to *addr in the target map.
 *
 * NOTE(review): this excerpt is lossy -- the first parameter, braces and
 * several statements are not visible; the notes describe only what is shown.
 *
 *  - asserts copy->size == copy_size and panics when copy_size exceeds
 *    msg_ool_size_small or copy->offset is non-zero;
 *  - calls vm_map_enter() for a page-rounded copy_size (the guarding
 *    condition is not visible; presumably tied to "overwrite" -- confirm);
 *  - uses copyout() directly when the target map is the current thread's
 *    map, otherwise switches to the target map around the copyout();
 *  - when kr is not KERN_SUCCESS, the visible cleanup removes the
 *    page-rounded range from the map;
 *  - frees "copy" (copy_size + cpy_kdata_hdr_sz bytes) when
 *    consume_on_success is set.
 *
 * vm_map_copyout_kernel_buffer_failures counts copyout() failures seen on
 * the map-switch path.
 */
10339 static int vm_map_copyout_kernel_buffer_failures
= 0;
10340 static kern_return_t
10341 vm_map_copyout_kernel_buffer(
10343 vm_map_address_t
*addr
, /* IN/OUT */
10344 vm_map_copy_t copy
,
10345 vm_map_size_t copy_size
,
10346 boolean_t overwrite
,
10347 boolean_t consume_on_success
)
10349 kern_return_t kr
= KERN_SUCCESS
;
10350 thread_t thread
= current_thread();
10352 assert(copy
->size
== copy_size
);
10355 * check for corrupted vm_map_copy structure
10357 if (copy_size
> msg_ool_size_small
|| copy
->offset
) {
10358 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10359 (long long)copy
->size
, (long long)copy
->offset
);
10364 * Allocate space in the target map for the data
10367 kr
= vm_map_enter(map
,
10369 vm_map_round_page(copy_size
,
10370 VM_MAP_PAGE_MASK(map
)),
10371 (vm_map_offset_t
) 0,
10373 VM_MAP_KERNEL_FLAGS_NONE
,
10374 VM_KERN_MEMORY_NONE
,
10376 (vm_object_offset_t
) 0,
10380 VM_INHERIT_DEFAULT
);
10381 if (kr
!= KERN_SUCCESS
) {
10385 if (map
->pmap
== kernel_pmap
) {
10386 kasan_notify_address(*addr
, copy
->size
);
10392 * Copyout the data from the kernel buffer to the target map.
10394 if (thread
->map
== map
) {
10396 * If the target map is the current map, just do
10399 assert((vm_size_t
)copy_size
== copy_size
);
10400 if (copyout(copy
->cpy_kdata
, *addr
, (vm_size_t
)copy_size
)) {
10401 kr
= KERN_INVALID_ADDRESS
;
10407 * If the target map is another map, assume the
10408 * target's address space identity for the duration
/* Hold a reference on the target map while it is switched in. */
10411 vm_map_reference(map
);
10412 oldmap
= vm_map_switch(map
);
10414 assert((vm_size_t
)copy_size
== copy_size
);
10415 if (copyout(copy
->cpy_kdata
, *addr
, (vm_size_t
)copy_size
)) {
10416 vm_map_copyout_kernel_buffer_failures
++;
10417 kr
= KERN_INVALID_ADDRESS
;
/* Switch back to the previous map and drop the reference taken above. */
10420 (void) vm_map_switch(oldmap
);
10421 vm_map_deallocate(map
);
10424 if (kr
!= KERN_SUCCESS
) {
10425 /* the copy failed, clean up */
10428 * Deallocate the space we allocated in the target map.
10430 (void) vm_map_remove(
10432 vm_map_trunc_page(*addr
,
10433 VM_MAP_PAGE_MASK(map
)),
10434 vm_map_round_page((*addr
+
10435 vm_map_round_page(copy_size
,
10436 VM_MAP_PAGE_MASK(map
))),
10437 VM_MAP_PAGE_MASK(map
)),
10438 VM_MAP_REMOVE_NO_FLAGS
);
10442 /* copy was successful, discard the copy structure */
10443 if (consume_on_success
) {
10444 kfree(copy
, copy_size
+ cpy_kdata_hdr_sz
);
10452 * Routine: vm_map_copy_insert [internal use only]
10455 * Link a copy chain ("copy") into a map at the
10456 * specified location (after "where").
10458 * The copy chain is destroyed.
/*
 * vm_map_copy_insert: move every entry of "copy" into "map", in order,
 * starting right after "after_where", then free the copy header.
 *
 * NOTE(review): the return type, first parameter and braces are not
 * visible in this excerpt.
 */
10461 vm_map_copy_insert(
10463 vm_map_entry_t after_where
,
10464 vm_map_copy_t copy
)
10466 vm_map_entry_t entry
;
/* Pop entries off the front of the copy until its list is empty. */
10468 while (vm_map_copy_first_entry(copy
) != vm_map_copy_to_entry(copy
)) {
10469 entry
= vm_map_copy_first_entry(copy
);
10470 vm_map_copy_entry_unlink(copy
, entry
);
10471 vm_map_store_entry_link(map
, after_where
, entry
,
10472 VM_MAP_KERNEL_FLAGS_NONE
);
/* The next entry is linked after the one just inserted, preserving order. */
10473 after_where
= entry
;
/* The emptied copy header goes back to vm_map_copy_zone. */
10475 zfree(vm_map_copy_zone
, copy
);
/*
 * NOTE(review): the line naming this routine is not visible in this
 * excerpt (presumably vm_map_copy_remap -- confirm against the full file),
 * nor are its return type, first parameter and braces.
 *
 * For every entry in "copy": creates a new map entry, copies the copy
 * entry into it, shifts its start and end by "adjustment", overrides its
 * inheritance, protections and behavior, takes an extra reference on the
 * backing submap or object, and links it into "map" after "where".
 * The copy's entries are not unlinked by the visible code.
 */
10481 vm_map_entry_t where
,
10482 vm_map_copy_t copy
,
10483 vm_map_offset_t adjustment
,
10484 vm_prot_t cur_prot
,
10485 vm_prot_t max_prot
,
10486 vm_inherit_t inheritance
)
10488 vm_map_entry_t copy_entry
, new_entry
;
10490 for (copy_entry
= vm_map_copy_first_entry(copy
);
10491 copy_entry
!= vm_map_copy_to_entry(copy
);
10492 copy_entry
= copy_entry
->vme_next
) {
10493 /* get a new VM map entry for the map */
10494 new_entry
= vm_map_entry_create(map
,
10495 !map
->hdr
.entries_pageable
);
10496 /* copy the "copy entry" to the new entry */
10497 vm_map_entry_copy(new_entry
, copy_entry
);
10498 /* adjust "start" and "end" */
10499 new_entry
->vme_start
+= adjustment
;
10500 new_entry
->vme_end
+= adjustment
;
10501 /* clear some attributes */
10502 new_entry
->inheritance
= inheritance
;
10503 new_entry
->protection
= cur_prot
;
10504 new_entry
->max_protection
= max_prot
;
10505 new_entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
10506 /* take an extra reference on the entry's "object" */
10507 if (new_entry
->is_sub_map
) {
10508 assert(!new_entry
->use_pmap
); /* not nested */
/* The submap reference is taken with the submap locked. */
10509 vm_map_lock(VME_SUBMAP(new_entry
));
10510 vm_map_reference(VME_SUBMAP(new_entry
));
10511 vm_map_unlock(VME_SUBMAP(new_entry
));
10513 vm_object_reference(VME_OBJECT(new_entry
));
10515 /* insert the new entry in the map */
10516 vm_map_store_entry_link(map
, where
, new_entry
,
10517 VM_MAP_KERNEL_FLAGS_NONE
);
10518 /* continue inserting the "copy entries" after the new entry */
10525 * Returns true if *size matches (or is in the range of) copy->size.
10526 * Upon returning true, the *size field is updated with the actual size of the
10527 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
/*
 * vm_map_copy_validate_size: compare the caller's *size with copy->size,
 * with a rule that depends on the copy's type.
 *
 * NOTE(review): the return type, first parameter, braces and the
 * return/assignment statements are not visible in this excerpt; only the
 * comparisons are shown.
 */
10530 vm_map_copy_validate_size(
10532 vm_map_copy_t copy
,
10533 vm_map_size_t
*size
)
10535 if (copy
== VM_MAP_COPY_NULL
) {
10538 vm_map_size_t copy_sz
= copy
->size
;
10539 vm_map_size_t sz
= *size
;
10540 switch (copy
->type
) {
/* Object and kernel-buffer copies are compared for exact equality. */
10541 case VM_MAP_COPY_OBJECT
:
10542 case VM_MAP_COPY_KERNEL_BUFFER
:
10543 if (sz
== copy_sz
) {
10547 case VM_MAP_COPY_ENTRY_LIST
:
10549 * potential page-size rounding prevents us from exactly
10550 * validating this flavor of vm_map_copy, but we can at least
10551 * assert that it's within a range.
/* Accepted range: sz <= copy_sz <= sz rounded up to dst_map's page size. */
10553 if (copy_sz
>= sz
&&
10554 copy_sz
<= vm_map_round_page(sz
, VM_MAP_PAGE_MASK(dst_map
))) {
10566 * Routine: vm_map_copyout_size
10569 * Copy out a copy chain ("copy") into newly-allocated
10570 * space in the destination map. Uses a prevalidated
10571 * size for the copy object (vm_map_copy_validate_size).
10573 * If successful, consumes the copy object.
10574 * Otherwise, the caller is responsible for it.
/*
 * vm_map_copyout_size: thin wrapper that forwards to
 * vm_map_copyout_internal() with the caller-supplied copy_size,
 * consume_on_success = TRUE and VM_INHERIT_DEFAULT inheritance.
 *
 * NOTE(review): the return type, first parameter and some arguments of
 * the call are not visible in this excerpt.
 */
10577 vm_map_copyout_size(
10579 vm_map_address_t
*dst_addr
, /* OUT */
10580 vm_map_copy_t copy
,
10581 vm_map_size_t copy_size
)
10583 return vm_map_copyout_internal(dst_map
, dst_addr
, copy
, copy_size
,
10584 TRUE
, /* consume_on_success */
10587 VM_INHERIT_DEFAULT
);
10591 * Routine: vm_map_copyout
10594 * Copy out a copy chain ("copy") into newly-allocated
10595 * space in the destination map.
10597 * If successful, consumes the copy object.
10598 * Otherwise, the caller is responsible for it.
/*
 * vm_map_copyout (name taken from the routine header text; the line with
 * the function name is not visible in this excerpt): thin wrapper that
 * forwards to vm_map_copyout_internal() using copy->size as the size, or
 * 0 when "copy" is null, with consume_on_success = TRUE and
 * VM_INHERIT_DEFAULT inheritance.
 */
10603 vm_map_address_t
*dst_addr
, /* OUT */
10604 vm_map_copy_t copy
)
/* The null check keeps copy->size from being read through a null copy. */
10606 return vm_map_copyout_internal(dst_map
, dst_addr
, copy
, copy
? copy
->size
: 0,
10607 TRUE
, /* consume_on_success */
10610 VM_INHERIT_DEFAULT
);
10614 vm_map_copyout_internal(
10616 vm_map_address_t
*dst_addr
, /* OUT */
10617 vm_map_copy_t copy
,
10618 vm_map_size_t copy_size
,
10619 boolean_t consume_on_success
,
10620 vm_prot_t cur_protection
,
10621 vm_prot_t max_protection
,
10622 vm_inherit_t inheritance
)
10624 vm_map_size_t size
;
10625 vm_map_size_t adjustment
;
10626 vm_map_offset_t start
;
10627 vm_object_offset_t vm_copy_start
;
10628 vm_map_entry_t last
;
10629 vm_map_entry_t entry
;
10630 vm_map_entry_t hole_entry
;
10633 * Check for null copy object.
10636 if (copy
== VM_MAP_COPY_NULL
) {
10638 return KERN_SUCCESS
;
10641 if (copy
->size
!= copy_size
) {
10643 return KERN_FAILURE
;
10647 * Check for special copy object, created
10648 * by vm_map_copyin_object.
10651 if (copy
->type
== VM_MAP_COPY_OBJECT
) {
10652 vm_object_t object
= copy
->cpy_object
;
10654 vm_object_offset_t offset
;
10656 offset
= vm_object_trunc_page(copy
->offset
);
10657 size
= vm_map_round_page((copy_size
+
10658 (vm_map_size_t
)(copy
->offset
-
10660 VM_MAP_PAGE_MASK(dst_map
));
10662 kr
= vm_map_enter(dst_map
, dst_addr
, size
,
10663 (vm_map_offset_t
) 0, VM_FLAGS_ANYWHERE
,
10664 VM_MAP_KERNEL_FLAGS_NONE
,
10665 VM_KERN_MEMORY_NONE
,
10666 object
, offset
, FALSE
,
10667 VM_PROT_DEFAULT
, VM_PROT_ALL
,
10668 VM_INHERIT_DEFAULT
);
10669 if (kr
!= KERN_SUCCESS
) {
10672 /* Account for non-pagealigned copy object */
10673 *dst_addr
+= (vm_map_offset_t
)(copy
->offset
- offset
);
10674 if (consume_on_success
) {
10675 zfree(vm_map_copy_zone
, copy
);
10677 return KERN_SUCCESS
;
10681 * Check for special kernel buffer allocated
10682 * by new_ipc_kmsg_copyin.
10685 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
10686 return vm_map_copyout_kernel_buffer(dst_map
, dst_addr
,
10687 copy
, copy_size
, FALSE
,
10688 consume_on_success
);
10693 * Find space for the data
10696 vm_copy_start
= vm_map_trunc_page((vm_map_size_t
)copy
->offset
,
10697 VM_MAP_COPY_PAGE_MASK(copy
));
10698 size
= vm_map_round_page((vm_map_size_t
)copy
->offset
+ copy_size
,
10699 VM_MAP_COPY_PAGE_MASK(copy
))
10705 vm_map_lock(dst_map
);
10706 if (dst_map
->disable_vmentry_reuse
== TRUE
) {
10707 VM_MAP_HIGHEST_ENTRY(dst_map
, entry
, start
);
10710 if (dst_map
->holelistenabled
) {
10711 hole_entry
= CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
);
10713 if (hole_entry
== NULL
) {
10715 * No more space in the map?
10717 vm_map_unlock(dst_map
);
10718 return KERN_NO_SPACE
;
10722 start
= last
->vme_start
;
10724 assert(first_free_is_valid(dst_map
));
10725 start
= ((last
= dst_map
->first_free
) == vm_map_to_entry(dst_map
)) ?
10726 vm_map_min(dst_map
) : last
->vme_end
;
10728 start
= vm_map_round_page(start
,
10729 VM_MAP_PAGE_MASK(dst_map
));
10733 vm_map_entry_t next
= last
->vme_next
;
10734 vm_map_offset_t end
= start
+ size
;
10736 if ((end
> dst_map
->max_offset
) || (end
< start
)) {
10737 if (dst_map
->wait_for_space
) {
10738 if (size
<= (dst_map
->max_offset
- dst_map
->min_offset
)) {
10739 assert_wait((event_t
) dst_map
,
10740 THREAD_INTERRUPTIBLE
);
10741 vm_map_unlock(dst_map
);
10742 thread_block(THREAD_CONTINUE_NULL
);
10746 vm_map_unlock(dst_map
);
10747 return KERN_NO_SPACE
;
10750 if (dst_map
->holelistenabled
) {
10751 if (last
->vme_end
>= end
) {
10756 * If there are no more entries, we must win.
10760 * If there is another entry, it must be
10761 * after the end of the potential new region.
10764 if (next
== vm_map_to_entry(dst_map
)) {
10768 if (next
->vme_start
>= end
) {
10775 if (dst_map
->holelistenabled
) {
10776 if (last
== CAST_TO_VM_MAP_ENTRY(dst_map
->holes_list
)) {
10780 vm_map_unlock(dst_map
);
10781 return KERN_NO_SPACE
;
10783 start
= last
->vme_start
;
10785 start
= last
->vme_end
;
10787 start
= vm_map_round_page(start
,
10788 VM_MAP_PAGE_MASK(dst_map
));
10791 if (dst_map
->holelistenabled
) {
10792 if (vm_map_lookup_entry(dst_map
, last
->vme_start
, &last
)) {
10793 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last
, (unsigned long long)last
->vme_start
);
10798 adjustment
= start
- vm_copy_start
;
10799 if (!consume_on_success
) {
10801 * We're not allowed to consume "copy", so we'll have to
10802 * copy its map entries into the destination map below.
10803 * No need to re-allocate map entries from the correct
10804 * (pageable or not) zone, since we'll get new map entries
10805 * during the transfer.
10806 * We'll also adjust the map entries' "start" and "end"
10807 * during the transfer, to keep "copy"'s entries consistent
10808 * with its "offset".
10810 goto after_adjustments
;
10814 * Since we're going to just drop the map
10815 * entries from the copy into the destination
10816 * map, they must come from the same pool.
10819 if (copy
->cpy_hdr
.entries_pageable
!= dst_map
->hdr
.entries_pageable
) {
10821 * Mismatches occur when dealing with the default
10825 vm_map_entry_t next
, new;
10828 * Find the zone that the copies were allocated from
10831 entry
= vm_map_copy_first_entry(copy
);
10834 * Reinitialize the copy so that vm_map_copy_entry_link
10837 vm_map_store_copy_reset(copy
, entry
);
10838 copy
->cpy_hdr
.entries_pageable
= dst_map
->hdr
.entries_pageable
;
10843 while (entry
!= vm_map_copy_to_entry(copy
)) {
10844 new = vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
10845 vm_map_entry_copy_full(new, entry
);
10846 new->vme_no_copy_on_read
= FALSE
;
10847 assert(!new->iokit_acct
);
10848 if (new->is_sub_map
) {
10849 /* clr address space specifics */
10850 new->use_pmap
= FALSE
;
10852 vm_map_copy_entry_link(copy
,
10853 vm_map_copy_last_entry(copy
),
10855 next
= entry
->vme_next
;
10856 old_zone
= entry
->from_reserved_zone
? vm_map_entry_reserved_zone
: vm_map_entry_zone
;
10857 zfree(old_zone
, entry
);
10863 * Adjust the addresses in the copy chain, and
10864 * reset the region attributes.
10867 for (entry
= vm_map_copy_first_entry(copy
);
10868 entry
!= vm_map_copy_to_entry(copy
);
10869 entry
= entry
->vme_next
) {
10870 if (VM_MAP_PAGE_SHIFT(dst_map
) == PAGE_SHIFT
) {
10872 * We're injecting this copy entry into a map that
10873 * has the standard page alignment, so clear
10874 * "map_aligned" (which might have been inherited
10875 * from the original map entry).
10877 entry
->map_aligned
= FALSE
;
10880 entry
->vme_start
+= adjustment
;
10881 entry
->vme_end
+= adjustment
;
10883 if (entry
->map_aligned
) {
10884 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_start
,
10885 VM_MAP_PAGE_MASK(dst_map
)));
10886 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_end
,
10887 VM_MAP_PAGE_MASK(dst_map
)));
10890 entry
->inheritance
= VM_INHERIT_DEFAULT
;
10891 entry
->protection
= VM_PROT_DEFAULT
;
10892 entry
->max_protection
= VM_PROT_ALL
;
10893 entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
10896 * If the entry is now wired,
10897 * map the pages into the destination map.
10899 if (entry
->wired_count
!= 0) {
10900 vm_map_offset_t va
;
10901 vm_object_offset_t offset
;
10902 vm_object_t object
;
10906 object
= VME_OBJECT(entry
);
10907 offset
= VME_OFFSET(entry
);
10908 va
= entry
->vme_start
;
10910 pmap_pageable(dst_map
->pmap
,
10915 while (va
< entry
->vme_end
) {
10917 struct vm_object_fault_info fault_info
= {};
10920 * Look up the page in the object.
10921 * Assert that the page will be found in the
10924 * the object was newly created by
10925 * vm_object_copy_slowly, and has
10926 * copies of all of the pages from
10927 * the source object
10929 * the object was moved from the old
10930 * map entry; because the old map
10931 * entry was wired, all of the pages
10932 * were in the top-level object.
10933 * (XXX not true if we wire pages for
10936 vm_object_lock(object
);
10938 m
= vm_page_lookup(object
, offset
);
10939 if (m
== VM_PAGE_NULL
|| !VM_PAGE_WIRED(m
) ||
10941 panic("vm_map_copyout: wiring %p", m
);
10944 prot
= entry
->protection
;
10946 if (override_nx(dst_map
, VME_ALIAS(entry
)) &&
10948 prot
|= VM_PROT_EXECUTE
;
10951 type_of_fault
= DBG_CACHE_HIT_FAULT
;
10953 fault_info
.user_tag
= VME_ALIAS(entry
);
10954 fault_info
.pmap_options
= 0;
10955 if (entry
->iokit_acct
||
10956 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
10957 fault_info
.pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
10966 FALSE
, /* change_wiring */
10967 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
10969 NULL
, /* need_retry */
10972 vm_object_unlock(object
);
10974 offset
+= PAGE_SIZE_64
;
10983 * Correct the page alignment for the result
10986 *dst_addr
= start
+ (copy
->offset
- vm_copy_start
);
10989 kasan_notify_address(*dst_addr
, size
);
10993 * Update the hints and the map size
10996 if (consume_on_success
) {
10997 SAVE_HINT_MAP_WRITE(dst_map
, vm_map_copy_last_entry(copy
));
10999 SAVE_HINT_MAP_WRITE(dst_map
, last
);
11002 dst_map
->size
+= size
;
11008 if (consume_on_success
) {
11009 vm_map_copy_insert(dst_map
, last
, copy
);
11011 vm_map_copy_remap(dst_map
, last
, copy
, adjustment
,
11012 cur_protection
, max_protection
,
11016 vm_map_unlock(dst_map
);
11019 * XXX If wiring_required, call vm_map_pageable
11022 return KERN_SUCCESS
;
11026 * Routine: vm_map_copyin
11029 * see vm_map_copyin_common. Exported via Unsupported.exports.
11033 #undef vm_map_copyin
11038 vm_map_address_t src_addr
,
11040 boolean_t src_destroy
,
11041 vm_map_copy_t
*copy_result
) /* OUT */
11043 return vm_map_copyin_common(src_map
, src_addr
, len
, src_destroy
,
11044 FALSE
, copy_result
, FALSE
);
/*
 *	Routine:	vm_map_copyin_common
 *
 *	Description:
 *		Copy the specified region (src_addr, len) from the
 *		source address space (src_map), possibly removing
 *		the region from the source address space (src_destroy).
 *
 *	Returns:
 *		A vm_map_copy_t object (copy_result), suitable for
 *		insertion into another address space (using vm_map_copyout),
 *		or for copying over another address space region (using
 *		vm_map_copy_overwrite).  If the copy is unused, it
 *		should be destroyed (using vm_map_copy_discard).
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */
11066 typedef struct submap_map
{
11067 vm_map_t parent_map
;
11068 vm_map_offset_t base_start
;
11069 vm_map_offset_t base_end
;
11070 vm_map_size_t base_len
;
11071 struct submap_map
*next
;
11075 vm_map_copyin_common(
11077 vm_map_address_t src_addr
,
11079 boolean_t src_destroy
,
11080 __unused boolean_t src_volatile
,
11081 vm_map_copy_t
*copy_result
, /* OUT */
11082 boolean_t use_maxprot
)
11088 flags
|= VM_MAP_COPYIN_SRC_DESTROY
;
11091 flags
|= VM_MAP_COPYIN_USE_MAXPROT
;
11093 return vm_map_copyin_internal(src_map
,
11100 vm_map_copyin_internal(
11102 vm_map_address_t src_addr
,
11105 vm_map_copy_t
*copy_result
) /* OUT */
11107 vm_map_entry_t tmp_entry
; /* Result of last map lookup --
11108 * in multi-level lookup, this
11109 * entry contains the actual
11110 * vm_object/offset.
11112 vm_map_entry_t new_entry
= VM_MAP_ENTRY_NULL
; /* Map entry for copy */
11114 vm_map_offset_t src_start
; /* Start of current entry --
11115 * where copy is taking place now
11117 vm_map_offset_t src_end
; /* End of entire region to be
11119 vm_map_offset_t src_base
;
11120 vm_map_t base_map
= src_map
;
11121 boolean_t map_share
= FALSE
;
11122 submap_map_t
*parent_maps
= NULL
;
11124 vm_map_copy_t copy
; /* Resulting copy */
11125 vm_map_address_t copy_addr
;
11126 vm_map_size_t copy_size
;
11127 boolean_t src_destroy
;
11128 boolean_t use_maxprot
;
11129 boolean_t preserve_purgeable
;
11130 boolean_t entry_was_shared
;
11131 vm_map_entry_t saved_src_entry
;
11133 if (flags
& ~VM_MAP_COPYIN_ALL_FLAGS
) {
11134 return KERN_INVALID_ARGUMENT
;
11137 src_destroy
= (flags
& VM_MAP_COPYIN_SRC_DESTROY
) ? TRUE
: FALSE
;
11138 use_maxprot
= (flags
& VM_MAP_COPYIN_USE_MAXPROT
) ? TRUE
: FALSE
;
11139 preserve_purgeable
=
11140 (flags
& VM_MAP_COPYIN_PRESERVE_PURGEABLE
) ? TRUE
: FALSE
;
11143 * Check for copies of zero bytes.
11147 *copy_result
= VM_MAP_COPY_NULL
;
11148 return KERN_SUCCESS
;
11152 * Check that the end address doesn't overflow
11154 src_end
= src_addr
+ len
;
11155 if (src_end
< src_addr
) {
11156 return KERN_INVALID_ADDRESS
;
11160 * Compute (page aligned) start and end of region
11162 src_start
= vm_map_trunc_page(src_addr
,
11163 VM_MAP_PAGE_MASK(src_map
));
11164 src_end
= vm_map_round_page(src_end
,
11165 VM_MAP_PAGE_MASK(src_map
));
11168 * If the copy is sufficiently small, use a kernel buffer instead
11169 * of making a virtual copy. The theory being that the cost of
11170 * setting up VM (and taking C-O-W faults) dominates the copy costs
11171 * for small regions.
11173 if ((len
< msg_ool_size_small
) &&
11175 !preserve_purgeable
&&
11176 !(flags
& VM_MAP_COPYIN_ENTRY_LIST
) &&
11178 * Since the "msg_ool_size_small" threshold was increased and
11179 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11180 * address space limits, we revert to doing a virtual copy if the
11181 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11182 * of the commpage would now fail when it used to work.
11184 (src_start
>= vm_map_min(src_map
) &&
11185 src_start
< vm_map_max(src_map
) &&
11186 src_end
>= vm_map_min(src_map
) &&
11187 src_end
< vm_map_max(src_map
))) {
11188 return vm_map_copyin_kernel_buffer(src_map
, src_addr
, len
,
11189 src_destroy
, copy_result
);
11193 * Allocate a header element for the list.
11195 * Use the start and end in the header to
11196 * remember the endpoints prior to rounding.
11199 copy
= vm_map_copy_allocate();
11200 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
11201 copy
->cpy_hdr
.entries_pageable
= TRUE
;
11203 copy
->cpy_hdr
.page_shift
= src_map
->hdr
.page_shift
;
11206 * The copy entries can be broken down for a variety of reasons,
11207 * so we can't guarantee that they will remain map-aligned...
11208 * Will need to adjust the first copy_entry's "vme_start" and
11209 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11210 * rather than the original map's alignment.
11212 copy
->cpy_hdr
.page_shift
= PAGE_SHIFT
;
11215 vm_map_store_init( &(copy
->cpy_hdr
));
11217 copy
->offset
= src_addr
;
11220 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11222 #define RETURN(x) \
11224 vm_map_unlock(src_map); \
11225 if(src_map != base_map) \
11226 vm_map_deallocate(src_map); \
11227 if (new_entry != VM_MAP_ENTRY_NULL) \
11228 vm_map_copy_entry_dispose(copy,new_entry); \
11229 vm_map_copy_discard(copy); \
11231 submap_map_t *_ptr; \
11233 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11234 parent_maps=parent_maps->next; \
11235 if (_ptr->parent_map != base_map) \
11236 vm_map_deallocate(_ptr->parent_map); \
11237 kfree(_ptr, sizeof(submap_map_t)); \
11244 * Find the beginning of the region.
11247 vm_map_lock(src_map
);
11250 * Lookup the original "src_addr" rather than the truncated
11251 * "src_start", in case "src_start" falls in a non-map-aligned
11252 * map entry *before* the map entry that contains "src_addr"...
11254 if (!vm_map_lookup_entry(src_map
, src_addr
, &tmp_entry
)) {
11255 RETURN(KERN_INVALID_ADDRESS
);
11257 if (!tmp_entry
->is_sub_map
) {
11259 * ... but clip to the map-rounded "src_start" rather than
11260 * "src_addr" to preserve map-alignment. We'll adjust the
11261 * first copy entry at the end, if needed.
11263 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11265 if (src_start
< tmp_entry
->vme_start
) {
11267 * Move "src_start" up to the start of the
11268 * first map entry to copy.
11270 src_start
= tmp_entry
->vme_start
;
11272 /* set for later submap fix-up */
11273 copy_addr
= src_start
;
11276 * Go through entries until we get to the end.
11280 vm_map_entry_t src_entry
= tmp_entry
; /* Top-level entry */
11281 vm_map_size_t src_size
; /* Size of source
11282 * map entry (in both
11286 vm_object_t src_object
; /* Object to copy */
11287 vm_object_offset_t src_offset
;
11289 boolean_t src_needs_copy
; /* Should source map
11290 * be made read-only
11291 * for copy-on-write?
11294 boolean_t new_entry_needs_copy
; /* Will new entry be COW? */
11296 boolean_t was_wired
; /* Was source wired? */
11297 vm_map_version_t version
; /* Version before locks
11298 * dropped to make copy
11300 kern_return_t result
; /* Return value from
11301 * copy_strategically.
11303 while (tmp_entry
->is_sub_map
) {
11304 vm_map_size_t submap_len
;
11307 ptr
= (submap_map_t
*)kalloc(sizeof(submap_map_t
));
11308 ptr
->next
= parent_maps
;
11310 ptr
->parent_map
= src_map
;
11311 ptr
->base_start
= src_start
;
11312 ptr
->base_end
= src_end
;
11313 submap_len
= tmp_entry
->vme_end
- src_start
;
11314 if (submap_len
> (src_end
- src_start
)) {
11315 submap_len
= src_end
- src_start
;
11317 ptr
->base_len
= submap_len
;
11319 src_start
-= tmp_entry
->vme_start
;
11320 src_start
+= VME_OFFSET(tmp_entry
);
11321 src_end
= src_start
+ submap_len
;
11322 src_map
= VME_SUBMAP(tmp_entry
);
11323 vm_map_lock(src_map
);
11324 /* keep an outstanding reference for all maps in */
11325 /* the parents tree except the base map */
11326 vm_map_reference(src_map
);
11327 vm_map_unlock(ptr
->parent_map
);
11328 if (!vm_map_lookup_entry(
11329 src_map
, src_start
, &tmp_entry
)) {
11330 RETURN(KERN_INVALID_ADDRESS
);
11333 if (!tmp_entry
->is_sub_map
) {
11334 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11336 src_entry
= tmp_entry
;
11338 /* we are now in the lowest level submap... */
11340 if ((VME_OBJECT(tmp_entry
) != VM_OBJECT_NULL
) &&
11341 (VME_OBJECT(tmp_entry
)->phys_contiguous
)) {
11342 /* This is not supported for now. In future */
11343 /* we will need to detect the phys_contig */
11344 /* condition and then upgrade copy_slowly */
11345 /* to do physical copy from the device mem */
11346 /* based object. We can piggy-back off of */
11347 /* the was wired boolean to set-up the */
11348 /* proper handling */
11349 RETURN(KERN_PROTECTION_FAILURE
);
11352 * Create a new address map entry to hold the result.
11353 * Fill in the fields from the appropriate source entries.
11354 * We must unlock the source map to do this if we need
11355 * to allocate a map entry.
11357 if (new_entry
== VM_MAP_ENTRY_NULL
) {
11358 version
.main_timestamp
= src_map
->timestamp
;
11359 vm_map_unlock(src_map
);
11361 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11363 vm_map_lock(src_map
);
11364 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
11365 if (!vm_map_lookup_entry(src_map
, src_start
,
11367 RETURN(KERN_INVALID_ADDRESS
);
11369 if (!tmp_entry
->is_sub_map
) {
11370 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11372 continue; /* restart w/ new tmp_entry */
11377 * Verify that the region can be read.
11379 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
11381 (src_entry
->max_protection
& VM_PROT_READ
) == 0) {
11382 RETURN(KERN_PROTECTION_FAILURE
);
11386 * Clip against the endpoints of the entire region.
11389 vm_map_clip_end(src_map
, src_entry
, src_end
);
11391 src_size
= src_entry
->vme_end
- src_start
;
11392 src_object
= VME_OBJECT(src_entry
);
11393 src_offset
= VME_OFFSET(src_entry
);
11394 was_wired
= (src_entry
->wired_count
!= 0);
11396 vm_map_entry_copy(new_entry
, src_entry
);
11397 if (new_entry
->is_sub_map
) {
11398 /* clr address space specifics */
11399 new_entry
->use_pmap
= FALSE
;
11402 * We're dealing with a copy-on-write operation,
11403 * so the resulting mapping should not inherit the
11404 * original mapping's accounting settings.
11405 * "iokit_acct" should have been cleared in
11406 * vm_map_entry_copy().
11407 * "use_pmap" should be reset to its default (TRUE)
11408 * so that the new mapping gets accounted for in
11409 * the task's memory footprint.
11411 assert(!new_entry
->iokit_acct
);
11412 new_entry
->use_pmap
= TRUE
;
11416 * Attempt non-blocking copy-on-write optimizations.
11420 * If we are destroying the source, and the object
11421 * is internal, we could move the object reference
11422 * from the source to the copy. The copy is
11423 * copy-on-write only if the source is.
11424 * We make another reference to the object, because
11425 * destroying the source entry will deallocate it.
11427 * This memory transfer has to be atomic, (to prevent
11428 * the VM object from being shared or copied while
11429 * it's being moved here), so we could only do this
11430 * if we won't have to unlock the VM map until the
11431 * original mapping has been fully removed.
11435 if ((src_object
== VM_OBJECT_NULL
||
11436 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
)) &&
11437 vm_object_copy_quickly(
11438 VME_OBJECT_PTR(new_entry
),
11442 &new_entry_needs_copy
)) {
11443 new_entry
->needs_copy
= new_entry_needs_copy
;
11446 * Handle copy-on-write obligations
11449 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
11452 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
11454 if (override_nx(src_map
, VME_ALIAS(src_entry
))
11456 prot
|= VM_PROT_EXECUTE
;
11459 vm_object_pmap_protect(
11463 (src_entry
->is_shared
?
11466 src_entry
->vme_start
,
11469 assert(tmp_entry
->wired_count
== 0);
11470 tmp_entry
->needs_copy
= TRUE
;
11474 * The map has never been unlocked, so it's safe
11475 * to move to the next entry rather than doing
11479 goto CopySuccessful
;
11482 entry_was_shared
= tmp_entry
->is_shared
;
11485 * Take an object reference, so that we may
11486 * release the map lock(s).
11489 assert(src_object
!= VM_OBJECT_NULL
);
11490 vm_object_reference(src_object
);
11493 * Record the timestamp for later verification.
11497 version
.main_timestamp
= src_map
->timestamp
;
11498 vm_map_unlock(src_map
); /* Increments timestamp once! */
11499 saved_src_entry
= src_entry
;
11500 tmp_entry
= VM_MAP_ENTRY_NULL
;
11501 src_entry
= VM_MAP_ENTRY_NULL
;
11509 vm_object_lock(src_object
);
11510 result
= vm_object_copy_slowly(
11515 VME_OBJECT_PTR(new_entry
));
11516 VME_OFFSET_SET(new_entry
, 0);
11517 new_entry
->needs_copy
= FALSE
;
11518 } else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
11519 (entry_was_shared
|| map_share
)) {
11520 vm_object_t new_object
;
11522 vm_object_lock_shared(src_object
);
11523 new_object
= vm_object_copy_delayed(
11528 if (new_object
== VM_OBJECT_NULL
) {
11532 VME_OBJECT_SET(new_entry
, new_object
);
11533 assert(new_entry
->wired_count
== 0);
11534 new_entry
->needs_copy
= TRUE
;
11535 assert(!new_entry
->iokit_acct
);
11536 assert(new_object
->purgable
== VM_PURGABLE_DENY
);
11537 assertf(new_entry
->use_pmap
, "src_map %p new_entry %p\n", src_map
, new_entry
);
11538 result
= KERN_SUCCESS
;
11540 vm_object_offset_t new_offset
;
11541 new_offset
= VME_OFFSET(new_entry
);
11542 result
= vm_object_copy_strategically(src_object
,
11545 VME_OBJECT_PTR(new_entry
),
11547 &new_entry_needs_copy
);
11548 if (new_offset
!= VME_OFFSET(new_entry
)) {
11549 VME_OFFSET_SET(new_entry
, new_offset
);
11552 new_entry
->needs_copy
= new_entry_needs_copy
;
11555 if (result
== KERN_SUCCESS
&&
11556 preserve_purgeable
&&
11557 src_object
->purgable
!= VM_PURGABLE_DENY
) {
11558 vm_object_t new_object
;
11560 new_object
= VME_OBJECT(new_entry
);
11561 assert(new_object
!= src_object
);
11562 vm_object_lock(new_object
);
11563 assert(new_object
->ref_count
== 1);
11564 assert(new_object
->shadow
== VM_OBJECT_NULL
);
11565 assert(new_object
->copy
== VM_OBJECT_NULL
);
11566 assert(new_object
->vo_owner
== NULL
);
11568 new_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
11569 new_object
->true_share
= TRUE
;
11570 /* start as non-volatile with no owner... */
11571 new_object
->purgable
= VM_PURGABLE_NONVOLATILE
;
11572 vm_purgeable_nonvolatile_enqueue(new_object
, NULL
);
11573 /* ... and move to src_object's purgeable state */
11574 if (src_object
->purgable
!= VM_PURGABLE_NONVOLATILE
) {
11576 state
= src_object
->purgable
;
11577 vm_object_purgable_control(
11579 VM_PURGABLE_SET_STATE_FROM_KERNEL
,
11582 vm_object_unlock(new_object
);
11583 new_object
= VM_OBJECT_NULL
;
11584 /* no pmap accounting for purgeable objects */
11585 new_entry
->use_pmap
= FALSE
;
11588 if (result
!= KERN_SUCCESS
&&
11589 result
!= KERN_MEMORY_RESTART_COPY
) {
11590 vm_map_lock(src_map
);
11595 * Throw away the extra reference
11598 vm_object_deallocate(src_object
);
11601 * Verify that the map has not substantially
11602 * changed while the copy was being made.
11605 vm_map_lock(src_map
);
11607 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
) {
11608 /* src_map hasn't changed: src_entry is still valid */
11609 src_entry
= saved_src_entry
;
11610 goto VerificationSuccessful
;
11614 * Simple version comparison failed.
11616 * Retry the lookup and verify that the
11617 * same object/offset are still present.
11619 * [Note: a memory manager that colludes with
11620 * the calling task can detect that we have
11621 * cheated. While the map was unlocked, the
11622 * mapping could have been changed and restored.]
11625 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
11626 if (result
!= KERN_MEMORY_RESTART_COPY
) {
11627 vm_object_deallocate(VME_OBJECT(new_entry
));
11628 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
11629 /* reset accounting state */
11630 new_entry
->iokit_acct
= FALSE
;
11631 new_entry
->use_pmap
= TRUE
;
11633 RETURN(KERN_INVALID_ADDRESS
);
11636 src_entry
= tmp_entry
;
11637 vm_map_clip_start(src_map
, src_entry
, src_start
);
11639 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
11641 ((src_entry
->max_protection
& VM_PROT_READ
) == 0)) {
11642 goto VerificationFailed
;
11645 if (src_entry
->vme_end
< new_entry
->vme_end
) {
11647 * This entry might have been shortened
11648 * (vm_map_clip_end) or been replaced with
11649 * an entry that ends closer to "src_start"
11651 * Adjust "new_entry" accordingly; copying
11652 * less memory would be correct but we also
11653 * redo the copy (see below) if the new entry
11654 * no longer points at the same object/offset.
11656 assert(VM_MAP_PAGE_ALIGNED(src_entry
->vme_end
,
11657 VM_MAP_COPY_PAGE_MASK(copy
)));
11658 new_entry
->vme_end
= src_entry
->vme_end
;
11659 src_size
= new_entry
->vme_end
- src_start
;
11660 } else if (src_entry
->vme_end
> new_entry
->vme_end
) {
11662 * This entry might have been extended
11663 * (vm_map_entry_simplify() or coalesce)
11664 * or been replaced with an entry that ends farther
11665 * from "src_start" than before.
11667 * We've called vm_object_copy_*() only on
11668 * the previous <start:end> range, so we can't
11669 * just extend new_entry. We have to re-do
11670 * the copy based on the new entry as if it was
11671 * pointing at a different object/offset (see
11672 * "Verification failed" below).
11676 if ((VME_OBJECT(src_entry
) != src_object
) ||
11677 (VME_OFFSET(src_entry
) != src_offset
) ||
11678 (src_entry
->vme_end
> new_entry
->vme_end
)) {
11680 * Verification failed.
11682 * Start over with this top-level entry.
11685 VerificationFailed
: ;
11687 vm_object_deallocate(VME_OBJECT(new_entry
));
11688 tmp_entry
= src_entry
;
11693 * Verification succeeded.
11696 VerificationSuccessful
:;
11698 if (result
== KERN_MEMORY_RESTART_COPY
) {
11709 * Link in the new copy entry.
11712 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
11716 * Determine whether the entire region
11719 src_base
= src_start
;
11720 src_start
= new_entry
->vme_end
;
11721 new_entry
= VM_MAP_ENTRY_NULL
;
11722 while ((src_start
>= src_end
) && (src_end
!= 0)) {
11725 if (src_map
== base_map
) {
11726 /* back to the top */
11731 assert(ptr
!= NULL
);
11732 parent_maps
= parent_maps
->next
;
11734 /* fix up the damage we did in that submap */
11735 vm_map_simplify_range(src_map
,
11739 vm_map_unlock(src_map
);
11740 vm_map_deallocate(src_map
);
11741 vm_map_lock(ptr
->parent_map
);
11742 src_map
= ptr
->parent_map
;
11743 src_base
= ptr
->base_start
;
11744 src_start
= ptr
->base_start
+ ptr
->base_len
;
11745 src_end
= ptr
->base_end
;
11746 if (!vm_map_lookup_entry(src_map
,
11749 (src_end
> src_start
)) {
11750 RETURN(KERN_INVALID_ADDRESS
);
11752 kfree(ptr
, sizeof(submap_map_t
));
11753 if (parent_maps
== NULL
) {
11756 src_entry
= tmp_entry
->vme_prev
;
11759 if ((VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) &&
11760 (src_start
>= src_addr
+ len
) &&
11761 (src_addr
+ len
!= 0)) {
11763 * Stop copying now, even though we haven't reached
11764 * "src_end". We'll adjust the end of the last copy
11765 * entry at the end, if needed.
11767 * If src_map's alignment is different from the
11768 * system's page-alignment, there could be
11769 * extra non-map-aligned map entries between
11770 * the original (non-rounded) "src_addr + len"
11771 * and the rounded "src_end".
11772 * We do not want to copy those map entries since
11773 * they're not part of the copied range.
11778 if ((src_start
>= src_end
) && (src_end
!= 0)) {
11783 * Verify that there are no gaps in the region
11786 tmp_entry
= src_entry
->vme_next
;
11787 if ((tmp_entry
->vme_start
!= src_start
) ||
11788 (tmp_entry
== vm_map_to_entry(src_map
))) {
11789 RETURN(KERN_INVALID_ADDRESS
);
11794 * If the source should be destroyed, do it now, since the
11795 * copy was successful.
11798 (void) vm_map_delete(
11800 vm_map_trunc_page(src_addr
,
11801 VM_MAP_PAGE_MASK(src_map
)),
11803 ((src_map
== kernel_map
) ?
11804 VM_MAP_REMOVE_KUNWIRE
:
11805 VM_MAP_REMOVE_NO_FLAGS
),
11808 /* fix up the damage we did in the base map */
11809 vm_map_simplify_range(
11811 vm_map_trunc_page(src_addr
,
11812 VM_MAP_PAGE_MASK(src_map
)),
11813 vm_map_round_page(src_end
,
11814 VM_MAP_PAGE_MASK(src_map
)));
11817 vm_map_unlock(src_map
);
11818 tmp_entry
= VM_MAP_ENTRY_NULL
;
11820 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) {
11821 vm_map_offset_t original_start
, original_offset
, original_end
;
11823 assert(VM_MAP_COPY_PAGE_MASK(copy
) == PAGE_MASK
);
11825 /* adjust alignment of first copy_entry's "vme_start" */
11826 tmp_entry
= vm_map_copy_first_entry(copy
);
11827 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
11828 vm_map_offset_t adjustment
;
11830 original_start
= tmp_entry
->vme_start
;
11831 original_offset
= VME_OFFSET(tmp_entry
);
11833 /* map-align the start of the first copy entry... */
11834 adjustment
= (tmp_entry
->vme_start
-
11836 tmp_entry
->vme_start
,
11837 VM_MAP_PAGE_MASK(src_map
)));
11838 tmp_entry
->vme_start
-= adjustment
;
11839 VME_OFFSET_SET(tmp_entry
,
11840 VME_OFFSET(tmp_entry
) - adjustment
);
11841 copy_addr
-= adjustment
;
11842 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
11843 /* ... adjust for mis-aligned start of copy range */
11845 (vm_map_trunc_page(copy
->offset
,
11847 vm_map_trunc_page(copy
->offset
,
11848 VM_MAP_PAGE_MASK(src_map
)));
11850 assert(page_aligned(adjustment
));
11851 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
11852 tmp_entry
->vme_start
+= adjustment
;
11853 VME_OFFSET_SET(tmp_entry
,
11854 (VME_OFFSET(tmp_entry
) +
11856 copy_addr
+= adjustment
;
11857 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
11861 * Assert that the adjustments haven't exposed
11862 * more than was originally copied...
11864 assert(tmp_entry
->vme_start
>= original_start
);
11865 assert(VME_OFFSET(tmp_entry
) >= original_offset
);
11867 * ... and that it did not adjust outside of
11868 * a single 16K page.
11870 assert(vm_map_trunc_page(tmp_entry
->vme_start
,
11871 VM_MAP_PAGE_MASK(src_map
)) ==
11872 vm_map_trunc_page(original_start
,
11873 VM_MAP_PAGE_MASK(src_map
)));
11876 /* adjust alignment of last copy_entry's "vme_end" */
11877 tmp_entry
= vm_map_copy_last_entry(copy
);
11878 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
11879 vm_map_offset_t adjustment
;
11881 original_end
= tmp_entry
->vme_end
;
11883 /* map-align the end of the last copy entry... */
11884 tmp_entry
->vme_end
=
11885 vm_map_round_page(tmp_entry
->vme_end
,
11886 VM_MAP_PAGE_MASK(src_map
));
11887 /* ... adjust for mis-aligned end of copy range */
11889 (vm_map_round_page((copy
->offset
+
11891 VM_MAP_PAGE_MASK(src_map
)) -
11892 vm_map_round_page((copy
->offset
+
11896 assert(page_aligned(adjustment
));
11897 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
11898 tmp_entry
->vme_end
-= adjustment
;
11899 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
11903 * Assert that the adjustments haven't exposed
11904 * more than was originally copied...
11906 assert(tmp_entry
->vme_end
<= original_end
);
11908 * ... and that it did not adjust outside of
11909 * a single 16K page.
11911 assert(vm_map_round_page(tmp_entry
->vme_end
,
11912 VM_MAP_PAGE_MASK(src_map
)) ==
11913 vm_map_round_page(original_end
,
11914 VM_MAP_PAGE_MASK(src_map
)));
11918 /* Fix-up start and end points in copy. This is necessary */
11919 /* when the various entries in the copy object were picked */
11920 /* up from different sub-maps */
11922 tmp_entry
= vm_map_copy_first_entry(copy
);
11923 copy_size
= 0; /* compute actual size */
11924 while (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
11925 assert(VM_MAP_PAGE_ALIGNED(
11926 copy_addr
+ (tmp_entry
->vme_end
-
11927 tmp_entry
->vme_start
),
11928 VM_MAP_COPY_PAGE_MASK(copy
)));
11929 assert(VM_MAP_PAGE_ALIGNED(
11931 VM_MAP_COPY_PAGE_MASK(copy
)));
11934 * The copy_entries will be injected directly into the
11935 * destination map and might not be "map aligned" there...
11937 tmp_entry
->map_aligned
= FALSE
;
11939 tmp_entry
->vme_end
= copy_addr
+
11940 (tmp_entry
->vme_end
- tmp_entry
->vme_start
);
11941 tmp_entry
->vme_start
= copy_addr
;
11942 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
11943 copy_addr
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
11944 copy_size
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
11945 tmp_entry
= (struct vm_map_entry
*)tmp_entry
->vme_next
;
11948 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
&&
11949 copy_size
< copy
->size
) {
11951 * The actual size of the VM map copy is smaller than what
11952 * was requested by the caller. This must be because some
11953 * PAGE_SIZE-sized pages are missing at the end of the last
11954 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11955 * The caller might not have been aware of those missing
11956 * pages and might not want to be aware of it, which is
11957 * fine as long as they don't try to access (and crash on)
11958 * those missing pages.
11959 * Let's adjust the size of the "copy", to avoid failing
11960 * in vm_map_copyout() or vm_map_copy_overwrite().
11962 assert(vm_map_round_page(copy_size
,
11963 VM_MAP_PAGE_MASK(src_map
)) ==
11964 vm_map_round_page(copy
->size
,
11965 VM_MAP_PAGE_MASK(src_map
)));
11966 copy
->size
= copy_size
;
11969 *copy_result
= copy
;
11970 return KERN_SUCCESS
;
/*
 * vm_map_copy_extract, as far as this listing shows:
 *   - computes src_end = src_addr + len and returns KERN_INVALID_ADDRESS
 *     when the sum is smaller than src_addr (address wrap-around);
 *   - page-aligns the range with vm_map_trunc_page()/vm_map_round_page()
 *     using PAGE_MASK;
 *   - allocates a VM_MAP_COPY_ENTRY_LIST copy whose header is marked
 *     entries_pageable and initializes the header's store;
 *   - calls vm_map_remap_extract() on src_map and discards the copy when
 *     that call does not return KERN_SUCCESS;
 *   - otherwise stores the copy in *copy_result and returns KERN_SUCCESS.
 * cur_prot and max_prot are OUT parameters (per the annotations below).
 * NOTE(review): several original lines (return type, some parameters,
 * the zero-length condition, most vm_map_remap_extract() arguments and
 * the failure return) are absent from this listing -- confirm against
 * the full source before relying on this summary.
 */
11976 vm_map_copy_extract(
11978 vm_map_address_t src_addr
,
11980 vm_map_copy_t
*copy_result
, /* OUT */
11981 vm_prot_t
*cur_prot
, /* OUT */
11982 vm_prot_t
*max_prot
)
11984 vm_map_offset_t src_start
, src_end
;
11985 vm_map_copy_t copy
;
11989 * Check for copies of zero bytes.
11993 *copy_result
= VM_MAP_COPY_NULL
;
11994 return KERN_SUCCESS
;
11998 * Check that the end address doesn't overflow
12000 src_end
= src_addr
+ len
;
12001 if (src_end
< src_addr
) {
12002 return KERN_INVALID_ADDRESS
;
12006 * Compute (page aligned) start and end of region
12008 src_start
= vm_map_trunc_page(src_addr
, PAGE_MASK
);
12009 src_end
= vm_map_round_page(src_end
, PAGE_MASK
);
12012 * Allocate a header element for the list.
12014 * Use the start and end in the header to
12015 * remember the endpoints prior to rounding.
12018 copy
= vm_map_copy_allocate();
12019 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
12020 copy
->cpy_hdr
.entries_pageable
= TRUE
;
/* The store is initialized on the copy's own header: &copy->cpy_hdr. */
12022 vm_map_store_init(&copy
->cpy_hdr
);
12027 kr
= vm_map_remap_extract(src_map
,
12035 TRUE
, /* pageable */
12036 FALSE
, /* same_map */
12037 VM_MAP_KERNEL_FLAGS_NONE
);
12038 if (kr
!= KERN_SUCCESS
) {
/* Extraction failed: release the partially built copy. */
12039 vm_map_copy_discard(copy
);
12043 *copy_result
= copy
;
12044 return KERN_SUCCESS
;
12048 * vm_map_copyin_object:
12050 * Create a copy object from an object.
12051 * Our caller donates an object reference.
/*
 * Visible behavior: allocates a vm_map_copy_t, tags it VM_MAP_COPY_OBJECT,
 * records `object` in cpy_object and `offset` in the copy's offset field,
 * hands the copy back through *copy_result and returns KERN_SUCCESS.
 * No failure path is visible in this listing.
 * NOTE(review): the `size` parameter is not visibly stored here; original
 * lines 12072-12073 are missing from this listing -- confirm against the
 * full source.
 */
12055 vm_map_copyin_object(
12056 vm_object_t object
,
12057 vm_object_offset_t offset
, /* offset of region in object */
12058 vm_object_size_t size
, /* size of region in object */
12059 vm_map_copy_t
*copy_result
) /* OUT */
12061 vm_map_copy_t copy
; /* Resulting copy */
12064 * We drop the object into a special copy object
12065 * that contains the object directly.
12068 copy
= vm_map_copy_allocate();
12069 copy
->type
= VM_MAP_COPY_OBJECT
;
12070 copy
->cpy_object
= object
;
12071 copy
->offset
= offset
;
12074 *copy_result
= copy
;
12075 return KERN_SUCCESS
;
/*
 * Body of the fork "share" path (the panic string below names it
 * vm_map_fork_share). Summary of what the visible lines do:
 *   1. Pick the right backing object for old_entry:
 *      - submap entry: asserts it is unwired and, when nested pmaps are
 *        compiled in and the entry uses the pmap, calls pmap_nest() on
 *        new_map's pmap (a panic follows; its guarding condition is not
 *        in this listing);
 *      - no object yet: allocates one sized to the entry, resets the
 *        entry offset to 0 and sets use_pmap;
 *      - copy strategy other than MEMORY_OBJECT_COPY_SYMMETRIC: nothing
 *        to do beyond asserting !needs_copy;
 *      - otherwise (cases 1-3 described below): shadows the object with
 *        VME_OBJECT_SHADOW(), write-protects the existing mappings when
 *        the entry was writable and not needs_copy, and clears
 *        needs_copy.
 *   2. Takes a reference on the submap or on the object; an object still
 *      using the symmetric strategy is switched to
 *      MEMORY_OBJECT_COPY_DELAY while its lock is held.
 *   3. Creates a new entry in new_map as a copy of old_entry, marks both
 *      entries is_shared, strips VM_PROT_WRITE from the new entry when
 *      the old entry's inheritance is VM_INHERIT_NONE, and links the new
 *      entry after the last entry of new_map.
 *   4. Calls pmap_copy() from old_map's pmap to new_map's pmap.
 *      NOTE(review): presumably only for non-submap entries -- the
 *      `else` line is not in this listing; confirm.
 * NOTE(review): the function's name line, return type, braces and
 * several statements are absent from this listing.
 */
12081 vm_map_entry_t old_entry
,
12084 vm_object_t object
;
12085 vm_map_entry_t new_entry
;
12088 * New sharing code. New map entry
12089 * references original object. Internal
12090 * objects use asynchronous copy algorithm for
12091 * future copies. First make sure we have
12092 * the right object. If we need a shadow,
12093 * or someone else already has one, then
12094 * make a new shadow and share it.
12097 object
= VME_OBJECT(old_entry
);
12098 if (old_entry
->is_sub_map
) {
12099 assert(old_entry
->wired_count
== 0);
12100 #ifndef NO_NESTED_PMAP
12101 if (old_entry
->use_pmap
) {
12102 kern_return_t result
;
12104 result
= pmap_nest(new_map
->pmap
,
12105 (VME_SUBMAP(old_entry
))->pmap
,
12106 (addr64_t
)old_entry
->vme_start
,
12107 (addr64_t
)old_entry
->vme_start
,
12108 (uint64_t)(old_entry
->vme_end
- old_entry
->vme_start
));
12110 panic("vm_map_fork_share: pmap_nest failed!");
12113 #endif /* NO_NESTED_PMAP */
12114 } else if (object
== VM_OBJECT_NULL
) {
12115 object
= vm_object_allocate((vm_map_size_t
)(old_entry
->vme_end
-
12116 old_entry
->vme_start
));
12117 VME_OFFSET_SET(old_entry
, 0);
12118 VME_OBJECT_SET(old_entry
, object
);
12119 old_entry
->use_pmap
= TRUE
;
12120 // assert(!old_entry->needs_copy);
12121 } else if (object
->copy_strategy
!=
12122 MEMORY_OBJECT_COPY_SYMMETRIC
) {
12124 * We are already using an asymmetric
12125 * copy, and therefore we already have
12126 * the right object.
12129 assert(!old_entry
->needs_copy
);
12130 } else if (old_entry
->needs_copy
|| /* case 1 */
12131 object
->shadowed
|| /* case 2 */
12132 (!object
->true_share
&& /* case 3 */
12133 !old_entry
->is_shared
&&
12135 (vm_map_size_t
)(old_entry
->vme_end
-
12136 old_entry
->vme_start
)))) {
12138 * We need to create a shadow.
12139 * There are three cases here.
12140 * In the first case, we need to
12141 * complete a deferred symmetrical
12142 * copy that we participated in.
12143 * In the second and third cases,
12144 * we need to create the shadow so
12145 * that changes that we make to the
12146 * object do not interfere with
12147 * any symmetrical copies which
12148 * have occured (case 2) or which
12149 * might occur (case 3).
12151 * The first case is when we had
12152 * deferred shadow object creation
12153 * via the entry->needs_copy mechanism.
12154 * This mechanism only works when
12155 * only one entry points to the source
12156 * object, and we are about to create
12157 * a second entry pointing to the
12158 * same object. The problem is that
12159 * there is no way of mapping from
12160 * an object to the entries pointing
12161 * to it. (Deferred shadow creation
12162 * works with one entry because occurs
12163 * at fault time, and we walk from the
12164 * entry to the object when handling
12167 * The second case is when the object
12168 * to be shared has already been copied
12169 * with a symmetric copy, but we point
12170 * directly to the object without
12171 * needs_copy set in our entry. (This
12172 * can happen because different ranges
12173 * of an object can be pointed to by
12174 * different entries. In particular,
12175 * a single entry pointing to an object
12176 * can be split by a call to vm_inherit,
12177 * which, combined with task_create, can
12178 * result in the different entries
12179 * having different needs_copy values.)
12180 * The shadowed flag in the object allows
12181 * us to detect this case. The problem
12182 * with this case is that if this object
12183 * has or will have shadows, then we
12184 * must not perform an asymmetric copy
12185 * of this object, since such a copy
12186 * allows the object to be changed, which
12187 * will break the previous symmetrical
12188 * copies (which rely upon the object
12189 * not changing). In a sense, the shadowed
12190 * flag says "don't change this object".
12191 * We fix this by creating a shadow
12192 * object for this object, and sharing
12193 * that. This works because we are free
12194 * to change the shadow object (and thus
12195 * to use an asymmetric copy strategy);
12196 * this is also semantically correct,
12197 * since this object is temporary, and
12198 * therefore a copy of the object is
12199 * as good as the object itself. (This
12200 * is not true for permanent objects,
12201 * since the pager needs to see changes,
12202 * which won't happen if the changes
12203 * are made to a copy.)
12205 * The third case is when the object
12206 * to be shared has parts sticking
12207 * outside of the entry we're working
12208 * with, and thus may in the future
12209 * be subject to a symmetrical copy.
12210 * (This is a preemptive version of
12213 VME_OBJECT_SHADOW(old_entry
,
12214 (vm_map_size_t
) (old_entry
->vme_end
-
12215 old_entry
->vme_start
));
12218 * If we're making a shadow for other than
12219 * copy on write reasons, then we have
12220 * to remove write permission.
12223 if (!old_entry
->needs_copy
&&
12224 (old_entry
->protection
& VM_PROT_WRITE
)) {
12227 assert(!pmap_has_prot_policy(old_entry
->protection
));
12229 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
12231 assert(!pmap_has_prot_policy(prot
));
12233 if (override_nx(old_map
, VME_ALIAS(old_entry
)) && prot
) {
12234 prot
|= VM_PROT_EXECUTE
;
/* Maps visible through other pmaps are protected via the object; otherwise only old_map's pmap is touched. */
12238 if (old_map
->mapped_in_other_pmaps
) {
12239 vm_object_pmap_protect(
12240 VME_OBJECT(old_entry
),
12241 VME_OFFSET(old_entry
),
12242 (old_entry
->vme_end
-
12243 old_entry
->vme_start
),
12245 old_entry
->vme_start
,
12248 pmap_protect(old_map
->pmap
,
12249 old_entry
->vme_start
,
12250 old_entry
->vme_end
,
12255 old_entry
->needs_copy
= FALSE
;
/* Re-read the object: VME_OBJECT_SHADOW() above replaced the entry's object. */
12256 object
= VME_OBJECT(old_entry
);
12261 * If object was using a symmetric copy strategy,
12262 * change its copy strategy to the default
12263 * asymmetric copy strategy, which is copy_delay
12264 * in the non-norma case and copy_call in the
12265 * norma case. Bump the reference count for the
12269 if (old_entry
->is_sub_map
) {
12270 vm_map_lock(VME_SUBMAP(old_entry
));
12271 vm_map_reference(VME_SUBMAP(old_entry
));
12272 vm_map_unlock(VME_SUBMAP(old_entry
));
12274 vm_object_lock(object
);
12275 vm_object_reference_locked(object
);
12276 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
12277 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
12279 vm_object_unlock(object
);
12283 * Clone the entry, using object ref from above.
12284 * Mark both entries as shared.
12287 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* Never the kernel
12288 * map or descendants */
12289 vm_map_entry_copy(new_entry
, old_entry
);
12290 old_entry
->is_shared
= TRUE
;
12291 new_entry
->is_shared
= TRUE
;
12294 * We're dealing with a shared mapping, so the resulting mapping
12295 * should inherit some of the original mapping's accounting settings.
12296 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12297 * "use_pmap" should stay the same as before (if it hasn't been reset
12298 * to TRUE when we cleared "iokit_acct").
12300 assert(!new_entry
->iokit_acct
);
12303 * If old entry's inheritence is VM_INHERIT_NONE,
12304 * the new entry is for corpse fork, remove the
12305 * write permission from the new entry.
12307 if (old_entry
->inheritance
== VM_INHERIT_NONE
) {
12308 new_entry
->protection
&= ~VM_PROT_WRITE
;
12309 new_entry
->max_protection
&= ~VM_PROT_WRITE
;
12313 * Insert the entry into the new map -- we
12314 * know we're inserting at the end of the new
12318 vm_map_store_entry_link(new_map
, vm_map_last_entry(new_map
), new_entry
,
12319 VM_MAP_KERNEL_FLAGS_NONE
);
12322 * Update the physical map
12325 if (old_entry
->is_sub_map
) {
12326 /* Bill Angell pmap support goes here */
12328 pmap_copy(new_map
->pmap
, old_map
->pmap
, new_entry
->vme_start
,
12329 old_entry
->vme_end
- old_entry
->vme_start
,
12330 old_entry
->vme_start
);
/*
 * Slow-path copy of one entry during fork (called as vm_map_fork_copy()
 * further down in this file). Visible behavior:
 *   - snapshots the entry's start address and size, then unlocks old_map;
 *   - ORs VM_MAP_COPYIN_USE_MAXPROT into vm_map_copyin_flags and calls
 *     vm_map_copyin_internal() for [start, start + entry_size), receiving
 *     the result in the local `copy`;
 *   - failure path: re-locks old_map, looks `start` up again (the map may
 *     have changed while unlocked), steps to the next entry when the
 *     address is unmapped or can never be read, and stores that entry in
 *     *old_entry_p;
 *   - success path: inserts the copy into new_map after `last`, re-locks
 *     old_map, advances `start` by entry_size, looks the address up,
 *     clips the found entry at `start` when it does not already begin
 *     there, and stores it in *old_entry_p.
 * NOTE(review): the name line, return type, two parameters, braces and
 * the return statements are absent from this listing -- confirm the
 * return values against the full source.
 */
12337 vm_map_entry_t
*old_entry_p
,
12339 int vm_map_copyin_flags
)
12341 vm_map_entry_t old_entry
= *old_entry_p
;
12342 vm_map_size_t entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
12343 vm_map_offset_t start
= old_entry
->vme_start
;
12344 vm_map_copy_t copy
;
12345 vm_map_entry_t last
= vm_map_last_entry(new_map
);
12347 vm_map_unlock(old_map
);
12349 * Use maxprot version of copyin because we
12350 * care about whether this memory can ever
12351 * be accessed, not just whether it's accessible
12354 vm_map_copyin_flags
|= VM_MAP_COPYIN_USE_MAXPROT
;
/* The resulting copy is returned through the local variable: &copy. */
12355 if (vm_map_copyin_internal(old_map
, start
, entry_size
,
12356 vm_map_copyin_flags
, &copy
)
12359 * The map might have changed while it
12360 * was unlocked, check it again. Skip
12361 * any blank space or permanently
12362 * unreadable region.
12364 vm_map_lock(old_map
);
12365 if (!vm_map_lookup_entry(old_map
, start
, &last
) ||
12366 (last
->max_protection
& VM_PROT_READ
) == VM_PROT_NONE
) {
12367 last
= last
->vme_next
;
12369 *old_entry_p
= last
;
12372 * XXX For some error returns, want to
12373 * XXX skip to the next element. Note
12374 * that INVALID_ADDRESS and
12375 * PROTECTION_FAILURE are handled above.
12382 * Insert the copy into the new map
12385 vm_map_copy_insert(new_map
, last
, copy
);
12388 * Pick up the traversal at the end of
12389 * the copied region.
12392 vm_map_lock(old_map
);
12393 start
+= entry_size
;
12394 if (!vm_map_lookup_entry(old_map
, start
, &last
)) {
12395 last
= last
->vme_next
;
12397 if (last
->vme_start
== start
) {
12399 * No need to clip here and we don't
12400 * want to cause any unnecessary
12404 vm_map_clip_start(old_map
, last
, start
);
12407 *old_entry_p
= last
;
12415 * Create and return a new map based on the old
12416 * map, according to the inheritance values on the
12417 * regions in that map and the options.
12419 * The source map must not be locked.
/*
 * Outline of the visible steps:
 *   1. Reject any option bit other than VM_MAP_FORK_SHARE_IF_INHERIT_NONE,
 *      VM_MAP_FORK_PRESERVE_PURGEABLE and VM_MAP_FORK_CORPSE_FOOTPRINT by
 *      returning VM_MAP_NULL.
 *   2. Build pmap creation flags (64-bit, and on HAS_APPLE_PAC builds the
 *      disable_jop setting of the old pmap) and create the new pmap.
 *   3. Take a swap reference on old_map and lock it; create new_map with
 *      old_map's min/max offsets, lock it, commit the page-zero status
 *      and inherit old_map's page shift.
 *   4. Walk old_map's entries. For each entry the effective inheritance
 *      is computed (VM_INHERIT_NONE may be upgraded to VM_INHERIT_SHARE
 *      under VM_MAP_FORK_SHARE_IF_INHERIT_NONE unless the entry is backed
 *      by a device pager), corpse footprint data is optionally collected,
 *      and then:
 *        - VM_INHERIT_SHARE -> vm_map_fork_share();
 *        - VM_INHERIT_COPY  -> an inlined quick copy using
 *          vm_object_copy_quickly(), falling back to vm_map_fork_copy()
 *          at the slow_vm_map_fork_copy label for wired or true_share
 *          entries or when the quick copy fails.
 *      new_size accumulates the sizes of the entries that were carried
 *      over.
 *   5. After the walk: on arm64 insert the shared page, record
 *      new_map->size, finish corpse footprint collection if requested,
 *      unlock both maps and drop the reference on old_map.
 * NOTE(review): the function's name line, parameter list, loop header,
 * `break` statements, braces and the final return are absent from this
 * listing; confirm details against the full source.
 */
12429 vm_map_entry_t old_entry
;
12430 vm_map_size_t new_size
= 0, entry_size
;
12431 vm_map_entry_t new_entry
;
12432 boolean_t src_needs_copy
;
12433 boolean_t new_entry_needs_copy
;
12434 boolean_t pmap_is64bit
;
12435 int vm_map_copyin_flags
;
12436 vm_inherit_t old_entry_inheritance
;
12437 int map_create_options
;
12438 kern_return_t footprint_collect_kr
;
12440 if (options
& ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE
|
12441 VM_MAP_FORK_PRESERVE_PURGEABLE
|
12442 VM_MAP_FORK_CORPSE_FOOTPRINT
)) {
12443 /* unsupported option */
12444 return VM_MAP_NULL
;
12448 #if defined(__i386__) || defined(__x86_64__)
12449 old_map
->pmap
->pm_task_map
!= TASK_MAP_32BIT
;
12450 #elif defined(__arm64__)
12451 old_map
->pmap
->max
== MACH_VM_MAX_ADDRESS
;
12452 #elif defined(__arm__)
12455 #error Unknown architecture.
12458 unsigned int pmap_flags
= 0;
12459 pmap_flags
|= pmap_is64bit
? PMAP_CREATE_64BIT
: 0;
12460 #if defined(HAS_APPLE_PAC)
12461 pmap_flags
|= old_map
->pmap
->disable_jop
? PMAP_CREATE_DISABLE_JOP
: 0;
12463 new_pmap
= pmap_create_options(ledger
, (vm_map_size_t
) 0, pmap_flags
);
12465 vm_map_reference_swap(old_map
);
12466 vm_map_lock(old_map
);
12468 map_create_options
= 0;
12469 if (old_map
->hdr
.entries_pageable
) {
12470 map_create_options
|= VM_MAP_CREATE_PAGEABLE
;
12472 if (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) {
12473 map_create_options
|= VM_MAP_CREATE_CORPSE_FOOTPRINT
;
12474 footprint_collect_kr
= KERN_SUCCESS
;
12476 new_map
= vm_map_create_options(new_pmap
,
12477 old_map
->min_offset
,
12478 old_map
->max_offset
,
12479 map_create_options
);
12480 vm_map_lock(new_map
);
12481 vm_commit_pagezero_status(new_map
);
12482 /* inherit the parent map's page size */
12483 vm_map_set_page_shift(new_map
, VM_MAP_PAGE_SHIFT(old_map
));
12485 old_entry
= vm_map_first_entry(old_map
);
12486 old_entry
!= vm_map_to_entry(old_map
);
12488 entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
12490 old_entry_inheritance
= old_entry
->inheritance
;
12492 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12493 * share VM_INHERIT_NONE entries that are not backed by a
12496 if (old_entry_inheritance
== VM_INHERIT_NONE
&&
12497 (options
& VM_MAP_FORK_SHARE_IF_INHERIT_NONE
) &&
12498 !(!old_entry
->is_sub_map
&&
12499 VME_OBJECT(old_entry
) != NULL
&&
12500 VME_OBJECT(old_entry
)->pager
!= NULL
&&
12501 is_device_pager_ops(
12502 VME_OBJECT(old_entry
)->pager
->mo_pager_ops
))) {
12503 old_entry_inheritance
= VM_INHERIT_SHARE
;
12506 if (old_entry_inheritance
!= VM_INHERIT_NONE
&&
12507 (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) &&
12508 footprint_collect_kr
== KERN_SUCCESS
) {
12510 * The corpse won't have old_map->pmap to query
12511 * footprint information, so collect that data now
12512 * and store it in new_map->vmmap_corpse_footprint
12513 * for later autopsy.
12515 footprint_collect_kr
=
12516 vm_map_corpse_footprint_collect(old_map
,
12521 switch (old_entry_inheritance
) {
12522 case VM_INHERIT_NONE
:
12525 case VM_INHERIT_SHARE
:
12526 vm_map_fork_share(old_map
, old_entry
, new_map
);
12527 new_size
+= entry_size
;
12530 case VM_INHERIT_COPY
:
12533 * Inline the copy_quickly case;
12534 * upon failure, fall back on call
12535 * to vm_map_fork_copy.
12538 if (old_entry
->is_sub_map
) {
/* Wired entries and true_share objects always take the slow path. */
12541 if ((old_entry
->wired_count
!= 0) ||
12542 ((VME_OBJECT(old_entry
) != NULL
) &&
12543 (VME_OBJECT(old_entry
)->true_share
))) {
12544 goto slow_vm_map_fork_copy
;
12547 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* never the kernel map or descendants */
12548 vm_map_entry_copy(new_entry
, old_entry
);
12549 if (new_entry
->is_sub_map
) {
12550 /* clear address space specifics */
12551 new_entry
->use_pmap
= FALSE
;
12554 * We're dealing with a copy-on-write operation,
12555 * so the resulting mapping should not inherit
12556 * the original mapping's accounting settings.
12557 * "iokit_acct" should have been cleared in
12558 * vm_map_entry_copy().
12559 * "use_pmap" should be reset to its default
12560 * (TRUE) so that the new mapping gets
12561 * accounted for in the task's memory footprint.
12563 assert(!new_entry
->iokit_acct
);
12564 new_entry
->use_pmap
= TRUE
;
/* Quick copy failed: give the tentative entry back and use the slow path. */
12567 if (!vm_object_copy_quickly(
12568 VME_OBJECT_PTR(new_entry
),
12569 VME_OFFSET(old_entry
),
12570 (old_entry
->vme_end
-
12571 old_entry
->vme_start
),
12573 &new_entry_needs_copy
)) {
12574 vm_map_entry_dispose(new_map
, new_entry
);
12575 goto slow_vm_map_fork_copy
;
12579 * Handle copy-on-write obligations
12582 if (src_needs_copy
&& !old_entry
->needs_copy
) {
12585 assert(!pmap_has_prot_policy(old_entry
->protection
));
12587 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
12589 if (override_nx(old_map
, VME_ALIAS(old_entry
))
12591 prot
|= VM_PROT_EXECUTE
;
12594 assert(!pmap_has_prot_policy(prot
));
12596 vm_object_pmap_protect(
12597 VME_OBJECT(old_entry
),
12598 VME_OFFSET(old_entry
),
12599 (old_entry
->vme_end
-
12600 old_entry
->vme_start
),
12601 ((old_entry
->is_shared
12602 || old_map
->mapped_in_other_pmaps
)
12605 old_entry
->vme_start
,
12608 assert(old_entry
->wired_count
== 0);
12609 old_entry
->needs_copy
= TRUE
;
12611 new_entry
->needs_copy
= new_entry_needs_copy
;
12614 * Insert the entry at the end
12618 vm_map_store_entry_link(new_map
,
12619 vm_map_last_entry(new_map
),
12621 VM_MAP_KERNEL_FLAGS_NONE
);
12622 new_size
+= entry_size
;
12625 slow_vm_map_fork_copy
:
12626 vm_map_copyin_flags
= 0;
12627 if (options
& VM_MAP_FORK_PRESERVE_PURGEABLE
) {
12628 vm_map_copyin_flags
|=
12629 VM_MAP_COPYIN_PRESERVE_PURGEABLE
;
12631 if (vm_map_fork_copy(old_map
,
12634 vm_map_copyin_flags
)) {
12635 new_size
+= entry_size
;
12639 old_entry
= old_entry
->vme_next
;
12642 #if defined(__arm64__)
12643 pmap_insert_sharedpage(new_map
->pmap
);
12646 new_map
->size
= new_size
;
12648 if (options
& VM_MAP_FORK_CORPSE_FOOTPRINT
) {
12649 vm_map_corpse_footprint_collect_done(new_map
);
12652 vm_map_unlock(new_map
);
12653 vm_map_unlock(old_map
);
12654 vm_map_deallocate(old_map
);
12662 * Setup the "new_map" with the proper execution environment according
12663 * to the type of executable (platform, 64bit, chroot environment).
12664 * Map the comm page and shared region, etc...
/*
 * Visible behavior (the trace strings name this routine vm_map_exec):
 * emits a SHARED_REGION_TRACE_DEBUG record on entry, calls
 * vm_commpage_enter() and vm_shared_region_enter() on new_map, emits a
 * second trace record on exit and returns KERN_SUCCESS.
 * The results of both *_enter() calls are explicitly discarded with
 * (void) casts, so their failure does not change the return value here.
 * NOTE(review): the name line, return type and most of the parameter
 * list are absent from this listing.
 */
12673 cpu_subtype_t cpu_subtype
)
12675 SHARED_REGION_TRACE_DEBUG(
12676 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
12677 (void *)VM_KERNEL_ADDRPERM(current_task()),
12678 (void *)VM_KERNEL_ADDRPERM(new_map
),
12679 (void *)VM_KERNEL_ADDRPERM(task
),
12680 (void *)VM_KERNEL_ADDRPERM(fsroot
),
12683 (void) vm_commpage_enter(new_map
, task
, is64bit
);
12684 (void) vm_shared_region_enter(new_map
, task
, is64bit
, fsroot
, cpu
, cpu_subtype
);
12685 SHARED_REGION_TRACE_DEBUG(
12686 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
12687 (void *)VM_KERNEL_ADDRPERM(current_task()),
12688 (void *)VM_KERNEL_ADDRPERM(new_map
),
12689 (void *)VM_KERNEL_ADDRPERM(task
),
12690 (void *)VM_KERNEL_ADDRPERM(fsroot
),
12693 return KERN_SUCCESS
;
12697 * vm_map_lookup_locked:
12699 * Finds the VM object, offset, and
12700 * protection for a given virtual address in the
12701 * specified map, assuming a page fault of the
12704 * Returns the (object, offset, protection) for
12705 * this address, whether it is wired down, and whether
12706 * this map has the only reference to the data in question.
12707 * In order to later verify this lookup, a "version"
12710 * The map MUST be locked by the caller and WILL be
12711 * locked on exit. In order to guarantee the
12712 * existence of the returned object, it is returned
12715 * If a lookup is requested with "write protection"
12716 * specified, the map may be changed to perform virtual
12717 * copying operations, although the data referenced will
12721 vm_map_lookup_locked(
12722 vm_map_t
*var_map
, /* IN/OUT */
12723 vm_map_offset_t vaddr
,
12724 vm_prot_t fault_type
,
12725 int object_lock_type
,
12726 vm_map_version_t
*out_version
, /* OUT */
12727 vm_object_t
*object
, /* OUT */
12728 vm_object_offset_t
*offset
, /* OUT */
12729 vm_prot_t
*out_prot
, /* OUT */
12730 boolean_t
*wired
, /* OUT */
12731 vm_object_fault_info_t fault_info
, /* OUT */
12732 vm_map_t
*real_map
)
12734 vm_map_entry_t entry
;
12735 vm_map_t map
= *var_map
;
12736 vm_map_t old_map
= *var_map
;
12737 vm_map_t cow_sub_map_parent
= VM_MAP_NULL
;
12738 vm_map_offset_t cow_parent_vaddr
= 0;
12739 vm_map_offset_t old_start
= 0;
12740 vm_map_offset_t old_end
= 0;
12742 boolean_t mask_protections
;
12743 boolean_t force_copy
;
12744 vm_prot_t original_fault_type
;
12747 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
12748 * as a mask against the mapping's actual protections, not as an
12751 mask_protections
= (fault_type
& VM_PROT_IS_MASK
) ? TRUE
: FALSE
;
12752 force_copy
= (fault_type
& VM_PROT_COPY
) ? TRUE
: FALSE
;
12753 fault_type
&= VM_PROT_ALL
;
12754 original_fault_type
= fault_type
;
12759 fault_type
= original_fault_type
;
12762 * If the map has an interesting hint, try it before calling
12763 * full blown lookup routine.
12767 if ((entry
== vm_map_to_entry(map
)) ||
12768 (vaddr
< entry
->vme_start
) || (vaddr
>= entry
->vme_end
)) {
12769 vm_map_entry_t tmp_entry
;
12772 * Entry was either not a valid hint, or the vaddr
12773 * was not contained in the entry, so do a full lookup.
12775 if (!vm_map_lookup_entry(map
, vaddr
, &tmp_entry
)) {
12776 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
12777 vm_map_unlock(cow_sub_map_parent
);
12779 if ((*real_map
!= map
)
12780 && (*real_map
!= cow_sub_map_parent
)) {
12781 vm_map_unlock(*real_map
);
12783 return KERN_INVALID_ADDRESS
;
12788 if (map
== old_map
) {
12789 old_start
= entry
->vme_start
;
12790 old_end
= entry
->vme_end
;
12794 * Handle submaps. Drop lock on upper map, submap is
12799 if (entry
->is_sub_map
) {
12800 vm_map_offset_t local_vaddr
;
12801 vm_map_offset_t end_delta
;
12802 vm_map_offset_t start_delta
;
12803 vm_map_entry_t submap_entry
;
12804 vm_prot_t subentry_protection
;
12805 vm_prot_t subentry_max_protection
;
12806 boolean_t subentry_no_copy_on_read
;
12807 boolean_t mapped_needs_copy
= FALSE
;
12809 local_vaddr
= vaddr
;
12811 if ((entry
->use_pmap
&&
12812 !((fault_type
& VM_PROT_WRITE
) ||
12814 /* if real_map equals map we unlock below */
12815 if ((*real_map
!= map
) &&
12816 (*real_map
!= cow_sub_map_parent
)) {
12817 vm_map_unlock(*real_map
);
12819 *real_map
= VME_SUBMAP(entry
);
12822 if (entry
->needs_copy
&&
12823 ((fault_type
& VM_PROT_WRITE
) ||
12825 if (!mapped_needs_copy
) {
12826 if (vm_map_lock_read_to_write(map
)) {
12827 vm_map_lock_read(map
);
12831 vm_map_lock_read(VME_SUBMAP(entry
));
12832 *var_map
= VME_SUBMAP(entry
);
12833 cow_sub_map_parent
= map
;
12834 /* reset base to map before cow object */
12835 /* this is the map which will accept */
12836 /* the new cow object */
12837 old_start
= entry
->vme_start
;
12838 old_end
= entry
->vme_end
;
12839 cow_parent_vaddr
= vaddr
;
12840 mapped_needs_copy
= TRUE
;
12842 vm_map_lock_read(VME_SUBMAP(entry
));
12843 *var_map
= VME_SUBMAP(entry
);
12844 if ((cow_sub_map_parent
!= map
) &&
12845 (*real_map
!= map
)) {
12846 vm_map_unlock(map
);
12850 vm_map_lock_read(VME_SUBMAP(entry
));
12851 *var_map
= VME_SUBMAP(entry
);
12852 /* leave map locked if it is a target */
12853 /* cow sub_map above otherwise, just */
12854 /* follow the maps down to the object */
12855 /* here we unlock knowing we are not */
12856 /* revisiting the map. */
12857 if ((*real_map
!= map
) && (map
!= cow_sub_map_parent
)) {
12858 vm_map_unlock_read(map
);
12864 /* calculate the offset in the submap for vaddr */
12865 local_vaddr
= (local_vaddr
- entry
->vme_start
) + VME_OFFSET(entry
);
12868 if (!vm_map_lookup_entry(map
, local_vaddr
, &submap_entry
)) {
12869 if ((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)) {
12870 vm_map_unlock(cow_sub_map_parent
);
12872 if ((*real_map
!= map
)
12873 && (*real_map
!= cow_sub_map_parent
)) {
12874 vm_map_unlock(*real_map
);
12877 return KERN_INVALID_ADDRESS
;
12880 /* find the attenuated shadow of the underlying object */
12881 /* on our target map */
12883 /* in english the submap object may extend beyond the */
12884 /* region mapped by the entry or, may only fill a portion */
12885 /* of it. For our purposes, we only care if the object */
12886 /* doesn't fill. In this case the area which will */
12887 /* ultimately be clipped in the top map will only need */
12888 /* to be as big as the portion of the underlying entry */
12889 /* which is mapped */
12890 start_delta
= submap_entry
->vme_start
> VME_OFFSET(entry
) ?
12891 submap_entry
->vme_start
- VME_OFFSET(entry
) : 0;
12894 (VME_OFFSET(entry
) + start_delta
+ (old_end
- old_start
)) <=
12895 submap_entry
->vme_end
?
12896 0 : (VME_OFFSET(entry
) +
12897 (old_end
- old_start
))
12898 - submap_entry
->vme_end
;
12900 old_start
+= start_delta
;
12901 old_end
-= end_delta
;
12903 if (submap_entry
->is_sub_map
) {
12904 entry
= submap_entry
;
12905 vaddr
= local_vaddr
;
12906 goto submap_recurse
;
12909 if (((fault_type
& VM_PROT_WRITE
) ||
12911 && cow_sub_map_parent
) {
12912 vm_object_t sub_object
, copy_object
;
12913 vm_object_offset_t copy_offset
;
12914 vm_map_offset_t local_start
;
12915 vm_map_offset_t local_end
;
12916 boolean_t copied_slowly
= FALSE
;
12918 if (vm_map_lock_read_to_write(map
)) {
12919 vm_map_lock_read(map
);
12920 old_start
-= start_delta
;
12921 old_end
+= end_delta
;
12926 sub_object
= VME_OBJECT(submap_entry
);
12927 if (sub_object
== VM_OBJECT_NULL
) {
12929 vm_object_allocate(
12931 (submap_entry
->vme_end
-
12932 submap_entry
->vme_start
));
12933 VME_OBJECT_SET(submap_entry
, sub_object
);
12934 VME_OFFSET_SET(submap_entry
, 0);
12935 assert(!submap_entry
->is_sub_map
);
12936 assert(submap_entry
->use_pmap
);
12938 local_start
= local_vaddr
-
12939 (cow_parent_vaddr
- old_start
);
12940 local_end
= local_vaddr
+
12941 (old_end
- cow_parent_vaddr
);
12942 vm_map_clip_start(map
, submap_entry
, local_start
);
12943 vm_map_clip_end(map
, submap_entry
, local_end
);
12944 if (submap_entry
->is_sub_map
) {
12945 /* unnesting was done when clipping */
12946 assert(!submap_entry
->use_pmap
);
12949 /* This is the COW case, lets connect */
12950 /* an entry in our space to the underlying */
12951 /* object in the submap, bypassing the */
12955 if (submap_entry
->wired_count
!= 0 ||
12956 (sub_object
->copy_strategy
==
12957 MEMORY_OBJECT_COPY_NONE
)) {
12958 vm_object_lock(sub_object
);
12959 vm_object_copy_slowly(sub_object
,
12960 VME_OFFSET(submap_entry
),
12961 (submap_entry
->vme_end
-
12962 submap_entry
->vme_start
),
12965 copied_slowly
= TRUE
;
12967 /* set up shadow object */
12968 copy_object
= sub_object
;
12969 vm_object_lock(sub_object
);
12970 vm_object_reference_locked(sub_object
);
12971 sub_object
->shadowed
= TRUE
;
12972 vm_object_unlock(sub_object
);
12974 assert(submap_entry
->wired_count
== 0);
12975 submap_entry
->needs_copy
= TRUE
;
12977 prot
= submap_entry
->protection
;
12978 assert(!pmap_has_prot_policy(prot
));
12979 prot
= prot
& ~VM_PROT_WRITE
;
12980 assert(!pmap_has_prot_policy(prot
));
12982 if (override_nx(old_map
,
12983 VME_ALIAS(submap_entry
))
12985 prot
|= VM_PROT_EXECUTE
;
12988 vm_object_pmap_protect(
12990 VME_OFFSET(submap_entry
),
12991 submap_entry
->vme_end
-
12992 submap_entry
->vme_start
,
12993 (submap_entry
->is_shared
12994 || map
->mapped_in_other_pmaps
) ?
12995 PMAP_NULL
: map
->pmap
,
12996 submap_entry
->vme_start
,
13001 * Adjust the fault offset to the submap entry.
13003 copy_offset
= (local_vaddr
-
13004 submap_entry
->vme_start
+
13005 VME_OFFSET(submap_entry
));
13007 /* This works differently than the */
13008 /* normal submap case. We go back */
13009 /* to the parent of the cow map and*/
13010 /* clip out the target portion of */
13011 /* the sub_map, substituting the */
13012 /* new copy object, */
13014 subentry_protection
= submap_entry
->protection
;
13015 subentry_max_protection
= submap_entry
->max_protection
;
13016 subentry_no_copy_on_read
= submap_entry
->vme_no_copy_on_read
;
13017 vm_map_unlock(map
);
13018 submap_entry
= NULL
; /* not valid after map unlock */
13020 local_start
= old_start
;
13021 local_end
= old_end
;
13022 map
= cow_sub_map_parent
;
13023 *var_map
= cow_sub_map_parent
;
13024 vaddr
= cow_parent_vaddr
;
13025 cow_sub_map_parent
= NULL
;
13027 if (!vm_map_lookup_entry(map
,
13029 vm_object_deallocate(
13031 vm_map_lock_write_to_read(map
);
13032 return KERN_INVALID_ADDRESS
;
13035 /* clip out the portion of space */
13036 /* mapped by the sub map which */
13037 /* corresponds to the underlying */
13041 * Clip (and unnest) the smallest nested chunk
13042 * possible around the faulting address...
13044 local_start
= vaddr
& ~(pmap_nesting_size_min
- 1);
13045 local_end
= local_start
+ pmap_nesting_size_min
;
13047 * ... but don't go beyond the "old_start" to "old_end"
13048 * range, to avoid spanning over another VM region
13049 * with a possibly different VM object and/or offset.
13051 if (local_start
< old_start
) {
13052 local_start
= old_start
;
13054 if (local_end
> old_end
) {
13055 local_end
= old_end
;
13058 * Adjust copy_offset to the start of the range.
13060 copy_offset
-= (vaddr
- local_start
);
13062 vm_map_clip_start(map
, entry
, local_start
);
13063 vm_map_clip_end(map
, entry
, local_end
);
13064 if (entry
->is_sub_map
) {
13065 /* unnesting was done when clipping */
13066 assert(!entry
->use_pmap
);
13069 /* substitute copy object for */
13070 /* shared map entry */
13071 vm_map_deallocate(VME_SUBMAP(entry
));
13072 assert(!entry
->iokit_acct
);
13073 entry
->is_sub_map
= FALSE
;
13074 entry
->use_pmap
= TRUE
;
13075 VME_OBJECT_SET(entry
, copy_object
);
13077 /* propagate the submap entry's protections */
13078 if (entry
->protection
!= VM_PROT_READ
) {
13080 * Someone has already altered the top entry's
13081 * protections via vm_protect(VM_PROT_COPY).
13082 * Respect these new values and ignore the
13083 * submap entry's protections.
13087 * Regular copy-on-write: propagate the submap
13088 * entry's protections to the top map entry.
13090 entry
->protection
|= subentry_protection
;
13092 entry
->max_protection
|= subentry_max_protection
;
13093 /* propagate no_copy_on_read */
13094 entry
->vme_no_copy_on_read
= subentry_no_copy_on_read
;
13096 if ((entry
->protection
& VM_PROT_WRITE
) &&
13097 (entry
->protection
& VM_PROT_EXECUTE
) &&
13098 #if !CONFIG_EMBEDDED
13099 map
!= kernel_map
&&
13100 cs_process_enforcement(NULL
) &&
13101 #endif /* !CONFIG_EMBEDDED */
13102 !(entry
->used_for_jit
)) {
13104 uint64_t, (uint64_t)entry
->vme_start
,
13105 uint64_t, (uint64_t)entry
->vme_end
,
13106 vm_prot_t
, entry
->protection
);
13107 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13109 (current_task()->bsd_info
13110 ? proc_name_address(current_task()->bsd_info
)
13113 entry
->protection
&= ~VM_PROT_EXECUTE
;
13116 if (copied_slowly
) {
13117 VME_OFFSET_SET(entry
, local_start
- old_start
);
13118 entry
->needs_copy
= FALSE
;
13119 entry
->is_shared
= FALSE
;
13121 VME_OFFSET_SET(entry
, copy_offset
);
13122 assert(entry
->wired_count
== 0);
13123 entry
->needs_copy
= TRUE
;
13124 if (entry
->inheritance
== VM_INHERIT_SHARE
) {
13125 entry
->inheritance
= VM_INHERIT_COPY
;
13127 if (map
!= old_map
) {
13128 entry
->is_shared
= TRUE
;
13131 if (entry
->inheritance
== VM_INHERIT_SHARE
) {
13132 entry
->inheritance
= VM_INHERIT_COPY
;
13135 vm_map_lock_write_to_read(map
);
13137 if ((cow_sub_map_parent
)
13138 && (cow_sub_map_parent
!= *real_map
)
13139 && (cow_sub_map_parent
!= map
)) {
13140 vm_map_unlock(cow_sub_map_parent
);
13142 entry
= submap_entry
;
13143 vaddr
= local_vaddr
;
13148 * Check whether this task is allowed to have
13152 prot
= entry
->protection
;
13154 if (override_nx(old_map
, VME_ALIAS(entry
)) && prot
) {
13156 * HACK -- if not a stack, then allow execution
13158 prot
|= VM_PROT_EXECUTE
;
13161 if (mask_protections
) {
13162 fault_type
&= prot
;
13163 if (fault_type
== VM_PROT_NONE
) {
13164 goto protection_failure
;
13167 if (((fault_type
& prot
) != fault_type
)
13169 /* prefetch abort in execute-only page */
13170 && !(prot
== VM_PROT_EXECUTE
&& fault_type
== (VM_PROT_READ
| VM_PROT_EXECUTE
))
13173 protection_failure
:
13174 if (*real_map
!= map
) {
13175 vm_map_unlock(*real_map
);
13179 if ((fault_type
& VM_PROT_EXECUTE
) && prot
) {
13180 log_stack_execution_failure((addr64_t
)vaddr
, prot
);
13183 DTRACE_VM2(prot_fault
, int, 1, (uint64_t *), NULL
);
13184 return KERN_PROTECTION_FAILURE
;
13188 * If this page is not pageable, we have to get
13189 * it for all possible accesses.
13192 *wired
= (entry
->wired_count
!= 0);
13198 * If the entry was copy-on-write, we either ...
13201 if (entry
->needs_copy
) {
13203 * If we want to write the page, we may as well
13204 * handle that now since we've got the map locked.
13206 * If we don't need to write the page, we just
13207 * demote the permissions allowed.
13210 if ((fault_type
& VM_PROT_WRITE
) || *wired
|| force_copy
) {
13212 * Make a new object, and place it in the
13213 * object chain. Note that no new references
13214 * have appeared -- one just moved from the
13215 * map to the new object.
13218 if (vm_map_lock_read_to_write(map
)) {
13219 vm_map_lock_read(map
);
13223 if (VME_OBJECT(entry
)->shadowed
== FALSE
) {
13224 vm_object_lock(VME_OBJECT(entry
));
13225 VME_OBJECT(entry
)->shadowed
= TRUE
;
13226 vm_object_unlock(VME_OBJECT(entry
));
13228 VME_OBJECT_SHADOW(entry
,
13229 (vm_map_size_t
) (entry
->vme_end
-
13230 entry
->vme_start
));
13231 entry
->needs_copy
= FALSE
;
13233 vm_map_lock_write_to_read(map
);
13235 if ((fault_type
& VM_PROT_WRITE
) == 0 && *wired
== 0) {
13237 * We're attempting to read a copy-on-write
13238 * page -- don't allow writes.
13241 prot
&= (~VM_PROT_WRITE
);
13246 * Create an object if necessary.
13248 if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
13249 if (vm_map_lock_read_to_write(map
)) {
13250 vm_map_lock_read(map
);
13254 VME_OBJECT_SET(entry
,
13255 vm_object_allocate(
13256 (vm_map_size_t
)(entry
->vme_end
-
13257 entry
->vme_start
)));
13258 VME_OFFSET_SET(entry
, 0);
13259 assert(entry
->use_pmap
);
13260 vm_map_lock_write_to_read(map
);
13264 * Return the object/offset from this entry. If the entry
13265 * was copy-on-write or empty, it has been fixed up. Also
13266 * return the protection.
13269 *offset
= (vaddr
- entry
->vme_start
) + VME_OFFSET(entry
);
13270 *object
= VME_OBJECT(entry
);
13272 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET
, VM_MAP_LOOKUP_OBJECT
), VM_KERNEL_UNSLIDE_OR_PERM(*object
), 0, 0, 0, 0);
13275 fault_info
->interruptible
= THREAD_UNINT
; /* for now... */
13276 /* ... the caller will change "interruptible" if needed */
13277 fault_info
->cluster_size
= 0;
13278 fault_info
->user_tag
= VME_ALIAS(entry
);
13279 fault_info
->pmap_options
= 0;
13280 if (entry
->iokit_acct
||
13281 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
13282 fault_info
->pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
13284 fault_info
->behavior
= entry
->behavior
;
13285 fault_info
->lo_offset
= VME_OFFSET(entry
);
13286 fault_info
->hi_offset
=
13287 (entry
->vme_end
- entry
->vme_start
) + VME_OFFSET(entry
);
13288 fault_info
->no_cache
= entry
->no_cache
;
13289 fault_info
->stealth
= FALSE
;
13290 fault_info
->io_sync
= FALSE
;
13291 if (entry
->used_for_jit
||
13292 entry
->vme_resilient_codesign
) {
13293 fault_info
->cs_bypass
= TRUE
;
13295 fault_info
->cs_bypass
= FALSE
;
13297 fault_info
->pmap_cs_associated
= FALSE
;
13299 if (entry
->pmap_cs_associated
) {
13301 * The pmap layer will validate this page
13302 * before allowing it to be executed from.
13304 fault_info
->pmap_cs_associated
= TRUE
;
13306 #endif /* CONFIG_PMAP_CS */
13307 fault_info
->mark_zf_absent
= FALSE
;
13308 fault_info
->batch_pmap_op
= FALSE
;
13309 fault_info
->resilient_media
= entry
->vme_resilient_media
;
13310 fault_info
->no_copy_on_read
= entry
->vme_no_copy_on_read
;
13314 * Lock the object to prevent it from disappearing
13316 if (object_lock_type
== OBJECT_LOCK_EXCLUSIVE
) {
13317 vm_object_lock(*object
);
13319 vm_object_lock_shared(*object
);
13323 * Save the version number
13326 out_version
->main_timestamp
= map
->timestamp
;
13328 return KERN_SUCCESS
;
13335 * Verifies that the map in question has not changed
13336 * since the given version. The map has to be locked
13337 * ("shared" mode is fine) before calling this function
13338 * and it will be returned locked too.
13343 vm_map_version_t
*version
) /* REF */
13347 vm_map_lock_assert_held(map
);
13348 result
= (map
->timestamp
== version
->main_timestamp
);
13354 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13355 * Goes away after regular vm_region_recurse function migrates to
13357 * vm_region_recurse: A form of vm_region which follows the
13358 * submaps in a target map
13363 vm_map_region_recurse_64(
13365 vm_map_offset_t
*address
, /* IN/OUT */
13366 vm_map_size_t
*size
, /* OUT */
13367 natural_t
*nesting_depth
, /* IN/OUT */
13368 vm_region_submap_info_64_t submap_info
, /* IN/OUT */
13369 mach_msg_type_number_t
*count
) /* IN/OUT */
13371 mach_msg_type_number_t original_count
;
13372 vm_region_extended_info_data_t extended
;
13373 vm_map_entry_t tmp_entry
;
13374 vm_map_offset_t user_address
;
13375 unsigned int user_max_depth
;
13378 * "curr_entry" is the VM map entry preceding or including the
13379 * address we're looking for.
13380 * "curr_map" is the map or sub-map containing "curr_entry".
13381 * "curr_address" is the equivalent of the top map's "user_address"
13382 * in the current map.
13383 * "curr_offset" is the cumulated offset of "curr_map" in the
13384 * target task's address space.
13385 * "curr_depth" is the depth of "curr_map" in the chain of
13388 * "curr_max_below" and "curr_max_above" limit the range (around
13389 * "curr_address") we should take into account in the current (sub)map.
13390 * They limit the range to what's visible through the map entries
13391 * we've traversed from the top map to the current map.
13394 vm_map_entry_t curr_entry
;
13395 vm_map_address_t curr_address
;
13396 vm_map_offset_t curr_offset
;
13398 unsigned int curr_depth
;
13399 vm_map_offset_t curr_max_below
, curr_max_above
;
13400 vm_map_offset_t curr_skip
;
13403 * "next_" is the same as "curr_" but for the VM region immediately
13404 * after the address we're looking for. We need to keep track of this
13405 * too because we want to return info about that region if the
13406 * address we're looking for is not mapped.
13408 vm_map_entry_t next_entry
;
13409 vm_map_offset_t next_offset
;
13410 vm_map_offset_t next_address
;
13412 unsigned int next_depth
;
13413 vm_map_offset_t next_max_below
, next_max_above
;
13414 vm_map_offset_t next_skip
;
13416 boolean_t look_for_pages
;
13417 vm_region_submap_short_info_64_t short_info
;
13418 boolean_t do_region_footprint
;
13420 if (map
== VM_MAP_NULL
) {
13421 /* no address space to work on */
13422 return KERN_INVALID_ARGUMENT
;
13426 if (*count
< VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
) {
13428 * "info" structure is not big enough and
13431 return KERN_INVALID_ARGUMENT
;
13434 do_region_footprint
= task_self_region_footprint();
13435 original_count
= *count
;
13437 if (original_count
< VM_REGION_SUBMAP_INFO_V0_COUNT_64
) {
13438 *count
= VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
;
13439 look_for_pages
= FALSE
;
13440 short_info
= (vm_region_submap_short_info_64_t
) submap_info
;
13441 submap_info
= NULL
;
13443 look_for_pages
= TRUE
;
13444 *count
= VM_REGION_SUBMAP_INFO_V0_COUNT_64
;
13447 if (original_count
>= VM_REGION_SUBMAP_INFO_V1_COUNT_64
) {
13448 *count
= VM_REGION_SUBMAP_INFO_V1_COUNT_64
;
13450 if (original_count
>= VM_REGION_SUBMAP_INFO_V2_COUNT_64
) {
13451 *count
= VM_REGION_SUBMAP_INFO_V2_COUNT_64
;
13455 user_address
= *address
;
13456 user_max_depth
= *nesting_depth
;
13459 vm_map_lock_read(map
);
13465 curr_address
= user_address
;
13469 curr_max_above
= ((vm_map_offset_t
) -1) - curr_address
;
13470 curr_max_below
= curr_address
;
13478 next_max_above
= (vm_map_offset_t
) -1;
13479 next_max_below
= (vm_map_offset_t
) -1;
13482 if (vm_map_lookup_entry(curr_map
,
13485 /* tmp_entry contains the address we're looking for */
13486 curr_entry
= tmp_entry
;
13488 vm_map_offset_t skip
;
13490 * The address is not mapped. "tmp_entry" is the
13491 * map entry preceding the address. We want the next
13492 * one, if it exists.
13494 curr_entry
= tmp_entry
->vme_next
;
13496 if (curr_entry
== vm_map_to_entry(curr_map
) ||
13497 (curr_entry
->vme_start
>=
13498 curr_address
+ curr_max_above
)) {
13499 /* no next entry at this level: stop looking */
13501 vm_map_unlock_read(curr_map
);
13508 curr_max_above
= 0;
13509 curr_max_below
= 0;
13513 /* adjust current address and offset */
13514 skip
= curr_entry
->vme_start
- curr_address
;
13515 curr_address
= curr_entry
->vme_start
;
13517 curr_offset
+= skip
;
13518 curr_max_above
-= skip
;
13519 curr_max_below
= 0;
13523 * Is the next entry at this level closer to the address (or
13524 * deeper in the submap chain) than the one we had
13527 tmp_entry
= curr_entry
->vme_next
;
13528 if (tmp_entry
== vm_map_to_entry(curr_map
)) {
13529 /* no next entry at this level */
13530 } else if (tmp_entry
->vme_start
>=
13531 curr_address
+ curr_max_above
) {
13533 * tmp_entry is beyond the scope of what we mapped of
13534 * this submap in the upper level: ignore it.
13536 } else if ((next_entry
== NULL
) ||
13537 (tmp_entry
->vme_start
+ curr_offset
<=
13538 next_entry
->vme_start
+ next_offset
)) {
13540 * We didn't have a "next_entry" or this one is
13541 * closer to the address we're looking for:
13542 * use this "tmp_entry" as the new "next_entry".
13544 if (next_entry
!= NULL
) {
13545 /* unlock the last "next_map" */
13546 if (next_map
!= curr_map
&& not_in_kdp
) {
13547 vm_map_unlock_read(next_map
);
13550 next_entry
= tmp_entry
;
13551 next_map
= curr_map
;
13552 next_depth
= curr_depth
;
13553 next_address
= next_entry
->vme_start
;
13554 next_skip
= curr_skip
;
13555 next_skip
+= (next_address
- curr_address
);
13556 next_offset
= curr_offset
;
13557 next_offset
+= (next_address
- curr_address
);
13558 next_max_above
= MIN(next_max_above
, curr_max_above
);
13559 next_max_above
= MIN(next_max_above
,
13560 next_entry
->vme_end
- next_address
);
13561 next_max_below
= MIN(next_max_below
, curr_max_below
);
13562 next_max_below
= MIN(next_max_below
,
13563 next_address
- next_entry
->vme_start
);
13567 * "curr_max_{above,below}" allow us to keep track of the
13568 * portion of the submap that is actually mapped at this level:
13569 * the rest of that submap is irrelevant to us, since it's not
13571 * The relevant portion of the map starts at
13572 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
13574 curr_max_above
= MIN(curr_max_above
,
13575 curr_entry
->vme_end
- curr_address
);
13576 curr_max_below
= MIN(curr_max_below
,
13577 curr_address
- curr_entry
->vme_start
);
13579 if (!curr_entry
->is_sub_map
||
13580 curr_depth
>= user_max_depth
) {
13582 * We hit a leaf map or we reached the maximum depth
13583 * we could, so stop looking. Keep the current map
13590 * Get down to the next submap level.
13594 * Lock the next level and unlock the current level,
13595 * unless we need to keep it locked to access the "next_entry"
13599 vm_map_lock_read(VME_SUBMAP(curr_entry
));
13601 if (curr_map
== next_map
) {
13602 /* keep "next_map" locked in case we need it */
13604 /* release this map */
13606 vm_map_unlock_read(curr_map
);
13611 * Adjust the offset. "curr_entry" maps the submap
13612 * at relative address "curr_entry->vme_start" in the
13613 * curr_map but skips the first "VME_OFFSET(curr_entry)"
13614 * bytes of the submap.
13615 * "curr_offset" always represents the offset of a virtual
13616 * address in the curr_map relative to the absolute address
13617 * space (i.e. the top-level VM map).
13620 (VME_OFFSET(curr_entry
) - curr_entry
->vme_start
);
13621 curr_address
= user_address
+ curr_offset
;
13622 /* switch to the submap */
13623 curr_map
= VME_SUBMAP(curr_entry
);
13628 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13629 // so probably should be a real 32b ID vs. ptr.
13630 // Current users just check for equality
13632 if (curr_entry
== NULL
) {
13633 /* no VM region contains the address... */
13635 if (do_region_footprint
&& /* we want footprint numbers */
13636 next_entry
== NULL
&& /* & there are no more regions */
13637 /* & we haven't already provided our fake region: */
13638 user_address
<= vm_map_last_entry(map
)->vme_end
) {
13639 ledger_amount_t ledger_resident
, ledger_compressed
;
13642 * Add a fake memory region to account for
13643 * purgeable and/or ledger-tagged memory that
13644 * counts towards this task's memory footprint,
13645 * i.e. the resident/compressed pages of non-volatile
13646 * objects owned by that task.
13648 task_ledgers_footprint(map
->pmap
->ledger
,
13650 &ledger_compressed
);
13651 if (ledger_resident
+ ledger_compressed
== 0) {
13652 /* no purgeable memory usage to report */
13653 return KERN_INVALID_ADDRESS
;
13655 /* fake region to show nonvolatile footprint */
13656 if (look_for_pages
) {
13657 submap_info
->protection
= VM_PROT_DEFAULT
;
13658 submap_info
->max_protection
= VM_PROT_DEFAULT
;
13659 submap_info
->inheritance
= VM_INHERIT_DEFAULT
;
13660 submap_info
->offset
= 0;
13661 submap_info
->user_tag
= -1;
13662 submap_info
->pages_resident
= (unsigned int) (ledger_resident
/ PAGE_SIZE
);
13663 submap_info
->pages_shared_now_private
= 0;
13664 submap_info
->pages_swapped_out
= (unsigned int) (ledger_compressed
/ PAGE_SIZE
);
13665 submap_info
->pages_dirtied
= submap_info
->pages_resident
;
13666 submap_info
->ref_count
= 1;
13667 submap_info
->shadow_depth
= 0;
13668 submap_info
->external_pager
= 0;
13669 submap_info
->share_mode
= SM_PRIVATE
;
13670 submap_info
->is_submap
= 0;
13671 submap_info
->behavior
= VM_BEHAVIOR_DEFAULT
;
13672 submap_info
->object_id
= INFO_MAKE_FAKE_OBJECT_ID(map
, task_ledgers
.purgeable_nonvolatile
);
13673 submap_info
->user_wired_count
= 0;
13674 submap_info
->pages_reusable
= 0;
13676 short_info
->user_tag
= -1;
13677 short_info
->offset
= 0;
13678 short_info
->protection
= VM_PROT_DEFAULT
;
13679 short_info
->inheritance
= VM_INHERIT_DEFAULT
;
13680 short_info
->max_protection
= VM_PROT_DEFAULT
;
13681 short_info
->behavior
= VM_BEHAVIOR_DEFAULT
;
13682 short_info
->user_wired_count
= 0;
13683 short_info
->is_submap
= 0;
13684 short_info
->object_id
= INFO_MAKE_FAKE_OBJECT_ID(map
, task_ledgers
.purgeable_nonvolatile
);
13685 short_info
->external_pager
= 0;
13686 short_info
->shadow_depth
= 0;
13687 short_info
->share_mode
= SM_PRIVATE
;
13688 short_info
->ref_count
= 1;
13690 *nesting_depth
= 0;
13691 *size
= (vm_map_size_t
) (ledger_resident
+ ledger_compressed
);
13692 // *address = user_address;
13693 *address
= vm_map_last_entry(map
)->vme_end
;
13694 return KERN_SUCCESS
;
13697 if (next_entry
== NULL
) {
13698 /* ... and no VM region follows it either */
13699 return KERN_INVALID_ADDRESS
;
13701 /* ... gather info about the next VM region */
13702 curr_entry
= next_entry
;
13703 curr_map
= next_map
; /* still locked ... */
13704 curr_address
= next_address
;
13705 curr_skip
= next_skip
;
13706 curr_offset
= next_offset
;
13707 curr_depth
= next_depth
;
13708 curr_max_above
= next_max_above
;
13709 curr_max_below
= next_max_below
;
13711 /* we won't need "next_entry" after all */
13712 if (next_entry
!= NULL
) {
13713 /* release "next_map" */
13714 if (next_map
!= curr_map
&& not_in_kdp
) {
13715 vm_map_unlock_read(next_map
);
13724 next_max_below
= -1;
13725 next_max_above
= -1;
13727 if (curr_entry
->is_sub_map
&&
13728 curr_depth
< user_max_depth
) {
13730 * We're not as deep as we could be: we must have
13731 * gone back up after not finding anything mapped
13732 * below the original top-level map entry's.
13733 * Let's move "curr_address" forward and recurse again.
13735 user_address
= curr_address
;
13736 goto recurse_again
;
13739 *nesting_depth
= curr_depth
;
13740 *size
= curr_max_above
+ curr_max_below
;
13741 *address
= user_address
+ curr_skip
- curr_max_below
;
13743 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13744 // so probably should be a real 32b ID vs. ptr.
13745 // Current users just check for equality
13746 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13748 if (look_for_pages
) {
13749 submap_info
->user_tag
= VME_ALIAS(curr_entry
);
13750 submap_info
->offset
= VME_OFFSET(curr_entry
);
13751 submap_info
->protection
= curr_entry
->protection
;
13752 submap_info
->inheritance
= curr_entry
->inheritance
;
13753 submap_info
->max_protection
= curr_entry
->max_protection
;
13754 submap_info
->behavior
= curr_entry
->behavior
;
13755 submap_info
->user_wired_count
= curr_entry
->user_wired_count
;
13756 submap_info
->is_submap
= curr_entry
->is_sub_map
;
13757 submap_info
->object_id
= INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry
));
13759 short_info
->user_tag
= VME_ALIAS(curr_entry
);
13760 short_info
->offset
= VME_OFFSET(curr_entry
);
13761 short_info
->protection
= curr_entry
->protection
;
13762 short_info
->inheritance
= curr_entry
->inheritance
;
13763 short_info
->max_protection
= curr_entry
->max_protection
;
13764 short_info
->behavior
= curr_entry
->behavior
;
13765 short_info
->user_wired_count
= curr_entry
->user_wired_count
;
13766 short_info
->is_submap
= curr_entry
->is_sub_map
;
13767 short_info
->object_id
= INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry
));
13770 extended
.pages_resident
= 0;
13771 extended
.pages_swapped_out
= 0;
13772 extended
.pages_shared_now_private
= 0;
13773 extended
.pages_dirtied
= 0;
13774 extended
.pages_reusable
= 0;
13775 extended
.external_pager
= 0;
13776 extended
.shadow_depth
= 0;
13777 extended
.share_mode
= SM_EMPTY
;
13778 extended
.ref_count
= 0;
13781 if (!curr_entry
->is_sub_map
) {
13782 vm_map_offset_t range_start
, range_end
;
13783 range_start
= MAX((curr_address
- curr_max_below
),
13784 curr_entry
->vme_start
);
13785 range_end
= MIN((curr_address
+ curr_max_above
),
13786 curr_entry
->vme_end
);
13787 vm_map_region_walk(curr_map
,
13790 (VME_OFFSET(curr_entry
) +
13792 curr_entry
->vme_start
)),
13793 range_end
- range_start
,
13795 look_for_pages
, VM_REGION_EXTENDED_INFO_COUNT
);
13796 if (extended
.external_pager
&&
13797 extended
.ref_count
== 2 &&
13798 extended
.share_mode
== SM_SHARED
) {
13799 extended
.share_mode
= SM_PRIVATE
;
13802 if (curr_entry
->use_pmap
) {
13803 extended
.share_mode
= SM_TRUESHARED
;
13805 extended
.share_mode
= SM_PRIVATE
;
13807 extended
.ref_count
= os_ref_get_count(&VME_SUBMAP(curr_entry
)->map_refcnt
);
13811 if (look_for_pages
) {
13812 submap_info
->pages_resident
= extended
.pages_resident
;
13813 submap_info
->pages_swapped_out
= extended
.pages_swapped_out
;
13814 submap_info
->pages_shared_now_private
=
13815 extended
.pages_shared_now_private
;
13816 submap_info
->pages_dirtied
= extended
.pages_dirtied
;
13817 submap_info
->external_pager
= extended
.external_pager
;
13818 submap_info
->shadow_depth
= extended
.shadow_depth
;
13819 submap_info
->share_mode
= extended
.share_mode
;
13820 submap_info
->ref_count
= extended
.ref_count
;
13822 if (original_count
>= VM_REGION_SUBMAP_INFO_V1_COUNT_64
) {
13823 submap_info
->pages_reusable
= extended
.pages_reusable
;
13825 if (original_count
>= VM_REGION_SUBMAP_INFO_V2_COUNT_64
) {
13826 submap_info
->object_id_full
= (vm_object_id_t
) (VME_OBJECT(curr_entry
) != NULL
) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry
)) : 0ULL;
13829 short_info
->external_pager
= extended
.external_pager
;
13830 short_info
->shadow_depth
= extended
.shadow_depth
;
13831 short_info
->share_mode
= extended
.share_mode
;
13832 short_info
->ref_count
= extended
.ref_count
;
13836 vm_map_unlock_read(curr_map
);
13839 return KERN_SUCCESS
;
13845 * User call to obtain information about a region in
13846 * a task's address map. Currently, only one flavor is
13849 * XXX The reserved and behavior fields cannot be filled
13850 * in until the vm merge from the IK is completed, and
13851 * vm_reserve is implemented.
13857 vm_map_offset_t
*address
, /* IN/OUT */
13858 vm_map_size_t
*size
, /* OUT */
13859 vm_region_flavor_t flavor
, /* IN */
13860 vm_region_info_t info
, /* OUT */
13861 mach_msg_type_number_t
*count
, /* IN/OUT */
13862 mach_port_t
*object_name
) /* OUT */
13864 vm_map_entry_t tmp_entry
;
13865 vm_map_entry_t entry
;
13866 vm_map_offset_t start
;
13868 if (map
== VM_MAP_NULL
) {
13869 return KERN_INVALID_ARGUMENT
;
13873 case VM_REGION_BASIC_INFO
:
13874 /* legacy for old 32-bit objects info */
13876 vm_region_basic_info_t basic
;
13878 if (*count
< VM_REGION_BASIC_INFO_COUNT
) {
13879 return KERN_INVALID_ARGUMENT
;
13882 basic
= (vm_region_basic_info_t
) info
;
13883 *count
= VM_REGION_BASIC_INFO_COUNT
;
13885 vm_map_lock_read(map
);
13888 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
13889 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
13890 vm_map_unlock_read(map
);
13891 return KERN_INVALID_ADDRESS
;
13897 start
= entry
->vme_start
;
13899 basic
->offset
= (uint32_t)VME_OFFSET(entry
);
13900 basic
->protection
= entry
->protection
;
13901 basic
->inheritance
= entry
->inheritance
;
13902 basic
->max_protection
= entry
->max_protection
;
13903 basic
->behavior
= entry
->behavior
;
13904 basic
->user_wired_count
= entry
->user_wired_count
;
13905 basic
->reserved
= entry
->is_sub_map
;
13907 *size
= (entry
->vme_end
- start
);
13910 *object_name
= IP_NULL
;
13912 if (entry
->is_sub_map
) {
13913 basic
->shared
= FALSE
;
13915 basic
->shared
= entry
->is_shared
;
13918 vm_map_unlock_read(map
);
13919 return KERN_SUCCESS
;
13922 case VM_REGION_BASIC_INFO_64
:
13924 vm_region_basic_info_64_t basic
;
13926 if (*count
< VM_REGION_BASIC_INFO_COUNT_64
) {
13927 return KERN_INVALID_ARGUMENT
;
13930 basic
= (vm_region_basic_info_64_t
) info
;
13931 *count
= VM_REGION_BASIC_INFO_COUNT_64
;
13933 vm_map_lock_read(map
);
13936 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
13937 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
13938 vm_map_unlock_read(map
);
13939 return KERN_INVALID_ADDRESS
;
13945 start
= entry
->vme_start
;
13947 basic
->offset
= VME_OFFSET(entry
);
13948 basic
->protection
= entry
->protection
;
13949 basic
->inheritance
= entry
->inheritance
;
13950 basic
->max_protection
= entry
->max_protection
;
13951 basic
->behavior
= entry
->behavior
;
13952 basic
->user_wired_count
= entry
->user_wired_count
;
13953 basic
->reserved
= entry
->is_sub_map
;
13955 *size
= (entry
->vme_end
- start
);
13958 *object_name
= IP_NULL
;
13960 if (entry
->is_sub_map
) {
13961 basic
->shared
= FALSE
;
13963 basic
->shared
= entry
->is_shared
;
13966 vm_map_unlock_read(map
);
13967 return KERN_SUCCESS
;
13969 case VM_REGION_EXTENDED_INFO
:
13970 if (*count
< VM_REGION_EXTENDED_INFO_COUNT
) {
13971 return KERN_INVALID_ARGUMENT
;
13974 case VM_REGION_EXTENDED_INFO__legacy
:
13975 if (*count
< VM_REGION_EXTENDED_INFO_COUNT__legacy
) {
13976 return KERN_INVALID_ARGUMENT
;
13980 vm_region_extended_info_t extended
;
13981 mach_msg_type_number_t original_count
;
13983 extended
= (vm_region_extended_info_t
) info
;
13985 vm_map_lock_read(map
);
13988 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
13989 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
13990 vm_map_unlock_read(map
);
13991 return KERN_INVALID_ADDRESS
;
13996 start
= entry
->vme_start
;
13998 extended
->protection
= entry
->protection
;
13999 extended
->user_tag
= VME_ALIAS(entry
);
14000 extended
->pages_resident
= 0;
14001 extended
->pages_swapped_out
= 0;
14002 extended
->pages_shared_now_private
= 0;
14003 extended
->pages_dirtied
= 0;
14004 extended
->external_pager
= 0;
14005 extended
->shadow_depth
= 0;
14007 original_count
= *count
;
14008 if (flavor
== VM_REGION_EXTENDED_INFO__legacy
) {
14009 *count
= VM_REGION_EXTENDED_INFO_COUNT__legacy
;
14011 extended
->pages_reusable
= 0;
14012 *count
= VM_REGION_EXTENDED_INFO_COUNT
;
14015 vm_map_region_walk(map
, start
, entry
, VME_OFFSET(entry
), entry
->vme_end
- start
, extended
, TRUE
, *count
);
14017 if (extended
->external_pager
&& extended
->ref_count
== 2 && extended
->share_mode
== SM_SHARED
) {
14018 extended
->share_mode
= SM_PRIVATE
;
14022 *object_name
= IP_NULL
;
14025 *size
= (entry
->vme_end
- start
);
14027 vm_map_unlock_read(map
);
14028 return KERN_SUCCESS
;
14030 case VM_REGION_TOP_INFO
:
14032 vm_region_top_info_t top
;
14034 if (*count
< VM_REGION_TOP_INFO_COUNT
) {
14035 return KERN_INVALID_ARGUMENT
;
14038 top
= (vm_region_top_info_t
) info
;
14039 *count
= VM_REGION_TOP_INFO_COUNT
;
14041 vm_map_lock_read(map
);
14044 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
14045 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
14046 vm_map_unlock_read(map
);
14047 return KERN_INVALID_ADDRESS
;
14052 start
= entry
->vme_start
;
14054 top
->private_pages_resident
= 0;
14055 top
->shared_pages_resident
= 0;
14057 vm_map_region_top_walk(entry
, top
);
14060 *object_name
= IP_NULL
;
14063 *size
= (entry
->vme_end
- start
);
14065 vm_map_unlock_read(map
);
14066 return KERN_SUCCESS
;
14069 return KERN_INVALID_ARGUMENT
;
/*
 * OBJ_RESIDENT_COUNT(obj, entry_size):
 *
 * Page count to report for "obj", capped at "entry_size".
 * If "obj->all_reusable" is set, the uncapped count is
 * "obj->wired_page_count"; otherwise it is
 * "obj->resident_page_count" minus "obj->reusable_page_count".
 *
 * "obj" appears several times in the expansion, so callers must pass an
 * expression without side effects.  MIN() is not defined here; it is
 * expected to be in scope wherever this macro is expanded.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
	MIN((entry_size),                                               \
	    ((obj)->all_reusable ?                                      \
	     (obj)->wired_page_count :                                  \
	     (obj)->resident_page_count - (obj)->reusable_page_count))
14080 vm_map_region_top_walk(
14081 vm_map_entry_t entry
,
14082 vm_region_top_info_t top
)
14084 if (VME_OBJECT(entry
) == 0 || entry
->is_sub_map
) {
14085 top
->share_mode
= SM_EMPTY
;
14086 top
->ref_count
= 0;
14092 struct vm_object
*obj
, *tmp_obj
;
14094 uint32_t entry_size
;
14096 entry_size
= (uint32_t) ((entry
->vme_end
- entry
->vme_start
) / PAGE_SIZE_64
);
14098 obj
= VME_OBJECT(entry
);
14100 vm_object_lock(obj
);
14102 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
) {
14106 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
14108 if (ref_count
== 1) {
14109 top
->private_pages_resident
=
14110 OBJ_RESIDENT_COUNT(obj
, entry_size
);
14112 top
->shared_pages_resident
=
14113 OBJ_RESIDENT_COUNT(obj
, entry_size
);
14115 top
->ref_count
= ref_count
;
14116 top
->share_mode
= SM_COW
;
14118 while ((tmp_obj
= obj
->shadow
)) {
14119 vm_object_lock(tmp_obj
);
14120 vm_object_unlock(obj
);
14123 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
) {
14127 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
14128 top
->shared_pages_resident
+=
14129 OBJ_RESIDENT_COUNT(obj
, entry_size
);
14130 top
->ref_count
+= ref_count
- 1;
14133 if (entry
->superpage_size
) {
14134 top
->share_mode
= SM_LARGE_PAGE
;
14135 top
->shared_pages_resident
= 0;
14136 top
->private_pages_resident
= entry_size
;
14137 } else if (entry
->needs_copy
) {
14138 top
->share_mode
= SM_COW
;
14139 top
->shared_pages_resident
=
14140 OBJ_RESIDENT_COUNT(obj
, entry_size
);
14142 if (ref_count
== 1 ||
14143 (ref_count
== 2 && obj
->named
)) {
14144 top
->share_mode
= SM_PRIVATE
;
14145 top
->private_pages_resident
=
14146 OBJ_RESIDENT_COUNT(obj
,
14149 top
->share_mode
= SM_SHARED
;
14150 top
->shared_pages_resident
=
14151 OBJ_RESIDENT_COUNT(obj
,
14155 top
->ref_count
= ref_count
;
14157 /* XXX K64: obj_id will be truncated */
14158 top
->obj_id
= (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj
);
14160 vm_object_unlock(obj
);
14165 vm_map_region_walk(
14167 vm_map_offset_t va
,
14168 vm_map_entry_t entry
,
14169 vm_object_offset_t offset
,
14170 vm_object_size_t range
,
14171 vm_region_extended_info_t extended
,
14172 boolean_t look_for_pages
,
14173 mach_msg_type_number_t count
)
14175 struct vm_object
*obj
, *tmp_obj
;
14176 vm_map_offset_t last_offset
;
14179 struct vm_object
*shadow_object
;
14181 boolean_t do_region_footprint
;
14183 do_region_footprint
= task_self_region_footprint();
14185 if ((VME_OBJECT(entry
) == 0) ||
14186 (entry
->is_sub_map
) ||
14187 (VME_OBJECT(entry
)->phys_contiguous
&&
14188 !entry
->superpage_size
)) {
14189 extended
->share_mode
= SM_EMPTY
;
14190 extended
->ref_count
= 0;
14194 if (entry
->superpage_size
) {
14195 extended
->shadow_depth
= 0;
14196 extended
->share_mode
= SM_LARGE_PAGE
;
14197 extended
->ref_count
= 1;
14198 extended
->external_pager
= 0;
14199 extended
->pages_resident
= (unsigned int)(range
>> PAGE_SHIFT
);
14200 extended
->shadow_depth
= 0;
14204 obj
= VME_OBJECT(entry
);
14206 vm_object_lock(obj
);
14208 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
) {
14212 if (look_for_pages
) {
14213 for (last_offset
= offset
+ range
;
14214 offset
< last_offset
;
14215 offset
+= PAGE_SIZE_64
, va
+= PAGE_SIZE
) {
14216 if (do_region_footprint
) {
14220 if (map
->has_corpse_footprint
) {
14222 * Query the page info data we saved
14223 * while forking the corpse.
14225 vm_map_corpse_footprint_query_page_info(
14233 pmap_query_page_info(map
->pmap
,
14237 if (disp
& PMAP_QUERY_PAGE_PRESENT
) {
14238 if (!(disp
& PMAP_QUERY_PAGE_ALTACCT
)) {
14239 extended
->pages_resident
++;
14241 if (disp
& PMAP_QUERY_PAGE_REUSABLE
) {
14242 extended
->pages_reusable
++;
14243 } else if (!(disp
& PMAP_QUERY_PAGE_INTERNAL
) ||
14244 (disp
& PMAP_QUERY_PAGE_ALTACCT
)) {
14245 /* alternate accounting */
14247 extended
->pages_dirtied
++;
14249 } else if (disp
& PMAP_QUERY_PAGE_COMPRESSED
) {
14250 if (disp
& PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
) {
14251 /* alternate accounting */
14253 extended
->pages_swapped_out
++;
14256 /* deal with alternate accounting */
14257 if (obj
->purgable
== VM_PURGABLE_NONVOLATILE
&&
14258 /* && not tagged as no-footprint? */
14259 VM_OBJECT_OWNER(obj
) != NULL
&&
14260 VM_OBJECT_OWNER(obj
)->map
== map
) {
14263 + VME_OFFSET(entry
))
14265 (obj
->resident_page_count
+
14266 vm_compressor_pager_get_count(obj
->pager
)))) {
14268 * Non-volatile purgeable object owned
14269 * by this task: report the first
14270 * "#resident + #compressed" pages as
14271 * "resident" (to show that they
14272 * contribute to the footprint) but not
14273 * "dirty" (to avoid double-counting
14274 * with the fake "non-volatile" region
14275 * we'll report at the end of the
14276 * address space to account for all
14277 * (mapped or not) non-volatile memory
14278 * owned by this task.
14280 extended
->pages_resident
++;
14282 } else if ((obj
->purgable
== VM_PURGABLE_VOLATILE
||
14283 obj
->purgable
== VM_PURGABLE_EMPTY
) &&
14284 /* && not tagged as no-footprint? */
14285 VM_OBJECT_OWNER(obj
) != NULL
&&
14286 VM_OBJECT_OWNER(obj
)->map
== map
) {
14289 + VME_OFFSET(entry
))
14291 obj
->wired_page_count
)) {
14293 * Volatile|empty purgeable object owned
14294 * by this task: report the first
14295 * "#wired" pages as "resident" (to
14296 * show that they contribute to the
14297 * footprint) but not "dirty" (to avoid
14298 * double-counting with the fake
14299 * "non-volatile" region we'll report
14300 * at the end of the address space to
14301 * account for all (mapped or not)
14302 * non-volatile memory owned by this
14305 extended
->pages_resident
++;
14307 } else if (obj
->purgable
!= VM_PURGABLE_DENY
) {
14309 * Pages from purgeable objects
14310 * will be reported as dirty
14311 * appropriately in an extra
14312 * fake memory region at the end of
14313 * the address space.
14315 } else if (entry
->iokit_acct
) {
14317 * IOKit mappings are considered
14318 * as fully dirty for footprint's
14321 extended
->pages_dirtied
++;
14326 vm_map_region_look_for_page(map
, va
, obj
,
14328 0, extended
, count
);
14331 if (do_region_footprint
) {
14332 goto collect_object_info
;
14335 collect_object_info
:
14336 shadow_object
= obj
->shadow
;
14339 if (!(obj
->internal
)) {
14340 extended
->external_pager
= 1;
14343 if (shadow_object
!= VM_OBJECT_NULL
) {
14344 vm_object_lock(shadow_object
);
14346 shadow_object
!= VM_OBJECT_NULL
;
14348 vm_object_t next_shadow
;
14350 if (!(shadow_object
->internal
)) {
14351 extended
->external_pager
= 1;
14354 next_shadow
= shadow_object
->shadow
;
14356 vm_object_lock(next_shadow
);
14358 vm_object_unlock(shadow_object
);
14359 shadow_object
= next_shadow
;
14362 extended
->shadow_depth
= shadow_depth
;
14365 if (extended
->shadow_depth
|| entry
->needs_copy
) {
14366 extended
->share_mode
= SM_COW
;
14368 if (ref_count
== 1) {
14369 extended
->share_mode
= SM_PRIVATE
;
14371 if (obj
->true_share
) {
14372 extended
->share_mode
= SM_TRUESHARED
;
14374 extended
->share_mode
= SM_SHARED
;
14378 extended
->ref_count
= ref_count
- extended
->shadow_depth
;
14380 for (i
= 0; i
< extended
->shadow_depth
; i
++) {
14381 if ((tmp_obj
= obj
->shadow
) == 0) {
14384 vm_object_lock(tmp_obj
);
14385 vm_object_unlock(obj
);
14387 if ((ref_count
= tmp_obj
->ref_count
) > 1 && tmp_obj
->paging_in_progress
) {
14391 extended
->ref_count
+= ref_count
;
14394 vm_object_unlock(obj
);
14396 if (extended
->share_mode
== SM_SHARED
) {
14397 vm_map_entry_t cur
;
14398 vm_map_entry_t last
;
14401 obj
= VME_OBJECT(entry
);
14402 last
= vm_map_to_entry(map
);
14405 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
) {
14408 for (cur
= vm_map_first_entry(map
); cur
!= last
; cur
= cur
->vme_next
) {
14409 my_refs
+= vm_map_region_count_obj_refs(cur
, obj
);
14412 if (my_refs
== ref_count
) {
14413 extended
->share_mode
= SM_PRIVATE_ALIASED
;
14414 } else if (my_refs
> 1) {
14415 extended
->share_mode
= SM_SHARED_ALIASED
;
/*
 * vm_map_region_look_for_page
 *
 * Accounts for the page at (object, offset) in the extended region
 * information, moving on to the shadow object when needed.
 *
 * NOTE(review): several lines of this routine (return type, the loop
 * construct, return statements, some declarations) are not visible in this
 * listing, so the exact control flow must be confirmed against the full file.
 *
 * Parameters:
 *   map, va    - marked __unused.
 *   object     - first object inspected; saved in caller_object.  Every
 *                vm_object_unlock() below is guarded by
 *                object != caller_object.
 *   offset     - offset passed to vm_page_lookup(); vo_shadow_offset is
 *                added to it before the shadow is examined.
 *   extended   - counters updated here: external_pager,
 *                pages_shared_now_private, pages_dirtied, pages_reusable,
 *                pages_resident, pages_swapped_out and shadow_depth.
 *   count      - pages_reusable is only maintained when count is at least
 *                VM_REGION_EXTENDED_INFO_COUNT.
 *   max_refcnt, depth - used below; their declarations are on lines not
 *                visible in this listing.
 */
14421 /* object is locked on entry and locked on return */
14425 vm_map_region_look_for_page(
14426 __unused vm_map_t map
,
14427 __unused vm_map_offset_t va
,
14428 vm_object_t object
,
14429 vm_object_offset_t offset
,
14432 vm_region_extended_info_t extended
,
14433 mach_msg_type_number_t count
)
14436 vm_object_t shadow
;
14438 vm_object_t caller_object
;
14440 shadow
= object
->shadow
;
14441 caller_object
= object
;
/* A non-internal object marks the region as having an external pager. */
14445 if (!(object
->internal
)) {
14446 extended
->external_pager
= 1;
/* Page found in this object: classify it and count it as resident. */
14449 if ((p
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
14450 if (shadow
&& (max_refcnt
== 1)) {
14451 extended
->pages_shared_now_private
++;
14454 if (!p
->vmp_fictitious
&&
14455 (p
->vmp_dirty
|| pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p
)))) {
14456 extended
->pages_dirtied
++;
14457 } else if (count
>= VM_REGION_EXTENDED_INFO_COUNT
) {
14458 if (p
->vmp_reusable
|| object
->all_reusable
) {
14459 extended
->pages_reusable
++;
14463 extended
->pages_resident
++;
14465 if (object
!= caller_object
) {
14466 vm_object_unlock(object
);
/* No page found: ask the compressor pager state whether it holds the page. */
14471 if (object
->internal
&&
14473 !object
->terminating
&&
14474 object
->pager_ready
) {
14475 if (VM_COMPRESSOR_PAGER_STATE_GET(object
, offset
)
14476 == VM_EXTERNAL_STATE_EXISTS
) {
14477 /* the pager has that page */
14478 extended
->pages_swapped_out
++;
14479 if (object
!= caller_object
) {
14480 vm_object_unlock(object
);
/*
 * Move to the shadow: lock it before the current object is released, and
 * track the deepest level reached and the largest reference count seen.
 */
14487 vm_object_lock(shadow
);
14489 if ((ref_count
= shadow
->ref_count
) > 1 && shadow
->paging_in_progress
) {
14493 if (++depth
> extended
->shadow_depth
) {
14494 extended
->shadow_depth
= depth
;
14497 if (ref_count
> max_refcnt
) {
14498 max_refcnt
= ref_count
;
14501 if (object
!= caller_object
) {
14502 vm_object_unlock(object
);
/* Translate the offset into the shadow object before continuing. */
14505 offset
= offset
+ object
->vo_shadow_offset
;
14507 shadow
= object
->shadow
;
14510 if (object
!= caller_object
) {
14511 vm_object_unlock(object
);
/*
 * vm_map_region_count_obj_refs
 *
 * Examines the object referenced by a map entry, and the objects reached
 * through its shadow links, comparing each against the given object.
 *
 * NOTE(review): the return type, the loop construct, the counter and the
 * return statements are on lines not visible in this listing.  The caller
 * visible in this file adds the result to a running total (my_refs), so the
 * result is presumably a count of matches - confirm against the full file.
 *
 * Parameters:
 *   entry  - map entry to inspect; entries with no object and submap
 *            entries are tested for first.
 *   object - object to compare against.
 *
 * Locking: each object is locked before it is compared, and the next shadow
 * is locked before the current object is unlocked.
 */
14518 vm_map_region_count_obj_refs(
14519 vm_map_entry_t entry
,
14520 vm_object_t object
)
14523 vm_object_t chk_obj
;
14524 vm_object_t tmp_obj
;
14526 if (VME_OBJECT(entry
) == 0) {
14530 if (entry
->is_sub_map
) {
14535 chk_obj
= VME_OBJECT(entry
);
14536 vm_object_lock(chk_obj
);
14539 if (chk_obj
== object
) {
/* Step to the shadow, taking its lock before dropping the current one. */
14542 tmp_obj
= chk_obj
->shadow
;
14544 vm_object_lock(tmp_obj
);
14546 vm_object_unlock(chk_obj
);
14556 * Routine: vm_map_simplify
14559 * Attempt to simplify the map representation in
14560 * the vicinity of the given starting address.
14562 * This routine is intended primarily to keep the
14563 * kernel maps more compact -- they generally don't
14564 * benefit from the "expand a map entry" technology
14565 * at allocation time because the adjacent entry
14566 * is often wired down.
/*
 * vm_map_simplify_entry
 *
 * Tries to merge this_entry with the entry immediately before it
 * (this_entry->vme_prev).
 *
 * The merge is attempted only when neither entry is the map header, the two
 * address ranges are contiguous (prev vme_end == this vme_start), both refer
 * to the same VME_OBJECT at contiguous offsets, and every attribute compared
 * in the condition below is equal.  In addition both entries must have
 * vme_atomic, in_transition, needs_wakeup, is_shared and superpage_size all
 * equal to FALSE.
 *
 * On a merge: prev_entry is unlinked from the map store, this_entry takes
 * over the start address and offset of prev_entry, vm_map_deallocate() or
 * vm_object_deallocate() is called on what prev_entry referenced,
 * prev_entry is disposed of and the map hint is set to this_entry.
 *
 * NOTE(review): the return type, the map parameter, braces and else
 * keywords are on lines not visible in this listing.
 */
14569 vm_map_simplify_entry(
14571 vm_map_entry_t this_entry
)
14573 vm_map_entry_t prev_entry
;
14575 counter(c_vm_map_simplify_entry_called
++);
14577 prev_entry
= this_entry
->vme_prev
;
/* Eligibility test: all of the following must hold for a merge. */
14579 if ((this_entry
!= vm_map_to_entry(map
)) &&
14580 (prev_entry
!= vm_map_to_entry(map
)) &&
14582 (prev_entry
->vme_end
== this_entry
->vme_start
) &&
14584 (prev_entry
->is_sub_map
== this_entry
->is_sub_map
) &&
14585 (VME_OBJECT(prev_entry
) == VME_OBJECT(this_entry
)) &&
14586 ((VME_OFFSET(prev_entry
) + (prev_entry
->vme_end
-
14587 prev_entry
->vme_start
))
14588 == VME_OFFSET(this_entry
)) &&
14590 (prev_entry
->behavior
== this_entry
->behavior
) &&
14591 (prev_entry
->needs_copy
== this_entry
->needs_copy
) &&
14592 (prev_entry
->protection
== this_entry
->protection
) &&
14593 (prev_entry
->max_protection
== this_entry
->max_protection
) &&
14594 (prev_entry
->inheritance
== this_entry
->inheritance
) &&
14595 (prev_entry
->use_pmap
== this_entry
->use_pmap
) &&
14596 (VME_ALIAS(prev_entry
) == VME_ALIAS(this_entry
)) &&
14597 (prev_entry
->no_cache
== this_entry
->no_cache
) &&
14598 (prev_entry
->permanent
== this_entry
->permanent
) &&
14599 (prev_entry
->map_aligned
== this_entry
->map_aligned
) &&
14600 (prev_entry
->zero_wired_pages
== this_entry
->zero_wired_pages
) &&
14601 (prev_entry
->used_for_jit
== this_entry
->used_for_jit
) &&
14602 (prev_entry
->pmap_cs_associated
== this_entry
->pmap_cs_associated
) &&
14603 /* from_reserved_zone: OK if that field doesn't match */
14604 (prev_entry
->iokit_acct
== this_entry
->iokit_acct
) &&
14605 (prev_entry
->vme_resilient_codesign
==
14606 this_entry
->vme_resilient_codesign
) &&
14607 (prev_entry
->vme_resilient_media
==
14608 this_entry
->vme_resilient_media
) &&
14609 (prev_entry
->vme_no_copy_on_read
== this_entry
->vme_no_copy_on_read
) &&
14611 (prev_entry
->wired_count
== this_entry
->wired_count
) &&
14612 (prev_entry
->user_wired_count
== this_entry
->user_wired_count
) &&
14614 ((prev_entry
->vme_atomic
== FALSE
) && (this_entry
->vme_atomic
== FALSE
)) &&
14615 (prev_entry
->in_transition
== FALSE
) &&
14616 (this_entry
->in_transition
== FALSE
) &&
14617 (prev_entry
->needs_wakeup
== FALSE
) &&
14618 (this_entry
->needs_wakeup
== FALSE
) &&
14619 (prev_entry
->is_shared
== FALSE
) &&
14620 (this_entry
->is_shared
== FALSE
) &&
14621 (prev_entry
->superpage_size
== FALSE
) &&
14622 (this_entry
->superpage_size
== FALSE
)
/* Merge: remove prev_entry from the store and grow this_entry downwards. */
14624 vm_map_store_entry_unlink(map
, prev_entry
);
14625 assert(prev_entry
->vme_start
< this_entry
->vme_end
);
14626 if (prev_entry
->map_aligned
) {
14627 assert(VM_MAP_PAGE_ALIGNED(prev_entry
->vme_start
,
14628 VM_MAP_PAGE_MASK(map
)));
14630 this_entry
->vme_start
= prev_entry
->vme_start
;
14631 VME_OFFSET_SET(this_entry
, VME_OFFSET(prev_entry
));
14633 if (map
->holelistenabled
) {
14634 vm_map_store_update_first_free(map
, this_entry
, TRUE
);
/* Both entries referenced the same submap or object; deallocate via prev_entry. */
14637 if (prev_entry
->is_sub_map
) {
14638 vm_map_deallocate(VME_SUBMAP(prev_entry
));
14640 vm_object_deallocate(VME_OBJECT(prev_entry
));
14642 vm_map_entry_dispose(map
, prev_entry
);
14643 SAVE_HINT_MAP_WRITE(map
, this_entry
);
14644 counter(c_vm_map_simplified
++);
/*
 * NOTE(review): the line carrying this routine name is not visible in this
 * listing; it appears to be vm_map_simplify, judging by the Routine comment
 * earlier in the file and the counter name below - confirm.
 *
 * Looks up the entry containing start.  When one is found,
 * vm_map_simplify_entry() is called on it and then on the entry that
 * follows it.  The counter c_vm_map_simplify_called is incremented and the
 * map is unlocked; the matching lock call is on a line not visible here.
 *
 * Parameters:
 *   start - address used for the entry lookup.
 */
14651 vm_map_offset_t start
)
14653 vm_map_entry_t this_entry
;
14656 if (vm_map_lookup_entry(map
, start
, &this_entry
)) {
14657 vm_map_simplify_entry(map
, this_entry
);
14658 vm_map_simplify_entry(map
, this_entry
->vme_next
);
14660 counter(c_vm_map_simplify_called
++);
14661 vm_map_unlock(map
);
/*
 * vm_map_simplify_range
 *
 * Calls vm_map_simplify_entry() on every entry whose start address is at or
 * below the page-rounded end, beginning with the entry that contains the
 * page-truncated start (or the first entry after it when start is not
 * mapped).
 *
 * Parameters:
 *   start, end - address range; start >= end is treated as an invalid
 *                range.  start is truncated and end is rounded using the
 *                page mask of the map.
 *
 * NOTE(review): the return type, the map parameter, the return for the
 * invalid range and some braces are on lines not visible in this listing.
 */
14665 vm_map_simplify_range(
14667 vm_map_offset_t start
,
14668 vm_map_offset_t end
)
14670 vm_map_entry_t entry
;
14673 * The map should be locked (for "write") by the caller.
14676 if (start
>= end
) {
14677 /* invalid address range */
14681 start
= vm_map_trunc_page(start
,
14682 VM_MAP_PAGE_MASK(map
));
14683 end
= vm_map_round_page(end
,
14684 VM_MAP_PAGE_MASK(map
));
/* Pick the first entry to examine. */
14686 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
14687 /* "start" is not mapped and "entry" ends before "start" */
14688 if (entry
== vm_map_to_entry(map
)) {
14689 /* start with first entry in the map */
14690 entry
= vm_map_first_entry(map
);
14692 /* start with next entry */
14693 entry
= entry
->vme_next
;
/* The comparison is inclusive, so an entry starting exactly at end is tried too. */
14697 while (entry
!= vm_map_to_entry(map
) &&
14698 entry
->vme_start
<= end
) {
14699 /* try and coalesce "entry" with its previous entry */
14700 vm_map_simplify_entry(map
, entry
);
14701 entry
= entry
->vme_next
;
14707 * Routine: vm_map_machine_attribute
14709 * Provide machine-specific attributes to mappings,
14710 * such as cacheability etc. for machines that provide
14711 * them. NUMA architectures and machines with big/strange
14712 * caches will use this.
14714 * Responsibilities for locking and checking are handled here,
14715 * everything else in the pmap module. If any non-volatile
14716 * information must be kept, the pmap module should handle
14717 * it itself. [This assumes that attributes do not
14718 * need to be inherited, which seems ok to me]
/*
 * vm_map_machine_attribute
 *
 * Applies a machine attribute to the address range [start, end) of the map.
 *
 * Parameters:
 *   start, end - range to operate on; KERN_INVALID_ADDRESS is returned when
 *                start is below vm_map_min(map) or end is above
 *                vm_map_max(map).
 *   attribute  - attribute selector.  Anything other than MATTR_CACHE is
 *                handed to pmap_attribute() for the whole range.
 *   value      - IN/OUT attribute value.
 *
 * For MATTR_CACHE the range is walked entry by entry.  Submap entries are
 * handled by a recursive call on the translated sub-range.  For entries
 * with an object, each page is looked up in the object and then down its
 * shadow links; pmap_attribute_cache_sync() is called for pages that are
 * found and are not fictitious.  A start address with no map entry ends the
 * walk with KERN_FAILURE.
 *
 * NOTE(review): the return type, the map parameter, the lock call and
 * several argument lines are not visible in this listing.
 */
14721 vm_map_machine_attribute(
14723 vm_map_offset_t start
,
14724 vm_map_offset_t end
,
14725 vm_machine_attribute_t attribute
,
14726 vm_machine_attribute_val_t
* value
) /* IN/OUT */
14729 vm_map_size_t sync_size
;
14730 vm_map_entry_t entry
;
14732 if (start
< vm_map_min(map
) || end
> vm_map_max(map
)) {
14733 return KERN_INVALID_ADDRESS
;
/*
 * NOTE(review): no check that start <= end is visible before this
 * subtraction; with end < start the size would wrap - confirm that callers
 * guarantee the ordering.
 */
14736 /* Figure how much memory we need to flush (in page increments) */
14737 sync_size
= end
- start
;
14741 if (attribute
!= MATTR_CACHE
) {
14742 /* If we don't have to find physical addresses, we */
14743 /* don't have to do an explicit traversal here. */
14744 ret
= pmap_attribute(map
->pmap
, start
, end
- start
,
14746 vm_map_unlock(map
);
14750 ret
= KERN_SUCCESS
; /* Assume it all worked */
14752 while (sync_size
) {
14753 if (vm_map_lookup_entry(map
, start
, &entry
)) {
14754 vm_map_size_t sub_size
;
/* Clamp the piece handled in this pass to the end of the current entry. */
14755 if ((entry
->vme_end
- start
) > sync_size
) {
14756 sub_size
= sync_size
;
14759 sub_size
= entry
->vme_end
- start
;
14760 sync_size
-= sub_size
;
14762 if (entry
->is_sub_map
) {
14763 vm_map_offset_t sub_start
;
14764 vm_map_offset_t sub_end
;
14766 sub_start
= (start
- entry
->vme_start
)
14767 + VME_OFFSET(entry
);
14768 sub_end
= sub_start
+ sub_size
;
14769 vm_map_machine_attribute(
14775 if (VME_OBJECT(entry
)) {
14777 vm_object_t object
;
14778 vm_object_t base_object
;
14779 vm_object_t last_object
;
14780 vm_object_offset_t offset
;
14781 vm_object_offset_t base_offset
;
14782 vm_map_size_t range
;
14784 offset
= (start
- entry
->vme_start
)
14785 + VME_OFFSET(entry
);
14786 base_offset
= offset
;
14787 object
= VME_OBJECT(entry
);
14788 base_object
= object
;
14789 last_object
= NULL
;
14791 vm_object_lock(object
);
14794 m
= vm_page_lookup(
14797 if (m
&& !m
->vmp_fictitious
) {
14799 pmap_attribute_cache_sync(
14800 VM_PAGE_GET_PHYS_PAGE(m
),
/* Page not usable here: descend to the shadow, locking it before unlocking. */
14803 } else if (object
->shadow
) {
14804 offset
= offset
+ object
->vo_shadow_offset
;
14805 last_object
= object
;
14806 object
= object
->shadow
;
14807 vm_object_lock(last_object
->shadow
);
14808 vm_object_unlock(last_object
);
14811 range
-= PAGE_SIZE
;
/* Return to the top object before handling the next page. */
14813 if (base_object
!= object
) {
14814 vm_object_unlock(object
);
14815 vm_object_lock(base_object
);
14816 object
= base_object
;
14818 /* Bump to the next page */
14819 base_offset
+= PAGE_SIZE
;
14820 offset
= base_offset
;
14822 vm_object_unlock(object
);
14827 vm_map_unlock(map
);
14828 return KERN_FAILURE
;
14832 vm_map_unlock(map
);
14838 * vm_map_behavior_set:
14840 * Sets the paging reference behavior of the specified address
14841 * range in the target map. Paging reference behavior affects
14842 * how pagein operations resulting from faults on the map will be
/*
 * vm_map_behavior_set
 *
 * Applies a paging behavior to the address range [start, end) of the map.
 *
 * Parameters:
 *   start, end   - address range; a range outside
 *                  [vm_map_min(map), vm_map_max(map)] yields KERN_NO_SPACE.
 *   new_behavior - selects one of two kinds of handling:
 *       VM_BEHAVIOR_DEFAULT, RANDOM, SEQUENTIAL, RSEQNTL and
 *       ZERO_WIRED_PAGES record state in the map entries covering the
 *       range.  The range is validated with vm_map_range_check()
 *       (KERN_INVALID_ADDRESS on failure) and clipped to start and end.
 *       ZERO_WIRED_PAGES sets entry->zero_wired_pages; the other values are
 *       stored in entry->behavior.
 *       VM_BEHAVIOR_WILLNEED, DONTNEED, FREE, REUSABLE, REUSE, CAN_REUSE
 *       and PAGEOUT are forwarded to vm_map_willneed(), vm_map_msync(),
 *       vm_map_reusable_pages(), vm_map_reuse_pages(), vm_map_can_reuse()
 *       and vm_map_pageout() respectively.
 *
 * Returns KERN_INVALID_ARGUMENT for any other value, KERN_SUCCESS after the
 * entries have been updated, or the result of the forwarded call.
 *
 * NOTE(review): the return type, the map parameter, the lock call, braces
 * and else keywords are on lines not visible in this listing.
 */
14846 vm_map_behavior_set(
14848 vm_map_offset_t start
,
14849 vm_map_offset_t end
,
14850 vm_behavior_t new_behavior
)
14852 vm_map_entry_t entry
;
14853 vm_map_entry_t temp_entry
;
/* Reject ranges outside the map; the error code used here is KERN_NO_SPACE. */
14856 start
< vm_map_min(map
) ||
14857 end
> vm_map_max(map
)) {
14858 return KERN_NO_SPACE
;
14861 switch (new_behavior
) {
14863 * This first block of behaviors all set a persistent state on the specified
14864 * memory range. All we have to do here is to record the desired behavior
14865 * in the vm_map_entry_t's.
14868 case VM_BEHAVIOR_DEFAULT
:
14869 case VM_BEHAVIOR_RANDOM
:
14870 case VM_BEHAVIOR_SEQUENTIAL
:
14871 case VM_BEHAVIOR_RSEQNTL
:
14872 case VM_BEHAVIOR_ZERO_WIRED_PAGES
:
14876 * The entire address range must be valid for the map.
14877 * Note that vm_map_range_check() does a
14878 * vm_map_lookup_entry() internally and returns the
14879 * entry containing the start of the address range if
14880 * the entire range is valid.
14882 if (vm_map_range_check(map
, start
, end
, &temp_entry
)) {
14883 entry
= temp_entry
;
14884 vm_map_clip_start(map
, entry
, start
);
14886 vm_map_unlock(map
);
14887 return KERN_INVALID_ADDRESS
;
14890 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
14891 vm_map_clip_end(map
, entry
, end
);
14892 if (entry
->is_sub_map
) {
14893 assert(!entry
->use_pmap
);
14896 if (new_behavior
== VM_BEHAVIOR_ZERO_WIRED_PAGES
) {
14897 entry
->zero_wired_pages
= TRUE
;
14899 entry
->behavior
= new_behavior
;
14901 entry
= entry
->vme_next
;
14904 vm_map_unlock(map
);
14908 * The rest of these are different from the above in that they cause
14909 * an immediate action to take place as opposed to setting a behavior that
14910 * affects future actions.
14913 case VM_BEHAVIOR_WILLNEED
:
14914 return vm_map_willneed(map
, start
, end
);
14916 case VM_BEHAVIOR_DONTNEED
:
14917 return vm_map_msync(map
, start
, end
- start
, VM_SYNC_DEACTIVATE
| VM_SYNC_CONTIGUOUS
);
14919 case VM_BEHAVIOR_FREE
:
14920 return vm_map_msync(map
, start
, end
- start
, VM_SYNC_KILLPAGES
| VM_SYNC_CONTIGUOUS
);
14922 case VM_BEHAVIOR_REUSABLE
:
14923 return vm_map_reusable_pages(map
, start
, end
);
14925 case VM_BEHAVIOR_REUSE
:
14926 return vm_map_reuse_pages(map
, start
, end
);
14928 case VM_BEHAVIOR_CAN_REUSE
:
14929 return vm_map_can_reuse(map
, start
, end
);
14932 case VM_BEHAVIOR_PAGEOUT
:
14933 return vm_map_pageout(map
, start
, end
);
14934 #endif /* MACH_ASSERT */
14937 return KERN_INVALID_ARGUMENT
;
14940 return KERN_SUCCESS
;
14945 * Internals for madvise(MADV_WILLNEED) system call.
14947 * The implementation is to do:-
14948 * a) read-ahead if the mapping corresponds to a mapped regular file
14949 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
/*
 * NOTE(review): the line carrying this routine name is not visible in this
 * listing; it appears to be vm_map_willneed, which vm_map_behavior_set()
 * calls for VM_BEHAVIOR_WILLNEED - confirm.
 *
 * Walks the map entries covering [start, end) under the map read lock and
 * prepares a fault_info (sequential behavior, stealth set) for each one.
 *
 * Per entry:
 *   - offset is (start - vme_start) + VME_OFFSET(entry) and len is the
 *     smaller of the distance to the end of the entry and to end; a len
 *     that does not fit in vm_size_t is replaced by (0 - PAGE_SIZE).
 *   - PMAP_OPTIONS_ALT_ACCT is requested when the entry is iokit_acct, or
 *     is neither a submap nor use_pmap.
 *   - submap entries and entries without VM_PROT_READ are skipped.
 *   - entries with no object or an internal object take the anonymous
 *     memory path: the map lock is dropped and the region is stepped
 *     through one effective page at a time with VM_PROT_READ |
 *     VM_PROT_WRITE (the routine invoked per page is on a line not visible
 *     here).
 *   - otherwise find_vnode_object() supplies the backing object; entries
 *     without one are skipped.  The map lock is dropped around
 *     memory_object_data_request(), which is bracketed by
 *     vm_object_paging_begin() and vm_object_paging_end().
 *   - after the map lock has been dropped the next entry is looked up
 *     again by address; a missing entry yields KERN_INVALID_ADDRESS.
 *
 * Returns KERN_INVALID_ADDRESS when the initial vm_map_range_check() fails
 * or a hole is found later, and KERN_SUCCESS otherwise, including when
 * memory_object_data_request() fails.
 *
 * Parameters:
 *   start, end - address range to prefetch.
 */
14953 static kern_return_t
14956 vm_map_offset_t start
,
14957 vm_map_offset_t end
14960 vm_map_entry_t entry
;
14961 vm_object_t object
;
14962 memory_object_t pager
;
14963 struct vm_object_fault_info fault_info
= {};
14965 vm_object_size_t len
;
14966 vm_object_offset_t offset
;
14968 fault_info
.interruptible
= THREAD_UNINT
; /* ignored value */
14969 fault_info
.behavior
= VM_BEHAVIOR_SEQUENTIAL
;
14970 fault_info
.stealth
= TRUE
;
14973 * The MADV_WILLNEED operation doesn't require any changes to the
14974 * vm_map_entry_t's, so the read lock is sufficient.
14977 vm_map_lock_read(map
);
14980 * The madvise semantics require that the address range be fully
14981 * allocated with no holes. Otherwise, we're required to return
14985 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
14986 vm_map_unlock_read(map
);
14987 return KERN_INVALID_ADDRESS
;
14991 * Examine each vm_map_entry_t in the range.
14993 for (; entry
!= vm_map_to_entry(map
) && start
< end
;) {
14995 * The first time through, the start address could be anywhere
14996 * within the vm_map_entry we found. So adjust the offset to
14997 * correspond. After that, the offset will always be zero to
14998 * correspond to the beginning of the current vm_map_entry.
15000 offset
= (start
- entry
->vme_start
) + VME_OFFSET(entry
);
15003 * Set the length so we don't go beyond the end of the
15004 * map_entry or beyond the end of the range we were given.
15005 * This range could span also multiple map entries all of which
15006 * map different files, so make sure we only do the right amount
15007 * of I/O for each object. Note that it's possible for there
15008 * to be multiple map entries all referring to the same object
15009 * but with different page permissions, but it's not worth
15010 * trying to optimize that case.
15012 len
= MIN(entry
->vme_end
- start
, end
- start
);
15014 if ((vm_size_t
) len
!= len
) {
15015 /* 32-bit overflow */
15016 len
= (vm_size_t
) (0 - PAGE_SIZE
);
15018 fault_info
.cluster_size
= (vm_size_t
) len
;
15019 fault_info
.lo_offset
= offset
;
15020 fault_info
.hi_offset
= offset
+ len
;
15021 fault_info
.user_tag
= VME_ALIAS(entry
);
15022 fault_info
.pmap_options
= 0;
15023 if (entry
->iokit_acct
||
15024 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
15025 fault_info
.pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
15029 * If the entry is a submap OR there's no read permission
15030 * to this mapping, then just skip it.
15032 if ((entry
->is_sub_map
) || (entry
->protection
& VM_PROT_READ
) == 0) {
15033 entry
= entry
->vme_next
;
15034 start
= entry
->vme_start
;
15038 object
= VME_OBJECT(entry
);
15040 if (object
== NULL
||
15041 (object
&& object
->internal
)) {
15043 * Memory range backed by anonymous memory.
15045 vm_size_t region_size
= 0, effective_page_size
= 0;
15046 vm_map_offset_t addr
= 0, effective_page_mask
= 0;
15051 effective_page_mask
= MAX(vm_map_page_mask(current_map()), PAGE_MASK
);
15052 effective_page_size
= effective_page_mask
+ 1;
15054 vm_map_unlock_read(map
);
15056 while (region_size
) {
15058 vm_map_trunc_page(addr
, effective_page_mask
),
15059 VM_PROT_READ
| VM_PROT_WRITE
);
15061 region_size
-= effective_page_size
;
15062 addr
+= effective_page_size
;
15066 * Find the file object backing this map entry. If there is
15067 * none, then we simply ignore the "will need" advice for this
15068 * entry and go on to the next one.
15070 if ((object
= find_vnode_object(entry
)) == VM_OBJECT_NULL
) {
15071 entry
= entry
->vme_next
;
15072 start
= entry
->vme_start
;
15076 vm_object_paging_begin(object
);
15077 pager
= object
->pager
;
15078 vm_object_unlock(object
);
15081 * The data_request() could take a long time, so let's
15082 * release the map lock to avoid blocking other threads.
15084 vm_map_unlock_read(map
);
15087 * Get the data from the object asynchronously.
15089 * Note that memory_object_data_request() places limits on the
15090 * amount of I/O it will do. Regardless of the len we
15091 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15092 * silently truncates the len to that size. This isn't
15093 * necessarily bad since madvise shouldn't really be used to
15094 * page in unlimited amounts of data. Other Unix variants
15095 * limit the willneed case as well. If this turns out to be an
15096 * issue for developers, then we can always adjust the policy
15097 * here and still be backwards compatible since this is all
15100 kr
= memory_object_data_request(
15102 offset
+ object
->paging_offset
,
15105 (memory_object_fault_info_t
)&fault_info
);
15107 vm_object_lock(object
);
15108 vm_object_paging_end(object
);
15109 vm_object_unlock(object
);
15112 * If we couldn't do the I/O for some reason, just give up on
15113 * the madvise. We still return success to the user since
15114 * madvise isn't supposed to fail when the advice can't be
15118 if (kr
!= KERN_SUCCESS
) {
15119 return KERN_SUCCESS
;
15124 if (start
>= end
) {
15126 return KERN_SUCCESS
;
15129 /* look up next entry */
15130 vm_map_lock_read(map
);
15131 if (!vm_map_lookup_entry(map
, start
, &entry
)) {
15133 * There's a new hole in the address range.
15135 vm_map_unlock_read(map
);
15136 return KERN_INVALID_ADDRESS
;
15140 vm_map_unlock_read(map
);
15141 return KERN_SUCCESS
;
/*
 * vm_map_entry_is_reusable
 *
 * Decides whether a map entry qualifies for the reuse family of madvise
 * operations.  The callers in this file fail with KERN_INVALID_ADDRESS when
 * the result is zero.
 *
 * Checks visible below, in order:
 *   - the entry is tested for being a submap;
 *   - the alias of the entry is switched on, with the malloc tags
 *     (MALLOC, MALLOC_SMALL, MALLOC_LARGE, REALLOC, MALLOC_TINY,
 *     MALLOC_LARGE_REUSABLE, MALLOC_LARGE_REUSED) grouped together;
 *   - the entry is tested for submap, in_transition, protection other than
 *     VM_PROT_DEFAULT, max_protection other than VM_PROT_ALL, inheritance
 *     other than VM_INHERIT_DEFAULT, permanent, superpage_size,
 *     zero_wired_pages and nonzero wired or user wired counts;
 *   - the object of the entry is tested for VM_OBJECT_NULL;
 *   - the object is tested for a single reference, no wired pages, no copy
 *     and no shadow object, internal, purgable state VM_PURGABLE_DENY, copy
 *     strategy other than MEMORY_OBJECT_COPY_DELAY, not true_share, default
 *     wimg bits and not code signed.
 *
 * NOTE(review): the return type and every return statement are on lines not
 * visible in this listing, so the outcome of each test must be confirmed
 * against the full file.
 *
 * Parameters:
 *   entry - map entry to examine.
 */
15145 vm_map_entry_is_reusable(
15146 vm_map_entry_t entry
)
15148 /* Only user map entries */
15150 vm_object_t object
;
15152 if (entry
->is_sub_map
) {
15156 switch (VME_ALIAS(entry
)) {
15157 case VM_MEMORY_MALLOC
:
15158 case VM_MEMORY_MALLOC_SMALL
:
15159 case VM_MEMORY_MALLOC_LARGE
:
15160 case VM_MEMORY_REALLOC
:
15161 case VM_MEMORY_MALLOC_TINY
:
15162 case VM_MEMORY_MALLOC_LARGE_REUSABLE
:
15163 case VM_MEMORY_MALLOC_LARGE_REUSED
:
15165 * This is a malloc() memory region: check if it's still
15166 * in its original state and can be re-used for more
15167 * malloc() allocations.
15172 * Not a malloc() memory region: let the caller decide if
15178 if (/*entry->is_shared ||*/
15179 entry
->is_sub_map
||
15180 entry
->in_transition
||
15181 entry
->protection
!= VM_PROT_DEFAULT
||
15182 entry
->max_protection
!= VM_PROT_ALL
||
15183 entry
->inheritance
!= VM_INHERIT_DEFAULT
||
15185 entry
->permanent
||
15186 entry
->superpage_size
!= FALSE
||
15187 entry
->zero_wired_pages
||
15188 entry
->wired_count
!= 0 ||
15189 entry
->user_wired_count
!= 0) {
15193 object
= VME_OBJECT(entry
);
15194 if (object
== VM_OBJECT_NULL
) {
15200 * Let's proceed even if the VM object is potentially
15202 * We check for this later when processing the actual
15203 * VM pages, so the contents will be safe if shared.
15205 * But we can still mark this memory region as "reusable" to
15206 * acknowledge that the caller did let us know that the memory
15207 * could be re-used and should not be penalized for holding
15208 * on to it. This allows its "resident size" to not include
15209 * the reusable range.
15211 object
->ref_count
== 1 &&
15213 object
->wired_page_count
== 0 &&
15214 object
->copy
== VM_OBJECT_NULL
&&
15215 object
->shadow
== VM_OBJECT_NULL
&&
15216 object
->internal
&&
15217 object
->purgable
== VM_PURGABLE_DENY
&&
15218 object
->copy_strategy
!= MEMORY_OBJECT_COPY_DELAY
&&
15219 !object
->true_share
&&
15220 object
->wimg_bits
== VM_WIMG_USE_DEFAULT
&&
15221 !object
->code_signed
) {
/*
 * vm_map_reuse_pages
 *
 * Handles VM_BEHAVIOR_REUSE for the address range [start, end).
 *
 * Under the map read lock, the range is validated with
 * vm_map_range_check() and every entry in it must pass
 * vm_map_entry_is_reusable().  For each entry the covered part is converted
 * to object offsets (adding VME_OFFSET) and, when the entry has an object,
 * vm_object_reuse_pages() is called on that span with the object locked.
 * Entries tagged VM_MEMORY_MALLOC_LARGE_REUSABLE are retagged
 * VM_MEMORY_MALLOC_LARGE_REUSED.
 *
 * Returns KERN_INVALID_ADDRESS (and increments reuse_pages_failure) when
 * the range check or the per-entry check fails; otherwise increments
 * reuse_pages_success and returns KERN_SUCCESS.
 *
 * Parameters:
 *   start, end - address range to process.
 *
 * NOTE(review): the map parameter, the start_offset value used when the
 * entry begins at or after start, and some call arguments are on lines not
 * visible in this listing.
 */
15227 static kern_return_t
15228 vm_map_reuse_pages(
15230 vm_map_offset_t start
,
15231 vm_map_offset_t end
)
15233 vm_map_entry_t entry
;
15234 vm_object_t object
;
15235 vm_object_offset_t start_offset
, end_offset
;
15238 * The MADV_REUSE operation doesn't require any changes to the
15239 * vm_map_entry_t's, so the read lock is sufficient.
15242 vm_map_lock_read(map
);
15243 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
15246 * The madvise semantics require that the address range be fully
15247 * allocated with no holes. Otherwise, we're required to return
15251 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15252 vm_map_unlock_read(map
);
15253 vm_page_stats_reusable
.reuse_pages_failure
++;
15254 return KERN_INVALID_ADDRESS
;
15258 * Examine each vm_map_entry_t in the range.
15260 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
15261 entry
= entry
->vme_next
) {
15263 * Sanity check on the VM map entry.
15265 if (!vm_map_entry_is_reusable(entry
)) {
15266 vm_map_unlock_read(map
);
15267 vm_page_stats_reusable
.reuse_pages_failure
++;
15268 return KERN_INVALID_ADDRESS
;
15272 * The first time through, the start address could be anywhere
15273 * within the vm_map_entry we found. So adjust the offset to
15276 if (entry
->vme_start
< start
) {
15277 start_offset
= start
- entry
->vme_start
;
15281 end_offset
= MIN(end
, entry
->vme_end
) - entry
->vme_start
;
15282 start_offset
+= VME_OFFSET(entry
);
15283 end_offset
+= VME_OFFSET(entry
);
15285 assert(!entry
->is_sub_map
);
15286 object
= VME_OBJECT(entry
);
15287 if (object
!= VM_OBJECT_NULL
) {
15288 vm_object_lock(object
);
15289 vm_object_reuse_pages(object
, start_offset
, end_offset
,
15291 vm_object_unlock(object
);
15294 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE_REUSABLE
) {
15297 * We do not hold the VM map exclusively here.
15298 * The "alias" field is not that critical, so it's
15299 * safe to update it here, as long as it is the only
15300 * one that can be modified while holding the VM map
15303 VME_ALIAS_SET(entry
, VM_MEMORY_MALLOC_LARGE_REUSED
);
15307 vm_map_unlock_read(map
);
15308 vm_page_stats_reusable
.reuse_pages_success
++;
15309 return KERN_SUCCESS
;
/*
 * vm_map_reusable_pages
 *
 * Handles VM_BEHAVIOR_REUSABLE for the address range [start, end).
 *
 * Under the map read lock, the range is validated with
 * vm_map_range_check() and every entry in it must pass
 * vm_map_entry_is_reusable().  An entry that is neither writable nor
 * used_for_jit fails the whole call with KERN_PROTECTION_FAILURE.
 *
 * For each entry with an object, the object is locked and a kill_pages
 * decision is taken from its ref_count, copy_strategy, copy and shadow
 * fields and from the accounting flags of the entry (iokit_acct, use_pmap).
 * Unless kill_pages is -1, vm_object_deactivate_pages() is called on the
 * covered span with the reusable_pages argument set to TRUE.  Entries
 * tagged VM_MEMORY_MALLOC_LARGE or VM_MEMORY_MALLOC_LARGE_REUSED are
 * retagged VM_MEMORY_MALLOC_LARGE_REUSABLE.
 *
 * Statistics in vm_page_stats_reusable are updated along the way:
 * reusable_pages_failure, reusable_nonwritable, reusable_shared,
 * reusable_pages_shared and reusable_pages_success.
 *
 * Returns KERN_INVALID_ADDRESS, KERN_PROTECTION_FAILURE or KERN_SUCCESS as
 * described above.
 *
 * Parameters:
 *   start, end - address range to process.
 *
 * NOTE(review): the map parameter, the assignments to kill_pages, the
 * handling of entries without an object and some call arguments are on
 * lines not visible in this listing.
 */
15313 static kern_return_t
15314 vm_map_reusable_pages(
15316 vm_map_offset_t start
,
15317 vm_map_offset_t end
)
15319 vm_map_entry_t entry
;
15320 vm_object_t object
;
15321 vm_object_offset_t start_offset
, end_offset
;
15322 vm_map_offset_t pmap_offset
;
15325 * The MADV_REUSABLE operation doesn't require any changes to the
15326 * vm_map_entry_t's, so the read lock is sufficient.
15329 vm_map_lock_read(map
);
15330 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
15333 * The madvise semantics require that the address range be fully
15334 * allocated with no holes. Otherwise, we're required to return
15338 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15339 vm_map_unlock_read(map
);
15340 vm_page_stats_reusable
.reusable_pages_failure
++;
15341 return KERN_INVALID_ADDRESS
;
15345 * Examine each vm_map_entry_t in the range.
15347 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
15348 entry
= entry
->vme_next
) {
15349 int kill_pages
= 0;
15352 * Sanity check on the VM map entry.
15354 if (!vm_map_entry_is_reusable(entry
)) {
15355 vm_map_unlock_read(map
);
15356 vm_page_stats_reusable
.reusable_pages_failure
++;
15357 return KERN_INVALID_ADDRESS
;
15360 if (!(entry
->protection
& VM_PROT_WRITE
) && !entry
->used_for_jit
) {
15361 /* not writable: can't discard contents */
15362 vm_map_unlock_read(map
);
15363 vm_page_stats_reusable
.reusable_nonwritable
++;
15364 vm_page_stats_reusable
.reusable_pages_failure
++;
15365 return KERN_PROTECTION_FAILURE
;
15369 * The first time through, the start address could be anywhere
15370 * within the vm_map_entry we found. So adjust the offset to
15373 if (entry
->vme_start
< start
) {
15374 start_offset
= start
- entry
->vme_start
;
15375 pmap_offset
= start
;
15378 pmap_offset
= entry
->vme_start
;
15380 end_offset
= MIN(end
, entry
->vme_end
) - entry
->vme_start
;
15381 start_offset
+= VME_OFFSET(entry
);
15382 end_offset
+= VME_OFFSET(entry
);
15384 assert(!entry
->is_sub_map
);
15385 object
= VME_OBJECT(entry
);
15386 if (object
== VM_OBJECT_NULL
) {
15391 vm_object_lock(object
);
15392 if (((object
->ref_count
== 1) ||
15393 (object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
&&
15394 object
->copy
== VM_OBJECT_NULL
)) &&
15395 object
->shadow
== VM_OBJECT_NULL
&&
15397 * "iokit_acct" entries are billed for their virtual size
15398 * (rather than for their resident pages only), so they
15399 * wouldn't benefit from making pages reusable, and it
15400 * would be hard to keep track of pages that are both
15401 * "iokit_acct" and "reusable" in the pmap stats and
15404 !(entry
->iokit_acct
||
15405 (!entry
->is_sub_map
&& !entry
->use_pmap
))) {
15406 if (object
->ref_count
!= 1) {
15407 vm_page_stats_reusable
.reusable_shared
++;
15413 if (kill_pages
!= -1) {
15414 vm_object_deactivate_pages(object
,
15416 end_offset
- start_offset
,
15418 TRUE
/*reusable_pages*/,
15422 vm_page_stats_reusable
.reusable_pages_shared
++;
15424 vm_object_unlock(object
);
15426 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE
||
15427 VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE_REUSED
) {
15430 * We do not hold the VM map exclusively here.
15431 * The "alias" field is not that critical, so it's
15432 * safe to update it here, as long as it is the only
15433 * one that can be modified while holding the VM map
15436 VME_ALIAS_SET(entry
, VM_MEMORY_MALLOC_LARGE_REUSABLE
);
15440 vm_map_unlock_read(map
);
15441 vm_page_stats_reusable
.reusable_pages_success
++;
15442 return KERN_SUCCESS
;
/*
 * NOTE(review): the line carrying this routine name is not visible in this
 * listing; it appears to be vm_map_can_reuse, judging by the can_reuse
 * statistics updated below and the VM_BEHAVIOR_CAN_REUSE case in
 * vm_map_behavior_set() - confirm.
 *
 * Read-only query: reports whether every map entry covering [start, end)
 * passes vm_map_entry_is_reusable().  No entry or object is modified.
 *
 * Returns KERN_INVALID_ADDRESS (and increments can_reuse_failure) when
 * vm_map_range_check() fails or any entry is not reusable; otherwise
 * increments can_reuse_success and returns KERN_SUCCESS.
 *
 * NOTE(review): the first comment inside the body names MADV_REUSABLE,
 * which looks copied from vm_map_reusable_pages(); it presumably should
 * name the can-reuse operation.
 *
 * Parameters:
 *   start, end - address range to check.
 */
15446 static kern_return_t
15449 vm_map_offset_t start
,
15450 vm_map_offset_t end
)
15452 vm_map_entry_t entry
;
15455 * The MADV_REUSABLE operation doesn't require any changes to the
15456 * vm_map_entry_t's, so the read lock is sufficient.
15459 vm_map_lock_read(map
);
15460 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
15463 * The madvise semantics require that the address range be fully
15464 * allocated with no holes. Otherwise, we're required to return
15468 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15469 vm_map_unlock_read(map
);
15470 vm_page_stats_reusable
.can_reuse_failure
++;
15471 return KERN_INVALID_ADDRESS
;
15475 * Examine each vm_map_entry_t in the range.
15477 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
15478 entry
= entry
->vme_next
) {
15480 * Sanity check on the VM map entry.
15482 if (!vm_map_entry_is_reusable(entry
)) {
15483 vm_map_unlock_read(map
);
15484 vm_page_stats_reusable
.can_reuse_failure
++;
15485 return KERN_INVALID_ADDRESS
;
15489 vm_map_unlock_read(map
);
15490 vm_page_stats_reusable
.can_reuse_success
++;
15491 return KERN_SUCCESS
;
/*
 * NOTE(review): the line carrying this routine name is not visible in this
 * listing; it appears to be vm_map_pageout, which vm_map_behavior_set()
 * calls for VM_BEHAVIOR_PAGEOUT - confirm.  The closing preprocessor line
 * after this routine mentions MACH_ASSERT; the matching opening line is not
 * visible here.
 *
 * Under the map read lock, validates [start, end) with
 * vm_map_range_check() and calls vm_object_pageout() on the object of each
 * entry in the range.
 *
 * Per entry:
 *   - for a submap entry, the covered range is translated into the submap
 *     (starting at VME_OFFSET(entry)), the submap is read locked and range
 *     checked, and the object of the submap entry returned by that check
 *     is paged out.  A failed submap range check returns
 *     KERN_INVALID_ADDRESS.
 *   - entries, or submap entries, that are themselves submaps, have no
 *     object, or whose object is not internal are tested for before
 *     vm_object_pageout() is reached.
 *
 * Returns KERN_INVALID_ADDRESS on a failed range check and KERN_SUCCESS
 * after the walk completes.
 *
 * Parameters:
 *   start, end - address range to page out.
 *
 * NOTE(review): the map parameter, the submap declaration, the range check
 * arguments and the continue statements are on lines not visible in this
 * listing.
 */
15496 static kern_return_t
15499 vm_map_offset_t start
,
15500 vm_map_offset_t end
)
15502 vm_map_entry_t entry
;
15505 * The MADV_PAGEOUT operation doesn't require any changes to the
15506 * vm_map_entry_t's, so the read lock is sufficient.
15509 vm_map_lock_read(map
);
15512 * The madvise semantics require that the address range be fully
15513 * allocated with no holes. Otherwise, we're required to return
15517 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15518 vm_map_unlock_read(map
);
15519 return KERN_INVALID_ADDRESS
;
15523 * Examine each vm_map_entry_t in the range.
15525 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
15526 entry
= entry
->vme_next
) {
15527 vm_object_t object
;
15530 * Sanity check on the VM map entry.
15532 if (entry
->is_sub_map
) {
15534 vm_map_offset_t submap_start
;
15535 vm_map_offset_t submap_end
;
15536 vm_map_entry_t submap_entry
;
15538 submap
= VME_SUBMAP(entry
);
15539 submap_start
= VME_OFFSET(entry
);
15540 submap_end
= submap_start
+ (entry
->vme_end
-
15543 vm_map_lock_read(submap
);
15545 if (!vm_map_range_check(submap
,
15549 vm_map_unlock_read(submap
);
15550 vm_map_unlock_read(map
);
15551 return KERN_INVALID_ADDRESS
;
15554 object
= VME_OBJECT(submap_entry
);
15555 if (submap_entry
->is_sub_map
||
15556 object
== VM_OBJECT_NULL
||
15557 !object
->internal
) {
15558 vm_map_unlock_read(submap
);
15562 vm_object_pageout(object
);
15564 vm_map_unlock_read(submap
);
15565 submap
= VM_MAP_NULL
;
15566 submap_entry
= VM_MAP_ENTRY_NULL
;
15570 object
= VME_OBJECT(entry
);
15571 if (entry
->is_sub_map
||
15572 object
== VM_OBJECT_NULL
||
15573 !object
->internal
) {
15577 vm_object_pageout(object
);
15580 vm_map_unlock_read(map
);
15581 return KERN_SUCCESS
;
15583 #endif /* MACH_ASSERT */
15587 * Routine: vm_map_entry_insert
15589 * Description: This routine inserts a new vm_entry in a locked map.
/*
 * vm_map_entry_insert
 *
 * Creates a new map entry covering [start, end), fills in its fields from
 * the arguments and links it into the map after insp_entry.  The map must
 * be locked exclusively (asserted below).
 *
 * Parameters:
 *   insp_entry        - entry after which the new one is linked; must not
 *                       be null (asserted).
 *   start, end        - address range; both must be page aligned and
 *                       start < end (asserted).
 *   object, offset    - stored with VME_OBJECT_SET and VME_OFFSET_SET.  On
 *                       DEVELOPMENT or DEBUG builds, (end - start) + offset
 *                       is asserted not to overflow.
 *   needs_copy, is_shared, in_transition, cur_protection, max_protection,
 *   behavior, inheritance, wired_count, no_cache, permanent,
 *   no_copy_on_read   - copied into the matching fields of the new entry.
 *   superpage_size    - stored as a boolean (TRUE when nonzero).
 *   clear_map_aligned - when set and start or end is not aligned to the map
 *                       page size, map_aligned is forced to FALSE.
 *   is_submap         - stored in is_sub_map.
 *   used_for_jit      - see the comment at the JIT handling below.
 *
 * Fields always initialised to fixed values: needs_wakeup, zero_wired_pages,
 * pmap_cs_associated, iokit_acct, vme_resilient_codesign,
 * vme_resilient_media and vme_atomic are FALSE; user_wired_count is 0.
 *
 * Side effects: map->size grows by end - start and the map hint is set to
 * the new entry.
 *
 * NOTE(review): the return type, the map and alias parameters, the return
 * statement and several braces and else keywords are on lines not visible
 * in this listing.
 */
15592 vm_map_entry_insert(
15594 vm_map_entry_t insp_entry
,
15595 vm_map_offset_t start
,
15596 vm_map_offset_t end
,
15597 vm_object_t object
,
15598 vm_object_offset_t offset
,
15599 boolean_t needs_copy
,
15600 boolean_t is_shared
,
15601 boolean_t in_transition
,
15602 vm_prot_t cur_protection
,
15603 vm_prot_t max_protection
,
15604 vm_behavior_t behavior
,
15605 vm_inherit_t inheritance
,
15606 unsigned wired_count
,
15607 boolean_t no_cache
,
15608 boolean_t permanent
,
15609 boolean_t no_copy_on_read
,
15610 unsigned int superpage_size
,
15611 boolean_t clear_map_aligned
,
15612 boolean_t is_submap
,
15613 boolean_t used_for_jit
,
15616 vm_map_entry_t new_entry
;
15618 assert(insp_entry
!= (vm_map_entry_t
)0);
15619 vm_map_lock_assert_exclusive(map
);
15621 #if DEVELOPMENT || DEBUG
15622 vm_object_offset_t end_offset
= 0;
15623 assertf(!os_add_overflow(end
- start
, offset
, &end_offset
), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end
- start
), offset
);
15624 #endif /* DEVELOPMENT || DEBUG */
15626 new_entry
= vm_map_entry_create(map
, !map
->hdr
.entries_pageable
);
/*
 * map_aligned is chosen from whether the map page shift differs from
 * PAGE_SHIFT, then cleared when the caller allows it and the range is not
 * aligned to the map page size.
 */
15628 if (VM_MAP_PAGE_SHIFT(map
) != PAGE_SHIFT
) {
15629 new_entry
->map_aligned
= TRUE
;
15631 new_entry
->map_aligned
= FALSE
;
15633 if (clear_map_aligned
&&
15634 (!VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)) ||
15635 !VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)))) {
15636 new_entry
->map_aligned
= FALSE
;
15639 new_entry
->vme_start
= start
;
15640 new_entry
->vme_end
= end
;
15641 assert(page_aligned(new_entry
->vme_start
));
15642 assert(page_aligned(new_entry
->vme_end
));
15643 if (new_entry
->map_aligned
) {
15644 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_start
,
15645 VM_MAP_PAGE_MASK(map
)));
15646 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_end
,
15647 VM_MAP_PAGE_MASK(map
)));
15649 assert(new_entry
->vme_start
< new_entry
->vme_end
);
15651 VME_OBJECT_SET(new_entry
, object
);
15652 VME_OFFSET_SET(new_entry
, offset
);
15653 new_entry
->is_shared
= is_shared
;
15654 new_entry
->is_sub_map
= is_submap
;
15655 new_entry
->needs_copy
= needs_copy
;
15656 new_entry
->in_transition
= in_transition
;
15657 new_entry
->needs_wakeup
= FALSE
;
15658 new_entry
->inheritance
= inheritance
;
15659 new_entry
->protection
= cur_protection
;
15660 new_entry
->max_protection
= max_protection
;
15661 new_entry
->behavior
= behavior
;
15662 new_entry
->wired_count
= wired_count
;
15663 new_entry
->user_wired_count
= 0;
/*
 * use_pmap has a different meaning for submaps and objects; the condition
 * selecting between the two assignments below is on a line not visible in
 * this listing (presumably is_submap - confirm).
 */
15666 * submap: "use_pmap" means "nested".
15669 new_entry
->use_pmap
= FALSE
;
15672 * object: "use_pmap" means "use pmap accounting" for footprint.
15675 new_entry
->use_pmap
= TRUE
;
15677 VME_ALIAS_SET(new_entry
, alias
);
15678 new_entry
->zero_wired_pages
= FALSE
;
15679 new_entry
->no_cache
= no_cache
;
15680 new_entry
->permanent
= permanent
;
15681 if (superpage_size
) {
15682 new_entry
->superpage_size
= TRUE
;
15684 new_entry
->superpage_size
= FALSE
;
/*
 * JIT marking: with CONFIG_EMBEDDED the entry is marked only when the map
 * has no JIT entry yet; marking also sets map->jit_entry_exists.
 */
15686 if (used_for_jit
) {
15687 #if CONFIG_EMBEDDED
15688 if (!(map
->jit_entry_exists
))
15689 #endif /* CONFIG_EMBEDDED */
15691 new_entry
->used_for_jit
= TRUE
;
15692 map
->jit_entry_exists
= TRUE
;
15695 new_entry
->used_for_jit
= FALSE
;
15697 new_entry
->pmap_cs_associated
= FALSE
;
15698 new_entry
->iokit_acct
= FALSE
;
15699 new_entry
->vme_resilient_codesign
= FALSE
;
15700 new_entry
->vme_resilient_media
= FALSE
;
15701 new_entry
->vme_atomic
= FALSE
;
15702 new_entry
->vme_no_copy_on_read
= no_copy_on_read
;
15705 * Insert the new entry into the list.
15708 vm_map_store_entry_link(map
, insp_entry
, new_entry
,
15709 VM_MAP_KERNEL_FLAGS_NONE
);
15710 map
->size
+= end
- start
;
15713 * Update the free space hint and the lookup hint.
15716 SAVE_HINT_MAP_WRITE(map
, new_entry
);
15721 * Routine: vm_map_remap_extract
15723 * Description: This routine returns a vm_entry list from a map.
/*
 * vm_map_remap_extract: walks the page-rounded source range starting at
 * "addr" one map entry at a time and appends, to the list rooted at
 * *map_header, a new entry describing each piece.
 *
 * *cur_protection and *max_protection are reset to VM_PROT_ALL up front and
 * then AND-ed with the protections of every non-submap source entry, so on
 * return they hold the intersection over the whole range.
 *
 * If "result" is not KERN_SUCCESS after the loop, every entry already linked
 * into map_header is unlinked, its submap/object reference is dropped and
 * the entry is disposed.
 *
 * NOTE(review): this listing is missing some original lines (for example the
 * declarations of "map", "copy" and "prot", which the body uses, plus the
 * closing braces / else / break lines) - check the full source before
 * relying on the exact control flow.
 */
15725 static kern_return_t
15726 vm_map_remap_extract(
15728 vm_map_offset_t addr
,
15729 vm_map_size_t size
,
15731 struct vm_map_header
*map_header
,
15732 vm_prot_t
*cur_protection
,
15733 vm_prot_t
*max_protection
,
15734 /* What, no behavior? */
15735 vm_inherit_t inheritance
,
15736 boolean_t pageable
,
15737 boolean_t same_map
,
15738 vm_map_kernel_flags_t vmk_flags
)
15740 kern_return_t result
;
15741 vm_map_size_t mapped_size
;
15742 vm_map_size_t tmp_size
;
15743 vm_map_entry_t src_entry
; /* result of last map lookup */
15744 vm_map_entry_t new_entry
;
15745 vm_object_offset_t offset
;
15746 vm_map_offset_t map_address
;
15747 vm_map_offset_t src_start
; /* start of entry to map */
15748 vm_map_offset_t src_end
; /* end of region to be mapped */
15749 vm_object_t object
;
15750 vm_map_version_t version
;
15751 boolean_t src_needs_copy
;
15752 boolean_t new_entry_needs_copy
;
15753 vm_map_entry_t saved_src_entry
;
15754 boolean_t src_entry_was_wired
;
15755 vm_prot_t max_prot_for_prot_copy
;
15757 assert(map
!= VM_MAP_NULL
);
15759 assert(size
== vm_map_round_page(size
, PAGE_MASK
));
15760 assert(inheritance
== VM_INHERIT_NONE
||
15761 inheritance
== VM_INHERIT_COPY
||
15762 inheritance
== VM_INHERIT_SHARE
);
15765 * Compute start and end of region.
15767 src_start
= vm_map_trunc_page(addr
, PAGE_MASK
);
15768 src_end
= vm_map_round_page(src_start
+ size
, PAGE_MASK
);
15772 * Initialize map_header.
15774 map_header
->links
.next
= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
15775 map_header
->links
.prev
= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
15776 map_header
->nentries
= 0;
15777 map_header
->entries_pageable
= pageable
;
15778 map_header
->page_shift
= PAGE_SHIFT
;
15780 vm_map_store_init( map_header
);
/*
 * The caller-supplied *max_protection is captured here (prot_copy case)
 * because both out-parameters are overwritten with VM_PROT_ALL just below.
 */
15782 if (copy
&& vmk_flags
.vmkf_remap_prot_copy
) {
15783 max_prot_for_prot_copy
= *max_protection
& VM_PROT_ALL
;
15785 max_prot_for_prot_copy
= VM_PROT_NONE
;
15787 *cur_protection
= VM_PROT_ALL
;
15788 *max_protection
= VM_PROT_ALL
;
15792 result
= KERN_SUCCESS
;
15795 * The specified source virtual space might correspond to
15796 * multiple map entries, need to loop on them.
15799 while (mapped_size
!= size
) {
15800 vm_map_size_t entry_size
;
15803 * Find the beginning of the region.
15805 if (!vm_map_lookup_entry(map
, src_start
, &src_entry
)) {
15806 result
= KERN_INVALID_ADDRESS
;
15810 if (src_start
< src_entry
->vme_start
||
15811 (mapped_size
&& src_start
!= src_entry
->vme_start
)) {
15812 result
= KERN_INVALID_ADDRESS
;
/*
 * tmp_size = what is still left to map, clipped so that it does not run
 * past the end of the current source entry.
 */
15816 tmp_size
= size
- mapped_size
;
15817 if (src_end
> src_entry
->vme_end
) {
15818 tmp_size
-= (src_end
- src_entry
->vme_end
);
15821 entry_size
= (vm_map_size_t
)(src_entry
->vme_end
-
15822 src_entry
->vme_start
);
15824 if (src_entry
->is_sub_map
) {
15825 vm_map_reference(VME_SUBMAP(src_entry
));
15826 object
= VM_OBJECT_NULL
;
15828 object
= VME_OBJECT(src_entry
);
15829 if (src_entry
->iokit_acct
) {
15831 * This entry uses "IOKit accounting".
15833 } else if (object
!= VM_OBJECT_NULL
&&
15834 (object
->purgable
!= VM_PURGABLE_DENY
||
15835 object
->vo_ledger_tag
!= VM_LEDGER_TAG_NONE
)) {
15837 * Purgeable objects have their own accounting:
15838 * no pmap accounting for them.
15840 assertf(!src_entry
->use_pmap
,
15841 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15844 (uint64_t)src_entry
->vme_start
,
15845 (uint64_t)src_entry
->vme_end
,
15846 src_entry
->protection
,
15847 src_entry
->max_protection
,
15848 VME_ALIAS(src_entry
));
15851 * Not IOKit or purgeable:
15852 * must be accounted by pmap stats.
15854 assertf(src_entry
->use_pmap
,
15855 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15858 (uint64_t)src_entry
->vme_start
,
15859 (uint64_t)src_entry
->vme_end
,
15860 src_entry
->protection
,
15861 src_entry
->max_protection
,
15862 VME_ALIAS(src_entry
));
15865 if (object
== VM_OBJECT_NULL
) {
15866 object
= vm_object_allocate(entry_size
);
15867 VME_OFFSET_SET(src_entry
, 0);
15868 VME_OBJECT_SET(src_entry
, object
);
15869 assert(src_entry
->use_pmap
);
15870 } else if (object
->copy_strategy
!=
15871 MEMORY_OBJECT_COPY_SYMMETRIC
) {
15873 * We are already using an asymmetric
15874 * copy, and therefore we already have
15875 * the right object.
15877 assert(!src_entry
->needs_copy
);
15878 } else if (src_entry
->needs_copy
|| object
->shadowed
||
15879 (object
->internal
&& !object
->true_share
&&
15880 !src_entry
->is_shared
&&
15881 object
->vo_size
> entry_size
)) {
15882 VME_OBJECT_SHADOW(src_entry
, entry_size
);
15883 assert(src_entry
->use_pmap
);
15885 if (!src_entry
->needs_copy
&&
15886 (src_entry
->protection
& VM_PROT_WRITE
)) {
15889 assert(!pmap_has_prot_policy(src_entry
->protection
));
15891 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
15893 if (override_nx(map
,
15894 VME_ALIAS(src_entry
))
15896 prot
|= VM_PROT_EXECUTE
;
15899 assert(!pmap_has_prot_policy(prot
));
15901 if (map
->mapped_in_other_pmaps
) {
15902 vm_object_pmap_protect(
15903 VME_OBJECT(src_entry
),
15904 VME_OFFSET(src_entry
),
15907 src_entry
->vme_start
,
15910 pmap_protect(vm_map_pmap(map
),
15911 src_entry
->vme_start
,
15912 src_entry
->vme_end
,
15917 object
= VME_OBJECT(src_entry
);
15918 src_entry
->needs_copy
= FALSE
;
/*
 * Under the object lock: take the reference that the new entry will own,
 * and switch a symmetric copy strategy to MEMORY_OBJECT_COPY_DELAY.
 */
15922 vm_object_lock(object
);
15923 vm_object_reference_locked(object
); /* object ref. for new entry */
15924 if (object
->copy_strategy
==
15925 MEMORY_OBJECT_COPY_SYMMETRIC
) {
15926 object
->copy_strategy
=
15927 MEMORY_OBJECT_COPY_DELAY
;
15929 vm_object_unlock(object
);
15932 offset
= (VME_OFFSET(src_entry
) +
15933 (src_start
- src_entry
->vme_start
));
15935 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
15936 vm_map_entry_copy(new_entry
, src_entry
);
15937 if (new_entry
->is_sub_map
) {
15938 /* clr address space specifics */
15939 new_entry
->use_pmap
= FALSE
;
15942 * We're dealing with a copy-on-write operation,
15943 * so the resulting mapping should not inherit the
15944 * original mapping's accounting settings.
15945 * "use_pmap" should be reset to its default (TRUE)
15946 * so that the new mapping gets accounted for in
15947 * the task's memory footprint.
15949 new_entry
->use_pmap
= TRUE
;
15951 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15952 assert(!new_entry
->iokit_acct
);
15954 new_entry
->map_aligned
= FALSE
;
15956 new_entry
->vme_start
= map_address
;
15957 new_entry
->vme_end
= map_address
+ tmp_size
;
15958 assert(new_entry
->vme_start
< new_entry
->vme_end
);
15959 if (copy
&& vmk_flags
.vmkf_remap_prot_copy
) {
15961 * Remapping for vm_map_protect(VM_PROT_COPY)
15962 * to convert a read-only mapping into a
15963 * copy-on-write version of itself but
15964 * with write access:
15965 * keep the original inheritance and add
15966 * VM_PROT_WRITE to the max protection.
15968 new_entry
->inheritance
= src_entry
->inheritance
;
15969 new_entry
->protection
&= max_prot_for_prot_copy
;
15970 new_entry
->max_protection
|= VM_PROT_WRITE
;
15972 new_entry
->inheritance
= inheritance
;
15974 VME_OFFSET_SET(new_entry
, offset
);
15977 * The new region has to be copied now if required.
15981 if (src_entry
->used_for_jit
== TRUE
) {
15983 #if __APRR_SUPPORTED__
15985 * Disallow re-mapping of any JIT regions on APRR devices.
15987 result
= KERN_PROTECTION_FAILURE
;
15989 #endif /* __APRR_SUPPORTED__*/
15991 #if CONFIG_EMBEDDED
15993 * Cannot allow an entry describing a JIT
15994 * region to be shared across address spaces.
15996 result
= KERN_INVALID_ARGUMENT
;
15998 #endif /* CONFIG_EMBEDDED */
16002 src_entry
->is_shared
= TRUE
;
16003 new_entry
->is_shared
= TRUE
;
16004 if (!(new_entry
->is_sub_map
)) {
16005 new_entry
->needs_copy
= FALSE
;
16007 } else if (src_entry
->is_sub_map
) {
16008 /* make this a COW sub_map if not already */
16009 assert(new_entry
->wired_count
== 0);
16010 new_entry
->needs_copy
= TRUE
;
16011 object
= VM_OBJECT_NULL
;
16012 } else if (src_entry
->wired_count
== 0 &&
16013 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry
),
16014 VME_OFFSET(new_entry
),
16015 (new_entry
->vme_end
-
16016 new_entry
->vme_start
),
16018 &new_entry_needs_copy
)) {
16019 new_entry
->needs_copy
= new_entry_needs_copy
;
16020 new_entry
->is_shared
= FALSE
;
16021 assertf(new_entry
->use_pmap
, "map %p new_entry %p\n", map
, new_entry
);
16024 * Handle copy_on_write semantics.
16026 if (src_needs_copy
&& !src_entry
->needs_copy
) {
16029 assert(!pmap_has_prot_policy(src_entry
->protection
));
16031 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
16033 if (override_nx(map
,
16034 VME_ALIAS(src_entry
))
16036 prot
|= VM_PROT_EXECUTE
;
16039 assert(!pmap_has_prot_policy(prot
));
16041 vm_object_pmap_protect(object
,
16044 ((src_entry
->is_shared
16045 || map
->mapped_in_other_pmaps
) ?
16046 PMAP_NULL
: map
->pmap
),
16047 src_entry
->vme_start
,
16050 assert(src_entry
->wired_count
== 0);
16051 src_entry
->needs_copy
= TRUE
;
16054 * Throw away the old object reference of the new entry.
16056 vm_object_deallocate(object
);
16058 new_entry
->is_shared
= FALSE
;
16059 assertf(new_entry
->use_pmap
, "map %p new_entry %p\n", map
, new_entry
);
16061 src_entry_was_wired
= (src_entry
->wired_count
> 0);
16062 saved_src_entry
= src_entry
;
16063 src_entry
= VM_MAP_ENTRY_NULL
;
16066 * The map can be safely unlocked since we
16067 * already hold a reference on the object.
16069 * Record the timestamp of the map for later
16070 * verification, and unlock the map.
16072 version
.main_timestamp
= map
->timestamp
;
16073 vm_map_unlock(map
); /* Increments timestamp once! */
16076 * Perform the copy.
16078 if (src_entry_was_wired
> 0) {
16079 vm_object_lock(object
);
16080 result
= vm_object_copy_slowly(
16083 (new_entry
->vme_end
-
16084 new_entry
->vme_start
),
16086 VME_OBJECT_PTR(new_entry
));
16088 VME_OFFSET_SET(new_entry
, 0);
16089 new_entry
->needs_copy
= FALSE
;
16091 vm_object_offset_t new_offset
;
16093 new_offset
= VME_OFFSET(new_entry
);
16094 result
= vm_object_copy_strategically(
16097 (new_entry
->vme_end
-
16098 new_entry
->vme_start
),
16099 VME_OBJECT_PTR(new_entry
),
16101 &new_entry_needs_copy
);
16102 if (new_offset
!= VME_OFFSET(new_entry
)) {
16103 VME_OFFSET_SET(new_entry
, new_offset
);
16106 new_entry
->needs_copy
= new_entry_needs_copy
;
16110 * Throw away the old object reference of the new entry.
16112 vm_object_deallocate(object
);
16114 if (result
!= KERN_SUCCESS
&&
16115 result
!= KERN_MEMORY_RESTART_COPY
) {
16116 _vm_map_entry_dispose(map_header
, new_entry
);
16122 * Verify that the map has not substantially
16123 * changed while the copy was being made.
/*
 * The saved timestamp plus the single increment from the vm_map_unlock()
 * above is the only value that proves nobody else modified the map; any
 * other value discards new_entry so the lookup can be redone.
 */
16127 if (version
.main_timestamp
+ 1 != map
->timestamp
) {
16129 * Simple version comparison failed.
16131 * Retry the lookup and verify that the
16132 * same object/offset are still present.
16134 saved_src_entry
= VM_MAP_ENTRY_NULL
;
16135 vm_object_deallocate(VME_OBJECT(new_entry
));
16136 _vm_map_entry_dispose(map_header
, new_entry
);
16137 if (result
== KERN_MEMORY_RESTART_COPY
) {
16138 result
= KERN_SUCCESS
;
16142 /* map hasn't changed: src_entry is still valid */
16143 src_entry
= saved_src_entry
;
16144 saved_src_entry
= VM_MAP_ENTRY_NULL
;
16146 if (result
== KERN_MEMORY_RESTART_COPY
) {
16147 vm_object_reference(object
);
16152 _vm_map_store_entry_link(map_header
,
16153 map_header
->links
.prev
, new_entry
);
16155 /*Protections for submap mapping are irrelevant here*/
16156 if (!src_entry
->is_sub_map
) {
16157 *cur_protection
&= src_entry
->protection
;
16158 *max_protection
&= src_entry
->max_protection
;
/* Advance all three cursors past the piece that was just handled. */
16160 map_address
+= tmp_size
;
16161 mapped_size
+= tmp_size
;
16162 src_start
+= tmp_size
;
16165 vm_map_unlock(map
);
16166 if (result
!= KERN_SUCCESS
) {
16168 * Free all allocated elements.
/*
 * new_entry is reused here as the "next" pointer: it is read before
 * src_entry is unlinked and disposed, so the walk survives the removal.
 */
16170 for (src_entry
= map_header
->links
.next
;
16171 src_entry
!= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
16172 src_entry
= new_entry
) {
16173 new_entry
= src_entry
->vme_next
;
16174 _vm_map_store_entry_unlink(map_header
, src_entry
);
16175 if (src_entry
->is_sub_map
) {
16176 vm_map_deallocate(VME_SUBMAP(src_entry
));
16178 vm_object_deallocate(VME_OBJECT(src_entry
));
16180 _vm_map_entry_dispose(map_header
, src_entry
);
16187 * Routine: vm_remap
16189 * Map portion of a task's address space.
16190 * Mapped region must not overlap more than
16191 * one vm memory object. Protections and
16192 * inheritance attributes remain the same
16193 * as in the original task and are out parameters.
16194 * Source and Target task can be identical
16195 * Other attributes are identical as for vm_map()
/*
 * vm_map_remap (fragment): remaps [memory_address, memory_address + size)
 * of src_map into target_map.
 *
 * Visible steps:
 *   1. reject a null target_map and validate inheritance / size / src_map;
 *   2. round "size" (with covering semantics if VM_FLAGS_RETURN_DATA_ADDR);
 *   3. vm_map_remap_extract() builds a private entry list in map_header;
 *   4. with target_map locked, vm_map_remap_range_allocate() picks the
 *      destination and each extracted entry is unlinked from map_header and
 *      linked into target_map;
 *   5. optionally wire the new range and add offset_in_mapping to *address.
 *
 * NOTE(review): the return type, the function-name line and the src_map /
 * copy / flags / tag parameters are not visible in this listing, and neither
 * are several else / return / closing-brace lines.
 */
16199 vm_map_t target_map
,
16200 vm_map_address_t
*address
,
16201 vm_map_size_t size
,
16202 vm_map_offset_t mask
,
16204 vm_map_kernel_flags_t vmk_flags
,
16207 vm_map_offset_t memory_address
,
16209 vm_prot_t
*cur_protection
,
16210 vm_prot_t
*max_protection
,
16211 vm_inherit_t inheritance
)
16213 kern_return_t result
;
16214 vm_map_entry_t entry
;
16215 vm_map_entry_t insp_entry
= VM_MAP_ENTRY_NULL
;
16216 vm_map_entry_t new_entry
;
16217 struct vm_map_header map_header
;
16218 vm_map_offset_t offset_in_mapping
;
16220 if (target_map
== VM_MAP_NULL
) {
16221 return KERN_INVALID_ARGUMENT
;
16224 switch (inheritance
) {
16225 case VM_INHERIT_NONE
:
16226 case VM_INHERIT_COPY
:
16227 case VM_INHERIT_SHARE
:
16228 if (size
!= 0 && src_map
!= VM_MAP_NULL
) {
16233 return KERN_INVALID_ARGUMENT
;
16237 * If the user is requesting that we return the address of the
16238 * first byte of the data (rather than the base of the page),
16239 * then we use different rounding semantics: specifically,
16240 * we assume that (memory_address, size) describes a region
16241 * all of whose pages we must cover, rather than a base to be truncated
16242 * down and a size to be added to that base. So we figure out
16243 * the highest page that the requested region includes and make
16244 * sure that the size will cover it.
16246 * The key example we're worried about it is of the form:
16248 * memory_address = 0x1ff0, size = 0x20
16250 * With the old semantics, we round down the memory_address to 0x1000
16251 * and round up the size to 0x1000, resulting in our covering *only*
16252 * page 0x1000. With the new semantics, we'd realize that the region covers
16253 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
16254 * 0x1000 and page 0x2000 in the region we remap.
/*
 * offset_in_mapping is only assigned on the VM_FLAGS_RETURN_DATA_ADDR path
 * and is only read again at the very end under that same flag test.
 */
16256 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
16257 offset_in_mapping
= memory_address
- vm_map_trunc_page(memory_address
, PAGE_MASK
);
16258 size
= vm_map_round_page(memory_address
+ size
- vm_map_trunc_page(memory_address
, PAGE_MASK
), PAGE_MASK
);
16260 size
= vm_map_round_page(size
, PAGE_MASK
);
16263 return KERN_INVALID_ARGUMENT
;
16266 if (flags
& VM_FLAGS_RESILIENT_MEDIA
) {
16267 /* must be copy-on-write to be "media resilient" */
16269 return KERN_INVALID_ARGUMENT
;
16273 result
= vm_map_remap_extract(src_map
, memory_address
,
16274 size
, copy
, &map_header
,
16278 target_map
->hdr
.entries_pageable
,
16279 src_map
== target_map
,
16282 if (result
!= KERN_SUCCESS
) {
16287 * Allocate/check a range of free virtual address
16288 * space for the target
16290 *address
= vm_map_trunc_page(*address
,
16291 VM_MAP_PAGE_MASK(target_map
));
16292 vm_map_lock(target_map
);
16293 result
= vm_map_remap_range_allocate(target_map
, address
, size
,
16294 mask
, flags
, vmk_flags
, tag
,
/*
 * Drain the temporary list. Every entry is unlinked from map_header; when
 * "result" is KERN_SUCCESS it is rebased by *address and linked into
 * target_map after insp_entry. The deallocate/dispose calls further down
 * are presumably the failure branch (its "else" line is not visible here).
 */
16297 for (entry
= map_header
.links
.next
;
16298 entry
!= CAST_TO_VM_MAP_ENTRY(&map_header
.links
);
16299 entry
= new_entry
) {
16300 new_entry
= entry
->vme_next
;
16301 _vm_map_store_entry_unlink(&map_header
, entry
);
16302 if (result
== KERN_SUCCESS
) {
16303 if (flags
& VM_FLAGS_RESILIENT_CODESIGN
) {
16304 /* no codesigning -> read-only access */
16305 entry
->max_protection
= VM_PROT_READ
;
16306 entry
->protection
= VM_PROT_READ
;
16307 entry
->vme_resilient_codesign
= TRUE
;
16309 entry
->vme_start
+= *address
;
16310 entry
->vme_end
+= *address
;
16311 assert(!entry
->map_aligned
);
16312 if ((flags
& VM_FLAGS_RESILIENT_MEDIA
) &&
16313 !entry
->is_sub_map
&&
16314 (VME_OBJECT(entry
) == VM_OBJECT_NULL
||
16315 VME_OBJECT(entry
)->internal
)) {
16316 entry
->vme_resilient_media
= TRUE
;
16318 vm_map_store_entry_link(target_map
, insp_entry
, entry
,
16320 insp_entry
= entry
;
16322 if (!entry
->is_sub_map
) {
16323 vm_object_deallocate(VME_OBJECT(entry
));
16325 vm_map_deallocate(VME_SUBMAP(entry
));
16327 _vm_map_entry_dispose(&map_header
, entry
);
16331 if (flags
& VM_FLAGS_RESILIENT_CODESIGN
) {
16332 *cur_protection
= VM_PROT_READ
;
16333 *max_protection
= VM_PROT_READ
;
16336 if (target_map
->disable_vmentry_reuse
== TRUE
) {
16337 assert(!target_map
->is_nested_map
);
16338 if (target_map
->highest_entry_end
< insp_entry
->vme_end
) {
16339 target_map
->highest_entry_end
= insp_entry
->vme_end
;
16343 if (result
== KERN_SUCCESS
) {
16344 target_map
->size
+= size
;
16345 SAVE_HINT_MAP_WRITE(target_map
, insp_entry
);
/*
 * Executable remaps: ask pmap_cs_lookup() for the code-signing region at
 * memory_address. If one exists (region_cd != NULL) and the page-rounded
 * request does not match its start and size exactly, the reported
 * protections are downgraded to VM_PROT_READ and the mismatch is logged.
 */
16348 if (*max_protection
& VM_PROT_EXECUTE
) {
16349 vm_map_address_t region_start
= 0, region_size
= 0;
16350 struct pmap_cs_code_directory
*region_cd
= NULL
;
16351 vm_map_address_t base
= 0;
16352 struct pmap_cs_lookup_results results
= {};
16353 vm_map_size_t page_addr
= vm_map_trunc_page(memory_address
, PAGE_MASK
);
16354 vm_map_size_t assoc_size
= vm_map_round_page(memory_address
+ size
- page_addr
, PAGE_MASK
);
16356 pmap_cs_lookup(src_map
->pmap
, memory_address
, &results
);
16357 region_size
= results
.region_size
;
16358 region_start
= results
.region_start
;
16359 region_cd
= results
.region_cd_entry
;
16360 base
= results
.base
;
16362 if (region_cd
!= NULL
&& (page_addr
!= region_start
|| assoc_size
!= region_size
)) {
16363 *cur_protection
= VM_PROT_READ
;
16364 *max_protection
= VM_PROT_READ
;
16365 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
16366 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16367 page_addr
, page_addr
+ assoc_size
, *address
,
16368 region_start
, region_size
,
16369 region_cd
!= NULL
? "not " : "" // Don't leak kernel slide
16375 vm_map_unlock(target_map
);
/* Wiring happens after the target map lock has been dropped above. */
16377 if (result
== KERN_SUCCESS
&& target_map
->wiring_required
) {
16378 result
= vm_map_wire_kernel(target_map
, *address
,
16379 *address
+ size
, *cur_protection
, VM_KERN_MEMORY_MLOCK
,
16384 * If requested, return the address of the data pointed to by the
16385 * request, rather than the base of the resulting page.
16387 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
16388 *address
+= offset_in_mapping
;
16395 * Routine: vm_map_remap_range_allocate
16398 * Allocate a range in the specified virtual address map.
16399 * returns the address and the map entry just before the allocated
16402 * Map must be locked.
/*
 * vm_map_remap_range_allocate: chooses or validates a destination range of
 * "size" bytes in "map".
 *
 * With VM_FLAGS_ANYWHERE the map is searched (via the hole list, the
 * highest-entry hint or first_free, depending on the map's settings) for a
 * gap that satisfies "mask". Otherwise the caller-supplied start is checked
 * against the mask and the map bounds, and with VM_FLAGS_OVERWRITE whatever
 * is mapped there is removed first through a temporary "zap_map".
 *
 * On success *map_entry is set to the entry that precedes the range and
 * KERN_SUCCESS is returned. The visible failure codes are KERN_NO_SPACE,
 * KERN_INVALID_ADDRESS and KERN_RESOURCE_SHORTAGE.
 *
 * NOTE(review): the "map" and "flags" parameters, the "kr" and "zap_map"
 * locals, and a number of brace / else / goto / assignment lines are not
 * visible in this listing.
 */
16405 static kern_return_t
16406 vm_map_remap_range_allocate(
16408 vm_map_address_t
*address
, /* IN/OUT */
16409 vm_map_size_t size
,
16410 vm_map_offset_t mask
,
16412 vm_map_kernel_flags_t vmk_flags
,
16413 __unused vm_tag_t tag
,
16414 vm_map_entry_t
*map_entry
) /* OUT */
16416 vm_map_entry_t entry
;
16417 vm_map_offset_t start
;
16418 vm_map_offset_t end
;
16419 vm_map_offset_t desired_empty_end
;
16421 vm_map_entry_t hole_entry
;
16427 if (flags
& VM_FLAGS_ANYWHERE
) {
16428 if (flags
& VM_FLAGS_RANDOM_ADDR
) {
16430 * Get a random start address.
16432 kr
= vm_map_random_address_for_size(map
, address
, size
);
16433 if (kr
!= KERN_SUCCESS
) {
16440 * Calculate the first possible address.
16443 if (start
< map
->min_offset
) {
16444 start
= map
->min_offset
;
16446 if (start
> map
->max_offset
) {
16447 return KERN_NO_SPACE
;
16451 * Look for the first possible address;
16452 * if there's already something at this
16453 * address, we have to start after it.
16456 if (map
->disable_vmentry_reuse
== TRUE
) {
16457 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
16459 if (map
->holelistenabled
) {
16460 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
16462 if (hole_entry
== NULL
) {
16464 * No more space in the map?
16466 return KERN_NO_SPACE
;
16468 boolean_t found_hole
= FALSE
;
/*
 * Walk the circular hole list once: a hole starting at or after "start"
 * moves "start" up to it, and a hole ending after "start" also qualifies.
 * NOTE(review): the lines that set found_hole and leave the loop are not
 * visible here.
 */
16471 if (hole_entry
->vme_start
>= start
) {
16472 start
= hole_entry
->vme_start
;
16477 if (hole_entry
->vme_end
> start
) {
16481 hole_entry
= hole_entry
->vme_next
;
16482 } while (hole_entry
!= CAST_TO_VM_MAP_ENTRY(map
->holes_list
));
16484 if (found_hole
== FALSE
) {
16485 return KERN_NO_SPACE
;
16488 entry
= hole_entry
;
16491 assert(first_free_is_valid(map
));
16492 if (start
== map
->min_offset
) {
16493 if ((entry
= map
->first_free
) != vm_map_to_entry(map
)) {
16494 start
= entry
->vme_end
;
16497 vm_map_entry_t tmp_entry
;
16498 if (vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
16499 start
= tmp_entry
->vme_end
;
16504 start
= vm_map_round_page(start
,
16505 VM_MAP_PAGE_MASK(map
));
16509 * In any case, the "entry" always precedes
16510 * the proposed new region throughout the
16515 vm_map_entry_t next
;
16518 * Find the end of the proposed new region.
16519 * Be sure we didn't go beyond the end, or
16520 * wrap around the address.
/*
 * (start + mask) & ~mask rounds "start" up to the alignment that "mask"
 * encodes; the result is then rounded to the map's page size.
 */
16523 end
= ((start
+ mask
) & ~mask
);
16524 end
= vm_map_round_page(end
,
16525 VM_MAP_PAGE_MASK(map
));
16527 return KERN_NO_SPACE
;
16532 /* We want an entire page of empty space, but don't increase the allocation size. */
16533 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
/*
 * Out of range or wrapped around: if the map is marked wait_for_space and
 * the request could ever fit, block until woken (the map is unlocked
 * first); otherwise give up with KERN_NO_SPACE.
 */
16535 if ((desired_empty_end
> map
->max_offset
) || (desired_empty_end
< start
)) {
16536 if (map
->wait_for_space
) {
16537 if (size
<= (map
->max_offset
-
16538 map
->min_offset
)) {
16539 assert_wait((event_t
) map
, THREAD_INTERRUPTIBLE
);
16540 vm_map_unlock(map
);
16541 thread_block(THREAD_CONTINUE_NULL
);
16547 return KERN_NO_SPACE
;
16550 next
= entry
->vme_next
;
16552 if (map
->holelistenabled
) {
16553 if (entry
->vme_end
>= desired_empty_end
) {
16558 * If there are no more entries, we must win.
16562 * If there is another entry, it must be
16563 * after the end of the potential new region.
16566 if (next
== vm_map_to_entry(map
)) {
16570 if (next
->vme_start
>= desired_empty_end
) {
16576 * Didn't fit -- move to the next entry.
16581 if (map
->holelistenabled
) {
16582 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
16586 return KERN_NO_SPACE
;
16588 start
= entry
->vme_start
;
16590 start
= entry
->vme_end
;
/*
 * In hole-list mode "entry" is a hole descriptor, so it is converted back
 * to a real map entry by looking up the hole's start address. A successful
 * lookup means the address is actually mapped, hence the panic.
 */
16594 if (map
->holelistenabled
) {
16595 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
16596 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
16602 vm_map_entry_t temp_entry
;
16606 * the address doesn't itself violate
16607 * the mask requirement.
16610 if ((start
& mask
) != 0) {
16611 return KERN_NO_SPACE
;
16616 * ... the address is within bounds
16619 end
= start
+ size
;
16621 if ((start
< map
->min_offset
) ||
16622 (end
> map
->max_offset
) ||
16624 return KERN_INVALID_ADDRESS
;
16628 * If we're asked to overwrite whatever was mapped in that
16629 * range, first deallocate that range.
16631 if (flags
& VM_FLAGS_OVERWRITE
) {
16633 int remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
| VM_MAP_REMOVE_NO_MAP_ALIGN
;
16636 * We use a "zap_map" to avoid having to unlock
16637 * the "map" in vm_map_delete(), which would compromise
16638 * the atomicity of the "deallocate" and then "remap"
16641 zap_map
= vm_map_create(PMAP_NULL
,
16644 map
->hdr
.entries_pageable
);
16645 if (zap_map
== VM_MAP_NULL
) {
16646 return KERN_RESOURCE_SHORTAGE
;
16648 vm_map_set_page_shift(zap_map
, VM_MAP_PAGE_SHIFT(map
));
16649 vm_map_disable_hole_optimization(zap_map
);
16651 if (vmk_flags
.vmkf_overwrite_immutable
) {
16652 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
16654 kr
= vm_map_delete(map
, start
, end
,
16657 if (kr
== KERN_SUCCESS
) {
16658 vm_map_destroy(zap_map
,
16659 VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
16660 zap_map
= VM_MAP_NULL
;
16665 * ... the starting address isn't allocated
16668 if (vm_map_lookup_entry(map
, start
, &temp_entry
)) {
16669 return KERN_NO_SPACE
;
16672 entry
= temp_entry
;
16675 * ... the next region doesn't overlap the
16679 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
16680 (entry
->vme_next
->vme_start
< end
)) {
16681 return KERN_NO_SPACE
;
16684 *map_entry
= entry
;
16685 return KERN_SUCCESS
;
16691 * Set the address map for the current thread to the specified map
/*
 * vm_map_switch (fragment): switches the current thread to "map".
 * The thread's current map is captured in oldmap first; then, with
 * preemption disabled so the CPU number stays valid, PMAP_SWITCH_USER() is
 * invoked with the thread, the new map and that CPU number.
 * NOTE(review): the signature and the return statement are not visible in
 * this listing; oldmap is presumably what gets returned, since callers
 * in this file assign the result to their own "oldmap".
 */
16699 thread_t thread
= current_thread();
16700 vm_map_t oldmap
= thread
->map
;
16702 mp_disable_preemption();
16703 mycpu
= cpu_number();
16706 * Deactivate the current map and activate the requested map
16708 PMAP_SWITCH_USER(thread
, map
, mycpu
);
16710 mp_enable_preemption();
16716 * Routine: vm_map_write_user
16719 * Copy out data from a kernel space into space in the
16720 * destination map. The space must already exist in the
16722 * NOTE: This routine should only be called by threads
16723 * which can block on a page fault. i.e. kernel mode user
/*
 * vm_map_write_user (fragment): copies "size" bytes from src_p to dst_addr
 * in "map" using copyout().
 * If "map" is already the current map, copyout() is called directly.
 * Otherwise a reference is taken on "map", the thread temporarily switches
 * to it with vm_map_switch(), and afterwards the previous map is restored
 * and the reference dropped.
 * kr starts as KERN_SUCCESS and becomes KERN_INVALID_ADDRESS when copyout()
 * reports failure.
 * NOTE(review): the remaining parameters and the return statement are not
 * visible in this listing.
 */
16731 vm_map_address_t dst_addr
,
16734 kern_return_t kr
= KERN_SUCCESS
;
16736 if (current_map() == map
) {
16737 if (copyout(src_p
, dst_addr
, size
)) {
16738 kr
= KERN_INVALID_ADDRESS
;
16743 /* take on the identity of the target map while doing */
/* The reference keeps "map" alive while this thread is borrowing it. */
16746 vm_map_reference(map
);
16747 oldmap
= vm_map_switch(map
);
16748 if (copyout(src_p
, dst_addr
, size
)) {
16749 kr
= KERN_INVALID_ADDRESS
;
16751 vm_map_switch(oldmap
);
16752 vm_map_deallocate(map
);
16758 * Routine: vm_map_read_user
16761 * Copy in data from a user space source map into the
16762 * kernel map. The space must already exist in the
16764 * NOTE: This routine should only be called by threads
16765 * which can block on a page fault. i.e. kernel mode user
/*
 * vm_map_read_user (fragment): copies "size" bytes from src_addr in "map"
 * to dst_p using copyin().
 * If "map" is already the current map, copyin() is called directly.
 * Otherwise a reference is taken on "map", the thread temporarily switches
 * to it with vm_map_switch(), and afterwards the previous map is restored
 * and the reference dropped.
 * kr starts as KERN_SUCCESS and becomes KERN_INVALID_ADDRESS when copyin()
 * reports failure.
 * NOTE(review): the remaining parameters and the return statement are not
 * visible in this listing.
 */
16772 vm_map_address_t src_addr
,
16776 kern_return_t kr
= KERN_SUCCESS
;
16778 if (current_map() == map
) {
16779 if (copyin(src_addr
, dst_p
, size
)) {
16780 kr
= KERN_INVALID_ADDRESS
;
16785 /* take on the identity of the target map while doing */
/* The reference keeps "map" alive while this thread is borrowing it. */
16788 vm_map_reference(map
);
16789 oldmap
= vm_map_switch(map
);
16790 if (copyin(src_addr
, dst_p
, size
)) {
16791 kr
= KERN_INVALID_ADDRESS
;
16793 vm_map_switch(oldmap
);
16794 vm_map_deallocate(map
);
16801 * vm_map_check_protection:
16803 * Assert that the target map allows the specified
16804 * privilege on the entire address region given.
16805 * The entire region must be allocated.
/*
 * vm_map_check_protection: checks that every address in [start, end) is
 * covered by map entries whose current protection includes all bits of
 * "protection".
 * The walk stops at the first problem: range outside the map bounds or
 * start > end, no entry at "start", running off the end of the entry list,
 * a gap before the next entry, or an entry lacking a requested bit.
 * Every exit path shown here unlocks the map first.
 * NOTE(review): the lock acquisition, the assignment of "entry" from
 * tmp_entry and the return statements are not visible in this listing.
 */
16808 vm_map_check_protection(vm_map_t map
, vm_map_offset_t start
,
16809 vm_map_offset_t end
, vm_prot_t protection
)
16811 vm_map_entry_t entry
;
16812 vm_map_entry_t tmp_entry
;
16816 if (start
< vm_map_min(map
) || end
> vm_map_max(map
) || start
> end
) {
16817 vm_map_unlock(map
);
16821 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
16822 vm_map_unlock(map
);
16828 while (start
< end
) {
16829 if (entry
== vm_map_to_entry(map
)) {
16830 vm_map_unlock(map
);
16835 * No holes allowed!
16838 if (start
< entry
->vme_start
) {
16839 vm_map_unlock(map
);
16844 * Check protection associated with entry.
16847 if ((entry
->protection
& protection
) != protection
) {
16848 vm_map_unlock(map
);
16852 /* go to next entry */
/*
 * "start" jumps to the end of the entry just checked, so the hole test at
 * the top of the loop compares it against the next entry's start.
 */
16854 start
= entry
->vme_end
;
16855 entry
= entry
->vme_next
;
16857 vm_map_unlock(map
);
/*
 * vm_map_purgable_control: applies a purgeable-memory control operation to
 * the VM object mapped at "address" in "map".
 *
 * Visible validation, in order:
 *   - map must be non-null and control one of the four accepted values
 *     (else KERN_INVALID_ARGUMENT);
 *   - VM_PURGABLE_PURGE_ALL is handled immediately, without touching the map;
 *   - for the two SET_STATE controls, *state may only contain bits from
 *     VM_PURGABLE_ALL_MASKS;
 *   - the address must resolve to a non-submap entry (KERN_INVALID_ADDRESS)
 *     that is writable (KERN_PROTECTION_FAILURE);
 *   - the entry must have an object whose purgable field is not
 *     VM_PURGABLE_DENY, mapped at offset 0 and covering the whole object
 *     (KERN_INVALID_ARGUMENT).
 *
 * The object lock is taken before the map read lock is released, and
 * vm_object_purgable_control() runs with the object still locked.
 *
 * NOTE(review): the "map" and "state" parameters, the "kr" local and the
 * final return are not visible in this listing.
 */
16862 vm_map_purgable_control(
16864 vm_map_offset_t address
,
16865 vm_purgable_t control
,
16868 vm_map_entry_t entry
;
16869 vm_object_t object
;
16871 boolean_t was_nonvolatile
;
16874 * Vet all the input parameters and current type and state of the
16875 * underlaying object. Return with an error if anything is amiss.
16877 if (map
== VM_MAP_NULL
) {
16878 return KERN_INVALID_ARGUMENT
;
16881 if (control
!= VM_PURGABLE_SET_STATE
&&
16882 control
!= VM_PURGABLE_GET_STATE
&&
16883 control
!= VM_PURGABLE_PURGE_ALL
&&
16884 control
!= VM_PURGABLE_SET_STATE_FROM_KERNEL
) {
16885 return KERN_INVALID_ARGUMENT
;
16888 if (control
== VM_PURGABLE_PURGE_ALL
) {
16889 vm_purgeable_object_purge_all();
16890 return KERN_SUCCESS
;
/*
 * NOTE(review): in the second test below, a value masked with
 * VM_PURGABLE_STATE_MASK can never exceed that same mask, so that clause
 * looks like it can never be true - confirm the intended upper bound.
 */
16893 if ((control
== VM_PURGABLE_SET_STATE
||
16894 control
== VM_PURGABLE_SET_STATE_FROM_KERNEL
) &&
16895 (((*state
& ~(VM_PURGABLE_ALL_MASKS
)) != 0) ||
16896 ((*state
& VM_PURGABLE_STATE_MASK
) > VM_PURGABLE_STATE_MASK
))) {
16897 return KERN_INVALID_ARGUMENT
;
16900 vm_map_lock_read(map
);
16902 if (!vm_map_lookup_entry(map
, address
, &entry
) || entry
->is_sub_map
) {
16904 * Must pass a valid non-submap address.
16906 vm_map_unlock_read(map
);
16907 return KERN_INVALID_ADDRESS
;
16910 if ((entry
->protection
& VM_PROT_WRITE
) == 0) {
16912 * Can't apply purgable controls to something you can't write.
16914 vm_map_unlock_read(map
);
16915 return KERN_PROTECTION_FAILURE
;
16918 object
= VME_OBJECT(entry
);
16919 if (object
== VM_OBJECT_NULL
||
16920 object
->purgable
== VM_PURGABLE_DENY
) {
16922 * Object must already be present and be purgeable.
16924 vm_map_unlock_read(map
);
16925 return KERN_INVALID_ARGUMENT
;
/* The object is locked while the map read lock is still held. */
16928 vm_object_lock(object
);
16931 if (VME_OFFSET(entry
) != 0 ||
16932 entry
->vme_end
- entry
->vme_start
!= object
->vo_size
) {
16934 * Can only apply purgable controls to the whole (existing)
16937 vm_map_unlock_read(map
);
16938 vm_object_unlock(object
);
16939 return KERN_INVALID_ARGUMENT
;
16943 assert(!entry
->is_sub_map
);
16944 assert(!entry
->use_pmap
); /* purgeable has its own accounting */
16946 vm_map_unlock_read(map
);
/* Sampled before the control call so a state transition can be detected. */
16948 was_nonvolatile
= (object
->purgable
== VM_PURGABLE_NONVOLATILE
);
16950 kr
= vm_object_purgable_control(object
, control
, state
);
/*
 * If the object left the non-volatile state and this map uses kernel_pmap,
 * kernel_task is recorded as the volatilizer.
 */
16952 if (was_nonvolatile
&&
16953 object
->purgable
!= VM_PURGABLE_NONVOLATILE
&&
16954 map
->pmap
== kernel_pmap
) {
16956 object
->vo_purgeable_volatilizer
= kernel_task
;
16960 vm_object_unlock(object
);
/*
 * vm_map_page_query_internal: convenience wrapper that asks
 * vm_map_page_info() for the VM_PAGE_INFO_BASIC flavor and, on KERN_SUCCESS,
 * copies the disposition and ref_count fields of the result into
 * *disposition and *ref_count.
 * NOTE(review): the disposition / ref_count parameters, the "kr" local, the
 * failure branch and the return are not visible in this listing.
 */
16966 vm_map_page_query_internal(
16967 vm_map_t target_map
,
16968 vm_map_offset_t offset
,
16973 vm_page_info_basic_data_t info
;
16974 mach_msg_type_number_t count
;
/* count is in/out: it is preset to the size expected for the basic flavor. */
16976 count
= VM_PAGE_INFO_BASIC_COUNT
;
16977 kr
= vm_map_page_info(target_map
,
16979 VM_PAGE_INFO_BASIC
,
16980 (vm_page_info_t
) &info
,
16982 if (kr
== KERN_SUCCESS
) {
16983 *disposition
= info
.disposition
;
16984 *ref_count
= info
.ref_count
;
/*
 * Single-address page-info query (fragment): forwards to
 * vm_map_page_range_info_internal() with the range [offset, offset + 1),
 * which the callee rounds out to a page boundary.
 * NOTE(review): the function-name line is not visible in this listing;
 * this is presumably vm_map_page_info(), whose call site earlier in the
 * file passes the same (map, offset, flavor, info, count) arguments.
 */
16996 vm_map_offset_t offset
,
16997 vm_page_info_flavor_t flavor
,
16998 vm_page_info_t info
,
16999 mach_msg_type_number_t
*count
)
17001 return vm_map_page_range_info_internal(map
,
17002 offset
, /* start of range */
17003 (offset
+ 1), /* this will get rounded in the call to the page boundary */
17010 vm_map_page_range_info_internal(
17012 vm_map_offset_t start_offset
,
17013 vm_map_offset_t end_offset
,
17014 vm_page_info_flavor_t flavor
,
17015 vm_page_info_t info
,
17016 mach_msg_type_number_t
*count
)
17018 vm_map_entry_t map_entry
= VM_MAP_ENTRY_NULL
;
17019 vm_object_t object
= VM_OBJECT_NULL
, curr_object
= VM_OBJECT_NULL
;
17020 vm_page_t m
= VM_PAGE_NULL
;
17021 kern_return_t retval
= KERN_SUCCESS
;
17022 int disposition
= 0;
17024 int depth
= 0, info_idx
= 0;
17025 vm_page_info_basic_t basic_info
= 0;
17026 vm_map_offset_t offset_in_page
= 0, offset_in_object
= 0, curr_offset_in_object
= 0;
17027 vm_map_offset_t start
= 0, end
= 0, curr_s_offset
= 0, curr_e_offset
= 0;
17028 boolean_t do_region_footprint
;
17029 ledger_amount_t ledger_resident
, ledger_compressed
;
17032 case VM_PAGE_INFO_BASIC
:
17033 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
) {
17035 * The "vm_page_info_basic_data" structure was not
17036 * properly padded, so allow the size to be off by
17037 * one to maintain backwards binary compatibility...
17039 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
- 1) {
17040 return KERN_INVALID_ARGUMENT
;
17045 return KERN_INVALID_ARGUMENT
;
17048 do_region_footprint
= task_self_region_footprint();
17052 info_idx
= 0; /* Tracks the next index within the info structure to be filled.*/
17053 retval
= KERN_SUCCESS
;
17055 offset_in_page
= start_offset
& PAGE_MASK
;
17056 start
= vm_map_trunc_page(start_offset
, PAGE_MASK
);
17057 end
= vm_map_round_page(end_offset
, PAGE_MASK
);
17060 return KERN_INVALID_ARGUMENT
;
17063 assert((end
- start
) <= MAX_PAGE_RANGE_QUERY
);
17065 vm_map_lock_read(map
);
17067 task_ledgers_footprint(map
->pmap
->ledger
, &ledger_resident
, &ledger_compressed
);
17069 for (curr_s_offset
= start
; curr_s_offset
< end
;) {
17071 * New lookup needs reset of these variables.
17073 curr_object
= object
= VM_OBJECT_NULL
;
17074 offset_in_object
= 0;
17078 if (do_region_footprint
&&
17079 curr_s_offset
>= vm_map_last_entry(map
)->vme_end
) {
17081 * Request for "footprint" info about a page beyond
17082 * the end of address space: this must be for
17083 * the fake region vm_map_region_recurse_64()
17084 * reported to account for non-volatile purgeable
17085 * memory owned by this task.
17089 if (curr_s_offset
- vm_map_last_entry(map
)->vme_end
<=
17090 (unsigned) ledger_compressed
) {
17092 * We haven't reported all the "non-volatile
17093 * compressed" pages yet, so report this fake
17094 * page as "compressed".
17096 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
17099 * We've reported all the non-volatile
17100 * compressed page but not all the non-volatile
17101 * pages , so report this fake page as
17102 * "resident dirty".
17104 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17105 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
17106 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
17109 case VM_PAGE_INFO_BASIC
:
17110 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
17111 basic_info
->disposition
= disposition
;
17112 basic_info
->ref_count
= 1;
17113 basic_info
->object_id
= INFO_MAKE_FAKE_OBJECT_ID(map
, task_ledgers
.purgeable_nonvolatile
);
17114 basic_info
->offset
= 0;
17115 basic_info
->depth
= 0;
17120 curr_s_offset
+= PAGE_SIZE
;
17125 * First, find the map entry covering "curr_s_offset", going down
17126 * submaps if necessary.
17128 if (!vm_map_lookup_entry(map
, curr_s_offset
, &map_entry
)) {
17129 /* no entry -> no object -> no page */
17131 if (curr_s_offset
< vm_map_min(map
)) {
17133 * Illegal address that falls below map min.
17135 curr_e_offset
= MIN(end
, vm_map_min(map
));
17136 } else if (curr_s_offset
>= vm_map_max(map
)) {
17138 * Illegal address that falls on/after map max.
17140 curr_e_offset
= end
;
17141 } else if (map_entry
== vm_map_to_entry(map
)) {
17145 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
17149 curr_e_offset
= MIN(map
->max_offset
, end
);
17152 * Hole at start of the map.
17154 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
17157 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
17159 * Hole at the end of the map.
17161 curr_e_offset
= MIN(map
->max_offset
, end
);
17163 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
17167 assert(curr_e_offset
>= curr_s_offset
);
17169 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> PAGE_SHIFT
;
17171 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
17173 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
17175 curr_s_offset
= curr_e_offset
;
17177 info_idx
+= num_pages
;
17182 /* compute offset from this map entry's start */
17183 offset_in_object
= curr_s_offset
- map_entry
->vme_start
;
17185 /* compute offset into this map entry's object (or submap) */
17186 offset_in_object
+= VME_OFFSET(map_entry
);
17188 if (map_entry
->is_sub_map
) {
17189 vm_map_t sub_map
= VM_MAP_NULL
;
17190 vm_page_info_t submap_info
= 0;
17191 vm_map_offset_t submap_s_offset
= 0, submap_e_offset
= 0, range_len
= 0;
17193 range_len
= MIN(map_entry
->vme_end
, end
) - curr_s_offset
;
17195 submap_s_offset
= offset_in_object
;
17196 submap_e_offset
= submap_s_offset
+ range_len
;
17198 sub_map
= VME_SUBMAP(map_entry
);
17200 vm_map_reference(sub_map
);
17201 vm_map_unlock_read(map
);
17203 submap_info
= (vm_page_info_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
17205 retval
= vm_map_page_range_info_internal(sub_map
,
17208 VM_PAGE_INFO_BASIC
,
17209 (vm_page_info_t
) submap_info
,
17212 assert(retval
== KERN_SUCCESS
);
17214 vm_map_lock_read(map
);
17215 vm_map_deallocate(sub_map
);
17217 /* Move the "info" index by the number of pages we inspected.*/
17218 info_idx
+= range_len
>> PAGE_SHIFT
;
17220 /* Move our current offset by the size of the range we inspected.*/
17221 curr_s_offset
+= range_len
;
17226 object
= VME_OBJECT(map_entry
);
17227 if (object
== VM_OBJECT_NULL
) {
17229 * We don't have an object here and, hence,
17230 * no pages to inspect. We'll fill up the
17231 * info structure appropriately.
17234 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
17236 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> PAGE_SHIFT
;
17238 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
17240 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
17242 curr_s_offset
= curr_e_offset
;
17244 info_idx
+= num_pages
;
17249 if (do_region_footprint
) {
17254 if (map
->has_corpse_footprint
) {
17256 * Query the page info data we saved
17257 * while forking the corpse.
17259 vm_map_corpse_footprint_query_page_info(
17267 pmap_query_page_info(map
->pmap
,
17271 if (object
->purgable
== VM_PURGABLE_NONVOLATILE
&&
17272 /* && not tagged as no-footprint? */
17273 VM_OBJECT_OWNER(object
) != NULL
&&
17274 VM_OBJECT_OWNER(object
)->map
== map
) {
17275 if ((((curr_s_offset
17276 - map_entry
->vme_start
17277 + VME_OFFSET(map_entry
))
17279 (object
->resident_page_count
+
17280 vm_compressor_pager_get_count(object
->pager
)))) {
17282 * Non-volatile purgeable object owned
17283 * by this task: report the first
17284 * "#resident + #compressed" pages as
17285 * "resident" (to show that they
17286 * contribute to the footprint) but not
17287 * "dirty" (to avoid double-counting
17288 * with the fake "non-volatile" region
17289 * we'll report at the end of the
17290 * address space to account for all
17291 * (mapped or not) non-volatile memory
17292 * owned by this task.
17294 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17296 } else if ((object
->purgable
== VM_PURGABLE_VOLATILE
||
17297 object
->purgable
== VM_PURGABLE_EMPTY
) &&
17298 /* && not tagged as no-footprint? */
17299 VM_OBJECT_OWNER(object
) != NULL
&&
17300 VM_OBJECT_OWNER(object
)->map
== map
) {
17301 if ((((curr_s_offset
17302 - map_entry
->vme_start
17303 + VME_OFFSET(map_entry
))
17305 object
->wired_page_count
)) {
17307 * Volatile|empty purgeable object owned
17308 * by this task: report the first
17309 * "#wired" pages as "resident" (to
17310 * show that they contribute to the
17311 * footprint) but not "dirty" (to avoid
17312 * double-counting with the fake
17313 * "non-volatile" region we'll report
17314 * at the end of the address space to
17315 * account for all (mapped or not)
17316 * non-volatile memory owned by this
17319 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17321 } else if (map_entry
->iokit_acct
&&
17322 object
->internal
&&
17323 object
->purgable
== VM_PURGABLE_DENY
) {
17325 * Non-purgeable IOKit memory: phys_footprint
17326 * includes the entire virtual mapping.
17328 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
17329 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17330 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
17331 } else if (pmap_disp
& (PMAP_QUERY_PAGE_ALTACCT
|
17332 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
)) {
17333 /* alternate accounting */
17334 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17335 if (map
->pmap
->footprint_was_suspended
||
17337 * XXX corpse does not know if original
17338 * pmap had its footprint suspended...
17340 map
->has_corpse_footprint
) {
17342 * The assertion below can fail if dyld
17343 * suspended footprint accounting
17344 * while doing some adjustments to
17345 * this page; the mapping would say
17346 * "use pmap accounting" but the page
17347 * would be marked "alternate
17351 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17352 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
17355 if (pmap_disp
& PMAP_QUERY_PAGE_PRESENT
) {
17356 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
17357 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17358 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
17359 if (pmap_disp
& PMAP_QUERY_PAGE_INTERNAL
) {
17360 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
17362 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
17364 if (pmap_disp
& PMAP_QUERY_PAGE_REUSABLE
) {
17365 disposition
|= VM_PAGE_QUERY_PAGE_REUSABLE
;
17367 } else if (pmap_disp
& PMAP_QUERY_PAGE_COMPRESSED
) {
17368 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
17369 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
17373 case VM_PAGE_INFO_BASIC
:
17374 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
17375 basic_info
->disposition
= disposition
;
17376 basic_info
->ref_count
= 1;
17377 basic_info
->object_id
= INFO_MAKE_FAKE_OBJECT_ID(map
, task_ledgers
.purgeable_nonvolatile
);
17378 basic_info
->offset
= 0;
17379 basic_info
->depth
= 0;
17384 curr_s_offset
+= PAGE_SIZE
;
17388 vm_object_reference(object
);
17390 * Shared mode -- so we can allow other readers
17391 * to grab the lock too.
17393 vm_object_lock_shared(object
);
17395 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
17397 vm_map_unlock_read(map
);
17399 map_entry
= NULL
; /* map is unlocked, the entry is no longer valid. */
17401 curr_object
= object
;
17403 for (; curr_s_offset
< curr_e_offset
;) {
17404 if (object
== curr_object
) {
17405 ref_count
= curr_object
->ref_count
- 1; /* account for our object reference above. */
17407 ref_count
= curr_object
->ref_count
;
17410 curr_offset_in_object
= offset_in_object
;
17413 m
= vm_page_lookup(curr_object
, curr_offset_in_object
);
17415 if (m
!= VM_PAGE_NULL
) {
17416 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17419 if (curr_object
->internal
&&
17420 curr_object
->alive
&&
17421 !curr_object
->terminating
&&
17422 curr_object
->pager_ready
) {
17423 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object
, curr_offset_in_object
)
17424 == VM_EXTERNAL_STATE_EXISTS
) {
17425 /* the pager has that page */
17426 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
17432 * Go down the VM object shadow chain until we find the page
17433 * we're looking for.
17436 if (curr_object
->shadow
!= VM_OBJECT_NULL
) {
17437 vm_object_t shadow
= VM_OBJECT_NULL
;
17439 curr_offset_in_object
+= curr_object
->vo_shadow_offset
;
17440 shadow
= curr_object
->shadow
;
17442 vm_object_lock_shared(shadow
);
17443 vm_object_unlock(curr_object
);
17445 curr_object
= shadow
;
17454 /* The ref_count is not strictly accurate, it measures the number */
17455 /* of entities holding a ref on the object, they may not be mapping */
17456 /* the object or may not be mapping the section holding the */
17457 /* target page but its still a ball park number and though an over- */
17458 /* count, it picks up the copy-on-write cases */
17460 /* We could also get a picture of page sharing from pmap_attributes */
17461 /* but this would under count as only faulted-in mappings would */
17464 if ((curr_object
== object
) && curr_object
->shadow
) {
17465 disposition
|= VM_PAGE_QUERY_PAGE_COPIED
;
17468 if (!curr_object
->internal
) {
17469 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
17472 if (m
!= VM_PAGE_NULL
) {
17473 if (m
->vmp_fictitious
) {
17474 disposition
|= VM_PAGE_QUERY_PAGE_FICTITIOUS
;
17476 if (m
->vmp_dirty
|| pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m
))) {
17477 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
17480 if (m
->vmp_reference
|| pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m
))) {
17481 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
17484 if (m
->vmp_q_state
== VM_PAGE_ON_SPECULATIVE_Q
) {
17485 disposition
|= VM_PAGE_QUERY_PAGE_SPECULATIVE
;
17488 if (m
->vmp_cs_validated
) {
17489 disposition
|= VM_PAGE_QUERY_PAGE_CS_VALIDATED
;
17491 if (m
->vmp_cs_tainted
) {
17492 disposition
|= VM_PAGE_QUERY_PAGE_CS_TAINTED
;
17494 if (m
->vmp_cs_nx
) {
17495 disposition
|= VM_PAGE_QUERY_PAGE_CS_NX
;
17497 if (m
->vmp_reusable
|| curr_object
->all_reusable
) {
17498 disposition
|= VM_PAGE_QUERY_PAGE_REUSABLE
;
17504 case VM_PAGE_INFO_BASIC
:
17505 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
17506 basic_info
->disposition
= disposition
;
17507 basic_info
->ref_count
= ref_count
;
17508 basic_info
->object_id
= (vm_object_id_t
) (uintptr_t)
17509 VM_KERNEL_ADDRPERM(curr_object
);
17510 basic_info
->offset
=
17511 (memory_object_offset_t
) curr_offset_in_object
+ offset_in_page
;
17512 basic_info
->depth
= depth
;
17519 offset_in_page
= 0; // This doesn't really make sense for any offset other than the starting offset.
17522 * Move to next offset in the range and in our object.
17524 curr_s_offset
+= PAGE_SIZE
;
17525 offset_in_object
+= PAGE_SIZE
;
17526 curr_offset_in_object
= offset_in_object
;
17528 if (curr_object
!= object
) {
17529 vm_object_unlock(curr_object
);
17531 curr_object
= object
;
17533 vm_object_lock_shared(curr_object
);
17535 vm_object_lock_yield_shared(curr_object
);
17539 vm_object_unlock(curr_object
);
17540 vm_object_deallocate(curr_object
);
17542 vm_map_lock_read(map
);
17545 vm_map_unlock_read(map
);
17552 * Synchronises the memory range specified with its backing store
17553 * image by either flushing or cleaning the contents to the appropriate
17554 * memory manager engaging in a memory object synchronize dialog with
17555 * the manager. The client doesn't return until the manager issues
17556 * m_o_s_completed message. MIG Magically converts user task parameter
17557 * to the task's address map.
17559 * interpretation of sync_flags
17560 * VM_SYNC_INVALIDATE - discard pages, only return precious
17561 * pages to manager.
17563 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17564 * - discard pages, write dirty or precious
17565 * pages back to memory manager.
17567 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17568 * - write dirty or precious pages back to
17569 * the memory manager.
17571 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
17572 * is a hole in the region, and we would
17573 * have returned KERN_SUCCESS, return
17574 * KERN_INVALID_ADDRESS instead.
17577 * The memory object attributes have not yet been implemented, this
17578 * function will have to deal with the invalidate attribute
17581 * KERN_INVALID_TASK Bad task parameter
17582 * KERN_INVALID_ARGUMENT both sync and async were specified.
17583 * KERN_SUCCESS The usual.
17584 * KERN_INVALID_ADDRESS There was a hole in the region.
17590 vm_map_address_t address
,
17591 vm_map_size_t size
,
17592 vm_sync_t sync_flags
)
17594 vm_map_entry_t entry
;
17595 vm_map_size_t amount_left
;
17596 vm_object_offset_t offset
;
17597 boolean_t do_sync_req
;
17598 boolean_t had_hole
= FALSE
;
17599 vm_map_offset_t pmap_offset
;
17601 if ((sync_flags
& VM_SYNC_ASYNCHRONOUS
) &&
17602 (sync_flags
& VM_SYNC_SYNCHRONOUS
)) {
17603 return KERN_INVALID_ARGUMENT
;
17607 * align address and size on page boundaries
17609 size
= (vm_map_round_page(address
+ size
,
17610 VM_MAP_PAGE_MASK(map
)) -
17611 vm_map_trunc_page(address
,
17612 VM_MAP_PAGE_MASK(map
)));
17613 address
= vm_map_trunc_page(address
,
17614 VM_MAP_PAGE_MASK(map
));
17616 if (map
== VM_MAP_NULL
) {
17617 return KERN_INVALID_TASK
;
17621 return KERN_SUCCESS
;
17624 amount_left
= size
;
17626 while (amount_left
> 0) {
17627 vm_object_size_t flush_size
;
17628 vm_object_t object
;
17631 if (!vm_map_lookup_entry(map
,
17634 vm_map_size_t skip
;
17637 * hole in the address map.
17641 if (sync_flags
& VM_SYNC_KILLPAGES
) {
17643 * For VM_SYNC_KILLPAGES, there should be
17644 * no holes in the range, since we couldn't
17645 * prevent someone else from allocating in
17646 * that hole and we wouldn't want to "kill"
17649 vm_map_unlock(map
);
17654 * Check for empty map.
17656 if (entry
== vm_map_to_entry(map
) &&
17657 entry
->vme_next
== entry
) {
17658 vm_map_unlock(map
);
17662 * Check that we don't wrap and that
17663 * we have at least one real map entry.
17665 if ((map
->hdr
.nentries
== 0) ||
17666 (entry
->vme_next
->vme_start
< address
)) {
17667 vm_map_unlock(map
);
17671 * Move up to the next entry if needed
17673 skip
= (entry
->vme_next
->vme_start
- address
);
17674 if (skip
>= amount_left
) {
17677 amount_left
-= skip
;
17679 address
= entry
->vme_next
->vme_start
;
17680 vm_map_unlock(map
);
17684 offset
= address
- entry
->vme_start
;
17685 pmap_offset
= address
;
17688 * do we have more to flush than is contained in this
17691 if (amount_left
+ entry
->vme_start
+ offset
> entry
->vme_end
) {
17692 flush_size
= entry
->vme_end
-
17693 (entry
->vme_start
+ offset
);
17695 flush_size
= amount_left
;
17697 amount_left
-= flush_size
;
17698 address
+= flush_size
;
17700 if (entry
->is_sub_map
== TRUE
) {
17701 vm_map_t local_map
;
17702 vm_map_offset_t local_offset
;
17704 local_map
= VME_SUBMAP(entry
);
17705 local_offset
= VME_OFFSET(entry
);
17706 vm_map_reference(local_map
);
17707 vm_map_unlock(map
);
17712 sync_flags
) == KERN_INVALID_ADDRESS
) {
17715 vm_map_deallocate(local_map
);
17718 object
= VME_OBJECT(entry
);
17721 * We can't sync this object if the object has not been
17724 if (object
== VM_OBJECT_NULL
) {
17725 vm_map_unlock(map
);
17728 offset
+= VME_OFFSET(entry
);
17730 vm_object_lock(object
);
17732 if (sync_flags
& (VM_SYNC_KILLPAGES
| VM_SYNC_DEACTIVATE
)) {
17733 int kill_pages
= 0;
17734 boolean_t reusable_pages
= FALSE
;
17736 if (sync_flags
& VM_SYNC_KILLPAGES
) {
17737 if (((object
->ref_count
== 1) ||
17738 ((object
->copy_strategy
!=
17739 MEMORY_OBJECT_COPY_SYMMETRIC
) &&
17740 (object
->copy
== VM_OBJECT_NULL
))) &&
17741 (object
->shadow
== VM_OBJECT_NULL
)) {
17742 if (object
->ref_count
!= 1) {
17743 vm_page_stats_reusable
.free_shared
++;
17750 if (kill_pages
!= -1) {
17751 vm_object_deactivate_pages(
17754 (vm_object_size_t
) flush_size
,
17760 vm_object_unlock(object
);
17761 vm_map_unlock(map
);
17765 * We can't sync this object if there isn't a pager.
17766 * Don't bother to sync internal objects, since there can't
17767 * be any "permanent" storage for these objects anyway.
17769 if ((object
->pager
== MEMORY_OBJECT_NULL
) ||
17770 (object
->internal
) || (object
->private)) {
17771 vm_object_unlock(object
);
17772 vm_map_unlock(map
);
17776 * keep reference on the object until syncing is done
17778 vm_object_reference_locked(object
);
17779 vm_object_unlock(object
);
17781 vm_map_unlock(map
);
17783 do_sync_req
= vm_object_sync(object
,
17786 sync_flags
& VM_SYNC_INVALIDATE
,
17787 ((sync_flags
& VM_SYNC_SYNCHRONOUS
) ||
17788 (sync_flags
& VM_SYNC_ASYNCHRONOUS
)),
17789 sync_flags
& VM_SYNC_SYNCHRONOUS
);
17791 if ((sync_flags
& VM_SYNC_INVALIDATE
) && object
->resident_page_count
== 0) {
17793 * clear out the clustering and read-ahead hints
17795 vm_object_lock(object
);
17797 object
->pages_created
= 0;
17798 object
->pages_used
= 0;
17799 object
->sequential
= 0;
17800 object
->last_alloc
= 0;
17802 vm_object_unlock(object
);
17804 vm_object_deallocate(object
);
17807 /* for proper msync() behaviour */
17808 if (had_hole
== TRUE
&& (sync_flags
& VM_SYNC_CONTIGUOUS
)) {
17809 return KERN_INVALID_ADDRESS
;
17812 return KERN_SUCCESS
;
17816 * Routine: convert_port_entry_to_map
17818 * Convert from a port specifying an entry or a task
17819 * to a map. Doesn't consume the port ref; produces a map ref,
17820 * which may be null. Unlike convert_port_to_map, the
17821 * port may be task or a named entry backed.
17828 convert_port_entry_to_map(
17832 vm_named_entry_t named_entry
;
17833 uint32_t try_failed_count
= 0;
17835 if (IP_VALID(port
) && (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
17838 if (ip_active(port
) && (ip_kotype(port
)
17839 == IKOT_NAMED_ENTRY
)) {
17841 (vm_named_entry_t
) ip_get_kobject(port
);
17842 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
17845 try_failed_count
++;
17846 mutex_pause(try_failed_count
);
17849 named_entry
->ref_count
++;
17850 lck_mtx_unlock(&(named_entry
)->Lock
);
17852 if ((named_entry
->is_sub_map
) &&
17853 (named_entry
->protection
17854 & VM_PROT_WRITE
)) {
17855 map
= named_entry
->backing
.map
;
17857 mach_destroy_memory_entry(port
);
17858 return VM_MAP_NULL
;
17860 vm_map_reference_swap(map
);
17861 mach_destroy_memory_entry(port
);
17864 return VM_MAP_NULL
;
17868 map
= convert_port_to_map(port
);
17875 * Routine: convert_port_entry_to_object
17877 * Convert from a port specifying a named entry to an
17878 * object. Doesn't consume the port ref; produces a map ref,
17879 * which may be null.
17886 convert_port_entry_to_object(
17889 vm_object_t object
= VM_OBJECT_NULL
;
17890 vm_named_entry_t named_entry
;
17891 uint32_t try_failed_count
= 0;
17893 if (IP_VALID(port
) &&
17894 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
17897 if (ip_active(port
) &&
17898 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
17899 named_entry
= (vm_named_entry_t
) ip_get_kobject(port
);
17900 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
17902 try_failed_count
++;
17903 mutex_pause(try_failed_count
);
17906 named_entry
->ref_count
++;
17907 lck_mtx_unlock(&(named_entry
)->Lock
);
17909 if (!(named_entry
->is_sub_map
) &&
17910 !(named_entry
->is_copy
) &&
17911 (named_entry
->protection
& VM_PROT_WRITE
)) {
17912 object
= named_entry
->backing
.object
;
17913 vm_object_reference(object
);
17915 mach_destroy_memory_entry(port
);
17923 * Export routines to other components for the things we access locally through
17930 return current_map_fast();
17934 * vm_map_reference:
17936 * Most code internal to the osfmk will go through a
17937 * macro defining this. This is always here for the
17938 * use of other kernel components.
17940 #undef vm_map_reference
17945 if (map
== VM_MAP_NULL
) {
17949 lck_mtx_lock(&map
->s_lock
);
17951 assert(map
->res_count
> 0);
17952 assert(os_ref_get_count(&map
->map_refcnt
) >= map
->res_count
);
17955 os_ref_retain_locked(&map
->map_refcnt
);
17956 lck_mtx_unlock(&map
->s_lock
);
17960 * vm_map_deallocate:
17962 * Removes a reference from the specified map,
17963 * destroying it if no references remain.
17964 * The map should not be locked.
17972 if (map
== VM_MAP_NULL
) {
17976 lck_mtx_lock(&map
->s_lock
);
17977 ref
= os_ref_release_locked(&map
->map_refcnt
);
17979 vm_map_res_deallocate(map
);
17980 lck_mtx_unlock(&map
->s_lock
);
17983 assert(os_ref_get_count(&map
->map_refcnt
) == 0);
17984 lck_mtx_unlock(&map
->s_lock
);
17988 * The map residence count isn't decremented here because
17989 * the vm_map_delete below will traverse the entire map,
17990 * deleting entries, and the residence counts on objects
17991 * and sharing maps will go away then.
17995 vm_map_destroy(map
, VM_MAP_REMOVE_NO_FLAGS
);
18000 vm_map_disable_NX(vm_map_t map
)
18005 if (map
->pmap
== NULL
) {
18009 pmap_disable_NX(map
->pmap
);
18013 vm_map_disallow_data_exec(vm_map_t map
)
18019 map
->map_disallow_data_exec
= TRUE
;
18022 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
18023 * more descriptive.
18026 vm_map_set_32bit(vm_map_t map
)
18028 #if defined(__arm__) || defined(__arm64__)
18029 map
->max_offset
= pmap_max_offset(FALSE
, ARM_PMAP_MAX_OFFSET_DEVICE
);
18031 map
->max_offset
= (vm_map_offset_t
)VM_MAX_ADDRESS
;
18037 vm_map_set_64bit(vm_map_t map
)
18039 #if defined(__arm__) || defined(__arm64__)
18040 map
->max_offset
= pmap_max_offset(TRUE
, ARM_PMAP_MAX_OFFSET_DEVICE
);
18042 map
->max_offset
= (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
;
18047 * Expand the maximum size of an existing map to the maximum supported.
18050 vm_map_set_jumbo(vm_map_t map
)
18052 #if defined (__arm64__)
18053 vm_map_set_max_addr(map
, ~0);
18060 * This map has a JIT entitlement
18063 vm_map_set_jit_entitled(vm_map_t map
)
18065 #if defined (__arm64__)
18066 pmap_set_jit_entitled(map
->pmap
);
18073 * Expand the maximum size of an existing map.
18076 vm_map_set_max_addr(vm_map_t map
, vm_map_offset_t new_max_offset
)
18078 #if defined(__arm64__)
18079 vm_map_offset_t max_supported_offset
= 0;
18080 vm_map_offset_t old_max_offset
= map
->max_offset
;
18081 max_supported_offset
= pmap_max_offset(vm_map_is_64bit(map
), ARM_PMAP_MAX_OFFSET_JUMBO
);
18083 new_max_offset
= trunc_page(new_max_offset
);
18085 /* The address space cannot be shrunk using this routine. */
18086 if (old_max_offset
>= new_max_offset
) {
18090 if (max_supported_offset
< new_max_offset
) {
18091 new_max_offset
= max_supported_offset
;
18094 map
->max_offset
= new_max_offset
;
18096 if (map
->holes_list
->prev
->vme_end
== old_max_offset
) {
18098 * There is already a hole at the end of the map; simply make it bigger.
18100 map
->holes_list
->prev
->vme_end
= map
->max_offset
;
18103 * There is no hole at the end, so we need to create a new hole
18104 * for the new empty space we're creating.
18106 struct vm_map_links
*new_hole
= zalloc(vm_map_holes_zone
);
18107 new_hole
->start
= old_max_offset
;
18108 new_hole
->end
= map
->max_offset
;
18109 new_hole
->prev
= map
->holes_list
->prev
;
18110 new_hole
->next
= (struct vm_map_entry
*)map
->holes_list
;
18111 map
->holes_list
->prev
->links
.next
= (struct vm_map_entry
*)new_hole
;
18112 map
->holes_list
->prev
= (struct vm_map_entry
*)new_hole
;
18116 (void)new_max_offset
;
18121 vm_compute_max_offset(boolean_t is64
)
18123 #if defined(__arm__) || defined(__arm64__)
18124 return pmap_max_offset(is64
, ARM_PMAP_MAX_OFFSET_DEVICE
);
18126 return is64
? (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
: (vm_map_offset_t
)VM_MAX_ADDRESS
;
18131 vm_map_get_max_aslr_slide_section(
18132 vm_map_t map __unused
,
18133 int64_t *max_sections
,
18134 int64_t *section_size
)
18136 #if defined(__arm64__)
18138 *section_size
= ARM_TT_TWIG_SIZE
;
18146 vm_map_get_max_aslr_slide_pages(vm_map_t map
)
18148 #if defined(__arm64__)
18149 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
18150 * limited embedded address space; this is also meant to minimize pmap
18151 * memory usage on 16KB page systems.
18153 return 1 << (24 - VM_MAP_PAGE_SHIFT(map
));
18155 return 1 << (vm_map_is_64bit(map
) ? 16 : 8);
18160 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map
)
18162 #if defined(__arm64__)
18163 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
18164 * of independent entropy on 16KB page systems.
18166 return 1 << (22 - VM_MAP_PAGE_SHIFT(map
));
18168 return 1 << (vm_map_is_64bit(map
) ? 16 : 8);
18177 return map
->max_offset
> ((vm_map_offset_t
)VM_MAX_ADDRESS
);
18182 vm_map_has_hard_pagezero(
18184 vm_map_offset_t pagezero_size
)
18188 * We should lock the VM map (for read) here but we can get away
18189 * with it for now because there can't really be any race condition:
18190 * the VM map's min_offset is changed only when the VM map is created
18191 * and when the zero page is established (when the binary gets loaded),
18192 * and this routine gets called only when the task terminates and the
18193 * VM map is being torn down, and when a new map is created via
18194 * load_machfile()/execve().
18196 return map
->min_offset
>= pagezero_size
;
18200 * Raise a VM map's maximun offset.
18203 vm_map_raise_max_offset(
18205 vm_map_offset_t new_max_offset
)
18210 ret
= KERN_INVALID_ADDRESS
;
18212 if (new_max_offset
>= map
->max_offset
) {
18213 if (!vm_map_is_64bit(map
)) {
18214 if (new_max_offset
<= (vm_map_offset_t
)VM_MAX_ADDRESS
) {
18215 map
->max_offset
= new_max_offset
;
18216 ret
= KERN_SUCCESS
;
18219 if (new_max_offset
<= (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
) {
18220 map
->max_offset
= new_max_offset
;
18221 ret
= KERN_SUCCESS
;
18226 vm_map_unlock(map
);
18232 * Raise a VM map's minimum offset.
18233 * To strictly enforce "page zero" reservation.
18236 vm_map_raise_min_offset(
18238 vm_map_offset_t new_min_offset
)
18240 vm_map_entry_t first_entry
;
18242 new_min_offset
= vm_map_round_page(new_min_offset
,
18243 VM_MAP_PAGE_MASK(map
));
18247 if (new_min_offset
< map
->min_offset
) {
18249 * Can't move min_offset backwards, as that would expose
18250 * a part of the address space that was previously, and for
18251 * possibly good reasons, inaccessible.
18253 vm_map_unlock(map
);
18254 return KERN_INVALID_ADDRESS
;
18256 if (new_min_offset
>= map
->max_offset
) {
18257 /* can't go beyond the end of the address space */
18258 vm_map_unlock(map
);
18259 return KERN_INVALID_ADDRESS
;
18262 first_entry
= vm_map_first_entry(map
);
18263 if (first_entry
!= vm_map_to_entry(map
) &&
18264 first_entry
->vme_start
< new_min_offset
) {
18266 * Some memory was already allocated below the new
18267 * minimun offset. It's too late to change it now...
18269 vm_map_unlock(map
);
18270 return KERN_NO_SPACE
;
18273 map
->min_offset
= new_min_offset
;
18275 assert(map
->holes_list
);
18276 map
->holes_list
->start
= new_min_offset
;
18277 assert(new_min_offset
< map
->holes_list
->end
);
18279 vm_map_unlock(map
);
18281 return KERN_SUCCESS
;
18285 * Set the limit on the maximum amount of user wired memory allowed for this map.
18286 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
18287 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
18288 * don't have to reach over to the BSD data structures.
18292 vm_map_set_user_wire_limit(vm_map_t map
,
18295 map
->user_wire_limit
= limit
;
18300 vm_map_switch_protect(vm_map_t map
,
18304 map
->switch_protect
= val
;
18305 vm_map_unlock(map
);
18309 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
18310 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
18311 * bump both counters.
18314 vm_map_iokit_mapped_region(vm_map_t map
, vm_size_t bytes
)
18316 pmap_t pmap
= vm_map_pmap(map
);
18318 ledger_credit(pmap
->ledger
, task_ledgers
.iokit_mapped
, bytes
);
18319 ledger_credit(pmap
->ledger
, task_ledgers
.phys_footprint
, bytes
);
18323 vm_map_iokit_unmapped_region(vm_map_t map
, vm_size_t bytes
)
18325 pmap_t pmap
= vm_map_pmap(map
);
18327 ledger_debit(pmap
->ledger
, task_ledgers
.iokit_mapped
, bytes
);
18328 ledger_debit(pmap
->ledger
, task_ledgers
.phys_footprint
, bytes
);
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
/*
 *	vm_map_sign:
 *
 *	Mark every page of [start, end) in "map" as code-signing
 *	validated.  The whole range must lie inside a single map entry
 *	that is not a submap and that already has a VM object, and
 *	every page must already be resident in that object.
 *
 *	Returns:
 *	KERN_INVALID_ARGUMENT	null map, range not covered by the entry
 *				found at "start", or no object yet.
 *	KERN_INVALID_ADDRESS	nothing mapped at "start", or a submap.
 *	KERN_FAILURE		a page is missing, busy, or in one of the
 *				"unusual" states checked below; pages
 *				visited before it stay marked.
 *	KERN_SUCCESS		all pages were marked.
 */
kern_return_t
vm_map_sign(vm_map_t map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
	vm_map_entry_t          entry;
	vm_page_t               m;
	vm_object_t             object;
	vm_object_offset_t      offset;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Work out where "start" falls in the object while the map is
	 * still locked: once the map is unlocked the entry may change or
	 * go away, so it must not be consulted from the loop below.
	 */
	offset = start - entry->vme_start + VME_OFFSET(entry);

	vm_object_lock(object);
	vm_map_unlock_read(map);
	entry = VM_MAP_ENTRY_NULL;      /* map is unlocked, the entry is no longer valid */

	while (start < end) {
		unsigned int refmod;

		m = vm_page_lookup(object, offset);
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->vmp_busy ||
		    (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		m->vmp_cs_validated = TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->vmp_wpmapped = FALSE;
		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
18427 vm_map_partial_reap(vm_map_t map
, unsigned int *reclaimed_resident
, unsigned int *reclaimed_compressed
)
18429 vm_map_entry_t entry
= VM_MAP_ENTRY_NULL
;
18430 vm_map_entry_t next_entry
;
18431 kern_return_t kr
= KERN_SUCCESS
;
18437 * We use a "zap_map" to avoid having to unlock
18438 * the "map" in vm_map_delete().
18440 zap_map
= vm_map_create(PMAP_NULL
,
18443 map
->hdr
.entries_pageable
);
18445 if (zap_map
== VM_MAP_NULL
) {
18446 return KERN_RESOURCE_SHORTAGE
;
18449 vm_map_set_page_shift(zap_map
,
18450 VM_MAP_PAGE_SHIFT(map
));
18451 vm_map_disable_hole_optimization(zap_map
);
18453 for (entry
= vm_map_first_entry(map
);
18454 entry
!= vm_map_to_entry(map
);
18455 entry
= next_entry
) {
18456 next_entry
= entry
->vme_next
;
18458 if (VME_OBJECT(entry
) &&
18459 !entry
->is_sub_map
&&
18460 (VME_OBJECT(entry
)->internal
== TRUE
) &&
18461 (VME_OBJECT(entry
)->ref_count
== 1)) {
18462 *reclaimed_resident
+= VME_OBJECT(entry
)->resident_page_count
;
18463 *reclaimed_compressed
+= vm_compressor_pager_get_count(VME_OBJECT(entry
)->pager
);
18465 (void)vm_map_delete(map
,
18468 VM_MAP_REMOVE_SAVE_ENTRIES
,
18473 vm_map_unlock(map
);
18476 * Get rid of the "zap_maps" and all the map entries that
18477 * they may still contain.
18479 if (zap_map
!= VM_MAP_NULL
) {
18480 vm_map_destroy(zap_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
18481 zap_map
= VM_MAP_NULL
;
18488 #if DEVELOPMENT || DEBUG
18491 vm_map_disconnect_page_mappings(
18493 boolean_t do_unnest
)
18495 vm_map_entry_t entry
;
18496 int page_count
= 0;
18498 if (do_unnest
== TRUE
) {
18499 #ifndef NO_NESTED_PMAP
18502 for (entry
= vm_map_first_entry(map
);
18503 entry
!= vm_map_to_entry(map
);
18504 entry
= entry
->vme_next
) {
18505 if (entry
->is_sub_map
&& entry
->use_pmap
) {
18507 * Make sure the range between the start of this entry and
18508 * the end of this entry is no longer nested, so that
18509 * we will only remove mappings from the pmap in use by this
18512 vm_map_clip_unnest(map
, entry
, entry
->vme_start
, entry
->vme_end
);
18515 vm_map_unlock(map
);
18518 vm_map_lock_read(map
);
18520 page_count
= map
->pmap
->stats
.resident_count
;
18522 for (entry
= vm_map_first_entry(map
);
18523 entry
!= vm_map_to_entry(map
);
18524 entry
= entry
->vme_next
) {
18525 if (!entry
->is_sub_map
&& ((VME_OBJECT(entry
) == 0) ||
18526 (VME_OBJECT(entry
)->phys_contiguous
))) {
18529 if (entry
->is_sub_map
) {
18530 assert(!entry
->use_pmap
);
18533 pmap_remove_options(map
->pmap
, entry
->vme_start
, entry
->vme_end
, 0);
18535 vm_map_unlock_read(map
);
18546 int c_freezer_swapout_page_count
;
18547 int c_freezer_compression_count
= 0;
18548 AbsoluteTime c_freezer_last_yield_ts
= 0;
18550 extern unsigned int memorystatus_freeze_private_shared_pages_ratio
;
18551 extern unsigned int memorystatus_freeze_shared_mb_per_process_max
;
18556 unsigned int *purgeable_count
,
18557 unsigned int *wired_count
,
18558 unsigned int *clean_count
,
18559 unsigned int *dirty_count
,
18560 unsigned int dirty_budget
,
18561 unsigned int *shared_count
,
18562 int *freezer_error_code
,
18563 boolean_t eval_only
)
18565 vm_map_entry_t entry2
= VM_MAP_ENTRY_NULL
;
18566 kern_return_t kr
= KERN_SUCCESS
;
18567 boolean_t evaluation_phase
= TRUE
;
18568 vm_object_t cur_shared_object
= NULL
;
18569 int cur_shared_obj_ref_cnt
= 0;
18570 unsigned int dirty_private_count
= 0, dirty_shared_count
= 0, obj_pages_snapshot
= 0;
18572 *purgeable_count
= *wired_count
= *clean_count
= *dirty_count
= *shared_count
= 0;
18575 * We need the exclusive lock here so that we can
18576 * block any page faults or lookups while we are
18577 * in the middle of freezing this vm map.
18579 vm_map_t map
= task
->map
;
18583 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT
);
18585 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18586 if (vm_compressor_low_on_space()) {
18587 *freezer_error_code
= FREEZER_ERROR_NO_COMPRESSOR_SPACE
;
18590 if (vm_swap_low_on_space()) {
18591 *freezer_error_code
= FREEZER_ERROR_NO_SWAP_SPACE
;
18594 kr
= KERN_NO_SPACE
;
18598 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
== FALSE
) {
18600 * In-memory compressor backing the freezer. No disk.
18601 * So no need to do the evaluation phase.
18603 evaluation_phase
= FALSE
;
18605 if (eval_only
== TRUE
) {
18607 * We don't support 'eval_only' mode
18608 * in this non-swap config.
18610 *freezer_error_code
= FREEZER_ERROR_GENERIC
;
18611 kr
= KERN_INVALID_ARGUMENT
;
18615 c_freezer_compression_count
= 0;
18616 clock_get_uptime(&c_freezer_last_yield_ts
);
18620 for (entry2
= vm_map_first_entry(map
);
18621 entry2
!= vm_map_to_entry(map
);
18622 entry2
= entry2
->vme_next
) {
18623 vm_object_t src_object
= VME_OBJECT(entry2
);
18626 !entry2
->is_sub_map
&&
18627 !src_object
->phys_contiguous
) {
18628 /* If eligible, scan the entry, moving eligible pages over to our parent object */
18630 if (src_object
->internal
== TRUE
) {
18631 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
) {
18633 * We skip purgeable objects during evaluation phase only.
18634 * If we decide to freeze this process, we'll explicitly
18635 * purge these objects before we go around again with
18636 * 'evaluation_phase' set to FALSE.
18639 if ((src_object
->purgable
== VM_PURGABLE_EMPTY
) || (src_object
->purgable
== VM_PURGABLE_VOLATILE
)) {
18641 * We want to purge objects that may not belong to this task but are mapped
18642 * in this task alone. Since we already purged this task's purgeable memory
18643 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
18644 * on this task's purgeable objects. Hence the check for only volatile objects.
18646 if (evaluation_phase
== FALSE
&&
18647 (src_object
->purgable
== VM_PURGABLE_VOLATILE
) &&
18648 (src_object
->ref_count
== 1)) {
18649 vm_object_lock(src_object
);
18650 vm_object_purge(src_object
, 0);
18651 vm_object_unlock(src_object
);
18657 * Pages belonging to this object could be swapped to disk.
18658 * Make sure it's not a shared object because we could end
18659 * up just bringing it back in again.
18661 * We try to optimize somewhat by checking for objects that are mapped
18662 * more than once within our own map. But we don't do full searches,
18663 * we just look at the entries following our current entry.
18666 if (src_object
->ref_count
> 1) {
18667 if (src_object
!= cur_shared_object
) {
18668 obj_pages_snapshot
= (src_object
->resident_page_count
- src_object
->wired_page_count
) + vm_compressor_pager_get_count(src_object
->pager
);
18669 dirty_shared_count
+= obj_pages_snapshot
;
18671 cur_shared_object
= src_object
;
18672 cur_shared_obj_ref_cnt
= 1;
18675 cur_shared_obj_ref_cnt
++;
18676 if (src_object
->ref_count
== cur_shared_obj_ref_cnt
) {
18678 * Fall through to below and treat this object as private.
18679 * So deduct its pages from our shared total and add it to the
18683 dirty_shared_count
-= obj_pages_snapshot
;
18684 dirty_private_count
+= obj_pages_snapshot
;
18692 if (src_object
->ref_count
== 1) {
18693 dirty_private_count
+= (src_object
->resident_page_count
- src_object
->wired_page_count
) + vm_compressor_pager_get_count(src_object
->pager
);
18696 if (evaluation_phase
== TRUE
) {
18701 uint32_t paged_out_count
= vm_object_compressed_freezer_pageout(src_object
, dirty_budget
);
18702 *wired_count
+= src_object
->wired_page_count
;
18704 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18705 if (vm_compressor_low_on_space()) {
18706 *freezer_error_code
= FREEZER_ERROR_NO_COMPRESSOR_SPACE
;
18709 if (vm_swap_low_on_space()) {
18710 *freezer_error_code
= FREEZER_ERROR_NO_SWAP_SPACE
;
18713 kr
= KERN_NO_SPACE
;
18716 if (paged_out_count
>= dirty_budget
) {
18719 dirty_budget
-= paged_out_count
;
18724 *shared_count
= (unsigned int) ((dirty_shared_count
* PAGE_SIZE_64
) / (1024 * 1024ULL));
18725 if (evaluation_phase
) {
18726 unsigned int shared_pages_threshold
= (memorystatus_freeze_shared_mb_per_process_max
* 1024 * 1024ULL) / PAGE_SIZE_64
;
18728 if (dirty_shared_count
> shared_pages_threshold
) {
18729 *freezer_error_code
= FREEZER_ERROR_EXCESS_SHARED_MEMORY
;
18734 if (dirty_shared_count
&&
18735 ((dirty_private_count
/ dirty_shared_count
) < memorystatus_freeze_private_shared_pages_ratio
)) {
18736 *freezer_error_code
= FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO
;
18741 evaluation_phase
= FALSE
;
18742 dirty_shared_count
= dirty_private_count
= 0;
18744 c_freezer_compression_count
= 0;
18745 clock_get_uptime(&c_freezer_last_yield_ts
);
18752 vm_purgeable_purge_task_owned(task
);
18760 vm_map_unlock(map
);
18762 if ((eval_only
== FALSE
) && (kr
== KERN_SUCCESS
)) {
18763 vm_object_compressed_freezer_done();
18765 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
) {
18767 * reset the counter tracking the # of swapped compressed pages
18768 * because we are now done with this freeze session and task.
18771 *dirty_count
= c_freezer_swapout_page_count
; //used to track pageouts
18772 c_freezer_swapout_page_count
= 0;
18781 * vm_map_entry_should_cow_for_true_share:
18783 * Determines if the map entry should be clipped and setup for copy-on-write
18784 * to avoid applying "true_share" to a large VM object when only a subset is
18787 * For now, we target only the map entries created for the Objective C
18788 * Garbage Collector, which initially have the following properties:
18789 * - alias == VM_MEMORY_MALLOC
18790 * - wired_count == 0
18792 * and a VM object with:
18794 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18796 * - vo_size == ANON_CHUNK_SIZE
18798 * Only non-kernel map entries.
18801 vm_map_entry_should_cow_for_true_share(
18802 vm_map_entry_t entry
)
18804 vm_object_t object
;
18806 if (entry
->is_sub_map
) {
18807 /* entry does not point at a VM object */
18811 if (entry
->needs_copy
) {
18812 /* already set for copy_on_write: done! */
18816 if (VME_ALIAS(entry
) != VM_MEMORY_MALLOC
&&
18817 VME_ALIAS(entry
) != VM_MEMORY_MALLOC_SMALL
) {
18818 /* not a malloc heap or Obj-C Garbage Collector heap */
18822 if (entry
->wired_count
) {
18823 /* wired: can't change the map entry... */
18824 vm_counters
.should_cow_but_wired
++;
18828 object
= VME_OBJECT(entry
);
18830 if (object
== VM_OBJECT_NULL
) {
18831 /* no object yet... */
18835 if (!object
->internal
) {
18836 /* not an internal object */
18840 if (object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
18841 /* not the default copy strategy */
18845 if (object
->true_share
) {
18846 /* already true_share: too late to avoid it */
18850 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC
&&
18851 object
->vo_size
!= ANON_CHUNK_SIZE
) {
18852 /* ... not an object created for the ObjC Garbage Collector */
18856 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_SMALL
&&
18857 object
->vo_size
!= 2048 * 4096) {
18858 /* ... not a "MALLOC_SMALL" heap */
18863 * All the criteria match: we have a large object being targeted for "true_share".
18864 * To limit the adverse side-effects linked with "true_share", tell the caller to
18865 * try and avoid setting up the entire object for "true_share" by clipping the
18866 * targeted range and setting it up for copy-on-write.
18872 vm_map_round_page_mask(
18873 vm_map_offset_t offset
,
18874 vm_map_offset_t mask
)
18876 return VM_MAP_ROUND_PAGE(offset
, mask
);
18880 vm_map_trunc_page_mask(
18881 vm_map_offset_t offset
,
18882 vm_map_offset_t mask
)
18884 return VM_MAP_TRUNC_PAGE(offset
, mask
);
18888 vm_map_page_aligned(
18889 vm_map_offset_t offset
,
18890 vm_map_offset_t mask
)
18892 return ((offset
) & mask
) == 0;
18899 return VM_MAP_PAGE_SHIFT(map
);
18906 return VM_MAP_PAGE_SIZE(map
);
18913 return VM_MAP_PAGE_MASK(map
);
18917 vm_map_set_page_shift(
18921 if (map
->hdr
.nentries
!= 0) {
18922 /* too late to change page size */
18923 return KERN_FAILURE
;
18926 map
->hdr
.page_shift
= pageshift
;
18928 return KERN_SUCCESS
;
18932 vm_map_query_volatile(
18934 mach_vm_size_t
*volatile_virtual_size_p
,
18935 mach_vm_size_t
*volatile_resident_size_p
,
18936 mach_vm_size_t
*volatile_compressed_size_p
,
18937 mach_vm_size_t
*volatile_pmap_size_p
,
18938 mach_vm_size_t
*volatile_compressed_pmap_size_p
)
18940 mach_vm_size_t volatile_virtual_size
;
18941 mach_vm_size_t volatile_resident_count
;
18942 mach_vm_size_t volatile_compressed_count
;
18943 mach_vm_size_t volatile_pmap_count
;
18944 mach_vm_size_t volatile_compressed_pmap_count
;
18945 mach_vm_size_t resident_count
;
18946 vm_map_entry_t entry
;
18947 vm_object_t object
;
18949 /* map should be locked by caller */
18951 volatile_virtual_size
= 0;
18952 volatile_resident_count
= 0;
18953 volatile_compressed_count
= 0;
18954 volatile_pmap_count
= 0;
18955 volatile_compressed_pmap_count
= 0;
18957 for (entry
= vm_map_first_entry(map
);
18958 entry
!= vm_map_to_entry(map
);
18959 entry
= entry
->vme_next
) {
18960 mach_vm_size_t pmap_resident_bytes
, pmap_compressed_bytes
;
18962 if (entry
->is_sub_map
) {
18965 if (!(entry
->protection
& VM_PROT_WRITE
)) {
18968 object
= VME_OBJECT(entry
);
18969 if (object
== VM_OBJECT_NULL
) {
18972 if (object
->purgable
!= VM_PURGABLE_VOLATILE
&&
18973 object
->purgable
!= VM_PURGABLE_EMPTY
) {
18976 if (VME_OFFSET(entry
)) {
18978 * If the map entry has been split and the object now
18979 * appears several times in the VM map, we don't want
18980 * to count the object's resident_page_count more than
18981 * once. We count it only for the first one, starting
18982 * at offset 0 and ignore the other VM map entries.
18986 resident_count
= object
->resident_page_count
;
18987 if ((VME_OFFSET(entry
) / PAGE_SIZE
) >= resident_count
) {
18988 resident_count
= 0;
18990 resident_count
-= (VME_OFFSET(entry
) / PAGE_SIZE
);
18993 volatile_virtual_size
+= entry
->vme_end
- entry
->vme_start
;
18994 volatile_resident_count
+= resident_count
;
18995 if (object
->pager
) {
18996 volatile_compressed_count
+=
18997 vm_compressor_pager_get_count(object
->pager
);
18999 pmap_compressed_bytes
= 0;
19000 pmap_resident_bytes
=
19001 pmap_query_resident(map
->pmap
,
19004 &pmap_compressed_bytes
);
19005 volatile_pmap_count
+= (pmap_resident_bytes
/ PAGE_SIZE
);
19006 volatile_compressed_pmap_count
+= (pmap_compressed_bytes
19010 /* map is still locked on return */
19012 *volatile_virtual_size_p
= volatile_virtual_size
;
19013 *volatile_resident_size_p
= volatile_resident_count
* PAGE_SIZE
;
19014 *volatile_compressed_size_p
= volatile_compressed_count
* PAGE_SIZE
;
19015 *volatile_pmap_size_p
= volatile_pmap_count
* PAGE_SIZE
;
19016 *volatile_compressed_pmap_size_p
= volatile_compressed_pmap_count
* PAGE_SIZE
;
19018 return KERN_SUCCESS
;
19022 vm_map_sizes(vm_map_t map
,
19023 vm_map_size_t
* psize
,
19024 vm_map_size_t
* pfree
,
19025 vm_map_size_t
* plargest_free
)
19027 vm_map_entry_t entry
;
19028 vm_map_offset_t prev
;
19029 vm_map_size_t free
, total_free
, largest_free
;
19033 *psize
= *pfree
= *plargest_free
= 0;
19036 total_free
= largest_free
= 0;
19038 vm_map_lock_read(map
);
19040 *psize
= map
->max_offset
- map
->min_offset
;
19043 prev
= map
->min_offset
;
19044 for (entry
= vm_map_first_entry(map
);; entry
= entry
->vme_next
) {
19045 end
= (entry
== vm_map_to_entry(map
));
19048 free
= entry
->vme_end
- prev
;
19050 free
= entry
->vme_start
- prev
;
19053 total_free
+= free
;
19054 if (free
> largest_free
) {
19055 largest_free
= free
;
19061 prev
= entry
->vme_end
;
19063 vm_map_unlock_read(map
);
19065 *pfree
= total_free
;
19067 if (plargest_free
) {
19068 *plargest_free
= largest_free
;
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);

/*
 * vm_map_shadow_max:
 *	Returns the length of the longest shadow chain found behind any
 *	non-submap entry of "map" (0 for a NULL map).
 */
int
vm_map_shadow_max(
	vm_map_t map)
{
	int             depth, deepest;
	vm_map_entry_t  cur;
	vm_object_t     obj, below;

	if (map == NULL) {
		return 0;
	}

	deepest = 0;

	vm_map_lock_read(map);

	for (cur = vm_map_first_entry(map);
	    cur != vm_map_to_entry(map);
	    cur = cur->vme_next) {
		if (cur->is_sub_map) {
			continue;
		}
		obj = VME_OBJECT(cur);
		if (obj == NULL) {
			continue;
		}

		/* walk down the chain, locking hand over hand */
		vm_object_lock_shared(obj);
		depth = 0;
		while (obj->shadow != NULL) {
			below = obj->shadow;
			vm_object_lock_shared(below);
			vm_object_unlock(obj);
			depth++;
			obj = below;
		}
		vm_object_unlock(obj);

		if (depth > deepest) {
			deepest = depth;
		}
	}

	vm_map_unlock_read(map);

	return deepest;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
19121 vm_commit_pagezero_status(vm_map_t lmap
)
19123 pmap_advise_pagezero_range(lmap
->pmap
, lmap
->min_offset
);
19126 #if !CONFIG_EMBEDDED
19128 vm_map_set_high_start(
19130 vm_map_offset_t high_start
)
19132 map
->vmmap_high_start
= high_start
;
19138 vm_map_entry_cs_associate(
19140 vm_map_entry_t entry
,
19141 vm_map_kernel_flags_t vmk_flags
)
19143 vm_object_t cs_object
, cs_shadow
;
19144 vm_object_offset_t cs_offset
;
19146 struct vnode
*cs_vnode
;
19147 kern_return_t cs_ret
;
19149 if (map
->pmap
== NULL
||
19150 entry
->is_sub_map
|| /* XXX FBDP: recurse on sub-range? */
19151 VME_OBJECT(entry
) == VM_OBJECT_NULL
||
19152 !(entry
->protection
& VM_PROT_EXECUTE
)) {
19153 return KERN_SUCCESS
;
19156 vm_map_lock_assert_exclusive(map
);
19158 if (entry
->used_for_jit
) {
19159 cs_ret
= pmap_cs_associate(map
->pmap
,
19160 PMAP_CS_ASSOCIATE_JIT
,
19162 entry
->vme_end
- entry
->vme_start
);
19166 if (vmk_flags
.vmkf_remap_prot_copy
) {
19167 cs_ret
= pmap_cs_associate(map
->pmap
,
19168 PMAP_CS_ASSOCIATE_COW
,
19170 entry
->vme_end
- entry
->vme_start
);
19174 vm_object_lock_shared(VME_OBJECT(entry
));
19175 cs_offset
= VME_OFFSET(entry
);
19176 for (cs_object
= VME_OBJECT(entry
);
19177 (cs_object
!= VM_OBJECT_NULL
&&
19178 !cs_object
->code_signed
);
19179 cs_object
= cs_shadow
) {
19180 cs_shadow
= cs_object
->shadow
;
19181 if (cs_shadow
!= VM_OBJECT_NULL
) {
19182 cs_offset
+= cs_object
->vo_shadow_offset
;
19183 vm_object_lock_shared(cs_shadow
);
19185 vm_object_unlock(cs_object
);
19187 if (cs_object
== VM_OBJECT_NULL
) {
19188 return KERN_SUCCESS
;
19191 cs_offset
+= cs_object
->paging_offset
;
19192 cs_vnode
= vnode_pager_lookup_vnode(cs_object
->pager
);
19193 cs_ret
= vnode_pager_get_cs_blobs(cs_vnode
,
19195 assert(cs_ret
== KERN_SUCCESS
);
19196 cs_ret
= cs_associate_blob_with_mapping(map
->pmap
,
19202 vm_object_unlock(cs_object
);
19203 cs_object
= VM_OBJECT_NULL
;
19206 if (cs_ret
== KERN_SUCCESS
) {
19207 DTRACE_VM2(vm_map_entry_cs_associate_success
,
19208 vm_map_offset_t
, entry
->vme_start
,
19209 vm_map_offset_t
, entry
->vme_end
);
19210 if (vm_map_executable_immutable
) {
19212 * Prevent this executable
19213 * mapping from being unmapped
19216 entry
->permanent
= TRUE
;
19219 * pmap says it will validate the
19220 * code-signing validity of pages
19221 * faulted in via this mapping, so
19222 * this map entry should be marked so
19223 * that vm_fault() bypasses code-signing
19224 * validation for faults coming through
19227 entry
->pmap_cs_associated
= TRUE
;
19228 } else if (cs_ret
== KERN_NOT_SUPPORTED
) {
19230 * pmap won't check the code-signing
19231 * validity of pages faulted in via
19232 * this mapping, so VM should keep
19235 DTRACE_VM3(vm_map_entry_cs_associate_off
,
19236 vm_map_offset_t
, entry
->vme_start
,
19237 vm_map_offset_t
, entry
->vme_end
,
19241 * A real error: do not allow
19242 * execution in this mapping.
19244 DTRACE_VM3(vm_map_entry_cs_associate_failure
,
19245 vm_map_offset_t
, entry
->vme_start
,
19246 vm_map_offset_t
, entry
->vme_end
,
19248 entry
->protection
&= ~VM_PROT_EXECUTE
;
19249 entry
->max_protection
&= ~VM_PROT_EXECUTE
;
19254 #endif /* PMAP_CS */
19257 * FORKED CORPSE FOOTPRINT
19259 * A forked corpse gets a copy of the original VM map but its pmap is mostly
19260 * empty since it never ran and never got to fault in any pages.
19261 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
19262 * a forked corpse would therefore return very little information.
19264 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
19265 * to vm_map_fork() to collect footprint information from the original VM map
19266 * and its pmap, and store it in the forked corpse's VM map. That information
19267 * is stored in place of the VM map's "hole list" since we'll never need to
19268 * look up holes in the corpse's map.
19270 * The corpse's footprint info looks like this:
19272 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
19274 * +---------------------------------------+
19275 * header-> | cf_size |
19276 * +-------------------+-------------------+
19277 * | cf_last_region | cf_last_zeroes |
19278 * +-------------------+-------------------+
19279 * region1-> | cfr_vaddr |
19280 * +-------------------+-------------------+
19281 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19282 * +---------------------------------------+
19283 * | d4 | d5 | ... |
19284 * +---------------------------------------+
19286 * +-------------------+-------------------+
19287 * | dy | dz | na | na | cfr_vaddr... | <-region2
19288 * +-------------------+-------------------+
19289 * | cfr_vaddr (ctd) | cfr_num_pages |
19290 * +---------------------------------------+
19292 * +---------------------------------------+
19294 * +---------------------------------------+
19295 * last region-> | cfr_vaddr |
19296 * +---------------------------------------+
19297 * + cfr_num_pages | d0 | d1 | d2 | d3 |
19298 * +---------------------------------------+
19300 * +---------------------------------------+
19301 * | dx | dy | dz | na | na | na | na | na |
19302 * +---------------------------------------+
19305 * cf_size: total size of the buffer (rounded to page size)
19306 * cf_last_region: offset in the buffer of the last "region" sub-header
19307 * cf_last_zeroes: number of trailing "zero" dispositions at the end
19309 * cfr_vaddr: virtual address of the start of the covered "region"
19310 * cfr_num_pages: number of pages in the covered "region"
19311 * d*: disposition of the page at that virtual address
19312 * Regions in the buffer are word-aligned.
19314 * We estimate the size of the buffer based on the number of memory regions
19315 * and the virtual size of the address space. While copying each memory region
19316 * during vm_map_fork(), we also collect the footprint info for that region
19317 * and store it in the buffer, packing it as much as possible (coalescing
19318 * contiguous memory regions to avoid having too many region headers and
19319 * avoiding long streaks of "zero" page dispositions by splitting footprint
19320 * "regions"), so the number of regions in the footprint buffer might not match
19321 * the number of memory regions in the address space.
19323 * We also have to copy the original task's "nonvolatile" ledgers since that's
19324 * part of the footprint and will need to be reported to any tool asking for
19325 * the footprint information of the forked corpse.
19328 uint64_t vm_map_corpse_footprint_count
= 0;
19329 uint64_t vm_map_corpse_footprint_size_avg
= 0;
19330 uint64_t vm_map_corpse_footprint_size_max
= 0;
19331 uint64_t vm_map_corpse_footprint_full
= 0;
19332 uint64_t vm_map_corpse_footprint_no_buf
= 0;
19335 * vm_map_corpse_footprint_new_region:
19336 * closes the current footprint "region" and creates a new one
19338 * Returns NULL if there's not enough space in the buffer for a new region.
19340 static struct vm_map_corpse_footprint_region
*
19341 vm_map_corpse_footprint_new_region(
19342 struct vm_map_corpse_footprint_header
*footprint_header
)
19344 uintptr_t footprint_edge
;
19345 uint32_t new_region_offset
;
19346 struct vm_map_corpse_footprint_region
*footprint_region
;
19347 struct vm_map_corpse_footprint_region
*new_footprint_region
;
19349 footprint_edge
= ((uintptr_t)footprint_header
+
19350 footprint_header
->cf_size
);
19351 footprint_region
= ((struct vm_map_corpse_footprint_region
*)
19352 ((char *)footprint_header
+
19353 footprint_header
->cf_last_region
));
19354 assert((uintptr_t)footprint_region
+ sizeof(*footprint_region
) <=
19357 /* get rid of trailing zeroes in the last region */
19358 assert(footprint_region
->cfr_num_pages
>=
19359 footprint_header
->cf_last_zeroes
);
19360 footprint_region
->cfr_num_pages
-=
19361 footprint_header
->cf_last_zeroes
;
19362 footprint_header
->cf_last_zeroes
= 0;
19364 /* reuse this region if it's now empty */
19365 if (footprint_region
->cfr_num_pages
== 0) {
19366 return footprint_region
;
19369 /* compute offset of new region */
19370 new_region_offset
= footprint_header
->cf_last_region
;
19371 new_region_offset
+= sizeof(*footprint_region
);
19372 new_region_offset
+= footprint_region
->cfr_num_pages
;
19373 new_region_offset
= roundup(new_region_offset
, sizeof(int));
19375 /* check if we're going over the edge */
19376 if (((uintptr_t)footprint_header
+
19377 new_region_offset
+
19378 sizeof(*footprint_region
)) >=
19380 /* over the edge: no new region */
19384 /* adjust offset of last region in header */
19385 footprint_header
->cf_last_region
= new_region_offset
;
19387 new_footprint_region
= (struct vm_map_corpse_footprint_region
*)
19388 ((char *)footprint_header
+
19389 footprint_header
->cf_last_region
);
19390 new_footprint_region
->cfr_vaddr
= 0;
19391 new_footprint_region
->cfr_num_pages
= 0;
19392 /* caller needs to initialize new region */
19394 return new_footprint_region
;
19398 * vm_map_corpse_footprint_collect:
19399 * collects footprint information for "old_entry" in "old_map" and
19400 * stores it in "new_map"'s vmmap_corpse_footprint.
19403 vm_map_corpse_footprint_collect(
19405 vm_map_entry_t old_entry
,
19408 vm_map_offset_t va
;
19411 struct vm_map_corpse_footprint_header
*footprint_header
;
19412 struct vm_map_corpse_footprint_region
*footprint_region
;
19413 struct vm_map_corpse_footprint_region
*new_footprint_region
;
19414 unsigned char *next_disp_p
;
19415 uintptr_t footprint_edge
;
19416 uint32_t num_pages_tmp
;
19418 va
= old_entry
->vme_start
;
19420 vm_map_lock_assert_exclusive(old_map
);
19421 vm_map_lock_assert_exclusive(new_map
);
19423 assert(new_map
->has_corpse_footprint
);
19424 assert(!old_map
->has_corpse_footprint
);
19425 if (!new_map
->has_corpse_footprint
||
19426 old_map
->has_corpse_footprint
) {
19428 * This can only transfer footprint info from a
19429 * map with a live pmap to a map with a corpse footprint.
19431 return KERN_NOT_SUPPORTED
;
19434 if (new_map
->vmmap_corpse_footprint
== NULL
) {
19436 vm_size_t buf_size
;
19439 buf_size
= (sizeof(*footprint_header
) +
19440 (old_map
->hdr
.nentries
19442 (sizeof(*footprint_region
) +
19443 +3)) /* potential alignment for each region */
19445 ((old_map
->size
/ PAGE_SIZE
)
19447 sizeof(char))); /* disposition for each page */
19448 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19449 buf_size
= round_page(buf_size
);
19451 /* limit buffer to 1 page to validate overflow detection */
19452 // buf_size = PAGE_SIZE;
19454 /* limit size to a somewhat sane amount */
19455 #if CONFIG_EMBEDDED
19456 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
19457 #else /* CONFIG_EMBEDDED */
19458 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
19459 #endif /* CONFIG_EMBEDDED */
19460 if (buf_size
> VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE
) {
19461 buf_size
= VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE
;
19465 * Allocate the pageable buffer (with a trailing guard page).
19466 * It will be zero-filled on demand.
19468 kr
= kernel_memory_allocate(kernel_map
,
19471 + PAGE_SIZE
), /* trailing guard page */
19473 KMA_PAGEABLE
| KMA_GUARD_LAST
,
19474 VM_KERN_MEMORY_DIAG
);
19475 if (kr
!= KERN_SUCCESS
) {
19476 vm_map_corpse_footprint_no_buf
++;
19480 /* initialize header and 1st region */
19481 footprint_header
= (struct vm_map_corpse_footprint_header
*)buf
;
19482 new_map
->vmmap_corpse_footprint
= footprint_header
;
19484 footprint_header
->cf_size
= buf_size
;
19485 footprint_header
->cf_last_region
=
19486 sizeof(*footprint_header
);
19487 footprint_header
->cf_last_zeroes
= 0;
19489 footprint_region
= (struct vm_map_corpse_footprint_region
*)
19490 ((char *)footprint_header
+
19491 footprint_header
->cf_last_region
);
19492 footprint_region
->cfr_vaddr
= 0;
19493 footprint_region
->cfr_num_pages
= 0;
19495 /* retrieve header and last region */
19496 footprint_header
= (struct vm_map_corpse_footprint_header
*)
19497 new_map
->vmmap_corpse_footprint
;
19498 footprint_region
= (struct vm_map_corpse_footprint_region
*)
19499 ((char *)footprint_header
+
19500 footprint_header
->cf_last_region
);
19502 footprint_edge
= ((uintptr_t)footprint_header
+
19503 footprint_header
->cf_size
);
19505 if ((footprint_region
->cfr_vaddr
+
19506 (((vm_map_offset_t
)footprint_region
->cfr_num_pages
) *
19508 != old_entry
->vme_start
) {
19509 uint64_t num_pages_delta
;
19510 uint32_t region_offset_delta
;
19513 * Not the next contiguous virtual address:
19514 * start a new region or store "zero" dispositions for
19515 * the missing pages?
19517 /* size of gap in actual page dispositions */
19518 num_pages_delta
= (((old_entry
->vme_start
-
19519 footprint_region
->cfr_vaddr
) / PAGE_SIZE
)
19520 - footprint_region
->cfr_num_pages
);
19521 /* size of gap as a new footprint region header */
19522 region_offset_delta
=
19523 (sizeof(*footprint_region
) +
19524 roundup((footprint_region
->cfr_num_pages
-
19525 footprint_header
->cf_last_zeroes
),
19527 (footprint_region
->cfr_num_pages
-
19528 footprint_header
->cf_last_zeroes
));
19529 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19530 if (region_offset_delta
< num_pages_delta
||
19531 os_add3_overflow(footprint_region
->cfr_num_pages
,
19532 (uint32_t) num_pages_delta
,
19536 * Storing data for this gap would take more space
19537 * than inserting a new footprint region header:
19538 * let's start a new region and save space. If it's a
19539 * tie, let's avoid using a new region, since that
19540 * would require more region hops to find the right
19541 * range during lookups.
19543 * If the current region's cfr_num_pages would overflow
19544 * if we added "zero" page dispositions for the gap,
19545 * no choice but to start a new region.
19547 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19548 new_footprint_region
=
19549 vm_map_corpse_footprint_new_region(footprint_header
);
19550 /* check that we're not going over the edge */
19551 if (new_footprint_region
== NULL
) {
19552 goto over_the_edge
;
19554 footprint_region
= new_footprint_region
;
19555 /* initialize new region as empty */
19556 footprint_region
->cfr_vaddr
= old_entry
->vme_start
;
19557 footprint_region
->cfr_num_pages
= 0;
19560 * Store "zero" page dispositions for the missing
19563 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19564 for (; num_pages_delta
> 0; num_pages_delta
--) {
19566 ((unsigned char *) footprint_region
+
19567 sizeof(*footprint_region
) +
19568 footprint_region
->cfr_num_pages
);
19569 /* check that we're not going over the edge */
19570 if ((uintptr_t)next_disp_p
>= footprint_edge
) {
19571 goto over_the_edge
;
19573 /* store "zero" disposition for this gap page */
19574 footprint_region
->cfr_num_pages
++;
19575 *next_disp_p
= (unsigned char) 0;
19576 footprint_header
->cf_last_zeroes
++;
19581 for (va
= old_entry
->vme_start
;
19582 va
< old_entry
->vme_end
;
19584 vm_object_t object
;
19586 object
= VME_OBJECT(old_entry
);
19587 if (!old_entry
->is_sub_map
&&
19588 old_entry
->iokit_acct
&&
19589 object
!= VM_OBJECT_NULL
&&
19590 object
->internal
&&
19591 object
->purgable
== VM_PURGABLE_DENY
) {
19593 * Non-purgeable IOKit memory: phys_footprint
19594 * includes the entire virtual mapping.
19595 * Since the forked corpse's VM map entry will not
19596 * have "iokit_acct", pretend that this page's
19597 * disposition is "present & internal", so that it
19598 * shows up in the forked corpse's footprint.
19600 disp
= (PMAP_QUERY_PAGE_PRESENT
|
19601 PMAP_QUERY_PAGE_INTERNAL
);
19604 pmap_query_page_info(old_map
->pmap
,
19609 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19611 if (disp
== 0 && footprint_region
->cfr_num_pages
== 0) {
19613 * Ignore "zero" dispositions at start of
19614 * region: just move start of region.
19616 footprint_region
->cfr_vaddr
+= PAGE_SIZE
;
19620 /* would region's cfr_num_pages overflow? */
19621 if (os_add_overflow(footprint_region
->cfr_num_pages
, 1,
19623 /* overflow: create a new region */
19624 new_footprint_region
=
19625 vm_map_corpse_footprint_new_region(
19627 if (new_footprint_region
== NULL
) {
19628 goto over_the_edge
;
19630 footprint_region
= new_footprint_region
;
19631 footprint_region
->cfr_vaddr
= va
;
19632 footprint_region
->cfr_num_pages
= 0;
19635 next_disp_p
= ((unsigned char *)footprint_region
+
19636 sizeof(*footprint_region
) +
19637 footprint_region
->cfr_num_pages
);
19638 /* check that we're not going over the edge */
19639 if ((uintptr_t)next_disp_p
>= footprint_edge
) {
19640 goto over_the_edge
;
19642 /* store this disposition */
19643 *next_disp_p
= (unsigned char) disp
;
19644 footprint_region
->cfr_num_pages
++;
19647 /* non-zero disp: break the current zero streak */
19648 footprint_header
->cf_last_zeroes
= 0;
19653 /* zero disp: add to the current streak of zeroes */
19654 footprint_header
->cf_last_zeroes
++;
19655 if ((footprint_header
->cf_last_zeroes
+
19656 roundup((footprint_region
->cfr_num_pages
-
19657 footprint_header
->cf_last_zeroes
) &
19660 (sizeof(*footprint_header
))) {
19662 * There are not enough trailing "zero" dispositions
19663 * (+ the extra padding we would need for the previous
19664 * region); creating a new region would not save space
19665 * at this point, so let's keep this "zero" disposition
19666 * in this region and reconsider later.
19671 * Create a new region to avoid having too many consecutive
19672 * "zero" dispositions.
19674 new_footprint_region
=
19675 vm_map_corpse_footprint_new_region(footprint_header
);
19676 if (new_footprint_region
== NULL
) {
19677 goto over_the_edge
;
19679 footprint_region
= new_footprint_region
;
19680 /* initialize the new region as empty ... */
19681 footprint_region
->cfr_num_pages
= 0;
19682 /* ... and skip this "zero" disp */
19683 footprint_region
->cfr_vaddr
= va
+ PAGE_SIZE
;
19686 return KERN_SUCCESS
;
19689 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19690 vm_map_corpse_footprint_full
++;
19691 return KERN_RESOURCE_SHORTAGE
;
19695 * vm_map_corpse_footprint_collect_done:
19696 * completes the footprint collection by getting rid of any remaining
19697 * trailing "zero" dispositions and trimming the unused part of the
/*
 * vm_map_corpse_footprint_collect_done: finalize the corpse footprint
 * buffer attached to "new_map" (new_map->vmmap_corpse_footprint).
 *
 * Steps visible below:
 *  - subtract the trailing "zero" dispositions counted in cf_last_zeroes
 *    from the last region's cfr_num_pages and reset cf_last_zeroes;
 *  - compute the number of bytes actually used (actual_size);
 *  - update the global footprint-size statistics;
 *  - when the buffer is larger than the page-rounded used size, release
 *    (buf_size - actual_size) bytes with vm_deallocate() and then call
 *    vm_protect();
 *  - store actual_size back into cf_size.
 *
 * NOTE(review): this text has lost lines (return type, parameter list,
 * braces, the declaration of "kr", several call arguments). Confirm the
 * details against the complete source before relying on this summary.
 */
19701 vm_map_corpse_footprint_collect_done(
19704 struct vm_map_corpse_footprint_header
*footprint_header
;
19705 struct vm_map_corpse_footprint_region
*footprint_region
;
19706 vm_size_t buf_size
, actual_size
;
19709 assert(new_map
->has_corpse_footprint
);
/* Guard for maps without a footprint buffer; the branch body is not visible in this text. */
19710 if (!new_map
->has_corpse_footprint
||
19711 new_map
->vmmap_corpse_footprint
== NULL
) {
19715 footprint_header
= (struct vm_map_corpse_footprint_header
*)
19716 new_map
->vmmap_corpse_footprint
;
19717 buf_size
= footprint_header
->cf_size
;
19719 footprint_region
= (struct vm_map_corpse_footprint_region
*)
19720 ((char *)footprint_header
+
19721 footprint_header
->cf_last_region
);
19723 /* get rid of trailing zeroes in last region */
19724 assert(footprint_region
->cfr_num_pages
>= footprint_header
->cf_last_zeroes
);
19725 footprint_region
->cfr_num_pages
-= footprint_header
->cf_last_zeroes
;
19726 footprint_header
->cf_last_zeroes
= 0;
/* Bytes in use: offset of the last region, plus its header, plus cfr_num_pages bytes of dispositions. */
19728 actual_size
= (vm_size_t
)(footprint_header
->cf_last_region
+
19729 sizeof(*footprint_region
) +
19730 footprint_region
->cfr_num_pages
);
19732 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
/* Update the global footprint-size statistics (running average, count, maximum). */
19733 vm_map_corpse_footprint_size_avg
=
19734 (((vm_map_corpse_footprint_size_avg
*
19735 vm_map_corpse_footprint_count
) +
19737 (vm_map_corpse_footprint_count
+ 1));
19738 vm_map_corpse_footprint_count
++;
19739 if (actual_size
> vm_map_corpse_footprint_size_max
) {
19740 vm_map_corpse_footprint_size_max
= actual_size
;
19743 actual_size
= round_page(actual_size
);
/* Buffer larger than the page-rounded used size: release the excess ("trim" step). */
19744 if (buf_size
> actual_size
) {
19745 kr
= vm_deallocate(kernel_map
,
19746 ((vm_address_t
)footprint_header
+
19748 PAGE_SIZE
), /* trailing guard page */
19749 (buf_size
- actual_size
));
19750 assertf(kr
== KERN_SUCCESS
,
19751 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19753 (uint64_t) buf_size
,
19754 (uint64_t) actual_size
,
/* Step labelled "guard" by the assert message below; most vm_protect() arguments are not visible here. */
19756 kr
= vm_protect(kernel_map
,
19757 ((vm_address_t
)footprint_header
+
19760 FALSE
, /* set_maximum */
19762 assertf(kr
== KERN_SUCCESS
,
19763 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19765 (uint64_t) buf_size
,
19766 (uint64_t) actual_size
,
/* Record the (page-rounded) size now in use. */
19770 footprint_header
->cf_size
= actual_size
;
19774 * vm_map_corpse_footprint_query_page_info:
19775 * retrieves the disposition of the page at virtual address "vaddr"
19776 * in the forked corpse's VM map
19778 * This is the equivalent of pmap_query_page_info() for a forked corpse.
/*
 * vm_map_corpse_footprint_query_page_info: look up the saved disposition
 * of the page at "va" in the footprint buffer of "map" and store it
 * through "disp".
 *
 * The lookup starts at the cached offset cf_hint_region. The offset is
 * reset to the first region (located right after the header) when the
 * hint is below the header size, at or beyond cf_last_region, or when
 * "va" lies before the hinted region. Regions are then walked forward
 * while "va" is at or beyond the current region's end. On a hit the hint
 * is updated (without synchronization, per the existing comments) and the
 * disposition is read from cfr_disposition[].
 *
 * kr is set to KERN_INVALID_ARGUMENT when the map has no corpse footprint
 * or no footprint buffer.
 *
 * NOTE(review): this text has lost lines (return type, the "map" and
 * "disp" parameters, braces, labels/gotos, the other assignments to kr
 * and *disp, the return statement). Confirm against the complete source.
 */
19781 vm_map_corpse_footprint_query_page_info(
19783 vm_map_offset_t va
,
19786 struct vm_map_corpse_footprint_header
*footprint_header
;
19787 struct vm_map_corpse_footprint_region
*footprint_region
;
19788 uint32_t footprint_region_offset
;
19789 vm_map_offset_t region_start
, region_end
;
/* Reject maps that carry no corpse footprint. */
19793 if (!map
->has_corpse_footprint
) {
19795 kr
= KERN_INVALID_ARGUMENT
;
19799 footprint_header
= map
->vmmap_corpse_footprint
;
19800 if (footprint_header
== NULL
) {
19802 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19803 kr
= KERN_INVALID_ARGUMENT
;
19807 /* start looking at the hint ("cf_hint_region") */
19808 footprint_region_offset
= footprint_header
->cf_hint_region
;
19811 if (footprint_region_offset
< sizeof(*footprint_header
)) {
19812 /* hint too low: start from 1st region */
19813 footprint_region_offset
= sizeof(*footprint_header
);
19815 if (footprint_region_offset
>= footprint_header
->cf_last_region
) {
19816 /* hint too high: re-start from 1st region */
19817 footprint_region_offset
= sizeof(*footprint_header
);
19819 footprint_region
= (struct vm_map_corpse_footprint_region
*)
19820 ((char *)footprint_header
+ footprint_region_offset
);
19821 region_start
= footprint_region
->cfr_vaddr
;
19822 region_end
= (region_start
+
19823 ((vm_map_offset_t
)(footprint_region
->cfr_num_pages
) *
19825 if (va
< region_start
&&
19826 footprint_region_offset
!= sizeof(*footprint_header
)) {
19827 /* our range starts before the hint region */
19829 /* reset the hint (in a racy way...) */
19830 footprint_header
->cf_hint_region
= sizeof(*footprint_header
);
19831 /* lookup "va" again from 1st region */
19832 footprint_region_offset
= sizeof(*footprint_header
);
/* Walk forward, one region at a time, while "va" lies at or beyond the current region's end. */
19836 while (va
>= region_end
) {
/* Check against the last region's offset; the statement this guards is not visible here. */
19837 if (footprint_region_offset
>= footprint_header
->cf_last_region
) {
19840 /* skip the region's header */
19841 footprint_region_offset
+= sizeof(*footprint_region
);
19842 /* skip the region's page dispositions */
19843 footprint_region_offset
+= footprint_region
->cfr_num_pages
;
19844 /* align to next word boundary */
19845 footprint_region_offset
=
19846 roundup(footprint_region_offset
,
19848 footprint_region
= (struct vm_map_corpse_footprint_region
*)
19849 ((char *)footprint_header
+ footprint_region_offset
);
19850 region_start
= footprint_region
->cfr_vaddr
;
19851 region_end
= (region_start
+
19852 ((vm_map_offset_t
)(footprint_region
->cfr_num_pages
) *
19855 if (va
< region_start
|| va
>= region_end
) {
19856 /* page not found */
19858 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19863 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19864 footprint_header
->cf_hint_region
= footprint_region_offset
;
19866 /* get page disposition for "va" in this region */
/* NOTE(review): the page index is narrowed to int; presumably bounded by cfr_num_pages -- confirm it cannot exceed INT_MAX. */
19867 disp_idx
= (int) ((va
- footprint_region
->cfr_vaddr
) / PAGE_SIZE
);
19868 *disp
= (int) (footprint_region
->cfr_disposition
[disp_idx
]);
19872 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19873 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
/* Fire the footprint_query_page_info probe; the visible arguments include va and kr. */
19874 DTRACE_VM4(footprint_query_page_info
,
19876 vm_map_offset_t
, va
,
19878 kern_return_t
, kr
);
/*
 * vm_map_corpse_footprint_destroy: release the corpse footprint buffer of
 * "map", if it has one.
 *
 * When has_corpse_footprint is set and vmmap_corpse_footprint is non-zero,
 * the buffer is returned to kernel_map with vm_deallocate(); the length is
 * the recorded cf_size plus PAGE_SIZE for the trailing guard page. The
 * map's pointer and flag are then cleared so the buffer cannot be released
 * twice through this path.
 *
 * NOTE(review): the return type, parameter list, braces and the
 * declaration of "kr" are on lines missing from this text.
 */
19885 vm_map_corpse_footprint_destroy(
19888 if (map
->has_corpse_footprint
&&
19889 map
->vmmap_corpse_footprint
!= 0) {
19890 struct vm_map_corpse_footprint_header
*footprint_header
;
19891 vm_size_t buf_size
;
19894 footprint_header
= map
->vmmap_corpse_footprint
;
/* cf_size lives inside the buffer: read it before the buffer is released. */
19895 buf_size
= footprint_header
->cf_size
;
19896 kr
= vm_deallocate(kernel_map
,
19897 (vm_offset_t
) map
->vmmap_corpse_footprint
,
19898 ((vm_size_t
) buf_size
19899 + PAGE_SIZE
)); /* trailing guard page */
19900 assertf(kr
== KERN_SUCCESS
, "kr=0x%x\n", kr
);
/* Forget the released buffer. */
19901 map
->vmmap_corpse_footprint
= 0;
19902 map
->has_corpse_footprint
= FALSE
;
19907 * vm_map_copy_footprint_ledgers:
19908 * copies any ledger that's relevant to the memory footprint of "old_task"
19909 * into the forked corpse's task ("new_task")
/*
 * vm_map_copy_footprint_ledgers: call vm_map_copy_ledger() once per
 * footprint-related ledger, passing "old_task", "new_task" and the ledger's
 * index from task_ledgers.
 *
 * The list below covers phys_footprint, the internal / iokit / alternate
 * accounting / page table ledgers, the *_footprint and *_nonvolatile
 * ledgers (with their _compressed variants) and wired_mem. The volatile and
 * *_nofootprint ledgers do not appear in this list.
 *
 * NOTE(review): the return type, parameter declarations and braces are on
 * lines missing from this text.
 */
19912 vm_map_copy_footprint_ledgers(
19916 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.phys_footprint
);
19917 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.purgeable_nonvolatile
);
19918 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.purgeable_nonvolatile_compressed
);
19919 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.internal
);
19920 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.internal_compressed
);
19921 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.iokit_mapped
);
19922 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.alternate_accounting
);
19923 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.alternate_accounting_compressed
);
19924 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.page_table
);
19925 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.tagged_footprint
);
19926 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.tagged_footprint_compressed
);
19927 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.network_nonvolatile
);
19928 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.network_nonvolatile_compressed
);
19929 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.media_footprint
);
19930 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.media_footprint_compressed
);
19931 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.graphics_footprint
);
19932 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.graphics_footprint_compressed
);
19933 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.neural_footprint
);
19934 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.neural_footprint_compressed
);
19935 vm_map_copy_ledger(old_task
, new_task
, task_ledgers
.wired_mem
);
19939 * vm_map_copy_ledger:
19940 * copy a single ledger from "old_task" to "new_task"
/*
 * vm_map_copy_ledger: bring one ledger of "new_task" in line with the same
 * ledger of "old_task".
 *
 * Visible behaviour: asserts that new_task's map has a corpse footprint,
 * disables panic-on-negative on new_task's ledger, reads a balance from
 * each task's ledger, then debits new_task's ledger when its balance is
 * higher than old_task's or credits it when lower. Nothing is adjusted
 * when the two balances are equal.
 *
 * NOTE(review): the return type, parameter list, braces and the trailing
 * arguments of every ledger_*() call (ledger index, balance out-pointers,
 * delta) are on lines missing from this text.
 */
19943 vm_map_copy_ledger(
19948 ledger_amount_t old_balance
, new_balance
, delta
;
19950 assert(new_task
->map
->has_corpse_footprint
);
/* Guard for release builds, where the assert above is presumably compiled out; branch body not visible here. */
19951 if (!new_task
->map
->has_corpse_footprint
) {
19955 /* turn off sanity checks for the ledger we're about to mess with */
19956 ledger_disable_panic_on_negative(new_task
->ledger
,
19959 /* adjust "new_task" to match "old_task" */
19960 ledger_get_balance(old_task
->ledger
,
19963 ledger_get_balance(new_task
->ledger
,
/* delta is always computed as (larger - smaller), so it is never negative. */
19966 if (new_balance
== old_balance
) {
19967 /* new == old: done */
19968 } else if (new_balance
> old_balance
) {
19969 /* new > old ==> new -= new - old */
19970 delta
= new_balance
- old_balance
;
19971 ledger_debit(new_task
->ledger
,
19975 /* new < old ==> new += old - new */
19976 delta
= old_balance
- new_balance
;
19977 ledger_credit(new_task
->ledger
,
/*
 * Tunables defined elsewhere. Both are read by the ledger balance check
 * that follows this aggregate: pmap_ledgers_panic selects panic() over
 * printf(), and pmap_ledgers_panic_leeway is multiplied by PAGE_SIZE to
 * form a tolerance on each balance.
 */
19985 extern int pmap_ledgers_panic
;
19986 extern int pmap_ledgers_panic_leeway
;
/*
 * LEDGER_DRIFT(name) declares six members for one ledger: a count, a
 * running total and a maximum for balances found above zero
 * (name_over*), and the same three for balances found below zero
 * (name_under*). The final member carries no ';' so that each use site
 * supplies it.
 */
19988 #define LEDGER_DRIFT(__LEDGER) \
19989 int __LEDGER##_over; \
19990 ledger_amount_t __LEDGER##_over_total; \
19991 ledger_amount_t __LEDGER##_over_max; \
19992 int __LEDGER##_under; \
19993 ledger_amount_t __LEDGER##_under_total; \
19994 ledger_amount_t __LEDGER##_under_max
/*
 * Members of the pmap_ledgers_drift aggregate (its opening line is not
 * visible in this text): the number of pmaps checked, followed by one
 * LEDGER_DRIFT group per ledger.
 */
19997 uint64_t num_pmaps_checked
;
19999 LEDGER_DRIFT(phys_footprint
);
20000 LEDGER_DRIFT(internal
);
20001 LEDGER_DRIFT(internal_compressed
);
20002 LEDGER_DRIFT(iokit_mapped
);
20003 LEDGER_DRIFT(alternate_accounting
);
20004 LEDGER_DRIFT(alternate_accounting_compressed
);
20005 LEDGER_DRIFT(page_table
);
20006 LEDGER_DRIFT(purgeable_volatile
);
20007 LEDGER_DRIFT(purgeable_nonvolatile
);
20008 LEDGER_DRIFT(purgeable_volatile_compressed
);
20009 LEDGER_DRIFT(purgeable_nonvolatile_compressed
);
20010 LEDGER_DRIFT(tagged_nofootprint
);
20011 LEDGER_DRIFT(tagged_footprint
);
20012 LEDGER_DRIFT(tagged_nofootprint_compressed
);
20013 LEDGER_DRIFT(tagged_footprint_compressed
);
20014 LEDGER_DRIFT(network_volatile
);
20015 LEDGER_DRIFT(network_nonvolatile
);
20016 LEDGER_DRIFT(network_volatile_compressed
);
20017 LEDGER_DRIFT(network_nonvolatile_compressed
);
20018 LEDGER_DRIFT(media_nofootprint
);
20019 LEDGER_DRIFT(media_footprint
);
20020 LEDGER_DRIFT(media_nofootprint_compressed
);
20021 LEDGER_DRIFT(media_footprint_compressed
);
20022 LEDGER_DRIFT(graphics_nofootprint
);
20023 LEDGER_DRIFT(graphics_footprint
);
20024 LEDGER_DRIFT(graphics_nofootprint_compressed
);
20025 LEDGER_DRIFT(graphics_footprint_compressed
);
20026 LEDGER_DRIFT(neural_nofootprint
);
20027 LEDGER_DRIFT(neural_footprint
);
20028 LEDGER_DRIFT(neural_nofootprint_compressed
);
20029 LEDGER_DRIFT(neural_footprint_compressed
);
20030 } pmap_ledgers_drift
;
/*
 * vm_map_pmap_check_ledgers: check the balance of every footprint-related
 * ledger and record any drift in pmap_ledgers_drift.
 *
 * Visible behaviour: increments pmap_ledgers_drift.num_pmaps_checked, runs
 * LEDGER_CHECK_BALANCE for each ledger, and at the end either panics or
 * prints "has imbalanced ledgers" depending on pmap_ledgers_panic.
 *
 * NOTE(review): the return type, parameter list, braces, parts of the
 * macro and the condition that leads to the final panic/printf are on
 * lines missing from this text ("do_panic" is presumably the flag that
 * drives it -- confirm against the complete source).
 */
20033 vm_map_pmap_check_ledgers(
20039 ledger_amount_t bal
;
20040 boolean_t do_panic
;
20044 pmap_ledgers_drift
.num_pmaps_checked
++;
/*
 * LEDGER_CHECK_BALANCE(name): reads the balance of task_ledgers.name and
 * its panic-on-negative setting. The visible test selects ledgers that
 * have panic-on-negative set, or whose balance lies outside
 * +/- pmap_ledgers_panic_leeway pages when pmap_ledgers_panic is set and
 * the leeway is positive. The macro prints the balance with the pid and
 * process name, and updates the matching _over / _under counters, totals
 * and maxima in pmap_ledgers_drift.
 *
 * NOTE(review): the comparison against (-pmap_ledgers_panic_leeway *
 * PAGE_SIZE) mixes a negated int with PAGE_SIZE; if PAGE_SIZE has an
 * unsigned type the product is converted to unsigned -- confirm the
 * comparison behaves as intended.
 */
20046 #define LEDGER_CHECK_BALANCE(__LEDGER) \
20048 int panic_on_negative = TRUE; \
20049 ledger_get_balance(ledger, \
20050 task_ledgers.__LEDGER, \
20052 ledger_get_panic_on_negative(ledger, \
20053 task_ledgers.__LEDGER, \
20054 &panic_on_negative); \
20056 if (panic_on_negative || \
20057 (pmap_ledgers_panic && \
20058 pmap_ledgers_panic_leeway > 0 && \
20059 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
20060 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
20063 printf("LEDGER BALANCE proc %d (%s) " \
20064 "\"%s\" = %lld\n", \
20065 pid, procname, #__LEDGER, bal); \
20067 pmap_ledgers_drift.__LEDGER##_over++; \
20068 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
20069 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
20070 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
20072 } else if (bal < 0) { \
20073 pmap_ledgers_drift.__LEDGER##_under++; \
20074 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
20075 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
20076 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
20082 LEDGER_CHECK_BALANCE(phys_footprint
);
20083 LEDGER_CHECK_BALANCE(internal
);
20084 LEDGER_CHECK_BALANCE(internal_compressed
);
20085 LEDGER_CHECK_BALANCE(iokit_mapped
);
20086 LEDGER_CHECK_BALANCE(alternate_accounting
);
20087 LEDGER_CHECK_BALANCE(alternate_accounting_compressed
);
20088 LEDGER_CHECK_BALANCE(page_table
);
20089 LEDGER_CHECK_BALANCE(purgeable_volatile
);
20090 LEDGER_CHECK_BALANCE(purgeable_nonvolatile
);
20091 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed
);
20092 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed
);
20093 LEDGER_CHECK_BALANCE(tagged_nofootprint
);
20094 LEDGER_CHECK_BALANCE(tagged_footprint
);
20095 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed
);
20096 LEDGER_CHECK_BALANCE(tagged_footprint_compressed
);
20097 LEDGER_CHECK_BALANCE(network_volatile
);
20098 LEDGER_CHECK_BALANCE(network_nonvolatile
);
20099 LEDGER_CHECK_BALANCE(network_volatile_compressed
);
20100 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed
);
20101 LEDGER_CHECK_BALANCE(media_nofootprint
);
20102 LEDGER_CHECK_BALANCE(media_footprint
);
20103 LEDGER_CHECK_BALANCE(media_nofootprint_compressed
);
20104 LEDGER_CHECK_BALANCE(media_footprint_compressed
);
20105 LEDGER_CHECK_BALANCE(graphics_nofootprint
);
20106 LEDGER_CHECK_BALANCE(graphics_footprint
);
20107 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed
);
20108 LEDGER_CHECK_BALANCE(graphics_footprint_compressed
);
20109 LEDGER_CHECK_BALANCE(neural_nofootprint
);
20110 LEDGER_CHECK_BALANCE(neural_footprint
);
20111 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed
);
20112 LEDGER_CHECK_BALANCE(neural_footprint_compressed
);
/* Final report: panic when pmap_ledgers_panic is set, otherwise only log the same message. */
20115 if (pmap_ledgers_panic
) {
20116 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20117 pmap
, pid
, procname
);
20119 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20120 pmap
, pid
, procname
);
20124 #endif /* MACH_ASSERT */