/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory mapping module.
 */
#include <task_swapper.h>
#include <mach_assert.h>

#include <vm/vm_options.h>

#include <libkern/OSAtomic.h>

#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/vm_attributes.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/vm_statistics.h>
#include <mach/memory_object.h>
#include <mach/mach_vm.h>
#include <machine/cpu_capabilities.h>

#include <kern/assert.h>
#include <kern/backtrace.h>
#include <kern/counters.h>
#include <kern/exc_guard.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include <vm/vm_compressor.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_init.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/xpr.h>

#include <mach/vm_map_server.h>
#include <mach/mach_host_server.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_shared_region.h>
#include <vm/vm_map_store.h>

#include <san/kasan.h>

#include <sys/codesign.h>
#include <libkern/section_keywords.h>
#if DEVELOPMENT || DEBUG
extern int proc_selfcsflags(void);
#if CONFIG_EMBEDDED
extern int panic_on_unsigned_execute;
#endif /* CONFIG_EMBEDDED */
#endif /* DEVELOPMENT || DEBUG */

#if __arm64__
extern const int fourk_binary_compatibility_unsafe;
extern const int fourk_binary_compatibility_allow_wx;
#endif /* __arm64__ */
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);

#if VM_MAP_DEBUG_APPLE_PROTECT
int vm_map_debug_apple_protect = 0;
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

#if VM_MAP_DEBUG_FOURK
int vm_map_debug_fourk = 0;
#endif /* VM_MAP_DEBUG_FOURK */

SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
int vm_map_executable_immutable_verbose = 0;

extern u_int32_t random(void);	/* from <libkern/libkern.h> */
/* Internal prototypes
 */

static void vm_map_simplify_range(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end);	/* forward */

static boolean_t vm_map_range_check(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end, vm_map_entry_t *entry);

static vm_map_entry_t _vm_map_entry_create(struct vm_map_header *map_header, boolean_t map_locked);

static void _vm_map_entry_dispose(struct vm_map_header *map_header, vm_map_entry_t entry);

static void vm_map_pmap_enter(vm_map_t map, vm_map_offset_t addr, vm_map_offset_t end_addr, vm_object_t object, vm_object_offset_t offset, vm_prot_t protection);

static void _vm_map_clip_end(struct vm_map_header *map_header, vm_map_entry_t entry, vm_map_offset_t end);

static void _vm_map_clip_start(struct vm_map_header *map_header, vm_map_entry_t entry, vm_map_offset_t start);

static void vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry);

static kern_return_t vm_map_delete(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end, int flags, vm_map_t zap_map);

static void vm_map_copy_insert(vm_map_t map, vm_map_entry_t after_where, vm_map_copy_t copy);

static kern_return_t vm_map_copy_overwrite_unaligned(vm_map_t dst_map, vm_map_entry_t entry, vm_map_copy_t copy, vm_map_address_t start, boolean_t discard_on_success);

static kern_return_t vm_map_copy_overwrite_aligned(vm_map_t dst_map, vm_map_entry_t tmp_entry, vm_map_copy_t copy, vm_map_offset_t start, pmap_t pmap);

static kern_return_t vm_map_copyin_kernel_buffer(vm_map_t src_map, vm_map_address_t src_addr, vm_map_size_t len, boolean_t src_destroy, vm_map_copy_t *copy_result); /* OUT */

static kern_return_t vm_map_copyout_kernel_buffer(vm_map_t map, vm_map_address_t *addr /* IN/OUT */, vm_map_copy_t copy, vm_map_size_t copy_size, boolean_t overwrite, boolean_t consume_on_success);

static void vm_map_fork_share(vm_map_t old_map, vm_map_entry_t old_entry, vm_map_t new_map);

static boolean_t vm_map_fork_copy(vm_map_t old_map, vm_map_entry_t *old_entry_p, vm_map_t new_map, int vm_map_copyin_flags);

static kern_return_t vm_map_wire_nested(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end, vm_prot_t caller_prot, vm_tag_t tag, boolean_t user_wire, pmap_t map_pmap, vm_map_offset_t pmap_addr, ppnum_t *physpage_p);

static kern_return_t vm_map_unwire_nested(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end, boolean_t user_wire, pmap_t map_pmap, vm_map_offset_t pmap_addr);

static kern_return_t vm_map_overwrite_submap_recurse(vm_map_t dst_map, vm_map_offset_t dst_addr, vm_map_size_t dst_size);

static kern_return_t vm_map_copy_overwrite_nested(vm_map_t dst_map, vm_map_offset_t dst_addr, vm_map_copy_t copy, boolean_t interruptible, pmap_t pmap, boolean_t discard_on_success);

static kern_return_t vm_map_remap_extract(vm_map_t map, vm_map_offset_t addr, vm_map_size_t size, boolean_t copy, struct vm_map_header *map_header, vm_prot_t *cur_protection, vm_prot_t *max_protection, vm_inherit_t inheritance, boolean_t pageable, boolean_t same_map, vm_map_kernel_flags_t vmk_flags);

static kern_return_t vm_map_remap_range_allocate(vm_map_t map, vm_map_address_t *address, vm_map_size_t size, vm_map_offset_t mask, int flags, vm_map_kernel_flags_t vmk_flags, vm_tag_t tag, vm_map_entry_t *map_entry);

static void vm_map_region_look_for_page(vm_map_t map, vm_map_offset_t va, vm_object_t object, vm_object_offset_t offset, int max_refcnt, int depth, vm_region_extended_info_t extended, mach_msg_type_number_t count);

static int vm_map_region_count_obj_refs(vm_map_entry_t entry, vm_object_t object);

static kern_return_t vm_map_willneed(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end);

static kern_return_t vm_map_reuse_pages(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end);

static kern_return_t vm_map_reusable_pages(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end);

static kern_return_t vm_map_can_reuse(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end);

#if MACH_ASSERT
static kern_return_t vm_map_pageout(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end);
#endif /* MACH_ASSERT */

static void vm_map_corpse_footprint_destroy(vm_map_t map);

pid_t find_largest_process_vm_map_entries(void);
/*
 *	Macros to copy a vm_map_entry. We must be careful to correctly
 *	manage the wired page count. vm_map_entry_copy() creates a new
 *	map entry to the same memory - the wired count in the new entry
 *	must be set to zero. vm_map_entry_copy_full() creates a new
 *	entry that is identical to the old entry.  This preserves the
 *	wire count; it's used for map splitting and zone changing.
 */

#if CONFIG_EMBEDDED

/*
 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 * But for security reasons on embedded platforms, we don't want the
 * new mapping to be "used for jit", so we always reset the flag here.
 * Same for "pmap_cs_associated".
 */
#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW,OLD)	\
MACRO_BEGIN					\
	(NEW)->used_for_jit = FALSE;		\
	(NEW)->pmap_cs_associated = FALSE;	\
MACRO_END

#else /* CONFIG_EMBEDDED */

/*
 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
 * On macOS, the new mapping can be "used for jit".
 */
#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW,OLD)			\
MACRO_BEGIN							\
	assert((NEW)->used_for_jit == (OLD)->used_for_jit);	\
	assert((NEW)->pmap_cs_associated == FALSE);		\
MACRO_END

#endif /* CONFIG_EMBEDDED */
#define vm_map_entry_copy(NEW,OLD)				\
MACRO_BEGIN							\
	boolean_t _vmec_reserved = (NEW)->from_reserved_zone;	\
	*(NEW) = *(OLD);					\
	(NEW)->is_shared = FALSE;				\
	(NEW)->needs_wakeup = FALSE;				\
	(NEW)->in_transition = FALSE;				\
	(NEW)->wired_count = 0;					\
	(NEW)->user_wired_count = 0;				\
	(NEW)->permanent = FALSE;				\
	VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD));		\
	(NEW)->from_reserved_zone = _vmec_reserved;		\
	if ((NEW)->iokit_acct) {				\
		assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
		(NEW)->iokit_acct = FALSE;			\
		(NEW)->use_pmap = TRUE;				\
	}							\
	(NEW)->vme_resilient_codesign = FALSE;			\
	(NEW)->vme_resilient_media = FALSE;			\
	(NEW)->vme_atomic = FALSE;				\
MACRO_END

#define vm_map_entry_copy_full(NEW,OLD)				\
MACRO_BEGIN							\
	boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;	\
	(*(NEW) = *(OLD));					\
	(NEW)->from_reserved_zone = _vmecf_reserved;		\
MACRO_END
/*
 * Decide if we want to allow processes to execute from their data or stack areas.
 * override_nx() returns true if we do.  Data/stack execution can be enabled independently
 * for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 * or allow_stack_exec to enable data execution for that type of data area for that particular
 * ABI (or both by or'ing the flags together).  These are initialized in the architecture
 * specific pmap files since the default behavior varies according to architecture.  The
 * main reason it varies is because of the need to provide binary compatibility with old
 * applications that were written before these restrictions came into being.  In the old
 * days, an app could execute anything it could read, but this has slowly been tightened
 * up over time.  The default behavior is:
 *
 *	32-bit PPC apps		may execute from both stack and data areas
 *	32-bit Intel apps	may execute from data areas but not stack
 *	64-bit PPC/Intel apps	may not execute from either data or stack
 *
 * An application on any architecture may override these defaults by explicitly
 * adding PROT_EXEC permission to the page in question with the mprotect(2)
 * system call.  This code here just determines what happens when an app tries to
 * execute from a page that lacks execute permission.
 *
 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 * execution from data areas for a particular binary even if the arch normally permits it. As
 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 * to support some complicated use cases, notably browsers with out-of-process plugins that
 * are not all NX-safe.
 */

extern int allow_data_exec, allow_stack_exec;
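
/*
 * allow_data_exec and allow_stack_exec are bitmasks built from VM_ABI_32
 * and VM_ABI_64, so the checks in override_nx() below reduce to a single
 * bitwise AND against the current ABI: for example, a value of VM_ABI_32
 * permits execution for 32-bit processes only, while VM_ABI_32|VM_ABI_64
 * permits it for both.
 */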
int
override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
{
	int current_abi;

	if (map->pmap == kernel_pmap) return FALSE;

	/*
	 * Determine if the app is running in 32 or 64 bit mode.
	 */

	if (vm_map_is_64bit(map))
		current_abi = VM_ABI_64;
	else
		current_abi = VM_ABI_32;

	/*
	 * Determine if we should allow the execution based on whether it's a
	 * stack or data area and the current architecture.
	 */

	if (user_tag == VM_MEMORY_STACK)
		return allow_stack_exec & current_abi;

	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
}
/*
 *	Virtual memory maps provide for the mapping, protection,
 *	and sharing of virtual memory objects.  In addition,
 *	this module provides for an efficient virtual copy of
 *	memory from one map to another.
 *
 *	Synchronization is required prior to most operations.
 *
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a single hint is used to speed up lookups.
 *
 *	Sharing maps have been deleted from this version of Mach.
 *	All shared objects are now mapped directly into the respective
 *	maps.  This requires a change in the copy on write strategy;
 *	the asymmetric (delayed) strategy is used for shared temporary
 *	objects instead of the symmetric (shadow) strategy.  All maps
 *	are now "top level" maps (either task map, kernel map or submap
 *	of the kernel map).
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *	No attempt is currently made to "glue back together" two
 *	entries that used to be a single entry.
 *
 *	The symmetric (shadow) copy strategy implements virtual copy
 *	by copying VM object references from one map to
 *	another, and then marking both regions as copy-on-write.
 *	It is important to note that only one writeable reference
 *	to a VM object region exists in any map when this strategy
 *	is used -- this means that shadow object creation can be
 *	delayed until a write operation occurs.  The asymmetric (delayed)
 *	strategy allows multiple maps to have writeable references to
 *	the same region of a vm object, and hence cannot delay creating
 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 *	Copying of permanent objects is completely different; see
 *	vm_object_copy_strategically() in vm_object.c.
 */
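
/*
 * Clipping in practice: _vm_map_clip_start() and _vm_map_clip_end()
 * (declared above) split an existing entry at a requested start or end
 * address, so that an operation over [start, end) only ever has to deal
 * with entries lying entirely inside that range.
 */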
static zone_t	vm_map_zone;			/* zone for vm_map structures */
zone_t		vm_map_entry_zone;		/* zone for vm_map_entry structures */
static zone_t	vm_map_entry_reserved_zone;	/* zone with reserve for non-blocking allocations */
static zone_t	vm_map_copy_zone;		/* zone for vm_map_copy structures */
zone_t		vm_map_holes_zone;		/* zone for vm map holes (vm_map_links) structures */

/*
 *	Placeholder object for submap operations.  This object is dropped
 *	into the range by a call to vm_map_find, and removed when
 *	vm_map_submap creates the submap.
 */
vm_object_t	vm_submap_object;

static void	*map_data;
static vm_size_t map_data_size;
static void	*kentry_data;
static vm_size_t kentry_data_size;
static void	*map_holes_data;
static vm_size_t map_holes_data_size;

#if CONFIG_EMBEDDED
#define	NO_COALESCE_LIMIT  0
#else
#define	NO_COALESCE_LIMIT  ((1024 * 128) - 1)
#endif

/* Skip acquiring locks if we're in the midst of a kernel core dump */
unsigned int not_in_kdp = 1;

unsigned int vm_map_set_cache_attr_count = 0;
kern_return_t
vm_map_set_cache_attr(
	vm_map_t	map,
	vm_map_offset_t	va)
{
	vm_map_entry_t	map_entry;
	vm_object_t	object;
	kern_return_t	kr = KERN_SUCCESS;

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, va, &map_entry) ||
	    map_entry->is_sub_map) {
		/*
		 * that memory is not properly mapped
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	object = VME_OBJECT(map_entry);

	if (object == VM_OBJECT_NULL) {
		/*
		 * there should be a VM object here at this point
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	vm_object_lock(object);
	object->set_cache_attr = TRUE;
	vm_object_unlock(object);

	vm_map_set_cache_attr_count++;
done:
	vm_map_unlock_read(map);

	return kr;
}
#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 */
595 vm_map_apple_protected(
597 vm_map_offset_t start
,
599 vm_object_offset_t crypto_backing_offset
,
600 struct pager_crypt_info
*crypt_info
)
602 boolean_t map_locked
;
604 vm_map_entry_t map_entry
;
605 struct vm_map_entry tmp_entry
;
606 memory_object_t unprotected_mem_obj
;
607 vm_object_t protected_object
;
608 vm_map_offset_t map_addr
;
609 vm_map_offset_t start_aligned
, end_aligned
;
610 vm_object_offset_t crypto_start
, crypto_end
;
612 vm_map_kernel_flags_t vmk_flags
;
615 vmk_flags
= VM_MAP_KERNEL_FLAGS_NONE
;
618 unprotected_mem_obj
= MEMORY_OBJECT_NULL
;
620 start_aligned
= vm_map_trunc_page(start
, PAGE_MASK_64
);
621 end_aligned
= vm_map_round_page(end
, PAGE_MASK_64
);
622 start_aligned
= vm_map_trunc_page(start_aligned
, VM_MAP_PAGE_MASK(map
));
623 end_aligned
= vm_map_round_page(end_aligned
, VM_MAP_PAGE_MASK(map
));
#if __arm64__
	/*
	 * "start" and "end" might be 4K-aligned but not 16K-aligned,
	 * so we might have to loop and establish up to 3 mappings:
	 *
	 * + the first 16K-page, which might overlap with the previous
	 *   4K-aligned mapping,
	 * + the center,
	 * + the last 16K-page, which might overlap with the next
	 *   4K-aligned mapping.
	 * Each of these mappings might be backed by a vnode pager (if
	 * properly page-aligned) or a "fourk_pager", itself backed by a
	 * vnode pager (if 4K-aligned but not page-aligned).
	 */
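	/*
	 * The loop below walks "map_addr" from "start_aligned" toward
	 * "end_aligned", clipping one map entry per iteration and advancing
	 * to that entry's "vme_end", so each of the (up to 3) sub-ranges
	 * described above is remapped in turn.
	 */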
639 #else /* __arm64__ */
640 assert(start_aligned
== start
);
641 assert(end_aligned
== end
);
642 #endif /* __arm64__ */
644 map_addr
= start_aligned
;
645 for (map_addr
= start_aligned
;
647 map_addr
= tmp_entry
.vme_end
) {
651 /* lookup the protected VM object */
652 if (!vm_map_lookup_entry(map
,
655 map_entry
->is_sub_map
||
656 VME_OBJECT(map_entry
) == VM_OBJECT_NULL
||
657 !(map_entry
->protection
& VM_PROT_EXECUTE
)) {
658 /* that memory is not properly mapped */
659 kr
= KERN_INVALID_ARGUMENT
;
663 /* get the protected object to be decrypted */
664 protected_object
= VME_OBJECT(map_entry
);
665 if (protected_object
== VM_OBJECT_NULL
) {
666 /* there should be a VM object here at this point */
667 kr
= KERN_INVALID_ARGUMENT
;
670 /* ensure protected object stays alive while map is unlocked */
671 vm_object_reference(protected_object
);
673 /* limit the map entry to the area we want to cover */
674 vm_map_clip_start(map
, map_entry
, start_aligned
);
675 vm_map_clip_end(map
, map_entry
, end_aligned
);
677 tmp_entry
= *map_entry
;
678 map_entry
= VM_MAP_ENTRY_NULL
; /* not valid after unlocking map */
683 * This map entry might be only partially encrypted
684 * (if not fully "page-aligned").
687 crypto_end
= tmp_entry
.vme_end
- tmp_entry
.vme_start
;
688 if (tmp_entry
.vme_start
< start
) {
689 if (tmp_entry
.vme_start
!= start_aligned
) {
690 kr
= KERN_INVALID_ADDRESS
;
692 crypto_start
+= (start
- tmp_entry
.vme_start
);
694 if (tmp_entry
.vme_end
> end
) {
695 if (tmp_entry
.vme_end
!= end_aligned
) {
696 kr
= KERN_INVALID_ADDRESS
;
698 crypto_end
-= (tmp_entry
.vme_end
- end
);
702 * This "extra backing offset" is needed to get the decryption
703 * routine to use the right key. It adjusts for the possibly
704 * relative offset of an interposed "4K" pager...
706 if (crypto_backing_offset
== (vm_object_offset_t
) -1) {
707 crypto_backing_offset
= VME_OFFSET(&tmp_entry
);
711 * Lookup (and create if necessary) the protected memory object
712 * matching that VM object.
713 * If successful, this also grabs a reference on the memory object,
714 * to guarantee that it doesn't go away before we get a chance to map
717 unprotected_mem_obj
= apple_protect_pager_setup(
719 VME_OFFSET(&tmp_entry
),
720 crypto_backing_offset
,
725 /* release extra ref on protected object */
726 vm_object_deallocate(protected_object
);
728 if (unprotected_mem_obj
== NULL
) {
733 vm_flags
= VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
;
734 /* can overwrite an immutable mapping */
735 vmk_flags
.vmkf_overwrite_immutable
= TRUE
;
737 if (tmp_entry
.used_for_jit
&&
738 (VM_MAP_PAGE_SHIFT(map
) != FOURK_PAGE_SHIFT
||
739 PAGE_SHIFT
!= FOURK_PAGE_SHIFT
) &&
740 fourk_binary_compatibility_unsafe
&&
741 fourk_binary_compatibility_allow_wx
) {
742 printf("** FOURK_COMPAT [%d]: "
743 "allowing write+execute at 0x%llx\n",
744 proc_selfpid(), tmp_entry
.vme_start
);
745 vmk_flags
.vmkf_map_jit
= TRUE
;
747 #endif /* __arm64__ */
749 /* map this memory object in place of the current one */
750 map_addr
= tmp_entry
.vme_start
;
751 kr
= vm_map_enter_mem_object(map
,
754 tmp_entry
.vme_start
),
755 (mach_vm_offset_t
) 0,
759 (ipc_port_t
)(uintptr_t) unprotected_mem_obj
,
762 tmp_entry
.protection
,
763 tmp_entry
.max_protection
,
764 tmp_entry
.inheritance
);
765 assertf(kr
== KERN_SUCCESS
,
767 assertf(map_addr
== tmp_entry
.vme_start
,
768 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
770 (uint64_t) tmp_entry
.vme_start
,
773 #if VM_MAP_DEBUG_APPLE_PROTECT
774 if (vm_map_debug_apple_protect
) {
775 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
776 " backing:[object:%p,offset:0x%llx,"
777 "crypto_backing_offset:0x%llx,"
778 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
781 (uint64_t) (map_addr
+ (tmp_entry
.vme_end
-
782 tmp_entry
.vme_start
)),
785 VME_OFFSET(&tmp_entry
),
786 crypto_backing_offset
,
790 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
793 * Release the reference obtained by
794 * apple_protect_pager_setup().
795 * The mapping (if it succeeded) is now holding a reference on
798 memory_object_deallocate(unprotected_mem_obj
);
799 unprotected_mem_obj
= MEMORY_OBJECT_NULL
;
801 /* continue with next map entry */
802 crypto_backing_offset
+= (tmp_entry
.vme_end
-
803 tmp_entry
.vme_start
);
804 crypto_backing_offset
-= crypto_start
;
814 #endif /* CONFIG_CODE_DECRYPTION */
lck_grp_t	vm_map_lck_grp;
lck_grp_attr_t	vm_map_lck_grp_attr;
lck_attr_t	vm_map_lck_attr;
lck_attr_t	vm_map_lck_rw_attr;

#if CONFIG_EMBEDDED
int malloc_no_cow = 1;
#define VM_PROTECT_WX_FAIL 0
#else /* CONFIG_EMBEDDED */
int malloc_no_cow = 0;
#define VM_PROTECT_WX_FAIL 1
#endif /* CONFIG_EMBEDDED */
uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
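
/*
 * malloc_no_cow defaults to 1 on embedded platforms and 0 elsewhere, and
 * can be overridden with the "malloc_no_cow" boot-arg parsed via
 * PE_parse_boot_argn() during VM map initialization below;
 * vm_memory_malloc_no_cow_mask selects which VM_MEMORY_MALLOC* tags the
 * policy applies to.
 */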
/*
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from zones -- we must
 *	initialize those zones.
 *
 *	There are three zones of interest:
 *
 *	vm_map_zone:			used to allocate maps.
 *	vm_map_entry_zone:		used to allocate map entries.
 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
 *
 *	The kernel allocates map entries from a special zone that is initially
 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
 *	the kernel to allocate more memory to an entry zone when it became
 *	empty since the very act of allocating memory implies the creation
 *	of a new entry.
 */
856 vm_size_t entry_zone_alloc_size
;
857 const char *mez_name
= "VM map entries";
859 vm_map_zone
= zinit((vm_map_size_t
) sizeof(struct _vm_map
), 40*1024,
861 zone_change(vm_map_zone
, Z_NOENCRYPT
, TRUE
);
862 #if defined(__LP64__)
863 entry_zone_alloc_size
= PAGE_SIZE
* 5;
865 entry_zone_alloc_size
= PAGE_SIZE
* 6;
867 vm_map_entry_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_entry
),
868 1024*1024, entry_zone_alloc_size
,
870 zone_change(vm_map_entry_zone
, Z_NOENCRYPT
, TRUE
);
871 zone_change(vm_map_entry_zone
, Z_NOCALLOUT
, TRUE
);
872 zone_change(vm_map_entry_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
874 vm_map_entry_reserved_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_entry
),
875 kentry_data_size
* 64, kentry_data_size
,
876 "Reserved VM map entries");
877 zone_change(vm_map_entry_reserved_zone
, Z_NOENCRYPT
, TRUE
);
878 /* Don't quarantine because we always need elements available */
879 zone_change(vm_map_entry_reserved_zone
, Z_KASAN_QUARANTINE
, FALSE
);
881 vm_map_copy_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_copy
),
882 16*1024, PAGE_SIZE
, "VM map copies");
883 zone_change(vm_map_copy_zone
, Z_NOENCRYPT
, TRUE
);
885 vm_map_holes_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_links
),
886 16*1024, PAGE_SIZE
, "VM map holes");
887 zone_change(vm_map_holes_zone
, Z_NOENCRYPT
, TRUE
);
890 * Cram the map and kentry zones with initial data.
891 * Set reserved_zone non-collectible to aid zone_gc().
893 zone_change(vm_map_zone
, Z_COLLECT
, FALSE
);
894 zone_change(vm_map_zone
, Z_FOREIGN
, TRUE
);
895 zone_change(vm_map_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
897 zone_change(vm_map_entry_reserved_zone
, Z_COLLECT
, FALSE
);
898 zone_change(vm_map_entry_reserved_zone
, Z_EXPAND
, FALSE
);
899 zone_change(vm_map_entry_reserved_zone
, Z_FOREIGN
, TRUE
);
900 zone_change(vm_map_entry_reserved_zone
, Z_NOCALLOUT
, TRUE
);
901 zone_change(vm_map_entry_reserved_zone
, Z_CALLERACCT
, FALSE
); /* don't charge caller */
902 zone_change(vm_map_copy_zone
, Z_CALLERACCT
, FALSE
); /* don't charge caller */
903 zone_change(vm_map_entry_reserved_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
905 zone_change(vm_map_holes_zone
, Z_COLLECT
, TRUE
);
906 zone_change(vm_map_holes_zone
, Z_EXPAND
, TRUE
);
907 zone_change(vm_map_holes_zone
, Z_FOREIGN
, TRUE
);
908 zone_change(vm_map_holes_zone
, Z_NOCALLOUT
, TRUE
);
909 zone_change(vm_map_holes_zone
, Z_CALLERACCT
, TRUE
);
910 zone_change(vm_map_holes_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
913 * Add the stolen memory to zones, adjust zone size and stolen counts.
914 * zcram only up to the maximum number of pages for each zone chunk.
916 zcram(vm_map_zone
, (vm_offset_t
)map_data
, map_data_size
);
918 const vm_size_t stride
= ZONE_CHUNK_MAXPAGES
* PAGE_SIZE
;
919 for (vm_offset_t off
= 0; off
< kentry_data_size
; off
+= stride
) {
920 zcram(vm_map_entry_reserved_zone
,
921 (vm_offset_t
)kentry_data
+ off
,
922 MIN(kentry_data_size
- off
, stride
));
924 for (vm_offset_t off
= 0; off
< map_holes_data_size
; off
+= stride
) {
925 zcram(vm_map_holes_zone
,
926 (vm_offset_t
)map_holes_data
+ off
,
927 MIN(map_holes_data_size
- off
, stride
));
930 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size
) + atop_64(kentry_data_size
) + atop_64(map_holes_data_size
));
932 lck_grp_attr_setdefault(&vm_map_lck_grp_attr
);
933 lck_grp_init(&vm_map_lck_grp
, "vm_map", &vm_map_lck_grp_attr
);
934 lck_attr_setdefault(&vm_map_lck_attr
);
936 lck_attr_setdefault(&vm_map_lck_rw_attr
);
937 lck_attr_cleardebug(&vm_map_lck_rw_attr
);
939 #if VM_MAP_DEBUG_APPLE_PROTECT
940 PE_parse_boot_argn("vm_map_debug_apple_protect",
941 &vm_map_debug_apple_protect
,
942 sizeof(vm_map_debug_apple_protect
));
943 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
944 #if VM_MAP_DEBUG_APPLE_FOURK
945 PE_parse_boot_argn("vm_map_debug_fourk",
947 sizeof(vm_map_debug_fourk
));
948 #endif /* VM_MAP_DEBUG_FOURK */
949 PE_parse_boot_argn("vm_map_executable_immutable",
950 &vm_map_executable_immutable
,
951 sizeof(vm_map_executable_immutable
));
952 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
953 &vm_map_executable_immutable_verbose
,
954 sizeof(vm_map_executable_immutable_verbose
));
956 PE_parse_boot_argn("malloc_no_cow",
958 sizeof(malloc_no_cow
));
960 vm_memory_malloc_no_cow_mask
= 0ULL;
961 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC
;
962 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_SMALL
;
963 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_LARGE
;
964 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
965 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
966 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_TINY
;
967 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE
;
968 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED
;
969 vm_memory_malloc_no_cow_mask
|= 1ULL << VM_MEMORY_MALLOC_NANO
;
970 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
971 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
972 &vm_memory_malloc_no_cow_mask
,
973 sizeof(vm_memory_malloc_no_cow_mask
));
981 uint32_t kentry_initial_pages
;
983 map_data_size
= round_page(10 * sizeof(struct _vm_map
));
984 map_data
= pmap_steal_memory(map_data_size
);
987 * kentry_initial_pages corresponds to the number of kernel map entries
988 * required during bootstrap until the asynchronous replenishment
989 * scheme is activated and/or entries are available from the general
992 #if defined(__LP64__)
993 kentry_initial_pages
= 10;
995 kentry_initial_pages
= 6;
999 /* If using the guard allocator, reserve more memory for the kernel
1000 * reserved map entry pool.
1002 if (gzalloc_enabled())
1003 kentry_initial_pages
*= 1024;
1006 kentry_data_size
= kentry_initial_pages
* PAGE_SIZE
;
1007 kentry_data
= pmap_steal_memory(kentry_data_size
);
1009 map_holes_data_size
= kentry_data_size
;
1010 map_holes_data
= pmap_steal_memory(map_holes_data_size
);
boolean_t vm_map_supports_hole_optimization = FALSE;

void
vm_kernel_reserved_entry_init(void) {
	zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));

	/*
	 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
	 */
	zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
	vm_map_supports_hole_optimization = TRUE;
}
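
/*
 * The two zone_prio_refill_configure() calls above keep
 * (6 * PAGE_SIZE) / sizeof(element) entries in reserve for each zone,
 * replenished asynchronously, so that map-entry and hole allocations
 * can be satisfied without blocking once the replenish thread exists.
 */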
void
vm_map_disable_hole_optimization(vm_map_t map)
{
	vm_map_entry_t	head_entry, hole_entry, next_hole_entry;

	if (map->holelistenabled) {

		head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

		while (hole_entry != NULL) {

			next_hole_entry = hole_entry->vme_next;

			hole_entry->vme_next = NULL;
			hole_entry->vme_prev = NULL;
			zfree(vm_map_holes_zone, hole_entry);

			if (next_hole_entry == head_entry) {
				hole_entry = NULL;
			} else {
				hole_entry = next_hole_entry;
			}
		}

		map->holes_list = NULL;
		map->holelistenabled = FALSE;

		map->first_free = vm_map_first_entry(map);
		SAVE_HINT_HOLE_WRITE(map, NULL);
	}
}
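
/*
 * Note: "holes_list" is a circular list, which is why the walk above
 * stops once "next_hole_entry" wraps back around to "head_entry".
 */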
boolean_t
vm_kernel_map_is_kernel(vm_map_t map) {
	return (map->pmap == kernel_pmap);
}
);
1066 * Creates and returns a new empty VM map with
1067 * the given physical map structure, and having
1068 * the given lower and upper address bounds.
1074 vm_map_offset_t min
,
1075 vm_map_offset_t max
,
1082 options
|= VM_MAP_CREATE_PAGEABLE
;
1084 return vm_map_create_options(pmap
, min
, max
, options
);
1088 vm_map_create_options(
1090 vm_map_offset_t min
,
1091 vm_map_offset_t max
,
1095 struct vm_map_links
*hole_entry
= NULL
;
1097 if (options
& ~(VM_MAP_CREATE_ALL_OPTIONS
)) {
1098 /* unknown option */
1102 result
= (vm_map_t
) zalloc(vm_map_zone
);
1103 if (result
== VM_MAP_NULL
)
1104 panic("vm_map_create");
1106 vm_map_first_entry(result
) = vm_map_to_entry(result
);
1107 vm_map_last_entry(result
) = vm_map_to_entry(result
);
1108 result
->hdr
.nentries
= 0;
1109 if (options
& VM_MAP_CREATE_PAGEABLE
) {
1110 result
->hdr
.entries_pageable
= TRUE
;
1112 result
->hdr
.entries_pageable
= FALSE
;
1115 vm_map_store_init( &(result
->hdr
) );
1117 result
->hdr
.page_shift
= PAGE_SHIFT
;
1120 result
->user_wire_limit
= MACH_VM_MAX_ADDRESS
; /* default limit is unlimited */
1121 result
->user_wire_size
= 0;
1123 result
->vmmap_high_start
= 0;
1124 #endif /* __x86_64__ */
1125 result
->map_refcnt
= 1;
1127 result
->res_count
= 1;
1128 result
->sw_state
= MAP_SW_IN
;
1129 #endif /* TASK_SWAPPER */
1130 result
->pmap
= pmap
;
1131 result
->min_offset
= min
;
1132 result
->max_offset
= max
;
1133 result
->wiring_required
= FALSE
;
1134 result
->no_zero_fill
= FALSE
;
1135 result
->mapped_in_other_pmaps
= FALSE
;
1136 result
->wait_for_space
= FALSE
;
1137 result
->switch_protect
= FALSE
;
1138 result
->disable_vmentry_reuse
= FALSE
;
1139 result
->map_disallow_data_exec
= FALSE
;
1140 result
->is_nested_map
= FALSE
;
1141 result
->map_disallow_new_exec
= FALSE
;
1142 result
->highest_entry_end
= 0;
1143 result
->first_free
= vm_map_to_entry(result
);
1144 result
->hint
= vm_map_to_entry(result
);
1145 result
->jit_entry_exists
= FALSE
;
1147 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1148 if (options
& VM_MAP_CREATE_CORPSE_FOOTPRINT
) {
1149 result
->has_corpse_footprint
= TRUE
;
1150 result
->holelistenabled
= FALSE
;
1151 result
->vmmap_corpse_footprint
= NULL
;
1153 result
->has_corpse_footprint
= FALSE
;
1154 if (vm_map_supports_hole_optimization
) {
1155 hole_entry
= zalloc(vm_map_holes_zone
);
1157 hole_entry
->start
= min
;
1158 #if defined(__arm__) || defined(__arm64__)
1159 hole_entry
->end
= result
->max_offset
;
1161 hole_entry
->end
= (max
> (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
) ? max
: (vm_map_offset_t
)MACH_VM_MAX_ADDRESS
;
1163 result
->holes_list
= result
->hole_hint
= hole_entry
;
1164 hole_entry
->prev
= hole_entry
->next
= CAST_TO_VM_MAP_ENTRY(hole_entry
);
1165 result
->holelistenabled
= TRUE
;
1167 result
->holelistenabled
= FALSE
;
1171 vm_map_lock_init(result
);
1172 lck_mtx_init_ext(&result
->s_lock
, &result
->s_lock_ext
, &vm_map_lck_grp
, &vm_map_lck_attr
);
/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion in the
 *	given map (or map copy).  No fields are filled.
 */
#define	vm_map_entry_create(map, map_locked)	_vm_map_entry_create(&(map)->hdr, map_locked)

#define	vm_map_copy_entry_create(copy, map_locked)				\
	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)

unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1189 static vm_map_entry_t
1190 _vm_map_entry_create(
1191 struct vm_map_header
*map_header
, boolean_t __unused map_locked
)
1194 vm_map_entry_t entry
;
1196 zone
= vm_map_entry_zone
;
1198 assert(map_header
->entries_pageable
? !map_locked
: TRUE
);
1200 if (map_header
->entries_pageable
) {
1201 entry
= (vm_map_entry_t
) zalloc(zone
);
1204 entry
= (vm_map_entry_t
) zalloc_canblock(zone
, FALSE
);
1206 if (entry
== VM_MAP_ENTRY_NULL
) {
1207 zone
= vm_map_entry_reserved_zone
;
1208 entry
= (vm_map_entry_t
) zalloc(zone
);
1209 OSAddAtomic(1, &reserved_zalloc_count
);
1211 OSAddAtomic(1, &nonreserved_zalloc_count
);
1214 if (entry
== VM_MAP_ENTRY_NULL
)
1215 panic("vm_map_entry_create");
1216 entry
->from_reserved_zone
= (zone
== vm_map_entry_reserved_zone
);
1218 vm_map_store_update( (vm_map_t
) NULL
, entry
, VM_MAP_ENTRY_CREATE
);
1219 #if MAP_ENTRY_CREATION_DEBUG
1220 entry
->vme_creation_maphdr
= map_header
;
1221 backtrace(&entry
->vme_creation_bt
[0],
1222 (sizeof(entry
->vme_creation_bt
)/sizeof(uintptr_t)));
/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 *
 *	write map lock held so no need to
 *	do anything special to insure correctness
 */
#define	vm_map_entry_dispose(map, entry)			\
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define	vm_map_copy_entry_dispose(map, entry)			\
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
static void
_vm_map_entry_dispose(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry)
{
	zone_t		zone;

	if (map_header->entries_pageable || !(entry->from_reserved_zone))
		zone = vm_map_entry_zone;
	else
		zone = vm_map_entry_reserved_zone;

	if (!map_header->entries_pageable) {
		if (zone == vm_map_entry_zone)
			OSAddAtomic(-1, &nonreserved_zalloc_count);
		else
			OSAddAtomic(-1, &reserved_zalloc_count);
	}

	zfree(zone, entry);
}
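
/*
 * reserved_zalloc_count and nonreserved_zalloc_count track how many
 * non-pageable (kernel) map entries are currently outstanding from the
 * reserved and regular entry zones; _vm_map_entry_create() increments
 * them and _vm_map_entry_dispose() above decrements them.
 */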
#if MACH_ASSERT
static boolean_t first_free_check = FALSE;
boolean_t
first_free_is_valid(
	vm_map_t	map)
{
	if (!first_free_check)
		return TRUE;

	return( first_free_is_valid_store( map ));
}
#endif /* MACH_ASSERT */

#define vm_map_copy_entry_link(copy, after_where, entry)		\
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry)				\
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1284 #if MACH_ASSERT && TASK_SWAPPER
1286 * vm_map_res_reference:
1288 * Adds another valid residence count to the given map.
1290 * Map is locked so this function can be called from
1294 void vm_map_res_reference(vm_map_t map
)
1296 /* assert map is locked */
1297 assert(map
->res_count
>= 0);
1298 assert(map
->map_refcnt
>= map
->res_count
);
1299 if (map
->res_count
== 0) {
1300 lck_mtx_unlock(&map
->s_lock
);
1303 lck_mtx_lock(&map
->s_lock
);
1311 * vm_map_reference_swap:
1313 * Adds valid reference and residence counts to the given map.
1315 * The map may not be in memory (i.e. zero residence count).
1318 void vm_map_reference_swap(vm_map_t map
)
1320 assert(map
!= VM_MAP_NULL
);
1321 lck_mtx_lock(&map
->s_lock
);
1322 assert(map
->res_count
>= 0);
1323 assert(map
->map_refcnt
>= map
->res_count
);
1325 vm_map_res_reference(map
);
1326 lck_mtx_unlock(&map
->s_lock
);
1330 * vm_map_res_deallocate:
1332 * Decrement residence count on a map; possibly causing swapout.
1334 * The map must be in memory (i.e. non-zero residence count).
1336 * The map is locked, so this function is callable from vm_map_deallocate.
1339 void vm_map_res_deallocate(vm_map_t map
)
1341 assert(map
->res_count
> 0);
1342 if (--map
->res_count
== 0) {
1343 lck_mtx_unlock(&map
->s_lock
);
1345 vm_map_swapout(map
);
1347 lck_mtx_lock(&map
->s_lock
);
1349 assert(map
->map_refcnt
>= map
->res_count
);
1351 #endif /* MACH_ASSERT && TASK_SWAPPER */
1356 * Actually destroy a map.
1365 /* final cleanup: no need to unnest shared region */
1366 flags
|= VM_MAP_REMOVE_NO_UNNESTING
;
1367 /* final cleanup: ok to remove immutable mappings */
1368 flags
|= VM_MAP_REMOVE_IMMUTABLE
;
1369 /* final cleanup: allow gaps in range */
1370 flags
|= VM_MAP_REMOVE_GAPS_OK
;
1372 /* clean up regular map entries */
1373 (void) vm_map_delete(map
, map
->min_offset
, map
->max_offset
,
1374 flags
, VM_MAP_NULL
);
1375 /* clean up leftover special mappings (commpage, etc...) */
1376 #if !defined(__arm__) && !defined(__arm64__)
1377 (void) vm_map_delete(map
, 0x0, 0xFFFFFFFFFFFFF000ULL
,
1378 flags
, VM_MAP_NULL
);
1379 #endif /* !__arm__ && !__arm64__ */
1381 vm_map_disable_hole_optimization(map
);
1382 vm_map_corpse_footprint_destroy(map
);
1386 assert(map
->hdr
.nentries
== 0);
1389 pmap_destroy(map
->pmap
);
	if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
		/*
		 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
		 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
		 * structure or kalloc'ed via lck_mtx_init.
		 * An example is s_lock_ext within struct _vm_map.
		 *
		 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
		 * can add another tag to detect embedded vs alloc'ed indirect external
		 * mutexes but that'll be additional checks in the lock path and require
		 * updating dependencies for the old vs new tag.
		 *
		 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
		 * just when lock debugging is ON, we choose to forego explicitly destroying
		 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
		 * count on vm_map_lck_grp, which has no serious side-effect.
		 */
	} else {
		lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
		lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
	}

	zfree(vm_map_zone, map);
/*
 * Returns pid of the task with the largest number of VM map entries.
 * Used in the zone-map-exhaustion jetsam path.
 */
pid_t
find_largest_process_vm_map_entries(void)
{
	pid_t victim_pid = -1;
	int max_vm_map_entries = 0;
	task_t task = TASK_NULL;
	queue_head_t *task_list = &tasks;

	lck_mtx_lock(&tasks_threads_lock);
	queue_iterate(task_list, task, task_t, tasks) {
		if (task == kernel_task || !task->active)
			continue;

		vm_map_t task_map = task->map;
		if (task_map != VM_MAP_NULL) {
			int task_vm_map_entries = task_map->hdr.nentries;
			if (task_vm_map_entries > max_vm_map_entries) {
				max_vm_map_entries = task_vm_map_entries;
				victim_pid = pid_from_task(task);
			}
		}
	}
	lck_mtx_unlock(&tasks_threads_lock);

	printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
	return victim_pid;
}
/*
 *	vm_map_swapin/vm_map_swapout
 *
 *	Swap a map in and out, either referencing or releasing its resources.
 *	These functions are internal use only; however, they must be exported
 *	because they may be called from macros, which are exported.
 *
 *	In the case of swapout, there could be races on the residence count,
 *	so if the residence count is up, we return, assuming that a
 *	vm_map_deallocate() call in the near future will bring us back.
 *
 *	-- We use the map write lock for synchronization among races.
 *	-- The map write lock, and not the simple s_lock, protects the
 *	   swap state of the map.
 *	-- If a map entry is a share map, then we hold both locks, in
 *	   hierarchical order.
 *
 *	Synchronization Notes:
 *	1) If a vm_map_swapin() call happens while swapout in progress, it
 *	will block on the map lock and proceed when swapout is through.
 *	2) A vm_map_reference() call at this time is illegal, and will
 *	cause a panic.  vm_map_reference() is only allowed on resident
 *	maps, since it refuses to block.
 *	3) A vm_map_swapin() call during a swapin will block, and
 *	proceed when the first swapin is done, turning into a nop.
 *	This is the reason the res_count is not incremented until
 *	after the swapin is complete.
 *	4) There is a timing hole after the checks of the res_count, before
 *	the map lock is taken, during which a swapin may get the lock
 *	before a swapout about to happen.  If this happens, the swapin
 *	will detect the state and increment the reference count, causing
 *	the swapout to be a nop, thereby delaying it until a later
 *	vm_map_deallocate.  If the swapout gets the lock first, then
 *	the swapin will simply block until the swapout is done, and
 *	then proceed.
 *
 *	Because vm_map_swapin() is potentially an expensive operation, it
 *	should be used with caution.
 *
 *	1) A map with a residence count of zero is either swapped, or
 *	   being swapped.
 *	2) A map with a non-zero residence count is either resident,
 *	   or being swapped in.
 */

#if	TASK_SWAPPER
int vm_map_swap_enable = 1;
1498 void vm_map_swapin (vm_map_t map
)
1500 vm_map_entry_t entry
;
1502 if (!vm_map_swap_enable
) /* debug */
1507 * First deal with various races.
1509 if (map
->sw_state
== MAP_SW_IN
)
1511 * we raced with swapout and won. Returning will incr.
1512 * the res_count, turning the swapout into a nop.
1517 * The residence count must be zero. If we raced with another
1518 * swapin, the state would have been IN; if we raced with a
1519 * swapout (after another competing swapin), we must have lost
1520 * the race to get here (see above comment), in which case
1521 * res_count is still 0.
1523 assert(map
->res_count
== 0);
1526 * There are no intermediate states of a map going out or
1527 * coming in, since the map is locked during the transition.
1529 assert(map
->sw_state
== MAP_SW_OUT
);
1532 * We now operate upon each map entry. If the entry is a sub-
1533 * or share-map, we call vm_map_res_reference upon it.
1534 * If the entry is an object, we call vm_object_res_reference
1535 * (this may iterate through the shadow chain).
1536 * Note that we hold the map locked the entire time,
1537 * even if we get back here via a recursive call in
1538 * vm_map_res_reference.
1540 entry
= vm_map_first_entry(map
);
1542 while (entry
!= vm_map_to_entry(map
)) {
1543 if (VME_OBJECT(entry
) != VM_OBJECT_NULL
) {
1544 if (entry
->is_sub_map
) {
1545 vm_map_t lmap
= VME_SUBMAP(entry
);
1546 lck_mtx_lock(&lmap
->s_lock
);
1547 vm_map_res_reference(lmap
);
1548 lck_mtx_unlock(&lmap
->s_lock
);
				vm_object_t object = VME_OBJECT(entry);
1551 vm_object_lock(object
);
1553 * This call may iterate through the
1556 vm_object_res_reference(object
);
1557 vm_object_unlock(object
);
1560 entry
= entry
->vme_next
;
1562 assert(map
->sw_state
== MAP_SW_OUT
);
1563 map
->sw_state
= MAP_SW_IN
;
1566 void vm_map_swapout(vm_map_t map
)
1568 vm_map_entry_t entry
;
1572 * First deal with various races.
1573 * If we raced with a swapin and lost, the residence count
1574 * will have been incremented to 1, and we simply return.
1576 lck_mtx_lock(&map
->s_lock
);
1577 if (map
->res_count
!= 0) {
1578 lck_mtx_unlock(&map
->s_lock
);
1581 lck_mtx_unlock(&map
->s_lock
);
1584 * There are no intermediate states of a map going out or
1585 * coming in, since the map is locked during the transition.
1587 assert(map
->sw_state
== MAP_SW_IN
);
1589 if (!vm_map_swap_enable
)
1593 * We now operate upon each map entry. If the entry is a sub-
1594 * or share-map, we call vm_map_res_deallocate upon it.
1595 * If the entry is an object, we call vm_object_res_deallocate
1596 * (this may iterate through the shadow chain).
1597 * Note that we hold the map locked the entire time,
1598 * even if we get back here via a recursive call in
1599 * vm_map_res_deallocate.
1601 entry
= vm_map_first_entry(map
);
1603 while (entry
!= vm_map_to_entry(map
)) {
1604 if (VME_OBJECT(entry
) != VM_OBJECT_NULL
) {
1605 if (entry
->is_sub_map
) {
1606 vm_map_t lmap
= VME_SUBMAP(entry
);
1607 lck_mtx_lock(&lmap
->s_lock
);
1608 vm_map_res_deallocate(lmap
);
1609 lck_mtx_unlock(&lmap
->s_lock
);
1611 vm_object_t object
= VME_OBJECT(entry
);
1612 vm_object_lock(object
);
1614 * This call may take a long time,
1615 * since it could actively push
1616 * out pages (if we implement it
1619 vm_object_res_deallocate(object
);
1620 vm_object_unlock(object
);
1623 entry
= entry
->vme_next
;
1625 assert(map
->sw_state
== MAP_SW_IN
);
1626 map
->sw_state
= MAP_SW_OUT
;
1629 #endif /* TASK_SWAPPER */
/*
 *	vm_map_lookup_entry:	[ internal use only ]
 *
 *	Calls into the vm map store layer to find the map
 *	entry containing (or immediately preceding) the
 *	specified address in the given map; the entry is returned
 *	in the "entry" parameter.  The boolean
 *	result indicates whether the address is
 *	actually contained in the map.
 */
boolean_t
vm_map_lookup_entry(
	vm_map_t		map,
	vm_map_offset_t		address,
	vm_map_entry_t		*entry)		/* OUT */
{
	return ( vm_map_store_lookup_entry( map, address, entry ));
}
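
/*
 * Callers typically hold the map lock (read or write), call
 * vm_map_lookup_entry(map, addr, &entry), and only dereference "entry"
 * when the boolean result is TRUE -- see vm_map_set_cache_attr() above
 * for an example of this pattern.
 */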
/*
 *	Routine:	vm_map_find_space
 *
 *	Allocate a range in the specified virtual address map,
 *	returning the entry allocated for that range.
 *	Used by kmem_alloc, etc.
 *
 *	The map must NOT be locked. It will be returned locked
 *	on KERN_SUCCESS, unlocked on failure.
 *
 *	If an entry is allocated, the object/offset fields
 *	are initialized to zero.
 */
1666 vm_map_offset_t
*address
, /* OUT */
1668 vm_map_offset_t mask
,
1670 vm_map_kernel_flags_t vmk_flags
,
1672 vm_map_entry_t
*o_entry
) /* OUT */
1674 vm_map_entry_t entry
, new_entry
;
1675 vm_map_offset_t start
;
1676 vm_map_offset_t end
;
1677 vm_map_entry_t hole_entry
;
1681 return KERN_INVALID_ARGUMENT
;
1684 if (vmk_flags
.vmkf_guard_after
) {
1685 /* account for the back guard page in the size */
1686 size
+= VM_MAP_PAGE_SIZE(map
);
1689 new_entry
= vm_map_entry_create(map
, FALSE
);
1692 * Look for the first possible address; if there's already
1693 * something at this address, we have to start after it.
1698 if( map
->disable_vmentry_reuse
== TRUE
) {
1699 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
1701 if (map
->holelistenabled
) {
1702 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
1704 if (hole_entry
== NULL
) {
1706 * No more space in the map?
1708 vm_map_entry_dispose(map
, new_entry
);
1710 return(KERN_NO_SPACE
);
1714 start
= entry
->vme_start
;
1716 assert(first_free_is_valid(map
));
1717 if ((entry
= map
->first_free
) == vm_map_to_entry(map
))
1718 start
= map
->min_offset
;
1720 start
= entry
->vme_end
;
1725 * In any case, the "entry" always precedes
1726 * the proposed new region throughout the loop:
1730 vm_map_entry_t next
;
1733 * Find the end of the proposed new region.
1734 * Be sure we didn't go beyond the end, or
1735 * wrap around the address.
1738 if (vmk_flags
.vmkf_guard_before
) {
1739 /* reserve space for the front guard page */
1740 start
+= VM_MAP_PAGE_SIZE(map
);
1742 end
= ((start
+ mask
) & ~mask
);
1745 vm_map_entry_dispose(map
, new_entry
);
1747 return(KERN_NO_SPACE
);
1750 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
1752 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
1754 if ((end
> map
->max_offset
) || (end
< start
)) {
1755 vm_map_entry_dispose(map
, new_entry
);
1757 return(KERN_NO_SPACE
);
1760 next
= entry
->vme_next
;
1762 if (map
->holelistenabled
) {
1763 if (entry
->vme_end
>= end
)
1767 * If there are no more entries, we must win.
1771 * If there is another entry, it must be
1772 * after the end of the potential new region.
1775 if (next
== vm_map_to_entry(map
))
1778 if (next
->vme_start
>= end
)
1783 * Didn't fit -- move to the next entry.
1788 if (map
->holelistenabled
) {
1789 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
1793 vm_map_entry_dispose(map
, new_entry
);
1795 return(KERN_NO_SPACE
);
1797 start
= entry
->vme_start
;
1799 start
= entry
->vme_end
;
1803 if (map
->holelistenabled
) {
1804 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
1805 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
1811 * "start" and "end" should define the endpoints of the
1812 * available new range, and
1813 * "entry" should refer to the region before the new
1816 * the map should be locked.
1819 if (vmk_flags
.vmkf_guard_before
) {
1820 /* go back for the front guard page */
1821 start
-= VM_MAP_PAGE_SIZE(map
);
1825 assert(start
< end
);
1826 new_entry
->vme_start
= start
;
1827 new_entry
->vme_end
= end
;
1828 assert(page_aligned(new_entry
->vme_start
));
1829 assert(page_aligned(new_entry
->vme_end
));
1830 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_start
,
1831 VM_MAP_PAGE_MASK(map
)));
1832 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_end
,
1833 VM_MAP_PAGE_MASK(map
)));
1835 new_entry
->is_shared
= FALSE
;
1836 new_entry
->is_sub_map
= FALSE
;
1837 new_entry
->use_pmap
= TRUE
;
1838 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
1839 VME_OFFSET_SET(new_entry
, (vm_object_offset_t
) 0);
1841 new_entry
->needs_copy
= FALSE
;
1843 new_entry
->inheritance
= VM_INHERIT_DEFAULT
;
1844 new_entry
->protection
= VM_PROT_DEFAULT
;
1845 new_entry
->max_protection
= VM_PROT_ALL
;
1846 new_entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
1847 new_entry
->wired_count
= 0;
1848 new_entry
->user_wired_count
= 0;
1850 new_entry
->in_transition
= FALSE
;
1851 new_entry
->needs_wakeup
= FALSE
;
1852 new_entry
->no_cache
= FALSE
;
1853 new_entry
->permanent
= FALSE
;
1854 new_entry
->superpage_size
= FALSE
;
1855 if (VM_MAP_PAGE_SHIFT(map
) != PAGE_SHIFT
) {
1856 new_entry
->map_aligned
= TRUE
;
1858 new_entry
->map_aligned
= FALSE
;
1861 new_entry
->used_for_jit
= FALSE
;
1862 new_entry
->pmap_cs_associated
= FALSE
;
1863 new_entry
->zero_wired_pages
= FALSE
;
1864 new_entry
->iokit_acct
= FALSE
;
1865 new_entry
->vme_resilient_codesign
= FALSE
;
1866 new_entry
->vme_resilient_media
= FALSE
;
1867 if (vmk_flags
.vmkf_atomic_entry
)
1868 new_entry
->vme_atomic
= TRUE
;
1870 new_entry
->vme_atomic
= FALSE
;
1872 VME_ALIAS_SET(new_entry
, tag
);
1875 * Insert the new entry into the list
1878 vm_map_store_entry_link(map
, entry
, new_entry
, VM_MAP_KERNEL_FLAGS_NONE
);
1883 * Update the lookup hint
1885 SAVE_HINT_MAP_WRITE(map
, new_entry
);
1887 *o_entry
= new_entry
;
1888 return(KERN_SUCCESS
);
1891 int vm_map_pmap_enter_print
= FALSE
;
1892 int vm_map_pmap_enter_enable
= FALSE
;
1895 * Routine: vm_map_pmap_enter [internal only]
1898 * Force pages from the specified object to be entered into
1899 * the pmap at the specified address if they are present.
1900 * As soon as a page not found in the object the scan ends.
1905 * In/out conditions:
1906 * The source map should not be locked on entry.
1908 __unused
static void
1911 vm_map_offset_t addr
,
1912 vm_map_offset_t end_addr
,
1914 vm_object_offset_t offset
,
1915 vm_prot_t protection
)
1919 struct vm_object_fault_info fault_info
= {};
1924 while (addr
< end_addr
) {
1930 * From vm_map_enter(), we come into this function without the map
1931 * lock held or the object lock held.
1932 * We haven't taken a reference on the object either.
1933 * We should do a proper lookup on the map to make sure
1934 * that things are sane before we go locking objects that
1935 * could have been deallocated from under us.
1938 vm_object_lock(object
);
1940 m
= vm_page_lookup(object
, offset
);
1942 if (m
== VM_PAGE_NULL
|| m
->vmp_busy
|| m
->vmp_fictitious
||
1943 (m
->vmp_unusual
&& ( m
->vmp_error
|| m
->vmp_restart
|| m
->vmp_absent
))) {
1944 vm_object_unlock(object
);
1948 if (vm_map_pmap_enter_print
) {
1949 printf("vm_map_pmap_enter:");
1950 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1951 map
, (unsigned long long)addr
, object
, (unsigned long long)offset
);
1953 type_of_fault
= DBG_CACHE_HIT_FAULT
;
1954 kr
= vm_fault_enter(m
, map
->pmap
,
1955 addr
, protection
, protection
,
1957 FALSE
, /* change_wiring */
1958 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
1960 NULL
, /* need_retry */
1963 vm_object_unlock(object
);
1965 offset
+= PAGE_SIZE_64
;
1970 boolean_t
vm_map_pmap_is_empty(
1972 vm_map_offset_t start
,
1973 vm_map_offset_t end
);
1974 boolean_t
vm_map_pmap_is_empty(
1976 vm_map_offset_t start
,
1977 vm_map_offset_t end
)
1979 #ifdef MACHINE_PMAP_IS_EMPTY
1980 return pmap_is_empty(map
->pmap
, start
, end
);
1981 #else /* MACHINE_PMAP_IS_EMPTY */
1982 vm_map_offset_t offset
;
1985 if (map
->pmap
== NULL
) {
1989 for (offset
= start
;
1991 offset
+= PAGE_SIZE
) {
1992 phys_page
= pmap_find_phys(map
->pmap
, offset
);
1994 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1995 "page %d at 0x%llx\n",
1996 map
, (long long)start
, (long long)end
,
1997 phys_page
, (long long)offset
);
2002 #endif /* MACHINE_PMAP_IS_EMPTY */
2005 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2007 vm_map_random_address_for_size(
2009 vm_map_offset_t
*address
,
2012 kern_return_t kr
= KERN_SUCCESS
;
2014 vm_map_offset_t random_addr
= 0;
2015 vm_map_offset_t hole_end
;
2017 vm_map_entry_t next_entry
= VM_MAP_ENTRY_NULL
;
2018 vm_map_entry_t prev_entry
= VM_MAP_ENTRY_NULL
;
2019 vm_map_size_t vm_hole_size
= 0;
2020 vm_map_size_t addr_space_size
;
2022 addr_space_size
= vm_map_max(map
) - vm_map_min(map
);
2024 assert(page_aligned(size
));
2026 while (tries
< MAX_TRIES_TO_GET_RANDOM_ADDRESS
) {
2027 random_addr
= ((vm_map_offset_t
)random()) << PAGE_SHIFT
;
2028 random_addr
= vm_map_trunc_page(
2029 vm_map_min(map
) +(random_addr
% addr_space_size
),
2030 VM_MAP_PAGE_MASK(map
));
2032 if (vm_map_lookup_entry(map
, random_addr
, &prev_entry
) == FALSE
) {
2033 if (prev_entry
== vm_map_to_entry(map
)) {
2034 next_entry
= vm_map_first_entry(map
);
2036 next_entry
= prev_entry
->vme_next
;
2038 if (next_entry
== vm_map_to_entry(map
)) {
2039 hole_end
= vm_map_max(map
);
2041 hole_end
= next_entry
->vme_start
;
2043 vm_hole_size
= hole_end
- random_addr
;
2044 if (vm_hole_size
>= size
) {
2045 *address
= random_addr
;
2052 if (tries
== MAX_TRIES_TO_GET_RANDOM_ADDRESS
) {
2059 vm_memory_malloc_no_cow(
2062 uint64_t alias_mask
;
2064 alias_mask
= 1ULL << alias
;
2065 if (alias_mask
& vm_memory_malloc_no_cow_mask
) {
2072 * Routine: vm_map_enter
2075 * Allocate a range in the specified virtual address map.
2076 * The resulting range will refer to memory defined by
2077 * the given memory object and offset into that object.
2079 * Arguments are as defined in the vm_map call.
int _map_enter_debug = 0;
static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;

kern_return_t
vm_map_enter(
	vm_map_t		map,
	vm_map_offset_t		*address,	/* IN/OUT */
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_kernel_flags_t	vmk_flags,
	vm_tag_t		alias,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		needs_copy,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	vm_map_entry_t		entry, new_entry;
	vm_map_offset_t		start, tmp_start, tmp_offset;
	vm_map_offset_t		end, tmp_end;
	vm_map_offset_t		tmp2_start, tmp2_end;
	vm_map_offset_t		desired_empty_end;
	vm_map_offset_t		step;
	kern_return_t		result = KERN_SUCCESS;
	vm_map_t		zap_old_map = VM_MAP_NULL;
	vm_map_t		zap_new_map = VM_MAP_NULL;
	boolean_t		map_locked = FALSE;
	boolean_t		pmap_empty = TRUE;
	boolean_t		new_mapping_established = FALSE;
	boolean_t		keep_map_locked = vmk_flags.vmkf_keep_map_locked;
	boolean_t		anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
	boolean_t		purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
	boolean_t		overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
	boolean_t		no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
	boolean_t		is_submap = vmk_flags.vmkf_submap;
	boolean_t		permanent = vmk_flags.vmkf_permanent;
	boolean_t		entry_for_jit = vmk_flags.vmkf_map_jit;
	boolean_t		iokit_acct = vmk_flags.vmkf_iokit_acct;
	boolean_t		resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
	boolean_t		resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
	boolean_t		random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
	unsigned int		superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
	vm_tag_t		user_alias;
	vm_map_offset_t		effective_min_offset, effective_max_offset;
	kern_return_t		kr;
	boolean_t		clear_map_aligned = FALSE;
	vm_map_entry_t		hole_entry;
	vm_map_size_t		chunk_size = 0;

	assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
	if (flags & VM_FLAGS_4GB_CHUNK) {
#if defined(__LP64__)
		chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
#else /* __LP64__ */
		chunk_size = ANON_CHUNK_SIZE;
#endif /* __LP64__ */
	} else {
		chunk_size = ANON_CHUNK_SIZE;
	}

	if (superpage_size) {
		switch (superpage_size) {
			/*
			 * Note that the current implementation only supports
			 * a single size for superpages, SUPERPAGE_SIZE, per
			 * architecture. As soon as more sizes are supposed
			 * to be supported, SUPERPAGE_SIZE has to be replaced
			 * with a lookup of the size depending on superpage_size.
			 */
		case SUPERPAGE_SIZE_ANY:
			/* handle it like 2 MB and round up to page size */
			size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
		case SUPERPAGE_SIZE_2MB:
			break;
		default:
			return KERN_INVALID_ARGUMENT;
		}
		mask = SUPERPAGE_SIZE-1;
		if (size & (SUPERPAGE_SIZE-1))
			return KERN_INVALID_ARGUMENT;
		inheritance = VM_INHERIT_NONE;	/* fork() children won't inherit superpages */
	}
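	/*
	 * Worked example (explanatory comment, not original code): with 2 MB
	 * superpages, SUPERPAGE_SIZE_ANY rounds a 3 MB request up to 4 MB:
	 *	(0x300000 + 0x1FFFFF) & ~0x1FFFFF == 0x400000
	 * and the size check above then requires the result to be an exact
	 * multiple of SUPERPAGE_SIZE, i.e. (size & (SUPERPAGE_SIZE-1)) == 0.
	 */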
	if ((cur_protection & VM_PROT_WRITE) &&
	    (cur_protection & VM_PROT_EXECUTE) &&
#if !CONFIG_EMBEDDED
	    map != kernel_map &&
	    (cs_process_global_enforcement() ||
	     (vmk_flags.vmkf_cs_enforcement_override
	      ? vmk_flags.vmkf_cs_enforcement
	      : cs_process_enforcement(NULL))) &&
#endif /* !CONFIG_EMBEDDED */
	    !entry_for_jit) {
		DTRACE_VM3(cs_wx,
			   uint64_t, 0,
			   uint64_t, 0,
			   vm_prot_t, cur_protection);
		printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
#if VM_PROTECT_WX_FAIL
		       "failing\n",
#else /* VM_PROTECT_WX_FAIL */
		       "turning off execute\n",
#endif /* VM_PROTECT_WX_FAIL */
		       proc_selfpid(),
		       (current_task()->bsd_info
			? proc_name_address(current_task()->bsd_info)
			: "?"),
		       __FUNCTION__);
		cur_protection &= ~VM_PROT_EXECUTE;
#if VM_PROTECT_WX_FAIL
		return KERN_PROTECTION_FAILURE;
#endif /* VM_PROTECT_WX_FAIL */
	}
	/*
	 * If the task has requested executable lockdown,
	 * deny any new executable mapping.
	 */
	if (map->map_disallow_new_exec == TRUE) {
		if (cur_protection & VM_PROT_EXECUTE) {
			return KERN_PROTECTION_FAILURE;
		}
	}

	if (resilient_codesign || resilient_media) {
		if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
		    (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
			return KERN_PROTECTION_FAILURE;
		}
	}

	if (is_submap) {
		if (purgable) {
			/* submaps can not be purgeable */
			return KERN_INVALID_ARGUMENT;
		}
		if (object == VM_OBJECT_NULL) {
			/* submaps can not be created lazily */
			return KERN_INVALID_ARGUMENT;
		}
	}
	if (vmk_flags.vmkf_already) {
		/*
		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
		 * is already present.  For it to be meaningful, the requested
		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
		 * we shouldn't try and remove what was mapped there first
		 * (!VM_FLAGS_OVERWRITE).
		 */
		if ((flags & VM_FLAGS_ANYWHERE) ||
		    (flags & VM_FLAGS_OVERWRITE)) {
			return KERN_INVALID_ARGUMENT;
		}
	}
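	/*
	 * Illustrative sketch (not part of the original source): a caller
	 * using VM_FLAGS_ALREADY maps at a fixed address and treats
	 * KERN_MEMORY_PRESENT as success when an identical mapping already
	 * exists.  The surrounding setup (a vmk_flags value with
	 * vmkf_already set) is assumed, not shown.
	 */
#if 0	/* example only */
	{
		kern_return_t example_kr;

		example_kr = vm_map_enter(map, address, size, 0,
					  VM_FLAGS_FIXED,	/* not ANYWHERE, not OVERWRITE */
					  vmk_flags,		/* with vmkf_already set */
					  alias, object, offset, needs_copy,
					  cur_protection, max_protection, inheritance);
		if (example_kr == KERN_MEMORY_PRESENT) {
			/* the exact same mapping already exists: treat as success */
			example_kr = KERN_SUCCESS;
		}
	}
#endif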
	effective_min_offset = map->min_offset;

	if (vmk_flags.vmkf_beyond_max) {
		/*
		 * Allow an insertion beyond the map's max offset.
		 */
#if	!defined(__arm__) && !defined(__arm64__)
		if (vm_map_is_64bit(map))
			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
		else
#endif	/* __arm__ */
			effective_max_offset = 0x00000000FFFFF000ULL;
	} else {
		effective_max_offset = map->max_offset;
	}

	if (size == 0 ||
	    (offset & PAGE_MASK_64) != 0) {
		return KERN_INVALID_ARGUMENT;
	}

	if (map->pmap == kernel_pmap) {
		user_alias = VM_KERN_MEMORY_NONE;
	} else {
		user_alias = alias;
	}

#define	RETURN(value)	{ result = value; goto BailOut; }

	assert(page_aligned(*address));
	assert(page_aligned(size));
	if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
		/*
		 * In most cases, the caller rounds the size up to the
		 * map's page size.
		 * If we get a size that is explicitly not map-aligned here,
		 * we'll have to respect the caller's wish and mark the
		 * mapping as "not map-aligned" to avoid tripping the
		 * map alignment checks later.
		 */
		clear_map_aligned = TRUE;
	}
	if (!anywhere &&
	    !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
		/*
		 * We've been asked to map at a fixed address and that
		 * address is not aligned to the map's specific alignment.
		 * The caller should know what it's doing (i.e. most likely
		 * mapping some fragmented copy map, transferring memory from
		 * a VM map with a different alignment), so clear map_aligned
		 * for this new VM map entry and proceed.
		 */
		clear_map_aligned = TRUE;
	}

	/*
	 * Only zero-fill objects are allowed to be purgable.
	 * LP64todo - limit purgable objects to 32-bits for now
	 */
	if (purgable &&
	    (offset != 0 ||
	     (object != VM_OBJECT_NULL &&
	      (object->vo_size != size ||
	       object->purgable == VM_PURGABLE_DENY))
	     || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
		return KERN_INVALID_ARGUMENT;
	if (!anywhere && overwrite) {
		/*
		 * Create a temporary VM map to hold the old mappings in the
		 * affected area while we create the new one.
		 * This avoids releasing the VM map lock in
		 * vm_map_entry_delete() and allows atomicity
		 * when we want to replace some mappings with a new one.
		 * It also allows us to restore the old VM mappings if the
		 * new mapping fails.
		 */
		zap_old_map = vm_map_create(PMAP_NULL,
					    *address,
					    *address + size,
					    map->hdr.entries_pageable);
		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
		vm_map_disable_hole_optimization(zap_old_map);
	}
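	/*
	 * Sketch of the save/restore pattern used below (explanatory only,
	 * not original code): entries removed from "map" are parked in
	 * "zap_old_map" via VM_MAP_REMOVE_SAVE_ENTRIES, and are re-linked
	 * into "map" later in this function only if establishing the new
	 * mapping fails:
	 *
	 *	(void) vm_map_delete(map, start, end,
	 *			     VM_MAP_REMOVE_SAVE_ENTRIES |
	 *			     VM_MAP_REMOVE_NO_MAP_ALIGN,
	 *			     zap_old_map);
	 *	... on failure, walk vm_map_first_entry(zap_old_map) and
	 *	vm_map_store_entry_link() each saved entry back into "map".
	 */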
	start = *address;

	if (anywhere) {
		vm_map_lock(map);
		map_locked = TRUE;

		if (entry_for_jit) {
#if CONFIG_EMBEDDED
			if (map->jit_entry_exists) {
				result = KERN_INVALID_ARGUMENT;
				goto BailOut;
			}
			random_address = TRUE;
#endif /* CONFIG_EMBEDDED */
		}

		if (random_address) {
			/*
			 * Get a random start address.
			 */
			result = vm_map_random_address_for_size(map, address, size);
			if (result != KERN_SUCCESS) {
				goto BailOut;
			}
			start = *address;
		}
#if __x86_64__
		else if ((start == 0 || start == vm_map_min(map)) &&
			 !map->disable_vmentry_reuse &&
			 map->vmmap_high_start != 0) {
			start = map->vmmap_high_start;
		}
#endif /* __x86_64__ */
		/*
		 *	Calculate the first possible address.
		 */

		if (start < effective_min_offset)
			start = effective_min_offset;
		if (start > effective_max_offset)
			RETURN(KERN_NO_SPACE);

		/*
		 *	Look for the first possible address;
		 *	if there's already something at this
		 *	address, we have to start after it.
		 */

		if( map->disable_vmentry_reuse == TRUE) {
			VM_MAP_HIGHEST_ENTRY(map, entry, start);
		} else {

			if (map->holelistenabled) {
				hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);

				if (hole_entry == NULL) {
					/*
					 * No more space in the map?
					 */
					result = KERN_NO_SPACE;
					goto BailOut;
				} else {

					boolean_t found_hole = FALSE;

					do {
						if (hole_entry->vme_start >= start) {
							start = hole_entry->vme_start;
							found_hole = TRUE;
							break;
						}

						if (hole_entry->vme_end > start) {
							found_hole = TRUE;
							break;
						}
						hole_entry = hole_entry->vme_next;

					} while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));

					if (found_hole == FALSE) {
						result = KERN_NO_SPACE;
						goto BailOut;
					}

					entry = hole_entry;

					if (start == 0)
						start += PAGE_SIZE_64;
				}
			} else {
				assert(first_free_is_valid(map));

				entry = map->first_free;

				if (entry == vm_map_to_entry(map)) {
					entry = NULL;
				} else {
					if (entry->vme_next == vm_map_to_entry(map)){
						/*
						 * Hole at the end of the map.
						 */
						entry = NULL;
					} else {
						if (start < (entry->vme_next)->vme_start) {
							start = entry->vme_end;
							start = vm_map_round_page(start,
										  VM_MAP_PAGE_MASK(map));
						} else {
							/*
							 * Need to do a lookup.
							 */
							entry = NULL;
						}
					}
				}

				if (entry == NULL) {
					vm_map_entry_t	tmp_entry;
					if (vm_map_lookup_entry(map, start, &tmp_entry)) {
						assert(!entry_for_jit);
						start = tmp_entry->vme_end;
						start = vm_map_round_page(start,
									  VM_MAP_PAGE_MASK(map));
					}
					entry = tmp_entry;
				}
			}
		}

		/*
		 *	In any case, the "entry" always precedes
		 *	the proposed new region throughout the
		 *	loop:
		 */

		while (TRUE) {
			vm_map_entry_t	next;

			/*
			 * Find the end of the proposed new region.
			 * Be sure we didn't go beyond the end, or
			 * wrap around the address.
			 */

			end = ((start + mask) & ~mask);
			end = vm_map_round_page(end,
						VM_MAP_PAGE_MASK(map));
			if (end < start)
				RETURN(KERN_NO_SPACE);
			start = end;
			assert(VM_MAP_PAGE_ALIGNED(start,
						   VM_MAP_PAGE_MASK(map)));
			end += size;
			/* We want an entire page of empty space, but don't increase the allocation size. */
			desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));

			if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
				if (map->wait_for_space) {
					assert(!keep_map_locked);
					if (size <= (effective_max_offset -
						     effective_min_offset)) {
						assert_wait((event_t)map,
							    THREAD_ABORTSAFE);
						vm_map_unlock(map);
						map_locked = FALSE;
						thread_block(THREAD_CONTINUE_NULL);
					}
				}
				RETURN(KERN_NO_SPACE);
			}

			next = entry->vme_next;

			if (map->holelistenabled) {
				if (entry->vme_end >= desired_empty_end)
					break;
			} else {
				/*
				 *	If there are no more entries, we must win.
				 *
				 *	OR
				 *
				 *	If there is another entry, it must be
				 *	after the end of the potential new region.
				 */

				if (next == vm_map_to_entry(map))
					break;

				if (next->vme_start >= desired_empty_end)
					break;
			}

			/*
			 *	Didn't fit -- move to the next entry.
			 */

			entry = next;

			if (map->holelistenabled) {
				if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
					/*
					 * Wrapped around
					 */
					result = KERN_NO_SPACE;
					goto BailOut;
				}
				start = entry->vme_start;
			} else {
				start = entry->vme_end;
			}

			start = vm_map_round_page(start,
						  VM_MAP_PAGE_MASK(map));
		}

		if (map->holelistenabled) {
			if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
				panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
			}
		}

		*address = start;
		assert(VM_MAP_PAGE_ALIGNED(*address,
					   VM_MAP_PAGE_MASK(map)));
	} else {
		/*
		 *	Verify that:
		 *		the address doesn't itself violate
		 *		the mask requirement.
		 */

		vm_map_lock(map);
		map_locked = TRUE;
		if ((start & mask) != 0)
			RETURN(KERN_NO_SPACE);

		/*
		 *	...	the address is within bounds
		 */

		end = start + size;

		if ((start < effective_min_offset) ||
		    (end > effective_max_offset) ||
		    (start >= end)) {
			RETURN(KERN_INVALID_ADDRESS);
		}

		if (overwrite && zap_old_map != VM_MAP_NULL) {
			int remove_flags;
			/*
			 * Fixed mapping and "overwrite" flag: attempt to
			 * remove all existing mappings in the specified
			 * address range, saving them in our "zap_old_map".
			 */
			remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
			remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
			if (vmk_flags.vmkf_overwrite_immutable) {
				/* we can overwrite immutable mappings */
				remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
			}
			(void) vm_map_delete(map, start, end,
					     remove_flags,
					     zap_old_map);
		}

		/*
		 *	...	the starting address isn't allocated
		 */

		if (vm_map_lookup_entry(map, start, &entry)) {
			if (! (vmk_flags.vmkf_already)) {
				RETURN(KERN_NO_SPACE);
			}
			/*
			 * Check if what's already there is what we want.
			 */
			tmp_start = start;
			tmp_offset = offset;
			if (entry->vme_start < start) {
				tmp_start -= start - entry->vme_start;
				tmp_offset -= start - entry->vme_start;
			}
			for (; entry->vme_start < end;
			     entry = entry->vme_next) {
				/*
				 * Check if the mapping's attributes
				 * match the existing map entry.
				 */
				if (entry == vm_map_to_entry(map) ||
				    entry->vme_start != tmp_start ||
				    entry->is_sub_map != is_submap ||
				    VME_OFFSET(entry) != tmp_offset ||
				    entry->needs_copy != needs_copy ||
				    entry->protection != cur_protection ||
				    entry->max_protection != max_protection ||
				    entry->inheritance != inheritance ||
				    entry->iokit_acct != iokit_acct ||
				    VME_ALIAS(entry) != alias) {
					/* not the same mapping ! */
					RETURN(KERN_NO_SPACE);
				}
				/*
				 * Check if the same object is being mapped.
				 */
				if (is_submap) {
					if (VME_SUBMAP(entry) !=
					    (vm_map_t) object) {
						/* not the same submap */
						RETURN(KERN_NO_SPACE);
					}
				} else {
					if (VME_OBJECT(entry) != object) {
						/* not the same VM object... */
						vm_object_t obj2;

						obj2 = VME_OBJECT(entry);
						if ((obj2 == VM_OBJECT_NULL ||
						     obj2->internal) &&
						    (object == VM_OBJECT_NULL ||
						     object->internal)) {
							/*
							 * ... but both are
							 * anonymous memory,
							 * so equivalent.
							 */
						} else {
							RETURN(KERN_NO_SPACE);
						}
					}
				}

				tmp_offset += entry->vme_end - entry->vme_start;
				tmp_start += entry->vme_end - entry->vme_start;
				if (entry->vme_end >= end) {
					/* reached the end of our mapping */
					break;
				}
			}
			/* it all matches:  let's use what's already there ! */
			RETURN(KERN_MEMORY_PRESENT);
		}
		/*
		 *	...	the next region doesn't overlap the
		 *		end point.
		 */

		if ((entry->vme_next != vm_map_to_entry(map)) &&
		    (entry->vme_next->vme_start < end))
			RETURN(KERN_NO_SPACE);
	}

	/*
	 *	At this point,
	 *		"start" and "end" should define the endpoints of the
	 *			available new range, and
	 *		"entry" should refer to the region before the new
	 *			range, and
	 *
	 *		the map should be locked.
	 */
	/*
	 *	See whether we can avoid creating a new entry (and object) by
	 *	extending one of our neighbors.  [So far, we only attempt to
	 *	extend from below.]  Note that we can never extend/join
	 *	purgable objects because they need to remain distinct
	 *	entities in order to implement their "volatile object"
	 *	semantics.
	 */

	if (purgable ||
	    entry_for_jit ||
	    vm_memory_malloc_no_cow(user_alias)) {
		if (object == VM_OBJECT_NULL) {

			object = vm_object_allocate(size);
			object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
			object->true_share = FALSE;
			if (purgable) {
				task_t owner;

				object->purgable = VM_PURGABLE_NONVOLATILE;
				if (map->pmap == kernel_pmap) {
					/*
					 * Purgeable mappings made in a kernel
					 * map are "owned" by the kernel itself
					 * rather than the current user task
					 * because they're likely to be used by
					 * more than this user task (see
					 * execargs_purgeable_allocate(), for
					 * example).
					 */
					owner = kernel_task;
				} else {
					owner = current_task();
				}
				assert(object->vo_owner == NULL);
				assert(object->resident_page_count == 0);
				assert(object->wired_page_count == 0);
				vm_object_lock(object);
				vm_purgeable_nonvolatile_enqueue(object, owner);
				vm_object_unlock(object);
			}
			offset = (vm_object_offset_t)0;
		}
	} else if ((is_submap == FALSE) &&
		   (object == VM_OBJECT_NULL) &&
		   (entry != vm_map_to_entry(map)) &&
		   (entry->vme_end == start) &&
		   (!entry->is_shared) &&
		   (!entry->is_sub_map) &&
		   (!entry->in_transition) &&
		   (!entry->needs_wakeup) &&
		   (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
		   (entry->protection == cur_protection) &&
		   (entry->max_protection == max_protection) &&
		   (entry->inheritance == inheritance) &&
		   ((user_alias == VM_MEMORY_REALLOC) ||
		    (VME_ALIAS(entry) == alias)) &&
		   (entry->no_cache == no_cache) &&
		   (entry->permanent == permanent) &&
		   /* no coalescing for immutable executable mappings */
		   !((entry->protection & VM_PROT_EXECUTE) &&
		     entry->permanent) &&
		   (!entry->superpage_size && !superpage_size) &&
		   /*
		    * No coalescing if not map-aligned, to avoid propagating
		    * that condition any further than needed:
		    */
		   (!entry->map_aligned || !clear_map_aligned) &&
		   (!entry->zero_wired_pages) &&
		   (!entry->used_for_jit && !entry_for_jit) &&
		   (!entry->pmap_cs_associated) &&
		   (entry->iokit_acct == iokit_acct) &&
		   (!entry->vme_resilient_codesign) &&
		   (!entry->vme_resilient_media) &&
		   (!entry->vme_atomic) &&

		   ((entry->vme_end - entry->vme_start) + size <=
		    (user_alias == VM_MEMORY_REALLOC ?
		     ANON_CHUNK_SIZE :
		     NO_COALESCE_LIMIT)) &&

		   (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
		if (vm_object_coalesce(VME_OBJECT(entry),
				       VM_OBJECT_NULL,
				       VME_OFFSET(entry),
				       (vm_object_offset_t) 0,
				       (vm_map_size_t)(entry->vme_end - entry->vme_start),
				       (vm_map_size_t)(end - entry->vme_end))) {

			/*
			 *	Coalesced the two objects - can extend
			 *	the previous map entry to include the
			 *	new range.
			 */
			map->size += (end - entry->vme_end);
			assert(entry->vme_start < end);
			assert(VM_MAP_PAGE_ALIGNED(end,
						   VM_MAP_PAGE_MASK(map)));
			if (__improbable(vm_debug_events))
				DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
			entry->vme_end = end;
			if (map->holelistenabled) {
				vm_map_store_update_first_free(map, entry, TRUE);
			} else {
				vm_map_store_update_first_free(map, map->first_free, TRUE);
			}
			new_mapping_established = TRUE;
			RETURN(KERN_SUCCESS);
		}
	}
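	/*
	 * Worked example (explanatory only): if the previous entry covers
	 * [0x1000, 0x3000) and the new request is [0x3000, 0x5000), a
	 * successful vm_object_coalesce() lets us simply grow that entry:
	 *	map->size      += 0x5000 - 0x3000;	(+0x2000)
	 *	entry->vme_end  = 0x5000;
	 * so no new vm_map_entry or vm_object is created for the allocation.
	 */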
	step = superpage_size ? SUPERPAGE_SIZE : (end - start);
	new_entry = NULL;

	for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
		tmp2_end = tmp2_start + step;
		/*
		 *	Create a new entry
		 *
		 * The reserved "page zero" in each process's address space can
		 * be arbitrarily large.  Splitting it into separate objects and
		 * therefore different VM map entries serves no purpose and just
		 * slows down operations on the VM map, so let's not split the
		 * allocation into chunks if the max protection is NONE.  That
		 * memory should never be accessible, so it will never get to the
		 * default pager.
		 */
		tmp_start = tmp2_start;
		if (object == VM_OBJECT_NULL &&
		    size > chunk_size &&
		    max_protection != VM_PROT_NONE &&
		    superpage_size == 0)
			tmp_end = tmp_start + chunk_size;
		else
			tmp_end = tmp2_end;
		do {
			new_entry = vm_map_entry_insert(
				map, entry, tmp_start, tmp_end,
				object, offset, needs_copy,
				FALSE, FALSE,
				cur_protection, max_protection,
				VM_BEHAVIOR_DEFAULT,
				(entry_for_jit)? VM_INHERIT_NONE: inheritance,
				0,
				no_cache,
				permanent,
				superpage_size,
				clear_map_aligned,
				is_submap,
				entry_for_jit,
				alias);

			assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
			if (resilient_codesign &&
			    ! ((cur_protection | max_protection) &
			       (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
				new_entry->vme_resilient_codesign = TRUE;
			}

			if (resilient_media &&
			    ! ((cur_protection | max_protection) &
			       (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
				new_entry->vme_resilient_media = TRUE;
			}

			assert(!new_entry->iokit_acct);
			if (!is_submap &&
			    object != VM_OBJECT_NULL &&
			    (object->purgable != VM_PURGABLE_DENY ||
			     object->vo_ledger_tag)) {
				assert(new_entry->use_pmap);
				assert(!new_entry->iokit_acct);
				/*
				 * Turn off pmap accounting since
				 * purgeable (or tagged) objects have their
				 * own ledgers.
				 */
				new_entry->use_pmap = FALSE;
			} else if (!is_submap &&
				   iokit_acct &&
				   object != VM_OBJECT_NULL &&
				   object->internal) {
				/* alternate accounting */
				assert(!new_entry->iokit_acct);
				assert(new_entry->use_pmap);
				new_entry->iokit_acct = TRUE;
				new_entry->use_pmap = FALSE;
				DTRACE_VM4(
					vm_map_iokit_mapped_region,
					vm_map_t, map,
					vm_map_offset_t, new_entry->vme_start,
					vm_map_offset_t, new_entry->vme_end,
					int, VME_ALIAS(new_entry));
				vm_map_iokit_mapped_region(
					map,
					(new_entry->vme_end -
					 new_entry->vme_start));
			} else if (!is_submap) {
				assert(!new_entry->iokit_acct);
				assert(new_entry->use_pmap);
			}
			if (is_submap) {
				vm_map_t	submap;
				boolean_t	submap_is_64bit;
				boolean_t	use_pmap;

				assert(new_entry->is_sub_map);
				assert(!new_entry->use_pmap);
				assert(!new_entry->iokit_acct);
				submap = (vm_map_t) object;
				submap_is_64bit = vm_map_is_64bit(submap);
				use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
#ifndef NO_NESTED_PMAP
				if (use_pmap && submap->pmap == NULL) {
					ledger_t ledger = map->pmap->ledger;
					/* we need a sub pmap to nest... */
					submap->pmap = pmap_create(ledger, 0,
								   submap_is_64bit);
					if (submap->pmap == NULL) {
						/* let's proceed without nesting... */
					}
#if	defined(__arm__) || defined(__arm64__)
					else {
						pmap_set_nested(submap->pmap);
					}
#endif
				}
				if (use_pmap && submap->pmap != NULL) {
					kr = pmap_nest(map->pmap,
						       submap->pmap,
						       tmp_start,
						       tmp_start,
						       tmp_end - tmp_start);
					if (kr != KERN_SUCCESS) {
						printf("vm_map_enter: "
						       "pmap_nest(0x%llx,0x%llx) "
						       "error 0x%x\n",
						       (long long)tmp_start,
						       (long long)tmp_end,
						       kr);
					} else {
						/* we're now nested ! */
						new_entry->use_pmap = TRUE;
						pmap_empty = FALSE;
					}
				}
#endif /* NO_NESTED_PMAP */
			}
			if (superpage_size) {
				vm_page_t		pages, m;
				vm_object_t		sp_object;
				vm_object_offset_t	sp_offset;

				VME_OFFSET_SET(entry, 0);

				/* allocate one superpage */
				kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
				if (kr != KERN_SUCCESS) {
					/* deallocate whole range... */
					new_mapping_established = TRUE;
					/* ... but only up to "tmp_end" */
					size -= end - tmp_end;
					RETURN(kr);
				}

				/* create one vm_object per superpage */
				sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
				sp_object->phys_contiguous = TRUE;
				sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
				VME_OBJECT_SET(entry, sp_object);
				assert(entry->use_pmap);

				/* enter the base pages into the object */
				vm_object_lock(sp_object);
				for (sp_offset = 0;
				     sp_offset < SUPERPAGE_SIZE;
				     sp_offset += PAGE_SIZE) {
					m = pages;
					pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
					pages = NEXT_PAGE(m);
					*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
					vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
				}
				vm_object_unlock(sp_object);
			}
		} while (tmp_end != tmp2_end &&
			 (tmp_start = tmp_end) &&
			 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
			  tmp_end + chunk_size : tmp2_end));
	}

	new_mapping_established = TRUE;
BailOut:
	assert(map_locked == TRUE);

	if (result == KERN_SUCCESS) {
		vm_prot_t pager_prot;
		memory_object_t pager;

#if DEBUG
		if (pmap_empty &&
		    !(vmk_flags.vmkf_no_pmap_check)) {
			assert(vm_map_pmap_is_empty(map,
						    *address,
						    *address+size));
		}
#endif /* DEBUG */

		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
		pager_prot = max_protection;
		if (needs_copy) {
			/*
			 * Copy-On-Write mapping: won't modify
			 * the memory object.
			 */
			pager_prot &= ~VM_PROT_WRITE;
		}
		if (!is_submap &&
		    object != VM_OBJECT_NULL &&
		    object->named &&
		    object->pager != MEMORY_OBJECT_NULL) {
			vm_object_lock(object);
			pager = object->pager;
			if (object->named &&
			    pager != MEMORY_OBJECT_NULL) {
				assert(object->pager_ready);
				vm_object_mapping_wait(object, THREAD_UNINT);
				vm_object_mapping_begin(object);
				vm_object_unlock(object);

				kr = memory_object_map(pager, pager_prot);
				assert(kr == KERN_SUCCESS);

				vm_object_lock(object);
				vm_object_mapping_end(object);
			}
			vm_object_unlock(object);
		}
	}
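	/*
	 * Illustrative call sequence (explanatory only): for a named object
	 * with a pager, each new mapping performs, in order:
	 *	vm_object_mapping_wait(object, THREAD_UNINT);
	 *	vm_object_mapping_begin(object);
	 *	vm_object_unlock(object);
	 *	memory_object_map(pager, pager_prot);	-- pager notes "mapped"
	 *	vm_object_lock(object);
	 *	vm_object_mapping_end(object);
	 * and the pager is told via memory_object_last_unmap() once the last
	 * mapping goes away, at which point it may reclaim the memory object.
	 */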
	assert(map_locked == TRUE);

	if (!keep_map_locked) {
		vm_map_unlock(map);
		map_locked = FALSE;
	}

	/*
	 * We can't hold the map lock if we enter this block.
	 */

	if (result == KERN_SUCCESS) {

		/*	Wire down the new entry if the user
		 *	requested all new map entries be wired.
		 */
		if ((map->wiring_required)||(superpage_size)) {
			assert(!keep_map_locked);
			pmap_empty = FALSE; /* pmap won't be empty */
			kr = vm_map_wire_kernel(map, start, end,
						new_entry->protection, VM_KERN_MEMORY_MLOCK,
						TRUE);
			result = kr;
		}

	}

	if (result != KERN_SUCCESS) {
		if (new_mapping_established) {
			/*
			 * We have to get rid of the new mappings since we
			 * won't make them available to the user.
			 * Try and do that atomically, to minimize the risk
			 * that someone else create new mappings that range.
			 */
			zap_new_map = vm_map_create(PMAP_NULL,
						    *address,
						    *address + size,
						    map->hdr.entries_pageable);
			vm_map_set_page_shift(zap_new_map,
					      VM_MAP_PAGE_SHIFT(map));
			vm_map_disable_hole_optimization(zap_new_map);

			if (!map_locked) {
				vm_map_lock(map);
				map_locked = TRUE;
			}
			(void) vm_map_delete(map, *address, *address+size,
					     (VM_MAP_REMOVE_SAVE_ENTRIES |
					      VM_MAP_REMOVE_NO_MAP_ALIGN),
					     zap_new_map);
		}
		if (zap_old_map != VM_MAP_NULL &&
		    zap_old_map->hdr.nentries != 0) {
			vm_map_entry_t	entry1, entry2;

			/*
			 * The new mapping failed.  Attempt to restore
			 * the old mappings, saved in the "zap_old_map".
			 */
			if (!map_locked) {
				vm_map_lock(map);
				map_locked = TRUE;
			}

			/* first check if the coast is still clear */
			start = vm_map_first_entry(zap_old_map)->vme_start;
			end = vm_map_last_entry(zap_old_map)->vme_end;
			if (vm_map_lookup_entry(map, start, &entry1) ||
			    vm_map_lookup_entry(map, end, &entry2) ||
			    entry1 != entry2) {
				/*
				 * Part of that range has already been
				 * re-mapped:  we can't restore the old
				 * mappings...
				 */
				vm_map_enter_restore_failures++;
			} else {
				/*
				 * Transfer the saved map entries from
				 * "zap_old_map" to the original "map",
				 * inserting them all after "entry1".
				 */
				for (entry2 = vm_map_first_entry(zap_old_map);
				     entry2 != vm_map_to_entry(zap_old_map);
				     entry2 = vm_map_first_entry(zap_old_map)) {
					vm_map_size_t entry_size;

					entry_size = (entry2->vme_end -
						      entry2->vme_start);
					vm_map_store_entry_unlink(zap_old_map,
								  entry2);
					zap_old_map->size -= entry_size;
					vm_map_store_entry_link(map, entry1, entry2,
								VM_MAP_KERNEL_FLAGS_NONE);
					map->size += entry_size;
					entry1 = entry2;
				}
				if (map->wiring_required) {
					/*
					 * XXX TODO: we should rewire the
					 * old pages here...
					 */
				}
				vm_map_enter_restore_successes++;
			}
		}
	}

	/*
	 * The caller is responsible for releasing the lock if it requested to
	 * keep the map locked.
	 */
	if (map_locked && !keep_map_locked) {
		vm_map_unlock(map);
	}

	/*
	 * Get rid of the "zap_maps" and all the map entries that
	 * they may still contain.
	 */
	if (zap_old_map != VM_MAP_NULL) {
		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_old_map = VM_MAP_NULL;
	}
	if (zap_new_map != VM_MAP_NULL) {
		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_new_map = VM_MAP_NULL;
	}

	return result;

#undef	RETURN
}
#if __arm64__
extern const struct memory_object_pager_ops fourk_pager_ops;

kern_return_t
vm_map_enter_fourk(
	vm_map_t		map,
	vm_map_offset_t		*address,	/* IN/OUT */
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_kernel_flags_t	vmk_flags,
	vm_tag_t		alias,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		needs_copy,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	vm_map_entry_t		entry, new_entry;
	vm_map_offset_t		start, fourk_start;
	vm_map_offset_t		end, fourk_end;
	vm_map_size_t		fourk_size;
	kern_return_t		result = KERN_SUCCESS;
	vm_map_t		zap_old_map = VM_MAP_NULL;
	vm_map_t		zap_new_map = VM_MAP_NULL;
	boolean_t		map_locked = FALSE;
	boolean_t		pmap_empty = TRUE;
	boolean_t		new_mapping_established = FALSE;
	boolean_t		keep_map_locked = vmk_flags.vmkf_keep_map_locked;
	boolean_t		anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
	boolean_t		purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
	boolean_t		overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
	boolean_t		no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
	boolean_t		is_submap = vmk_flags.vmkf_submap;
	boolean_t		permanent = vmk_flags.vmkf_permanent;
	boolean_t		entry_for_jit = vmk_flags.vmkf_map_jit;
//	boolean_t		iokit_acct = vmk_flags.vmkf_iokit_acct;
	unsigned int		superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
	vm_map_offset_t		effective_min_offset, effective_max_offset;
	kern_return_t		kr;
	boolean_t		clear_map_aligned = FALSE;
	memory_object_t		fourk_mem_obj;
	vm_object_t		fourk_object;
	vm_map_offset_t		fourk_pager_offset;
	int			fourk_pager_index_start, fourk_pager_index_num;
	int			cur_idx;
	boolean_t		fourk_copy;
	vm_object_t		copy_object;
	vm_object_offset_t	copy_offset;

	fourk_mem_obj = MEMORY_OBJECT_NULL;
	fourk_object = VM_OBJECT_NULL;

	if (superpage_size) {
		return KERN_NOT_SUPPORTED;
	}
	if ((cur_protection & VM_PROT_WRITE) &&
	    (cur_protection & VM_PROT_EXECUTE) &&
#if !CONFIG_EMBEDDED
	    map != kernel_map &&
	    cs_process_enforcement(NULL) &&
#endif /* !CONFIG_EMBEDDED */
	    !entry_for_jit) {
		DTRACE_VM3(cs_wx,
			   uint64_t, 0,
			   uint64_t, 0,
			   vm_prot_t, cur_protection);
		printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
		       "turning off execute\n",
		       proc_selfpid(),
		       (current_task()->bsd_info
			? proc_name_address(current_task()->bsd_info)
			: "?"),
		       __FUNCTION__);
		cur_protection &= ~VM_PROT_EXECUTE;
	}

	/*
	 * If the task has requested executable lockdown,
	 * deny any new executable mapping.
	 */
	if (map->map_disallow_new_exec == TRUE) {
		if (cur_protection & VM_PROT_EXECUTE) {
			return KERN_PROTECTION_FAILURE;
		}
	}

	if (is_submap) {
		return KERN_NOT_SUPPORTED;
	}
	if (vmk_flags.vmkf_already) {
		return KERN_NOT_SUPPORTED;
	}
	if (purgable || entry_for_jit) {
		return KERN_NOT_SUPPORTED;
	}

	effective_min_offset = map->min_offset;

	if (vmk_flags.vmkf_beyond_max) {
		return KERN_NOT_SUPPORTED;
	} else {
		effective_max_offset = map->max_offset;
	}

	if (size == 0 ||
	    (offset & FOURK_PAGE_MASK) != 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

#define	RETURN(value)	{ result = value; goto BailOut; }

	assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
	assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
	if (!anywhere && overwrite) {
		return KERN_NOT_SUPPORTED;
	}
	if (!anywhere && overwrite) {
		/*
		 * Create a temporary VM map to hold the old mappings in the
		 * affected area while we create the new one.
		 * This avoids releasing the VM map lock in
		 * vm_map_entry_delete() and allows atomicity
		 * when we want to replace some mappings with a new one.
		 * It also allows us to restore the old VM mappings if the
		 * new mapping fails.
		 */
		zap_old_map = vm_map_create(PMAP_NULL,
					    *address,
					    *address + size,
					    map->hdr.entries_pageable);
		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
		vm_map_disable_hole_optimization(zap_old_map);
	}

	fourk_start = *address;
	fourk_size = size;
	fourk_end = fourk_start + fourk_size;

	start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
	end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
	size = end - start;

	if (anywhere) {
		return KERN_NOT_SUPPORTED;
	} else {
		/*
		 *	Verify that:
		 *		the address doesn't itself violate
		 *		the mask requirement.
		 */

		vm_map_lock(map);
		map_locked = TRUE;
		if ((start & mask) != 0) {
			RETURN(KERN_NO_SPACE);
		}

		/*
		 *	...	the address is within bounds
		 */

		if ((start < effective_min_offset) ||
		    (end > effective_max_offset) ||
		    (start >= end)) {
			RETURN(KERN_INVALID_ADDRESS);
		}

		if (overwrite && zap_old_map != VM_MAP_NULL) {
			/*
			 * Fixed mapping and "overwrite" flag: attempt to
			 * remove all existing mappings in the specified
			 * address range, saving them in our "zap_old_map".
			 */
			(void) vm_map_delete(map, start, end,
					     (VM_MAP_REMOVE_SAVE_ENTRIES |
					      VM_MAP_REMOVE_NO_MAP_ALIGN),
					     zap_old_map);
		}

		/*
		 *	...	the starting address isn't allocated
		 */
		if (vm_map_lookup_entry(map, start, &entry)) {
			vm_object_t cur_object, shadow_object;

			/*
			 * We might already have some 4K mappings
			 * in a 16K page here.
			 */

			if (entry->vme_end - entry->vme_start
			    != SIXTEENK_PAGE_SIZE) {
				RETURN(KERN_NO_SPACE);
			}
			if (entry->is_sub_map) {
				RETURN(KERN_NO_SPACE);
			}
			if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
				RETURN(KERN_NO_SPACE);
			}

			/* go all the way down the shadow chain */
			cur_object = VME_OBJECT(entry);
			vm_object_lock(cur_object);
			while (cur_object->shadow != VM_OBJECT_NULL) {
				shadow_object = cur_object->shadow;
				vm_object_lock(shadow_object);
				vm_object_unlock(cur_object);
				cur_object = shadow_object;
				shadow_object = VM_OBJECT_NULL;
			}
			if (cur_object->internal ||
			    cur_object->pager == NULL) {
				vm_object_unlock(cur_object);
				RETURN(KERN_NO_SPACE);
			}
			if (cur_object->pager->mo_pager_ops
			    != &fourk_pager_ops) {
				vm_object_unlock(cur_object);
				RETURN(KERN_NO_SPACE);
			}
			fourk_object = cur_object;
			fourk_mem_obj = fourk_object->pager;

			/* keep the "4K" object alive */
			vm_object_reference_locked(fourk_object);
			vm_object_unlock(fourk_object);

			/* merge permissions */
			entry->protection |= cur_protection;
			entry->max_protection |= max_protection;
			if ((entry->protection & (VM_PROT_WRITE |
						  VM_PROT_EXECUTE)) ==
			    (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
			    fourk_binary_compatibility_unsafe &&
			    fourk_binary_compatibility_allow_wx) {
				/* write+execute: need to be "jit" */
				entry->used_for_jit = TRUE;
			}

			goto map_in_fourk_pager;
		}
		/*
		 *	...	the next region doesn't overlap the
		 *		end point.
		 */

		if ((entry->vme_next != vm_map_to_entry(map)) &&
		    (entry->vme_next->vme_start < end)) {
			RETURN(KERN_NO_SPACE);
		}
	}

	/*
	 *	At this point,
	 *		"start" and "end" should define the endpoints of the
	 *			available new range, and
	 *		"entry" should refer to the region before the new
	 *			range, and
	 *
	 *		the map should be locked.
	 */

	/* create a new "4K" pager */
	fourk_mem_obj = fourk_pager_create();
	fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
	assert(fourk_object);

	/* keep the "4K" object alive */
	vm_object_reference(fourk_object);

	/* create a "copy" object, to map the "4K" object copy-on-write */
	fourk_copy = TRUE;
	result = vm_object_copy_strategically(fourk_object,
					      0,
					      end - start,
					      &copy_object,
					      &copy_offset,
					      &fourk_copy);
	assert(result == KERN_SUCCESS);
	assert(copy_object != VM_OBJECT_NULL);
	assert(copy_offset == 0);

	/* take a reference on the copy object, for this mapping */
	vm_object_reference(copy_object);

	/* map the "4K" pager's copy object */
	new_entry =
		vm_map_entry_insert(map, entry,
				    vm_map_trunc_page(start,
						      VM_MAP_PAGE_MASK(map)),
				    vm_map_round_page(end,
						      VM_MAP_PAGE_MASK(map)),
				    copy_object,
				    0, /* offset */
				    FALSE, /* needs_copy */
				    FALSE, FALSE,
				    cur_protection, max_protection,
				    VM_BEHAVIOR_DEFAULT,
				    ((entry_for_jit)
				     ? VM_INHERIT_NONE
				     : inheritance),
				    0,
				    no_cache,
				    permanent,
				    superpage_size,
				    clear_map_aligned,
				    is_submap,
				    FALSE, /* jit */
				    alias);
#if VM_MAP_DEBUG_FOURK
	if (vm_map_debug_fourk) {
		printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
		       map,
		       (uint64_t) entry->vme_start,
		       (uint64_t) entry->vme_end,
		       fourk_mem_obj);
	}
#endif /* VM_MAP_DEBUG_FOURK */

	new_mapping_established = TRUE;

map_in_fourk_pager:
3522 /* "map" the original "object" where it belongs in the "4K" pager */
3523 fourk_pager_offset
= (fourk_start
& SIXTEENK_PAGE_MASK
);
3524 fourk_pager_index_start
= (int) (fourk_pager_offset
/ FOURK_PAGE_SIZE
);
3525 if (fourk_size
> SIXTEENK_PAGE_SIZE
) {
3526 fourk_pager_index_num
= 4;
3528 fourk_pager_index_num
= (int) (fourk_size
/ FOURK_PAGE_SIZE
);
3530 if (fourk_pager_index_start
+ fourk_pager_index_num
> 4) {
3531 fourk_pager_index_num
= 4 - fourk_pager_index_start
;
3534 cur_idx
< fourk_pager_index_num
;
3536 vm_object_t old_object
;
3537 vm_object_offset_t old_offset
;
3539 kr
= fourk_pager_populate(fourk_mem_obj
,
3540 TRUE
, /* overwrite */
3541 fourk_pager_index_start
+ cur_idx
,
3545 (cur_idx
* FOURK_PAGE_SIZE
))
3549 #if VM_MAP_DEBUG_FOURK
3550 if (vm_map_debug_fourk
) {
3551 if (old_object
== (vm_object_t
) -1 &&
3552 old_offset
== (vm_object_offset_t
) -1) {
3553 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3554 "pager [%p:0x%llx] "
3556 "[object:%p,offset:0x%llx]\n",
3558 (uint64_t) entry
->vme_start
,
3559 (uint64_t) entry
->vme_end
,
3562 fourk_pager_index_start
+ cur_idx
,
3565 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3568 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3569 "pager [%p:0x%llx] "
3570 "populate[%d] [object:%p,offset:0x%llx] "
3571 "old [%p:0x%llx]\n",
3573 (uint64_t) entry
->vme_start
,
3574 (uint64_t) entry
->vme_end
,
3577 fourk_pager_index_start
+ cur_idx
,
3580 ? (offset
+ (cur_idx
* FOURK_PAGE_SIZE
))
3586 #endif /* VM_MAP_DEBUG_FOURK */
3588 assert(kr
== KERN_SUCCESS
);
3589 if (object
!= old_object
&&
3590 object
!= VM_OBJECT_NULL
&&
3591 object
!= (vm_object_t
) -1) {
3592 vm_object_reference(object
);
3594 if (object
!= old_object
&&
3595 old_object
!= VM_OBJECT_NULL
&&
3596 old_object
!= (vm_object_t
) -1) {
3597 vm_object_deallocate(old_object
);
BailOut:
	assert(map_locked == TRUE);

	if (fourk_object != VM_OBJECT_NULL) {
		vm_object_deallocate(fourk_object);
		fourk_object = VM_OBJECT_NULL;
		fourk_mem_obj = MEMORY_OBJECT_NULL;
	}

	if (result == KERN_SUCCESS) {
		vm_prot_t pager_prot;
		memory_object_t pager;

#if DEBUG
		if (pmap_empty &&
		    !(vmk_flags.vmkf_no_pmap_check)) {
			assert(vm_map_pmap_is_empty(map,
						    *address,
						    *address+size));
		}
#endif /* DEBUG */

		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
		pager_prot = max_protection;
		if (needs_copy) {
			/*
			 * Copy-On-Write mapping: won't modify
			 * the memory object.
			 */
			pager_prot &= ~VM_PROT_WRITE;
		}
		if (!is_submap &&
		    object != VM_OBJECT_NULL &&
		    object->named &&
		    object->pager != MEMORY_OBJECT_NULL) {
			vm_object_lock(object);
			pager = object->pager;
			if (object->named &&
			    pager != MEMORY_OBJECT_NULL) {
				assert(object->pager_ready);
				vm_object_mapping_wait(object, THREAD_UNINT);
				vm_object_mapping_begin(object);
				vm_object_unlock(object);

				kr = memory_object_map(pager, pager_prot);
				assert(kr == KERN_SUCCESS);

				vm_object_lock(object);
				vm_object_mapping_end(object);
			}
			vm_object_unlock(object);
		}
		if (!is_submap &&
		    fourk_object != VM_OBJECT_NULL &&
		    fourk_object->named &&
		    fourk_object->pager != MEMORY_OBJECT_NULL) {
			vm_object_lock(fourk_object);
			pager = fourk_object->pager;
			if (fourk_object->named &&
			    pager != MEMORY_OBJECT_NULL) {
				assert(fourk_object->pager_ready);
				vm_object_mapping_wait(fourk_object,
						       THREAD_UNINT);
				vm_object_mapping_begin(fourk_object);
				vm_object_unlock(fourk_object);

				kr = memory_object_map(pager, VM_PROT_READ);
				assert(kr == KERN_SUCCESS);

				vm_object_lock(fourk_object);
				vm_object_mapping_end(fourk_object);
			}
			vm_object_unlock(fourk_object);
		}
	}
	assert(map_locked == TRUE);

	if (!keep_map_locked) {
		vm_map_unlock(map);
		map_locked = FALSE;
	}

	/*
	 * We can't hold the map lock if we enter this block.
	 */

	if (result == KERN_SUCCESS) {

		/*	Wire down the new entry if the user
		 *	requested all new map entries be wired.
		 */
		if ((map->wiring_required)||(superpage_size)) {
			assert(!keep_map_locked);
			pmap_empty = FALSE; /* pmap won't be empty */
			kr = vm_map_wire_kernel(map, start, end,
						new_entry->protection, VM_KERN_MEMORY_MLOCK,
						TRUE);
			result = kr;
		}

	}

	if (result != KERN_SUCCESS) {
		if (new_mapping_established) {
			/*
			 * We have to get rid of the new mappings since we
			 * won't make them available to the user.
			 * Try and do that atomically, to minimize the risk
			 * that someone else create new mappings that range.
			 */
			zap_new_map = vm_map_create(PMAP_NULL,
						    *address,
						    *address + size,
						    map->hdr.entries_pageable);
			vm_map_set_page_shift(zap_new_map,
					      VM_MAP_PAGE_SHIFT(map));
			vm_map_disable_hole_optimization(zap_new_map);

			if (!map_locked) {
				vm_map_lock(map);
				map_locked = TRUE;
			}
			(void) vm_map_delete(map, *address, *address+size,
					     (VM_MAP_REMOVE_SAVE_ENTRIES |
					      VM_MAP_REMOVE_NO_MAP_ALIGN),
					     zap_new_map);
		}
		if (zap_old_map != VM_MAP_NULL &&
		    zap_old_map->hdr.nentries != 0) {
			vm_map_entry_t	entry1, entry2;

			/*
			 * The new mapping failed.  Attempt to restore
			 * the old mappings, saved in the "zap_old_map".
			 */
			if (!map_locked) {
				vm_map_lock(map);
				map_locked = TRUE;
			}

			/* first check if the coast is still clear */
			start = vm_map_first_entry(zap_old_map)->vme_start;
			end = vm_map_last_entry(zap_old_map)->vme_end;
			if (vm_map_lookup_entry(map, start, &entry1) ||
			    vm_map_lookup_entry(map, end, &entry2) ||
			    entry1 != entry2) {
				/*
				 * Part of that range has already been
				 * re-mapped:  we can't restore the old
				 * mappings...
				 */
				vm_map_enter_restore_failures++;
			} else {
				/*
				 * Transfer the saved map entries from
				 * "zap_old_map" to the original "map",
				 * inserting them all after "entry1".
				 */
				for (entry2 = vm_map_first_entry(zap_old_map);
				     entry2 != vm_map_to_entry(zap_old_map);
				     entry2 = vm_map_first_entry(zap_old_map)) {
					vm_map_size_t entry_size;

					entry_size = (entry2->vme_end -
						      entry2->vme_start);
					vm_map_store_entry_unlink(zap_old_map,
								  entry2);
					zap_old_map->size -= entry_size;
					vm_map_store_entry_link(map, entry1, entry2,
								VM_MAP_KERNEL_FLAGS_NONE);
					map->size += entry_size;
					entry1 = entry2;
				}
				if (map->wiring_required) {
					/*
					 * XXX TODO: we should rewire the
					 * old pages here...
					 */
				}
				vm_map_enter_restore_successes++;
			}
		}
	}

	/*
	 * The caller is responsible for releasing the lock if it requested to
	 * keep the map locked.
	 */
	if (map_locked && !keep_map_locked) {
		vm_map_unlock(map);
	}

	/*
	 * Get rid of the "zap_maps" and all the map entries that
	 * they may still contain.
	 */
	if (zap_old_map != VM_MAP_NULL) {
		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_old_map = VM_MAP_NULL;
	}
	if (zap_new_map != VM_MAP_NULL) {
		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_new_map = VM_MAP_NULL;
	}

	return result;

#undef	RETURN
}
#endif /* __arm64__ */

/*
 * Counters for the prefault optimization.
 */
int64_t vm_prefault_nb_pages = 0;
int64_t vm_prefault_nb_bailout = 0;

static kern_return_t
vm_map_enter_mem_object_helper(
	vm_map_t		target_map,
	vm_map_offset_t		*address,
	vm_map_size_t		initial_size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_kernel_flags_t	vmk_flags,
	vm_tag_t		tag,
	ipc_port_t		port,
	vm_object_offset_t	offset,
	boolean_t		copy,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance,
	upl_page_list_ptr_t	page_list,
	unsigned int		page_list_count)
{
	vm_map_address_t	map_addr;
	vm_map_size_t		map_size;
	vm_object_t		object;
	vm_object_size_t	size;
	kern_return_t		result;
	boolean_t		mask_cur_protection, mask_max_protection;
	boolean_t		kernel_prefault, try_prefault = (page_list_count != 0);
	vm_map_offset_t		offset_in_mapping = 0;
#if __arm64__
	boolean_t		fourk = vmk_flags.vmkf_fourk;
#endif /* __arm64__ */

	assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);

	mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
	mask_max_protection = max_protection & VM_PROT_IS_MASK;
	cur_protection &= ~VM_PROT_IS_MASK;
	max_protection &= ~VM_PROT_IS_MASK;
	/*
	 * Check arguments for validity
	 */
	if ((target_map == VM_MAP_NULL) ||
	    (cur_protection & ~VM_PROT_ALL) ||
	    (max_protection & ~VM_PROT_ALL) ||
	    (inheritance > VM_INHERIT_LAST_VALID) ||
	    (try_prefault && (copy || !page_list)) ||
	    initial_size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

#if __arm64__
	if (fourk) {
		map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
		map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
	} else
#endif /* __arm64__ */
	{
		map_addr = vm_map_trunc_page(*address,
					     VM_MAP_PAGE_MASK(target_map));
		map_size = vm_map_round_page(initial_size,
					     VM_MAP_PAGE_MASK(target_map));
	}
	size = vm_object_round_page(initial_size);
	/*
	 * Find the vm object (if any) corresponding to this port.
	 */
	if (!IP_VALID(port)) {
		object = VM_OBJECT_NULL;
	} else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
		vm_named_entry_t	named_entry;

		named_entry = (vm_named_entry_t) port->ip_kobject;

		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
			     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
			offset += named_entry->data_offset;
		}

		/* a few checks to make sure user is obeying rules */
		if (size == 0) {
			if (offset >= named_entry->size)
				return KERN_INVALID_RIGHT;
			size = named_entry->size - offset;
		}
		if (mask_max_protection) {
			max_protection &= named_entry->protection;
		}
		if (mask_cur_protection) {
			cur_protection &= named_entry->protection;
		}
		if ((named_entry->protection & max_protection) !=
		    max_protection)
			return KERN_INVALID_RIGHT;
		if ((named_entry->protection & cur_protection) !=
		    cur_protection)
			return KERN_INVALID_RIGHT;
		if (offset + size < offset) {
			/* overflow */
			return KERN_INVALID_ARGUMENT;
		}
		if (named_entry->size < (offset + initial_size)) {
			return KERN_INVALID_ARGUMENT;
		}
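		/*
		 * Worked example (explanatory only) for the overflow check
		 * above: with a 64-bit offset, offset = 0xFFFFFFFFFFFFF000
		 * and size = 0x2000 give offset + size = 0x1000, which is
		 * smaller than offset, so the wrapped request is rejected
		 * with KERN_INVALID_ARGUMENT before it can be compared
		 * against named_entry->size.
		 */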
		if (named_entry->is_copy) {
			/* for a vm_map_copy, we can only map it whole */
			if ((size != named_entry->size) &&
			    (vm_map_round_page(size,
					       VM_MAP_PAGE_MASK(target_map)) ==
			     named_entry->size)) {
				/* XXX FBDP use the rounded size... */
				size = vm_map_round_page(
					size,
					VM_MAP_PAGE_MASK(target_map));
			}

			if (!(flags & VM_FLAGS_ANYWHERE) &&
			    (offset != 0 ||
			     size != named_entry->size)) {
				/*
				 * XXX for a mapping at a "fixed" address,
				 * we can't trim after mapping the whole
				 * memory entry, so reject a request for a
				 * fixed mapping of a portion of the memory
				 * entry.
				 */
				return KERN_INVALID_ARGUMENT;
			}
		}

		/* the caller's parameter offset is defined to be the */
		/* offset from beginning of named entry offset in object */
		offset = offset + named_entry->offset;

		if (! VM_MAP_PAGE_ALIGNED(size,
					  VM_MAP_PAGE_MASK(target_map))) {
			/*
			 * Let's not map more than requested;
			 * vm_map_enter() will handle this "not map-aligned"
			 * case.
			 */
			map_size = size;
		}
);
3973 if (named_entry
->is_sub_map
) {
3976 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
3977 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
3978 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3981 submap
= named_entry
->backing
.map
;
3982 vm_map_lock(submap
);
3983 vm_map_reference(submap
);
3984 vm_map_unlock(submap
);
3985 named_entry_unlock(named_entry
);
3987 vmk_flags
.vmkf_submap
= TRUE
;
3989 result
= vm_map_enter(target_map
,
3996 (vm_object_t
)(uintptr_t) submap
,
4002 if (result
!= KERN_SUCCESS
) {
4003 vm_map_deallocate(submap
);
4006 * No need to lock "submap" just to check its
4007 * "mapped" flag: that flag is never reset
4008 * once it's been set and if we race, we'll
4009 * just end up setting it twice, which is OK.
4011 if (submap
->mapped_in_other_pmaps
== FALSE
&&
4012 vm_map_pmap(submap
) != PMAP_NULL
&&
4013 vm_map_pmap(submap
) !=
4014 vm_map_pmap(target_map
)) {
4016 * This submap is being mapped in a map
4017 * that uses a different pmap.
4018 * Set its "mapped_in_other_pmaps" flag
4019 * to indicate that we now need to
4020 * remove mappings from all pmaps rather
4021 * than just the submap's pmap.
4023 vm_map_lock(submap
);
4024 submap
->mapped_in_other_pmaps
= TRUE
;
4025 vm_map_unlock(submap
);
4027 *address
= map_addr
;
		} else if (named_entry->is_copy) {
			kern_return_t	kr;
			vm_map_copy_t	copy_map;
			vm_map_entry_t	copy_entry;
			vm_map_offset_t	copy_addr;

			if (flags & ~(VM_FLAGS_FIXED |
				      VM_FLAGS_ANYWHERE |
				      VM_FLAGS_OVERWRITE |
				      VM_FLAGS_RETURN_4K_DATA_ADDR |
				      VM_FLAGS_RETURN_DATA_ADDR |
				      VM_FLAGS_ALIAS_MASK)) {
				named_entry_unlock(named_entry);
				return KERN_INVALID_ARGUMENT;
			}

			if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
				     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
				offset_in_mapping = offset - vm_object_trunc_page(offset);
				if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
					offset_in_mapping &= ~((signed)(0xFFF));
				offset = vm_object_trunc_page(offset);
				map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
			}

			copy_map = named_entry->backing.copy;
			assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
			if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
				/* unsupported type; should not happen */
				printf("vm_map_enter_mem_object: "
				       "memory_entry->backing.copy "
				       "unsupported type 0x%x\n",
				       copy_map->type);
				named_entry_unlock(named_entry);
				return KERN_INVALID_ARGUMENT;
			}

			/* reserve a contiguous range */
			kr = vm_map_enter(target_map,
					  &map_addr,
					  /* map whole mem entry, trim later: */
					  map_size,
					  mask,
					  flags & (VM_FLAGS_ANYWHERE |
						   VM_FLAGS_OVERWRITE |
						   VM_FLAGS_RETURN_4K_DATA_ADDR |
						   VM_FLAGS_RETURN_DATA_ADDR),
					  vmk_flags,
					  tag,
					  VM_OBJECT_NULL,
					  0,
					  FALSE, /* copy */
					  cur_protection,
					  max_protection,
					  inheritance);
			if (kr != KERN_SUCCESS) {
				named_entry_unlock(named_entry);
				return kr;
			}

			copy_addr = map_addr;
			for (copy_entry = vm_map_copy_first_entry(copy_map);
			     copy_entry != vm_map_copy_to_entry(copy_map);
			     copy_entry = copy_entry->vme_next) {
				int			remap_flags;
				vm_map_kernel_flags_t	vmk_remap_flags;
				vm_map_t		copy_submap;
				vm_object_t		copy_object;
				vm_map_size_t		copy_size;
				vm_object_offset_t	copy_offset;
				int			copy_vm_alias;

				remap_flags = 0;
				vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;

				copy_object = VME_OBJECT(copy_entry);
				copy_offset = VME_OFFSET(copy_entry);
				copy_size = (copy_entry->vme_end -
					     copy_entry->vme_start);
				VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
				if (copy_vm_alias == 0) {
					/*
					 * Caller does not want a specific
					 * alias for this new mapping:  use
					 * the alias of the original mapping.
					 */
					copy_vm_alias = VME_ALIAS(copy_entry);
				}

				/* sanity check */
				if ((copy_addr + copy_size) >
				    (map_addr +
				     named_entry->size /* XXX full size */ )) {
					/* over-mapping too much !? */
					kr = KERN_INVALID_ARGUMENT;
					/* abort */
					break;
				}
				/* take a reference on the object */
				if (copy_entry->is_sub_map) {
					vmk_remap_flags.vmkf_submap = TRUE;
					copy_submap = VME_SUBMAP(copy_entry);
					vm_map_lock(copy_submap);
					vm_map_reference(copy_submap);
					vm_map_unlock(copy_submap);
					copy_object = (vm_object_t)(uintptr_t) copy_submap;
				} else if (!copy &&
					   copy_object != VM_OBJECT_NULL &&
					   (copy_entry->needs_copy ||
					    copy_object->shadowed ||
					    (!copy_object->true_share &&
					     !copy_entry->is_shared &&
					     copy_object->vo_size > copy_size))) {
					/*
					 * We need to resolve our side of this
					 * "symmetric" copy-on-write now; we
					 * need a new object to map and share,
					 * instead of the current one which
					 * might still be shared with the
					 * original mapping.
					 *
					 * Note: A "vm_map_copy_t" does not
					 * have a lock but we're protected by
					 * the named entry's lock here.
					 */
					// assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
					VME_OBJECT_SHADOW(copy_entry, copy_size);
					if (!copy_entry->needs_copy &&
					    copy_entry->protection & VM_PROT_WRITE) {
						vm_prot_t prot;

						prot = copy_entry->protection & ~VM_PROT_WRITE;
						vm_object_pmap_protect(copy_object,
								       copy_offset,
								       copy_size,
								       PMAP_NULL,
								       0,
								       prot);
					}

					copy_entry->needs_copy = FALSE;
					copy_entry->is_shared = TRUE;
					copy_object = VME_OBJECT(copy_entry);
					copy_offset = VME_OFFSET(copy_entry);
					vm_object_lock(copy_object);
					vm_object_reference_locked(copy_object);
					if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
						/* we're about to make a shared mapping of this object */
						copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
						copy_object->true_share = TRUE;
					}
					vm_object_unlock(copy_object);
				} else {
					/*
					 * We already have the right object
					 * to map.
					 */
					copy_object = VME_OBJECT(copy_entry);
					vm_object_reference(copy_object);
				}

				/* over-map the object into destination */
				remap_flags |= flags;
				remap_flags |= VM_FLAGS_FIXED;
				remap_flags |= VM_FLAGS_OVERWRITE;
				remap_flags &= ~VM_FLAGS_ANYWHERE;
				if (!copy && !copy_entry->is_sub_map) {
					/*
					 * copy-on-write should have been
					 * resolved at this point, or we would
					 * end up sharing instead of copying.
					 */
					assert(!copy_entry->needs_copy);
				}
#if !CONFIG_EMBEDDED
				if (copy_entry->used_for_jit) {
					vmk_remap_flags.vmkf_map_jit = TRUE;
				}
#endif /* !CONFIG_EMBEDDED */
				kr = vm_map_enter(target_map,
						  &copy_addr,
						  copy_size,
						  (vm_map_offset_t) 0,
						  remap_flags,
						  vmk_remap_flags,
						  copy_vm_alias,
						  copy_object,
						  copy_offset,
						  ((copy_object == NULL) ? FALSE : copy),
						  cur_protection,
						  max_protection,
						  inheritance);
				if (kr != KERN_SUCCESS) {
					if (copy_entry->is_sub_map) {
						vm_map_deallocate(copy_submap);
					} else {
						vm_object_deallocate(copy_object);
					}
					/* abort */
					break;
				}

				/* next mapping */
				copy_addr += copy_size;
			}

			if (kr == KERN_SUCCESS) {
				if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
					     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
					*address = map_addr + offset_in_mapping;
				} else {
					*address = map_addr;
				}

				if (offset) {
					/*
					 * Trim in front, from 0 to "offset".
					 */
					vm_map_remove(target_map,
						      map_addr,
						      map_addr + offset,
						      VM_MAP_REMOVE_NO_FLAGS);
					*address += offset;
				}
				if (offset + map_size < named_entry->size) {
					/*
					 * Trim in back, from
					 * "offset + map_size" to
					 * "named_entry->size".
					 */
					vm_map_remove(target_map,
						      (map_addr +
						       offset + map_size),
						      (map_addr +
						       named_entry->size),
						      VM_MAP_REMOVE_NO_FLAGS);
				}
			}
			named_entry_unlock(named_entry);

			if (kr != KERN_SUCCESS) {
				if (! (flags & VM_FLAGS_OVERWRITE)) {
					/* deallocate the contiguous range */
					(void) vm_deallocate(target_map,
							     map_addr,
							     map_size);
				}
			}

			return kr;

		} else {
;
4286 vm_prot_t protections
;
4287 unsigned int wimg_mode
;
4289 /* we are mapping a VM object */
4291 protections
= named_entry
->protection
& VM_PROT_ALL
;
4292 access
= GET_MAP_MEM(named_entry
->protection
);
4294 if (flags
& (VM_FLAGS_RETURN_DATA_ADDR
|
4295 VM_FLAGS_RETURN_4K_DATA_ADDR
)) {
4296 offset_in_mapping
= offset
- vm_object_trunc_page(offset
);
4297 if (flags
& VM_FLAGS_RETURN_4K_DATA_ADDR
)
4298 offset_in_mapping
&= ~((signed)(0xFFF));
4299 offset
= vm_object_trunc_page(offset
);
4300 map_size
= vm_object_round_page(offset
+ offset_in_mapping
+ initial_size
) - offset
;
4303 object
= named_entry
->backing
.object
;
4304 assert(object
!= VM_OBJECT_NULL
);
4305 vm_object_lock(object
);
4306 named_entry_unlock(named_entry
);
4308 vm_object_reference_locked(object
);
4310 wimg_mode
= object
->wimg_bits
;
4311 vm_prot_to_wimg(access
, &wimg_mode
);
4312 if (object
->wimg_bits
!= wimg_mode
)
4313 vm_object_change_wimg_mode(object
, wimg_mode
);
4315 vm_object_unlock(object
);
	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
		/*
		 * JMM - This is temporary until we unify named entries
		 * and raw memory objects.
		 *
		 * Detected fake ip_kotype for a memory object.  In
		 * this case, the port isn't really a port at all, but
		 * instead is just a raw memory object.
		 */
		if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
			     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
			panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
		}

		object = memory_object_to_vm_object((memory_object_t)port);
		if (object == VM_OBJECT_NULL)
			return KERN_INVALID_OBJECT;
		vm_object_reference(object);

		/* wait for object (if any) to be ready */
		if (object != VM_OBJECT_NULL) {
			if (object == kernel_object) {
				printf("Warning: Attempt to map kernel object"
					" by a non-private kernel entity\n");
				return KERN_INVALID_OBJECT;
			}
			if (!object->pager_ready) {
				vm_object_lock(object);

				while (!object->pager_ready) {
					vm_object_wait(object,
						       VM_OBJECT_EVENT_PAGER_READY,
						       THREAD_UNINT);
					vm_object_lock(object);
				}
				vm_object_unlock(object);
			}
		}
	} else {
		return KERN_INVALID_OBJECT;
	}
	if (object != VM_OBJECT_NULL &&
	    object->named &&
	    object->pager != MEMORY_OBJECT_NULL &&
	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
		memory_object_t	pager;
		vm_prot_t	pager_prot;
		kern_return_t	kr;

		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
		pager_prot = max_protection;
		if (copy) {
			/*
			 * Copy-On-Write mapping: won't modify the
			 * memory object.
			 */
			pager_prot &= ~VM_PROT_WRITE;
		}
		vm_object_lock(object);
		pager = object->pager;
		if (object->named &&
		    pager != MEMORY_OBJECT_NULL &&
		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
			assert(object->pager_ready);
			vm_object_mapping_wait(object, THREAD_UNINT);
			vm_object_mapping_begin(object);
			vm_object_unlock(object);

			kr = memory_object_map(pager, pager_prot);
			assert(kr == KERN_SUCCESS);

			vm_object_lock(object);
			vm_object_mapping_end(object);
		}
		vm_object_unlock(object);
	}
	/*
	 *	Perform the copy if requested
	 */

	if (copy) {
		vm_object_t		new_object;
		vm_object_offset_t	new_offset;

		result = vm_object_copy_strategically(object, offset,
						      map_size,
						      &new_object, &new_offset,
						      &copy);

		if (result == KERN_MEMORY_RESTART_COPY) {
			boolean_t success;
			boolean_t src_needs_copy;

			/*
			 * XXX
			 * We currently ignore src_needs_copy.
			 * This really is the issue of how to make
			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
			 * non-kernel users to use. Solution forthcoming.
			 * In the meantime, since we don't allow non-kernel
			 * memory managers to specify symmetric copy,
			 * we won't run into problems here.
			 */
			new_object = object;
			new_offset = offset;
			success = vm_object_copy_quickly(&new_object,
							 new_offset,
							 map_size,
							 &src_needs_copy,
							 &copy);
			assert(success);
			result = KERN_SUCCESS;
		}
		/*
		 *	Throw away the reference to the
		 *	original object, as it won't be mapped.
		 */

		vm_object_deallocate(object);

		if (result != KERN_SUCCESS) {
			return result;
		}

		object = new_object;
		offset = new_offset;
	}
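	/*
	 * Design note with a small example (explanatory only, not part of
	 * the original source): a caller passing copy=TRUE, e.g. mapping a
	 * file-backed memory entry privately, leaves this block with
	 * "object" replaced by the copy object and the reference on the
	 * original dropped, so later faults never touch the source object.
	 * A caller passing copy=FALSE skips the block entirely and maps the
	 * original object shared.
	 */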
	/*
	 * If non-kernel users want to try to prefault pages, the mapping and prefault
	 * needs to be atomic.
	 */
	kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
	vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);

#if __arm64__
	if (fourk) {
		/* map this object in a "4K" pager */
		result = vm_map_enter_fourk(target_map,
					    &map_addr,
					    map_size,
					    (vm_map_offset_t) mask,
					    flags,
					    vmk_flags,
					    tag,
					    object,
					    offset,
					    copy,
					    cur_protection,
					    max_protection,
					    inheritance);
	} else
#endif /* __arm64__ */
	{
		result = vm_map_enter(target_map,
				      &map_addr, map_size,
				      (vm_map_offset_t)mask,
				      flags,
				      vmk_flags,
				      tag,
				      object, offset,
				      copy,
				      cur_protection, max_protection,
				      inheritance);
	}
	if (result != KERN_SUCCESS)
		vm_object_deallocate(object);
	/*
	 * Try to prefault, and do not forget to release the vm map lock.
	 */
	if (result == KERN_SUCCESS && try_prefault) {
		mach_vm_address_t va = map_addr;
		kern_return_t kr = KERN_SUCCESS;
		unsigned int i = 0;
		int pmap_options;

		pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
		if (object->internal) {
			pmap_options |= PMAP_OPTIONS_INTERNAL;
		}

		for (i = 0; i < page_list_count; ++i) {
			if (!UPL_VALID_PAGE(page_list, i)) {
				if (kernel_prefault) {
					assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
					result = KERN_MEMORY_ERROR;
					break;
				}
			} else {
				/*
				 * If this function call failed, we should stop
				 * trying to optimize, other calls are likely
				 * going to fail too.
				 *
				 * We are not going to report an error for such
				 * a failure though.  That's an optimization, not
				 * something critical.
				 */
				kr = pmap_enter_options(target_map->pmap,
							va, UPL_PHYS_PAGE(page_list, i),
							cur_protection, VM_PROT_NONE,
							0, TRUE, pmap_options, NULL);
				if (kr != KERN_SUCCESS) {
					OSIncrementAtomic64(&vm_prefault_nb_bailout);
					if (kernel_prefault) {
						result = kr;
					}
					break;
				}
				OSIncrementAtomic64(&vm_prefault_nb_pages);
			}

			/* Next virtual address */
			va += PAGE_SIZE;
		}
		if (vmk_flags.vmkf_keep_map_locked) {
			vm_map_unlock(target_map);
		}
	}
	if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
		     VM_FLAGS_RETURN_4K_DATA_ADDR)) {
		*address = map_addr + offset_in_mapping;
	} else {
		*address = map_addr;
	}

	return result;
}
kern_return_t
vm_map_enter_mem_object(
	vm_map_t		target_map,
	vm_map_offset_t		*address,
	vm_map_size_t		initial_size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_kernel_flags_t	vmk_flags,
	vm_tag_t		tag,
	ipc_port_t		port,
	vm_object_offset_t	offset,
	boolean_t		copy,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	kern_return_t ret;

	ret = vm_map_enter_mem_object_helper(target_map,
					     address,
					     initial_size,
					     mask,
					     flags,
					     vmk_flags,
					     tag,
					     port,
					     offset,
					     copy,
					     cur_protection,
					     max_protection,
					     inheritance,
					     NULL,	/* no prefault page list */
					     0);

#if KASAN
	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
		kasan_notify_address(*address, initial_size);
	}
#endif

	return ret;
}
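
/*
 * Illustrative sketch (not part of the original source): a typical
 * in-kernel use of vm_map_enter_mem_object() to map a memory-entry
 * port anywhere in a target map.  The "mem_entry" port, the size and
 * the protections below are hypothetical placeholders; only the call
 * shape follows the signature above.
 */
#if 0
static kern_return_t
example_map_mem_entry(vm_map_t target_map, ipc_port_t mem_entry,
		      vm_map_size_t size, mach_vm_address_t *out_addr)
{
	vm_map_offset_t	map_addr = 0;	/* let the VM pick the address */
	kern_return_t	kr;

	kr = vm_map_enter_mem_object(target_map,
				     &map_addr,
				     size,
				     (vm_map_offset_t)0,	/* mask */
				     VM_FLAGS_ANYWHERE,
				     VM_MAP_KERNEL_FLAGS_NONE,
				     VM_KERN_MEMORY_NONE,
				     mem_entry,
				     0,			/* offset */
				     FALSE,		/* copy */
				     VM_PROT_READ | VM_PROT_WRITE,
				     VM_PROT_READ | VM_PROT_WRITE,
				     VM_INHERIT_DEFAULT);
	if (kr == KERN_SUCCESS) {
		*out_addr = (mach_vm_address_t)map_addr;
	}
	return kr;
}
#endif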
kern_return_t
vm_map_enter_mem_object_prefault(
	vm_map_t		target_map,
	vm_map_offset_t		*address,
	vm_map_size_t		initial_size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_kernel_flags_t	vmk_flags,
	vm_tag_t		tag,
	ipc_port_t		port,
	vm_object_offset_t	offset,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	upl_page_list_ptr_t	page_list,
	unsigned int		page_list_count)
{
	kern_return_t ret;

	ret = vm_map_enter_mem_object_helper(target_map,
					     address,
					     initial_size,
					     mask,
					     flags,
					     vmk_flags,
					     tag,
					     port,
					     offset,
					     FALSE,	/* copy */
					     cur_protection,
					     max_protection,
					     VM_INHERIT_DEFAULT,
					     page_list,
					     page_list_count);

#if KASAN
	if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
		kasan_notify_address(*address, initial_size);
	}
#endif

	return ret;
}
4646 vm_map_enter_mem_object_control(
4647 vm_map_t target_map
,
4648 vm_map_offset_t
*address
,
4649 vm_map_size_t initial_size
,
4650 vm_map_offset_t mask
,
4652 vm_map_kernel_flags_t vmk_flags
,
4654 memory_object_control_t control
,
4655 vm_object_offset_t offset
,
4657 vm_prot_t cur_protection
,
4658 vm_prot_t max_protection
,
4659 vm_inherit_t inheritance
)
4661 vm_map_address_t map_addr
;
4662 vm_map_size_t map_size
;
4664 vm_object_size_t size
;
4665 kern_return_t result
;
4666 memory_object_t pager
;
4667 vm_prot_t pager_prot
;
4670 boolean_t fourk
= vmk_flags
.vmkf_fourk
;
4671 #endif /* __arm64__ */
4674 * Check arguments for validity
4676 if ((target_map
== VM_MAP_NULL
) ||
4677 (cur_protection
& ~VM_PROT_ALL
) ||
4678 (max_protection
& ~VM_PROT_ALL
) ||
4679 (inheritance
> VM_INHERIT_LAST_VALID
) ||
4680 initial_size
== 0) {
4681 return KERN_INVALID_ARGUMENT
;
4686 map_addr
= vm_map_trunc_page(*address
,
4688 map_size
= vm_map_round_page(initial_size
,
4691 #endif /* __arm64__ */
4693 map_addr
= vm_map_trunc_page(*address
,
4694 VM_MAP_PAGE_MASK(target_map
));
4695 map_size
= vm_map_round_page(initial_size
,
4696 VM_MAP_PAGE_MASK(target_map
));
4698 size
= vm_object_round_page(initial_size
);
4700 object
= memory_object_control_to_vm_object(control
);
4702 if (object
== VM_OBJECT_NULL
)
4703 return KERN_INVALID_OBJECT
;
4705 if (object
== kernel_object
) {
4706 printf("Warning: Attempt to map kernel object"
4707 " by a non-private kernel entity\n");
4708 return KERN_INVALID_OBJECT
;
4711 vm_object_lock(object
);
4712 object
->ref_count
++;
4713 vm_object_res_reference(object
);
4716 * For "named" VM objects, let the pager know that the
4717 * memory object is being mapped. Some pagers need to keep
4718 * track of this, to know when they can reclaim the memory
4719 * object, for example.
4720 * VM calls memory_object_map() for each mapping (specifying
4721 * the protection of each mapping) and calls
4722 * memory_object_last_unmap() when all the mappings are gone.
4724 pager_prot
= max_protection
;
4726 pager_prot
&= ~VM_PROT_WRITE
;
4728 pager
= object
->pager
;
4729 if (object
->named
&&
4730 pager
!= MEMORY_OBJECT_NULL
&&
4731 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
4732 assert(object
->pager_ready
);
4733 vm_object_mapping_wait(object
, THREAD_UNINT
);
4734 vm_object_mapping_begin(object
);
4735 vm_object_unlock(object
);
4737 kr
= memory_object_map(pager
, pager_prot
);
4738 assert(kr
== KERN_SUCCESS
);
4740 vm_object_lock(object
);
4741 vm_object_mapping_end(object
);
4743 vm_object_unlock(object
);
4746 * Perform the copy if requested
4750 vm_object_t new_object
;
4751 vm_object_offset_t new_offset
;
4753 result
= vm_object_copy_strategically(object
, offset
, size
,
4754 &new_object
, &new_offset
,
4758 if (result
== KERN_MEMORY_RESTART_COPY
) {
4760 boolean_t src_needs_copy
;
4764 * We currently ignore src_needs_copy.
4765 * This really is the issue of how to make
4766 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4767 * non-kernel users to use. Solution forthcoming.
4768 * In the meantime, since we don't allow non-kernel
4769 * memory managers to specify symmetric copy,
4770 * we won't run into problems here.
4772 new_object
= object
;
4773 new_offset
= offset
;
4774 success
= vm_object_copy_quickly(&new_object
,
4779 result
= KERN_SUCCESS
;
4782 * Throw away the reference to the
4783 * original object, as it won't be mapped.
4786 vm_object_deallocate(object
);
4788 if (result
!= KERN_SUCCESS
) {
4792 object
= new_object
;
4793 offset
= new_offset
;
4798 result
= vm_map_enter_fourk(target_map
,
4801 (vm_map_offset_t
)mask
,
4807 cur_protection
, max_protection
,
4810 #endif /* __arm64__ */
4812 result
= vm_map_enter(target_map
,
4813 &map_addr
, map_size
,
4814 (vm_map_offset_t
)mask
,
4820 cur_protection
, max_protection
,
4823 if (result
!= KERN_SUCCESS
)
4824 vm_object_deallocate(object
);
4825 *address
= map_addr
;
4834 extern pmap_paddr_t avail_start
, avail_end
;
4838 * Allocate memory in the specified map, with the caveat that
4839 * the memory is physically contiguous. This call may fail
4840 * if the system can't find sufficient contiguous memory.
4841 * This call may cause or lead to heart-stopping amounts of
4844 * Memory obtained from this call should be freed in the
4845 * normal way, viz., via vm_deallocate.
4850 vm_map_offset_t
*addr
,
4854 vm_object_t cpm_obj
;
4858 vm_map_offset_t va
, start
, end
, offset
;
4860 vm_map_offset_t prev_addr
= 0;
4861 #endif /* MACH_ASSERT */
4863 boolean_t anywhere
= ((VM_FLAGS_ANYWHERE
& flags
) != 0);
4866 VM_GET_FLAGS_ALIAS(flags
, tag
);
4870 return KERN_SUCCESS
;
4873 *addr
= vm_map_min(map
);
4875 *addr
= vm_map_trunc_page(*addr
,
4876 VM_MAP_PAGE_MASK(map
));
4877 size
= vm_map_round_page(size
,
4878 VM_MAP_PAGE_MASK(map
));
4881 * LP64todo - cpm_allocate should probably allow
4882 * allocations of >4GB, but not with the current
4883 * algorithm, so just cast down the size for now.
4885 if (size
> VM_MAX_ADDRESS
)
4886 return KERN_RESOURCE_SHORTAGE
;
4887 if ((kr
= cpm_allocate(CAST_DOWN(vm_size_t
, size
),
4888 &pages
, 0, 0, TRUE
, flags
)) != KERN_SUCCESS
)
4891 cpm_obj
= vm_object_allocate((vm_object_size_t
)size
);
4892 assert(cpm_obj
!= VM_OBJECT_NULL
);
4893 assert(cpm_obj
->internal
);
4894 assert(cpm_obj
->vo_size
== (vm_object_size_t
)size
);
4895 assert(cpm_obj
->can_persist
== FALSE
);
4896 assert(cpm_obj
->pager_created
== FALSE
);
4897 assert(cpm_obj
->pageout
== FALSE
);
4898 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
4901 * Insert pages into object.
4904 vm_object_lock(cpm_obj
);
4905 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
4907 pages
= NEXT_PAGE(m
);
4908 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
4910 assert(!m
->vmp_gobbled
);
4911 assert(!m
->vmp_wanted
);
4912 assert(!m
->vmp_pageout
);
4913 assert(!m
->vmp_tabled
);
4914 assert(VM_PAGE_WIRED(m
));
4915 assert(m
->vmp_busy
);
4916 assert(VM_PAGE_GET_PHYS_PAGE(m
)>=(avail_start
>>PAGE_SHIFT
) && VM_PAGE_GET_PHYS_PAGE(m
)<=(avail_end
>>PAGE_SHIFT
));
4918 m
->vmp_busy
= FALSE
;
4919 vm_page_insert(m
, cpm_obj
, offset
);
4921 assert(cpm_obj
->resident_page_count
== size
/ PAGE_SIZE
);
4922 vm_object_unlock(cpm_obj
);
4925 * Hang onto a reference on the object in case a
4926 * multi-threaded application for some reason decides
4927 * to deallocate the portion of the address space into
4928 * which we will insert this object.
4930 * Unfortunately, we must insert the object now before
4931 * we can talk to the pmap module about which addresses
4932 * must be wired down. Hence, the race with a multi-
4935 vm_object_reference(cpm_obj
);
4938 * Insert object into map.
4947 VM_MAP_KERNEL_FLAGS_NONE
,
4949 (vm_object_offset_t
)0,
4953 VM_INHERIT_DEFAULT
);
4955 if (kr
!= KERN_SUCCESS
) {
4957 * A CPM object doesn't have can_persist set,
4958 * so all we have to do is deallocate it to
4959 * free up these pages.
4961 assert(cpm_obj
->pager_created
== FALSE
);
4962 assert(cpm_obj
->can_persist
== FALSE
);
4963 assert(cpm_obj
->pageout
== FALSE
);
4964 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
4965 vm_object_deallocate(cpm_obj
); /* kill acquired ref */
4966 vm_object_deallocate(cpm_obj
); /* kill creation ref */
4970 * Inform the physical mapping system that the
4971 * range of addresses may not fault, so that
4972 * page tables and such can be locked down as well.
4976 pmap
= vm_map_pmap(map
);
4977 pmap_pageable(pmap
, start
, end
, FALSE
);
4980 * Enter each page into the pmap, to avoid faults.
4981 * Note that this loop could be coded more efficiently,
4982 * if the need arose, rather than looking up each page
4985 for (offset
= 0, va
= start
; offset
< size
;
4986 va
+= PAGE_SIZE
, offset
+= PAGE_SIZE
) {
4989 vm_object_lock(cpm_obj
);
4990 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
4991 assert(m
!= VM_PAGE_NULL
);
4993 vm_page_zero_fill(m
);
4995 type_of_fault
= DBG_ZERO_FILL_FAULT
;
4997 vm_fault_enter(m
, pmap
, va
, VM_PROT_ALL
, VM_PROT_WRITE
,
4999 FALSE
, /* change_wiring */
5000 VM_KERN_MEMORY_NONE
, /* tag - not wiring */
5001 FALSE
, /* no_cache */
5002 FALSE
, /* cs_bypass */
5004 0, /* pmap_options */
5005 NULL
, /* need_retry */
5008 vm_object_unlock(cpm_obj
);
5013 * Verify ordering in address space.
5015 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
5016 vm_object_lock(cpm_obj
);
5017 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
5018 vm_object_unlock(cpm_obj
);
5019 if (m
== VM_PAGE_NULL
)
5020 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5021 cpm_obj
, (uint64_t)offset
);
5022 assert(m
->vmp_tabled
);
5023 assert(!m
->vmp_busy
);
5024 assert(!m
->vmp_wanted
);
5025 assert(!m
->vmp_fictitious
);
5026 assert(!m
->vmp_private
);
5027 assert(!m
->vmp_absent
);
5028 assert(!m
->vmp_error
);
5029 assert(!m
->vmp_cleaning
);
5030 assert(!m
->vmp_laundry
);
5031 assert(!m
->vmp_precious
);
5032 assert(!m
->vmp_clustered
);
5034 if (VM_PAGE_GET_PHYS_PAGE(m
) != prev_addr
+ 1) {
5035 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5036 (uint64_t)start
, (uint64_t)end
, (uint64_t)va
);
5037 printf("obj %p off 0x%llx\n", cpm_obj
, (uint64_t)offset
);
5038 printf("m %p prev_address 0x%llx\n", m
, (uint64_t)prev_addr
);
5039 panic("vm_allocate_cpm: pages not contig!");
5042 prev_addr
= VM_PAGE_GET_PHYS_PAGE(m
);
5044 #endif /* MACH_ASSERT */
5046 vm_object_deallocate(cpm_obj
); /* kill extra ref */
5055 * Interface is defined in all cases, but unless the kernel
5056 * is built explicitly for this option, the interface does
5062 __unused vm_map_t map
,
5063 __unused vm_map_offset_t
*addr
,
5064 __unused vm_map_size_t size
,
5067 return KERN_FAILURE
;
}
#endif /* VM_CPM */

/* Not used without nested pmaps */
#ifndef NO_NESTED_PMAP
/*
 * Clip and unnest a portion of a nested submap mapping.
 */

static void
vm_map_clip_unnest(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	start_unnest,
	vm_map_offset_t	end_unnest)
{
	vm_map_offset_t old_start_unnest = start_unnest;
	vm_map_offset_t old_end_unnest = end_unnest;

	assert(entry->is_sub_map);
	assert(VME_SUBMAP(entry) != NULL);
	assert(entry->use_pmap);

	/*
	 * Query the platform for the optimal unnest range.
	 * DRK: There's some duplication of effort here, since
	 * callers may have adjusted the range to some extent. This
	 * routine was introduced to support 1GiB subtree nesting
	 * for x86 platforms, which can also nest on 2MiB boundaries
	 * depending on size/alignment.
	 */
	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
		assert(VME_SUBMAP(entry)->is_nested_map);
		assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
		log_unnest_badness(map,
				   old_start_unnest,
				   old_end_unnest,
				   VME_SUBMAP(entry)->is_nested_map,
				   (entry->vme_start +
				    VME_SUBMAP(entry)->lowest_unnestable_start -
				    VME_OFFSET(entry)));
	}

	if (entry->vme_start > start_unnest ||
	    entry->vme_end < end_unnest) {
		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
		      "bad nested entry: start=0x%llx end=0x%llx\n",
		      (long long)start_unnest, (long long)end_unnest,
		      (long long)entry->vme_start, (long long)entry->vme_end);
	}

	if (start_unnest > entry->vme_start) {
		_vm_map_clip_start(&map->hdr,
				   entry,
				   start_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
	if (entry->vme_end > end_unnest) {
		_vm_map_clip_end(&map->hdr,
				 entry,
				 end_unnest);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}

	pmap_unnest(map->pmap,
		    entry->vme_start,
		    entry->vme_end - entry->vme_start);
	if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
		/* clean up parent map/maps */
		vm_map_submap_pmap_clean(
			map, entry->vme_start,
			entry->vme_end,
			VME_SUBMAP(entry),
			VME_OFFSET(entry));
	}
	entry->use_pmap = FALSE;
	if ((map->pmap != kernel_pmap) &&
	    (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
		VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
	}
}
#endif	/* NO_NESTED_PMAP */
/*
 *	vm_map_clip_start:	[ internal use only ]
 *
 *	Asserts that the given entry begins at or after
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_start(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	startaddr)
{
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map &&
	    entry->use_pmap &&
	    startaddr >= entry->vme_start) {
		vm_map_offset_t	start_unnest, end_unnest;

		/*
		 * Make sure "startaddr" is no longer in a nested range
		 * before we clip.  Unnest only the minimum range the platform
		 * can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
		end_unnest = start_unnest + pmap_nesting_size_min;
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (startaddr > entry->vme_start) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
				    (addr64_t)(entry->vme_start),
				    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
		}

		DTRACE_VM5(
			vm_map_clip_start,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, startaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_start(&map->hdr, entry, startaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
#define vm_map_copy_clip_start(copy, entry, startaddr) \
	MACRO_BEGIN \
	if ((startaddr) > (entry)->vme_start) \
		_vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
	MACRO_END

/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_start(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		start)
{
	vm_map_entry_t	new_entry;

	/*
	 *	Split off the front portion --
	 *	note that we must insert the new
	 *	entry BEFORE this one, so that
	 *	this entry has the specified starting
	 *	address.
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(start,
					   VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	new_entry->vme_end = start;
	assert(new_entry->vme_start < new_entry->vme_end);
	VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
	assert(start < entry->vme_end);
	entry->vme_start = start;

	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);

	if (entry->is_sub_map)
		vm_map_reference(VME_SUBMAP(new_entry));
	else
		vm_object_reference(VME_OBJECT(new_entry));
}
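
/*
 * Illustrative sketch (not part of the original source): the arithmetic
 * performed by _vm_map_clip_start() on a toy entry, outside the kernel.
 * A split at "start" keeps [start, vme_end) in the original entry and
 * advances its object offset by the amount clipped off the front, while
 * the new entry inherits the old offset and covers [vme_start, start).
 * The struct and values below are hypothetical, for illustration only.
 */
#if 0
struct toy_entry {
	uint64_t vme_start;
	uint64_t vme_end;
	uint64_t offset;	/* offset into the backing object */
};

static void
toy_clip_start(struct toy_entry *entry, struct toy_entry *new_entry,
	       uint64_t start)
{
	*new_entry = *entry;				/* like vm_map_entry_copy_full() */
	new_entry->vme_end = start;			/* new entry keeps the front portion */
	entry->offset += start - entry->vme_start;	/* original entry keeps the back */
	entry->vme_start = start;
}
/*
 * e.g. clipping {0x1000, 0x5000, offset 0} at 0x2000 yields
 *	new_entry = {0x1000, 0x2000, offset 0}
 *	entry     = {0x2000, 0x5000, offset 0x1000}
 */
#endif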
/*
 *	vm_map_clip_end:	[ internal use only ]
 *
 *	Asserts that the given entry ends at or before
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
void
vm_map_clip_end(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	endaddr)
{
	if (endaddr > entry->vme_end) {
		/*
		 * Within the scope of this clipping, limit "endaddr" to
		 * the end of this map entry...
		 */
		endaddr = entry->vme_end;
	}
#ifndef NO_NESTED_PMAP
	if (entry->is_sub_map && entry->use_pmap) {
		vm_map_offset_t	start_unnest, end_unnest;

		/*
		 * Make sure the range between the start of this entry and
		 * the new "endaddr" is no longer nested before we clip.
		 * Unnest only the minimum range the platform can handle.
		 * vm_map_clip_unnest may perform additional adjustments to
		 * the unnest range.
		 */
		start_unnest = entry->vme_start;
		end_unnest =
			(endaddr + pmap_nesting_size_min - 1) &
			~(pmap_nesting_size_min - 1);
		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
	}
#endif /* NO_NESTED_PMAP */
	if (endaddr < entry->vme_end) {
		if (VME_OBJECT(entry) &&
		    !entry->is_sub_map &&
		    VME_OBJECT(entry)->phys_contiguous) {
			pmap_remove(map->pmap,
				    (addr64_t)(entry->vme_start),
				    (addr64_t)(entry->vme_end));
		}
		if (entry->vme_atomic) {
			panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
		}

		DTRACE_VM5(
			vm_map_clip_end,
			vm_map_t, map,
			vm_map_offset_t, entry->vme_start,
			vm_map_offset_t, entry->vme_end,
			vm_map_offset_t, endaddr,
			int, VME_ALIAS(entry));

		_vm_map_clip_end(&map->hdr, entry, endaddr);
		if (map->holelistenabled) {
			vm_map_store_update_first_free(map, NULL, FALSE);
		} else {
			vm_map_store_update_first_free(map, map->first_free, FALSE);
		}
	}
}
#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) \
		_vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
	MACRO_END

/*
 *	This routine is called only when it is known that
 *	the entry must be split.
 */
static void
_vm_map_clip_end(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		end)
{
	vm_map_entry_t	new_entry;

	/*
	 *	Create a new entry and insert it
	 *	AFTER the specified entry
	 */

	if (entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(end,
					   VM_MAP_HDR_PAGE_MASK(map_header)));
	}

	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
	vm_map_entry_copy_full(new_entry, entry);

	assert(entry->vme_start < end);
	new_entry->vme_start = entry->vme_end = end;
	VME_OFFSET_SET(new_entry,
		       VME_OFFSET(new_entry) + (end - entry->vme_start));
	assert(new_entry->vme_start < new_entry->vme_end);

	_vm_map_store_entry_link(map_header, entry, new_entry);

	if (entry->is_sub_map)
		vm_map_reference(VME_SUBMAP(new_entry));
	else
		vm_object_reference(VME_OBJECT(new_entry));
}
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)	\
	MACRO_BEGIN				\
	if (start < vm_map_min(map))		\
		start = vm_map_min(map);	\
	if (end > vm_map_max(map))		\
		end = vm_map_max(map);		\
	if (start > end)			\
		start = end;			\
	MACRO_END
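
/*
 * Illustrative sketch (not part of the original source): what
 * VM_MAP_RANGE_CHECK() does to its arguments.  Both "start" and "end"
 * are modified in place, which is why callers pass local copies.  The
 * initial values below are hypothetical.
 */
#if 0
static void
example_range_check(vm_map_t map)
{
	vm_map_offset_t start = 0;			/* below vm_map_min(map) */
	vm_map_offset_t end   = (vm_map_offset_t)-1;	/* above vm_map_max(map) */

	VM_MAP_RANGE_CHECK(map, start, end);

	/* both ends are now clamped to the map's valid range */
	assert(start == vm_map_min(map));
	assert(end == vm_map_max(map));
}
#endif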
/*
 *	vm_map_range_check:	[ internal use only ]
 *
 *	Check that the region defined by the specified start and
 *	end addresses are wholly contained within a single map
 *	entry or set of adjacent map entries of the specified map,
 *	i.e. the specified region contains no unmapped space.
 *	If any or all of the region is unmapped, FALSE is returned.
 *	Otherwise, TRUE is returned and if the output argument 'entry'
 *	is not NULL it points to the map entry containing the start
 *	of the region.
 *
 *	The map is locked for reading on entry and is left locked.
 */
static boolean_t
vm_map_range_check(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_map_entry_t		*entry)
{
	vm_map_entry_t		cur;
	vm_map_offset_t		prev;

	/*
	 *	Basic sanity checks first
	 */
	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
		return (FALSE);

	/*
	 *	Check first if the region starts within a valid
	 *	mapping for the map.
	 */
	if (!vm_map_lookup_entry(map, start, &cur))
		return (FALSE);

	/*
	 *	Optimize for the case that the region is contained
	 *	in a single map entry.
	 */
	if (entry != (vm_map_entry_t *) NULL)
		*entry = cur;

	if (end <= cur->vme_end)
		return (TRUE);

	/*
	 *	If the region is not wholly contained within a
	 *	single entry, walk the entries looking for holes.
	 */
	prev = cur->vme_end;
	cur = cur->vme_next;
	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
		if (end <= cur->vme_end)
			return (TRUE);
		prev = cur->vme_end;
		cur = cur->vme_next;
	}
	return (FALSE);
}
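
/*
 * Illustrative sketch (not part of the original source): how a caller
 * is expected to use vm_map_range_check() -- with the map already
 * locked for reading, as required by the comment above.  The helper
 * name is hypothetical; since vm_map_range_check() is static, a real
 * caller would live in this file.
 */
#if 0
static boolean_t
example_range_is_fully_mapped(vm_map_t map, vm_map_offset_t start,
			      vm_map_offset_t end)
{
	vm_map_entry_t	first_entry;
	boolean_t	mapped;

	vm_map_lock_read(map);
	mapped = vm_map_range_check(map, start, end, &first_entry);
	/* if "mapped", first_entry covers "start"; walk ->vme_next from here */
	vm_map_unlock_read(map);

	return mapped;
}
#endif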
/*
 *	vm_map_submap:		[ kernel use only ]
 *
 *	Mark the given range as handled by a subordinate map.
 *
 *	This range must have been created with vm_map_find using
 *	the vm_submap_object, and no other operations may have been
 *	performed on this range prior to calling vm_map_submap.
 *
 *	Only a limited number of operations can be performed
 *	within this range after calling vm_map_submap:
 *		vm_fault
 *	[Don't try vm_map_copyin!]
 *
 *	To remove a submapping, one must first remove the
 *	range from the superior map, and then destroy the
 *	submap (if desired).  [Better yet, don't try it.]
 */
kern_return_t
vm_map_submap(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_map_t	submap,
	vm_map_offset_t	offset,
#ifdef NO_NESTED_PMAP
	__unused
#endif	/* NO_NESTED_PMAP */
	boolean_t	use_pmap)
{
	vm_map_entry_t		entry;
	kern_return_t		result = KERN_INVALID_ARGUMENT;
	vm_object_t		object;

	vm_map_lock(map);

	if (! vm_map_lookup_entry(map, start, &entry)) {
		entry = entry->vme_next;
	}

	if (entry == vm_map_to_entry(map) ||
	    entry->is_sub_map) {
		vm_map_unlock(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_clip_start(map, entry, start);
	vm_map_clip_end(map, entry, end);

	if ((entry->vme_start == start) && (entry->vme_end == end) &&
	    (!entry->is_sub_map) &&
	    ((object = VME_OBJECT(entry)) == vm_submap_object) &&
	    (object->resident_page_count == 0) &&
	    (object->copy == VM_OBJECT_NULL) &&
	    (object->shadow == VM_OBJECT_NULL) &&
	    (!object->pager_created)) {
		VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
		VME_OBJECT_SET(entry, VM_OBJECT_NULL);
		vm_object_deallocate(object);
		entry->is_sub_map = TRUE;
		entry->use_pmap = FALSE;
		VME_SUBMAP_SET(entry, submap);
		vm_map_reference(submap);
		if (submap->mapped_in_other_pmaps == FALSE &&
		    vm_map_pmap(submap) != PMAP_NULL &&
		    vm_map_pmap(submap) != vm_map_pmap(map)) {
			/*
			 * This submap is being mapped in a map
			 * that uses a different pmap.
			 * Set its "mapped_in_other_pmaps" flag
			 * to indicate that we now need to
			 * remove mappings from all pmaps rather
			 * than just the submap's pmap.
			 */
			submap->mapped_in_other_pmaps = TRUE;
		}

#ifndef NO_NESTED_PMAP
		if (use_pmap) {
			/* nest if platform code will allow */
			if(submap->pmap == NULL) {
				ledger_t ledger = map->pmap->ledger;
				submap->pmap = pmap_create(ledger,
							   (vm_map_size_t) 0, FALSE);
				if(submap->pmap == PMAP_NULL) {
					vm_map_unlock(map);
					return(KERN_NO_SPACE);
				}
#if	defined(__arm__) || defined(__arm64__)
				pmap_set_nested(submap->pmap);
#endif
			}
			result = pmap_nest(map->pmap,
					   (VME_SUBMAP(entry))->pmap,
					   (addr64_t)start,
					   (addr64_t)start,
					   (uint64_t)(end - start));
			if(result)
				panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
			entry->use_pmap = TRUE;
		}
#else	/* NO_NESTED_PMAP */
		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
#endif	/* NO_NESTED_PMAP */
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);

	return(result);
}
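
/*
 * Illustrative sketch (not part of the original source): the sequence
 * the comment above describes -- reserve the range backed by
 * vm_submap_object, then hand it over to a subordinate map with
 * vm_map_submap().  Addresses, sizes, the "submap" argument and the
 * helper name are hypothetical placeholders; error handling is kept
 * minimal.
 */
#if 0
static kern_return_t
example_install_submap(vm_map_t parent, vm_map_t submap,
		       vm_map_offset_t start, vm_map_size_t size)
{
	vm_map_offset_t	addr = start;
	kern_return_t	kr;

	/* reserve [start, start+size) in the parent, backed by vm_submap_object */
	kr = vm_map_enter(parent, &addr, size, 0,
			  VM_FLAGS_FIXED, VM_MAP_KERNEL_FLAGS_NONE,
			  VM_KERN_MEMORY_NONE,
			  vm_submap_object, 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* replace the placeholder object with the subordinate map */
	return vm_map_submap(parent, addr, addr + size, submap,
			     0 /* offset */, FALSE /* use_pmap */);
}
#endif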
5574 * Sets the protection of the specified address
5575 * region in the target map. If "set_max" is
5576 * specified, the maximum protection is to be set;
5577 * otherwise, only the current protection is affected.
5582 vm_map_offset_t start
,
5583 vm_map_offset_t end
,
5587 vm_map_entry_t current
;
5588 vm_map_offset_t prev
;
5589 vm_map_entry_t entry
;
5591 int pmap_options
= 0;
5595 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
5596 map
, start
, end
, new_prot
, set_max
);
5598 if (new_prot
& VM_PROT_COPY
) {
5599 vm_map_offset_t new_start
;
5600 vm_prot_t cur_prot
, max_prot
;
5601 vm_map_kernel_flags_t kflags
;
5603 /* LP64todo - see below */
5604 if (start
>= map
->max_offset
) {
5605 return KERN_INVALID_ADDRESS
;
5608 #if VM_PROTECT_WX_FAIL
5609 if ((new_prot
& VM_PROT_EXECUTE
) &&
5610 map
!= kernel_map
&&
5611 cs_process_enforcement(NULL
)) {
5613 uint64_t, (uint64_t) start
,
5614 uint64_t, (uint64_t) end
,
5615 vm_prot_t
, new_prot
);
5616 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5618 (current_task()->bsd_info
5619 ? proc_name_address(current_task()->bsd_info
)
5622 return KERN_PROTECTION_FAILURE
;
5624 #endif /* VM_PROTECT_WX_FAIL */
5627 * Let vm_map_remap_extract() know that it will need to:
5628 * + make a copy of the mapping
5629 * + add VM_PROT_WRITE to the max protections
5630 * + remove any protections that are no longer allowed from the
5631 * max protections (to avoid any WRITE/EXECUTE conflict, for
5633 * Note that "max_prot" is an IN/OUT parameter only for this
5634 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5637 max_prot
= new_prot
& VM_PROT_ALL
;
5638 kflags
= VM_MAP_KERNEL_FLAGS_NONE
;
5639 kflags
.vmkf_remap_prot_copy
= TRUE
;
5640 kflags
.vmkf_overwrite_immutable
= TRUE
;
5642 kr
= vm_map_remap(map
,
5646 VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
,
5651 TRUE
, /* copy-on-write remapping! */
5654 VM_INHERIT_DEFAULT
);
5655 if (kr
!= KERN_SUCCESS
) {
5658 new_prot
&= ~VM_PROT_COPY
;
5663 /* LP64todo - remove this check when vm_map_commpage64()
5664 * no longer has to stuff in a map_entry for the commpage
5665 * above the map's max_offset.
5667 if (start
>= map
->max_offset
) {
5669 return(KERN_INVALID_ADDRESS
);
5674 * Lookup the entry. If it doesn't start in a valid
5675 * entry, return an error.
5677 if (! vm_map_lookup_entry(map
, start
, &entry
)) {
5679 return(KERN_INVALID_ADDRESS
);
5682 if (entry
->superpage_size
&& (start
& (SUPERPAGE_SIZE
-1))) { /* extend request to whole entry */
5683 start
= SUPERPAGE_ROUND_DOWN(start
);
5688 if (entry
->superpage_size
)
5689 end
= SUPERPAGE_ROUND_UP(end
);
5692 * Make a first pass to check for protection and address
5697 prev
= current
->vme_start
;
5698 while ((current
!= vm_map_to_entry(map
)) &&
5699 (current
->vme_start
< end
)) {
5702 * If there is a hole, return an error.
5704 if (current
->vme_start
!= prev
) {
5706 return(KERN_INVALID_ADDRESS
);
5709 new_max
= current
->max_protection
;
5710 if ((new_prot
& new_max
) != new_prot
) {
5712 return(KERN_PROTECTION_FAILURE
);
5715 if ((new_prot
& VM_PROT_WRITE
) &&
5716 (new_prot
& VM_PROT_EXECUTE
) &&
5717 #if !CONFIG_EMBEDDED
5718 map
!= kernel_map
&&
5719 cs_process_enforcement(NULL
) &&
5720 #endif /* !CONFIG_EMBEDDED */
5721 !(current
->used_for_jit
)) {
5723 uint64_t, (uint64_t) current
->vme_start
,
5724 uint64_t, (uint64_t) current
->vme_end
,
5725 vm_prot_t
, new_prot
);
5726 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5728 (current_task()->bsd_info
5729 ? proc_name_address(current_task()->bsd_info
)
5732 new_prot
&= ~VM_PROT_EXECUTE
;
5733 #if VM_PROTECT_WX_FAIL
5735 return KERN_PROTECTION_FAILURE
;
5736 #endif /* VM_PROTECT_WX_FAIL */
5740 * If the task has requested executable lockdown,
5742 * - adding executable protections OR
5743 * - adding write protections to an existing executable mapping.
5745 if (map
->map_disallow_new_exec
== TRUE
) {
5746 if ((new_prot
& VM_PROT_EXECUTE
) ||
5747 ((current
->protection
& VM_PROT_EXECUTE
) && (new_prot
& VM_PROT_WRITE
))) {
5749 return(KERN_PROTECTION_FAILURE
);
5753 prev
= current
->vme_end
;
5754 current
= current
->vme_next
;
5759 end
== vm_map_round_page(prev
, VM_MAP_PAGE_MASK(map
))) {
5760 vm_map_entry_t prev_entry
;
5762 prev_entry
= current
->vme_prev
;
5763 if (prev_entry
!= vm_map_to_entry(map
) &&
5764 !prev_entry
->map_aligned
&&
5765 (vm_map_round_page(prev_entry
->vme_end
,
5766 VM_MAP_PAGE_MASK(map
))
5769 * The last entry in our range is not "map-aligned"
5770 * but it would have reached all the way to "end"
5771 * if it had been map-aligned, so this is not really
5772 * a hole in the range and we can proceed.
5777 #endif /* __arm64__ */
5781 return(KERN_INVALID_ADDRESS
);
5785 * Go back and fix up protections.
5786 * Clip to start here if the range starts within
5791 if (current
!= vm_map_to_entry(map
)) {
5792 /* clip and unnest if necessary */
5793 vm_map_clip_start(map
, current
, start
);
5796 while ((current
!= vm_map_to_entry(map
)) &&
5797 (current
->vme_start
< end
)) {
5801 vm_map_clip_end(map
, current
, end
);
5803 if (current
->is_sub_map
) {
5804 /* clipping did unnest if needed */
5805 assert(!current
->use_pmap
);
5808 old_prot
= current
->protection
;
5811 current
->max_protection
= new_prot
;
5812 current
->protection
= new_prot
& old_prot
;
5814 current
->protection
= new_prot
;
5818 * Update physical map if necessary.
5819 * If the request is to turn off write protection,
5820 * we won't do it for real (in pmap). This is because
5821 * it would cause copy-on-write to fail. We've already
5822 * set, the new protection in the map, so if a
5823 * write-protect fault occurred, it will be fixed up
5824 * properly, COW or not.
5826 if (current
->protection
!= old_prot
) {
5827 /* Look one level in we support nested pmaps */
5828 /* from mapped submaps which are direct entries */
5833 prot
= current
->protection
;
5834 if (current
->is_sub_map
|| (VME_OBJECT(current
) == NULL
) || (VME_OBJECT(current
) != compressor_object
)) {
5835 prot
&= ~VM_PROT_WRITE
;
5837 assert(!VME_OBJECT(current
)->code_signed
);
5838 assert(VME_OBJECT(current
)->copy_strategy
== MEMORY_OBJECT_COPY_NONE
);
5841 if (override_nx(map
, VME_ALIAS(current
)) && prot
)
5842 prot
|= VM_PROT_EXECUTE
;
5844 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5845 if (!(old_prot
& VM_PROT_EXECUTE
) &&
5846 (prot
& VM_PROT_EXECUTE
) &&
5847 panic_on_unsigned_execute
&&
5848 (proc_selfcsflags() & CS_KILL
)) {
5849 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, old_prot
, prot
);
5851 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5853 if (pmap_has_prot_policy(prot
)) {
5854 if (current
->wired_count
) {
5855 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5856 map
, (uint64_t)current
->vme_start
, (uint64_t)current
->vme_end
, prot
, current
->wired_count
);
5859 /* If the pmap layer cares about this
5860 * protection type, force a fault for
5861 * each page so that vm_fault will
5862 * repopulate the page with the full
5863 * set of protections.
5866 * TODO: We don't seem to need this,
5867 * but this is due to an internal
5868 * implementation detail of
5869 * pmap_protect. Do we want to rely
5872 prot
= VM_PROT_NONE
;
5875 if (current
->is_sub_map
&& current
->use_pmap
) {
5876 pmap_protect(VME_SUBMAP(current
)->pmap
,
5881 if (prot
& VM_PROT_WRITE
) {
5882 if (VME_OBJECT(current
) == compressor_object
) {
5884 * For write requests on the
5885 * compressor, we wil ask the
5886 * pmap layer to prevent us from
5887 * taking a write fault when we
5888 * attempt to access the mapping
5891 pmap_options
|= PMAP_OPTIONS_PROTECT_IMMEDIATE
;
5895 pmap_protect_options(map
->pmap
,
5903 current
= current
->vme_next
;
5907 while ((current
!= vm_map_to_entry(map
)) &&
5908 (current
->vme_start
<= end
)) {
5909 vm_map_simplify_entry(map
, current
);
5910 current
= current
->vme_next
;
5914 return(KERN_SUCCESS
);
/*
 *	vm_map_inherit:
 *
 *	Sets the inheritance of the specified address
 *	range in the target map.  Inheritance
 *	affects how the map will be shared with
 *	child maps at the time of vm_map_fork.
 */
kern_return_t
vm_map_inherit(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_inherit_t	new_inheritance)
{
	vm_map_entry_t	entry;
	vm_map_entry_t	temp_entry;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &temp_entry)) {
		entry = temp_entry;
	} else {
		temp_entry = temp_entry->vme_next;
		entry = temp_entry;
	}

	/* first check entire range for submaps which can't support the */
	/* given inheritance. */
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		if(entry->is_sub_map) {
			if(new_inheritance == VM_INHERIT_COPY) {
				vm_map_unlock(map);
				return(KERN_INVALID_ARGUMENT);
			}
		}

		entry = entry->vme_next;
	}

	entry = temp_entry;
	if (entry != vm_map_to_entry(map)) {
		/* clip and unnest if necessary */
		vm_map_clip_start(map, entry, start);
	}

	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		vm_map_clip_end(map, entry, end);
		if (entry->is_sub_map) {
			/* clip did unnest if needed */
			assert(!entry->use_pmap);
		}

		entry->inheritance = new_inheritance;

		entry = entry->vme_next;
	}

	vm_map_unlock(map);
	return(KERN_SUCCESS);
}
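
/*
 * Illustrative sketch (not part of the original source): marking a
 * range so it is not passed to child maps created by vm_map_fork(),
 * using the routine above.  The address range and helper name are
 * hypothetical placeholders.
 */
#if 0
static kern_return_t
example_hide_range_from_children(vm_map_t map, vm_map_offset_t start,
				 vm_map_size_t size)
{
	/* children created after this call see the range as unmapped */
	return vm_map_inherit(map, start, start + size, VM_INHERIT_NONE);
}
#endif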
/*
 * Update the accounting for the amount of wired memory in this map.  If the user has
 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
 */
static kern_return_t
add_wire_counts(
	vm_map_t	map,
	vm_map_entry_t	entry,
	boolean_t	user_wire)
{
	vm_map_size_t	size;

	if (user_wire) {
		unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

		/*
		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
		 * this map entry.
		 */

		if (entry->user_wired_count == 0) {
			size = entry->vme_end - entry->vme_start;

			/*
			 * Since this is the first time the user is wiring this map entry, check to see if we're
			 * exceeding the user wire limits.  There is a per map limit which is the smaller of either
			 * the process's rlimit or the global vm_user_wire_limit which caps this value.  There is also
			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
			 * limit, then we fail.
			 */

			if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
			   size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
			   size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
				return KERN_RESOURCE_SHORTAGE;

			/*
			 * The first time the user wires an entry, we also increment the wired_count and add this to
			 * the total that has been wired in the map.
			 */

			if (entry->wired_count >= MAX_WIRE_COUNT)
				return KERN_FAILURE;

			entry->wired_count++;
			map->user_wire_size += size;
		}

		if (entry->user_wired_count >= MAX_WIRE_COUNT)
			return KERN_FAILURE;

		entry->user_wired_count++;

	} else {

		/*
		 * The kernel's wiring the memory.  Just bump the count and continue.
		 */

		if (entry->wired_count >= MAX_WIRE_COUNT)
			panic("vm_map_wire: too many wirings");

		entry->wired_count++;
	}

	return KERN_SUCCESS;
}
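
/*
 * Illustrative sketch (not part of the original source): the three
 * limits that add_wire_counts() applies to a user wire request, pulled
 * out as a standalone predicate.  The variable names mirror the ones
 * used above; the helper itself is hypothetical.
 */
#if 0
static boolean_t
example_user_wire_would_exceed_limits(vm_map_t map, vm_map_size_t size)
{
	unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

	/* per-map limit: the smaller of the task's limit and the global cap */
	if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit))
		return TRUE;
	/* system-wide limit on user-wired memory */
	if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit)
		return TRUE;
	/* always leave vm_global_no_user_wire_amount of memory unwired */
	if (size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
		return TRUE;

	return FALSE;
}
#endif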
/*
 * Update the memory wiring accounting now that the given map entry is being unwired.
 */
static void
subtract_wire_counts(
	vm_map_t	map,
	vm_map_entry_t	entry,
	boolean_t	user_wire)
{
	if (user_wire) {

		/*
		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
		 */

		if (entry->user_wired_count == 1) {

			/*
			 * We're removing the last user wire reference.  Decrement the wired_count and the total
			 * user wired memory for this map.
			 */

			assert(entry->wired_count >= 1);
			entry->wired_count--;
			map->user_wire_size -= entry->vme_end - entry->vme_start;
		}

		assert(entry->user_wired_count >= 1);
		entry->user_wired_count--;

	} else {

		/*
		 * The kernel is unwiring the memory.  Just update the count.
		 */

		assert(entry->wired_count >= 1);
		entry->wired_count--;
	}
}

int cs_executable_wire = 0;
= 0;
6099 * Sets the pageability of the specified address range in the
6100 * target map as wired. Regions specified as not pageable require
6101 * locked-down physical memory and physical page maps. The
6102 * access_type variable indicates types of accesses that must not
6103 * generate page faults. This is checked against protection of
6104 * memory being locked-down.
6106 * The map must not be locked, but a reference must remain to the
6107 * map throughout the call.
6109 static kern_return_t
6112 vm_map_offset_t start
,
6113 vm_map_offset_t end
,
6114 vm_prot_t caller_prot
,
6116 boolean_t user_wire
,
6118 vm_map_offset_t pmap_addr
,
6119 ppnum_t
*physpage_p
)
6121 vm_map_entry_t entry
;
6122 vm_prot_t access_type
;
6123 struct vm_map_entry
*first_entry
, tmp_entry
;
6125 vm_map_offset_t s
,e
;
6127 boolean_t need_wakeup
;
6128 boolean_t main_map
= FALSE
;
6129 wait_interrupt_t interruptible_state
;
6130 thread_t cur_thread
;
6131 unsigned int last_timestamp
;
6133 boolean_t wire_and_extract
;
6135 access_type
= (caller_prot
& VM_PROT_ALL
);
6137 wire_and_extract
= FALSE
;
6138 if (physpage_p
!= NULL
) {
6140 * The caller wants the physical page number of the
6141 * wired page. We return only one physical page number
6142 * so this works for only one page at a time.
6144 if ((end
- start
) != PAGE_SIZE
) {
6145 return KERN_INVALID_ARGUMENT
;
6147 wire_and_extract
= TRUE
;
6152 if(map_pmap
== NULL
)
6154 last_timestamp
= map
->timestamp
;
6156 VM_MAP_RANGE_CHECK(map
, start
, end
);
6157 assert(page_aligned(start
));
6158 assert(page_aligned(end
));
6159 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
6160 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
6162 /* We wired what the caller asked for, zero pages */
6164 return KERN_SUCCESS
;
6167 need_wakeup
= FALSE
;
6168 cur_thread
= current_thread();
6173 if (vm_map_lookup_entry(map
, s
, &first_entry
)) {
6174 entry
= first_entry
;
6176 * vm_map_clip_start will be done later.
6177 * We don't want to unnest any nested submaps here !
6180 /* Start address is not in map */
6181 rc
= KERN_INVALID_ADDRESS
;
6185 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
6187 * At this point, we have wired from "start" to "s".
6188 * We still need to wire from "s" to "end".
6190 * "entry" hasn't been clipped, so it could start before "s"
6191 * and/or end after "end".
6194 /* "e" is how far we want to wire in this entry */
6200 * If another thread is wiring/unwiring this entry then
6201 * block after informing other thread to wake us up.
6203 if (entry
->in_transition
) {
6204 wait_result_t wait_result
;
6207 * We have not clipped the entry. Make sure that
6208 * the start address is in range so that the lookup
6209 * below will succeed.
6210 * "s" is the current starting point: we've already
6211 * wired from "start" to "s" and we still have
6212 * to wire from "s" to "end".
6215 entry
->needs_wakeup
= TRUE
;
6218 * wake up anybody waiting on entries that we have
6222 vm_map_entry_wakeup(map
);
6223 need_wakeup
= FALSE
;
6226 * User wiring is interruptible
6228 wait_result
= vm_map_entry_wait(map
,
6229 (user_wire
) ? THREAD_ABORTSAFE
:
6231 if (user_wire
&& wait_result
== THREAD_INTERRUPTED
) {
6233 * undo the wirings we have done so far
6234 * We do not clear the needs_wakeup flag,
6235 * because we cannot tell if we were the
6243 * Cannot avoid a lookup here. reset timestamp.
6245 last_timestamp
= map
->timestamp
;
6248 * The entry could have been clipped, look it up again.
6249 * Worse that can happen is, it may not exist anymore.
6251 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
6253 * User: undo everything upto the previous
6254 * entry. let vm_map_unwire worry about
6255 * checking the validity of the range.
6260 entry
= first_entry
;
6264 if (entry
->is_sub_map
) {
6265 vm_map_offset_t sub_start
;
6266 vm_map_offset_t sub_end
;
6267 vm_map_offset_t local_start
;
6268 vm_map_offset_t local_end
;
6271 if (wire_and_extract
) {
6273 * Wiring would result in copy-on-write
6274 * which would not be compatible with
6275 * the sharing we have with the original
6276 * provider of this memory.
6278 rc
= KERN_INVALID_ARGUMENT
;
6282 vm_map_clip_start(map
, entry
, s
);
6283 vm_map_clip_end(map
, entry
, end
);
6285 sub_start
= VME_OFFSET(entry
);
6286 sub_end
= entry
->vme_end
;
6287 sub_end
+= VME_OFFSET(entry
) - entry
->vme_start
;
6289 local_end
= entry
->vme_end
;
6290 if(map_pmap
== NULL
) {
6292 vm_object_offset_t offset
;
6295 vm_map_entry_t local_entry
;
6296 vm_map_version_t version
;
6297 vm_map_t lookup_map
;
6299 if(entry
->use_pmap
) {
6300 pmap
= VME_SUBMAP(entry
)->pmap
;
6301 /* ppc implementation requires that */
6302 /* submaps pmap address ranges line */
6303 /* up with parent map */
6305 pmap_addr
= sub_start
;
6313 if (entry
->wired_count
) {
6314 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
)
6318 * The map was not unlocked:
6319 * no need to goto re-lookup.
6320 * Just go directly to next entry.
6322 entry
= entry
->vme_next
;
6323 s
= entry
->vme_start
;
6328 /* call vm_map_lookup_locked to */
6329 /* cause any needs copy to be */
6331 local_start
= entry
->vme_start
;
6333 vm_map_lock_write_to_read(map
);
6334 if(vm_map_lookup_locked(
6335 &lookup_map
, local_start
,
6336 access_type
| VM_PROT_COPY
,
6337 OBJECT_LOCK_EXCLUSIVE
,
6339 &offset
, &prot
, &wired
,
6343 vm_map_unlock_read(lookup_map
);
6344 assert(map_pmap
== NULL
);
6345 vm_map_unwire(map
, start
,
6347 return(KERN_FAILURE
);
6349 vm_object_unlock(object
);
6350 if(real_map
!= lookup_map
)
6351 vm_map_unlock(real_map
);
6352 vm_map_unlock_read(lookup_map
);
6355 /* we unlocked, so must re-lookup */
6356 if (!vm_map_lookup_entry(map
,
6364 * entry could have been "simplified",
6367 entry
= local_entry
;
6368 assert(s
== local_start
);
6369 vm_map_clip_start(map
, entry
, s
);
6370 vm_map_clip_end(map
, entry
, end
);
6371 /* re-compute "e" */
6376 /* did we have a change of type? */
6377 if (!entry
->is_sub_map
) {
6378 last_timestamp
= map
->timestamp
;
6382 local_start
= entry
->vme_start
;
6386 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
)
6389 entry
->in_transition
= TRUE
;
6392 rc
= vm_map_wire_nested(VME_SUBMAP(entry
),
6395 user_wire
, pmap
, pmap_addr
,
6400 * Find the entry again. It could have been clipped
6401 * after we unlocked the map.
6403 if (!vm_map_lookup_entry(map
, local_start
,
6405 panic("vm_map_wire: re-lookup failed");
6406 entry
= first_entry
;
6408 assert(local_start
== s
);
6409 /* re-compute "e" */
6414 last_timestamp
= map
->timestamp
;
6415 while ((entry
!= vm_map_to_entry(map
)) &&
6416 (entry
->vme_start
< e
)) {
6417 assert(entry
->in_transition
);
6418 entry
->in_transition
= FALSE
;
6419 if (entry
->needs_wakeup
) {
6420 entry
->needs_wakeup
= FALSE
;
6423 if (rc
!= KERN_SUCCESS
) {/* from vm_*_wire */
6424 subtract_wire_counts(map
, entry
, user_wire
);
6426 entry
= entry
->vme_next
;
6428 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6432 /* no need to relookup again */
6433 s
= entry
->vme_start
;
6438 * If this entry is already wired then increment
6439 * the appropriate wire reference count.
6441 if (entry
->wired_count
) {
6443 if ((entry
->protection
& access_type
) != access_type
) {
6444 /* found a protection problem */
6448 * We should always return an error
6449 * in this case but since we didn't
6450 * enforce it before, let's do
6451 * it only for the new "wire_and_extract"
6452 * code path for now...
6454 if (wire_and_extract
) {
6455 rc
= KERN_PROTECTION_FAILURE
;
6461 * entry is already wired down, get our reference
6462 * after clipping to our range.
6464 vm_map_clip_start(map
, entry
, s
);
6465 vm_map_clip_end(map
, entry
, end
);
6467 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
)
6470 if (wire_and_extract
) {
6472 vm_object_offset_t offset
;
6476 * We don't have to "wire" the page again
6477 * bit we still have to "extract" its
6478 * physical page number, after some sanity
6481 assert((entry
->vme_end
- entry
->vme_start
)
6483 assert(!entry
->needs_copy
);
6484 assert(!entry
->is_sub_map
);
6485 assert(VME_OBJECT(entry
));
6486 if (((entry
->vme_end
- entry
->vme_start
)
6488 entry
->needs_copy
||
6489 entry
->is_sub_map
||
6490 VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
6491 rc
= KERN_INVALID_ARGUMENT
;
6495 object
= VME_OBJECT(entry
);
6496 offset
= VME_OFFSET(entry
);
6497 /* need exclusive lock to update m->dirty */
6498 if (entry
->protection
& VM_PROT_WRITE
) {
6499 vm_object_lock(object
);
6501 vm_object_lock_shared(object
);
6503 m
= vm_page_lookup(object
, offset
);
6504 assert(m
!= VM_PAGE_NULL
);
6505 assert(VM_PAGE_WIRED(m
));
6506 if (m
!= VM_PAGE_NULL
&& VM_PAGE_WIRED(m
)) {
6507 *physpage_p
= VM_PAGE_GET_PHYS_PAGE(m
);
6508 if (entry
->protection
& VM_PROT_WRITE
) {
6509 vm_object_lock_assert_exclusive(
6511 m
->vmp_dirty
= TRUE
;
6514 /* not already wired !? */
6517 vm_object_unlock(object
);
6520 /* map was not unlocked: no need to relookup */
6521 entry
= entry
->vme_next
;
6522 s
= entry
->vme_start
;
6527 * Unwired entry or wire request transmitted via submap
6531 * Wiring would copy the pages to the shadow object.
6532 * The shadow object would not be code-signed so
6533 * attempting to execute code from these copied pages
6534 * would trigger a code-signing violation.
6537 if ((entry
->protection
& VM_PROT_EXECUTE
)
6538 #if !CONFIG_EMBEDDED
6540 map
!= kernel_map
&&
6541 cs_process_enforcement(NULL
)
6542 #endif /* !CONFIG_EMBEDDED */
6545 printf("pid %d[%s] wiring executable range from "
6546 "0x%llx to 0x%llx: rejected to preserve "
6549 (current_task()->bsd_info
6550 ? proc_name_address(current_task()->bsd_info
)
6552 (uint64_t) entry
->vme_start
,
6553 (uint64_t) entry
->vme_end
);
6554 #endif /* MACH_ASSERT */
6555 DTRACE_VM2(cs_executable_wire
,
6556 uint64_t, (uint64_t)entry
->vme_start
,
6557 uint64_t, (uint64_t)entry
->vme_end
);
6558 cs_executable_wire
++;
6559 rc
= KERN_PROTECTION_FAILURE
;
6564 * Perform actions of vm_map_lookup that need the write
6565 * lock on the map: create a shadow object for a
6566 * copy-on-write region, or an object for a zero-fill
6569 size
= entry
->vme_end
- entry
->vme_start
;
6571 * If wiring a copy-on-write page, we need to copy it now
6572 * even if we're only (currently) requesting read access.
6573 * This is aggressive, but once it's wired we can't move it.
6575 if (entry
->needs_copy
) {
6576 if (wire_and_extract
) {
6578 * We're supposed to share with the original
6579 * provider so should not be "needs_copy"
6581 rc
= KERN_INVALID_ARGUMENT
;
6585 VME_OBJECT_SHADOW(entry
, size
);
6586 entry
->needs_copy
= FALSE
;
6587 } else if (VME_OBJECT(entry
) == VM_OBJECT_NULL
) {
6588 if (wire_and_extract
) {
6590 * We're supposed to share with the original
6591 * provider so should already have an object.
6593 rc
= KERN_INVALID_ARGUMENT
;
6596 VME_OBJECT_SET(entry
, vm_object_allocate(size
));
6597 VME_OFFSET_SET(entry
, (vm_object_offset_t
)0);
6598 assert(entry
->use_pmap
);
6601 vm_map_clip_start(map
, entry
, s
);
6602 vm_map_clip_end(map
, entry
, end
);
6604 /* re-compute "e" */
6610 * Check for holes and protection mismatch.
6611 * Holes: Next entry should be contiguous unless this
6612 * is the end of the region.
6613 * Protection: Access requested must be allowed, unless
6614 * wiring is by protection class
6616 if ((entry
->vme_end
< end
) &&
6617 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
6618 (entry
->vme_next
->vme_start
> entry
->vme_end
))) {
6620 rc
= KERN_INVALID_ADDRESS
;
6623 if ((entry
->protection
& access_type
) != access_type
) {
6624 /* found a protection problem */
6625 rc
= KERN_PROTECTION_FAILURE
;
6629 assert(entry
->wired_count
== 0 && entry
->user_wired_count
== 0);
6631 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
)
6634 entry
->in_transition
= TRUE
;
6637 * This entry might get split once we unlock the map.
6638 * In vm_fault_wire(), we need the current range as
6639 * defined by this entry. In order for this to work
6640 * along with a simultaneous clip operation, we make a
6641 * temporary copy of this entry and use that for the
6642 * wiring. Note that the underlying objects do not
6643 * change during a clip.
6648 * The in_transition state guarentees that the entry
6649 * (or entries for this range, if split occured) will be
6650 * there when the map lock is acquired for the second time.
6654 if (!user_wire
&& cur_thread
!= THREAD_NULL
)
6655 interruptible_state
= thread_interrupt_level(THREAD_UNINT
);
6657 interruptible_state
= THREAD_UNINT
;
6660 rc
= vm_fault_wire(map
,
6661 &tmp_entry
, caller_prot
, tag
, map_pmap
, pmap_addr
,
6664 rc
= vm_fault_wire(map
,
6665 &tmp_entry
, caller_prot
, tag
, map
->pmap
,
6666 tmp_entry
.vme_start
,
6669 if (!user_wire
&& cur_thread
!= THREAD_NULL
)
6670 thread_interrupt_level(interruptible_state
);
6674 if (last_timestamp
+1 != map
->timestamp
) {
6676 * Find the entry again. It could have been clipped
6677 * after we unlocked the map.
6679 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
6681 panic("vm_map_wire: re-lookup failed");
6683 entry
= first_entry
;
6686 last_timestamp
= map
->timestamp
;
6688 while ((entry
!= vm_map_to_entry(map
)) &&
6689 (entry
->vme_start
< tmp_entry
.vme_end
)) {
6690 assert(entry
->in_transition
);
6691 entry
->in_transition
= FALSE
;
6692 if (entry
->needs_wakeup
) {
6693 entry
->needs_wakeup
= FALSE
;
6696 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6697 subtract_wire_counts(map
, entry
, user_wire
);
6699 entry
= entry
->vme_next
;
6702 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
6706 if ((entry
!= vm_map_to_entry(map
)) && /* we still have entries in the map */
6707 (tmp_entry
.vme_end
!= end
) && /* AND, we are not at the end of the requested range */
6708 (entry
->vme_start
!= tmp_entry
.vme_end
)) { /* AND, the next entry is not contiguous. */
6709 /* found a "new" hole */
6710 s
= tmp_entry
.vme_end
;
6711 rc
= KERN_INVALID_ADDRESS
;
6715 s
= entry
->vme_start
;
6717 } /* end while loop through map entries */
6720 if (rc
== KERN_SUCCESS
) {
6721 /* repair any damage we may have made to the VM map */
6722 vm_map_simplify_range(map
, start
, end
);
6728 * wake up anybody waiting on entries we wired.
6731 vm_map_entry_wakeup(map
);
6733 if (rc
!= KERN_SUCCESS
) {
6734 /* undo what has been wired so far */
6735 vm_map_unwire_nested(map
, start
, s
, user_wire
,
6736 map_pmap
, pmap_addr
);
kern_return_t
vm_map_wire_external(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_prot_t		caller_prot,
	boolean_t		user_wire)
{
	kern_return_t	kret;

	kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
				  user_wire, (pmap_t)NULL, 0, NULL);
	return kret;
}

kern_return_t
vm_map_wire_kernel(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_prot_t		caller_prot,
	vm_tag_t		tag,
	boolean_t		user_wire)
{
	kern_return_t	kret;

	kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
				  user_wire, (pmap_t)NULL, 0, NULL);
	return kret;
}

kern_return_t
vm_map_wire_and_extract_external(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_prot_t	caller_prot,
	boolean_t	user_wire,
	ppnum_t		*physpage_p)
{
	kern_return_t	kret;

	kret = vm_map_wire_nested(map,
				  start,
				  start+VM_MAP_PAGE_SIZE(map),
				  caller_prot,
				  vm_tag_bt(),
				  user_wire,
				  (pmap_t)NULL,
				  0,
				  physpage_p);
	if (kret != KERN_SUCCESS &&
	    physpage_p != NULL) {
		*physpage_p = 0;
	}
	return kret;
}

kern_return_t
vm_map_wire_and_extract_kernel(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_prot_t	caller_prot,
	vm_tag_t	tag,
	boolean_t	user_wire,
	ppnum_t		*physpage_p)
{
	kern_return_t	kret;

	kret = vm_map_wire_nested(map,
				  start,
				  start+VM_MAP_PAGE_SIZE(map),
				  caller_prot,
				  tag,
				  user_wire,
				  (pmap_t)NULL,
				  0,
				  physpage_p);
	if (kret != KERN_SUCCESS &&
	    physpage_p != NULL) {
		*physpage_p = 0;
	}
	return kret;
}
6833 * Sets the pageability of the specified address range in the target
6834 * as pageable. Regions specified must have been wired previously.
6836 * The map must not be locked, but a reference must remain to the map
6837 * throughout the call.
6839 * Kernel will panic on failures. User unwire ignores holes and
6840 * unwired and intransition entries to avoid losing memory by leaving
6843 static kern_return_t
6844 vm_map_unwire_nested(
6846 vm_map_offset_t start
,
6847 vm_map_offset_t end
,
6848 boolean_t user_wire
,
6850 vm_map_offset_t pmap_addr
)
6852 vm_map_entry_t entry
;
6853 struct vm_map_entry
*first_entry
, tmp_entry
;
	boolean_t		need_wakeup;
	boolean_t		main_map = FALSE;
	unsigned int		last_timestamp;

	if (map_pmap == NULL)
		main_map = TRUE;
	last_timestamp = map->timestamp;

	VM_MAP_RANGE_CHECK(map, start, end);
	assert(page_aligned(start));
	assert(page_aligned(end));
	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));

	if (start == end) {
		/* We unwired what the caller asked for: zero pages */
		return KERN_SUCCESS;
	}

	if (vm_map_lookup_entry(map, start, &first_entry)) {
		entry = first_entry;
		/*
		 * vm_map_clip_start will be done later.
		 * We don't want to unnest any nested sub maps here !
		 */
	} else {
		panic("vm_map_unwire: start not found");
		/* Start address is not in map. */
		return(KERN_INVALID_ADDRESS);
	}

	if (entry->superpage_size) {
		/* superpages are always wired */
		return KERN_INVALID_ADDRESS;
	}

	need_wakeup = FALSE;
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		if (entry->in_transition) {
			/*
			 * 1)
			 * Another thread is wiring down this entry. Note
			 * that if it is not for the other thread we would
			 * be unwiring an unwired entry.  This is not
			 * permitted.  If we wait, we will be unwiring memory
			 * we did not wire.
			 *
			 * 2)
			 * Another thread is unwiring this entry.  We did not
			 * have a reference to it, because if we did, this
			 * entry will not be getting unwired now.
			 *
			 * This could happen:  there could be some
			 * overlapping vslock/vsunlock operations
			 * going on.
			 * We should probably just wait and retry,
			 * but then we have to be careful that this
			 * entry could get "simplified" after
			 * "in_transition" gets unset and before
			 * we re-lookup the entry, so we would
			 * have to re-clip the entry to avoid
			 * re-unwiring what we have already unwired...
			 * See vm_map_wire_nested().
			 *
			 * Or we could just ignore "in_transition"
			 * here and proceed to decement the wired
			 * count(s) on this entry.  That should be fine
			 * as long as "wired_count" doesn't drop all
			 * the way to 0 (and we should panic if THAT
			 * happens).
			 */
			panic("vm_map_unwire: in_transition entry");

			entry = entry->vme_next;
			continue;
		}

		if (entry->is_sub_map) {
			vm_map_offset_t	sub_start;
			vm_map_offset_t	sub_end;
			vm_map_offset_t	local_end;

			vm_map_clip_start(map, entry, start);
			vm_map_clip_end(map, entry, end);

			sub_start = VME_OFFSET(entry);
			sub_end = entry->vme_end - entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			if (map_pmap == NULL) {
				if (entry->use_pmap) {
					pmap = VME_SUBMAP(entry)->pmap;
					pmap_addr = sub_start;
				}
				if (entry->wired_count == 0 ||
				    (user_wire && entry->user_wired_count == 0)) {
					panic("vm_map_unwire: entry is unwired");
					entry = entry->vme_next;
					continue;
				}

				/*
				 * Holes: Next entry should be contiguous unless
				 * this is the end of the region.
				 */
				if (((entry->vme_end < end) &&
				     ((entry->vme_next == vm_map_to_entry(map)) ||
				      (entry->vme_next->vme_start > entry->vme_end)))) {
					panic("vm_map_unwire: non-contiguous region");
					entry = entry->vme_next;
					continue;
				}

				subtract_wire_counts(map, entry, user_wire);

				if (entry->wired_count != 0) {
					entry = entry->vme_next;
					continue;
				}

				entry->in_transition = TRUE;
				tmp_entry = *entry;/* see comment in vm_map_wire() */

				/*
				 * We can unlock the map now. The in_transition state
				 * guarantees existance of the entry.
				 */
				vm_map_unwire_nested(VME_SUBMAP(entry),
				    sub_start, sub_end, user_wire, pmap, pmap_addr);

				if (last_timestamp+1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 */
					if (!vm_map_lookup_entry(map,
					    tmp_entry.vme_start,
					    &first_entry)) {
						panic("vm_map_unwire: re-lookup failed");
						entry = first_entry->vme_next;
					} else
						entry = first_entry;
				}
				last_timestamp = map->timestamp;

				/*
				 * clear transition bit for all constituent entries
				 * that were in the original entry (saved in
				 * tmp_entry).  Also check for waiters.
				 */
				while ((entry != vm_map_to_entry(map)) &&
				       (entry->vme_start < tmp_entry.vme_end)) {
					assert(entry->in_transition);
					entry->in_transition = FALSE;
					if (entry->needs_wakeup) {
						entry->needs_wakeup = FALSE;
						need_wakeup = TRUE;
					}
					entry = entry->vme_next;
				}
				continue;
			} else {
				vm_map_unwire_nested(VME_SUBMAP(entry),
				    sub_start, sub_end, user_wire, map_pmap,
				    pmap_addr);

				if (last_timestamp+1 != map->timestamp) {
					/*
					 * Find the entry again.  It could have been
					 * clipped or deleted after we unlocked the map.
					 */
					if (!vm_map_lookup_entry(map,
					    tmp_entry.vme_start,
					    &first_entry)) {
						panic("vm_map_unwire: re-lookup failed");
						entry = first_entry->vme_next;
					} else
						entry = first_entry;
				}
				last_timestamp = map->timestamp;
			}
		}

		if ((entry->wired_count == 0) ||
		    (user_wire && entry->user_wired_count == 0)) {
			panic("vm_map_unwire: entry is unwired");
			entry = entry->vme_next;
			continue;
		}

		assert(entry->wired_count > 0 &&
		       (!user_wire || entry->user_wired_count > 0));

		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);

		/*
		 * Holes: Next entry should be contiguous unless
		 * this is the end of the region.
		 */
		if (((entry->vme_end < end) &&
		     ((entry->vme_next == vm_map_to_entry(map)) ||
		      (entry->vme_next->vme_start > entry->vme_end)))) {
			panic("vm_map_unwire: non-contiguous region");
			entry = entry->vme_next;
			continue;
		}

		subtract_wire_counts(map, entry, user_wire);

		if (entry->wired_count != 0) {
			entry = entry->vme_next;
			continue;
		}

		if (entry->zero_wired_pages) {
			entry->zero_wired_pages = FALSE;
		}

		entry->in_transition = TRUE;
		tmp_entry = *entry;	/* see comment in vm_map_wire() */

		/*
		 * We can unlock the map now. The in_transition state
		 * guarantees existance of the entry.
		 */
		if (map_pmap) {
			vm_fault_unwire(map,
			    &tmp_entry, FALSE, map_pmap, pmap_addr);
		} else {
			vm_fault_unwire(map,
			    &tmp_entry, FALSE, map->pmap,
			    tmp_entry.vme_start);
		}

		if (last_timestamp+1 != map->timestamp) {
			/*
			 * Find the entry again.  It could have been clipped
			 * or deleted after we unlocked the map.
			 */
			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
						 &first_entry)) {
				panic("vm_map_unwire: re-lookup failed");
				entry = first_entry->vme_next;
			} else
				entry = first_entry;
		}
		last_timestamp = map->timestamp;

		/*
		 * clear transition bit for all constituent entries that
		 * were in the original entry (saved in tmp_entry).  Also
		 * check for waiters.
		 */
		while ((entry != vm_map_to_entry(map)) &&
		       (entry->vme_start < tmp_entry.vme_end)) {
			assert(entry->in_transition);
			entry->in_transition = FALSE;
			if (entry->needs_wakeup) {
				entry->needs_wakeup = FALSE;
				need_wakeup = TRUE;
			}
			entry = entry->vme_next;
		}
	}

	/*
	 * We might have fragmented the address space when we wired this
	 * range of addresses.  Attempt to re-coalesce these VM map entries
	 * with their neighbors now that they're no longer wired.
	 * Under some circumstances, address space fragmentation can
	 * prevent VM object shadow chain collapsing, which can cause
	 * excessive memory usage.
	 */
	vm_map_simplify_range(map, start, end);

	/*
	 * wake up anybody waiting on entries that we have unwired.
	 */
	if (need_wakeup)
		vm_map_entry_wakeup(map);
	return(KERN_SUCCESS);
}
kern_return_t
vm_map_unwire(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	boolean_t	user_wire)
{
	return vm_map_unwire_nested(map, start, end,
	    user_wire, (pmap_t)NULL, 0);
}
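
/*
 * Usage sketch (editor's illustration, not part of the original source):
 * a caller that wired a user range would normally undo it through the
 * exported wrapper above rather than vm_map_unwire_nested(), letting the
 * wrapper supply the NULL pmap and 0 pmap_addr arguments.  "addr" and
 * "size" below are hypothetical values.
 *
 *	kern_return_t kr;
 *	vm_map_t user_map = current_map();
 *
 *	kr = vm_map_unwire(user_map,
 *	    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(user_map)),
 *	    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(user_map)),
 *	    TRUE);		(user_wire == TRUE for user-requested wirings)
 *	assert(kr == KERN_SUCCESS);
 */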
/*
 *	vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
 */
static void
vm_map_entry_delete(
	vm_map_t	map,
	vm_map_entry_t	entry)
{
	vm_map_offset_t	s, e;
	vm_object_t	object;
	vm_map_t	submap;

	s = entry->vme_start;
	e = entry->vme_end;
	assert(page_aligned(s));
	assert(page_aligned(e));
	if (entry->map_aligned == TRUE) {
		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
	}
	assert(entry->wired_count == 0);
	assert(entry->user_wired_count == 0);
	assert(!entry->permanent);

	if (entry->is_sub_map) {
		object = NULL;
		submap = VME_SUBMAP(entry);
	} else {
		submap = NULL;
		object = VME_OBJECT(entry);
	}

	vm_map_store_entry_unlink(map, entry);

	vm_map_entry_dispose(map, entry);

	/*
	 *	Deallocate the object only after removing all
	 *	pmap entries pointing to its pages.
	 */
	if (submap)
		vm_map_deallocate(submap);
	else
		vm_object_deallocate(object);
}
void
vm_map_submap_pmap_clean(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_map_t	sub_map,
	vm_map_offset_t	offset)
{
	vm_map_offset_t	submap_start;
	vm_map_offset_t	submap_end;
	vm_map_size_t	remove_size;
	vm_map_entry_t	entry;

	submap_end = offset + (end - start);
	submap_start = offset;

	vm_map_lock_read(sub_map);
	if (vm_map_lookup_entry(sub_map, offset, &entry)) {

		remove_size = (entry->vme_end - entry->vme_start);
		if (offset > entry->vme_start)
			remove_size -= offset - entry->vme_start;

		if (submap_end < entry->vme_end) {
			remove_size -=
				entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				start,
				start + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if ((map->mapped_in_other_pmaps) && (map->map_refcnt)
			    && (VME_OBJECT(entry) != NULL)) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					(VME_OFFSET(entry) +
					 offset -
					 entry->vme_start),
					remove_size,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
					    (addr64_t)start,
					    (addr64_t)(start + remove_size));
			}
		}
	}

	entry = entry->vme_next;

	while ((entry != vm_map_to_entry(sub_map))
	       && (entry->vme_start < submap_end)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (submap_end < entry->vme_end) {
			remove_size -= entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				(start + entry->vme_start) - offset,
				((start + entry->vme_start) - offset) + remove_size,
				VME_SUBMAP(entry),
				VME_OFFSET(entry));
		} else {
			if ((map->mapped_in_other_pmaps) && (map->map_refcnt)
			    && (VME_OBJECT(entry) != NULL)) {
				vm_object_pmap_protect_options(
					VME_OBJECT(entry),
					VME_OFFSET(entry),
					remove_size,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else {
				pmap_remove(map->pmap,
					    (addr64_t)((start + entry->vme_start)
						       - offset),
					    (addr64_t)(((start + entry->vme_start)
							- offset) + remove_size));
			}
		}
		entry = entry->vme_next;
	}
	vm_map_unlock_read(sub_map);
	return;
}
/*
 * virt_memory_guard_ast:
 *
 * Handle the AST callout for a virtual memory guard.
 * raise an EXC_GUARD exception and terminate the task
 * if configured to do so.
 */
void
virt_memory_guard_ast(
	thread_t thread,
	mach_exception_data_type_t code,
	mach_exception_data_type_t subcode)
{
	task_t task = thread->task;
	assert(task != kernel_task);
	assert(task == current_task());
	uint32_t behavior;

	behavior = task->task_exc_guard;

	/* Is delivery enabled */
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return;
	}

	/* If only once, make sure we're that once */
	while (behavior & TASK_EXC_GUARD_VM_ONCE) {
		uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;

		if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
			break;
		}
		behavior = task->task_exc_guard;
		if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
			return;
		}
	}

	/* Raise exception via corpse fork or synchronously */
	if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
	    (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
		task_violated_guard(code, subcode, NULL);
	} else {
		task_exception_notify(EXC_GUARD, code, subcode);
	}

	/* Terminate the task if desired */
	if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
		task_bsdtask_kill(current_task());
	}
}
/*
 * vm_map_guard_exception:
 *
 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
 *
 * Right now, we do this when we find nothing mapped, or a
 * gap in the mapping when a user address space deallocate
 * was requested. We report the address of the first gap found.
 */
static void
vm_map_guard_exception(
	vm_map_offset_t gap_start,
	unsigned reason)
{
	mach_exception_code_t code = 0;
	unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
	unsigned int target = 0; /* should we pass in pid associated with map? */
	mach_exception_data_type_t subcode = (uint64_t)gap_start;

	/* Can't deliver exceptions to kernel task */
	if (current_task() == kernel_task)
		return;

	EXC_GUARD_ENCODE_TYPE(code, guard_type);
	EXC_GUARD_ENCODE_FLAVOR(code, reason);
	EXC_GUARD_ENCODE_TARGET(code, target);
	thread_guard_violation(current_thread(), code, subcode);
}
/*
 *	vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target map.
 *	Removes all user wirings. Unwires one kernel wiring if
 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
 *
 *	This routine is called with map locked and leaves map locked.
 */
static kern_return_t
vm_map_delete(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	int		flags,
	vm_map_t	zap_map)
{
	vm_map_entry_t		entry, next;
	struct vm_map_entry	*first_entry, tmp_entry;
	vm_map_offset_t		s;
	vm_object_t		object;
	boolean_t		need_wakeup;
	int			interruptible;
	unsigned int		last_timestamp = ~0; /* unlikely value */
	vm_map_offset_t		gap_start;
	vm_map_offset_t		save_start = start;
	vm_map_offset_t		save_end = end;
	const vm_map_offset_t	FIND_GAP = 1;	/* a not page aligned value */
	const vm_map_offset_t	GAPS_OK = 2;	/* a different not page aligned value */

	if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK))
		gap_start = FIND_GAP;
	else
		gap_start = GAPS_OK;

	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
		THREAD_ABORTSAFE : THREAD_UNINT;

	/*
	 * All our DMA I/O operations in IOKit are currently done by
	 * wiring through the map entries of the task requesting the I/O.
	 * Because of this, we must always wait for kernel wirings
	 * to go away on the entries before deleting them.
	 *
	 * Any caller who wants to actually remove a kernel wiring
	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
	 * properly remove one wiring instead of blasting through
	 * them all.
	 */
	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;

	/*
	 *	Find the start of the region, and clip it
	 */
	if (vm_map_lookup_entry(map, start, &first_entry)) {
		entry = first_entry;
		if (map == kalloc_map &&
		    (entry->vme_start != start ||
		     entry->vme_end != end)) {
			panic("vm_map_delete(%p,0x%llx,0x%llx): "
			      "mismatched entry %p [0x%llx:0x%llx]\n",
			      map,
			      (uint64_t)start,
			      (uint64_t)end,
			      entry,
			      (uint64_t)entry->vme_start,
			      (uint64_t)entry->vme_end);
		}

		/*
		 * If in a superpage, extend the range to include the start of the mapping.
		 */
		if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
			start = SUPERPAGE_ROUND_DOWN(start);
		}

		if (start == entry->vme_start) {
			/*
			 * No need to clip.  We don't want to cause
			 * any unnecessary unnesting in this case...
			 */
		} else {
			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
			    entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(
				    start,
				    VM_MAP_PAGE_MASK(map))) {
				/*
				 * The entry will no longer be
				 * map-aligned after clipping
				 * and the caller said it's OK.
				 */
				entry->map_aligned = FALSE;
			}
			if (map == kalloc_map) {
				panic("vm_map_delete(%p,0x%llx,0x%llx):"
				      " clipping %p at 0x%llx\n",
				      map,
				      (uint64_t)start,
				      (uint64_t)end,
				      entry,
				      (uint64_t)start);
			}
			vm_map_clip_start(map, entry, start);
		}

		/*
		 *	Fix the lookup hint now, rather than each
		 *	time through the loop.
		 */
		SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
	} else {
		if (map->pmap == kernel_pmap &&
		    map->map_refcnt != 0) {
			panic("vm_map_delete(%p,0x%llx,0x%llx): "
			      "no map entry at 0x%llx\n",
			      map,
			      (uint64_t)start,
			      (uint64_t)end,
			      (uint64_t)start);
		}
		entry = first_entry->vme_next;
		if (gap_start == FIND_GAP)
			gap_start = start;
	}

	if (entry->superpage_size)
		end = SUPERPAGE_ROUND_UP(end);

	need_wakeup = FALSE;
	/*
	 *	Step through all entries in this region
	 */
	s = entry->vme_start;
	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have deleted all the memory entries
		 * between "start" and "s".  We still need to delete
		 * all memory entries between "s" and "end".
		 * While we were blocked and the map was unlocked, some
		 * new memory entries could have been re-allocated between
		 * "start" and "s" and we don't want to mess with those.
		 * Some of those entries could even have been re-assembled
		 * with an entry after "s" (in vm_map_simplify_entry()), so
		 * we may have to vm_map_clip_start() again.
		 */

		if (entry->vme_start >= s) {
			/*
			 * This entry starts on or after "s"
			 * so no need to clip its start.
			 */
		} else {
			/*
			 * This entry has been re-assembled by a
			 * vm_map_simplify_entry().  We need to
			 * re-clip its start.
			 */
			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
			    entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(s,
						 VM_MAP_PAGE_MASK(map))) {
				/*
				 * The entry will no longer be map-aligned
				 * after clipping and the caller said it's OK.
				 */
				entry->map_aligned = FALSE;
			}
			if (map == kalloc_map) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				      "clipping %p at 0x%llx\n",
				      map,
				      (uint64_t)start,
				      (uint64_t)end,
				      entry,
				      (uint64_t)s);
			}
			vm_map_clip_start(map, entry, s);
		}
		if (entry->vme_end <= end) {
			/*
			 * This entry is going away completely, so no need
			 * to clip and possibly cause an unnecessary unnesting.
			 */
		} else {
			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
			    entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(end,
						 VM_MAP_PAGE_MASK(map))) {
				/*
				 * The entry will no longer be map-aligned
				 * after clipping and the caller said it's OK.
				 */
				entry->map_aligned = FALSE;
			}
			if (map == kalloc_map) {
				panic("vm_map_delete(%p,0x%llx,0x%llx): "
				      "clipping %p at 0x%llx\n",
				      map,
				      (uint64_t)start,
				      (uint64_t)end,
				      entry,
				      (uint64_t)end);
			}
			vm_map_clip_end(map, entry, end);
		}

		if (entry->permanent) {
			if (map->pmap == kernel_pmap) {
				panic("%s(%p,0x%llx,0x%llx): "
				      "attempt to remove permanent "
				      "VM map entry "
				      "%p [0x%llx:0x%llx]\n",
				      __FUNCTION__,
				      map,
				      (uint64_t)start,
				      (uint64_t)end,
				      entry,
				      (uint64_t) entry->vme_start,
				      (uint64_t) entry->vme_end);
			} else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
//				printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
				entry->permanent = FALSE;
			} else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
				entry->permanent = FALSE;

				printf("%d[%s] %s(0x%llx,0x%llx): "
				       "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
				       "prot 0x%x/0x%x\n",
				       proc_selfpid(),
				       (current_task()->bsd_info
					? proc_name_address(current_task()->bsd_info)
					: "?"),
				       __FUNCTION__,
				       (uint64_t)start,
				       (uint64_t)end,
				       (uint64_t)entry->vme_start,
				       (uint64_t)entry->vme_end,
				       entry->protection,
				       entry->max_protection);
			} else {
				if (vm_map_executable_immutable_verbose) {
					printf("%d[%s] %s(0x%llx,0x%llx): "
					       "permanent entry [0x%llx:0x%llx] "
					       "prot 0x%x/0x%x\n",
					       proc_selfpid(),
					       (current_task()->bsd_info
						? proc_name_address(current_task()->bsd_info)
						: "?"),
					       __FUNCTION__,
					       (uint64_t)start,
					       (uint64_t)end,
					       (uint64_t)entry->vme_start,
					       (uint64_t)entry->vme_end,
					       entry->protection,
					       entry->max_protection);
				}
				/*
				 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
				 */
				DTRACE_VM5(vm_map_delete_permanent,
					   vm_map_offset_t, entry->vme_start,
					   vm_map_offset_t, entry->vme_end,
					   vm_prot_t, entry->protection,
					   vm_prot_t, entry->max_protection,
					   int, VME_ALIAS(entry));
			}
		}

		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * Another thread is wiring/unwiring this entry.
			 * Let the other thread know we are waiting.
			 */
			assert(s == entry->vme_start);
			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already unwired/deleted.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}

			wait_result = vm_map_entry_wait(map, interruptible);

			if (interruptible &&
			    wait_result == THREAD_INTERRUPTED) {
				/*
				 * We do not clear the needs_wakeup flag,
				 * since we cannot tell if we were the only one.
				 */
				return KERN_ABORTED;
			}

			/*
			 * The entry could have been clipped or it
			 * may not exist anymore.  Look it up again.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: use the next entry
				 */
				if (gap_start == FIND_GAP)
					gap_start = s;
				entry = first_entry->vme_next;
				s = entry->vme_start;
			} else {
				entry = first_entry;
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			last_timestamp = map->timestamp;
			continue;
		} /* end in_transition */

		if (entry->wired_count) {
			boolean_t	user_wire;

			user_wire = entry->user_wired_count > 0;

			/*
			 *	Remove a kernel wiring if requested
			 */
			if (flags & VM_MAP_REMOVE_KUNWIRE) {
				entry->wired_count--;
			}

			/*
			 *	Remove all user wirings for proper accounting
			 */
			if (entry->user_wired_count > 0) {
				while (entry->user_wired_count)
					subtract_wire_counts(map, entry, user_wire);
			}

			if (entry->wired_count != 0) {
				assert(map != kernel_map);
				/*
				 * Cannot continue.  Typical case is when
				 * a user thread has physical io pending on
				 * on this page.  Either wait for the
				 * kernel wiring to go away or return an
				 * error.
				 */
				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
					wait_result_t wait_result;

					assert(s == entry->vme_start);
					entry->needs_wakeup = TRUE;
					wait_result = vm_map_entry_wait(map,
									interruptible);

					if (interruptible &&
					    wait_result == THREAD_INTERRUPTED) {
						/*
						 * We do not clear the
						 * needs_wakeup flag, since we
						 * cannot tell if we were the
						 * only one.
						 */
						return KERN_ABORTED;
					}

					/*
					 * The entry could have been clipped or
					 * it may not exist anymore.  Look it
					 * up again.
					 */
					if (!vm_map_lookup_entry(map, s,
								 &first_entry)) {
						assert(map != kernel_map);
						/*
						 * User: use the next entry
						 */
						if (gap_start == FIND_GAP)
							gap_start = s;
						entry = first_entry->vme_next;
						s = entry->vme_start;
					} else {
						entry = first_entry;
						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
					}
					last_timestamp = map->timestamp;
					continue;
				} else {
					return KERN_FAILURE;
				}
			}

			entry->in_transition = TRUE;
			/*
			 * copy current entry.  see comment in vm_map_wire()
			 */
			tmp_entry = *entry;
			assert(s == entry->vme_start);

			/*
			 * We can unlock the map now. The in_transition
			 * state guarentees existance of the entry.
			 */
			vm_map_unlock(map);

			if (tmp_entry.is_sub_map) {
				vm_map_t sub_map;
				vm_map_offset_t sub_start, sub_end;
				pmap_t pmap;
				vm_map_offset_t pmap_addr;

				sub_map = VME_SUBMAP(&tmp_entry);
				sub_start = VME_OFFSET(&tmp_entry);
				sub_end = sub_start + (tmp_entry.vme_end -
						       tmp_entry.vme_start);
				if (tmp_entry.use_pmap) {
					pmap = sub_map->pmap;
					pmap_addr = tmp_entry.vme_start;
				} else {
					pmap = map->pmap;
					pmap_addr = tmp_entry.vme_start;
				}
				(void) vm_map_unwire_nested(sub_map,
							    sub_start, sub_end,
							    user_wire,
							    pmap, pmap_addr);
			} else {
				if (VME_OBJECT(&tmp_entry) == kernel_object) {
					pmap_protect_options(
						map->pmap,
						tmp_entry.vme_start,
						tmp_entry.vme_end,
						VM_PROT_NONE,
						PMAP_OPTIONS_REMOVE,
						NULL);
				}
				vm_fault_unwire(map, &tmp_entry,
						VME_OBJECT(&tmp_entry) == kernel_object,
						map->pmap, tmp_entry.vme_start);
			}

			vm_map_lock(map);

			if (last_timestamp+1 != map->timestamp) {
				/*
				 * Find the entry again.  It could have
				 * been clipped after we unlocked the map.
				 */
				if (!vm_map_lookup_entry(map, s, &first_entry)){
					assert((map != kernel_map) &&
					       (!entry->is_sub_map));
					if (gap_start == FIND_GAP)
						gap_start = s;
					first_entry = first_entry->vme_next;
					s = first_entry->vme_start;
				} else {
					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				}
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				first_entry = entry;
			}

			last_timestamp = map->timestamp;

			entry = first_entry;
			while ((entry != vm_map_to_entry(map)) &&
			       (entry->vme_start < tmp_entry.vme_end)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				entry = entry->vme_next;
			}
			/*
			 * We have unwired the entry(s).  Go back and
			 * delete them.
			 */
			entry = first_entry;
			continue;
		}

		/* entry is unwired */
		assert(entry->wired_count == 0);
		assert(entry->user_wired_count == 0);

		assert(s == entry->vme_start);

		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
			/*
			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
			 * vm_map_delete(), some map entries might have been
			 * transferred to a "zap_map", which doesn't have a
			 * pmap.  The original pmap has already been flushed
			 * in the vm_map_delete() call targeting the original
			 * map, but when we get to destroying the "zap_map",
			 * we don't have any pmap to flush, so let's just skip
			 * all this.
			 */
		} else if (entry->is_sub_map) {
			if (entry->use_pmap) {
#ifndef NO_NESTED_PMAP
				int pmap_flags;

				if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
					/*
					 * This is the final cleanup of the
					 * address space being terminated.
					 * No new mappings are expected and
					 * we don't really need to unnest the
					 * shared region (and lose the "global"
					 * pmap mappings, if applicable).
					 *
					 * Tell the pmap layer that we're
					 * "clean" wrt nesting.
					 */
					pmap_flags = PMAP_UNNEST_CLEAN;
				} else {
					/*
					 * We're unmapping part of the nested
					 * shared region, so we can't keep the
					 * nested pmap.
					 */
					pmap_flags = 0;
				}
				pmap_unnest_options(
					map->pmap,
					(addr64_t)entry->vme_start,
					entry->vme_end - entry->vme_start,
					pmap_flags);
#endif	/* NO_NESTED_PMAP */
				if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
					/* clean up parent map/maps */
					vm_map_submap_pmap_clean(
						map, entry->vme_start,
						entry->vme_end,
						VME_SUBMAP(entry),
						VME_OFFSET(entry));
				}
			} else {
				vm_map_submap_pmap_clean(
					map, entry->vme_start, entry->vme_end,
					VME_SUBMAP(entry),
					VME_OFFSET(entry));
			}
		} else if (VME_OBJECT(entry) != kernel_object &&
			   VME_OBJECT(entry) != compressor_object) {
			object = VME_OBJECT(entry);
			if ((map->mapped_in_other_pmaps) && (map->map_refcnt)) {
				vm_object_pmap_protect_options(
					object, VME_OFFSET(entry),
					entry->vme_end - entry->vme_start,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
				   (map->pmap == kernel_pmap)) {
				/* Remove translations associated
				 * with this range unless the entry
				 * does not have an object, or
				 * it's the kernel map or a descendant
				 * since the platform could potentially
				 * create "backdoor" mappings invisible
				 * to the VM. It is expected that
				 * objectless, non-kernel ranges
				 * do not have such VM invisible
				 * translations.
				 */
				pmap_remove_options(map->pmap,
						    (addr64_t)entry->vme_start,
						    (addr64_t)entry->vme_end,
						    PMAP_OPTIONS_REMOVE);
			}
		}

		if (entry->iokit_acct) {
			/* alternate accounting */
			DTRACE_VM4(vm_map_iokit_unmapped_region,
				   vm_map_t, map,
				   vm_map_offset_t, entry->vme_start,
				   vm_map_offset_t, entry->vme_end,
				   int, VME_ALIAS(entry));
			vm_map_iokit_unmapped_region(map,
						     (entry->vme_end -
						      entry->vme_start));
			entry->iokit_acct = FALSE;
			entry->use_pmap = FALSE;
		}

		/*
		 * All pmap mappings for this map entry must have been
		 * cleared by now.
		 */
		assert(vm_map_pmap_is_empty(map,
					    entry->vme_start,
					    entry->vme_end));

		next = entry->vme_next;

		if (map->pmap == kernel_pmap &&
		    map->map_refcnt != 0 &&
		    entry->vme_end < end &&
		    (next == vm_map_to_entry(map) ||
		     next->vme_start != entry->vme_end)) {
			panic("vm_map_delete(%p,0x%llx,0x%llx): "
			      "hole after %p at 0x%llx\n",
			      map,
			      (uint64_t)start,
			      (uint64_t)end,
			      entry,
			      (uint64_t)entry->vme_end);
		}

		/*
		 * If the desired range didn't end with "entry", then there is a gap if
		 * we wrapped around to the start of the map or if "entry" and "next"
		 * aren't contiguous.
		 *
		 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
		 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
		 */
		if (gap_start == FIND_GAP &&
		    vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
		    (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
			gap_start = entry->vme_end;
		}
		s = next->vme_start;
		last_timestamp = map->timestamp;

		if (entry->permanent) {
			/*
			 * A permanent entry can not be removed, so leave it
			 * in place but remove all access permissions.
			 */
			entry->protection = VM_PROT_NONE;
			entry->max_protection = VM_PROT_NONE;
		} else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
			   zap_map != VM_MAP_NULL) {
			vm_map_size_t entry_size;
			/*
			 * The caller wants to save the affected VM map entries
			 * into the "zap_map".  The caller will take care of
			 * these entries.
			 */
			/* unlink the entry from "map" ... */
			vm_map_store_entry_unlink(map, entry);
			/* ... and add it to the end of the "zap_map" */
			vm_map_store_entry_link(zap_map,
						vm_map_last_entry(zap_map),
						entry,
						VM_MAP_KERNEL_FLAGS_NONE);
			entry_size = entry->vme_end - entry->vme_start;
			map->size -= entry_size;
			zap_map->size += entry_size;
			/* we didn't unlock the map, so no timestamp increase */
		} else {
			vm_map_entry_delete(map, entry);
			/* vm_map_entry_delete unlocks the map */
			vm_map_lock(map);
		}

		entry = next;

		if (entry == vm_map_to_entry(map)) {
			break;
		}
		if (last_timestamp + 1 != map->timestamp) {
			/*
			 * We are responsible for deleting everything
			 * from the given space. If someone has interfered,
			 * we pick up where we left off. Back fills should
			 * be all right for anyone, except map_delete, and
			 * we have to assume that the task has been fully
			 * disabled before we get here
			 */
			if (!vm_map_lookup_entry(map, s, &entry)){
				entry = entry->vme_next;

				/*
				 * Nothing found for s. If we weren't already done, then there is a gap.
				 */
				if (gap_start == FIND_GAP && s < end)
					gap_start = s;
				s = entry->vme_start;
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			/*
			 * others can not only allocate behind us, we can
			 * also see coalesce while we don't have the map lock
			 */
			if (entry == vm_map_to_entry(map)) {
				break;
			}
		}
		last_timestamp = map->timestamp;
	}

	if (map->wait_for_space)
		thread_wakeup((event_t) map);
	/*
	 * wake up anybody waiting on entries that we have already deleted.
	 */
	if (need_wakeup)
		vm_map_entry_wakeup(map);

	if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
		DTRACE_VM3(kern_vm_deallocate_gap,
			   vm_map_offset_t, gap_start,
			   vm_map_offset_t, save_start,
			   vm_map_offset_t, save_end);
		if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
#if defined(DEVELOPMENT) || defined(DEBUG)
			/* log just once if not checking, otherwise log each one */
			if (!map->warned_delete_gap ||
			    (task_exc_guard_default & TASK_EXC_GUARD_VM_ALL) != 0) {
				printf("vm_map_delete: map %p [%p...%p] nothing at %p\n",
				       (void *)map, (void *)save_start, (void *)save_end,
				       (void *)gap_start);
				if (!map->warned_delete_gap) {
					map->warned_delete_gap = 1;
				}
			}
#endif
			vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
		}
	}

	return KERN_SUCCESS;
}
/*
 *	vm_map_remove:
 *
 *	Remove the given address range from the target map.
 *	This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	boolean_t	flags)
{
	kern_return_t	result;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	/*
	 * For the zone_map, the kernel controls the allocation/freeing of memory.
	 * Any free to the zone_map should be within the bounds of the map and
	 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
	 * free to the zone_map into a no-op, there is a problem and we should
	 * panic.
	 */
	if ((map == zone_map) && (start == end))
		panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	vm_map_unlock(map);

	return(result);
}
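
/*
 * Usage sketch (editor's illustration, not from the original source):
 * a kernel caller that wired a kernel-map range once would remove it
 * with the single kernel unwire requested explicitly, e.g.
 *
 *	kern_return_t kr;
 *	kr = vm_map_remove(kernel_map,
 *	    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(kernel_map)),
 *	    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(kernel_map)),
 *	    VM_MAP_REMOVE_KUNWIRE);
 *	assert(kr == KERN_SUCCESS);
 *
 * "addr" and "size" are hypothetical.  Without VM_MAP_REMOVE_KUNWIRE the
 * call would instead wait for the kernel wiring to go away, as described
 * in the comments above vm_map_delete().
 */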
/*
 *	vm_map_remove_locked:
 *
 *	Remove the given address range from the target locked map.
 *	This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove_locked(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	boolean_t	flags)
{
	kern_return_t	result;

	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	return(result);
}
/*
 *	Routine:	vm_map_copy_allocate
 *
 *	Description:
 *		Allocates and initializes a map copy object.
 */
static vm_map_copy_t
vm_map_copy_allocate(void)
{
	vm_map_copy_t new_copy;

	new_copy = zalloc(vm_map_copy_zone);
	bzero(new_copy, sizeof (*new_copy));
	new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
	vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
	vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
	return new_copy;
}
/*
 *	Routine:	vm_map_copy_discard
 *
 *	Description:
 *		Dispose of a map copy object (returned by
 *		vm_map_copyin).
 */
void
vm_map_copy_discard(
	vm_map_copy_t	copy)
{
	if (copy == VM_MAP_COPY_NULL)
		return;

	switch (copy->type) {
	case VM_MAP_COPY_ENTRY_LIST:
		while (vm_map_copy_first_entry(copy) !=
		       vm_map_copy_to_entry(copy)) {
			vm_map_entry_t	entry = vm_map_copy_first_entry(copy);

			vm_map_copy_entry_unlink(copy, entry);
			if (entry->is_sub_map) {
				vm_map_deallocate(VME_SUBMAP(entry));
			} else {
				vm_object_deallocate(VME_OBJECT(entry));
			}
			vm_map_copy_entry_dispose(copy, entry);
		}
		break;
	case VM_MAP_COPY_OBJECT:
		vm_object_deallocate(copy->cpy_object);
		break;
	case VM_MAP_COPY_KERNEL_BUFFER:

		/*
		 * The vm_map_copy_t and possibly the data buffer were
		 * allocated by a single call to kalloc(), i.e. the
		 * vm_map_copy_t was not allocated out of the zone.
		 */
		if (copy->size > msg_ool_size_small || copy->offset)
			panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
			      (long long)copy->size, (long long)copy->offset);
		kfree(copy, copy->size + cpy_kdata_hdr_sz);
		return;
	}
	zfree(vm_map_copy_zone, copy);
}
/*
 *	Routine:	vm_map_copy_copy
 *
 *	Description:
 *		Move the information in a map copy object to
 *		a new map copy object, leaving the old one
 *		empty.
 *
 *		This is used by kernel routines that need
 *		to look at out-of-line data (in copyin form)
 *		before deciding whether to return SUCCESS.
 *		If the routine returns FAILURE, the original
 *		copy object will be deallocated; therefore,
 *		these routines must make a copy of the copy
 *		object and leave the original empty so that
 *		deallocation will not fail.
 */
vm_map_copy_t
vm_map_copy_copy(
	vm_map_copy_t	copy)
{
	vm_map_copy_t	new_copy;

	if (copy == VM_MAP_COPY_NULL)
		return VM_MAP_COPY_NULL;

	/*
	 * Allocate a new copy object, and copy the information
	 * from the old one into it.
	 */

	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	*new_copy = *copy;

	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * The links in the entry chain must be
		 * changed to point to the new copy object.
		 */
		vm_map_copy_first_entry(copy)->vme_prev
			= vm_map_copy_to_entry(new_copy);
		vm_map_copy_last_entry(copy)->vme_next
			= vm_map_copy_to_entry(new_copy);
	}

	/*
	 * Change the old copy object into one that contains
	 * nothing to be deallocated.
	 */
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = VM_OBJECT_NULL;

	/*
	 * Return the new object.
	 */
	return new_copy;
}
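
/*
 * Pattern sketch (editor's illustration) for the use case described in
 * the block comment above: a routine inspects out-of-line data but must
 * leave the caller's copy object deallocatable on failure.
 *
 *	vm_map_copy_t snapshot;
 *
 *	snapshot = vm_map_copy_copy(copy);	(original "copy" is now empty)
 *	if (examine(snapshot) != KERN_SUCCESS) {
 *		vm_map_copy_discard(snapshot);	(caller may still discard "copy")
 *		return KERN_FAILURE;
 *	}
 *	(on success, "snapshot" carries the data from here on)
 *
 * "examine" is a hypothetical consumer, not an XNU function.
 */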
static kern_return_t
vm_map_overwrite_submap_recurse(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_size_t	dst_size)
{
	vm_map_offset_t	dst_end;
	vm_map_entry_t	tmp_entry;
	vm_map_entry_t	entry;
	kern_return_t	result;
	boolean_t	encountered_sub_map = FALSE;


	/*
	 * Verify that the destination is all writeable
	 * initially.  We have to trunc the destination
	 * address and round the copy size or we'll end up
	 * splitting entries in strange ways.
	 */

	dst_end = vm_map_round_page(dst_addr + dst_size,
				    VM_MAP_PAGE_MASK(dst_map));
	vm_map_lock(dst_map);

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return(KERN_INVALID_ADDRESS);
	}

	vm_map_clip_start(dst_map,
			  tmp_entry,
			  vm_map_trunc_page(dst_addr,
					    VM_MAP_PAGE_MASK(dst_map)));
	if (tmp_entry->is_sub_map) {
		/* clipping did unnest if needed */
		assert(!tmp_entry->use_pmap);
	}

	for (entry = tmp_entry;;) {
		vm_map_entry_t	next;

		next = entry->vme_next;
		while(entry->is_sub_map) {
			vm_map_offset_t	sub_start;
			vm_map_offset_t	sub_end;
			vm_map_offset_t	local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			encountered_sub_map = TRUE;
			sub_start = VME_OFFSET(entry);

			if (entry->vme_end < dst_end)
				sub_end = entry->vme_end;
			else
				sub_end = dst_end;
			sub_end -= entry->vme_start;
			sub_end += VME_OFFSET(entry);
			local_end = entry->vme_end;
			vm_map_unlock(dst_map);

			result = vm_map_overwrite_submap_recurse(
				VME_SUBMAP(entry),
				sub_start,
				sub_end - sub_start);

			if (result != KERN_SUCCESS)
				return result;
			if (dst_end <= entry->vme_end)
				return KERN_SUCCESS;
			vm_map_lock(dst_map);
			if (!vm_map_lookup_entry(dst_map, local_end,
						 &tmp_entry)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}
			entry = tmp_entry;
			next = entry->vme_next;
		}

		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 * If the entry is in transition, we must wait
		 * for it to exit that state.  Anything could happen
		 * when we unlock the map, so start over.
		 */
		if (entry->in_transition) {

			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 * our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end) {
			vm_map_unlock(dst_map);
			return KERN_SUCCESS;
		}
		/*
		 * check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}

		/*
		 *	Check for permanent objects in the destination.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		     (VME_OBJECT(entry)->true_share))) {
			if (encountered_sub_map) {
				vm_map_unlock(dst_map);
				return(KERN_FAILURE);
			}
		}


		entry = next;
	}/* for */
	vm_map_unlock(dst_map);
	return(KERN_SUCCESS);
}
/*
 *	Routine:	vm_map_copy_overwrite
 *
 *	Description:
 *		Copy the memory described by the map copy
 *		object (copy; returned by vm_map_copyin) onto
 *		the specified destination region (dst_map, dst_addr).
 *		The destination must be writeable.
 *
 *		Unlike vm_map_copyout, this routine actually
 *		writes over previously-mapped memory.  If the
 *		previous mapping was to a permanent (user-supplied)
 *		memory object, it is preserved.
 *
 *		The attributes (protection and inheritance) of the
 *		destination region are preserved.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 *
 *	Implementation notes:
 *		To overwrite aligned temporary virtual memory, it is
 *		sufficient to remove the previous mapping and insert
 *		the new copy.  This replacement is done either on
 *		the whole region (if no permanent virtual memory
 *		objects are embedded in the destination region) or
 *		in individual map entries.
 *
 *		To overwrite permanent virtual memory , it is necessary
 *		to copy each page, as the external memory management
 *		interface currently does not provide any optimizations.
 *
 *		Unaligned memory also has to be copied.  It is possible
 *		to use 'vm_trickery' to copy the aligned data.  This is
 *		not done but not hard to implement.
 *
 *		Once a page of permanent memory has been overwritten,
 *		it is impossible to interrupt this function; otherwise,
 *		the call would be neither atomic nor location-independent.
 *		The kernel-state portion of a user thread must be
 *		interruptible.
 *
 *		It may be expensive to forward all requests that might
 *		overwrite permanent memory (vm_write, vm_copy) to
 *		uninterruptible kernel threads.  This routine may be
 *		called by interruptible threads; however, success is
 *		not guaranteed -- if the request cannot be performed
 *		atomically and interruptibly, an error indication is
 *		returned.
 */

static kern_return_t
vm_map_copy_overwrite_nested(
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_copy_t		copy,
	boolean_t		interruptible,
	pmap_t			pmap,
	boolean_t		discard_on_success)
{
	vm_map_offset_t		dst_end;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	kern_return_t		kr;
	boolean_t		aligned = TRUE;
	boolean_t		contains_permanent_objects = FALSE;
	boolean_t		encountered_sub_map = FALSE;
	vm_map_offset_t		base_addr;
	vm_map_size_t		copy_size;
	vm_map_size_t		total_size;


	/*
	 *	Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL)
		return(KERN_SUCCESS);

	/*
	 *	Check for special kernel buffer allocated
	 *	by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return(vm_map_copyout_kernel_buffer(
			       dst_map, &dst_addr,
			       copy, copy->size, TRUE, discard_on_success));
	}

	/*
	 *      Only works for entry lists at the moment.  Will
	 *	support page lists later.
	 */

	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);

	if (copy->size == 0) {
		if (discard_on_success)
			vm_map_copy_discard(copy);
		return(KERN_SUCCESS);
	}

	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */

	if (!VM_MAP_PAGE_ALIGNED(copy->size,
				 VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(copy->offset,
				 VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(dst_addr,
				 VM_MAP_PAGE_MASK(dst_map)))
	{
		aligned = FALSE;
		dst_end = vm_map_round_page(dst_addr + copy->size,
					    VM_MAP_PAGE_MASK(dst_map));
	} else {
		dst_end = dst_addr + copy->size;
	}

	vm_map_lock(dst_map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (dst_addr >= dst_map->max_offset) {
		vm_map_unlock(dst_map);
		return(KERN_INVALID_ADDRESS);
	}

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return(KERN_INVALID_ADDRESS);
	}
	vm_map_clip_start(dst_map,
			  tmp_entry,
			  vm_map_trunc_page(dst_addr,
					    VM_MAP_PAGE_MASK(dst_map)));
	for (entry = tmp_entry;;) {
		vm_map_entry_t	next = entry->vme_next;

		while(entry->is_sub_map) {
			vm_map_offset_t	sub_start;
			vm_map_offset_t	sub_end;
			vm_map_offset_t	local_end;

			if (entry->in_transition) {

				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			local_end = entry->vme_end;
			if (!(entry->needs_copy)) {
				/* if needs_copy we are a COW submap */
				/* in such a case we just replace so */
				/* there is no need for the follow-  */
				/* ing check.                        */
				encountered_sub_map = TRUE;
				sub_start = VME_OFFSET(entry);

				if (entry->vme_end < dst_end)
					sub_end = entry->vme_end;
				else
					sub_end = dst_end;
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				vm_map_unlock(dst_map);

				kr = vm_map_overwrite_submap_recurse(
					VME_SUBMAP(entry),
					sub_start,
					sub_end - sub_start);
				if (kr != KERN_SUCCESS)
					return kr;
				vm_map_lock(dst_map);
			}

			if (dst_end <= entry->vme_end)
				goto start_overwrite;
			if (!vm_map_lookup_entry(dst_map, local_end,
						 &entry)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}
			next = entry->vme_next;
		}

		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {

			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 * our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end)
			break;
		/*
		 * check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}


		/*
		 *	Check for permanent objects in the destination.
		 */
		if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
		    ((!VME_OBJECT(entry)->internal) ||
		     (VME_OBJECT(entry)->true_share))) {
			contains_permanent_objects = TRUE;
		}

		entry = next;
	}/* for */

start_overwrite:
	/*
	 *	If there are permanent objects in the destination, then
	 *	the copy cannot be interrupted.
	 */

	if (interruptible && contains_permanent_objects) {
		vm_map_unlock(dst_map);
		return(KERN_FAILURE);	/* XXX */
	}

	/*
	 *
	 *	Make a second pass, overwriting the data
	 *	At the beginning of each loop iteration,
	 *	the next entry to be overwritten is "tmp_entry"
	 *	(initially, the value returned from the lookup above),
	 *	and the starting address expected in that entry
	 *	is "start".
	 */

	total_size = copy->size;
	if (encountered_sub_map) {
		copy_size = 0;
		/* re-calculate tmp_entry since we've had the map */
		/* unlocked */
		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}
	} else {
		copy_size = copy->size;
	}

	base_addr = dst_addr;
	while(TRUE) {
		/* deconstruct the copy object and do in parts */
		/* only in sub_map, interruptable case */
		vm_map_entry_t	copy_entry;
		vm_map_entry_t	previous_prev = VM_MAP_ENTRY_NULL;
		vm_map_entry_t	next_copy = VM_MAP_ENTRY_NULL;
		int		nentries;
		int		remaining_entries = 0;
		vm_map_offset_t	new_offset = 0;

		for (entry = tmp_entry; copy_size == 0;) {
			vm_map_entry_t	next;

			next = entry->vme_next;

			/* tmp_entry and base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpase tmp_entry, and the copy_size */
			/* may reflect the distance between them */
			/* if the current entry is found to be in transition */
			/* we will start over at the beginning or the last */
			/* encounter of a submap as dictated by base_addr */
			/* we will zero copy_size accordingly. */
			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				if (!vm_map_lookup_entry(dst_map, base_addr,
							 &tmp_entry)) {
					vm_map_unlock(dst_map);
					return(KERN_INVALID_ADDRESS);
				}
				copy_size = 0;
				entry = tmp_entry;
				continue;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t	sub_start;
				vm_map_offset_t	sub_end;
				vm_map_offset_t	local_end;

				if (entry->needs_copy) {
					/* if this is a COW submap */
					/* just back the range with a */
					/* anonymous entry */
					if (entry->vme_end < dst_end)
						sub_end = entry->vme_end;
					else
						sub_end = dst_end;
					if (entry->vme_start < base_addr)
						sub_start = base_addr;
					else
						sub_start = entry->vme_start;
					vm_map_clip_end(
						dst_map, entry, sub_end);
					vm_map_clip_start(
						dst_map, entry, sub_start);
					assert(!entry->use_pmap);
					assert(!entry->iokit_acct);
					entry->use_pmap = TRUE;
					entry->is_sub_map = FALSE;
					vm_map_deallocate(
						VME_SUBMAP(entry));
					VME_OBJECT_SET(entry, NULL);
					VME_OFFSET_SET(entry, 0);
					entry->is_shared = FALSE;
					entry->needs_copy = FALSE;
					entry->protection = VM_PROT_DEFAULT;
					entry->max_protection = VM_PROT_ALL;
					entry->wired_count = 0;
					entry->user_wired_count = 0;
					if(entry->inheritance
					   == VM_INHERIT_SHARE)
						entry->inheritance = VM_INHERIT_COPY;
					continue;
				}
				/* first take care of any non-sub_map */
				/* entries to send */
				if (base_addr < entry->vme_start) {
					/* stuff to send */
					copy_size =
						entry->vme_start - base_addr;
					break;
				}
				sub_start = VME_OFFSET(entry);

				if (entry->vme_end < dst_end)
					sub_end = entry->vme_end;
				else
					sub_end = dst_end;
				sub_end -= entry->vme_start;
				sub_end += VME_OFFSET(entry);
				local_end = entry->vme_end;
				vm_map_unlock(dst_map);
				copy_size = sub_end - sub_start;

				/* adjust the copy object */
				if (total_size > copy_size) {
					vm_map_size_t	local_size = 0;
					vm_map_size_t	entry_size;

					nentries = 1;
					new_offset = copy->offset;
					copy_entry = vm_map_copy_first_entry(copy);
					while(copy_entry !=
					      vm_map_copy_to_entry(copy)){
						entry_size = copy_entry->vme_end -
							copy_entry->vme_start;
						if((local_size < copy_size) &&
						   ((local_size + entry_size)
						    >= copy_size)) {
							vm_map_copy_clip_end(copy,
									     copy_entry,
									     copy_entry->vme_start +
									     (copy_size - local_size));
							entry_size = copy_entry->vme_end -
								copy_entry->vme_start;
							local_size += entry_size;
							new_offset += entry_size;
						}
						if(local_size >= copy_size) {
							next_copy = copy_entry->vme_next;
							copy_entry->vme_next =
								vm_map_copy_to_entry(copy);
							previous_prev =
								copy->cpy_hdr.links.prev;
							copy->cpy_hdr.links.prev = copy_entry;
							copy->size = copy_size;
							remaining_entries =
								copy->cpy_hdr.nentries;
							remaining_entries -= nentries;
							copy->cpy_hdr.nentries = nentries;
							break;
						} else {
							local_size += entry_size;
							new_offset += entry_size;
							nentries++;
						}
						copy_entry = copy_entry->vme_next;
					}
				}

				if ((entry->use_pmap) && (pmap == NULL)) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						VME_SUBMAP(entry)->pmap,
						TRUE);
				} else if (pmap != NULL) {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible, pmap,
						TRUE);
				} else {
					kr = vm_map_copy_overwrite_nested(
						VME_SUBMAP(entry),
						sub_start,
						copy,
						interruptible,
						dst_map->pmap,
						TRUE);
				}
				if (kr != KERN_SUCCESS) {
					if (next_copy != NULL) {
						copy->cpy_hdr.nentries +=
							remaining_entries;
						copy->cpy_hdr.links.prev->vme_next =
							next_copy;
						copy->cpy_hdr.links.prev
							= previous_prev;
						copy->size = total_size;
					}
					return kr;
				}
				if (dst_end <= local_end) {
					return(KERN_SUCCESS);
				}
				/* otherwise copy no longer exists, it was */
				/* destroyed after successful copy_overwrite */
				copy = vm_map_copy_allocate();
				copy->type = VM_MAP_COPY_ENTRY_LIST;
				copy->offset = new_offset;

				/*
				 * XXX FBDP
				 * this does not seem to deal with
				 * the VM map store (R&B tree)
				 */

				total_size -= copy_size;
				copy_size = 0;
				/* put back remainder of copy in container */
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries = remaining_entries;
					copy->cpy_hdr.links.next = next_copy;
					copy->cpy_hdr.links.prev = previous_prev;
					copy->size = total_size;
					next_copy->vme_prev =
						vm_map_copy_to_entry(copy);
					next_copy = NULL;
				}
				base_addr = local_end;
				vm_map_lock(dst_map);
				if (!vm_map_lookup_entry(dst_map,
							 local_end, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return(KERN_INVALID_ADDRESS);
				}
				entry = tmp_entry;
				continue;
			}
			if (dst_end <= entry->vme_end) {
				copy_size = dst_end - base_addr;
				break;
			}

			if ((next == vm_map_to_entry(dst_map)) ||
			    (next->vme_start != entry->vme_end)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}

			entry = next;
		}/* for */

		next_copy = NULL;
		nentries = 1;

		/* adjust the copy object */
		if (total_size > copy_size) {
			vm_map_size_t	local_size = 0;
			vm_map_size_t	entry_size;

			new_offset = copy->offset;
			copy_entry = vm_map_copy_first_entry(copy);
			while(copy_entry != vm_map_copy_to_entry(copy)) {
				entry_size = copy_entry->vme_end -
					copy_entry->vme_start;
				if((local_size < copy_size) &&
				   ((local_size + entry_size)
				    >= copy_size)) {
					vm_map_copy_clip_end(copy, copy_entry,
							     copy_entry->vme_start +
							     (copy_size - local_size));
					entry_size = copy_entry->vme_end -
						copy_entry->vme_start;
					local_size += entry_size;
					new_offset += entry_size;
				}
				if(local_size >= copy_size) {
					next_copy = copy_entry->vme_next;
					copy_entry->vme_next =
						vm_map_copy_to_entry(copy);
					previous_prev =
						copy->cpy_hdr.links.prev;
					copy->cpy_hdr.links.prev = copy_entry;
					copy->size = copy_size;
					remaining_entries =
						copy->cpy_hdr.nentries;
					remaining_entries -= nentries;
					copy->cpy_hdr.nentries = nentries;
					break;
				} else {
					local_size += entry_size;
					new_offset += entry_size;
					nentries++;
				}
				copy_entry = copy_entry->vme_next;
			}
		}

		if (aligned) {
			pmap_t	local_pmap;

			if (pmap)
				local_pmap = pmap;
			else
				local_pmap = dst_map->pmap;

			if ((kr =  vm_map_copy_overwrite_aligned(
				     dst_map, tmp_entry, copy,
				     base_addr, local_pmap)) != KERN_SUCCESS) {
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
						remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
						next_copy;
					copy->cpy_hdr.links.prev =
						previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
			vm_map_unlock(dst_map);
		} else {
			/*
			 * Performance gain:
			 *
			 * if the copy and dst address are misaligned but the same
			 * offset within the page we can copy_not_aligned the
			 * misaligned parts and copy aligned the rest.  If they are
			 * aligned but len is unaligned we simply need to copy
			 * the end bit unaligned.  We'll need to split the misaligned
			 * bits of the region in this case !
			 */
			/* ALWAYS UNLOCKS THE dst_map MAP */
			kr = vm_map_copy_overwrite_unaligned(
				dst_map,
				tmp_entry,
				copy,
				base_addr,
				discard_on_success);
			if (kr != KERN_SUCCESS) {
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
						remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
						next_copy;
					copy->cpy_hdr.links.prev =
						previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
		}
		total_size -= copy_size;
		if (total_size == 0)
			break;
		base_addr += copy_size;
		copy_size = 0;
		copy->offset = new_offset;
		if (next_copy != NULL) {
			copy->cpy_hdr.nentries = remaining_entries;
			copy->cpy_hdr.links.next = next_copy;
			copy->cpy_hdr.links.prev = previous_prev;
			next_copy->vme_prev = vm_map_copy_to_entry(copy);
			copy->size = total_size;
		}
		vm_map_lock(dst_map);
		while(TRUE) {
			if (!vm_map_lookup_entry(dst_map,
						 base_addr, &tmp_entry)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}
			if (tmp_entry->in_transition) {
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);
			} else {
				break;
			}
		}
		vm_map_clip_start(dst_map,
				  tmp_entry,
				  vm_map_trunc_page(base_addr,
						    VM_MAP_PAGE_MASK(dst_map)));

		entry = tmp_entry;
	} /* while */

	/*
	 *	Throw away the vm_map_copy object
	 */
	if (discard_on_success)
		vm_map_copy_discard(copy);

	return(KERN_SUCCESS);
}/* vm_map_copy_overwrite */
kern_return_t
vm_map_copy_overwrite(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy,
	boolean_t	interruptible)
{
	vm_map_size_t	head_size, tail_size;
	vm_map_copy_t	head_copy, tail_copy;
	vm_map_offset_t	head_addr, tail_addr;
	vm_map_entry_t	entry;
	kern_return_t	kr;
	vm_map_offset_t	effective_page_mask, effective_page_size;

	head_size = 0;
	tail_size = 0;

	head_copy = NULL;
	tail_copy = NULL;

	head_addr = 0;
	tail_addr = 0;

	if (interruptible ||
	    copy == VM_MAP_COPY_NULL ||
	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * We can't split the "copy" map if we're interruptible
		 * or if we don't have a "copy" map...
		 */
	blunt_copy:
		return vm_map_copy_overwrite_nested(dst_map,
						    dst_addr,
						    copy,
						    interruptible,
						    (pmap_t) NULL,
						    TRUE);
	}

	effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
	effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
				  effective_page_mask);
	effective_page_size = effective_page_mask + 1;

	if (copy->size < 3 * effective_page_size) {
		/*
		 * Too small to bother with optimizing...
		 */
		goto blunt_copy;
	}

	if ((dst_addr & effective_page_mask) !=
	    (copy->offset & effective_page_mask)) {
		/*
		 * Incompatible mis-alignment of source and destination...
		 */
		goto blunt_copy;
	}

	/*
	 * Proper alignment or identical mis-alignment at the beginning.
	 * Let's try and do a small unaligned copy first (if needed)
	 * and then an aligned copy for the rest.
	 */
	if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
		head_addr = dst_addr;
		head_size = (effective_page_size -
			     (copy->offset & effective_page_mask));
		head_size = MIN(head_size, copy->size);
	}
	if (!vm_map_page_aligned(copy->offset + copy->size,
				 effective_page_mask)) {
		/*
		 * Mis-alignment at the end.
		 * Do an aligned copy up to the last page and
		 * then an unaligned copy for the remaining bytes.
		 */
		tail_size = ((copy->offset + copy->size) &
			     effective_page_mask);
		tail_size = MIN(tail_size, copy->size);
		tail_addr = dst_addr + copy->size - tail_size;
		assert(tail_addr >= head_addr + head_size);
	}
	assert(head_size + tail_size <= copy->size);

	if (head_size + tail_size == copy->size) {
		/*
		 * It's all unaligned, no optimization possible...
		 */
		goto blunt_copy;
	}

	/*
	 * Can't optimize if there are any submaps in the
	 * destination due to the way we free the "copy" map
	 * progressively in vm_map_copy_overwrite_nested()
	 * in that case.
	 */
	vm_map_lock_read(dst_map);
	if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
		vm_map_unlock_read(dst_map);
		goto blunt_copy;
	}
	for (;
	     (entry != vm_map_copy_to_entry(copy) &&
	      entry->vme_start < dst_addr + copy->size);
	     entry = entry->vme_next) {
		if (entry->is_sub_map) {
			vm_map_unlock_read(dst_map);
			goto blunt_copy;
		}
	}
	vm_map_unlock_read(dst_map);

	if (head_size) {
		/*
		 * Unaligned copy of the first "head_size" bytes, to reach
		 * a page boundary.
		 */

		/*
		 * Extract "head_copy" out of "copy".
		 */
		head_copy = vm_map_copy_allocate();
		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
		head_copy->cpy_hdr.entries_pageable =
			copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&head_copy->cpy_hdr);

		entry = vm_map_copy_first_entry(copy);
		if (entry->vme_end < copy->offset + head_size) {
			head_size = entry->vme_end - copy->offset;
		}

		head_copy->offset = copy->offset;
		head_copy->size = head_size;
		copy->offset += head_size;
		copy->size -= head_size;

		vm_map_copy_clip_end(copy, entry, copy->offset);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(head_copy,
				       vm_map_copy_to_entry(head_copy),
				       entry);

		/*
		 * Do the unaligned copy.
		 */
		kr = vm_map_copy_overwrite_nested(dst_map,
						  head_addr,
						  head_copy,
						  interruptible,
						  (pmap_t) NULL,
						  FALSE);
		if (kr != KERN_SUCCESS)
			goto done;
	}

	if (tail_size) {
		/*
		 * Extract "tail_copy" out of "copy".
		 */
		tail_copy = vm_map_copy_allocate();
		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
		tail_copy->cpy_hdr.entries_pageable =
			copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&tail_copy->cpy_hdr);

		tail_copy->offset = copy->offset + copy->size - tail_size;
		tail_copy->size = tail_size;

		copy->size -= tail_size;

		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(tail_copy,
				       vm_map_copy_last_entry(tail_copy),
				       entry);
	}

	/*
	 * Copy most (or possibly all) of the data.
	 */
	kr = vm_map_copy_overwrite_nested(dst_map,
					  dst_addr + head_size,
					  copy,
					  interruptible,
					  (pmap_t) NULL,
					  FALSE);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	if (tail_size) {
		kr = vm_map_copy_overwrite_nested(dst_map,
						  tail_addr,
						  tail_copy,
						  interruptible,
						  (pmap_t) NULL,
						  FALSE);
	}

done:
	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
	if (kr == KERN_SUCCESS) {
		/*
		 * Discard all the copy maps.
		 */
		if (head_copy) {
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		vm_map_copy_discard(copy);
		if (tail_copy) {
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	} else {
		/*
		 * Re-assemble the original copy map.
		 */
		if (head_copy) {
			entry = vm_map_copy_first_entry(head_copy);
			vm_map_copy_entry_unlink(head_copy, entry);
			vm_map_copy_entry_link(copy,
					       vm_map_copy_to_entry(copy),
					       entry);
			copy->offset -= head_size;
			copy->size += head_size;
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		if (tail_copy) {
			entry = vm_map_copy_last_entry(tail_copy);
			vm_map_copy_entry_unlink(tail_copy, entry);
			vm_map_copy_entry_link(copy,
					       vm_map_copy_last_entry(copy),
					       entry);
			copy->size += tail_size;
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	}
	return kr;
}
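
/*
 * Worked example (editor's illustration) of the head/middle/tail split
 * above, assuming a 16K effective page size (effective_page_mask 0x3fff)
 * and hypothetical values copy->offset = 0x3800, copy->size = 0xc900,
 * with dst_addr sharing the same page offset 0x3800:
 *
 *	head_size = effective_page_size - (copy->offset & mask)
 *	          = 0x4000 - 0x3800 = 0x0800
 *	tail_size = (copy->offset + copy->size) & mask
 *	          = 0x10100 & 0x3fff = 0x0100
 *	middle    = copy->size - head_size - tail_size = 0xc000 (3 pages)
 *
 * The two unaligned fragments go through vm_map_copy_overwrite_nested()
 * individually and the 0xc000-byte middle is overwritten page-aligned.
 */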
9398 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9401 * Physically copy unaligned data
9404 * Unaligned parts of pages have to be physically copied. We use
9405 * a modified form of vm_fault_copy (which understands none-aligned
9406 * page offsets and sizes) to do the copy. We attempt to copy as
9407 * much memory in one go as possibly, however vm_fault_copy copies
9408 * within 1 memory object so we have to find the smaller of "amount left"
9409 * "source object data size" and "target object data size". With
9410 * unaligned data we don't need to split regions, therefore the source
9411 * (copy) object should be one map entry, the target range may be split
9412 * over multiple map entries however. In any event we are pessimistic
9413 * about these assumptions.
9416 * dst_map is locked on entry and is return locked on success,
9417 * unlocked on error.
static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t	dst_map,
	vm_map_entry_t	entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	boolean_t	discard_on_success)
{
	vm_map_entry_t		copy_entry;
	vm_map_entry_t		copy_entry_next;
	vm_map_version_t	version;
	vm_object_t		dst_object;
	vm_object_offset_t	dst_offset;
	vm_object_offset_t	src_offset;
	vm_object_offset_t	entry_offset;
	vm_map_offset_t		entry_end;
	vm_map_size_t		src_size,
				dst_size,
				copy_size,
				amount_left;
	kern_return_t		kr = KERN_SUCCESS;

	copy_entry = vm_map_copy_first_entry(copy);

	vm_map_lock_write_to_read(dst_map);

	src_offset = copy->offset - vm_object_trunc_page(copy->offset);
	amount_left = copy->size;
/*
 *	unaligned so we never clipped this entry, we need the offset into
 *	the vm_object not just the data.
 */
	while (amount_left > 0) {

		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock_read(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/* "start" must be within the current map entry */
		assert((start >= entry->vme_start) && (start < entry->vme_end));

		dst_offset = start - entry->vme_start;

		dst_size = entry->vme_end - start;

		src_size = copy_entry->vme_end -
			(copy_entry->vme_start + src_offset);

		if (dst_size < src_size) {
			/*
			 *	we can only copy dst_size bytes before
			 *	we have to get the next destination entry
			 */
			copy_size = dst_size;
		} else {
			/*
			 *	we can only copy src_size bytes before
			 *	we have to get the next source copy entry
			 */
			copy_size = src_size;
		}

		if (copy_size > amount_left) {
			copy_size = amount_left;
		}
		/*
		 *	Entry needs copy, create a shadow object for
		 *	Copy on write region.
		 */
		if (entry->needs_copy &&
		    ((entry->protection & VM_PROT_WRITE) != 0)) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			VME_OBJECT_SHADOW(entry,
					  (vm_map_size_t)(entry->vme_end
							  - entry->vme_start));
			entry->needs_copy = FALSE;
			vm_map_lock_write_to_read(dst_map);
		}
		dst_object = VME_OBJECT(entry);
		/*
		 *	unlike with the virtual (aligned) copy we're going
		 *	to fault on it therefore we need a target object.
		 */
		if (dst_object == VM_OBJECT_NULL) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			dst_object = vm_object_allocate((vm_map_size_t)
							entry->vme_end - entry->vme_start);
			VME_OBJECT(entry) = dst_object;
			VME_OFFSET_SET(entry, 0);
			assert(entry->use_pmap);
			vm_map_lock_write_to_read(dst_map);
		}
		/*
		 *	Take an object reference and unlock map. The "entry" may
		 *	disappear or change when the map is unlocked.
		 */
		vm_object_reference(dst_object);
		version.main_timestamp = dst_map->timestamp;
		entry_offset = VME_OFFSET(entry);
		entry_end = entry->vme_end;
		vm_map_unlock_read(dst_map);
		/*
		 *	Copy as much as possible in one pass
		 */
		kr = vm_fault_copy(
			VME_OBJECT(copy_entry),
			VME_OFFSET(copy_entry) + src_offset,
			&copy_size,
			dst_object,
			entry_offset + dst_offset,
			dst_map,
			&version,
			THREAD_UNINT);

		start += copy_size;
		src_offset += copy_size;
		amount_left -= copy_size;
		/*
		 *	Release the object reference
		 */
		vm_object_deallocate(dst_object);
		/*
		 *	If a hard error occurred, return it now
		 */
		if (kr != KERN_SUCCESS)
			return kr;

		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
		    || amount_left == 0) {
			/*
			 *	all done with this copy entry, dispose.
			 */
			copy_entry_next = copy_entry->vme_next;

			if (discard_on_success) {
				vm_map_copy_entry_unlink(copy, copy_entry);
				assert(!copy_entry->is_sub_map);
				vm_object_deallocate(VME_OBJECT(copy_entry));
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			if (copy_entry_next == vm_map_copy_to_entry(copy) &&
			    amount_left) {
				/*
				 *	not finished copying but run out of source
				 */
				return KERN_INVALID_ADDRESS;
			}

			copy_entry = copy_entry_next;

			src_offset = 0;
		}

		if (amount_left == 0)
			return KERN_SUCCESS;

		vm_map_lock_read(dst_map);
		if (version.main_timestamp == dst_map->timestamp) {
			if (start == entry_end) {
				/*
				 *	destination region is split.  Use the version
				 *	information to avoid a lookup in the normal
				 *	case.
				 */
				entry = entry->vme_next;
				/*
				 *	should be contiguous. Fail if we encounter
				 *	a hole in the destination.
				 */
				if (start != entry->vme_start) {
					vm_map_unlock_read(dst_map);
					return KERN_INVALID_ADDRESS;
				}
			}
		} else {
			/*
			 *	Map version check failed.
			 *	we must lookup the entry because somebody
			 *	might have changed the map behind our backs.
			 */
RetryLookup:
			if (!vm_map_lookup_entry(dst_map, start, &entry)) {
				vm_map_unlock_read(dst_map);
				return KERN_INVALID_ADDRESS;
			}
		}
	}/* while */

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite_unaligned */
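
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the per-pass chunk size in the unaligned copy loop above is
 * the smallest of what is left in the current destination entry, what is
 * left in the current source copy entry, and what is left overall.  The
 * helper name below is hypothetical; the code is standalone C under
 * "#if 0" so it is never compiled.
 */
#if 0	/* illustrative sketch only */
#include <stdint.h>

static uint64_t
unaligned_chunk_size(uint64_t dst_left, uint64_t src_left, uint64_t amount_left)
{
	/* start with the smaller of the two entry remainders */
	uint64_t copy_size = (dst_left < src_left) ? dst_left : src_left;

	/* never copy more than what remains of the whole request */
	if (copy_size > amount_left) {
		copy_size = amount_left;
	}
	return copy_size;
}
#endif	/* illustrative sketch only */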
/*
 *	Routine:	vm_map_copy_overwrite_aligned	[internal use only]
 *
 *	Description:
 *		Does all the vm_trickery possible for whole pages.
 *
 *	Implementation:
 *		If there are no permanent objects in the destination,
 *		and the source and destination map entry zones match,
 *		and the destination map entry is not shared,
 *		then the map entries can be deleted and replaced
 *		with those from the copy.  The following code is the
 *		basic idea of what to do, but there are lots of annoying
 *		little details about getting protection and inheritance
 *		right.  Should add protection, inheritance, and sharing checks
 *		to the above pass and make sure that no wiring is involved.
 */
int vm_map_copy_overwrite_aligned_src_not_internal = 0;
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
int vm_map_copy_overwrite_aligned_src_large = 0;

static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t	dst_map,
	vm_map_entry_t	tmp_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	__unused pmap_t	pmap)
{
	vm_object_t	object;
	vm_map_entry_t	copy_entry;
	vm_map_size_t	copy_size;
	vm_map_size_t	size;
	vm_map_entry_t	entry;

	while ((copy_entry = vm_map_copy_first_entry(copy))
	       != vm_map_copy_to_entry(copy)) {
		copy_size = (copy_entry->vme_end - copy_entry->vme_start);

		entry = tmp_entry;
		if (entry->is_sub_map) {
			/* unnested when clipped earlier */
			assert(!entry->use_pmap);
		}
		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
		size = (entry->vme_end - entry->vme_start);
		/*
		 *	Make sure that no holes popped up in the
		 *	address map, and that the protection is
		 *	still valid, in case the map was unlocked
		 *	earlier.
		 */

		if ((entry->vme_start != start) || ((entry->is_sub_map)
						    && !entry->needs_copy)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}
		assert(entry != vm_map_to_entry(dst_map));

		/*
		 *	Check protection again
		 */

		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 *	Adjust to source size first
		 */

		if (copy_size < size) {
			if (entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
						 VM_MAP_PAGE_MASK(dst_map))) {
				/* no longer map-aligned */
				entry->map_aligned = FALSE;
			}
			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
			size = copy_size;
		}

		/*
		 *	Adjust to destination size
		 */

		if (size < copy_size) {
			vm_map_copy_clip_end(copy, copy_entry,
					     copy_entry->vme_start + size);
			copy_size = size;
		}

		assert((entry->vme_end - entry->vme_start) == size);
		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
		assert((copy_entry->vme_end - copy_entry->vme_start) == size);

		/*
		 *	If the destination contains temporary unshared memory,
		 *	we can perform the copy by throwing it away and
		 *	installing the source data.
		 */

		object = VME_OBJECT(entry);
		if ((!entry->is_shared &&
		     ((object == VM_OBJECT_NULL) ||
		      (object->internal && !object->true_share))) ||
		    entry->needs_copy) {
			vm_object_t		old_object = VME_OBJECT(entry);
			vm_object_offset_t	old_offset = VME_OFFSET(entry);
			vm_object_offset_t	offset;

			/*
			 * Ensure that the source and destination aren't
			 * identical
			 */
			if (old_object == VME_OBJECT(copy_entry) &&
			    old_offset == VME_OFFSET(copy_entry)) {
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_map_copy_entry_dispose(copy, copy_entry);

				if (old_object != VM_OBJECT_NULL)
					vm_object_deallocate(old_object);

				start = tmp_entry->vme_end;
				tmp_entry = tmp_entry->vme_next;
				continue;
			}

#if !CONFIG_EMBEDDED
#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024)	/* 64 MB */
#define __TRADEOFF1_COPY_SIZE (128 * 1024)	/* 128 KB */
			if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
			    VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
			    copy_size <= __TRADEOFF1_COPY_SIZE) {
				/*
				 * Virtual vs. Physical copy tradeoff #1.
				 *
				 * Copying only a few pages out of a large
				 * object:  do a physical copy instead of
				 * a virtual copy, to avoid possibly keeping
				 * the entire large object alive because of
				 * those few copy-on-write pages.
				 */
				vm_map_copy_overwrite_aligned_src_large++;
				goto slow_copy;
			}
#endif /* !CONFIG_EMBEDDED */

			if ((dst_map->pmap != kernel_pmap) &&
			    (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
			    (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
				vm_object_t new_object, new_shadow;

				/*
				 * We're about to map something over a mapping
				 * established by malloc()...
				 */
				new_object = VME_OBJECT(copy_entry);
				if (new_object != VM_OBJECT_NULL) {
					vm_object_lock_shared(new_object);
				}
				while (new_object != VM_OBJECT_NULL &&
#if !CONFIG_EMBEDDED
				       !new_object->true_share &&
				       new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
#endif /* !CONFIG_EMBEDDED */
				       new_object->internal) {
					new_shadow = new_object->shadow;
					if (new_shadow == VM_OBJECT_NULL) {
						break;
					}
					vm_object_lock_shared(new_shadow);
					vm_object_unlock(new_object);
					new_object = new_shadow;
				}
				if (new_object != VM_OBJECT_NULL) {
					if (!new_object->internal) {
						/*
						 * The new mapping is backed
						 * by an external object.  We
						 * don't want malloc'ed memory
						 * to be replaced with such a
						 * non-anonymous mapping, so
						 * let's go off the optimized
						 * path...
						 */
						vm_map_copy_overwrite_aligned_src_not_internal++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
#if !CONFIG_EMBEDDED
					if (new_object->true_share ||
					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
						/*
						 * Same if there's a "true_share"
						 * object in the shadow chain, or
						 * an object with a non-default
						 * (SYMMETRIC) copy strategy.
						 */
						vm_map_copy_overwrite_aligned_src_not_symmetric++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
#endif /* !CONFIG_EMBEDDED */
					vm_object_unlock(new_object);
				}
				/*
				 * The new mapping is still backed by
				 * anonymous (internal) memory, so it's
				 * OK to substitute it for the original
				 * mapping.
				 */
			}

			if (old_object != VM_OBJECT_NULL) {
				if(entry->is_sub_map) {
					if(entry->use_pmap) {
#ifndef NO_NESTED_PMAP
						pmap_unnest(dst_map->pmap,
							    (addr64_t)entry->vme_start,
							    entry->vme_end - entry->vme_start);
#endif	/* NO_NESTED_PMAP */
						if(dst_map->mapped_in_other_pmaps) {
							/* clean up parent */
							/* map/maps */
							vm_map_submap_pmap_clean(
								dst_map, entry->vme_start,
								entry->vme_end,
								VME_SUBMAP(entry),
								VME_OFFSET(entry));
						}
					} else {
						vm_map_submap_pmap_clean(
							dst_map, entry->vme_start,
							entry->vme_end,
							VME_SUBMAP(entry),
							VME_OFFSET(entry));
					}
					vm_map_deallocate(VME_SUBMAP(entry));
				} else {
					if(dst_map->mapped_in_other_pmaps) {
						vm_object_pmap_protect_options(
							VME_OBJECT(entry),
							VME_OFFSET(entry),
							entry->vme_end
							- entry->vme_start,
							PMAP_NULL,
							entry->vme_start,
							VM_PROT_NONE,
							PMAP_OPTIONS_REMOVE);
					} else {
						pmap_remove_options(
							dst_map->pmap,
							(addr64_t)(entry->vme_start),
							(addr64_t)(entry->vme_end),
							PMAP_OPTIONS_REMOVE);
					}
					vm_object_deallocate(old_object);
				}
			}

			if (entry->iokit_acct) {
				/* keep using iokit accounting */
				entry->use_pmap = FALSE;
			} else {
				/* use pmap accounting */
				entry->use_pmap = TRUE;
			}
			entry->is_sub_map = FALSE;
			VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
			object = VME_OBJECT(entry);
			entry->needs_copy = copy_entry->needs_copy;
			entry->wired_count = 0;
			entry->user_wired_count = 0;
			offset = VME_OFFSET(copy_entry);
			VME_OFFSET_SET(entry, offset);

			vm_map_copy_entry_unlink(copy, copy_entry);
			vm_map_copy_entry_dispose(copy, copy_entry);

			/*
			 * we could try to push pages into the pmap at this point, BUT
			 * this optimization only saved on average 2 us per page if ALL
			 * the pages in the source were currently mapped
			 * and ALL the pages in the dest were touched, if there were fewer
			 * than 2/3 of the pages touched, this optimization actually cost more cycles
			 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
			 */

			/*
			 *	Set up for the next iteration.  The map
			 *	has not been unlocked, so the next
			 *	address should be at the end of this
			 *	entry, and the next map entry should be
			 *	the one following it.
			 */

			start = tmp_entry->vme_end;
			tmp_entry = tmp_entry->vme_next;
		} else {
			vm_map_version_t	version;
			vm_object_t		dst_object;
			vm_object_offset_t	dst_offset;
			kern_return_t		r;

		slow_copy:
			if (entry->needs_copy) {
				VME_OBJECT_SHADOW(entry,
						  (entry->vme_end -
						   entry->vme_start));
				entry->needs_copy = FALSE;
			}

			dst_object = VME_OBJECT(entry);
			dst_offset = VME_OFFSET(entry);

			/*
			 *	Take an object reference, and record
			 *	the map version information so that the
			 *	map can be safely unlocked.
			 */

			if (dst_object == VM_OBJECT_NULL) {
				/*
				 * We would usually have just taken the
				 * optimized path above if the destination
				 * object has not been allocated yet.  But we
				 * now disable that optimization if the copy
				 * entry's object is not backed by anonymous
				 * memory to avoid replacing malloc'ed
				 * (i.e. re-usable) anonymous memory with a
				 * not-so-anonymous mapping.
				 * So we have to handle this case here and
				 * allocate a new VM object for this map entry.
				 */
				dst_object = vm_object_allocate(
					entry->vme_end - entry->vme_start);
				dst_offset = 0;
				VME_OBJECT_SET(entry, dst_object);
				VME_OFFSET_SET(entry, dst_offset);
				assert(entry->use_pmap);
			}

			vm_object_reference(dst_object);

			/* account for unlock bumping up timestamp */
			version.main_timestamp = dst_map->timestamp + 1;

			vm_map_unlock(dst_map);

			/*
			 *	Copy as much as possible in one pass
			 */

			copy_size = size;
			r = vm_fault_copy(
				VME_OBJECT(copy_entry),
				VME_OFFSET(copy_entry),
				&copy_size,
				dst_object,
				dst_offset,
				dst_map,
				&version,
				THREAD_UNINT);

			/*
			 *	Release the object reference
			 */

			vm_object_deallocate(dst_object);

			/*
			 *	If a hard error occurred, return it now
			 */

			if (r != KERN_SUCCESS)
				return r;

			if (copy_size != 0) {
				/*
				 *	Dispose of the copied region
				 */

				vm_map_copy_clip_end(copy, copy_entry,
						     copy_entry->vme_start + copy_size);
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_object_deallocate(VME_OBJECT(copy_entry));
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			/*
			 *	Pick up in the destination map where we left off.
			 *
			 *	Use the version information to avoid a lookup
			 *	in the normal case.
			 */

			start += copy_size;
			vm_map_lock(dst_map);
			if (version.main_timestamp == dst_map->timestamp &&
			    copy_size != 0) {
				/* We can safely use saved tmp_entry value */

				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_end(dst_map, tmp_entry, start);
				tmp_entry = tmp_entry->vme_next;
			} else {
				/* Must do lookup of tmp_entry */

				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return(KERN_INVALID_ADDRESS);
				}
				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_start(dst_map, tmp_entry, start);
			}
		}
	}/* while */

	return(KERN_SUCCESS);
}/* vm_map_copy_overwrite_aligned */
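
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): a restatement of the "virtual vs. physical copy" tradeoff
 * check used above -- when only a small amount is copied out of a very
 * large source object, a physical copy avoids keeping the whole object
 * alive through copy-on-write references.  The thresholds mirror the
 * __TRADEOFF1_* values above; the helper name is hypothetical and the
 * code is standalone C under "#if 0", never compiled.
 */
#if 0	/* illustrative sketch only */
#include <stdbool.h>
#include <stdint.h>

static bool
prefer_physical_copy(uint64_t src_object_size, uint64_t copy_size)
{
	const uint64_t obj_threshold  = 64ULL * 1024 * 1024;	/* 64 MB */
	const uint64_t copy_threshold = 128ULL * 1024;		/* 128 KB */

	return src_object_size >= obj_threshold && copy_size <= copy_threshold;
}
#endif	/* illustrative sketch only */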
/*
 *	Routine:	vm_map_copyin_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy in data to a kernel buffer from space in the
 *		source map.  The original space may be optionally
 *		deallocated.
 *
 *		If successful, returns a new copy object.
 */
static kern_return_t
vm_map_copyin_kernel_buffer(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	vm_map_copy_t	*copy_result)
{
	kern_return_t	kr;
	vm_map_copy_t	copy;
	vm_size_t	kalloc_size;

	if (len > msg_ool_size_small)
		return KERN_INVALID_ARGUMENT;

	kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);

	copy = (vm_map_copy_t)kalloc(kalloc_size);
	if (copy == VM_MAP_COPY_NULL)
		return KERN_RESOURCE_SHORTAGE;
	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
	copy->size = len;
	copy->offset = 0;

	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
	if (kr != KERN_SUCCESS) {
		kfree(copy, kalloc_size);
		return kr;
	}
	if (src_destroy) {
		(void) vm_map_remove(
			src_map,
			vm_map_trunc_page(src_addr,
					  VM_MAP_PAGE_MASK(src_map)),
			vm_map_round_page(src_addr + len,
					  VM_MAP_PAGE_MASK(src_map)),
			(VM_MAP_REMOVE_INTERRUPTIBLE |
			 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
			 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
	}
	*copy_result = copy;
	return KERN_SUCCESS;
}
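
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): a simplified model of the kernel-buffer copy path above.
 * Small copies are stored inline, right after a fixed-size header, so a
 * single allocation of (header size + len) holds both.  The struct and
 * function below are hypothetical stand-ins, not the real vm_map_copy
 * layout, and the code is standalone C under "#if 0", never compiled.
 */
#if 0	/* illustrative sketch only */
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

struct small_copy {
	int	type;		/* would be VM_MAP_COPY_KERNEL_BUFFER */
	size_t	size;		/* number of payload bytes */
	char	data[];		/* inline payload, "size" bytes */
};

static struct small_copy *
small_copy_create(const void *src, size_t len)
{
	/* one allocation covers the header and the inline payload */
	struct small_copy *copy = malloc(sizeof(*copy) + len);

	if (copy == NULL) {
		return NULL;
	}
	copy->type = 0;
	copy->size = len;
	memcpy(copy->data, src, len);
	return copy;
}
#endif	/* illustrative sketch only */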
/*
 *	Routine:	vm_map_copyout_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy out data from a kernel buffer into space in the
 *		destination map.  The space may be optionally dynamically
 *		allocated.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
static int vm_map_copyout_kernel_buffer_failures = 0;
static kern_return_t
vm_map_copyout_kernel_buffer(
	vm_map_t		map,
	vm_map_address_t	*addr,	/* IN/OUT */
	vm_map_copy_t		copy,
	vm_map_size_t		copy_size,
	boolean_t		overwrite,
	boolean_t		consume_on_success)
{
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	assert(copy->size == copy_size);

	/*
	 * check for corrupted vm_map_copy structure
	 */
	if (copy_size > msg_ool_size_small || copy->offset)
		panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
		      (long long)copy->size, (long long)copy->offset);

	if (!overwrite) {

		/*
		 * Allocate space in the target map for the data
		 */
		*addr = 0;
		kr = vm_map_enter(map,
				  addr,
				  vm_map_round_page(copy_size,
						    VM_MAP_PAGE_MASK(map)),
				  (vm_map_offset_t) 0,
				  VM_FLAGS_ANYWHERE,
				  VM_MAP_KERNEL_FLAGS_NONE,
				  VM_KERN_MEMORY_NONE,
				  VM_OBJECT_NULL,
				  (vm_object_offset_t) 0,
				  FALSE,
				  VM_PROT_DEFAULT,
				  VM_PROT_ALL,
				  VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS)
			return kr;
#if KASAN
		if (map->pmap == kernel_pmap) {
			kasan_notify_address(*addr, copy->size);
		}
#endif
	}

	/*
	 * Copyout the data from the kernel buffer to the target map.
	 */
	if (thread->map == map) {

		/*
		 * If the target map is the current map, just do
		 * the copy.
		 */
		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	}
	else {
		vm_map_t oldmap;

		/*
		 * If the target map is another map, assume the
		 * target's address space identity for the duration
		 * of the copy.
		 */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);

		assert((vm_size_t)copy_size == copy_size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
			vm_map_copyout_kernel_buffer_failures++;
			kr = KERN_INVALID_ADDRESS;
		}

		(void) vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}

	if (kr != KERN_SUCCESS) {
		/* the copy failed, clean up */
		if (!overwrite) {
			/*
			 * Deallocate the space we allocated in the target map.
			 */
			(void) vm_map_remove(
				map,
				vm_map_trunc_page(*addr,
						  VM_MAP_PAGE_MASK(map)),
				vm_map_round_page((*addr +
						   vm_map_round_page(copy_size,
								     VM_MAP_PAGE_MASK(map))),
						  VM_MAP_PAGE_MASK(map)),
				VM_MAP_REMOVE_NO_FLAGS);
			*addr = 0;
		}
	} else {
		/* copy was successful, discard the copy structure */
		if (consume_on_success) {
			kfree(copy, copy_size + cpy_kdata_hdr_sz);
		}
	}

	return kr;
}
/*
 *	Routine:	vm_map_copy_insert	[internal use only]
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *
 *	Side effects:
 *		The copy chain is destroyed.
 */
static void
vm_map_copy_insert(
	vm_map_t	map,
	vm_map_entry_t	after_where,
	vm_map_copy_t	copy)
{
	vm_map_entry_t	entry;

	while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
		entry = vm_map_copy_first_entry(copy);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_store_entry_link(map, after_where, entry,
					VM_MAP_KERNEL_FLAGS_NONE);
		after_where = entry;
	}
	zfree(vm_map_copy_zone, copy);
}
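
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): a plain doubly-linked-list version of the insertion loop
 * above -- entries are unlinked from the source chain one at a time and
 * linked into the destination after "after_where", which then advances
 * so relative order is preserved.  The types and names below are
 * hypothetical; the code is standalone C under "#if 0", never compiled.
 */
#if 0	/* illustrative sketch only */
struct node {
	struct node *prev, *next;
};

/* "src_head" is the sentinel of a circular list; its entries are moved. */
static void
splice_after(struct node *after_where, struct node *src_head)
{
	while (src_head->next != src_head) {
		struct node *n = src_head->next;

		/* unlink from the source chain */
		n->prev->next = n->next;
		n->next->prev = n->prev;

		/* link right after "after_where" */
		n->next = after_where->next;
		n->prev = after_where;
		after_where->next->prev = n;
		after_where->next = n;

		after_where = n;
	}
}
#endif	/* illustrative sketch only */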
static void
vm_map_copy_remap(
	vm_map_t	map,
	vm_map_entry_t	where,
	vm_map_copy_t	copy,
	vm_map_offset_t	adjustment,
	vm_prot_t	cur_prot,
	vm_prot_t	max_prot,
	vm_inherit_t	inheritance)
{
	vm_map_entry_t	copy_entry, new_entry;

	for (copy_entry = vm_map_copy_first_entry(copy);
	     copy_entry != vm_map_copy_to_entry(copy);
	     copy_entry = copy_entry->vme_next) {
		/* get a new VM map entry for the map */
		new_entry = vm_map_entry_create(map,
						!map->hdr.entries_pageable);
		/* copy the "copy entry" to the new entry */
		vm_map_entry_copy(new_entry, copy_entry);
		/* adjust "start" and "end" */
		new_entry->vme_start += adjustment;
		new_entry->vme_end += adjustment;
		/* clear some attributes */
		new_entry->inheritance = inheritance;
		new_entry->protection = cur_prot;
		new_entry->max_protection = max_prot;
		new_entry->behavior = VM_BEHAVIOR_DEFAULT;
		/* take an extra reference on the entry's "object" */
		if (new_entry->is_sub_map) {
			assert(!new_entry->use_pmap); /* not nested */
			vm_map_lock(VME_SUBMAP(new_entry));
			vm_map_reference(VME_SUBMAP(new_entry));
			vm_map_unlock(VME_SUBMAP(new_entry));
		} else {
			vm_object_reference(VME_OBJECT(new_entry));
		}
		/* insert the new entry in the map */
		vm_map_store_entry_link(map, where, new_entry,
					VM_MAP_KERNEL_FLAGS_NONE);
		/* continue inserting the "copy entries" after the new entry */
		where = new_entry;
	}
}
/*
 * Returns true if *size matches (or is in the range of) copy->size.
 * Upon returning true, the *size field is updated with the actual size of the
 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
 */
boolean_t
vm_map_copy_validate_size(
	vm_map_t	dst_map,
	vm_map_copy_t	copy,
	vm_map_size_t	*size)
{
	if (copy == VM_MAP_COPY_NULL)
		return FALSE;
	vm_map_size_t copy_sz = copy->size;
	vm_map_size_t sz = *size;
	switch (copy->type) {
	case VM_MAP_COPY_OBJECT:
	case VM_MAP_COPY_KERNEL_BUFFER:
		if (sz == copy_sz)
			return TRUE;
		break;
	case VM_MAP_COPY_ENTRY_LIST:
		/*
		 * potential page-size rounding prevents us from exactly
		 * validating this flavor of vm_map_copy, but we can at least
		 * assert that it's within a range.
		 */
		if (copy_sz >= sz &&
		    copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
			*size = copy_sz;
			return TRUE;
		}
		break;
	default:
		break;
	}
	return FALSE;
}
/*
 *	Routine:	vm_map_copyout_size
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.  Uses a prevalidated
 *		size for the copy object (vm_map_copy_validate_size).
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout_size(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy,
	vm_map_size_t		copy_size)
{
	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
				       TRUE, /* consume_on_success */
				       VM_PROT_DEFAULT,
				       VM_PROT_ALL,
				       VM_INHERIT_DEFAULT);
}

/*
 *	Routine:	vm_map_copyout
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy)
{
	return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
				       TRUE, /* consume_on_success */
				       VM_PROT_DEFAULT,
				       VM_PROT_ALL,
				       VM_INHERIT_DEFAULT);
}
kern_return_t
vm_map_copyout_internal(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy,
	vm_map_size_t		copy_size,
	boolean_t		consume_on_success,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	vm_map_size_t		size;
	vm_map_size_t		adjustment;
	vm_map_offset_t		start;
	vm_object_offset_t	vm_copy_start;
	vm_map_entry_t		last;
	vm_map_entry_t		entry;
	vm_map_entry_t		hole_entry;

	/*
	 *	Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		*dst_addr = 0;
		return(KERN_SUCCESS);
	}

	if (copy->size != copy_size) {
		*dst_addr = 0;
		return KERN_FAILURE;
	}

	/*
	 *	Check for special copy object, created
	 *	by vm_map_copyin_object.
	 */

	if (copy->type == VM_MAP_COPY_OBJECT) {
		vm_object_t		object = copy->cpy_object;
		kern_return_t		kr;
		vm_object_offset_t	offset;

		offset = vm_object_trunc_page(copy->offset);
		size = vm_map_round_page((copy_size +
					  (vm_map_size_t)(copy->offset -
							  offset)),
					 VM_MAP_PAGE_MASK(dst_map));
		*dst_addr = 0;
		kr = vm_map_enter(dst_map, dst_addr, size,
				  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
				  VM_MAP_KERNEL_FLAGS_NONE,
				  VM_KERN_MEMORY_NONE,
				  object, offset, FALSE,
				  VM_PROT_DEFAULT, VM_PROT_ALL,
				  VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS)
			return(kr);
		/* Account for non-pagealigned copy object */
		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
		if (consume_on_success)
			zfree(vm_map_copy_zone, copy);
		return(KERN_SUCCESS);
	}

	/*
	 *	Check for special kernel buffer allocated
	 *	by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
						    copy, copy_size, FALSE,
						    consume_on_success);
	}

	/*
	 *	Find space for the data
	 */

	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
					  VM_MAP_COPY_PAGE_MASK(copy));
	size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
				 VM_MAP_COPY_PAGE_MASK(copy))
		- vm_copy_start;

StartAgain: ;

	vm_map_lock(dst_map);
	if( dst_map->disable_vmentry_reuse == TRUE) {
		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
		last = entry;
	} else {
		if (dst_map->holelistenabled) {
			hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);

			if (hole_entry == NULL) {
				/*
				 * No more space in the map?
				 */
				vm_map_unlock(dst_map);
				return(KERN_NO_SPACE);
			}

			last = hole_entry;
			start = last->vme_start;
		} else {
			assert(first_free_is_valid(dst_map));
			start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
				vm_map_min(dst_map) : last->vme_end;
		}
		start = vm_map_round_page(start,
					  VM_MAP_PAGE_MASK(dst_map));
	}

	while (TRUE) {
		vm_map_entry_t	next = last->vme_next;
		vm_map_offset_t	end = start + size;

		if ((end > dst_map->max_offset) || (end < start)) {
			if (dst_map->wait_for_space) {
				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
					assert_wait((event_t) dst_map,
						    THREAD_INTERRUPTIBLE);
					vm_map_unlock(dst_map);
					thread_block(THREAD_CONTINUE_NULL);
					goto StartAgain;
				}
			}
			vm_map_unlock(dst_map);
			return(KERN_NO_SPACE);
		}

		if (dst_map->holelistenabled) {
			if (last->vme_end >= end)
				break;
		} else {
			/*
			 *	If there are no more entries, we must win.
			 *
			 *	OR
			 *
			 *	If there is another entry, it must be
			 *	after the end of the potential new region.
			 */

			if (next == vm_map_to_entry(dst_map))
				break;

			if (next->vme_start >= end)
				break;
		}

		last = next;

		if (dst_map->holelistenabled) {
			if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
				/* wrapped around */
				vm_map_unlock(dst_map);
				return(KERN_NO_SPACE);
			}
			start = last->vme_start;
		} else {
			start = last->vme_end;
		}
		start = vm_map_round_page(start,
					  VM_MAP_PAGE_MASK(dst_map));
	}

	if (dst_map->holelistenabled) {
		if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
		}
	}

	adjustment = start - vm_copy_start;
	if (! consume_on_success) {
		/*
		 * We're not allowed to consume "copy", so we'll have to
		 * copy its map entries into the destination map below.
		 * No need to re-allocate map entries from the correct
		 * (pageable or not) zone, since we'll get new map entries
		 * during the transfer.
		 * We'll also adjust the map entries' "start" and "end"
		 * during the transfer, to keep "copy"'s entries consistent
		 * with its "offset".
		 */
		goto after_adjustments;
	}

	/*
	 *	Since we're going to just drop the map
	 *	entries from the copy into the destination
	 *	map, they must come from the same pool.
	 */

	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
		/*
		 * Mismatches occur when dealing with the default
		 * pager.
		 */
		zone_t		old_zone;
		vm_map_entry_t	next, new;

		/*
		 * Find the zone that the copies were allocated from
		 */

		entry = vm_map_copy_first_entry(copy);

		/*
		 * Reinitialize the copy so that vm_map_copy_entry_link
		 * will work.
		 */
		vm_map_store_copy_reset(copy, entry);
		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;

		/*
		 * Copy each entry.
		 */
		while (entry != vm_map_copy_to_entry(copy)) {
			new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
			vm_map_entry_copy_full(new, entry);
			assert(!new->iokit_acct);
			if (new->is_sub_map) {
				/* clr address space specifics */
				new->use_pmap = FALSE;
			}
			vm_map_copy_entry_link(copy,
					       vm_map_copy_last_entry(copy),
					       new);
			next = entry->vme_next;
			old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
			zfree(old_zone, entry);
			entry = next;
		}
	}

	/*
	 *	Adjust the addresses in the copy chain, and
	 *	reset the region attributes.
	 */

	for (entry = vm_map_copy_first_entry(copy);
	     entry != vm_map_copy_to_entry(copy);
	     entry = entry->vme_next) {
		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
			/*
			 * We're injecting this copy entry into a map that
			 * has the standard page alignment, so clear
			 * "map_aligned" (which might have been inherited
			 * from the original map entry).
			 */
			entry->map_aligned = FALSE;
		}

		entry->vme_start += adjustment;
		entry->vme_end += adjustment;

		if (entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
						   VM_MAP_PAGE_MASK(dst_map)));
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
						   VM_MAP_PAGE_MASK(dst_map)));
		}

		entry->inheritance = VM_INHERIT_DEFAULT;
		entry->protection = VM_PROT_DEFAULT;
		entry->max_protection = VM_PROT_ALL;
		entry->behavior = VM_BEHAVIOR_DEFAULT;

		/*
		 * If the entry is now wired,
		 * map the pages into the destination map.
		 */
		if (entry->wired_count != 0) {
			vm_map_offset_t		va;
			vm_object_offset_t	offset;
			vm_object_t		object;
			vm_prot_t		prot;
			int			type_of_fault;

			object = VME_OBJECT(entry);
			offset = VME_OFFSET(entry);
			va = entry->vme_start;

			pmap_pageable(dst_map->pmap,
				      entry->vme_start,
				      entry->vme_end,
				      TRUE);

			while (va < entry->vme_end) {
				vm_page_t	m;
				struct vm_object_fault_info fault_info = {};

				/*
				 * Look up the page in the object.
				 * Assert that the page will be found in the
				 * top object:
				 * either
				 *	the object was newly created by
				 *	vm_object_copy_slowly, and has
				 *	copies of all of the pages from
				 *	the source object
				 * or
				 *	the object was moved from the old
				 *	map entry; because the old map
				 *	entry was wired, all of the pages
				 *	were in the top-level object.
				 *	(XXX not true if we wire pages for
				 *	 reading)
				 */
				vm_object_lock(object);

				m = vm_page_lookup(object, offset);
				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
				    m->vmp_absent)
					panic("vm_map_copyout: wiring %p", m);

				prot = entry->protection;

				if (override_nx(dst_map, VME_ALIAS(entry)) &&
				    prot)
					prot |= VM_PROT_EXECUTE;

				type_of_fault = DBG_CACHE_HIT_FAULT;

				fault_info.user_tag = VME_ALIAS(entry);
				fault_info.pmap_options = 0;
				if (entry->iokit_acct ||
				    (!entry->is_sub_map && !entry->use_pmap)) {
					fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
				}

				vm_fault_enter(m,
					       dst_map->pmap,
					       va,
					       prot,
					       prot,
					       VM_PAGE_WIRED(m),
					       FALSE,			/* change_wiring */
					       VM_KERN_MEMORY_NONE,	/* tag - not wiring */
					       &fault_info,
					       NULL,			/* need_retry */
					       &type_of_fault);

				vm_object_unlock(object);

				offset += PAGE_SIZE_64;
				va += PAGE_SIZE;
			}
		}
	}

after_adjustments:

	/*
	 *	Correct the page alignment for the result
	 */

	*dst_addr = start + (copy->offset - vm_copy_start);

#if KASAN
	kasan_notify_address(*dst_addr, size);
#endif

	/*
	 *	Update the hints and the map size
	 */

	if (consume_on_success) {
		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
	} else {
		SAVE_HINT_MAP_WRITE(dst_map, last);
	}

	dst_map->size += size;

	if (consume_on_success) {
		vm_map_copy_insert(dst_map, last, copy);
	} else {
		vm_map_copy_remap(dst_map, last, copy, adjustment,
				  cur_protection, max_protection,
				  inheritance);
	}

	vm_map_unlock(dst_map);

	/*
	 * XXX	If wiring_required, call vm_map_pageable
	 */

	return(KERN_SUCCESS);
}
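
/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the address fix-up above in one line -- the space found in
 * the destination map starts at a page boundary ("start"), but the
 * copy's offset need not be page aligned, so the address handed back to
 * the caller is shifted by the offset's misalignment within its page.
 * The helper name is hypothetical; standalone C under "#if 0", never
 * compiled.
 */
#if 0	/* illustrative sketch only */
#include <stdint.h>

static uint64_t
copyout_result_address(uint64_t start, uint64_t copy_offset, uint64_t page_mask)
{
	uint64_t vm_copy_start = copy_offset & ~page_mask;	/* truncated offset */

	return start + (copy_offset - vm_copy_start);
}
#endif	/* illustrative sketch only */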
/*
 *	Routine:	vm_map_copyin
 *
 *	Description:
 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
 */

#undef vm_map_copyin

kern_return_t
vm_map_copyin(
	vm_map_t	src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	vm_map_copy_t	*copy_result)	/* OUT */
{
	return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
				    FALSE, copy_result, FALSE));
}
/*
 *	Routine:	vm_map_copyin_common
 *
 *	Description:
 *		Copy the specified region (src_addr, len) from the
 *		source address space (src_map), possibly removing
 *		the region from the source address space (src_destroy).
 *
 *	Returns:
 *		A vm_map_copy_t object (copy_result), suitable for
 *		insertion into another address space (using vm_map_copyout),
 *		copying over another address space region (using
 *		vm_map_copy_overwrite).  If the copy is unused, it
 *		should be destroyed (using vm_map_copy_discard).
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */

typedef struct submap_map {
	vm_map_t	parent_map;
	vm_map_offset_t	base_start;
	vm_map_offset_t	base_end;
	vm_map_size_t	base_len;
	struct submap_map *next;
} submap_map_t;

kern_return_t
vm_map_copyin_common(
	vm_map_t	src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	__unused boolean_t	src_volatile,
	vm_map_copy_t	*copy_result,	/* OUT */
	boolean_t	use_maxprot)
{
	int flags;

	flags = 0;
	if (src_destroy) {
		flags |= VM_MAP_COPYIN_SRC_DESTROY;
	}
	if (use_maxprot) {
		flags |= VM_MAP_COPYIN_USE_MAXPROT;
	}
	return vm_map_copyin_internal(src_map,
				      src_addr,
				      len,
				      flags,
				      copy_result);
}
10883 vm_map_copyin_internal(
10885 vm_map_address_t src_addr
,
10888 vm_map_copy_t
*copy_result
) /* OUT */
10890 vm_map_entry_t tmp_entry
; /* Result of last map lookup --
10891 * in multi-level lookup, this
10892 * entry contains the actual
10893 * vm_object/offset.
10895 vm_map_entry_t new_entry
= VM_MAP_ENTRY_NULL
; /* Map entry for copy */
10897 vm_map_offset_t src_start
; /* Start of current entry --
10898 * where copy is taking place now
10900 vm_map_offset_t src_end
; /* End of entire region to be
10902 vm_map_offset_t src_base
;
10903 vm_map_t base_map
= src_map
;
10904 boolean_t map_share
=FALSE
;
10905 submap_map_t
*parent_maps
= NULL
;
10907 vm_map_copy_t copy
; /* Resulting copy */
10908 vm_map_address_t copy_addr
;
10909 vm_map_size_t copy_size
;
10910 boolean_t src_destroy
;
10911 boolean_t use_maxprot
;
10912 boolean_t preserve_purgeable
;
10913 boolean_t entry_was_shared
;
10914 vm_map_entry_t saved_src_entry
;
10916 if (flags
& ~VM_MAP_COPYIN_ALL_FLAGS
) {
10917 return KERN_INVALID_ARGUMENT
;
10920 src_destroy
= (flags
& VM_MAP_COPYIN_SRC_DESTROY
) ? TRUE
: FALSE
;
10921 use_maxprot
= (flags
& VM_MAP_COPYIN_USE_MAXPROT
) ? TRUE
: FALSE
;
10922 preserve_purgeable
=
10923 (flags
& VM_MAP_COPYIN_PRESERVE_PURGEABLE
) ? TRUE
: FALSE
;
10926 * Check for copies of zero bytes.
10930 *copy_result
= VM_MAP_COPY_NULL
;
10931 return(KERN_SUCCESS
);
10935 * Check that the end address doesn't overflow
10937 src_end
= src_addr
+ len
;
10938 if (src_end
< src_addr
)
10939 return KERN_INVALID_ADDRESS
;
10942 * Compute (page aligned) start and end of region
10944 src_start
= vm_map_trunc_page(src_addr
,
10945 VM_MAP_PAGE_MASK(src_map
));
10946 src_end
= vm_map_round_page(src_end
,
10947 VM_MAP_PAGE_MASK(src_map
));
10950 * If the copy is sufficiently small, use a kernel buffer instead
10951 * of making a virtual copy. The theory being that the cost of
10952 * setting up VM (and taking C-O-W faults) dominates the copy costs
10953 * for small regions.
10955 if ((len
< msg_ool_size_small
) &&
10957 !preserve_purgeable
&&
10958 !(flags
& VM_MAP_COPYIN_ENTRY_LIST
) &&
10960 * Since the "msg_ool_size_small" threshold was increased and
10961 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
10962 * address space limits, we revert to doing a virtual copy if the
10963 * copied range goes beyond those limits. Otherwise, mach_vm_read()
10964 * of the commpage would now fail when it used to work.
10966 (src_start
>= vm_map_min(src_map
) &&
10967 src_start
< vm_map_max(src_map
) &&
10968 src_end
>= vm_map_min(src_map
) &&
10969 src_end
< vm_map_max(src_map
)))
10970 return vm_map_copyin_kernel_buffer(src_map
, src_addr
, len
,
10971 src_destroy
, copy_result
);
10973 XPR(XPR_VM_MAP
, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map
, src_addr
, len
, src_destroy
, 0);
10976 * Allocate a header element for the list.
10978 * Use the start and end in the header to
10979 * remember the endpoints prior to rounding.
10982 copy
= vm_map_copy_allocate();
10983 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
10984 copy
->cpy_hdr
.entries_pageable
= TRUE
;
10986 copy
->cpy_hdr
.page_shift
= src_map
->hdr
.page_shift
;
10989 * The copy entries can be broken down for a variety of reasons,
10990 * so we can't guarantee that they will remain map-aligned...
10991 * Will need to adjust the first copy_entry's "vme_start" and
10992 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
10993 * rather than the original map's alignment.
10995 copy
->cpy_hdr
.page_shift
= PAGE_SHIFT
;
10998 vm_map_store_init( &(copy
->cpy_hdr
) );
11000 copy
->offset
= src_addr
;
11003 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11005 #define RETURN(x) \
11007 vm_map_unlock(src_map); \
11008 if(src_map != base_map) \
11009 vm_map_deallocate(src_map); \
11010 if (new_entry != VM_MAP_ENTRY_NULL) \
11011 vm_map_copy_entry_dispose(copy,new_entry); \
11012 vm_map_copy_discard(copy); \
11014 submap_map_t *_ptr; \
11016 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11017 parent_maps=parent_maps->next; \
11018 if (_ptr->parent_map != base_map) \
11019 vm_map_deallocate(_ptr->parent_map); \
11020 kfree(_ptr, sizeof(submap_map_t)); \
11027 * Find the beginning of the region.
11030 vm_map_lock(src_map
);
11033 * Lookup the original "src_addr" rather than the truncated
11034 * "src_start", in case "src_start" falls in a non-map-aligned
11035 * map entry *before* the map entry that contains "src_addr"...
11037 if (!vm_map_lookup_entry(src_map
, src_addr
, &tmp_entry
))
11038 RETURN(KERN_INVALID_ADDRESS
);
11039 if(!tmp_entry
->is_sub_map
) {
11041 * ... but clip to the map-rounded "src_start" rather than
11042 * "src_addr" to preserve map-alignment. We'll adjust the
11043 * first copy entry at the end, if needed.
11045 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11047 if (src_start
< tmp_entry
->vme_start
) {
11049 * Move "src_start" up to the start of the
11050 * first map entry to copy.
11052 src_start
= tmp_entry
->vme_start
;
11054 /* set for later submap fix-up */
11055 copy_addr
= src_start
;
11058 * Go through entries until we get to the end.
11062 vm_map_entry_t src_entry
= tmp_entry
; /* Top-level entry */
11063 vm_map_size_t src_size
; /* Size of source
11064 * map entry (in both
11068 vm_object_t src_object
; /* Object to copy */
11069 vm_object_offset_t src_offset
;
11071 boolean_t src_needs_copy
; /* Should source map
11072 * be made read-only
11073 * for copy-on-write?
11076 boolean_t new_entry_needs_copy
; /* Will new entry be COW? */
11078 boolean_t was_wired
; /* Was source wired? */
11079 vm_map_version_t version
; /* Version before locks
11080 * dropped to make copy
11082 kern_return_t result
; /* Return value from
11083 * copy_strategically.
11085 while(tmp_entry
->is_sub_map
) {
11086 vm_map_size_t submap_len
;
11089 ptr
= (submap_map_t
*)kalloc(sizeof(submap_map_t
));
11090 ptr
->next
= parent_maps
;
11092 ptr
->parent_map
= src_map
;
11093 ptr
->base_start
= src_start
;
11094 ptr
->base_end
= src_end
;
11095 submap_len
= tmp_entry
->vme_end
- src_start
;
11096 if(submap_len
> (src_end
-src_start
))
11097 submap_len
= src_end
-src_start
;
11098 ptr
->base_len
= submap_len
;
11100 src_start
-= tmp_entry
->vme_start
;
11101 src_start
+= VME_OFFSET(tmp_entry
);
11102 src_end
= src_start
+ submap_len
;
11103 src_map
= VME_SUBMAP(tmp_entry
);
11104 vm_map_lock(src_map
);
11105 /* keep an outstanding reference for all maps in */
11106 /* the parents tree except the base map */
11107 vm_map_reference(src_map
);
11108 vm_map_unlock(ptr
->parent_map
);
11109 if (!vm_map_lookup_entry(
11110 src_map
, src_start
, &tmp_entry
))
11111 RETURN(KERN_INVALID_ADDRESS
);
11113 if(!tmp_entry
->is_sub_map
)
11114 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11115 src_entry
= tmp_entry
;
11117 /* we are now in the lowest level submap... */
11119 if ((VME_OBJECT(tmp_entry
) != VM_OBJECT_NULL
) &&
11120 (VME_OBJECT(tmp_entry
)->phys_contiguous
)) {
11121 /* This is not, supported for now.In future */
11122 /* we will need to detect the phys_contig */
11123 /* condition and then upgrade copy_slowly */
11124 /* to do physical copy from the device mem */
11125 /* based object. We can piggy-back off of */
11126 /* the was wired boolean to set-up the */
11127 /* proper handling */
11128 RETURN(KERN_PROTECTION_FAILURE
);
11131 * Create a new address map entry to hold the result.
11132 * Fill in the fields from the appropriate source entries.
11133 * We must unlock the source map to do this if we need
11134 * to allocate a map entry.
11136 if (new_entry
== VM_MAP_ENTRY_NULL
) {
11137 version
.main_timestamp
= src_map
->timestamp
;
11138 vm_map_unlock(src_map
);
11140 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
11142 vm_map_lock(src_map
);
11143 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
11144 if (!vm_map_lookup_entry(src_map
, src_start
,
11146 RETURN(KERN_INVALID_ADDRESS
);
11148 if (!tmp_entry
->is_sub_map
)
11149 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
11150 continue; /* restart w/ new tmp_entry */
11155 * Verify that the region can be read.
11157 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
11159 (src_entry
->max_protection
& VM_PROT_READ
) == 0)
11160 RETURN(KERN_PROTECTION_FAILURE
);
11163 * Clip against the endpoints of the entire region.
11166 vm_map_clip_end(src_map
, src_entry
, src_end
);
11168 src_size
= src_entry
->vme_end
- src_start
;
11169 src_object
= VME_OBJECT(src_entry
);
11170 src_offset
= VME_OFFSET(src_entry
);
11171 was_wired
= (src_entry
->wired_count
!= 0);
11173 vm_map_entry_copy(new_entry
, src_entry
);
11174 if (new_entry
->is_sub_map
) {
11175 /* clr address space specifics */
11176 new_entry
->use_pmap
= FALSE
;
11179 * We're dealing with a copy-on-write operation,
11180 * so the resulting mapping should not inherit the
11181 * original mapping's accounting settings.
11182 * "iokit_acct" should have been cleared in
11183 * vm_map_entry_copy().
11184 * "use_pmap" should be reset to its default (TRUE)
11185 * so that the new mapping gets accounted for in
11186 * the task's memory footprint.
11188 assert(!new_entry
->iokit_acct
);
11189 new_entry
->use_pmap
= TRUE
;
11193 * Attempt non-blocking copy-on-write optimizations.
11197 (src_object
== VM_OBJECT_NULL
||
11198 (src_object
->internal
&&
11199 src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
11202 * If we are destroying the source, and the object
11203 * is internal, we can move the object reference
11204 * from the source to the copy. The copy is
11205 * copy-on-write only if the source is.
11206 * We make another reference to the object, because
11207 * destroying the source entry will deallocate it.
11209 vm_object_reference(src_object
);
11212 * Copy is always unwired. vm_map_copy_entry
11213 * set its wired count to zero.
11216 goto CopySuccessful
;
11221 XPR(XPR_VM_MAP
, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
11222 src_object
, new_entry
, VME_OBJECT(new_entry
),
11224 if ((src_object
== VM_OBJECT_NULL
||
11225 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
)) &&
11226 vm_object_copy_quickly(
11227 &VME_OBJECT(new_entry
),
11231 &new_entry_needs_copy
)) {
11233 new_entry
->needs_copy
= new_entry_needs_copy
;
11236 * Handle copy-on-write obligations
11239 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
11242 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
11244 if (override_nx(src_map
, VME_ALIAS(src_entry
))
11246 prot
|= VM_PROT_EXECUTE
;
11248 vm_object_pmap_protect(
11252 (src_entry
->is_shared
?
11255 src_entry
->vme_start
,
11258 assert(tmp_entry
->wired_count
== 0);
11259 tmp_entry
->needs_copy
= TRUE
;
11263 * The map has never been unlocked, so it's safe
11264 * to move to the next entry rather than doing
11268 goto CopySuccessful
;
11271 entry_was_shared
= tmp_entry
->is_shared
;
11274 * Take an object reference, so that we may
11275 * release the map lock(s).
11278 assert(src_object
!= VM_OBJECT_NULL
);
11279 vm_object_reference(src_object
);
11282 * Record the timestamp for later verification.
11286 version
.main_timestamp
= src_map
->timestamp
;
11287 vm_map_unlock(src_map
); /* Increments timestamp once! */
11288 saved_src_entry
= src_entry
;
11289 tmp_entry
= VM_MAP_ENTRY_NULL
;
11290 src_entry
= VM_MAP_ENTRY_NULL
;
11298 vm_object_lock(src_object
);
11299 result
= vm_object_copy_slowly(
11304 &VME_OBJECT(new_entry
));
11305 VME_OFFSET_SET(new_entry
, 0);
11306 new_entry
->needs_copy
= FALSE
;
11308 else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
11309 (entry_was_shared
|| map_share
)) {
11310 vm_object_t new_object
;
11312 vm_object_lock_shared(src_object
);
11313 new_object
= vm_object_copy_delayed(
11318 if (new_object
== VM_OBJECT_NULL
)
11321 VME_OBJECT_SET(new_entry
, new_object
);
11322 assert(new_entry
->wired_count
== 0);
11323 new_entry
->needs_copy
= TRUE
;
11324 assert(!new_entry
->iokit_acct
);
11325 assert(new_object
->purgable
== VM_PURGABLE_DENY
);
11326 assertf(new_entry
->use_pmap
, "src_map %p new_entry %p\n", src_map
, new_entry
);
11327 result
= KERN_SUCCESS
;
11330 vm_object_offset_t new_offset
;
11331 new_offset
= VME_OFFSET(new_entry
);
11332 result
= vm_object_copy_strategically(src_object
,
11335 &VME_OBJECT(new_entry
),
11337 &new_entry_needs_copy
);
11338 if (new_offset
!= VME_OFFSET(new_entry
)) {
11339 VME_OFFSET_SET(new_entry
, new_offset
);
11342 new_entry
->needs_copy
= new_entry_needs_copy
;
11345 if (result
== KERN_SUCCESS
&&
11346 preserve_purgeable
&&
11347 src_object
->purgable
!= VM_PURGABLE_DENY
) {
11348 vm_object_t new_object
;
11350 new_object
= VME_OBJECT(new_entry
);
11351 assert(new_object
!= src_object
);
11352 vm_object_lock(new_object
);
11353 assert(new_object
->ref_count
== 1);
11354 assert(new_object
->shadow
== VM_OBJECT_NULL
);
11355 assert(new_object
->copy
== VM_OBJECT_NULL
);
11356 assert(new_object
->vo_owner
== NULL
);
11358 new_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
11359 new_object
->true_share
= TRUE
;
11360 /* start as non-volatile with no owner... */
11361 new_object
->purgable
= VM_PURGABLE_NONVOLATILE
;
11362 vm_purgeable_nonvolatile_enqueue(new_object
, NULL
);
11363 /* ... and move to src_object's purgeable state */
11364 if (src_object
->purgable
!= VM_PURGABLE_NONVOLATILE
) {
11366 state
= src_object
->purgable
;
11367 vm_object_purgable_control(
11369 VM_PURGABLE_SET_STATE_FROM_KERNEL
,
11372 vm_object_unlock(new_object
);
11373 new_object
= VM_OBJECT_NULL
;
11374 /* no pmap accounting for purgeable objects */
11375 new_entry
->use_pmap
= FALSE
;
11378 if (result
!= KERN_SUCCESS
&&
11379 result
!= KERN_MEMORY_RESTART_COPY
) {
11380 vm_map_lock(src_map
);
11385 * Throw away the extra reference
11388 vm_object_deallocate(src_object
);
11391 * Verify that the map has not substantially
11392 * changed while the copy was being made.
11395 vm_map_lock(src_map
);
11397 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
) {
11398 /* src_map hasn't changed: src_entry is still valid */
11399 src_entry
= saved_src_entry
;
11400 goto VerificationSuccessful
;
11404 * Simple version comparison failed.
11406 * Retry the lookup and verify that the
11407 * same object/offset are still present.
11409 * [Note: a memory manager that colludes with
11410 * the calling task can detect that we have
11411 * cheated. While the map was unlocked, the
11412 * mapping could have been changed and restored.]
11415 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
11416 if (result
!= KERN_MEMORY_RESTART_COPY
) {
11417 vm_object_deallocate(VME_OBJECT(new_entry
));
11418 VME_OBJECT_SET(new_entry
, VM_OBJECT_NULL
);
11419 /* reset accounting state */
11420 new_entry
->iokit_acct
= FALSE
;
11421 new_entry
->use_pmap
= TRUE
;
11423 RETURN(KERN_INVALID_ADDRESS
);
11426 src_entry
= tmp_entry
;
11427 vm_map_clip_start(src_map
, src_entry
, src_start
);
11429 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
11431 ((src_entry
->max_protection
& VM_PROT_READ
) == 0))
11432 goto VerificationFailed
;
11434 if (src_entry
->vme_end
< new_entry
->vme_end
) {
11436 * This entry might have been shortened
11437 * (vm_map_clip_end) or been replaced with
11438 * an entry that ends closer to "src_start"
11440 * Adjust "new_entry" accordingly; copying
11441 * less memory would be correct but we also
11442 * redo the copy (see below) if the new entry
11443 * no longer points at the same object/offset.
11445 assert(VM_MAP_PAGE_ALIGNED(src_entry
->vme_end
,
11446 VM_MAP_COPY_PAGE_MASK(copy
)));
11447 new_entry
->vme_end
= src_entry
->vme_end
;
11448 src_size
= new_entry
->vme_end
- src_start
;
11449 } else if (src_entry
->vme_end
> new_entry
->vme_end
) {
11451 * This entry might have been extended
11452 * (vm_map_entry_simplify() or coalesce)
11453 * or been replaced with an entry that ends farther
11454 * from "src_start" than before.
11456 * We've called vm_object_copy_*() only on
11457 * the previous <start:end> range, so we can't
11458 * just extend new_entry. We have to re-do
11459 * the copy based on the new entry as if it was
11460 * pointing at a different object/offset (see
11461 * "Verification failed" below).
11465 if ((VME_OBJECT(src_entry
) != src_object
) ||
11466 (VME_OFFSET(src_entry
) != src_offset
) ||
11467 (src_entry
->vme_end
> new_entry
->vme_end
)) {
11470 * Verification failed.
11472 * Start over with this top-level entry.
11475 VerificationFailed
: ;
11477 vm_object_deallocate(VME_OBJECT(new_entry
));
11478 tmp_entry
= src_entry
;
11483 * Verification succeeded.
11486 VerificationSuccessful
: ;
11488 if (result
== KERN_MEMORY_RESTART_COPY
)
11498 * Link in the new copy entry.
11501 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
11505 * Determine whether the entire region
11508 src_base
= src_start
;
11509 src_start
= new_entry
->vme_end
;
11510 new_entry
= VM_MAP_ENTRY_NULL
;
11511 while ((src_start
>= src_end
) && (src_end
!= 0)) {
11514 if (src_map
== base_map
) {
11515 /* back to the top */
11520 assert(ptr
!= NULL
);
11521 parent_maps
= parent_maps
->next
;
11523 /* fix up the damage we did in that submap */
11524 vm_map_simplify_range(src_map
,
11528 vm_map_unlock(src_map
);
11529 vm_map_deallocate(src_map
);
11530 vm_map_lock(ptr
->parent_map
);
11531 src_map
= ptr
->parent_map
;
11532 src_base
= ptr
->base_start
;
11533 src_start
= ptr
->base_start
+ ptr
->base_len
;
11534 src_end
= ptr
->base_end
;
11535 if (!vm_map_lookup_entry(src_map
,
11538 (src_end
> src_start
)) {
11539 RETURN(KERN_INVALID_ADDRESS
);
11541 kfree(ptr
, sizeof(submap_map_t
));
11542 if (parent_maps
== NULL
)
11544 src_entry
= tmp_entry
->vme_prev
;
11547 if ((VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) &&
11548 (src_start
>= src_addr
+ len
) &&
11549 (src_addr
+ len
!= 0)) {
11551 * Stop copying now, even though we haven't reached
11552 * "src_end". We'll adjust the end of the last copy
11553 * entry at the end, if needed.
11555 * If src_map's aligment is different from the
11556 * system's page-alignment, there could be
11557 * extra non-map-aligned map entries between
11558 * the original (non-rounded) "src_addr + len"
11559 * and the rounded "src_end".
11560 * We do not want to copy those map entries since
11561 * they're not part of the copied range.
11566 if ((src_start
>= src_end
) && (src_end
!= 0))
11570 * Verify that there are no gaps in the region
11573 tmp_entry
= src_entry
->vme_next
;
11574 if ((tmp_entry
->vme_start
!= src_start
) ||
11575 (tmp_entry
== vm_map_to_entry(src_map
))) {
11576 RETURN(KERN_INVALID_ADDRESS
);
11581 * If the source should be destroyed, do it now, since the
11582 * copy was successful.
11585 (void) vm_map_delete(
11587 vm_map_trunc_page(src_addr
,
11588 VM_MAP_PAGE_MASK(src_map
)),
11590 ((src_map
== kernel_map
) ?
11591 VM_MAP_REMOVE_KUNWIRE
:
11592 VM_MAP_REMOVE_NO_FLAGS
),
11595 /* fix up the damage we did in the base map */
11596 vm_map_simplify_range(
11598 vm_map_trunc_page(src_addr
,
11599 VM_MAP_PAGE_MASK(src_map
)),
11600 vm_map_round_page(src_end
,
11601 VM_MAP_PAGE_MASK(src_map
)));
11604 vm_map_unlock(src_map
);
11605 tmp_entry
= VM_MAP_ENTRY_NULL
;
11607 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) {
11608 vm_map_offset_t original_start
, original_offset
, original_end
;
11610 assert(VM_MAP_COPY_PAGE_MASK(copy
) == PAGE_MASK
);
11612 /* adjust alignment of first copy_entry's "vme_start" */
11613 tmp_entry
= vm_map_copy_first_entry(copy
);
11614 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
11615 vm_map_offset_t adjustment
;
11617 original_start
= tmp_entry
->vme_start
;
11618 original_offset
= VME_OFFSET(tmp_entry
);
11620 /* map-align the start of the first copy entry... */
11621 adjustment
= (tmp_entry
->vme_start
-
11623 tmp_entry
->vme_start
,
11624 VM_MAP_PAGE_MASK(src_map
)));
11625 tmp_entry
->vme_start
-= adjustment
;
11626 VME_OFFSET_SET(tmp_entry
,
11627 VME_OFFSET(tmp_entry
) - adjustment
);
11628 copy_addr
-= adjustment
;
11629 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
11630 /* ... adjust for mis-aligned start of copy range */
11632 (vm_map_trunc_page(copy
->offset
,
11634 vm_map_trunc_page(copy
->offset
,
11635 VM_MAP_PAGE_MASK(src_map
)));
11637 assert(page_aligned(adjustment
));
11638 assert(adjustment
					       < VM_MAP_PAGE_SIZE(src_map));
				tmp_entry->vme_start += adjustment;
				VME_OFFSET_SET(tmp_entry,
					       (VME_OFFSET(tmp_entry) +
						adjustment));
				copy_addr += adjustment;
				assert(tmp_entry->vme_start < tmp_entry->vme_end);
			}

			/*
			 * Assert that the adjustments haven't exposed
			 * more than was originally copied...
			 */
			assert(tmp_entry->vme_start >= original_start);
			assert(VME_OFFSET(tmp_entry) >= original_offset);
			/*
			 * ... and that it did not adjust outside of a
			 * single 16K page.
			 */
			assert(vm_map_trunc_page(tmp_entry->vme_start,
						 VM_MAP_PAGE_MASK(src_map)) ==
			       vm_map_trunc_page(original_start,
						 VM_MAP_PAGE_MASK(src_map)));
		}

		/* adjust alignment of last copy_entry's "vme_end" */
		tmp_entry = vm_map_copy_last_entry(copy);
		if (tmp_entry != vm_map_copy_to_entry(copy)) {
			vm_map_offset_t	adjustment;

			original_end = tmp_entry->vme_end;

			/* map-align the end of the last copy entry... */
			tmp_entry->vme_end =
				vm_map_round_page(tmp_entry->vme_end,
						  VM_MAP_PAGE_MASK(src_map));
			/* ... adjust for mis-aligned end of copy range */
			adjustment =
				(vm_map_round_page((copy->offset +
						    copy->size),
						   VM_MAP_PAGE_MASK(src_map)) -
				 vm_map_round_page((copy->offset +
						    copy->size),
						   PAGE_MASK));
			if (adjustment) {
				assert(page_aligned(adjustment));
				assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
				tmp_entry->vme_end -= adjustment;
				assert(tmp_entry->vme_start < tmp_entry->vme_end);
			}

			/*
			 * Assert that the adjustments haven't exposed
			 * more than was originally copied...
			 */
			assert(tmp_entry->vme_end <= original_end);
			/*
			 * ... and that it did not adjust outside of a
			 * single 16K page.
			 */
			assert(vm_map_round_page(tmp_entry->vme_end,
						 VM_MAP_PAGE_MASK(src_map)) ==
			       vm_map_round_page(original_end,
						 VM_MAP_PAGE_MASK(src_map)));
		}
	}

	/* Fix-up start and end points in copy.  This is necessary */
	/* when the various entries in the copy object were picked */
	/* up from different sub-maps */

	tmp_entry = vm_map_copy_first_entry(copy);
	copy_size = 0; /* compute actual size */
	while (tmp_entry != vm_map_copy_to_entry(copy)) {
		assert(VM_MAP_PAGE_ALIGNED(
			       copy_addr + (tmp_entry->vme_end -
					    tmp_entry->vme_start),
			       VM_MAP_COPY_PAGE_MASK(copy)));
		assert(VM_MAP_PAGE_ALIGNED(
			       copy_addr,
			       VM_MAP_COPY_PAGE_MASK(copy)));

		/*
		 * The copy_entries will be injected directly into the
		 * destination map and might not be "map aligned" there...
		 */
		tmp_entry->map_aligned = FALSE;

		tmp_entry->vme_end = copy_addr +
			(tmp_entry->vme_end - tmp_entry->vme_start);
		tmp_entry->vme_start = copy_addr;
		assert(tmp_entry->vme_start < tmp_entry->vme_end);
		copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
		copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
		tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
	}

	if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
	    copy_size < copy->size) {
		/*
		 * The actual size of the VM map copy is smaller than what
		 * was requested by the caller.  This must be because some
		 * PAGE_SIZE-sized pages are missing at the end of the last
		 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
		 * The caller might not have been aware of those missing
		 * pages and might not want to be aware of it, which is
		 * fine as long as they don't try to access (and crash on)
		 * those missing pages.
		 * Let's adjust the size of the "copy", to avoid failing
		 * in vm_map_copyout() or vm_map_copy_overwrite().
		 */
		assert(vm_map_round_page(copy_size,
					 VM_MAP_PAGE_MASK(src_map)) ==
		       vm_map_round_page(copy->size,
					 VM_MAP_PAGE_MASK(src_map)));
		copy->size = copy_size;
	}

	*copy_result = copy;
	return(KERN_SUCCESS);
}
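
/*
 * Illustrative sketch (not part of the original code): the end-of-copy
 * adjustment above only matters when the source map's page size is larger
 * than the kernel's PAGE_SIZE.  With a hypothetical 16K source map and 4K
 * kernel pages, a copy ending at offset 0x5000 rounds up to 0x8000 under
 * VM_MAP_PAGE_MASK(src_map) but only to 0x5000 under PAGE_MASK, so the
 * last entry's "vme_end" gets pulled back by 0x3000.  The constants below
 * are made up purely to show the arithmetic.
 */
#if 0 /* example only, never compiled */
static void
vm_map_copy_end_adjustment_example(void)
{
	vm_map_offset_t end = 0x5000;		/* unrounded end of the copy */
	vm_map_offset_t map_mask = 0x3FFF;	/* 16K source map page mask */
	vm_map_offset_t kernel_mask = 0xFFF;	/* 4K kernel page mask */
	vm_map_offset_t adjustment;

	/* same computation as above, with the constants substituted */
	adjustment = vm_map_round_page(end, map_mask) -
	    vm_map_round_page(end, kernel_mask);
	assert(adjustment == 0x3000);
}
#endif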
kern_return_t
vm_map_copy_extract(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_copy_t		*copy_result,	/* OUT */
	vm_prot_t		*cur_prot,	/* OUT */
	vm_prot_t		*max_prot)
{
	vm_map_offset_t	src_start, src_end;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/*
	 *	Check for copies of zero bytes.
	 */

	if (len == 0) {
		*copy_result = VM_MAP_COPY_NULL;
		return(KERN_SUCCESS);
	}

	/*
	 *	Check that the end address doesn't overflow
	 */
	src_end = src_addr + len;
	if (src_end < src_addr)
		return KERN_INVALID_ADDRESS;

	/*
	 *	Compute (page aligned) start and end of region
	 */
	src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
	src_end = vm_map_round_page(src_end, PAGE_MASK);

	/*
	 *	Allocate a header element for the list.
	 *
	 *	Use the start and end in the header to
	 *	remember the endpoints prior to rounding.
	 */

	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->cpy_hdr.entries_pageable = TRUE;

	vm_map_store_init(&copy->cpy_hdr);

	kr = vm_map_remap_extract(src_map,
				  src_addr,
				  len,
				  FALSE, /* copy */
				  &copy->cpy_hdr,
				  cur_prot,
				  max_prot,
				  VM_INHERIT_SHARE,
				  TRUE, /* pageable */
				  FALSE, /* same_map */
				  VM_MAP_KERNEL_FLAGS_NONE);
	if (kr != KERN_SUCCESS) {
		vm_map_copy_discard(copy);
		return kr;
	}

	*copy_result = copy;
	return KERN_SUCCESS;
}
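
/*
 * Illustrative sketch (not part of the original code): vm_map_copy_extract()
 * builds an entry-list copy that still points at the source map's objects
 * and reports the protections it found; the caller typically hands the copy
 * to vm_map_copyout() or discards it.  The map, address and length below are
 * hypothetical.
 */
#if 0 /* example only, never compiled */
static kern_return_t
copy_extract_example(vm_map_t src_map, vm_map_address_t addr,
    vm_map_size_t len)
{
	vm_map_copy_t	copy;
	vm_prot_t	cur_prot, max_prot;
	kern_return_t	kr;

	kr = vm_map_copy_extract(src_map, addr, len,
	    &copy, &cur_prot, &max_prot);
	if (kr != KERN_SUCCESS)
		return kr;
	if (!(cur_prot & VM_PROT_READ)) {
		/* the extracted range is not even readable: give it back */
		vm_map_copy_discard(copy);
		return KERN_PROTECTION_FAILURE;
	}
	/* ... use "copy", e.g. vm_map_copyout() into another map ... */
	vm_map_copy_discard(copy);
	return KERN_SUCCESS;
}
#endif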
/*
 *	vm_map_copyin_object:
 *
 *	Create a copy object from an object.
 *	Our caller donates an object reference.
 *
 */
kern_return_t
vm_map_copyin_object(
	vm_object_t		object,
	vm_object_offset_t	offset,		/* offset of region in object */
	vm_object_size_t	size,		/* size of region in object */
	vm_map_copy_t	*copy_result)	/* OUT */
{
	vm_map_copy_t	copy;		/* Resulting copy */

	/*
	 *	We drop the object into a special copy object
	 *	that contains the object directly.
	 */

	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = object;
	copy->offset = offset;
	copy->size = size;

	*copy_result = copy;
	return(KERN_SUCCESS);
}
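
/*
 * Illustrative sketch (not part of the original code): a caller that already
 * holds an object reference can wrap it in an object-style copy and hand it
 * to a consumer.  The object and size below are hypothetical.
 */
#if 0 /* example only, never compiled */
static kern_return_t
copyin_object_example(vm_object_t obj, vm_object_size_t obj_size)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* donates our reference on "obj" to the copy object */
	kr = vm_map_copyin_object(obj, 0, obj_size, &copy);
	if (kr != KERN_SUCCESS)
		return kr;
	/* ... hand "copy" to a consumer, or discard it ... */
	vm_map_copy_discard(copy);
	return KERN_SUCCESS;
}
#endif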
static void
vm_map_fork_share(
	vm_map_t	old_map,
	vm_map_entry_t	old_entry,
	vm_map_t	new_map)
{
	vm_object_t	object;
	vm_map_entry_t	new_entry;

	/*
	 *	New sharing code.  New map entry
	 *	references original object.  Internal
	 *	objects use asynchronous copy algorithm for
	 *	future copies.  First make sure we have
	 *	the right object.  If we need a shadow,
	 *	or someone else already has one, then
	 *	make a new shadow and share it.
	 */

	object = VME_OBJECT(old_entry);
	if (old_entry->is_sub_map) {
		assert(old_entry->wired_count == 0);
#ifndef NO_NESTED_PMAP
		if(old_entry->use_pmap) {
			kern_return_t	result;

			result = pmap_nest(new_map->pmap,
					   (VME_SUBMAP(old_entry))->pmap,
					   (addr64_t)old_entry->vme_start,
					   (addr64_t)old_entry->vme_start,
					   (uint64_t)(old_entry->vme_end - old_entry->vme_start));
			if(result)
				panic("vm_map_fork_share: pmap_nest failed!");
		}
#endif	/* NO_NESTED_PMAP */
	} else if (object == VM_OBJECT_NULL) {
		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
							    old_entry->vme_start));
		VME_OFFSET_SET(old_entry, 0);
		VME_OBJECT_SET(old_entry, object);
		old_entry->use_pmap = TRUE;
//		assert(!old_entry->needs_copy);
	} else if (object->copy_strategy !=
		   MEMORY_OBJECT_COPY_SYMMETRIC) {

		/*
		 *	We are already using an asymmetric
		 *	copy, and therefore we already have
		 *	the right object.
		 */

		assert(! old_entry->needs_copy);
	}
	else if (old_entry->needs_copy ||	/* case 1 */
		 object->shadowed ||		/* case 2 */
		 (!object->true_share &&	/* case 3 */
		  !old_entry->is_shared &&
		  (object->vo_size >
		   (vm_map_size_t)(old_entry->vme_end -
				   old_entry->vme_start)))) {

		/*
		 *	We need to create a shadow.
		 *	There are three cases here.
		 *	In the first case, we need to
		 *	complete a deferred symmetrical
		 *	copy that we participated in.
		 *	In the second and third cases,
		 *	we need to create the shadow so
		 *	that changes that we make to the
		 *	object do not interfere with
		 *	any symmetrical copies which
		 *	have occurred (case 2) or which
		 *	might occur (case 3).
		 *
		 *	The first case is when we had
		 *	deferred shadow object creation
		 *	via the entry->needs_copy mechanism.
		 *	This mechanism only works when
		 *	only one entry points to the source
		 *	object, and we are about to create
		 *	a second entry pointing to the
		 *	same object.  The problem is that
		 *	there is no way of mapping from
		 *	an object to the entries pointing
		 *	to it.  (Deferred shadow creation
		 *	works with one entry because it occurs
		 *	at fault time, and we walk from the
		 *	entry to the object when handling
		 *	the fault.)
		 *
		 *	The second case is when the object
		 *	to be shared has already been copied
		 *	with a symmetric copy, but we point
		 *	directly to the object without
		 *	needs_copy set in our entry.  (This
		 *	can happen because different ranges
		 *	of an object can be pointed to by
		 *	different entries.  In particular,
		 *	a single entry pointing to an object
		 *	can be split by a call to vm_inherit,
		 *	which, combined with task_create, can
		 *	result in the different entries
		 *	having different needs_copy values.)
		 *	The shadowed flag in the object allows
		 *	us to detect this case.  The problem
		 *	with this case is that if this object
		 *	has or will have shadows, then we
		 *	must not perform an asymmetric copy
		 *	of this object, since such a copy
		 *	allows the object to be changed, which
		 *	will break the previous symmetrical
		 *	copies (which rely upon the object
		 *	not changing).  In a sense, the shadowed
		 *	flag says "don't change this object".
		 *	We fix this by creating a shadow
		 *	object for this object, and sharing
		 *	that.  This works because we are free
		 *	to change the shadow object (and thus
		 *	to use an asymmetric copy strategy);
		 *	this is also semantically correct,
		 *	since this object is temporary, and
		 *	therefore a copy of the object is
		 *	as good as the object itself.  (This
		 *	is not true for permanent objects,
		 *	since the pager needs to see changes,
		 *	which won't happen if the changes
		 *	are made to a copy.)
		 *
		 *	The third case is when the object
		 *	to be shared has parts sticking
		 *	outside of the entry we're working
		 *	with, and thus may in the future
		 *	be subject to a symmetrical copy.
		 *	(This is a preemptive version of
		 *	case 2.)
		 */
		VME_OBJECT_SHADOW(old_entry,
				  (vm_map_size_t) (old_entry->vme_end -
						   old_entry->vme_start));

		/*
		 *	If we're making a shadow for other than
		 *	copy on write reasons, then we have
		 *	to remove write permission.
		 */

		if (!old_entry->needs_copy &&
		    (old_entry->protection & VM_PROT_WRITE)) {
			vm_prot_t prot;

			assert(!pmap_has_prot_policy(old_entry->protection));

			prot = old_entry->protection & ~VM_PROT_WRITE;

			assert(!pmap_has_prot_policy(prot));

			if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
				prot |= VM_PROT_EXECUTE;

			if (old_map->mapped_in_other_pmaps) {
				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					 old_entry->vme_start),
					PMAP_NULL,
					old_entry->vme_start,
					prot);
			} else {
				pmap_protect(old_map->pmap,
					     old_entry->vme_start,
					     old_entry->vme_end,
					     prot);
			}
		}

		old_entry->needs_copy = FALSE;
		object = VME_OBJECT(old_entry);
	}

	/*
	 *	If object was using a symmetric copy strategy,
	 *	change its copy strategy to the default
	 *	asymmetric copy strategy, which is copy_delay
	 *	in the non-norma case and copy_call in the
	 *	norma case.  Bump the reference count for the
	 *	new entry.
	 */

	if(old_entry->is_sub_map) {
		vm_map_lock(VME_SUBMAP(old_entry));
		vm_map_reference(VME_SUBMAP(old_entry));
		vm_map_unlock(VME_SUBMAP(old_entry));
	} else {
		vm_object_lock(object);
		vm_object_reference_locked(object);
		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		}
		vm_object_unlock(object);
	}

	/*
	 *	Clone the entry, using object ref from above.
	 *	Mark both entries as shared.
	 */

	new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
							  * map or descendants */
	vm_map_entry_copy(new_entry, old_entry);
	old_entry->is_shared = TRUE;
	new_entry->is_shared = TRUE;

	/*
	 * We're dealing with a shared mapping, so the resulting mapping
	 * should inherit some of the original mapping's accounting settings.
	 * "iokit_acct" should have been cleared in vm_map_entry_copy().
	 * "use_pmap" should stay the same as before (if it hasn't been reset
	 * to TRUE when we cleared "iokit_acct").
	 */
	assert(!new_entry->iokit_acct);

	/*
	 *	If the old entry's inheritance is VM_INHERIT_NONE,
	 *	the new entry is for a corpse fork, so remove the
	 *	write permission from the new entry.
	 */
	if (old_entry->inheritance == VM_INHERIT_NONE) {

		new_entry->protection &= ~VM_PROT_WRITE;
		new_entry->max_protection &= ~VM_PROT_WRITE;
	}

	/*
	 *	Insert the entry into the new map -- we
	 *	know we're inserting at the end of the new
	 *	map.
	 */

	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
				VM_MAP_KERNEL_FLAGS_NONE);

	/*
	 *	Update the physical map
	 */

	if (old_entry->is_sub_map) {
		/* Bill Angell pmap support goes here */
	} else {
		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
			  old_entry->vme_end - old_entry->vme_start,
			  old_entry->vme_start);
	}
}
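
/*
 * Illustrative sketch (not part of the original code): after
 * vm_map_fork_share() the parent and child entries reference the same VM
 * object with an asymmetric copy strategy, so neither side gets a private
 * copy on write.  The helper below is hypothetical and only spells out the
 * invariants established above.
 */
#if 0 /* example only, never compiled */
static void
fork_share_invariants(vm_map_entry_t parent, vm_map_entry_t child)
{
	assert(parent->is_shared && child->is_shared);
	if (!parent->is_sub_map) {
		assert(VME_OBJECT(parent) == VME_OBJECT(child));
		assert(VME_OBJECT(parent)->copy_strategy !=
		    MEMORY_OBJECT_COPY_SYMMETRIC);
	}
}
#endif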
static boolean_t
vm_map_fork_copy(
	vm_map_t	old_map,
	vm_map_entry_t	*old_entry_p,
	vm_map_t	new_map,
	int		vm_map_copyin_flags)
{
	vm_map_entry_t old_entry = *old_entry_p;
	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
	vm_map_offset_t start = old_entry->vme_start;
	vm_map_copy_t copy;
	vm_map_entry_t last = vm_map_last_entry(new_map);

	vm_map_unlock(old_map);
	/*
	 *	Use maxprot version of copyin because we
	 *	care about whether this memory can ever
	 *	be accessed, not just whether it's accessible
	 *	right now.
	 */
	vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
	if (vm_map_copyin_internal(old_map, start, entry_size,
				   vm_map_copyin_flags, &copy)
	    != KERN_SUCCESS) {
		/*
		 *	The map might have changed while it
		 *	was unlocked, check it again.  Skip
		 *	any blank space or permanently
		 *	unreadable region.
		 */
		vm_map_lock(old_map);
		if (!vm_map_lookup_entry(old_map, start, &last) ||
		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
			last = last->vme_next;
		}
		*old_entry_p = last;

		/*
		 * XXX	For some error returns, want to
		 * XXX	skip to the next element.  Note that
		 *	INVALID_ADDRESS and
		 *	PROTECTION_FAILURE are handled above.
		 */

		return FALSE;
	}

	/*
	 *	Insert the copy into the new map
	 */

	vm_map_copy_insert(new_map, last, copy);

	/*
	 *	Pick up the traversal at the end of
	 *	the copied region.
	 */

	vm_map_lock(old_map);
	start += entry_size;
	if (! vm_map_lookup_entry(old_map, start, &last)) {
		last = last->vme_next;
	} else {
		if (last->vme_start == start) {
			/*
			 * No need to clip here and we don't
			 * want to cause any unnecessary
			 * unnesting...
			 */
		} else {
			vm_map_clip_start(old_map, last, start);
		}
	}
	*old_entry_p = last;

	return TRUE;
}
/*
 *	vm_map_fork:
 *
 *	Create and return a new map based on the old
 *	map, according to the inheritance values on the
 *	regions in that map and the options.
 *
 *	The source map must not be locked.
 */
vm_map_t
vm_map_fork(
	ledger_t	ledger,
	vm_map_t	old_map,
	int		options)
{
	pmap_t		new_pmap;
	vm_map_t	new_map;
	vm_map_entry_t	old_entry;
	vm_map_size_t	new_size = 0, entry_size;
	vm_map_entry_t	new_entry;
	boolean_t	src_needs_copy;
	boolean_t	new_entry_needs_copy;
	boolean_t	pmap_is64bit;
	int		vm_map_copyin_flags;
	vm_inherit_t	old_entry_inheritance;
	int		map_create_options;
	kern_return_t	footprint_collect_kr;

	if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
			VM_MAP_FORK_PRESERVE_PURGEABLE |
			VM_MAP_FORK_CORPSE_FOOTPRINT)) {
		/* unsupported option */
		return VM_MAP_NULL;
	}

	pmap_is64bit =
#if defined(__i386__) || defined(__x86_64__)
		       old_map->pmap->pm_task_map != TASK_MAP_32BIT;
#elif defined(__arm64__)
		       old_map->pmap->max == MACH_VM_MAX_ADDRESS;
#elif defined(__arm__)
		       FALSE;
#else
#error	Unknown architecture.
#endif

	new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);

	vm_map_reference_swap(old_map);
	vm_map_lock(old_map);

	map_create_options = 0;
	if (old_map->hdr.entries_pageable) {
		map_create_options |= VM_MAP_CREATE_PAGEABLE;
	}
	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
		footprint_collect_kr = KERN_SUCCESS;
	}
	new_map = vm_map_create_options(new_pmap,
					old_map->min_offset,
					old_map->max_offset,
					map_create_options);
	vm_map_lock(new_map);
	vm_commit_pagezero_status(new_map);
	/* inherit the parent map's page size */
	vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
	for (
		old_entry = vm_map_first_entry(old_map);
		old_entry != vm_map_to_entry(old_map);
		) {

		entry_size = old_entry->vme_end - old_entry->vme_start;

		old_entry_inheritance = old_entry->inheritance;
		/*
		 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
		 * share VM_INHERIT_NONE entries that are not backed by a
		 * device pager.
		 */
		if (old_entry_inheritance == VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
		    !(!old_entry->is_sub_map &&
		      VME_OBJECT(old_entry) != NULL &&
		      VME_OBJECT(old_entry)->pager != NULL &&
		      is_device_pager_ops(
			      VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
			old_entry_inheritance = VM_INHERIT_SHARE;
		}

		if (old_entry_inheritance != VM_INHERIT_NONE &&
		    (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
		    footprint_collect_kr == KERN_SUCCESS) {
			/*
			 * The corpse won't have old_map->pmap to query
			 * footprint information, so collect that data now
			 * and store it in new_map->vmmap_corpse_footprint
			 * for later autopsy.
			 */
			footprint_collect_kr =
				vm_map_corpse_footprint_collect(old_map,
								old_entry,
								new_map);
		}

		switch (old_entry_inheritance) {
		case VM_INHERIT_NONE:
			break;

		case VM_INHERIT_SHARE:
			vm_map_fork_share(old_map, old_entry, new_map);
			new_size += entry_size;
			break;

		case VM_INHERIT_COPY:

			/*
			 *	Inline the copy_quickly case;
			 *	upon failure, fall back on call
			 *	to vm_map_fork_copy.
			 */

			if(old_entry->is_sub_map)
				break;
			if ((old_entry->wired_count != 0) ||
			    ((VME_OBJECT(old_entry) != NULL) &&
			     (VME_OBJECT(old_entry)->true_share))) {
				goto slow_vm_map_fork_copy;
			}

			new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
			vm_map_entry_copy(new_entry, old_entry);
			if (new_entry->is_sub_map) {
				/* clear address space specifics */
				new_entry->use_pmap = FALSE;
			} else {
				/*
				 * We're dealing with a copy-on-write operation,
				 * so the resulting mapping should not inherit
				 * the original mapping's accounting settings.
				 * "iokit_acct" should have been cleared in
				 * vm_map_entry_copy().
				 * "use_pmap" should be reset to its default
				 * (TRUE) so that the new mapping gets
				 * accounted for in the task's memory footprint.
				 */
				assert(!new_entry->iokit_acct);
				new_entry->use_pmap = TRUE;
			}

			if (! vm_object_copy_quickly(
				    &VME_OBJECT(new_entry),
				    VME_OFFSET(old_entry),
				    (old_entry->vme_end -
				     old_entry->vme_start),
				    &src_needs_copy,
				    &new_entry_needs_copy)) {
				vm_map_entry_dispose(new_map, new_entry);
				goto slow_vm_map_fork_copy;
			}

			/*
			 *	Handle copy-on-write obligations
			 */

			if (src_needs_copy && !old_entry->needs_copy) {
				vm_prot_t prot;

				assert(!pmap_has_prot_policy(old_entry->protection));

				prot = old_entry->protection & ~VM_PROT_WRITE;

				if (override_nx(old_map, VME_ALIAS(old_entry))
				    && prot)
					prot |= VM_PROT_EXECUTE;

				assert(!pmap_has_prot_policy(prot));

				vm_object_pmap_protect(
					VME_OBJECT(old_entry),
					VME_OFFSET(old_entry),
					(old_entry->vme_end -
					 old_entry->vme_start),
					((old_entry->is_shared
					  || old_map->mapped_in_other_pmaps)
					 ? PMAP_NULL :
					 old_map->pmap),
					old_entry->vme_start,
					prot);

				assert(old_entry->wired_count == 0);
				old_entry->needs_copy = TRUE;
			}
			new_entry->needs_copy = new_entry_needs_copy;

			/*
			 *	Insert the entry at the end
			 *	of the map.
			 */

			vm_map_store_entry_link(new_map,
						vm_map_last_entry(new_map),
						new_entry,
						VM_MAP_KERNEL_FLAGS_NONE);
			new_size += entry_size;
			break;

		slow_vm_map_fork_copy:
			vm_map_copyin_flags = 0;
			if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
				vm_map_copyin_flags |=
					VM_MAP_COPYIN_PRESERVE_PURGEABLE;
			}
			if (vm_map_fork_copy(old_map,
					     &old_entry,
					     new_map,
					     vm_map_copyin_flags)) {
				new_size += entry_size;
			}
			continue;
		}
		old_entry = old_entry->vme_next;
	}

#if defined(__arm64__)
	pmap_insert_sharedpage(new_map->pmap);
#endif

	new_map->size = new_size;

	if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
		vm_map_corpse_footprint_collect_done(new_map);
	}

	vm_map_unlock(new_map);
	vm_map_unlock(old_map);
	vm_map_deallocate(old_map);

	return(new_map);
}
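
/*
 * Illustrative sketch (not part of the original code): a corpse-style fork
 * asks for footprint collection and shares VM_INHERIT_NONE regions, which
 * is roughly how a caller would combine the option bits handled above.
 * The ledger and map below are hypothetical.
 */
#if 0 /* example only, never compiled */
static vm_map_t
fork_for_corpse_example(ledger_t ledger, vm_map_t task_map)
{
	int options;

	options = VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
	    VM_MAP_FORK_CORPSE_FOOTPRINT;
	/* returns VM_MAP_NULL if an unsupported option bit is set */
	return vm_map_fork(ledger, task_map, options);
}
#endif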
/*
 * vm_map_exec:
 *
 *	Setup the "new_map" with the proper execution environment according
 *	to the type of executable (platform, 64bit, chroot environment).
 *	Map the comm page and shared region, etc...
 */
kern_return_t
vm_map_exec(
	vm_map_t	new_map,
	task_t		task,
	boolean_t	is64bit,
	void		*fsroot,
	cpu_type_t	cpu,
	cpu_subtype_t	cpu_subtype)
{
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
		 (void *)VM_KERNEL_ADDRPERM(current_task()),
		 (void *)VM_KERNEL_ADDRPERM(new_map),
		 (void *)VM_KERNEL_ADDRPERM(task),
		 (void *)VM_KERNEL_ADDRPERM(fsroot),
		 cpu,
		 cpu_subtype));
	(void) vm_commpage_enter(new_map, task, is64bit);
	(void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
	SHARED_REGION_TRACE_DEBUG(
		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
		 (void *)VM_KERNEL_ADDRPERM(current_task()),
		 (void *)VM_KERNEL_ADDRPERM(new_map),
		 (void *)VM_KERNEL_ADDRPERM(task),
		 (void *)VM_KERNEL_ADDRPERM(fsroot),
		 cpu,
		 cpu_subtype));
	return KERN_SUCCESS;
}
/*
 *	vm_map_lookup_locked:
 *
 *	Finds the VM object, offset, and
 *	protection for a given virtual address in the
 *	specified map, assuming a page fault of the
 *	type specified.
 *
 *	Returns the (object, offset, protection) for
 *	this address, whether it is wired down, and whether
 *	this map has the only reference to the data in question.
 *	In order to later verify this lookup, a "version"
 *	is returned.
 *
 *	The map MUST be locked by the caller and WILL be
 *	locked on exit.  In order to guarantee the
 *	existence of the returned object, it is returned
 *	locked.
 *
 *	If a lookup is requested with "write protection"
 *	specified, the map may be changed to perform virtual
 *	copying operations, although the data referenced will
 *	remain the same.
 */
kern_return_t
vm_map_lookup_locked(
	vm_map_t		*var_map,	/* IN/OUT */
	vm_map_offset_t		vaddr,
	vm_prot_t		fault_type,
	int			object_lock_type,
	vm_map_version_t	*out_version,	/* OUT */
	vm_object_t		*object,	/* OUT */
	vm_object_offset_t	*offset,	/* OUT */
	vm_prot_t		*out_prot,	/* OUT */
	boolean_t		*wired,		/* OUT */
	vm_object_fault_info_t	fault_info,	/* OUT */
	vm_map_t		*real_map)
{
	vm_map_entry_t			entry;
	vm_map_t			map = *var_map;
	vm_map_t			old_map = *var_map;
	vm_map_t			cow_sub_map_parent = VM_MAP_NULL;
	vm_map_offset_t			cow_parent_vaddr = 0;
	vm_map_offset_t			old_start = 0;
	vm_map_offset_t			old_end = 0;
	vm_prot_t			prot;
	boolean_t			mask_protections;
	boolean_t			force_copy;
	vm_prot_t			original_fault_type;

	/*
	 * VM_PROT_MASK means that the caller wants us to use "fault_type"
	 * as a mask against the mapping's actual protections, not as an
	 * absolute value.
	 */
	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
	force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
	fault_type &= VM_PROT_ALL;
	original_fault_type = fault_type;

	*real_map = map;

RetryLookup:
	fault_type = original_fault_type;

	/*
	 *	If the map has an interesting hint, try it before calling
	 *	full blown lookup routine.
	 */
	entry = map->hint;

	if ((entry == vm_map_to_entry(map)) ||
	    (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
		vm_map_entry_t	tmp_entry;

		/*
		 *	Entry was either not a valid hint, or the vaddr
		 *	was not contained in the entry, so do a full lookup.
		 */
		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
			if((cow_sub_map_parent) && (cow_sub_map_parent != map))
				vm_map_unlock(cow_sub_map_parent);
			if((*real_map != map)
			   && (*real_map != cow_sub_map_parent))
				vm_map_unlock(*real_map);
			return KERN_INVALID_ADDRESS;
		}

		entry = tmp_entry;
	}
	if(map == old_map) {
		old_start = entry->vme_start;
		old_end = entry->vme_end;
	}
	/*
	 *	Handle submaps.  Drop lock on upper map, submap is
	 *	returned locked.
	 */

submap_recurse:
	if (entry->is_sub_map) {
		vm_map_offset_t		local_vaddr;
		vm_map_offset_t		end_delta;
		vm_map_offset_t		start_delta;
		vm_map_entry_t		submap_entry;
		vm_prot_t		subentry_protection;
		vm_prot_t		subentry_max_protection;
		boolean_t		mapped_needs_copy = FALSE;

		local_vaddr = vaddr;

		if ((entry->use_pmap &&
		     ! ((fault_type & VM_PROT_WRITE) ||
			force_copy))) {
			/* if real_map equals map we unlock below */
			if ((*real_map != map) &&
			    (*real_map != cow_sub_map_parent))
				vm_map_unlock(*real_map);
			*real_map = VME_SUBMAP(entry);
		}

		if(entry->needs_copy &&
		   ((fault_type & VM_PROT_WRITE) ||
		    force_copy)) {
			if (!mapped_needs_copy) {
				if (vm_map_lock_read_to_write(map)) {
					vm_map_lock_read(map);
					*real_map = map;
					goto RetryLookup;
				}
				vm_map_lock_read(VME_SUBMAP(entry));
				*var_map = VME_SUBMAP(entry);
				cow_sub_map_parent = map;
				/* reset base to map before cow object */
				/* this is the map which will accept */
				/* the new cow object */
				old_start = entry->vme_start;
				old_end = entry->vme_end;
				cow_parent_vaddr = vaddr;
				mapped_needs_copy = TRUE;
			} else {
				vm_map_lock_read(VME_SUBMAP(entry));
				*var_map = VME_SUBMAP(entry);
				if((cow_sub_map_parent != map) &&
				   (*real_map != map))
					vm_map_unlock(map);
			}
		} else {
			vm_map_lock_read(VME_SUBMAP(entry));
			*var_map = VME_SUBMAP(entry);
			/* leave map locked if it is a target */
			/* cow sub_map above otherwise, just  */
			/* follow the maps down to the object */
			/* here we unlock knowing we are not  */
			/* revisiting the map. */
			if((*real_map != map) && (map != cow_sub_map_parent))
				vm_map_unlock_read(map);
		}

		map = *var_map;

		/* calculate the offset in the submap for vaddr */
		local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);

	RetrySubMap:
		if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
			if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
				vm_map_unlock(cow_sub_map_parent);
			}
			if((*real_map != map)
			   && (*real_map != cow_sub_map_parent)) {
				vm_map_unlock(*real_map);
			}
			*real_map = map;
			return KERN_INVALID_ADDRESS;
		}

		/* find the attenuated shadow of the underlying object */
		/* on our target map */

		/* In English: the submap object may extend beyond the     */
		/* region mapped by the entry or, may only fill a portion  */
		/* of it.  For our purposes, we only care if the object    */
		/* doesn't fill.  In this case the area which will         */
		/* ultimately be clipped in the top map will only need     */
		/* to be as big as the portion of the underlying entry     */
		/* which is mapped */
		start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
			submap_entry->vme_start - VME_OFFSET(entry) : 0;

		end_delta =
			(VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
			submap_entry->vme_end ?
			0 : (VME_OFFSET(entry) +
			     (old_end - old_start))
			- submap_entry->vme_end;

		old_start += start_delta;
		old_end -= end_delta;

		if(submap_entry->is_sub_map) {
			entry = submap_entry;
			vaddr = local_vaddr;
			goto submap_recurse;
		}

		if (((fault_type & VM_PROT_WRITE) ||
		     force_copy)
		    && cow_sub_map_parent) {

			vm_object_t	sub_object, copy_object;
			vm_object_offset_t copy_offset;
			vm_map_offset_t	local_start;
			vm_map_offset_t	local_end;
			boolean_t	copied_slowly = FALSE;

			if (vm_map_lock_read_to_write(map)) {
				vm_map_lock_read(map);
				old_start -= start_delta;
				old_end += end_delta;
				goto RetrySubMap;
			}

			sub_object = VME_OBJECT(submap_entry);
			if (sub_object == VM_OBJECT_NULL) {
				sub_object =
					vm_object_allocate(
						(vm_map_size_t)
						(submap_entry->vme_end -
						 submap_entry->vme_start));
				VME_OBJECT_SET(submap_entry, sub_object);
				VME_OFFSET_SET(submap_entry, 0);
				assert(!submap_entry->is_sub_map);
				assert(submap_entry->use_pmap);
			}
			local_start = local_vaddr -
				(cow_parent_vaddr - old_start);
			local_end = local_vaddr +
				(old_end - cow_parent_vaddr);
			vm_map_clip_start(map, submap_entry, local_start);
			vm_map_clip_end(map, submap_entry, local_end);
			if (submap_entry->is_sub_map) {
				/* unnesting was done when clipping */
				assert(!submap_entry->use_pmap);
			}

			/* This is the COW case, lets connect */
			/* an entry in our space to the underlying */
			/* object in the submap, bypassing the  */
			/* submap. */

			if(submap_entry->wired_count != 0 ||
			   (sub_object->copy_strategy ==
			    MEMORY_OBJECT_COPY_NONE)) {
				vm_object_lock(sub_object);
				vm_object_copy_slowly(sub_object,
						      VME_OFFSET(submap_entry),
						      (submap_entry->vme_end -
						       submap_entry->vme_start),
						      FALSE,
						      &copy_object);
				copied_slowly = TRUE;
			} else {

				/* set up shadow object */
				copy_object = sub_object;
				vm_object_lock(sub_object);
				vm_object_reference_locked(sub_object);
				sub_object->shadowed = TRUE;
				vm_object_unlock(sub_object);

				assert(submap_entry->wired_count == 0);
				submap_entry->needs_copy = TRUE;

				prot = submap_entry->protection;
				assert(!pmap_has_prot_policy(prot));
				prot = prot & ~VM_PROT_WRITE;
				assert(!pmap_has_prot_policy(prot));

				if (override_nx(old_map,
						VME_ALIAS(submap_entry))
				    && prot)
					prot |= VM_PROT_EXECUTE;

				vm_object_pmap_protect(
					sub_object,
					VME_OFFSET(submap_entry),
					submap_entry->vme_end -
					submap_entry->vme_start,
					(submap_entry->is_shared
					 || map->mapped_in_other_pmaps) ?
					PMAP_NULL : map->pmap,
					submap_entry->vme_start,
					prot);
			}

			/*
			 * Adjust the fault offset to the submap entry.
			 */
			copy_offset = (local_vaddr -
				       submap_entry->vme_start +
				       VME_OFFSET(submap_entry));

			/* This works differently than the */
			/* normal submap case. We go back  */
			/* to the parent of the cow map and*/
			/* clip out the target portion of  */
			/* the sub_map, substituting the   */
			/* new copy object,                */

			subentry_protection = submap_entry->protection;
			subentry_max_protection = submap_entry->max_protection;
			vm_map_unlock(map);
			submap_entry = NULL; /* not valid after map unlock */

			local_start = old_start;
			local_end = old_end;
			map = cow_sub_map_parent;
			*var_map = cow_sub_map_parent;
			vaddr = cow_parent_vaddr;
			cow_sub_map_parent = NULL;

			if(!vm_map_lookup_entry(map,
						vaddr, &entry)) {
				vm_object_deallocate(
					copy_object);
				vm_map_lock_write_to_read(map);
				return KERN_INVALID_ADDRESS;
			}

			/* clip out the portion of space */
			/* mapped by the sub map which   */
			/* corresponds to the underlying */
			/* object */

			/*
			 * Clip (and unnest) the smallest nested chunk
			 * possible around the faulting address...
			 */
			local_start = vaddr & ~(pmap_nesting_size_min - 1);
			local_end = local_start + pmap_nesting_size_min;
			/*
			 * ... but don't go beyond the "old_start" to "old_end"
			 * range, to avoid spanning over another VM region
			 * with a possibly different VM object and/or offset.
			 */
			if (local_start < old_start) {
				local_start = old_start;
			}
			if (local_end > old_end) {
				local_end = old_end;
			}
			/*
			 * Adjust copy_offset to the start of the range.
			 */
			copy_offset -= (vaddr - local_start);

			vm_map_clip_start(map, entry, local_start);
			vm_map_clip_end(map, entry, local_end);
			if (entry->is_sub_map) {
				/* unnesting was done when clipping */
				assert(!entry->use_pmap);
			}

			/* substitute copy object for */
			/* shared map entry           */
			vm_map_deallocate(VME_SUBMAP(entry));
			assert(!entry->iokit_acct);
			entry->is_sub_map = FALSE;
			entry->use_pmap = TRUE;
			VME_OBJECT_SET(entry, copy_object);

			/* propagate the submap entry's protections */
			if (entry->protection != VM_PROT_READ) {
				/*
				 * Someone has already altered the top entry's
				 * protections via vm_protect(VM_PROT_COPY).
				 * Respect these new values and ignore the
				 * submap entry's protections.
				 */
			} else {
				/*
				 * Regular copy-on-write: propagate the submap
				 * entry's protections to the top map entry.
				 */
				entry->protection |= subentry_protection;
			}
			entry->max_protection |= subentry_max_protection;

			if ((entry->protection & VM_PROT_WRITE) &&
			    (entry->protection & VM_PROT_EXECUTE) &&
#if !CONFIG_EMBEDDED
			    map != kernel_map &&
			    cs_process_enforcement(NULL) &&
#endif /* !CONFIG_EMBEDDED */
			    !(entry->used_for_jit)) {
				DTRACE_VM3(cs_wx,
					   uint64_t, (uint64_t)entry->vme_start,
					   uint64_t, (uint64_t)entry->vme_end,
					   vm_prot_t, entry->protection);
				printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
				       proc_selfpid(),
				       (current_task()->bsd_info
					? proc_name_address(current_task()->bsd_info)
					: "?"),
				       __FUNCTION__);
				entry->protection &= ~VM_PROT_EXECUTE;
			}

			if(copied_slowly) {
				VME_OFFSET_SET(entry, local_start - old_start);
				entry->needs_copy = FALSE;
				entry->is_shared = FALSE;
			} else {
				VME_OFFSET_SET(entry, copy_offset);
				assert(entry->wired_count == 0);
				entry->needs_copy = TRUE;
				if(entry->inheritance == VM_INHERIT_SHARE)
					entry->inheritance = VM_INHERIT_COPY;
				if (map != old_map)
					entry->is_shared = TRUE;
			}
			if(entry->inheritance == VM_INHERIT_SHARE)
				entry->inheritance = VM_INHERIT_COPY;

			vm_map_lock_write_to_read(map);
		} else {
			if((cow_sub_map_parent)
			   && (cow_sub_map_parent != *real_map)
			   && (cow_sub_map_parent != map)) {
				vm_map_unlock(cow_sub_map_parent);
			}
			entry = submap_entry;
			vaddr = local_vaddr;
		}
	}
	/*
	 *	Check whether this task is allowed to have
	 *	this page.
	 */

	prot = entry->protection;

	if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
		/*
		 * HACK -- if not a stack, then allow execution
		 */
		prot |= VM_PROT_EXECUTE;
	}

	if (mask_protections) {
		fault_type &= prot;
		if (fault_type == VM_PROT_NONE) {
			goto protection_failure;
		}
	}
	if (((fault_type & prot) != fault_type)
#if __arm64__
	    /* prefetch abort in execute-only page */
	    && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
#endif
		) {
	protection_failure:
		if (*real_map != map) {
			vm_map_unlock(*real_map);
		}
		*real_map = map;

		if ((fault_type & VM_PROT_EXECUTE) && prot)
			log_stack_execution_failure((addr64_t)vaddr, prot);

		DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
		return KERN_PROTECTION_FAILURE;
	}

	/*
	 *	If this page is not pageable, we have to get
	 *	it for all possible accesses.
	 */

	*wired = (entry->wired_count != 0);
	if (*wired)
		fault_type = prot;

	/*
	 *	If the entry was copy-on-write, we either ...
	 */

	if (entry->needs_copy) {
		/*
		 *	If we want to write the page, we may as well
		 *	handle that now since we've got the map locked.
		 *
		 *	If we don't need to write the page, we just
		 *	demote the permissions allowed.
		 */

		if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
			/*
			 *	Make a new object, and place it in the
			 *	object chain.  Note that no new references
			 *	have appeared -- one just moved from the
			 *	map to the new object.
			 */

			if (vm_map_lock_read_to_write(map)) {
				vm_map_lock_read(map);
				goto RetryLookup;
			}

			if (VME_OBJECT(entry)->shadowed == FALSE) {
				vm_object_lock(VME_OBJECT(entry));
				VME_OBJECT(entry)->shadowed = TRUE;
				vm_object_unlock(VME_OBJECT(entry));
			}
			VME_OBJECT_SHADOW(entry,
					  (vm_map_size_t) (entry->vme_end -
							   entry->vme_start));
			entry->needs_copy = FALSE;

			vm_map_lock_write_to_read(map);
		}
		if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
			/*
			 *	We're attempting to read a copy-on-write
			 *	page -- don't allow writes.
			 */

			prot &= (~VM_PROT_WRITE);
		}
	}

	/*
	 *	Create an object if necessary.
	 */
	if (VME_OBJECT(entry) == VM_OBJECT_NULL) {

		if (vm_map_lock_read_to_write(map)) {
			vm_map_lock_read(map);
			goto RetryLookup;
		}

		VME_OBJECT_SET(entry,
			       vm_object_allocate(
				       (vm_map_size_t)(entry->vme_end -
						       entry->vme_start)));
		VME_OFFSET_SET(entry, 0);
		assert(entry->use_pmap);
		vm_map_lock_write_to_read(map);
	}

	/*
	 *	Return the object/offset from this entry.  If the entry
	 *	was copy-on-write or empty, it has been fixed up.  Also
	 *	return the protection.
	 */

	*offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
	*object = VME_OBJECT(entry);
	*out_prot = prot;

	fault_info->interruptible = THREAD_UNINT; /* for now... */
	/* ... the caller will change "interruptible" if needed */
	fault_info->cluster_size = 0;
	fault_info->user_tag = VME_ALIAS(entry);
	fault_info->pmap_options = 0;
	if (entry->iokit_acct ||
	    (!entry->is_sub_map && !entry->use_pmap)) {
		fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
	}
	fault_info->behavior = entry->behavior;
	fault_info->lo_offset = VME_OFFSET(entry);
	fault_info->hi_offset =
		(entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
	fault_info->no_cache = entry->no_cache;
	fault_info->stealth = FALSE;
	fault_info->io_sync = FALSE;
	if (entry->used_for_jit ||
	    entry->vme_resilient_codesign) {
		fault_info->cs_bypass = TRUE;
	} else {
		fault_info->cs_bypass = FALSE;
	}
	fault_info->pmap_cs_associated = FALSE;
#if CONFIG_PMAP_CS
	if (entry->pmap_cs_associated) {
		/*
		 * The pmap layer will validate this page
		 * before allowing it to be executed from.
		 */
		fault_info->pmap_cs_associated = TRUE;
	}
#endif /* CONFIG_PMAP_CS */
	fault_info->mark_zf_absent = FALSE;
	fault_info->batch_pmap_op = FALSE;

	/*
	 *	Lock the object to prevent it from disappearing
	 */
	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
		vm_object_lock(*object);
	else
		vm_object_lock_shared(*object);

	/*
	 *	Save the version number
	 */

	out_version->main_timestamp = map->timestamp;

	return KERN_SUCCESS;
}
/*
 *	vm_map_verify:
 *
 *	Verifies that the map in question has not changed
 *	since the given version.  The map has to be locked
 *	("shared" mode is fine) before calling this function
 *	and it will be returned locked too.
 */
boolean_t
vm_map_verify(
	vm_map_t		map,
	vm_map_version_t	*version)	/* REF */
{
	boolean_t	result;

	vm_map_lock_assert_held(map);
	result = (map->timestamp == version->main_timestamp);

	return(result);
}
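
/*
 * Illustrative sketch (not part of the original code): the usual caller
 * pattern around vm_map_lookup_locked()/vm_map_verify() is to record the
 * returned version, possibly drop the map lock while working on the fault,
 * then re-take the lock and redo the lookup if the timestamp no longer
 * matches.  The error handling and lock discipline below are simplified
 * and hypothetical.
 */
#if 0 /* example only, never compiled */
static kern_return_t
lookup_and_verify_example(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_map_version_t		version;
	vm_object_t			object;
	vm_object_offset_t		offset;
	vm_prot_t			prot;
	boolean_t			wired;
	struct vm_object_fault_info	fault_info;
	vm_map_t			real_map;
	kern_return_t			kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
	    OBJECT_LOCK_SHARED, &version, &object, &offset, &prot,
	    &wired, &fault_info, &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	vm_object_unlock(object);

	/* ... the map lock may be dropped and re-taken here ... */

	if (!vm_map_verify(map, &version)) {
		/* the map changed underneath us: the lookup must be redone */
	}
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif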
/*
 *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
 *	Goes away after regular vm_region_recurse function migrates to
 *	64 bits
 *	vm_region_recurse: A form of vm_region which follows the
 *	submaps in a target map
 *
 */

kern_return_t
vm_map_region_recurse_64(
	vm_map_t		 map,
	vm_map_offset_t		*address,		/* IN/OUT */
	vm_map_size_t		*size,			/* OUT */
	natural_t		*nesting_depth,		/* IN/OUT */
	vm_region_submap_info_64_t	submap_info,	/* IN/OUT */
	mach_msg_type_number_t	*count)			/* IN/OUT */
{
	mach_msg_type_number_t	original_count;
	vm_region_extended_info_data_t	extended;
	vm_map_entry_t			tmp_entry;
	vm_map_offset_t			user_address;
	unsigned int			user_max_depth;

	/*
	 * "curr_entry" is the VM map entry preceding or including the
	 * address we're looking for.
	 * "curr_map" is the map or sub-map containing "curr_entry".
	 * "curr_address" is the equivalent of the top map's "user_address"
	 * in the current map.
	 * "curr_offset" is the cumulated offset of "curr_map" in the
	 * target task's address space.
	 * "curr_depth" is the depth of "curr_map" in the chain of
	 * sub-maps.
	 *
	 * "curr_max_below" and "curr_max_above" limit the range (around
	 * "curr_address") we should take into account in the current (sub)map.
	 * They limit the range to what's visible through the map entries
	 * we've traversed from the top map to the current map.
	 *
	 */
	vm_map_entry_t			curr_entry;
	vm_map_address_t		curr_address;
	vm_map_offset_t			curr_offset;
	vm_map_t			curr_map;
	unsigned int			curr_depth;
	vm_map_offset_t			curr_max_below, curr_max_above;
	vm_map_offset_t			curr_skip;

	/*
	 * "next_" is the same as "curr_" but for the VM region immediately
	 * after the address we're looking for.  We need to keep track of this
	 * too because we want to return info about that region if the
	 * address we're looking for is not mapped.
	 */
	vm_map_entry_t			next_entry;
	vm_map_offset_t			next_offset;
	vm_map_offset_t			next_address;
	vm_map_t			next_map;
	unsigned int			next_depth;
	vm_map_offset_t			next_max_below, next_max_above;
	vm_map_offset_t			next_skip;

	boolean_t			look_for_pages;
	vm_region_submap_short_info_64_t short_info;
	boolean_t			do_region_footprint;

	if (map == VM_MAP_NULL) {
		/* no address space to work on */
		return KERN_INVALID_ARGUMENT;
	}

	if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
		/*
		 * "info" structure is not big enough and
		 * would overflow
		 */
		return KERN_INVALID_ARGUMENT;
	}

	do_region_footprint = task_self_region_footprint();
	original_count = *count;

	if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
		*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
		look_for_pages = FALSE;
		short_info = (vm_region_submap_short_info_64_t) submap_info;
		submap_info = NULL;
	} else {
		look_for_pages = TRUE;
		*count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
		short_info = NULL;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			*count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
		}
	}

	user_address = *address;
	user_max_depth = *nesting_depth;

	if (not_in_kdp) {
		vm_map_lock_read(map);
	}

recurse_again:
	curr_entry = NULL;
	curr_map = map;
	curr_address = user_address;
	curr_offset = 0;
	curr_skip = 0;
	curr_depth = 0;
	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
	curr_max_below = curr_address;

	next_entry = NULL;
	next_map = NULL;
	next_address = 0;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_above = (vm_map_offset_t) -1;
	next_max_below = (vm_map_offset_t) -1;

	for (;;) {
		if (vm_map_lookup_entry(curr_map,
					curr_address,
					&tmp_entry)) {
			/* tmp_entry contains the address we're looking for */
			curr_entry = tmp_entry;
		} else {
			vm_map_offset_t skip;
			/*
			 * The address is not mapped.  "tmp_entry" is the
			 * map entry preceding the address.  We want the next
			 * one, if it exists.
			 */
			curr_entry = tmp_entry->vme_next;

			if (curr_entry == vm_map_to_entry(curr_map) ||
			    (curr_entry->vme_start >=
			     curr_address + curr_max_above)) {
				/* no next entry at this level: stop looking */
				if (not_in_kdp) {
					vm_map_unlock_read(curr_map);
				}
				curr_entry = NULL;
				curr_map = NULL;
				curr_skip = 0;
				curr_offset = 0;
				curr_depth = 0;
				curr_max_above = 0;
				curr_max_below = 0;
				break;
			}

			/* adjust current address and offset */
			skip = curr_entry->vme_start - curr_address;
			curr_address = curr_entry->vme_start;
			curr_skip += skip;
			curr_offset += skip;
			curr_max_above -= skip;
			curr_max_below = 0;
		}

		/*
		 * Is the next entry at this level closer to the address (or
		 * deeper in the submap chain) than the one we had
		 * so far?
		 */
		tmp_entry = curr_entry->vme_next;
		if (tmp_entry == vm_map_to_entry(curr_map)) {
			/* no next entry at this level */
		} else if (tmp_entry->vme_start >=
			   curr_address + curr_max_above) {
			/*
			 * tmp_entry is beyond the scope of what we mapped of
			 * this submap in the upper level: ignore it.
			 */
		} else if ((next_entry == NULL) ||
			   (tmp_entry->vme_start + curr_offset <=
			    next_entry->vme_start + next_offset)) {
			/*
			 * We didn't have a "next_entry" or this one is
			 * closer to the address we're looking for:
			 * use this "tmp_entry" as the new "next_entry".
			 */
			if (next_entry != NULL) {
				/* unlock the last "next_map" */
				if (next_map != curr_map && not_in_kdp) {
					vm_map_unlock_read(next_map);
				}
			}
			next_entry = tmp_entry;
			next_map = curr_map;
			next_depth = curr_depth;
			next_address = next_entry->vme_start;
			next_skip = curr_skip;
			next_skip += (next_address - curr_address);
			next_offset = curr_offset;
			next_offset += (next_address - curr_address);
			next_max_above = MIN(next_max_above, curr_max_above);
			next_max_above = MIN(next_max_above,
					     next_entry->vme_end - next_address);
			next_max_below = MIN(next_max_below, curr_max_below);
			next_max_below = MIN(next_max_below,
					     next_address - next_entry->vme_start);
		}

		/*
		 * "curr_max_{above,below}" allow us to keep track of the
		 * portion of the submap that is actually mapped at this level:
		 * the rest of that submap is irrelevant to us, since it's not
		 * mapped here.
		 * The relevant portion of the map starts at
		 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
		 */
		curr_max_above = MIN(curr_max_above,
				     curr_entry->vme_end - curr_address);
		curr_max_below = MIN(curr_max_below,
				     curr_address - curr_entry->vme_start);

		if (!curr_entry->is_sub_map ||
		    curr_depth >= user_max_depth) {
			/*
			 * We hit a leaf map or we reached the maximum depth
			 * we could, so stop looking.  Keep the current map
			 * locked.
			 */
			break;
		}

		/*
		 * Get down to the next submap level.
		 */

		/*
		 * Lock the next level and unlock the current level,
		 * unless we need to keep it locked to access the "next_entry"
		 * later.
		 */
		if (not_in_kdp) {
			vm_map_lock_read(VME_SUBMAP(curr_entry));
		}
		if (curr_map == next_map) {
			/* keep "next_map" locked in case we need it */
		} else {
			/* release this map */
			if (not_in_kdp)
				vm_map_unlock_read(curr_map);
		}

		/*
		 * Adjust the offset.  "curr_entry" maps the submap
		 * at relative address "curr_entry->vme_start" in the
		 * curr_map but skips the first "VME_OFFSET(curr_entry)"
		 * bytes of the submap.
		 * "curr_offset" always represents the offset of a virtual
		 * address in the curr_map relative to the absolute address
		 * space (i.e. the top-level VM map).
		 */
		curr_offset +=
			(VME_OFFSET(curr_entry) - curr_entry->vme_start);
		curr_address = user_address + curr_offset;
		/* switch to the submap */
		curr_map = VME_SUBMAP(curr_entry);
		curr_depth++;
		curr_entry = NULL;
	}

// LP64todo: all the current tools are 32bit, obviously never worked for 64b
// so probably should be a real 32b ID vs. ptr.
// Current users just check for equality

	if (curr_entry == NULL) {
		/* no VM region contains the address... */

		if (do_region_footprint &&	/* we want footprint numbers */
		    next_entry == NULL &&	/* & there are no more regions */
		    /* & we haven't already provided our fake region: */
		    user_address <= vm_map_last_entry(map)->vme_end) {
			ledger_amount_t nonvol, nonvol_compressed;
			/*
			 * Add a fake memory region to account for
			 * purgeable memory that counts towards this
			 * task's memory footprint, i.e. the resident
			 * compressed pages of non-volatile objects
			 * owned by that task.
			 */
			ledger_get_balance(
				map->pmap->ledger,
				task_ledgers.purgeable_nonvolatile,
				&nonvol);
			ledger_get_balance(
				map->pmap->ledger,
				task_ledgers.purgeable_nonvolatile_compressed,
				&nonvol_compressed);
			if (nonvol + nonvol_compressed == 0) {
				/* no purgeable memory usage to report */
				return KERN_INVALID_ADDRESS;
			}
			/* fake region to show nonvolatile footprint */
			if (look_for_pages) {
				submap_info->protection = VM_PROT_DEFAULT;
				submap_info->max_protection = VM_PROT_DEFAULT;
				submap_info->inheritance = VM_INHERIT_DEFAULT;
				submap_info->offset = 0;
				submap_info->user_tag = -1;
				submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
				submap_info->pages_shared_now_private = 0;
				submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
				submap_info->pages_dirtied = submap_info->pages_resident;
				submap_info->ref_count = 1;
				submap_info->shadow_depth = 0;
				submap_info->external_pager = 0;
				submap_info->share_mode = SM_PRIVATE;
				submap_info->is_submap = 0;
				submap_info->behavior = VM_BEHAVIOR_DEFAULT;
				submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
				submap_info->user_wired_count = 0;
				submap_info->pages_reusable = 0;
			} else {
				short_info->user_tag = -1;
				short_info->offset = 0;
				short_info->protection = VM_PROT_DEFAULT;
				short_info->inheritance = VM_INHERIT_DEFAULT;
				short_info->max_protection = VM_PROT_DEFAULT;
				short_info->behavior = VM_BEHAVIOR_DEFAULT;
				short_info->user_wired_count = 0;
				short_info->is_submap = 0;
				short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
				short_info->external_pager = 0;
				short_info->shadow_depth = 0;
				short_info->share_mode = SM_PRIVATE;
				short_info->ref_count = 1;
			}
			*nesting_depth = 0;
			*size = (vm_map_size_t) (nonvol + nonvol_compressed);
//			*address = user_address;
			*address = vm_map_last_entry(map)->vme_end;
			return KERN_SUCCESS;
		}

		if (next_entry == NULL) {
			/* ... and no VM region follows it either */
			return KERN_INVALID_ADDRESS;
		}
		/* ... gather info about the next VM region */
		curr_entry = next_entry;
		curr_map = next_map;	/* still locked ... */
		curr_address = next_address;
		curr_skip = next_skip;
		curr_offset = next_offset;
		curr_depth = next_depth;
		curr_max_above = next_max_above;
		curr_max_below = next_max_below;
	} else {
		/* we won't need "next_entry" after all */
		if (next_entry != NULL) {
			/* release "next_map" */
			if (next_map != curr_map && not_in_kdp) {
				vm_map_unlock_read(next_map);
			}
		}
	}
	next_entry = NULL;
	next_map = NULL;
	next_offset = 0;
	next_skip = 0;
	next_depth = 0;
	next_max_below = -1;
	next_max_above = -1;

	if (curr_entry->is_sub_map &&
	    curr_depth < user_max_depth) {
		/*
		 * We're not as deep as we could be:  we must have
		 * gone back up after not finding anything mapped
		 * below the original top-level map entry's.
		 * Let's move "curr_address" forward and recurse again.
		 */
		user_address = curr_address;
		goto recurse_again;
	}

	*nesting_depth = curr_depth;
	*size = curr_max_above + curr_max_below;
	*address = user_address + curr_skip - curr_max_below;

// LP64todo: all the current tools are 32bit, obviously never worked for 64b
// so probably should be a real 32b ID vs. ptr.
// Current users just check for equality
#define INFO_MAKE_OBJECT_ID(p)	((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))

	if (look_for_pages) {
		submap_info->user_tag = VME_ALIAS(curr_entry);
		submap_info->offset = VME_OFFSET(curr_entry);
		submap_info->protection = curr_entry->protection;
		submap_info->inheritance = curr_entry->inheritance;
		submap_info->max_protection = curr_entry->max_protection;
		submap_info->behavior = curr_entry->behavior;
		submap_info->user_wired_count = curr_entry->user_wired_count;
		submap_info->is_submap = curr_entry->is_sub_map;
		submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
	} else {
		short_info->user_tag = VME_ALIAS(curr_entry);
		short_info->offset = VME_OFFSET(curr_entry);
		short_info->protection = curr_entry->protection;
		short_info->inheritance = curr_entry->inheritance;
		short_info->max_protection = curr_entry->max_protection;
		short_info->behavior = curr_entry->behavior;
		short_info->user_wired_count = curr_entry->user_wired_count;
		short_info->is_submap = curr_entry->is_sub_map;
		short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
	}

	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.pages_reusable = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;
	extended.share_mode = SM_EMPTY;
	extended.ref_count = 0;

	if (not_in_kdp) {
		if (!curr_entry->is_sub_map) {
			vm_map_offset_t range_start, range_end;
			range_start = MAX((curr_address - curr_max_below),
					  curr_entry->vme_start);
			range_end = MIN((curr_address + curr_max_above),
					curr_entry->vme_end);
			vm_map_region_walk(curr_map,
					   range_start,
					   curr_entry,
					   (VME_OFFSET(curr_entry) +
					    (range_start -
					     curr_entry->vme_start)),
					   range_end - range_start,
					   &extended,
					   look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
			if (extended.external_pager &&
			    extended.ref_count == 2 &&
			    extended.share_mode == SM_SHARED) {
				extended.share_mode = SM_PRIVATE;
			}
		} else {
			if (curr_entry->use_pmap) {
				extended.share_mode = SM_TRUESHARED;
			} else {
				extended.share_mode = SM_PRIVATE;
			}
			extended.ref_count = VME_SUBMAP(curr_entry)->map_refcnt;
		}
	}

	if (look_for_pages) {
		submap_info->pages_resident = extended.pages_resident;
		submap_info->pages_swapped_out = extended.pages_swapped_out;
		submap_info->pages_shared_now_private =
			extended.pages_shared_now_private;
		submap_info->pages_dirtied = extended.pages_dirtied;
		submap_info->external_pager = extended.external_pager;
		submap_info->shadow_depth = extended.shadow_depth;
		submap_info->share_mode = extended.share_mode;
		submap_info->ref_count = extended.ref_count;

		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
			submap_info->pages_reusable = extended.pages_reusable;
		}
	} else {
		short_info->external_pager = extended.external_pager;
		short_info->shadow_depth = extended.shadow_depth;
		short_info->share_mode = extended.share_mode;
		short_info->ref_count = extended.ref_count;
	}

	if (not_in_kdp) {
		vm_map_unlock_read(curr_map);
	}

	return KERN_SUCCESS;
}
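
/*
 * Illustrative sketch (not part of the original code): callers typically
 * walk a map by feeding the returned address and size back in, with the
 * nesting depth saying how far into submaps the walk may descend.  The
 * loop below is a simplified, hypothetical in-kernel caller.
 */
#if 0 /* example only, never compiled */
static void
region_recurse_walk_example(vm_map_t map)
{
	vm_map_offset_t			address = 0;
	vm_map_size_t			size;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		depth = 999;	/* follow submaps all the way down */
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (vm_map_region_recurse_64(map, &address, &size,
		    &depth, (vm_region_submap_info_64_t)&info,
		    &count) != KERN_SUCCESS)
			break;			/* no more regions */
		/* ... consume "info" for [address, address + size) ... */
		address += size;		/* move past this region */
	}
}
#endif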
/*
 *	vm_map_region:
 *
 *	User call to obtain information about a region in
 *	a task's address map.  Currently, only one flavor is
 *	supported.
 *
 *	XXX The reserved and behavior fields cannot be filled
 *	    in until the vm merge from the IK is completed, and
 *	    vm_reserve is implemented.
 */

kern_return_t
vm_map_region(
	vm_map_t		 map,
	vm_map_offset_t		*address,	/* IN/OUT */
	vm_map_size_t		*size,		/* OUT */
	vm_region_flavor_t	 flavor,	/* IN */
	vm_region_info_t	 info,		/* OUT */
	mach_msg_type_number_t	*count,		/* IN/OUT */
	mach_port_t		*object_name)	/* OUT */
{
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	vm_map_offset_t		start;

	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	switch (flavor) {

	case VM_REGION_BASIC_INFO:
		/* legacy for old 32-bit objects info */
	{
		vm_region_basic_info_t	basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT)
			return(KERN_INVALID_ARGUMENT);

		basic = (vm_region_basic_info_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return(KERN_INVALID_ADDRESS);
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = (uint32_t)VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) *object_name = IP_NULL;
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return(KERN_SUCCESS);
	}

	case VM_REGION_BASIC_INFO_64:
	{
		vm_region_basic_info_64_t	basic;

		if (*count < VM_REGION_BASIC_INFO_COUNT_64)
			return(KERN_INVALID_ARGUMENT);

		basic = (vm_region_basic_info_64_t) info;
		*count = VM_REGION_BASIC_INFO_COUNT_64;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return(KERN_INVALID_ADDRESS);
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		basic->offset = VME_OFFSET(entry);
		basic->protection = entry->protection;
		basic->inheritance = entry->inheritance;
		basic->max_protection = entry->max_protection;
		basic->behavior = entry->behavior;
		basic->user_wired_count = entry->user_wired_count;
		basic->reserved = entry->is_sub_map;
		*address = start;
		*size = (entry->vme_end - start);

		if (object_name) *object_name = IP_NULL;
		if (entry->is_sub_map) {
			basic->shared = FALSE;
		} else {
			basic->shared = entry->is_shared;
		}

		vm_map_unlock_read(map);
		return(KERN_SUCCESS);
	}
	case VM_REGION_EXTENDED_INFO:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT)
			return(KERN_INVALID_ARGUMENT);
		/*fallthru*/
	case VM_REGION_EXTENDED_INFO__legacy:
		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
			return KERN_INVALID_ARGUMENT;

		{
			vm_region_extended_info_t	extended;
			mach_msg_type_number_t original_count;

			extended = (vm_region_extended_info_t) info;

			vm_map_lock_read(map);

			start = *address;
			if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
				if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
					vm_map_unlock_read(map);
					return(KERN_INVALID_ADDRESS);
				}
			} else {
				entry = tmp_entry;
			}
			start = entry->vme_start;

			extended->protection = entry->protection;
			extended->user_tag = VME_ALIAS(entry);
			extended->pages_resident = 0;
			extended->pages_swapped_out = 0;
			extended->pages_shared_now_private = 0;
			extended->pages_dirtied = 0;
			extended->external_pager = 0;
			extended->shadow_depth = 0;

			original_count = *count;
			if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
				*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
			} else {
				extended->pages_reusable = 0;
				*count = VM_REGION_EXTENDED_INFO_COUNT;
			}

			vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);

			if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
				extended->share_mode = SM_PRIVATE;

			if (object_name)
				*object_name = IP_NULL;
			*address = start;
			*size = (entry->vme_end - start);

			vm_map_unlock_read(map);
			return(KERN_SUCCESS);
		}
	case VM_REGION_TOP_INFO:
	{
		vm_region_top_info_t	top;

		if (*count < VM_REGION_TOP_INFO_COUNT)
			return(KERN_INVALID_ARGUMENT);

		top = (vm_region_top_info_t) info;
		*count = VM_REGION_TOP_INFO_COUNT;

		vm_map_lock_read(map);

		start = *address;
		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
				vm_map_unlock_read(map);
				return(KERN_INVALID_ADDRESS);
			}
		} else {
			entry = tmp_entry;
		}

		start = entry->vme_start;

		top->private_pages_resident = 0;
		top->shared_pages_resident = 0;

		vm_map_region_top_walk(entry, top);

		if (object_name)
			*object_name = IP_NULL;
		*address = start;
		*size = (entry->vme_end - start);

		vm_map_unlock_read(map);
		return(KERN_SUCCESS);
	}
	default:
		return(KERN_INVALID_ARGUMENT);
	}
}
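
/*
 * Illustrative sketch (not part of the original code): the flavor selects
 * which info structure vm_map_region() fills in, and *count must be
 * initialized to the size of that structure.  The call below is a
 * simplified, hypothetical kernel-side use of the basic 64-bit flavor.
 */
#if 0 /* example only, never compiled */
static kern_return_t
region_basic_info_example(vm_map_t map, vm_map_offset_t *addr_inout,
    vm_map_size_t *size_out)
{
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			name;

	return vm_map_region(map, addr_inout, size_out,
	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info,
	    &count, &name);
}
#endif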
#define OBJ_RESIDENT_COUNT(obj, entry_size)				\
	MIN((entry_size),						\
	    ((obj)->all_reusable ?					\
	     (obj)->wired_page_count :					\
	     (obj)->resident_page_count - (obj)->reusable_page_count))
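
/*
 * Illustrative worked example (not part of the original code):
 * OBJ_RESIDENT_COUNT clamps the per-object resident estimate to the size
 * of the mapping.  With a hypothetical object holding 100 resident pages,
 * 30 of them reusable, and an entry spanning 50 pages, the macro yields
 * MIN(50, 100 - 30) = 50; if the object were marked all_reusable, only
 * its wired pages would be counted instead.
 */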
void
vm_map_region_top_walk(
	vm_map_entry_t		entry,
	vm_region_top_info_t	top)
{

	if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
		top->share_mode = SM_EMPTY;
		top->ref_count = 0;
		top->obj_id = 0;
		return;
	}

	{
		struct	vm_object *obj, *tmp_obj;
		int		ref_count;
		uint32_t	entry_size;

		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);

		obj = VME_OBJECT(entry);

		vm_object_lock(obj);

		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
			ref_count--;

		assert(obj->reusable_page_count <= obj->resident_page_count);
		if (obj->shadow) {
			if (ref_count == 1)
				top->private_pages_resident =
					OBJ_RESIDENT_COUNT(obj, entry_size);
			else
				top->shared_pages_resident =
					OBJ_RESIDENT_COUNT(obj, entry_size);
			top->ref_count = ref_count;
			top->share_mode = SM_COW;

			while ((tmp_obj = obj->shadow)) {
				vm_object_lock(tmp_obj);
				vm_object_unlock(obj);
				obj = tmp_obj;

				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
					ref_count--;

				assert(obj->reusable_page_count <= obj->resident_page_count);
				top->shared_pages_resident +=
					OBJ_RESIDENT_COUNT(obj, entry_size);
				top->ref_count += ref_count - 1;
			}
		} else {
			if (entry->superpage_size) {
				top->share_mode = SM_LARGE_PAGE;
				top->shared_pages_resident = 0;
				top->private_pages_resident = entry_size;
			} else if (entry->needs_copy) {
				top->share_mode = SM_COW;
				top->shared_pages_resident =
					OBJ_RESIDENT_COUNT(obj, entry_size);
			} else {
				if (ref_count == 1 ||
				    (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
					top->share_mode = SM_PRIVATE;
					top->private_pages_resident =
						OBJ_RESIDENT_COUNT(obj,
								   entry_size);
				} else {
					top->share_mode = SM_SHARED;
					top->shared_pages_resident =
						OBJ_RESIDENT_COUNT(obj,
								   entry_size);
				}
			}
			top->ref_count = ref_count;
		}
		/* XXX K64: obj_id will be truncated */
		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);

		vm_object_unlock(obj);
	}
}
13915 vm_map_region_walk(
13917 vm_map_offset_t va
,
13918 vm_map_entry_t entry
,
13919 vm_object_offset_t offset
,
13920 vm_object_size_t range
,
13921 vm_region_extended_info_t extended
,
13922 boolean_t look_for_pages
,
13923 mach_msg_type_number_t count
)
13925 struct vm_object
*obj
, *tmp_obj
;
13926 vm_map_offset_t last_offset
;
13929 struct vm_object
*shadow_object
;
13931 boolean_t do_region_footprint
;
13933 do_region_footprint
= task_self_region_footprint();
13935 if ((VME_OBJECT(entry
) == 0) ||
13936 (entry
->is_sub_map
) ||
13937 (VME_OBJECT(entry
)->phys_contiguous
&&
13938 !entry
->superpage_size
)) {
13939 extended
->share_mode
= SM_EMPTY
;
13940 extended
->ref_count
= 0;
13944 if (entry
->superpage_size
) {
13945 extended
->shadow_depth
= 0;
13946 extended
->share_mode
= SM_LARGE_PAGE
;
13947 extended
->ref_count
= 1;
13948 extended
->external_pager
= 0;
13949 extended
->pages_resident
= (unsigned int)(range
>> PAGE_SHIFT
);
13950 extended
->shadow_depth
= 0;
13954 obj
= VME_OBJECT(entry
);
13956 vm_object_lock(obj
);
13958 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
13961 if (look_for_pages
) {
13962 for (last_offset
= offset
+ range
;
13963 offset
< last_offset
;
13964 offset
+= PAGE_SIZE_64
, va
+= PAGE_SIZE
) {
13966 if (do_region_footprint
) {
13970 if (map
->has_corpse_footprint
) {
13972 * Query the page info data we saved
13973 * while forking the corpse.
13975 vm_map_corpse_footprint_query_page_info(
13983 pmap_query_page_info(map
->pmap
,
13987 if (disp
& PMAP_QUERY_PAGE_PRESENT
) {
13988 if (!(disp
& PMAP_QUERY_PAGE_ALTACCT
)) {
13989 extended
->pages_resident
++;
13991 if (disp
& PMAP_QUERY_PAGE_REUSABLE
) {
13992 extended
->pages_reusable
++;
13993 } else if (!(disp
& PMAP_QUERY_PAGE_INTERNAL
) ||
13994 (disp
& PMAP_QUERY_PAGE_ALTACCT
)) {
13995 /* alternate accounting */
13997 extended
->pages_dirtied
++;
13999 } else if (disp
& PMAP_QUERY_PAGE_COMPRESSED
) {
14000 if (disp
& PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
) {
14001 /* alternate accounting */
14003 extended
->pages_swapped_out
++;
14006 /* deal with alternate accounting */
14007 if (obj
->purgable
== VM_PURGABLE_NONVOLATILE
&&
14008 /* && not tagged as no-footprint? */
14009 VM_OBJECT_OWNER(obj
) != NULL
&&
14010 VM_OBJECT_OWNER(obj
)->map
== map
) {
14013 + VME_OFFSET(entry
))
14015 (obj
->resident_page_count
+
14016 vm_compressor_pager_get_count(obj
->pager
)))) {
14018 * Non-volatile purgeable object owned
14019 * by this task: report the first
14020 * "#resident + #compressed" pages as
14021 * "resident" (to show that they
14022 * contribute to the footprint) but not
14023 * "dirty" (to avoid double-counting
14024 * with the fake "non-volatile" region
14025 * we'll report at the end of the
14026 * address space to account for all
14027 * (mapped or not) non-volatile memory
14028 * owned by this task.
14030 extended
->pages_resident
++;
14032 } else if ((obj
->purgable
== VM_PURGABLE_VOLATILE
||
14033 obj
->purgable
== VM_PURGABLE_EMPTY
) &&
14034 /* && not tagged as no-footprint? */
14035 VM_OBJECT_OWNER(obj
) != NULL
&&
14036 VM_OBJECT_OWNER(obj
)->map
== map
) {
14039 + VME_OFFSET(entry
))
14041 obj
->wired_page_count
)) {
14043 * Volatile|empty purgeable object owned
14044 * by this task: report the first
14045 * "#wired" pages as "resident" (to
14046 * show that they contribute to the
14047 * footprint) but not "dirty" (to avoid
14048 * double-counting with the fake
14049 * "non-volatile" region we'll report
14050 * at the end of the address space to
14051 * account for all (mapped or not)
14052 * non-volatile memory owned by this
14055 extended
->pages_resident
++;
14057 } else if (obj
->purgable
!= VM_PURGABLE_DENY
) {
14059 * Pages from purgeable objects
14060 * will be reported as dirty
14061 * appropriately in an extra
14062 * fake memory region at the end of
14063 * the address space.
14065 } else if (entry
->iokit_acct
) {
14067 * IOKit mappings are considered
14068 * as fully dirty for footprint's
14071 extended
->pages_dirtied
++;
14076 vm_map_region_look_for_page(map
, va
, obj
,
14078 0, extended
, count
);
14081 if (do_region_footprint
) {
14082 goto collect_object_info
;
14086 collect_object_info
:
14087 shadow_object
= obj
->shadow
;
14090 if ( !(obj
->pager_trusted
) && !(obj
->internal
))
14091 extended
->external_pager
= 1;
14093 if (shadow_object
!= VM_OBJECT_NULL
) {
14094 vm_object_lock(shadow_object
);
14096 shadow_object
!= VM_OBJECT_NULL
;
14098 vm_object_t next_shadow
;
14100 if ( !(shadow_object
->pager_trusted
) &&
14101 !(shadow_object
->internal
))
14102 extended
->external_pager
= 1;
14104 next_shadow
= shadow_object
->shadow
;
14106 vm_object_lock(next_shadow
);
14108 vm_object_unlock(shadow_object
);
14109 shadow_object
= next_shadow
;
14112 extended
->shadow_depth
= shadow_depth
;
14115 if (extended
->shadow_depth
|| entry
->needs_copy
)
14116 extended
->share_mode
= SM_COW
;
14118 if (ref_count
== 1)
14119 extended
->share_mode
= SM_PRIVATE
;
14121 if (obj
->true_share
)
14122 extended
->share_mode
= SM_TRUESHARED
;
14124 extended
->share_mode
= SM_SHARED
;
14127 extended
->ref_count
= ref_count
- extended
->shadow_depth
;
14129 for (i
= 0; i
< extended
->shadow_depth
; i
++) {
14130 if ((tmp_obj
= obj
->shadow
) == 0)
14132 vm_object_lock(tmp_obj
);
14133 vm_object_unlock(obj
);
14135 if ((ref_count
= tmp_obj
->ref_count
) > 1 && tmp_obj
->paging_in_progress
)
14138 extended
->ref_count
+= ref_count
;
14141 vm_object_unlock(obj
);
14143 if (extended
->share_mode
== SM_SHARED
) {
14144 vm_map_entry_t cur
;
14145 vm_map_entry_t last
;
14148 obj
= VME_OBJECT(entry
);
14149 last
= vm_map_to_entry(map
);
14152 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
14154 for (cur
= vm_map_first_entry(map
); cur
!= last
; cur
= cur
->vme_next
)
14155 my_refs
+= vm_map_region_count_obj_refs(cur
, obj
);
14157 if (my_refs
== ref_count
)
14158 extended
->share_mode
= SM_PRIVATE_ALIASED
;
14159 else if (my_refs
> 1)
14160 extended
->share_mode
= SM_SHARED_ALIASED
;
/* object is locked on entry and locked on return */
static void
vm_map_region_look_for_page(
    __unused vm_map_t        map,
    __unused vm_map_offset_t va,
    vm_object_t              object,
    vm_object_offset_t       offset,
    int                      max_refcnt,
    int                      depth,
    vm_region_extended_info_t extended,
    mach_msg_type_number_t   count)
{
    vm_page_t   p;
    vm_object_t shadow;
    int         ref_count;
    vm_object_t caller_object;

    shadow = object->shadow;
    caller_object = object;

    while (TRUE) {

        if ( !(object->pager_trusted) && !(object->internal))
            extended->external_pager = 1;

        if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
            if (shadow && (max_refcnt == 1))
                extended->pages_shared_now_private++;

            if (!p->vmp_fictitious &&
                (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
                extended->pages_dirtied++;
            else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
                if (p->vmp_reusable || object->all_reusable) {
                    extended->pages_reusable++;
                }
            }

            extended->pages_resident++;

            if (object != caller_object)
                vm_object_unlock(object);

            return;
        }

        if (object->internal &&
            object->alive &&
            !object->terminating &&
            object->pager_ready) {

            if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
                == VM_EXTERNAL_STATE_EXISTS) {
                /* the pager has that page */
                extended->pages_swapped_out++;
                if (object != caller_object)
                    vm_object_unlock(object);
                return;
            }
        }

        if (shadow) {
            vm_object_lock(shadow);

            if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
                ref_count--;

            if (++depth > extended->shadow_depth)
                extended->shadow_depth = depth;

            if (ref_count > max_refcnt)
                max_refcnt = ref_count;

            if (object != caller_object)
                vm_object_unlock(object);

            offset = offset + object->vo_shadow_offset;
            object = shadow;
            shadow = object->shadow;
            continue;
        }

        if (object != caller_object)
            vm_object_unlock(object);
        break;
    }
}
static int
vm_map_region_count_obj_refs(
    vm_map_entry_t entry,
    vm_object_t    object)
{
    int         ref_count;
    vm_object_t chk_obj;
    vm_object_t tmp_obj;

    if (VME_OBJECT(entry) == 0)
        return(0);

    if (entry->is_sub_map)
        return(0);
    else {
        ref_count = 0;

        chk_obj = VME_OBJECT(entry);
        vm_object_lock(chk_obj);

        while (chk_obj) {
            if (chk_obj == object)
                ref_count++;
            tmp_obj = chk_obj->shadow;
            if (tmp_obj)
                vm_object_lock(tmp_obj);
            vm_object_unlock(chk_obj);

            chk_obj = tmp_obj;
        }
    }
    return(ref_count);
}
/*
 *	Routine:	vm_map_simplify
 *
 *	Description:
 *		Attempt to simplify the map representation in
 *		the vicinity of the given starting address.
 *	Note:
 *		This routine is intended primarily to keep the
 *		kernel maps more compact -- they generally don't
 *		benefit from the "expand a map entry" technology
 *		at allocation time because the adjacent entry
 *		is often wired down.
 */
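/*
 * The coalescing idea itself is simple: two neighbouring entries can be
 * merged when they abut and every attribute that matters matches.  As a
 * purely illustrative sketch (hypothetical "range" type, a deliberately
 * reduced attribute set, nothing from the kernel's data structures):
 *
 *	struct range {
 *		unsigned long start, end;
 *		int prot, max_prot, inheritance;
 *	};
 *
 *	// Merge adjacent, attribute-identical ranges in place;
 *	// returns the new number of ranges.
 *	int coalesce(struct range *r, int n)
 *	{
 *		int out = 0;
 *		for (int i = 0; i < n; i++) {
 *			if (out > 0 &&
 *			    r[out - 1].end == r[i].start &&
 *			    r[out - 1].prot == r[i].prot &&
 *			    r[out - 1].max_prot == r[i].max_prot &&
 *			    r[out - 1].inheritance == r[i].inheritance) {
 *				r[out - 1].end = r[i].end;	// absorb r[i]
 *			} else {
 *				r[out++] = r[i];		// keep as-is
 *			}
 *		}
 *		return out;
 *	}
 *
 * The real check below compares many more fields (object, offset, wiring,
 * aliases, JIT and resilience flags, ...), but follows the same shape.
 */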
void
vm_map_simplify_entry(
    vm_map_t       map,
    vm_map_entry_t this_entry)
{
    vm_map_entry_t prev_entry;

    counter(c_vm_map_simplify_entry_called++);

    prev_entry = this_entry->vme_prev;

    if ((this_entry != vm_map_to_entry(map)) &&
        (prev_entry != vm_map_to_entry(map)) &&

        (prev_entry->vme_end == this_entry->vme_start) &&

        (prev_entry->is_sub_map == this_entry->is_sub_map) &&
        (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
        ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
                                    prev_entry->vme_start))
         == VME_OFFSET(this_entry)) &&

        (prev_entry->behavior == this_entry->behavior) &&
        (prev_entry->needs_copy == this_entry->needs_copy) &&
        (prev_entry->protection == this_entry->protection) &&
        (prev_entry->max_protection == this_entry->max_protection) &&
        (prev_entry->inheritance == this_entry->inheritance) &&
        (prev_entry->use_pmap == this_entry->use_pmap) &&
        (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
        (prev_entry->no_cache == this_entry->no_cache) &&
        (prev_entry->permanent == this_entry->permanent) &&
        (prev_entry->map_aligned == this_entry->map_aligned) &&
        (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
        (prev_entry->used_for_jit == this_entry->used_for_jit) &&
        (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
        /* from_reserved_zone: OK if that field doesn't match */
        (prev_entry->iokit_acct == this_entry->iokit_acct) &&
        (prev_entry->vme_resilient_codesign ==
         this_entry->vme_resilient_codesign) &&
        (prev_entry->vme_resilient_media ==
         this_entry->vme_resilient_media) &&

        (prev_entry->wired_count == this_entry->wired_count) &&
        (prev_entry->user_wired_count == this_entry->user_wired_count) &&

        ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
        (prev_entry->in_transition == FALSE) &&
        (this_entry->in_transition == FALSE) &&
        (prev_entry->needs_wakeup == FALSE) &&
        (this_entry->needs_wakeup == FALSE) &&
        (prev_entry->is_shared == FALSE) &&
        (this_entry->is_shared == FALSE) &&
        (prev_entry->superpage_size == FALSE) &&
        (this_entry->superpage_size == FALSE)
        ) {
        vm_map_store_entry_unlink(map, prev_entry);
        assert(prev_entry->vme_start < this_entry->vme_end);
        if (prev_entry->map_aligned)
            assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
                                       VM_MAP_PAGE_MASK(map)));
        this_entry->vme_start = prev_entry->vme_start;
        VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));

        if (map->holelistenabled) {
            vm_map_store_update_first_free(map, this_entry, TRUE);
        }

        if (prev_entry->is_sub_map) {
            vm_map_deallocate(VME_SUBMAP(prev_entry));
        } else {
            vm_object_deallocate(VME_OBJECT(prev_entry));
        }
        vm_map_entry_dispose(map, prev_entry);
        SAVE_HINT_MAP_WRITE(map, this_entry);
        counter(c_vm_map_simplified++);
    }
}
void
vm_map_simplify(
    vm_map_t        map,
    vm_map_offset_t start)
{
    vm_map_entry_t this_entry;

    vm_map_lock(map);
    if (vm_map_lookup_entry(map, start, &this_entry)) {
        vm_map_simplify_entry(map, this_entry);
        vm_map_simplify_entry(map, this_entry->vme_next);
    }
    counter(c_vm_map_simplify_called++);
    vm_map_unlock(map);
}
static void
vm_map_simplify_range(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t entry;

    /*
     * The map should be locked (for "write") by the caller.
     */

    if (start >= end) {
        /* invalid address range */
        return;
    }

    start = vm_map_trunc_page(start,
                              VM_MAP_PAGE_MASK(map));
    end = vm_map_round_page(end,
                            VM_MAP_PAGE_MASK(map));

    if (!vm_map_lookup_entry(map, start, &entry)) {
        /* "start" is not mapped and "entry" ends before "start" */
        if (entry == vm_map_to_entry(map)) {
            /* start with first entry in the map */
            entry = vm_map_first_entry(map);
        } else {
            /* start with next entry */
            entry = entry->vme_next;
        }
    }

    while (entry != vm_map_to_entry(map) &&
           entry->vme_start <= end) {
        /* try and coalesce "entry" with its previous entry */
        vm_map_simplify_entry(map, entry);
        entry = entry->vme_next;
    }
}
/*
 *	Routine:	vm_map_machine_attribute
 *	Purpose:
 *		Provide machine-specific attributes to mappings,
 *		such as cacheability etc. for machines that provide
 *		them.  NUMA architectures and machines with big/strange
 *		caches will use this.
 *	Note:
 *		Responsibilities for locking and checking are handled here,
 *		everything else in the pmap module.  If any non-volatile
 *		information must be kept, the pmap module should handle
 *		it itself.  [This assumes that attributes do not
 *		need to be inherited, which seems ok to me]
 */
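/*
 * A minimal in-kernel sketch of how a caller might use this interface to
 * flush the data cache over a range (the helper name "flush_range_cache"
 * and the choice of map/range are hypothetical; MATTR_CACHE and
 * MATTR_VAL_CACHE_FLUSH come from <mach/vm_attributes.h>):
 *
 *	static kern_return_t
 *	flush_range_cache(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
 *	{
 *		vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;
 *
 *		// Returns KERN_INVALID_ADDRESS if the range lies outside
 *		// the map, or whatever the pmap layer reports otherwise.
 *		return vm_map_machine_attribute(map, start, end,
 *		    MATTR_CACHE, &value);
 *	}
 */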
14453 vm_map_machine_attribute(
14455 vm_map_offset_t start
,
14456 vm_map_offset_t end
,
14457 vm_machine_attribute_t attribute
,
14458 vm_machine_attribute_val_t
* value
) /* IN/OUT */
14461 vm_map_size_t sync_size
;
14462 vm_map_entry_t entry
;
14464 if (start
< vm_map_min(map
) || end
> vm_map_max(map
))
14465 return KERN_INVALID_ADDRESS
;
14467 /* Figure how much memory we need to flush (in page increments) */
14468 sync_size
= end
- start
;
14472 if (attribute
!= MATTR_CACHE
) {
14473 /* If we don't have to find physical addresses, we */
14474 /* don't have to do an explicit traversal here. */
14475 ret
= pmap_attribute(map
->pmap
, start
, end
-start
,
14477 vm_map_unlock(map
);
14481 ret
= KERN_SUCCESS
; /* Assume it all worked */
14484 if (vm_map_lookup_entry(map
, start
, &entry
)) {
14485 vm_map_size_t sub_size
;
14486 if((entry
->vme_end
- start
) > sync_size
) {
14487 sub_size
= sync_size
;
14490 sub_size
= entry
->vme_end
- start
;
14491 sync_size
-= sub_size
;
14493 if(entry
->is_sub_map
) {
14494 vm_map_offset_t sub_start
;
14495 vm_map_offset_t sub_end
;
14497 sub_start
= (start
- entry
->vme_start
)
14498 + VME_OFFSET(entry
);
14499 sub_end
= sub_start
+ sub_size
;
14500 vm_map_machine_attribute(
14506 if (VME_OBJECT(entry
)) {
14508 vm_object_t object
;
14509 vm_object_t base_object
;
14510 vm_object_t last_object
;
14511 vm_object_offset_t offset
;
14512 vm_object_offset_t base_offset
;
14513 vm_map_size_t range
;
14515 offset
= (start
- entry
->vme_start
)
14516 + VME_OFFSET(entry
);
14517 base_offset
= offset
;
14518 object
= VME_OBJECT(entry
);
14519 base_object
= object
;
14520 last_object
= NULL
;
14522 vm_object_lock(object
);
14525 m
= vm_page_lookup(
14528 if (m
&& !m
->vmp_fictitious
) {
14530 pmap_attribute_cache_sync(
14531 VM_PAGE_GET_PHYS_PAGE(m
),
14535 } else if (object
->shadow
) {
14536 offset
= offset
+ object
->vo_shadow_offset
;
14537 last_object
= object
;
14538 object
= object
->shadow
;
14539 vm_object_lock(last_object
->shadow
);
14540 vm_object_unlock(last_object
);
14543 range
-= PAGE_SIZE
;
14545 if (base_object
!= object
) {
14546 vm_object_unlock(object
);
14547 vm_object_lock(base_object
);
14548 object
= base_object
;
14550 /* Bump to the next page */
14551 base_offset
+= PAGE_SIZE
;
14552 offset
= base_offset
;
14554 vm_object_unlock(object
);
14559 vm_map_unlock(map
);
14560 return KERN_FAILURE
;
14565 vm_map_unlock(map
);
/*
 *	vm_map_behavior_set:
 *
 *	Sets the paging reference behavior of the specified address
 *	range in the target map.  Paging reference behavior affects
 *	how pagein operations resulting from faults on the map will be
 *	clustered.
 */
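/*
 * User space normally reaches this routine through madvise(2); the
 * madvise-to-VM_BEHAVIOR_* translation lives in the BSD layer, not in
 * this file.  A minimal user-space sketch (illustration only) asking for
 * sequential read-ahead on an anonymous mapping:
 *
 *	#include <stddef.h>
 *	#include <sys/mman.h>
 *
 *	int main(void)
 *	{
 *		size_t len = 16 * 4096;
 *		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		    MAP_ANON | MAP_PRIVATE, -1, 0);
 *		if (p == MAP_FAILED)
 *			return 1;
 *		// MADV_SEQUENTIAL maps to VM_BEHAVIOR_SEQUENTIAL,
 *		// MADV_RANDOM to VM_BEHAVIOR_RANDOM, and so on.
 *		madvise(p, len, MADV_SEQUENTIAL);
 *		munmap(p, len);
 *		return 0;
 *	}
 */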
kern_return_t
vm_map_behavior_set(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_behavior_t   new_behavior)
{
    vm_map_entry_t entry;
    vm_map_entry_t temp_entry;

    XPR(XPR_VM_MAP,
        "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
        map, start, end, new_behavior, 0);

    if (start < vm_map_min(map) ||
        end > vm_map_max(map)) {
        return KERN_NO_SPACE;
    }

    switch (new_behavior) {

    /*
     * This first block of behaviors all set a persistent state on the specified
     * memory range.  All we have to do here is to record the desired behavior
     * in the vm_map_entry_t's.
     */

    case VM_BEHAVIOR_DEFAULT:
    case VM_BEHAVIOR_RANDOM:
    case VM_BEHAVIOR_SEQUENTIAL:
    case VM_BEHAVIOR_RSEQNTL:
    case VM_BEHAVIOR_ZERO_WIRED_PAGES:
        vm_map_lock(map);

        /*
         * The entire address range must be valid for the map.
         * Note that vm_map_range_check() does a
         * vm_map_lookup_entry() internally and returns the
         * entry containing the start of the address range if
         * the entire range is valid.
         */
        if (vm_map_range_check(map, start, end, &temp_entry)) {
            entry = temp_entry;
            vm_map_clip_start(map, entry, start);
        } else {
            vm_map_unlock(map);
            return(KERN_INVALID_ADDRESS);
        }

        while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
            vm_map_clip_end(map, entry, end);
            if (entry->is_sub_map) {
                assert(!entry->use_pmap);
            }

            if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
                entry->zero_wired_pages = TRUE;
            } else {
                entry->behavior = new_behavior;
            }
            entry = entry->vme_next;
        }

        vm_map_unlock(map);
        break;

    /*
     * The rest of these are different from the above in that they cause
     * an immediate action to take place as opposed to setting a behavior that
     * affects future actions.
     */

    case VM_BEHAVIOR_WILLNEED:
        return vm_map_willneed(map, start, end);

    case VM_BEHAVIOR_DONTNEED:
        return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

    case VM_BEHAVIOR_FREE:
        return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

    case VM_BEHAVIOR_REUSABLE:
        return vm_map_reusable_pages(map, start, end);

    case VM_BEHAVIOR_REUSE:
        return vm_map_reuse_pages(map, start, end);

    case VM_BEHAVIOR_CAN_REUSE:
        return vm_map_can_reuse(map, start, end);

#if MACH_ASSERT
    case VM_BEHAVIOR_PAGEOUT:
        return vm_map_pageout(map, start, end);
#endif /* MACH_ASSERT */

    default:
        return(KERN_INVALID_ARGUMENT);
    }

    return(KERN_SUCCESS);
}
/*
 * Internals for madvise(MADV_WILLNEED) system call.
 *
 * The present implementation is to do a read-ahead if the mapping corresponds
 * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
 * and basically ignore the "advice" (which we are always free to do).
 */
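/*
 * The file-backed case this comment describes is exercised from user space
 * roughly as follows (illustration only; "prefetch_file" is a hypothetical
 * helper, not an existing API):
 *
 *	#include <fcntl.h>
 *	#include <sys/mman.h>
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	// Hint that a mapped file will be read soon; the kernel may then
 *	// issue asynchronous read-ahead through the file's pager.
 *	int prefetch_file(const char *path)
 *	{
 *		int fd = open(path, O_RDONLY);
 *		if (fd < 0)
 *			return -1;
 *		struct stat st;
 *		if (fstat(fd, &st) != 0 || st.st_size == 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		void *p = mmap(NULL, (size_t)st.st_size, PROT_READ,
 *		    MAP_PRIVATE, fd, 0);
 *		close(fd);
 *		if (p == MAP_FAILED)
 *			return -1;
 *		madvise(p, (size_t)st.st_size, MADV_WILLNEED);
 *		munmap(p, (size_t)st.st_size);
 *		return 0;
 *	}
 */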
14692 static kern_return_t
14695 vm_map_offset_t start
,
14696 vm_map_offset_t end
14699 vm_map_entry_t entry
;
14700 vm_object_t object
;
14701 memory_object_t pager
;
14702 struct vm_object_fault_info fault_info
= {};
14704 vm_object_size_t len
;
14705 vm_object_offset_t offset
;
14707 fault_info
.interruptible
= THREAD_UNINT
; /* ignored value */
14708 fault_info
.behavior
= VM_BEHAVIOR_SEQUENTIAL
;
14709 fault_info
.stealth
= TRUE
;
14712 * The MADV_WILLNEED operation doesn't require any changes to the
14713 * vm_map_entry_t's, so the read lock is sufficient.
14716 vm_map_lock_read(map
);
14719 * The madvise semantics require that the address range be fully
14720 * allocated with no holes. Otherwise, we're required to return
14724 if (! vm_map_range_check(map
, start
, end
, &entry
)) {
14725 vm_map_unlock_read(map
);
14726 return KERN_INVALID_ADDRESS
;
14730 * Examine each vm_map_entry_t in the range.
14732 for (; entry
!= vm_map_to_entry(map
) && start
< end
; ) {
14735 * The first time through, the start address could be anywhere
14736 * within the vm_map_entry we found. So adjust the offset to
14737 * correspond. After that, the offset will always be zero to
14738 * correspond to the beginning of the current vm_map_entry.
14740 offset
= (start
- entry
->vme_start
) + VME_OFFSET(entry
);
14743 * Set the length so we don't go beyond the end of the
14744 * map_entry or beyond the end of the range we were given.
14745 * This range could span also multiple map entries all of which
14746 * map different files, so make sure we only do the right amount
14747 * of I/O for each object. Note that it's possible for there
14748 * to be multiple map entries all referring to the same object
14749 * but with different page permissions, but it's not worth
14750 * trying to optimize that case.
14752 len
= MIN(entry
->vme_end
- start
, end
- start
);
14754 if ((vm_size_t
) len
!= len
) {
14755 /* 32-bit overflow */
14756 len
= (vm_size_t
) (0 - PAGE_SIZE
);
14758 fault_info
.cluster_size
= (vm_size_t
) len
;
14759 fault_info
.lo_offset
= offset
;
14760 fault_info
.hi_offset
= offset
+ len
;
14761 fault_info
.user_tag
= VME_ALIAS(entry
);
14762 fault_info
.pmap_options
= 0;
14763 if (entry
->iokit_acct
||
14764 (!entry
->is_sub_map
&& !entry
->use_pmap
)) {
14765 fault_info
.pmap_options
|= PMAP_OPTIONS_ALT_ACCT
;
14769 * If there's no read permission to this mapping, then just
14772 if ((entry
->protection
& VM_PROT_READ
) == 0) {
14773 entry
= entry
->vme_next
;
14774 start
= entry
->vme_start
;
14779 * Find the file object backing this map entry. If there is
14780 * none, then we simply ignore the "will need" advice for this
14781 * entry and go on to the next one.
14783 if ((object
= find_vnode_object(entry
)) == VM_OBJECT_NULL
) {
14784 entry
= entry
->vme_next
;
14785 start
= entry
->vme_start
;
14790 * The data_request() could take a long time, so let's
14791 * release the map lock to avoid blocking other threads.
14793 vm_map_unlock_read(map
);
14795 vm_object_paging_begin(object
);
14796 pager
= object
->pager
;
14797 vm_object_unlock(object
);
14800 * Get the data from the object asynchronously.
14802 * Note that memory_object_data_request() places limits on the
14803 * amount of I/O it will do. Regardless of the len we
14804 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
14805 * silently truncates the len to that size. This isn't
14806 * necessarily bad since madvise shouldn't really be used to
14807 * page in unlimited amounts of data. Other Unix variants
14808 * limit the willneed case as well. If this turns out to be an
14809 * issue for developers, then we can always adjust the policy
14810 * here and still be backwards compatible since this is all
14813 kr
= memory_object_data_request(
14815 offset
+ object
->paging_offset
,
14818 (memory_object_fault_info_t
)&fault_info
);
14820 vm_object_lock(object
);
14821 vm_object_paging_end(object
);
14822 vm_object_unlock(object
);
14825 * If we couldn't do the I/O for some reason, just give up on
14826 * the madvise. We still return success to the user since
14827 * madvise isn't supposed to fail when the advice can't be
14830 if (kr
!= KERN_SUCCESS
) {
14831 return KERN_SUCCESS
;
14835 if (start
>= end
) {
14837 return KERN_SUCCESS
;
14840 /* look up next entry */
14841 vm_map_lock_read(map
);
14842 if (! vm_map_lookup_entry(map
, start
, &entry
)) {
14844 * There's a new hole in the address range.
14846 vm_map_unlock_read(map
);
14847 return KERN_INVALID_ADDRESS
;
14851 vm_map_unlock_read(map
);
14852 return KERN_SUCCESS
;
14856 vm_map_entry_is_reusable(
14857 vm_map_entry_t entry
)
14859 /* Only user map entries */
14861 vm_object_t object
;
14863 if (entry
->is_sub_map
) {
14867 switch (VME_ALIAS(entry
)) {
14868 case VM_MEMORY_MALLOC
:
14869 case VM_MEMORY_MALLOC_SMALL
:
14870 case VM_MEMORY_MALLOC_LARGE
:
14871 case VM_MEMORY_REALLOC
:
14872 case VM_MEMORY_MALLOC_TINY
:
14873 case VM_MEMORY_MALLOC_LARGE_REUSABLE
:
14874 case VM_MEMORY_MALLOC_LARGE_REUSED
:
14876 * This is a malloc() memory region: check if it's still
14877 * in its original state and can be re-used for more
14878 * malloc() allocations.
14883 * Not a malloc() memory region: let the caller decide if
14889 if (/*entry->is_shared ||*/
14890 entry
->is_sub_map
||
14891 entry
->in_transition
||
14892 entry
->protection
!= VM_PROT_DEFAULT
||
14893 entry
->max_protection
!= VM_PROT_ALL
||
14894 entry
->inheritance
!= VM_INHERIT_DEFAULT
||
14896 entry
->permanent
||
14897 entry
->superpage_size
!= FALSE
||
14898 entry
->zero_wired_pages
||
14899 entry
->wired_count
!= 0 ||
14900 entry
->user_wired_count
!= 0) {
14904 object
= VME_OBJECT(entry
);
14905 if (object
== VM_OBJECT_NULL
) {
14911 * Let's proceed even if the VM object is potentially
14913 * We check for this later when processing the actual
14914 * VM pages, so the contents will be safe if shared.
14916 * But we can still mark this memory region as "reusable" to
14917 * acknowledge that the caller did let us know that the memory
14918 * could be re-used and should not be penalized for holding
14919 * on to it. This allows its "resident size" to not include
14920 * the reusable range.
14922 object
->ref_count
== 1 &&
14924 object
->wired_page_count
== 0 &&
14925 object
->copy
== VM_OBJECT_NULL
&&
14926 object
->shadow
== VM_OBJECT_NULL
&&
14927 object
->internal
&&
14928 object
->purgable
== VM_PURGABLE_DENY
&&
14929 object
->copy_strategy
!= MEMORY_OBJECT_COPY_DELAY
&&
14930 !object
->true_share
&&
14931 object
->wimg_bits
== VM_WIMG_USE_DEFAULT
&&
14932 !object
->code_signed
) {
14940 static kern_return_t
14941 vm_map_reuse_pages(
14943 vm_map_offset_t start
,
14944 vm_map_offset_t end
)
14946 vm_map_entry_t entry
;
14947 vm_object_t object
;
14948 vm_object_offset_t start_offset
, end_offset
;
14951 * The MADV_REUSE operation doesn't require any changes to the
14952 * vm_map_entry_t's, so the read lock is sufficient.
14955 vm_map_lock_read(map
);
14956 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
14959 * The madvise semantics require that the address range be fully
14960 * allocated with no holes. Otherwise, we're required to return
14964 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
14965 vm_map_unlock_read(map
);
14966 vm_page_stats_reusable
.reuse_pages_failure
++;
14967 return KERN_INVALID_ADDRESS
;
14971 * Examine each vm_map_entry_t in the range.
14973 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
14974 entry
= entry
->vme_next
) {
14976 * Sanity check on the VM map entry.
14978 if (! vm_map_entry_is_reusable(entry
)) {
14979 vm_map_unlock_read(map
);
14980 vm_page_stats_reusable
.reuse_pages_failure
++;
14981 return KERN_INVALID_ADDRESS
;
14985 * The first time through, the start address could be anywhere
14986 * within the vm_map_entry we found. So adjust the offset to
14989 if (entry
->vme_start
< start
) {
14990 start_offset
= start
- entry
->vme_start
;
14994 end_offset
= MIN(end
, entry
->vme_end
) - entry
->vme_start
;
14995 start_offset
+= VME_OFFSET(entry
);
14996 end_offset
+= VME_OFFSET(entry
);
14998 assert(!entry
->is_sub_map
);
14999 object
= VME_OBJECT(entry
);
15000 if (object
!= VM_OBJECT_NULL
) {
15001 vm_object_lock(object
);
15002 vm_object_reuse_pages(object
, start_offset
, end_offset
,
15004 vm_object_unlock(object
);
15007 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE_REUSABLE
) {
15010 * We do not hold the VM map exclusively here.
15011 * The "alias" field is not that critical, so it's
15012 * safe to update it here, as long as it is the only
15013 * one that can be modified while holding the VM map
15016 VME_ALIAS_SET(entry
, VM_MEMORY_MALLOC_LARGE_REUSED
);
15020 vm_map_unlock_read(map
);
15021 vm_page_stats_reusable
.reuse_pages_success
++;
15022 return KERN_SUCCESS
;
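/*
 * For context: vm_map_reuse_pages() and vm_map_reusable_pages() back the
 * Darwin-specific MADV_FREE_REUSE / MADV_FREE_REUSABLE advice used by
 * allocators.  A minimal user-space sketch of the intended cycle
 * ("recycle" is a hypothetical helper; buf/len are assumed page-aligned):
 *
 *	#include <stddef.h>
 *	#include <sys/mman.h>
 *
 *	void recycle(void *buf, size_t len)
 *	{
 *		// Done with the contents: let the kernel reclaim the pages
 *		// and stop charging them to our resident size.
 *		madvise(buf, len, MADV_FREE_REUSABLE);
 *
 *		// ...later, before writing into the same range again:
 *		madvise(buf, len, MADV_FREE_REUSE);
 *	}
 */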
15026 static kern_return_t
15027 vm_map_reusable_pages(
15029 vm_map_offset_t start
,
15030 vm_map_offset_t end
)
15032 vm_map_entry_t entry
;
15033 vm_object_t object
;
15034 vm_object_offset_t start_offset
, end_offset
;
15035 vm_map_offset_t pmap_offset
;
15038 * The MADV_REUSABLE operation doesn't require any changes to the
15039 * vm_map_entry_t's, so the read lock is sufficient.
15042 vm_map_lock_read(map
);
15043 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
15046 * The madvise semantics require that the address range be fully
15047 * allocated with no holes. Otherwise, we're required to return
15051 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15052 vm_map_unlock_read(map
);
15053 vm_page_stats_reusable
.reusable_pages_failure
++;
15054 return KERN_INVALID_ADDRESS
;
15058 * Examine each vm_map_entry_t in the range.
15060 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
15061 entry
= entry
->vme_next
) {
15062 int kill_pages
= 0;
15065 * Sanity check on the VM map entry.
15067 if (! vm_map_entry_is_reusable(entry
)) {
15068 vm_map_unlock_read(map
);
15069 vm_page_stats_reusable
.reusable_pages_failure
++;
15070 return KERN_INVALID_ADDRESS
;
15073 if (! (entry
->protection
& VM_PROT_WRITE
) && !entry
->used_for_jit
) {
15074 /* not writable: can't discard contents */
15075 vm_map_unlock_read(map
);
15076 vm_page_stats_reusable
.reusable_nonwritable
++;
15077 vm_page_stats_reusable
.reusable_pages_failure
++;
15078 return KERN_PROTECTION_FAILURE
;
15082 * The first time through, the start address could be anywhere
15083 * within the vm_map_entry we found. So adjust the offset to
15086 if (entry
->vme_start
< start
) {
15087 start_offset
= start
- entry
->vme_start
;
15088 pmap_offset
= start
;
15091 pmap_offset
= entry
->vme_start
;
15093 end_offset
= MIN(end
, entry
->vme_end
) - entry
->vme_start
;
15094 start_offset
+= VME_OFFSET(entry
);
15095 end_offset
+= VME_OFFSET(entry
);
15097 assert(!entry
->is_sub_map
);
15098 object
= VME_OBJECT(entry
);
15099 if (object
== VM_OBJECT_NULL
)
15103 vm_object_lock(object
);
15104 if (((object
->ref_count
== 1) ||
15105 (object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
&&
15106 object
->copy
== VM_OBJECT_NULL
)) &&
15107 object
->shadow
== VM_OBJECT_NULL
&&
15109 * "iokit_acct" entries are billed for their virtual size
15110 * (rather than for their resident pages only), so they
15111 * wouldn't benefit from making pages reusable, and it
15112 * would be hard to keep track of pages that are both
15113 * "iokit_acct" and "reusable" in the pmap stats and
15116 !(entry
->iokit_acct
||
15117 (!entry
->is_sub_map
&& !entry
->use_pmap
))) {
15118 if (object
->ref_count
!= 1) {
15119 vm_page_stats_reusable
.reusable_shared
++;
15125 if (kill_pages
!= -1) {
15126 vm_object_deactivate_pages(object
,
15128 end_offset
- start_offset
,
15130 TRUE
/*reusable_pages*/,
15134 vm_page_stats_reusable
.reusable_pages_shared
++;
15136 vm_object_unlock(object
);
15138 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE
||
15139 VME_ALIAS(entry
) == VM_MEMORY_MALLOC_LARGE_REUSED
) {
15142 * We do not hold the VM map exclusively here.
15143 * The "alias" field is not that critical, so it's
15144 * safe to update it here, as long as it is the only
15145 * one that can be modified while holding the VM map
15148 VME_ALIAS_SET(entry
, VM_MEMORY_MALLOC_LARGE_REUSABLE
);
15152 vm_map_unlock_read(map
);
15153 vm_page_stats_reusable
.reusable_pages_success
++;
15154 return KERN_SUCCESS
;
15158 static kern_return_t
15161 vm_map_offset_t start
,
15162 vm_map_offset_t end
)
15164 vm_map_entry_t entry
;
15167 * The MADV_REUSABLE operation doesn't require any changes to the
15168 * vm_map_entry_t's, so the read lock is sufficient.
15171 vm_map_lock_read(map
);
15172 assert(map
->pmap
!= kernel_pmap
); /* protect alias access */
15175 * The madvise semantics require that the address range be fully
15176 * allocated with no holes. Otherwise, we're required to return
15180 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15181 vm_map_unlock_read(map
);
15182 vm_page_stats_reusable
.can_reuse_failure
++;
15183 return KERN_INVALID_ADDRESS
;
15187 * Examine each vm_map_entry_t in the range.
15189 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
15190 entry
= entry
->vme_next
) {
15192 * Sanity check on the VM map entry.
15194 if (! vm_map_entry_is_reusable(entry
)) {
15195 vm_map_unlock_read(map
);
15196 vm_page_stats_reusable
.can_reuse_failure
++;
15197 return KERN_INVALID_ADDRESS
;
15201 vm_map_unlock_read(map
);
15202 vm_page_stats_reusable
.can_reuse_success
++;
15203 return KERN_SUCCESS
;
15208 static kern_return_t
15211 vm_map_offset_t start
,
15212 vm_map_offset_t end
)
15214 vm_map_entry_t entry
;
15217 * The MADV_PAGEOUT operation doesn't require any changes to the
15218 * vm_map_entry_t's, so the read lock is sufficient.
15221 vm_map_lock_read(map
);
15224 * The madvise semantics require that the address range be fully
15225 * allocated with no holes. Otherwise, we're required to return
15229 if (!vm_map_range_check(map
, start
, end
, &entry
)) {
15230 vm_map_unlock_read(map
);
15231 return KERN_INVALID_ADDRESS
;
15235 * Examine each vm_map_entry_t in the range.
15237 for (; entry
!= vm_map_to_entry(map
) && entry
->vme_start
< end
;
15238 entry
= entry
->vme_next
) {
15239 vm_object_t object
;
15242 * Sanity check on the VM map entry.
15244 if (entry
->is_sub_map
) {
15246 vm_map_offset_t submap_start
;
15247 vm_map_offset_t submap_end
;
15248 vm_map_entry_t submap_entry
;
15250 submap
= VME_SUBMAP(entry
);
15251 submap_start
= VME_OFFSET(entry
);
15252 submap_end
= submap_start
+ (entry
->vme_end
-
15255 vm_map_lock_read(submap
);
15257 if (! vm_map_range_check(submap
,
15261 vm_map_unlock_read(submap
);
15262 vm_map_unlock_read(map
);
15263 return KERN_INVALID_ADDRESS
;
15266 object
= VME_OBJECT(submap_entry
);
15267 if (submap_entry
->is_sub_map
||
15268 object
== VM_OBJECT_NULL
||
15269 !object
->internal
) {
15270 vm_map_unlock_read(submap
);
15274 vm_object_pageout(object
);
15276 vm_map_unlock_read(submap
);
15277 submap
= VM_MAP_NULL
;
15278 submap_entry
= VM_MAP_ENTRY_NULL
;
15282 object
= VME_OBJECT(entry
);
15283 if (entry
->is_sub_map
||
15284 object
== VM_OBJECT_NULL
||
15285 !object
->internal
) {
15289 vm_object_pageout(object
);
15292 vm_map_unlock_read(map
);
15293 return KERN_SUCCESS
;
15295 #endif /* MACH_ASSERT */
15299 * Routine: vm_map_entry_insert
15301 * Description: This routine inserts a new vm_entry in a locked map.
15304 vm_map_entry_insert(
15306 vm_map_entry_t insp_entry
,
15307 vm_map_offset_t start
,
15308 vm_map_offset_t end
,
15309 vm_object_t object
,
15310 vm_object_offset_t offset
,
15311 boolean_t needs_copy
,
15312 boolean_t is_shared
,
15313 boolean_t in_transition
,
15314 vm_prot_t cur_protection
,
15315 vm_prot_t max_protection
,
15316 vm_behavior_t behavior
,
15317 vm_inherit_t inheritance
,
15318 unsigned wired_count
,
15319 boolean_t no_cache
,
15320 boolean_t permanent
,
15321 unsigned int superpage_size
,
15322 boolean_t clear_map_aligned
,
15323 boolean_t is_submap
,
15324 boolean_t used_for_jit
,
15327 vm_map_entry_t new_entry
;
15329 assert(insp_entry
!= (vm_map_entry_t
)0);
15330 vm_map_lock_assert_exclusive(map
);
15332 #if DEVELOPMENT || DEBUG
15333 vm_object_offset_t end_offset
= 0;
15334 assertf(!os_add_overflow(end
- start
, offset
, &end_offset
), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end
- start
), offset
);
15335 #endif /* DEVELOPMENT || DEBUG */
15337 new_entry
= vm_map_entry_create(map
, !map
->hdr
.entries_pageable
);
15339 if (VM_MAP_PAGE_SHIFT(map
) != PAGE_SHIFT
) {
15340 new_entry
->map_aligned
= TRUE
;
15342 new_entry
->map_aligned
= FALSE
;
15344 if (clear_map_aligned
&&
15345 (! VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)) ||
15346 ! VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)))) {
15347 new_entry
->map_aligned
= FALSE
;
15350 new_entry
->vme_start
= start
;
15351 new_entry
->vme_end
= end
;
15352 assert(page_aligned(new_entry
->vme_start
));
15353 assert(page_aligned(new_entry
->vme_end
));
15354 if (new_entry
->map_aligned
) {
15355 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_start
,
15356 VM_MAP_PAGE_MASK(map
)));
15357 assert(VM_MAP_PAGE_ALIGNED(new_entry
->vme_end
,
15358 VM_MAP_PAGE_MASK(map
)));
15360 assert(new_entry
->vme_start
< new_entry
->vme_end
);
15362 VME_OBJECT_SET(new_entry
, object
);
15363 VME_OFFSET_SET(new_entry
, offset
);
15364 new_entry
->is_shared
= is_shared
;
15365 new_entry
->is_sub_map
= is_submap
;
15366 new_entry
->needs_copy
= needs_copy
;
15367 new_entry
->in_transition
= in_transition
;
15368 new_entry
->needs_wakeup
= FALSE
;
15369 new_entry
->inheritance
= inheritance
;
15370 new_entry
->protection
= cur_protection
;
15371 new_entry
->max_protection
= max_protection
;
15372 new_entry
->behavior
= behavior
;
15373 new_entry
->wired_count
= wired_count
;
15374 new_entry
->user_wired_count
= 0;
15377 * submap: "use_pmap" means "nested".
15380 new_entry
->use_pmap
= FALSE
;
15383 * object: "use_pmap" means "use pmap accounting" for footprint.
15386 new_entry
->use_pmap
= TRUE
;
15388 VME_ALIAS_SET(new_entry
, alias
);
15389 new_entry
->zero_wired_pages
= FALSE
;
15390 new_entry
->no_cache
= no_cache
;
15391 new_entry
->permanent
= permanent
;
15392 if (superpage_size
)
15393 new_entry
->superpage_size
= TRUE
;
15395 new_entry
->superpage_size
= FALSE
;
15397 #if CONFIG_EMBEDDED
15398 if (!(map
->jit_entry_exists
))
15399 #endif /* CONFIG_EMBEDDED */
15401 new_entry
->used_for_jit
= TRUE
;
15402 map
->jit_entry_exists
= TRUE
;
15404 /* Tell the pmap that it supports JIT. */
15405 pmap_set_jit_entitled(map
->pmap
);
15408 new_entry
->used_for_jit
= FALSE
;
15410 new_entry
->pmap_cs_associated
= FALSE
;
15411 new_entry
->iokit_acct
= FALSE
;
15412 new_entry
->vme_resilient_codesign
= FALSE
;
15413 new_entry
->vme_resilient_media
= FALSE
;
15414 new_entry
->vme_atomic
= FALSE
;
15417 * Insert the new entry into the list.
15420 vm_map_store_entry_link(map
, insp_entry
, new_entry
,
15421 VM_MAP_KERNEL_FLAGS_NONE
);
15422 map
->size
+= end
- start
;
15425 * Update the free space hint and the lookup hint.
15428 SAVE_HINT_MAP_WRITE(map
, new_entry
);
/*
 *	Routine:	vm_map_remap_extract
 *
 *	Description:	This routine returns a vm_entry list from a map.
 */
15437 static kern_return_t
15438 vm_map_remap_extract(
15440 vm_map_offset_t addr
,
15441 vm_map_size_t size
,
15443 struct vm_map_header
*map_header
,
15444 vm_prot_t
*cur_protection
,
15445 vm_prot_t
*max_protection
,
15446 /* What, no behavior? */
15447 vm_inherit_t inheritance
,
15448 boolean_t pageable
,
15449 boolean_t same_map
,
15450 vm_map_kernel_flags_t vmk_flags
)
15452 kern_return_t result
;
15453 vm_map_size_t mapped_size
;
15454 vm_map_size_t tmp_size
;
15455 vm_map_entry_t src_entry
; /* result of last map lookup */
15456 vm_map_entry_t new_entry
;
15457 vm_object_offset_t offset
;
15458 vm_map_offset_t map_address
;
15459 vm_map_offset_t src_start
; /* start of entry to map */
15460 vm_map_offset_t src_end
; /* end of region to be mapped */
15461 vm_object_t object
;
15462 vm_map_version_t version
;
15463 boolean_t src_needs_copy
;
15464 boolean_t new_entry_needs_copy
;
15465 vm_map_entry_t saved_src_entry
;
15466 boolean_t src_entry_was_wired
;
15467 vm_prot_t max_prot_for_prot_copy
;
15469 assert(map
!= VM_MAP_NULL
);
15471 assert(size
== vm_map_round_page(size
, PAGE_MASK
));
15472 assert(inheritance
== VM_INHERIT_NONE
||
15473 inheritance
== VM_INHERIT_COPY
||
15474 inheritance
== VM_INHERIT_SHARE
);
15477 * Compute start and end of region.
15479 src_start
= vm_map_trunc_page(addr
, PAGE_MASK
);
15480 src_end
= vm_map_round_page(src_start
+ size
, PAGE_MASK
);
15484 * Initialize map_header.
15486 map_header
->links
.next
= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
15487 map_header
->links
.prev
= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
15488 map_header
->nentries
= 0;
15489 map_header
->entries_pageable
= pageable
;
15490 map_header
->page_shift
= PAGE_SHIFT
;
15492 vm_map_store_init( map_header
);
15494 if (copy
&& vmk_flags
.vmkf_remap_prot_copy
) {
15495 max_prot_for_prot_copy
= *max_protection
& VM_PROT_ALL
;
15497 max_prot_for_prot_copy
= VM_PROT_NONE
;
15499 *cur_protection
= VM_PROT_ALL
;
15500 *max_protection
= VM_PROT_ALL
;
15504 result
= KERN_SUCCESS
;
15507 * The specified source virtual space might correspond to
15508 * multiple map entries, need to loop on them.
15511 while (mapped_size
!= size
) {
15512 vm_map_size_t entry_size
;
15515 * Find the beginning of the region.
15517 if (! vm_map_lookup_entry(map
, src_start
, &src_entry
)) {
15518 result
= KERN_INVALID_ADDRESS
;
15522 if (src_start
< src_entry
->vme_start
||
15523 (mapped_size
&& src_start
!= src_entry
->vme_start
)) {
15524 result
= KERN_INVALID_ADDRESS
;
15528 tmp_size
= size
- mapped_size
;
15529 if (src_end
> src_entry
->vme_end
)
15530 tmp_size
-= (src_end
- src_entry
->vme_end
);
15532 entry_size
= (vm_map_size_t
)(src_entry
->vme_end
-
15533 src_entry
->vme_start
);
15535 if(src_entry
->is_sub_map
) {
15536 vm_map_reference(VME_SUBMAP(src_entry
));
15537 object
= VM_OBJECT_NULL
;
15539 object
= VME_OBJECT(src_entry
);
15540 if (src_entry
->iokit_acct
) {
15542 * This entry uses "IOKit accounting".
15544 } else if (object
!= VM_OBJECT_NULL
&&
15545 object
->purgable
!= VM_PURGABLE_DENY
) {
15547 * Purgeable objects have their own accounting:
15548 * no pmap accounting for them.
15550 assertf(!src_entry
->use_pmap
,
15551 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15554 (uint64_t)src_entry
->vme_start
,
15555 (uint64_t)src_entry
->vme_end
,
15556 src_entry
->protection
,
15557 src_entry
->max_protection
,
15558 VME_ALIAS(src_entry
));
15561 * Not IOKit or purgeable:
15562 * must be accounted by pmap stats.
15564 assertf(src_entry
->use_pmap
,
15565 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15568 (uint64_t)src_entry
->vme_start
,
15569 (uint64_t)src_entry
->vme_end
,
15570 src_entry
->protection
,
15571 src_entry
->max_protection
,
15572 VME_ALIAS(src_entry
));
15575 if (object
== VM_OBJECT_NULL
) {
15576 object
= vm_object_allocate(entry_size
);
15577 VME_OFFSET_SET(src_entry
, 0);
15578 VME_OBJECT_SET(src_entry
, object
);
15579 assert(src_entry
->use_pmap
);
15580 } else if (object
->copy_strategy
!=
15581 MEMORY_OBJECT_COPY_SYMMETRIC
) {
15583 * We are already using an asymmetric
15584 * copy, and therefore we already have
15585 * the right object.
15587 assert(!src_entry
->needs_copy
);
15588 } else if (src_entry
->needs_copy
|| object
->shadowed
||
15589 (object
->internal
&& !object
->true_share
&&
15590 !src_entry
->is_shared
&&
15591 object
->vo_size
> entry_size
)) {
15593 VME_OBJECT_SHADOW(src_entry
, entry_size
);
15594 assert(src_entry
->use_pmap
);
15596 if (!src_entry
->needs_copy
&&
15597 (src_entry
->protection
& VM_PROT_WRITE
)) {
15600 assert(!pmap_has_prot_policy(src_entry
->protection
));
15602 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
15604 if (override_nx(map
,
15605 VME_ALIAS(src_entry
))
15607 prot
|= VM_PROT_EXECUTE
;
15609 assert(!pmap_has_prot_policy(prot
));
15611 if(map
->mapped_in_other_pmaps
) {
15612 vm_object_pmap_protect(
15613 VME_OBJECT(src_entry
),
15614 VME_OFFSET(src_entry
),
15617 src_entry
->vme_start
,
15620 pmap_protect(vm_map_pmap(map
),
15621 src_entry
->vme_start
,
15622 src_entry
->vme_end
,
15627 object
= VME_OBJECT(src_entry
);
15628 src_entry
->needs_copy
= FALSE
;
15632 vm_object_lock(object
);
15633 vm_object_reference_locked(object
); /* object ref. for new entry */
15634 if (object
->copy_strategy
==
15635 MEMORY_OBJECT_COPY_SYMMETRIC
) {
15636 object
->copy_strategy
=
15637 MEMORY_OBJECT_COPY_DELAY
;
15639 vm_object_unlock(object
);
15642 offset
= (VME_OFFSET(src_entry
) +
15643 (src_start
- src_entry
->vme_start
));
15645 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
15646 vm_map_entry_copy(new_entry
, src_entry
);
15647 if (new_entry
->is_sub_map
) {
15648 /* clr address space specifics */
15649 new_entry
->use_pmap
= FALSE
;
15652 * We're dealing with a copy-on-write operation,
15653 * so the resulting mapping should not inherit the
15654 * original mapping's accounting settings.
15655 * "use_pmap" should be reset to its default (TRUE)
15656 * so that the new mapping gets accounted for in
15657 * the task's memory footprint.
15659 new_entry
->use_pmap
= TRUE
;
15661 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15662 assert(!new_entry
->iokit_acct
);
15664 new_entry
->map_aligned
= FALSE
;
15666 new_entry
->vme_start
= map_address
;
15667 new_entry
->vme_end
= map_address
+ tmp_size
;
15668 assert(new_entry
->vme_start
< new_entry
->vme_end
);
15669 if (copy
&& vmk_flags
.vmkf_remap_prot_copy
) {
15671 * Remapping for vm_map_protect(VM_PROT_COPY)
15672 * to convert a read-only mapping into a
15673 * copy-on-write version of itself but
15674 * with write access:
15675 * keep the original inheritance and add
15676 * VM_PROT_WRITE to the max protection.
15678 new_entry
->inheritance
= src_entry
->inheritance
;
15679 new_entry
->protection
&= max_prot_for_prot_copy
;
15680 new_entry
->max_protection
|= VM_PROT_WRITE
;
15682 new_entry
->inheritance
= inheritance
;
15684 VME_OFFSET_SET(new_entry
, offset
);
15687 * The new region has to be copied now if required.
15692 * Cannot allow an entry describing a JIT
15693 * region to be shared across address spaces.
15695 if (src_entry
->used_for_jit
== TRUE
&& !same_map
) {
15696 #if CONFIG_EMBEDDED
15697 result
= KERN_INVALID_ARGUMENT
;
15699 #endif /* CONFIG_EMBEDDED */
15701 src_entry
->is_shared
= TRUE
;
15702 new_entry
->is_shared
= TRUE
;
15703 if (!(new_entry
->is_sub_map
))
15704 new_entry
->needs_copy
= FALSE
;
15706 } else if (src_entry
->is_sub_map
) {
15707 /* make this a COW sub_map if not already */
15708 assert(new_entry
->wired_count
== 0);
15709 new_entry
->needs_copy
= TRUE
;
15710 object
= VM_OBJECT_NULL
;
15711 } else if (src_entry
->wired_count
== 0 &&
15712 vm_object_copy_quickly(&VME_OBJECT(new_entry
),
15713 VME_OFFSET(new_entry
),
15714 (new_entry
->vme_end
-
15715 new_entry
->vme_start
),
15717 &new_entry_needs_copy
)) {
15719 new_entry
->needs_copy
= new_entry_needs_copy
;
15720 new_entry
->is_shared
= FALSE
;
15721 assertf(new_entry
->use_pmap
, "map %p new_entry %p\n", map
, new_entry
);
15724 * Handle copy_on_write semantics.
15726 if (src_needs_copy
&& !src_entry
->needs_copy
) {
15729 assert(!pmap_has_prot_policy(src_entry
->protection
));
15731 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
15733 if (override_nx(map
,
15734 VME_ALIAS(src_entry
))
15736 prot
|= VM_PROT_EXECUTE
;
15738 assert(!pmap_has_prot_policy(prot
));
15740 vm_object_pmap_protect(object
,
15743 ((src_entry
->is_shared
15744 || map
->mapped_in_other_pmaps
) ?
15745 PMAP_NULL
: map
->pmap
),
15746 src_entry
->vme_start
,
15749 assert(src_entry
->wired_count
== 0);
15750 src_entry
->needs_copy
= TRUE
;
15753 * Throw away the old object reference of the new entry.
15755 vm_object_deallocate(object
);
15758 new_entry
->is_shared
= FALSE
;
15759 assertf(new_entry
->use_pmap
, "map %p new_entry %p\n", map
, new_entry
);
15761 src_entry_was_wired
= (src_entry
->wired_count
> 0);
15762 saved_src_entry
= src_entry
;
15763 src_entry
= VM_MAP_ENTRY_NULL
;
15766 * The map can be safely unlocked since we
15767 * already hold a reference on the object.
15769 * Record the timestamp of the map for later
15770 * verification, and unlock the map.
15772 version
.main_timestamp
= map
->timestamp
;
15773 vm_map_unlock(map
); /* Increments timestamp once! */
15776 * Perform the copy.
15778 if (src_entry_was_wired
> 0) {
15779 vm_object_lock(object
);
15780 result
= vm_object_copy_slowly(
15783 (new_entry
->vme_end
-
15784 new_entry
->vme_start
),
15786 &VME_OBJECT(new_entry
));
15788 VME_OFFSET_SET(new_entry
, 0);
15789 new_entry
->needs_copy
= FALSE
;
15791 vm_object_offset_t new_offset
;
15793 new_offset
= VME_OFFSET(new_entry
);
15794 result
= vm_object_copy_strategically(
15797 (new_entry
->vme_end
-
15798 new_entry
->vme_start
),
15799 &VME_OBJECT(new_entry
),
15801 &new_entry_needs_copy
);
15802 if (new_offset
!= VME_OFFSET(new_entry
)) {
15803 VME_OFFSET_SET(new_entry
, new_offset
);
15806 new_entry
->needs_copy
= new_entry_needs_copy
;
15810 * Throw away the old object reference of the new entry.
15812 vm_object_deallocate(object
);
15814 if (result
!= KERN_SUCCESS
&&
15815 result
!= KERN_MEMORY_RESTART_COPY
) {
15816 _vm_map_entry_dispose(map_header
, new_entry
);
15822 * Verify that the map has not substantially
15823 * changed while the copy was being made.
15827 if (version
.main_timestamp
+ 1 != map
->timestamp
) {
15829 * Simple version comparison failed.
15831 * Retry the lookup and verify that the
15832 * same object/offset are still present.
15834 saved_src_entry
= VM_MAP_ENTRY_NULL
;
15835 vm_object_deallocate(VME_OBJECT(new_entry
));
15836 _vm_map_entry_dispose(map_header
, new_entry
);
15837 if (result
== KERN_MEMORY_RESTART_COPY
)
15838 result
= KERN_SUCCESS
;
15841 /* map hasn't changed: src_entry is still valid */
15842 src_entry
= saved_src_entry
;
15843 saved_src_entry
= VM_MAP_ENTRY_NULL
;
15845 if (result
== KERN_MEMORY_RESTART_COPY
) {
15846 vm_object_reference(object
);
15851 _vm_map_store_entry_link(map_header
,
15852 map_header
->links
.prev
, new_entry
);
15854 /*Protections for submap mapping are irrelevant here*/
15855 if( !src_entry
->is_sub_map
) {
15856 *cur_protection
&= src_entry
->protection
;
15857 *max_protection
&= src_entry
->max_protection
;
15859 map_address
+= tmp_size
;
15860 mapped_size
+= tmp_size
;
15861 src_start
+= tmp_size
;
15865 vm_map_unlock(map
);
15866 if (result
!= KERN_SUCCESS
) {
15868 * Free all allocated elements.
15870 for (src_entry
= map_header
->links
.next
;
15871 src_entry
!= CAST_TO_VM_MAP_ENTRY(&map_header
->links
);
15872 src_entry
= new_entry
) {
15873 new_entry
= src_entry
->vme_next
;
15874 _vm_map_store_entry_unlink(map_header
, src_entry
);
15875 if (src_entry
->is_sub_map
) {
15876 vm_map_deallocate(VME_SUBMAP(src_entry
));
15878 vm_object_deallocate(VME_OBJECT(src_entry
));
15880 _vm_map_entry_dispose(map_header
, src_entry
);
/*
 *	Routine:	vm_remap
 *
 *		Map a portion of a task's address space.
 *		The mapped region must not overlap more than
 *		one vm memory object.  Protections and
 *		inheritance attributes remain the same
 *		as in the original task and are out parameters.
 *		Source and target task can be identical.
 *		Other attributes are identical as for vm_map().
 */
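/*
 * From user space this path is reached via mach_vm_remap().  A minimal
 * sketch (illustration only; "alias_buffer" is a hypothetical helper)
 * that creates a copy-on-write alias of an existing buffer within the
 * caller's own address space:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	kern_return_t
 *	alias_buffer(mach_vm_address_t src, mach_vm_size_t size,
 *	    mach_vm_address_t *dst)
 *	{
 *		vm_prot_t cur_prot, max_prot;
 *
 *		*dst = 0;
 *		return mach_vm_remap(mach_task_self(), dst, size, 0,
 *		    VM_FLAGS_ANYWHERE,
 *		    mach_task_self(), src,
 *		    TRUE,		// copy: COW snapshot instead of sharing
 *		    &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
 *	}
 */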
15899 vm_map_t target_map
,
15900 vm_map_address_t
*address
,
15901 vm_map_size_t size
,
15902 vm_map_offset_t mask
,
15904 vm_map_kernel_flags_t vmk_flags
,
15907 vm_map_offset_t memory_address
,
15909 vm_prot_t
*cur_protection
,
15910 vm_prot_t
*max_protection
,
15911 vm_inherit_t inheritance
)
15913 kern_return_t result
;
15914 vm_map_entry_t entry
;
15915 vm_map_entry_t insp_entry
= VM_MAP_ENTRY_NULL
;
15916 vm_map_entry_t new_entry
;
15917 struct vm_map_header map_header
;
15918 vm_map_offset_t offset_in_mapping
;
15920 if (target_map
== VM_MAP_NULL
)
15921 return KERN_INVALID_ARGUMENT
;
15923 switch (inheritance
) {
15924 case VM_INHERIT_NONE
:
15925 case VM_INHERIT_COPY
:
15926 case VM_INHERIT_SHARE
:
15927 if (size
!= 0 && src_map
!= VM_MAP_NULL
)
15931 return KERN_INVALID_ARGUMENT
;
	/*
	 * If the user is requesting that we return the address of the
	 * first byte of the data (rather than the base of the page),
	 * then we use different rounding semantics: specifically,
	 * we assume that (memory_address, size) describes a region
	 * all of whose pages we must cover, rather than a base to be truncated
	 * down and a size to be added to that base.  So we figure out
	 * the highest page that the requested region includes and make
	 * sure that the size will cover it.
	 *
	 * The key example we're worried about is of the form:
	 *
	 *	memory_address = 0x1ff0, size = 0x20
	 *
	 * With the old semantics, we round down the memory_address to 0x1000
	 * and round up the size to 0x1000, resulting in our covering *only*
	 * page 0x1000.  With the new semantics, we'd realize that the region
	 * covers 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover
	 * both page 0x1000 and page 0x2000 in the region we remap.
	 */
15954 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
15955 offset_in_mapping
= memory_address
- vm_map_trunc_page(memory_address
, PAGE_MASK
);
15956 size
= vm_map_round_page(memory_address
+ size
- vm_map_trunc_page(memory_address
, PAGE_MASK
), PAGE_MASK
);
15958 size
= vm_map_round_page(size
, PAGE_MASK
);
15961 return KERN_INVALID_ARGUMENT
;
15964 result
= vm_map_remap_extract(src_map
, memory_address
,
15965 size
, copy
, &map_header
,
15969 target_map
->hdr
.entries_pageable
,
15970 src_map
== target_map
,
15973 if (result
!= KERN_SUCCESS
) {
15978 * Allocate/check a range of free virtual address
15979 * space for the target
15981 *address
= vm_map_trunc_page(*address
,
15982 VM_MAP_PAGE_MASK(target_map
));
15983 vm_map_lock(target_map
);
15984 result
= vm_map_remap_range_allocate(target_map
, address
, size
,
15985 mask
, flags
, vmk_flags
, tag
,
15988 for (entry
= map_header
.links
.next
;
15989 entry
!= CAST_TO_VM_MAP_ENTRY(&map_header
.links
);
15990 entry
= new_entry
) {
15991 new_entry
= entry
->vme_next
;
15992 _vm_map_store_entry_unlink(&map_header
, entry
);
15993 if (result
== KERN_SUCCESS
) {
15994 if (flags
& VM_FLAGS_RESILIENT_CODESIGN
) {
15995 /* no codesigning -> read-only access */
15996 entry
->max_protection
= VM_PROT_READ
;
15997 entry
->protection
= VM_PROT_READ
;
15998 entry
->vme_resilient_codesign
= TRUE
;
16000 entry
->vme_start
+= *address
;
16001 entry
->vme_end
+= *address
;
16002 assert(!entry
->map_aligned
);
16003 vm_map_store_entry_link(target_map
, insp_entry
, entry
,
16005 insp_entry
= entry
;
16007 if (!entry
->is_sub_map
) {
16008 vm_object_deallocate(VME_OBJECT(entry
));
16010 vm_map_deallocate(VME_SUBMAP(entry
));
16012 _vm_map_entry_dispose(&map_header
, entry
);
16016 if (flags
& VM_FLAGS_RESILIENT_CODESIGN
) {
16017 *cur_protection
= VM_PROT_READ
;
16018 *max_protection
= VM_PROT_READ
;
16021 if( target_map
->disable_vmentry_reuse
== TRUE
) {
16022 assert(!target_map
->is_nested_map
);
16023 if( target_map
->highest_entry_end
< insp_entry
->vme_end
){
16024 target_map
->highest_entry_end
= insp_entry
->vme_end
;
	if (result == KERN_SUCCESS) {
		target_map->size += size;
		SAVE_HINT_MAP_WRITE(target_map, insp_entry);

#if PMAP_CS
		if (*max_protection & VM_PROT_EXECUTE) {
			vm_map_address_t region_start = 0, region_size = 0;
			struct pmap_cs_code_directory *region_cd = NULL;
			vm_map_address_t base = 0;
			struct pmap_cs_lookup_results results = {};
			vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
			vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);

			pmap_cs_lookup(src_map->pmap, memory_address, &results);
			region_size = results.region_size;
			region_start = results.region_start;
			region_cd = results.region_cd_entry;
			base = results.base;

			if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
				*cur_protection = VM_PROT_READ;
				*max_protection = VM_PROT_READ;
				printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
				       "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
				       page_addr, page_addr+assoc_size, *address,
				       region_start, region_size,
				       region_cd != NULL ? "not " : ""	// Don't leak kernel slide
				       );
			}
		}
#endif /* PMAP_CS */
	}
	vm_map_unlock(target_map);
	if (result == KERN_SUCCESS && target_map->wiring_required)
		result = vm_map_wire_kernel(target_map, *address,
					    *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
					    TRUE);
	/*
	 * If requested, return the address of the data pointed to by the
	 * request, rather than the base of the resulting page.
	 */
	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
		*address += offset_in_mapping;
	}

	return result;
}
/*
 *	Routine:	vm_map_remap_range_allocate
 *
 *	Description:
 *		Allocate a range in the specified virtual address map.
 *		Returns the address and the map entry just before the allocated
 *		range.
 *
 *	Map must be locked.
 */

static kern_return_t
vm_map_remap_range_allocate(
	vm_map_t		map,
	vm_map_address_t	*address,	/* IN/OUT */
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_kernel_flags_t	vmk_flags,
	__unused vm_tag_t	tag,
	vm_map_entry_t		*map_entry)	/* OUT */
{
	vm_map_entry_t	entry;
	vm_map_offset_t	start;
	vm_map_offset_t	end;
	vm_map_offset_t	desired_empty_end;
	kern_return_t	kr;
	vm_map_entry_t	hole_entry;
16112 if (flags
& VM_FLAGS_ANYWHERE
)
16114 if (flags
& VM_FLAGS_RANDOM_ADDR
)
16117 * Get a random start address.
16119 kr
= vm_map_random_address_for_size(map
, address
, size
);
16120 if (kr
!= KERN_SUCCESS
) {
16127 * Calculate the first possible address.
16130 if (start
< map
->min_offset
)
16131 start
= map
->min_offset
;
16132 if (start
> map
->max_offset
)
16133 return(KERN_NO_SPACE
);
16136 * Look for the first possible address;
16137 * if there's already something at this
16138 * address, we have to start after it.
16141 if( map
->disable_vmentry_reuse
== TRUE
) {
16142 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
16145 if (map
->holelistenabled
) {
16146 hole_entry
= CAST_TO_VM_MAP_ENTRY(map
->holes_list
);
16148 if (hole_entry
== NULL
) {
16150 * No more space in the map?
16152 return(KERN_NO_SPACE
);
16155 boolean_t found_hole
= FALSE
;
16158 if (hole_entry
->vme_start
>= start
) {
16159 start
= hole_entry
->vme_start
;
16164 if (hole_entry
->vme_end
> start
) {
16168 hole_entry
= hole_entry
->vme_next
;
16170 } while (hole_entry
!= CAST_TO_VM_MAP_ENTRY(map
->holes_list
));
16172 if (found_hole
== FALSE
) {
16173 return (KERN_NO_SPACE
);
16176 entry
= hole_entry
;
16179 assert(first_free_is_valid(map
));
16180 if (start
== map
->min_offset
) {
16181 if ((entry
= map
->first_free
) != vm_map_to_entry(map
))
16182 start
= entry
->vme_end
;
16184 vm_map_entry_t tmp_entry
;
16185 if (vm_map_lookup_entry(map
, start
, &tmp_entry
))
16186 start
= tmp_entry
->vme_end
;
16190 start
= vm_map_round_page(start
,
16191 VM_MAP_PAGE_MASK(map
));
16195 * In any case, the "entry" always precedes
16196 * the proposed new region throughout the
16201 vm_map_entry_t next
;
16204 * Find the end of the proposed new region.
16205 * Be sure we didn't go beyond the end, or
16206 * wrap around the address.
16209 end
= ((start
+ mask
) & ~mask
);
16210 end
= vm_map_round_page(end
,
16211 VM_MAP_PAGE_MASK(map
));
16213 return(KERN_NO_SPACE
);
16217 /* We want an entire page of empty space, but don't increase the allocation size. */
16218 desired_empty_end
= vm_map_round_page(end
, VM_MAP_PAGE_MASK(map
));
16220 if ((desired_empty_end
> map
->max_offset
) || (desired_empty_end
< start
)) {
16221 if (map
->wait_for_space
) {
16222 if (size
<= (map
->max_offset
-
16223 map
->min_offset
)) {
16224 assert_wait((event_t
) map
, THREAD_INTERRUPTIBLE
);
16225 vm_map_unlock(map
);
16226 thread_block(THREAD_CONTINUE_NULL
);
16232 return(KERN_NO_SPACE
);
16235 next
= entry
->vme_next
;
16237 if (map
->holelistenabled
) {
16238 if (entry
->vme_end
>= desired_empty_end
)
16242 * If there are no more entries, we must win.
16246 * If there is another entry, it must be
16247 * after the end of the potential new region.
16250 if (next
== vm_map_to_entry(map
))
16253 if (next
->vme_start
>= desired_empty_end
)
16258 * Didn't fit -- move to the next entry.
16263 if (map
->holelistenabled
) {
16264 if (entry
== CAST_TO_VM_MAP_ENTRY(map
->holes_list
)) {
16268 return(KERN_NO_SPACE
);
16270 start
= entry
->vme_start
;
16272 start
= entry
->vme_end
;
16276 if (map
->holelistenabled
) {
16278 if (vm_map_lookup_entry(map
, entry
->vme_start
, &entry
)) {
16279 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry
, (unsigned long long)entry
->vme_start
);
16286 vm_map_entry_t temp_entry
;
16290 * the address doesn't itself violate
16291 * the mask requirement.
16294 if ((start
& mask
) != 0)
16295 return(KERN_NO_SPACE
);
16299 * ... the address is within bounds
16302 end
= start
+ size
;
16304 if ((start
< map
->min_offset
) ||
16305 (end
> map
->max_offset
) ||
16307 return(KERN_INVALID_ADDRESS
);
16311 * If we're asked to overwrite whatever was mapped in that
16312 * range, first deallocate that range.
16314 if (flags
& VM_FLAGS_OVERWRITE
) {
16316 int remove_flags
= VM_MAP_REMOVE_SAVE_ENTRIES
| VM_MAP_REMOVE_NO_MAP_ALIGN
;
16319 * We use a "zap_map" to avoid having to unlock
16320 * the "map" in vm_map_delete(), which would compromise
16321 * the atomicity of the "deallocate" and then "remap"
16324 zap_map
= vm_map_create(PMAP_NULL
,
16327 map
->hdr
.entries_pageable
);
16328 if (zap_map
== VM_MAP_NULL
) {
16329 return KERN_RESOURCE_SHORTAGE
;
16331 vm_map_set_page_shift(zap_map
, VM_MAP_PAGE_SHIFT(map
));
16332 vm_map_disable_hole_optimization(zap_map
);
16334 if (vmk_flags
.vmkf_overwrite_immutable
) {
16335 remove_flags
|= VM_MAP_REMOVE_IMMUTABLE
;
16337 kr
= vm_map_delete(map
, start
, end
,
16340 if (kr
== KERN_SUCCESS
) {
16341 vm_map_destroy(zap_map
,
16342 VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
16343 zap_map
= VM_MAP_NULL
;
16348 * ... the starting address isn't allocated
16351 if (vm_map_lookup_entry(map
, start
, &temp_entry
))
16352 return(KERN_NO_SPACE
);
16354 entry
= temp_entry
;
16357 * ... the next region doesn't overlap the
16361 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
16362 (entry
->vme_next
->vme_start
< end
))
16363 return(KERN_NO_SPACE
);
16365 *map_entry
= entry
;
16366 return(KERN_SUCCESS
);
 *		Set the address map for the current thread to the specified map
 */
vm_map_t
vm_map_switch(
	vm_map_t	map)
{
	int		mycpu;
	thread_t	thread = current_thread();
	vm_map_t	oldmap = thread->map;

	mp_disable_preemption();
	mycpu = cpu_number();

	/*
	 *	Deactivate the current map and activate the requested map
	 */
	PMAP_SWITCH_USER(thread, map, mycpu);

	mp_enable_preemption();
	return(oldmap);
}
/*
 *	Routine:	vm_map_write_user
 *
 *	Description:
 *		Copy out data from a kernel space into space in the
 *		destination map. The space must already exist in the
 *		destination map.
 *	NOTE:  This routine should only be called by threads
 *	which can block on a page fault. i.e. kernel mode user
 *	threads.
 */
kern_return_t
vm_map_write_user(
	vm_map_t		map,
	void			*src_p,
	vm_map_address_t	dst_addr,
	vm_map_size_t		size)
{
	kern_return_t	kr = KERN_SUCCESS;

	if(current_map() == map) {
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t	oldmap;

		/* take on the identity of the target map while doing */
		/* the transfer */

		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
/*
 *	Routine:	vm_map_read_user
 *
 *	Description:
 *		Copy in data from a user space source map into the
 *		kernel map. The space must already exist in the
 *		kernel map.
 *	NOTE:  This routine should only be called by threads
 *	which can block on a page fault. i.e. kernel mode user
 *	threads.
 */
kern_return_t
vm_map_read_user(
	vm_map_t		map,
	vm_map_address_t	src_addr,
	void			*dst_p,
	vm_map_size_t		size)
{
	kern_return_t	kr = KERN_SUCCESS;

	if(current_map() == map) {
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t	oldmap;

		/* take on the identity of the target map while doing */
		/* the transfer */

		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
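#if 0
/*
 * Illustrative sketch only, not part of the original source: a hypothetical
 * round trip through vm_map_write_user() and vm_map_read_user().  The map,
 * user address, kernel buffer and length are assumed to be supplied by the
 * caller and to describe valid user memory.
 */
static kern_return_t
example_user_copy_roundtrip(vm_map_t map, vm_map_address_t uaddr,
			    void *kbuf, vm_map_size_t len)
{
	kern_return_t kr;

	/* Copy the kernel buffer out into the target map... */
	kr = vm_map_write_user(map, kbuf, uaddr, len);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* ...then read the same bytes back into the kernel buffer. */
	return vm_map_read_user(map, uaddr, kbuf, len);
}
#endif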
/*
 *	vm_map_check_protection:
 *
 *	Assert that the target map allows the specified
 *	privilege on the entire address region given.
 *	The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
			vm_map_offset_t end, vm_prot_t protection)
{
	vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	vm_map_lock(map);

	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
	{
		vm_map_unlock(map);
		return (FALSE);
	}

	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		vm_map_unlock(map);
		return (FALSE);
	}

	entry = tmp_entry;

	while (start < end) {
		if (entry == vm_map_to_entry(map)) {
			vm_map_unlock(map);
			return (FALSE);
		}

		/*
		 *	No holes allowed!
		 */

		if (start < entry->vme_start) {
			vm_map_unlock(map);
			return (FALSE);
		}

		/*
		 * Check protection associated with entry.
		 */

		if ((entry->protection & protection) != protection) {
			vm_map_unlock(map);
			return (FALSE);
		}

		/* go to next entry */

		start = entry->vme_end;
		entry = entry->vme_next;
	}
	vm_map_unlock(map);
	return (TRUE);
}
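#if 0
/*
 * Illustrative sketch only, not part of the original source: a hypothetical
 * caller using vm_map_check_protection() to verify that an entire range is
 * readable before operating on it.  "map", "start" and "end" are assumed.
 */
static kern_return_t
example_require_readable(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	if (!vm_map_check_protection(map, start, end, VM_PROT_READ)) {
		return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}
#endif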
kern_return_t
vm_map_purgable_control(
	vm_map_t		map,
	vm_map_offset_t		address,
	vm_purgable_t		control,
	int			*state)
{
	vm_map_entry_t		entry;
	vm_object_t		object;
	kern_return_t		kr;
	boolean_t		was_nonvolatile;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	if (control != VM_PURGABLE_SET_STATE &&
	    control != VM_PURGABLE_GET_STATE &&
	    control != VM_PURGABLE_PURGE_ALL &&
	    control != VM_PURGABLE_SET_STATE_FROM_KERNEL)
		return(KERN_INVALID_ARGUMENT);

	if (control == VM_PURGABLE_PURGE_ALL) {
		vm_purgeable_object_purge_all();
		return KERN_SUCCESS;
	}

	if ((control == VM_PURGABLE_SET_STATE ||
	     control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
	     ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
		return(KERN_INVALID_ARGUMENT);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {

		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	if ((entry->protection & VM_PROT_WRITE) == 0) {
		/*
		 * Can't apply purgable controls to something you can't write.
		 */
		vm_map_unlock_read(map);
		return(KERN_PROTECTION_FAILURE);
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL ||
	    object->purgable == VM_PURGABLE_DENY) {
		/*
		 * Object must already be present and be purgeable.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

#if 00
	if (VME_OFFSET(entry) != 0 ||
	    entry->vme_end - entry->vme_start != object->vo_size) {
		/*
		 * Can only apply purgable controls to the whole (existing)
		 * object at once.
		 */
		vm_map_unlock_read(map);
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}
#endif

	assert(!entry->is_sub_map);
	assert(!entry->use_pmap); /* purgeable has its own accounting */

	vm_map_unlock_read(map);

	was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);

	kr = vm_object_purgable_control(object, control, state);

	if (was_nonvolatile &&
	    object->purgable != VM_PURGABLE_NONVOLATILE &&
	    map->pmap == kernel_pmap) {
#if DEBUG
		object->vo_purgeable_volatilizer = kernel_task;
#endif /* DEBUG */
	}

	vm_object_unlock(object);

	return kr;
}
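#if 0
/*
 * Illustrative sketch only, not part of the original source: a hypothetical
 * caller marking a purgeable mapping volatile.  "map" and "addr" are assumed
 * to describe a writable mapping of a purgeable object created elsewhere.
 */
static kern_return_t
example_make_volatile(vm_map_t map, vm_map_offset_t addr)
{
	int state = VM_PURGABLE_VOLATILE;

	return vm_map_purgable_control(map, addr,
				       VM_PURGABLE_SET_STATE, &state);
}
#endif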
kern_return_t
vm_map_page_query_internal(
	vm_map_t	target_map,
	vm_map_offset_t	offset,
	int		*disposition,
	int		*ref_count)
{
	kern_return_t			kr;
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count;

	count = VM_PAGE_INFO_BASIC_COUNT;
	kr = vm_map_page_info(target_map,
			      offset,
			      VM_PAGE_INFO_BASIC,
			      (vm_page_info_t) &info,
			      &count);
	if (kr == KERN_SUCCESS) {
		*disposition = info.disposition;
		*ref_count = info.ref_count;
	} else {
		*disposition = 0;
		*ref_count = 0;
	}

	return kr;
}

kern_return_t
vm_map_page_info(
	vm_map_t		map,
	vm_map_offset_t		offset,
	vm_page_info_flavor_t	flavor,
	vm_page_info_t		info,
	mach_msg_type_number_t	*count)
{
	return (vm_map_page_range_info_internal(map,
						offset, /* start of range */
						(offset + 1), /* this will get rounded in the call to the page boundary */
						flavor,
						info,
						count));
}
kern_return_t
vm_map_page_range_info_internal(
	vm_map_t		map,
	vm_map_offset_t		start_offset,
	vm_map_offset_t		end_offset,
	vm_page_info_flavor_t	flavor,
	vm_page_info_t		info,
	mach_msg_type_number_t	*count)
{
	vm_map_entry_t		map_entry = VM_MAP_ENTRY_NULL;
	vm_object_t		object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
	vm_page_t		m = VM_PAGE_NULL;
	kern_return_t		retval = KERN_SUCCESS;
	int			disposition = 0;
	int			ref_count = 0;
	int			depth = 0, info_idx = 0;
	vm_page_info_basic_t	basic_info = 0;
	vm_map_offset_t		offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
	vm_map_offset_t		start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
	boolean_t		do_region_footprint;

	switch (flavor) {
	case VM_PAGE_INFO_BASIC:
		if (*count != VM_PAGE_INFO_BASIC_COUNT) {
			/*
			 * The "vm_page_info_basic_data" structure was not
			 * properly padded, so allow the size to be off by
			 * one to maintain backwards binary compatibility...
			 */
			if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
				return KERN_INVALID_ARGUMENT;
		}
		break;
	default:
		return KERN_INVALID_ARGUMENT;
	}
16726 do_region_footprint
= task_self_region_footprint();
16730 info_idx
= 0; /* Tracks the next index within the info structure to be filled.*/
16731 retval
= KERN_SUCCESS
;
16733 offset_in_page
= start_offset
& PAGE_MASK
;
16734 start
= vm_map_trunc_page(start_offset
, PAGE_MASK
);
16735 end
= vm_map_round_page(end_offset
, PAGE_MASK
);
16737 assert ((end
- start
) <= MAX_PAGE_RANGE_QUERY
);
16739 vm_map_lock_read(map
);
16741 for (curr_s_offset
= start
; curr_s_offset
< end
;) {
16743 * New lookup needs reset of these variables.
16745 curr_object
= object
= VM_OBJECT_NULL
;
16746 offset_in_object
= 0;
16750 if (do_region_footprint
&&
16751 curr_s_offset
>= vm_map_last_entry(map
)->vme_end
) {
16752 ledger_amount_t nonvol_compressed
;
16755 * Request for "footprint" info about a page beyond
16756 * the end of address space: this must be for
16757 * the fake region vm_map_region_recurse_64()
16758 * reported to account for non-volatile purgeable
16759 * memory owned by this task.
16762 nonvol_compressed
= 0;
16763 ledger_get_balance(
16765 task_ledgers
.purgeable_nonvolatile_compressed
,
16766 &nonvol_compressed
);
16767 if (curr_s_offset
- vm_map_last_entry(map
)->vme_end
<=
16768 (unsigned) nonvol_compressed
) {
16770 * We haven't reported all the "non-volatile
16771 * compressed" pages yet, so report this fake
16772 * page as "compressed".
16774 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
16777 * We've reported all the non-volatile
16778 * compressed page but not all the non-volatile
 * pages, so report this fake page as
16780 * "resident dirty".
16782 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
16783 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
16784 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
16787 case VM_PAGE_INFO_BASIC
:
16788 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
16789 basic_info
->disposition
= disposition
;
16790 basic_info
->ref_count
= 1;
16791 basic_info
->object_id
= INFO_MAKE_FAKE_OBJECT_ID(map
, task_ledgers
.purgeable_nonvolatile
);
16792 basic_info
->offset
= 0;
16793 basic_info
->depth
= 0;
16798 curr_s_offset
+= PAGE_SIZE
;
16803 * First, find the map entry covering "curr_s_offset", going down
16804 * submaps if necessary.
16806 if (!vm_map_lookup_entry(map
, curr_s_offset
, &map_entry
)) {
16807 /* no entry -> no object -> no page */
16809 if (curr_s_offset
< vm_map_min(map
)) {
16811 * Illegal address that falls below map min.
16813 curr_e_offset
= MIN(end
, vm_map_min(map
));
16815 } else if (curr_s_offset
>= vm_map_max(map
)) {
16817 * Illegal address that falls on/after map max.
16819 curr_e_offset
= end
;
16821 } else if (map_entry
== vm_map_to_entry(map
)) {
16825 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
16829 curr_e_offset
= MIN(map
->max_offset
, end
);
16832 * Hole at start of the map.
16834 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
16837 if (map_entry
->vme_next
== vm_map_to_entry(map
)) {
16839 * Hole at the end of the map.
16841 curr_e_offset
= MIN(map
->max_offset
, end
);
16843 curr_e_offset
= MIN(map_entry
->vme_next
->vme_start
, end
);
16847 assert(curr_e_offset
>= curr_s_offset
);
16849 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> PAGE_SHIFT
;
16851 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
16853 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
16855 curr_s_offset
= curr_e_offset
;
16857 info_idx
+= num_pages
;
16862 /* compute offset from this map entry's start */
16863 offset_in_object
= curr_s_offset
- map_entry
->vme_start
;
16865 /* compute offset into this map entry's object (or submap) */
16866 offset_in_object
+= VME_OFFSET(map_entry
);
16868 if (map_entry
->is_sub_map
) {
16869 vm_map_t sub_map
= VM_MAP_NULL
;
16870 vm_page_info_t submap_info
= 0;
16871 vm_map_offset_t submap_s_offset
= 0, submap_e_offset
= 0, range_len
= 0;
16873 range_len
= MIN(map_entry
->vme_end
, end
) - curr_s_offset
;
16875 submap_s_offset
= offset_in_object
;
16876 submap_e_offset
= submap_s_offset
+ range_len
;
16878 sub_map
= VME_SUBMAP(map_entry
);
16880 vm_map_reference(sub_map
);
16881 vm_map_unlock_read(map
);
16883 submap_info
= (vm_page_info_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
16885 retval
= vm_map_page_range_info_internal(sub_map
,
16888 VM_PAGE_INFO_BASIC
,
16889 (vm_page_info_t
) submap_info
,
16892 assert(retval
== KERN_SUCCESS
);
16894 vm_map_lock_read(map
);
16895 vm_map_deallocate(sub_map
);
16897 /* Move the "info" index by the number of pages we inspected.*/
16898 info_idx
+= range_len
>> PAGE_SHIFT
;
16900 /* Move our current offset by the size of the range we inspected.*/
16901 curr_s_offset
+= range_len
;
16906 object
= VME_OBJECT(map_entry
);
16907 if (object
== VM_OBJECT_NULL
) {
16910 * We don't have an object here and, hence,
16911 * no pages to inspect. We'll fill up the
16912 * info structure appropriately.
16915 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
16917 uint64_t num_pages
= (curr_e_offset
- curr_s_offset
) >> PAGE_SHIFT
;
16919 void *info_ptr
= (void*) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
16921 bzero(info_ptr
, num_pages
* sizeof(struct vm_page_info_basic
));
16923 curr_s_offset
= curr_e_offset
;
16925 info_idx
+= num_pages
;
16930 if (do_region_footprint
) {
16935 if (map
->has_corpse_footprint
) {
16937 * Query the page info data we saved
16938 * while forking the corpse.
16940 vm_map_corpse_footprint_query_page_info(
16948 pmap_query_page_info(map
->pmap
,
16952 if (object
->purgable
== VM_PURGABLE_NONVOLATILE
&&
16953 /* && not tagged as no-footprint? */
16954 VM_OBJECT_OWNER(object
) != NULL
&&
16955 VM_OBJECT_OWNER(object
)->map
== map
) {
16956 if ((((curr_s_offset
16957 - map_entry
->vme_start
16958 + VME_OFFSET(map_entry
))
16960 (object
->resident_page_count
+
16961 vm_compressor_pager_get_count(object
->pager
)))) {
16963 * Non-volatile purgeable object owned
16964 * by this task: report the first
16965 * "#resident + #compressed" pages as
16966 * "resident" (to show that they
16967 * contribute to the footprint) but not
16968 * "dirty" (to avoid double-counting
16969 * with the fake "non-volatile" region
16970 * we'll report at the end of the
16971 * address space to account for all
16972 * (mapped or not) non-volatile memory
16973 * owned by this task.
16975 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
16977 } else if ((object
->purgable
== VM_PURGABLE_VOLATILE
||
16978 object
->purgable
== VM_PURGABLE_EMPTY
) &&
16979 /* && not tagged as no-footprint? */
16980 VM_OBJECT_OWNER(object
) != NULL
&&
16981 VM_OBJECT_OWNER(object
)->map
== map
) {
16982 if ((((curr_s_offset
16983 - map_entry
->vme_start
16984 + VME_OFFSET(map_entry
))
16986 object
->wired_page_count
)) {
16988 * Volatile|empty purgeable object owned
16989 * by this task: report the first
16990 * "#wired" pages as "resident" (to
16991 * show that they contribute to the
16992 * footprint) but not "dirty" (to avoid
16993 * double-counting with the fake
16994 * "non-volatile" region we'll report
16995 * at the end of the address space to
16996 * account for all (mapped or not)
16997 * non-volatile memory owned by this
17000 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17002 } else if (map_entry
->iokit_acct
&&
17003 object
->internal
&&
17004 object
->purgable
== VM_PURGABLE_DENY
) {
17006 * Non-purgeable IOKit memory: phys_footprint
17007 * includes the entire virtual mapping.
17009 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
17010 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17011 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
17012 } else if (pmap_disp
& (PMAP_QUERY_PAGE_ALTACCT
|
17013 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
)) {
17014 /* alternate accounting */
17015 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17016 if (map
->pmap
->footprint_was_suspended
||
17018 * XXX corpse does not know if original
17019 * pmap had its footprint suspended...
17021 map
->has_corpse_footprint
) {
17023 * The assertion below can fail if dyld
17024 * suspended footprint accounting
17025 * while doing some adjustments to
17026 * this page; the mapping would say
17027 * "use pmap accounting" but the page
17028 * would be marked "alternate
17032 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17033 assertf(!map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
17036 if (pmap_disp
& PMAP_QUERY_PAGE_PRESENT
) {
17037 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
17038 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17039 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
17040 if (pmap_disp
& PMAP_QUERY_PAGE_INTERNAL
) {
17041 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
17043 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
17045 } else if (pmap_disp
& PMAP_QUERY_PAGE_COMPRESSED
) {
17046 assertf(map_entry
->use_pmap
, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset
, map_entry
);
17047 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
17051 case VM_PAGE_INFO_BASIC
:
17052 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
17053 basic_info
->disposition
= disposition
;
17054 basic_info
->ref_count
= 1;
17055 basic_info
->object_id
= INFO_MAKE_FAKE_OBJECT_ID(map
, task_ledgers
.purgeable_nonvolatile
);
17056 basic_info
->offset
= 0;
17057 basic_info
->depth
= 0;
17062 curr_s_offset
+= PAGE_SIZE
;
17066 vm_object_reference(object
);
17068 * Shared mode -- so we can allow other readers
17069 * to grab the lock too.
17071 vm_object_lock_shared(object
);
17073 curr_e_offset
= MIN(map_entry
->vme_end
, end
);
17075 vm_map_unlock_read(map
);
17077 map_entry
= NULL
; /* map is unlocked, the entry is no longer valid. */
17079 curr_object
= object
;
17081 for (; curr_s_offset
< curr_e_offset
;) {
17083 if (object
== curr_object
) {
17084 ref_count
= curr_object
->ref_count
- 1; /* account for our object reference above. */
17086 ref_count
= curr_object
->ref_count
;
17089 curr_offset_in_object
= offset_in_object
;
17092 m
= vm_page_lookup(curr_object
, curr_offset_in_object
);
17094 if (m
!= VM_PAGE_NULL
) {
17096 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
17100 if (curr_object
->internal
&&
17101 curr_object
->alive
&&
17102 !curr_object
->terminating
&&
17103 curr_object
->pager_ready
) {
17105 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object
, curr_offset_in_object
)
17106 == VM_EXTERNAL_STATE_EXISTS
) {
17107 /* the pager has that page */
17108 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
17114 * Go down the VM object shadow chain until we find the page
17115 * we're looking for.
17118 if (curr_object
->shadow
!= VM_OBJECT_NULL
) {
17119 vm_object_t shadow
= VM_OBJECT_NULL
;
17121 curr_offset_in_object
+= curr_object
->vo_shadow_offset
;
17122 shadow
= curr_object
->shadow
;
17124 vm_object_lock_shared(shadow
);
17125 vm_object_unlock(curr_object
);
17127 curr_object
= shadow
;
17137 /* The ref_count is not strictly accurate, it measures the number */
17138 /* of entities holding a ref on the object, they may not be mapping */
17139 /* the object or may not be mapping the section holding the */
/* target page but it's still a ballpark number and though an over- */
17141 /* count, it picks up the copy-on-write cases */
17143 /* We could also get a picture of page sharing from pmap_attributes */
17144 /* but this would under count as only faulted-in mappings would */
17147 if ((curr_object
== object
) && curr_object
->shadow
)
17148 disposition
|= VM_PAGE_QUERY_PAGE_COPIED
;
17150 if (! curr_object
->internal
)
17151 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
17153 if (m
!= VM_PAGE_NULL
) {
17155 if (m
->vmp_fictitious
) {
17157 disposition
|= VM_PAGE_QUERY_PAGE_FICTITIOUS
;
17160 if (m
->vmp_dirty
|| pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m
)))
17161 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
17163 if (m
->vmp_reference
|| pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m
)))
17164 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
17166 if (m
->vmp_q_state
== VM_PAGE_ON_SPECULATIVE_Q
)
17167 disposition
|= VM_PAGE_QUERY_PAGE_SPECULATIVE
;
17169 if (m
->vmp_cs_validated
)
17170 disposition
|= VM_PAGE_QUERY_PAGE_CS_VALIDATED
;
17171 if (m
->vmp_cs_tainted
)
17172 disposition
|= VM_PAGE_QUERY_PAGE_CS_TAINTED
;
17174 disposition
|= VM_PAGE_QUERY_PAGE_CS_NX
;
17179 case VM_PAGE_INFO_BASIC
:
17180 basic_info
= (vm_page_info_basic_t
) (((uintptr_t) info
) + (info_idx
* sizeof(struct vm_page_info_basic
)));
17181 basic_info
->disposition
= disposition
;
17182 basic_info
->ref_count
= ref_count
;
17183 basic_info
->object_id
= (vm_object_id_t
) (uintptr_t)
17184 VM_KERNEL_ADDRPERM(curr_object
);
17185 basic_info
->offset
=
17186 (memory_object_offset_t
) curr_offset_in_object
+ offset_in_page
;
17187 basic_info
->depth
= depth
;
17194 offset_in_page
= 0; // This doesn't really make sense for any offset other than the starting offset.
17197 * Move to next offset in the range and in our object.
17199 curr_s_offset
+= PAGE_SIZE
;
17200 offset_in_object
+= PAGE_SIZE
;
17201 curr_offset_in_object
= offset_in_object
;
17203 if (curr_object
!= object
) {
17205 vm_object_unlock(curr_object
);
17207 curr_object
= object
;
17209 vm_object_lock_shared(curr_object
);
17212 vm_object_lock_yield_shared(curr_object
);
17216 vm_object_unlock(curr_object
);
17217 vm_object_deallocate(curr_object
);
17219 vm_map_lock_read(map
);
17222 vm_map_unlock_read(map
);
 *	Synchronises the memory range specified with its backing store
 *	image by either flushing or cleaning the contents to the appropriate
 *	memory manager engaging in a memory object synchronize dialog with
 *	the manager.  The client doesn't return until the manager issues the
 *	m_o_s_completed message.  MIG magically converts the user task parameter
 *	to the task's address map.
 *
 *	interpretation of sync_flags
 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
 *				  pages to manager.
 *
 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
 *				- discard pages, write dirty or precious
 *				  pages back to memory manager.
 *
 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
 *				- write dirty or precious pages back to
 *				  the memory manager.
 *
 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
 *				  is a hole in the region, and we would
 *				  have returned KERN_SUCCESS, return
 *				  KERN_INVALID_ADDRESS instead.
 *
 *	NOTE
 *	The memory object attributes have not yet been implemented, this
 *	function will have to deal with the invalidate attribute
 *
 *	RETURNS
 *	KERN_INVALID_TASK		Bad task parameter
 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
 *	KERN_SUCCESS			The usual.
 *	KERN_INVALID_ADDRESS		There was a hole in the region.
 */

kern_return_t
vm_map_msync(
	vm_map_t		map,
	vm_map_address_t	address,
	vm_map_size_t		size,
	vm_sync_t		sync_flags)
{
	vm_map_entry_t		entry;
	vm_map_size_t		amount_left;
	vm_object_offset_t	offset;
	boolean_t		do_sync_req;
	boolean_t		had_hole = FALSE;
	vm_map_offset_t		pmap_offset;
	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
	    (sync_flags & VM_SYNC_SYNCHRONOUS))
		return(KERN_INVALID_ARGUMENT);

	/*
	 * align address and size on page boundaries
	 */
	size = (vm_map_round_page(address + size,
				  VM_MAP_PAGE_MASK(map)) -
		vm_map_trunc_page(address,
				  VM_MAP_PAGE_MASK(map)));
	address = vm_map_trunc_page(address,
				    VM_MAP_PAGE_MASK(map));

	if (map == VM_MAP_NULL)
		return(KERN_INVALID_TASK);

	if (size == 0)
		return(KERN_SUCCESS);

	amount_left = size;
17300 while (amount_left
> 0) {
17301 vm_object_size_t flush_size
;
17302 vm_object_t object
;
17305 if (!vm_map_lookup_entry(map
,
17309 vm_map_size_t skip
;
17312 * hole in the address map.
17316 if (sync_flags
& VM_SYNC_KILLPAGES
) {
17318 * For VM_SYNC_KILLPAGES, there should be
17319 * no holes in the range, since we couldn't
17320 * prevent someone else from allocating in
17321 * that hole and we wouldn't want to "kill"
17324 vm_map_unlock(map
);
17329 * Check for empty map.
17331 if (entry
== vm_map_to_entry(map
) &&
17332 entry
->vme_next
== entry
) {
17333 vm_map_unlock(map
);
17337 * Check that we don't wrap and that
17338 * we have at least one real map entry.
17340 if ((map
->hdr
.nentries
== 0) ||
17341 (entry
->vme_next
->vme_start
< address
)) {
17342 vm_map_unlock(map
);
17346 * Move up to the next entry if needed
17348 skip
= (entry
->vme_next
->vme_start
- address
);
17349 if (skip
>= amount_left
)
17352 amount_left
-= skip
;
17353 address
= entry
->vme_next
->vme_start
;
17354 vm_map_unlock(map
);
17358 offset
= address
- entry
->vme_start
;
17359 pmap_offset
= address
;
17362 * do we have more to flush than is contained in this
17365 if (amount_left
+ entry
->vme_start
+ offset
> entry
->vme_end
) {
17366 flush_size
= entry
->vme_end
-
17367 (entry
->vme_start
+ offset
);
17369 flush_size
= amount_left
;
17371 amount_left
-= flush_size
;
17372 address
+= flush_size
;
17374 if (entry
->is_sub_map
== TRUE
) {
17375 vm_map_t local_map
;
17376 vm_map_offset_t local_offset
;
17378 local_map
= VME_SUBMAP(entry
);
17379 local_offset
= VME_OFFSET(entry
);
17380 vm_map_unlock(map
);
17385 sync_flags
) == KERN_INVALID_ADDRESS
) {
17390 object
= VME_OBJECT(entry
);
17393 * We can't sync this object if the object has not been
17396 if (object
== VM_OBJECT_NULL
) {
17397 vm_map_unlock(map
);
17400 offset
+= VME_OFFSET(entry
);
17402 vm_object_lock(object
);
17404 if (sync_flags
& (VM_SYNC_KILLPAGES
| VM_SYNC_DEACTIVATE
)) {
17405 int kill_pages
= 0;
17406 boolean_t reusable_pages
= FALSE
;
17408 if (sync_flags
& VM_SYNC_KILLPAGES
) {
17409 if (((object
->ref_count
== 1) ||
17410 ((object
->copy_strategy
!=
17411 MEMORY_OBJECT_COPY_SYMMETRIC
) &&
17412 (object
->copy
== VM_OBJECT_NULL
))) &&
17413 (object
->shadow
== VM_OBJECT_NULL
)) {
17414 if (object
->ref_count
!= 1) {
17415 vm_page_stats_reusable
.free_shared
++;
17422 if (kill_pages
!= -1)
17423 vm_object_deactivate_pages(
17426 (vm_object_size_t
) flush_size
,
17431 vm_object_unlock(object
);
17432 vm_map_unlock(map
);
17436 * We can't sync this object if there isn't a pager.
17437 * Don't bother to sync internal objects, since there can't
17438 * be any "permanent" storage for these objects anyway.
17440 if ((object
->pager
== MEMORY_OBJECT_NULL
) ||
17441 (object
->internal
) || (object
->private)) {
17442 vm_object_unlock(object
);
17443 vm_map_unlock(map
);
17447 * keep reference on the object until syncing is done
17449 vm_object_reference_locked(object
);
17450 vm_object_unlock(object
);
17452 vm_map_unlock(map
);
17454 do_sync_req
= vm_object_sync(object
,
17457 sync_flags
& VM_SYNC_INVALIDATE
,
17458 ((sync_flags
& VM_SYNC_SYNCHRONOUS
) ||
17459 (sync_flags
& VM_SYNC_ASYNCHRONOUS
)),
17460 sync_flags
& VM_SYNC_SYNCHRONOUS
);
17462 if ((sync_flags
& VM_SYNC_INVALIDATE
) && object
->resident_page_count
== 0) {
17464 * clear out the clustering and read-ahead hints
17466 vm_object_lock(object
);
17468 object
->pages_created
= 0;
17469 object
->pages_used
= 0;
17470 object
->sequential
= 0;
17471 object
->last_alloc
= 0;
17473 vm_object_unlock(object
);
17475 vm_object_deallocate(object
);
	/* for proper msync() behaviour */
	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
		return(KERN_INVALID_ADDRESS);

	return(KERN_SUCCESS);
}
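#if 0
/*
 * Illustrative sketch only, not part of the original source: a hypothetical
 * synchronous flush of a pager-backed range that also reports holes, per the
 * sync_flags interpretation documented above.  "map", "addr" and "len" are
 * assumed.
 */
static kern_return_t
example_flush_range(vm_map_t map, vm_map_address_t addr, vm_map_size_t len)
{
	return vm_map_msync(map, addr, len,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}
#endif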
17486 * Routine: convert_port_entry_to_map
17488 * Convert from a port specifying an entry or a task
17489 * to a map. Doesn't consume the port ref; produces a map ref,
17490 * which may be null. Unlike convert_port_to_map, the
17491 * port may be task or a named entry backed.
17498 convert_port_entry_to_map(
17502 vm_named_entry_t named_entry
;
17503 uint32_t try_failed_count
= 0;
17505 if(IP_VALID(port
) && (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
17508 if(ip_active(port
) && (ip_kotype(port
)
17509 == IKOT_NAMED_ENTRY
)) {
17511 (vm_named_entry_t
)port
->ip_kobject
;
17512 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
17515 try_failed_count
++;
17516 mutex_pause(try_failed_count
);
17519 named_entry
->ref_count
++;
17520 lck_mtx_unlock(&(named_entry
)->Lock
);
17522 if ((named_entry
->is_sub_map
) &&
17523 (named_entry
->protection
17524 & VM_PROT_WRITE
)) {
17525 map
= named_entry
->backing
.map
;
17527 mach_destroy_memory_entry(port
);
17528 return VM_MAP_NULL
;
17530 vm_map_reference_swap(map
);
17531 mach_destroy_memory_entry(port
);
17535 return VM_MAP_NULL
;
17539 map
= convert_port_to_map(port
);
17545 * Routine: convert_port_entry_to_object
17547 * Convert from a port specifying a named entry to an
17548 * object. Doesn't consume the port ref; produces a map ref,
17549 * which may be null.
17556 convert_port_entry_to_object(
17559 vm_object_t object
= VM_OBJECT_NULL
;
17560 vm_named_entry_t named_entry
;
17561 uint32_t try_failed_count
= 0;
17563 if (IP_VALID(port
) &&
17564 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
17567 if (ip_active(port
) &&
17568 (ip_kotype(port
) == IKOT_NAMED_ENTRY
)) {
17569 named_entry
= (vm_named_entry_t
)port
->ip_kobject
;
17570 if (!(lck_mtx_try_lock(&(named_entry
)->Lock
))) {
17572 try_failed_count
++;
17573 mutex_pause(try_failed_count
);
17576 named_entry
->ref_count
++;
17577 lck_mtx_unlock(&(named_entry
)->Lock
);
17579 if (!(named_entry
->is_sub_map
) &&
17580 !(named_entry
->is_copy
) &&
17581 (named_entry
->protection
& VM_PROT_WRITE
)) {
17582 object
= named_entry
->backing
.object
;
17583 vm_object_reference(object
);
17585 mach_destroy_memory_entry(port
);
/*
 * Export routines to other components for the things we access locally through
 * macros.
 */
#undef current_map
vm_map_t
current_map(void)
{
	return (current_map_fast());
}
/*
 *	vm_map_reference:
 *
 *	Most code internal to the osfmk will go through a
 *	macro defining this.  This is always here for the
 *	use of other kernel components.
 */
#undef vm_map_reference
void
vm_map_reference(
	vm_map_t	map)
{
	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
#if	TASK_SWAPPER
	assert(map->res_count > 0);
	assert(map->map_refcnt >= map->res_count);
	map->res_count++;
#endif
	map->map_refcnt++;
	lck_mtx_unlock(&map->s_lock);
}
/*
 *	vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(
	vm_map_t	map)
{
	unsigned int		ref;

	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
	ref = --map->map_refcnt;
	if (ref > 0) {
		vm_map_res_deallocate(map);
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	assert(map->map_refcnt == 0);
	lck_mtx_unlock(&map->s_lock);

#if	TASK_SWAPPER
	/*
	 * The map residence count isn't decremented here because
	 * the vm_map_delete below will traverse the entire map,
	 * deleting entries, and the residence counts on objects
	 * and sharing maps will go away then.
	 */
#endif

	vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
}
void
vm_map_disable_NX(vm_map_t map)
{
	if (map == NULL)
		return;
	if (map->pmap == NULL)
		return;

	pmap_disable_NX(map->pmap);
}

void
vm_map_disallow_data_exec(vm_map_t map)
{
	if (map == NULL)
		return;

	map->map_disallow_data_exec = TRUE;
}
/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
 * more descriptive.
 */
void
vm_map_set_32bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
#endif
}

void
vm_map_set_64bit(vm_map_t map)
{
#if defined(__arm__) || defined(__arm64__)
	map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
#else
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
#endif
}
/*
 * Expand the maximum size of an existing map to the maximum supported.
 */
void
vm_map_set_jumbo(vm_map_t map)
{
#if defined (__arm64__)
	vm_map_set_max_addr(map, ~0);
#else /* arm64 */
	(void) map;
#endif
}

/*
 * Expand the maximum size of an existing map.
 */
void
vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
{
#if defined(__arm64__)
	vm_map_offset_t max_supported_offset = 0;
	vm_map_offset_t old_max_offset = map->max_offset;
	max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);

	new_max_offset = trunc_page(new_max_offset);

	/* The address space cannot be shrunk using this routine. */
	if (old_max_offset >= new_max_offset) {
		return;
	}

	if (max_supported_offset < new_max_offset) {
		new_max_offset = max_supported_offset;
	}

	map->max_offset = new_max_offset;

	if (map->holes_list->prev->vme_end == old_max_offset) {
		/*
		 * There is already a hole at the end of the map; simply make it bigger.
		 */
		map->holes_list->prev->vme_end = map->max_offset;
	} else {
		/*
		 * There is no hole at the end, so we need to create a new hole
		 * for the new empty space we're creating.
		 */
		struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
		new_hole->start = old_max_offset;
		new_hole->end = map->max_offset;
		new_hole->prev = map->holes_list->prev;
		new_hole->next = (struct vm_map_entry *)map->holes_list;
		map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
		map->holes_list->prev = (struct vm_map_entry *)new_hole;
	}
#else
	(void)map;
	(void)new_max_offset;
#endif
}
vm_map_offset_t
vm_compute_max_offset(boolean_t is64)
{
#if defined(__arm__) || defined(__arm64__)
	return (pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE));
#else
	return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
#endif
}
void
vm_map_get_max_aslr_slide_section(
		vm_map_t		map __unused,
		int64_t			*max_sections,
		int64_t			*section_size)
{
#if defined(__arm64__)
	*max_sections = 3;
	*section_size = ARM_TT_TWIG_SIZE;
#else
	*max_sections = 1;
	*section_size = 0;
#endif
}
vm_map_offset_t
vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
	 * limited embedded address space; this is also meant to minimize pmap
	 * memory usage on 16KB page systems.
	 */
	return (1 << (24 - VM_MAP_PAGE_SHIFT(map)));
#else
	return (1 << (vm_map_is_64bit(map) ? 16 : 8));
#endif
}

vm_map_offset_t
vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
{
#if defined(__arm64__)
	/* We limit the loader slide to 4MB, in order to ensure at least 8 bits
	 * of independent entropy on 16KB page systems.
	 */
	return (1 << (22 - VM_MAP_PAGE_SHIFT(map)));
#else
	return (1 << (vm_map_is_64bit(map) ? 16 : 8));
#endif
}
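/*
 * Worked example (added for clarity, not part of the original source): on a
 * 16KB-page arm64 map, VM_MAP_PAGE_SHIFT(map) is 14, so the main slide
 * budget is 1 << (24 - 14) = 1024 pages = 16MB and the loader slide budget
 * is 1 << (22 - 14) = 256 pages = 4MB.
 */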
boolean_t
vm_map_is_64bit(
		vm_map_t map)
{
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}
boolean_t
vm_map_has_hard_pagezero(
	vm_map_t	map,
	vm_map_offset_t	pagezero_size)
{
	/*
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	return (map->min_offset >= pagezero_size);
}
/*
 * Raise a VM map's maximum offset.
 */
kern_return_t
vm_map_raise_max_offset(
	vm_map_t	map,
	vm_map_offset_t	new_max_offset)
{
	kern_return_t	ret;

	vm_map_lock(map);
	ret = KERN_INVALID_ADDRESS;

	if (new_max_offset >= map->max_offset) {
		if (!vm_map_is_64bit(map)) {
			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		} else {
			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		}
	}

	vm_map_unlock(map);
	return ret;
}
/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t	map,
	vm_map_offset_t	new_min_offset)
{
	vm_map_entry_t	first_entry;

	new_min_offset = vm_map_round_page(new_min_offset,
					   VM_MAP_PAGE_MASK(map));

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}
	if (new_min_offset >= map->max_offset) {
		/* can't go beyond the end of the address space */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	assert(map->holes_list);
	map->holes_list->start = new_min_offset;
	assert(new_min_offset < map->holes_list->end);

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
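#if 0
/*
 * Illustrative sketch only, not part of the original source: a hypothetical
 * caller reserving a 4GB hard page zero on a freshly created 64-bit map by
 * raising its minimum offset; the map is assumed to have no entries yet.
 */
static kern_return_t
example_reserve_pagezero(vm_map_t map)
{
	return vm_map_raise_min_offset(map, (vm_map_offset_t)0x100000000ULL);
}
#endif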
/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
 * don't have to reach over to the BSD data structures.
 */

void
vm_map_set_user_wire_limit(vm_map_t	map,
			   vm_size_t	limit)
{
	map->user_wire_limit = limit;
}


void vm_map_switch_protect(vm_map_t	map,
			   boolean_t	val)
{
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}
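#if 0
/*
 * Illustrative sketch only, not part of the original source: a hypothetical
 * propagation of a MEMLOCK-style resource limit into the VM map, in the
 * spirit of what the BSD side does; "map" and "limit_bytes" are assumed.
 */
static void
example_apply_memlock_limit(vm_map_t map, uint64_t limit_bytes)
{
	vm_map_set_user_wire_limit(map, (vm_size_t)limit_bytes);
}
#endif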
/*
 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
 * bump both counters.
 */
void
vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}

void
vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
17982 /* Add (generate) code signature for memory range */
17983 #if CONFIG_DYNAMIC_CODE_SIGNING
17984 kern_return_t
vm_map_sign(vm_map_t map
,
17985 vm_map_offset_t start
,
17986 vm_map_offset_t end
)
17988 vm_map_entry_t entry
;
17990 vm_object_t object
;
17993 * Vet all the input parameters and current type and state of the
 * underlying object.  Return with an error if anything is amiss.
17996 if (map
== VM_MAP_NULL
)
17997 return(KERN_INVALID_ARGUMENT
);
17999 vm_map_lock_read(map
);
18001 if (!vm_map_lookup_entry(map
, start
, &entry
) || entry
->is_sub_map
) {
18003 * Must pass a valid non-submap address.
18005 vm_map_unlock_read(map
);
18006 return(KERN_INVALID_ADDRESS
);
18009 if((entry
->vme_start
> start
) || (entry
->vme_end
< end
)) {
18011 * Map entry doesn't cover the requested range. Not handling
18012 * this situation currently.
18014 vm_map_unlock_read(map
);
18015 return(KERN_INVALID_ARGUMENT
);
18018 object
= VME_OBJECT(entry
);
18019 if (object
== VM_OBJECT_NULL
) {
18021 * Object must already be present or we can't sign.
18023 vm_map_unlock_read(map
);
18024 return KERN_INVALID_ARGUMENT
;
18027 vm_object_lock(object
);
18028 vm_map_unlock_read(map
);
18030 while(start
< end
) {
18033 m
= vm_page_lookup(object
,
18034 start
- entry
->vme_start
+ VME_OFFSET(entry
));
18035 if (m
==VM_PAGE_NULL
) {
		/* should we try to fault a page here? we can probably
		 * demand it exists and is locked for this request */
18038 vm_object_unlock(object
);
18039 return KERN_FAILURE
;
18041 /* deal with special page status */
18043 (m
->vmp_unusual
&& (m
->vmp_error
|| m
->vmp_restart
|| m
->vmp_private
|| m
->vmp_absent
))) {
18044 vm_object_unlock(object
);
18045 return KERN_FAILURE
;
18048 /* Page is OK... now "validate" it */
18049 /* This is the place where we'll call out to create a code
18050 * directory, later */
18051 m
->vmp_cs_validated
= TRUE
;
18053 /* The page is now "clean" for codesigning purposes. That means
18054 * we don't consider it as modified (wpmapped) anymore. But
18055 * we'll disconnect the page so we note any future modification
18057 m
->vmp_wpmapped
= FALSE
;
18058 refmod
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
18060 /* Pull the dirty status from the pmap, since we cleared the
18062 if ((refmod
& VM_MEM_MODIFIED
) && !m
->vmp_dirty
) {
18063 SET_PAGE_DIRTY(m
, FALSE
);
18066 /* On to the next page */
18067 start
+= PAGE_SIZE
;
18069 vm_object_unlock(object
);
18071 return KERN_SUCCESS
;
18075 kern_return_t
vm_map_partial_reap(vm_map_t map
, unsigned int *reclaimed_resident
, unsigned int *reclaimed_compressed
)
18077 vm_map_entry_t entry
= VM_MAP_ENTRY_NULL
;
18078 vm_map_entry_t next_entry
;
18079 kern_return_t kr
= KERN_SUCCESS
;
18085 * We use a "zap_map" to avoid having to unlock
18086 * the "map" in vm_map_delete().
18088 zap_map
= vm_map_create(PMAP_NULL
,
18091 map
->hdr
.entries_pageable
);
18093 if (zap_map
== VM_MAP_NULL
) {
18094 return KERN_RESOURCE_SHORTAGE
;
18097 vm_map_set_page_shift(zap_map
,
18098 VM_MAP_PAGE_SHIFT(map
));
18099 vm_map_disable_hole_optimization(zap_map
);
18101 for (entry
= vm_map_first_entry(map
);
18102 entry
!= vm_map_to_entry(map
);
18103 entry
= next_entry
) {
18104 next_entry
= entry
->vme_next
;
18106 if (VME_OBJECT(entry
) &&
18107 !entry
->is_sub_map
&&
18108 (VME_OBJECT(entry
)->internal
== TRUE
) &&
18109 (VME_OBJECT(entry
)->ref_count
== 1)) {
18111 *reclaimed_resident
+= VME_OBJECT(entry
)->resident_page_count
;
18112 *reclaimed_compressed
+= vm_compressor_pager_get_count(VME_OBJECT(entry
)->pager
);
18114 (void)vm_map_delete(map
,
18117 VM_MAP_REMOVE_SAVE_ENTRIES
,
18122 vm_map_unlock(map
);
18125 * Get rid of the "zap_maps" and all the map entries that
18126 * they may still contain.
18128 if (zap_map
!= VM_MAP_NULL
) {
18129 vm_map_destroy(zap_map
, VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
18130 zap_map
= VM_MAP_NULL
;
18137 #if DEVELOPMENT || DEBUG
18140 vm_map_disconnect_page_mappings(
18142 boolean_t do_unnest
)
18144 vm_map_entry_t entry
;
18145 int page_count
= 0;
18147 if (do_unnest
== TRUE
) {
18148 #ifndef NO_NESTED_PMAP
18151 for (entry
= vm_map_first_entry(map
);
18152 entry
!= vm_map_to_entry(map
);
18153 entry
= entry
->vme_next
) {
18155 if (entry
->is_sub_map
&& entry
->use_pmap
) {
18157 * Make sure the range between the start of this entry and
18158 * the end of this entry is no longer nested, so that
18159 * we will only remove mappings from the pmap in use by this
18162 vm_map_clip_unnest(map
, entry
, entry
->vme_start
, entry
->vme_end
);
18165 vm_map_unlock(map
);
18168 vm_map_lock_read(map
);
18170 page_count
= map
->pmap
->stats
.resident_count
;
18172 for (entry
= vm_map_first_entry(map
);
18173 entry
!= vm_map_to_entry(map
);
18174 entry
= entry
->vme_next
) {
18176 if (!entry
->is_sub_map
&& ((VME_OBJECT(entry
) == 0) ||
18177 (VME_OBJECT(entry
)->phys_contiguous
))) {
18180 if (entry
->is_sub_map
)
18181 assert(!entry
->use_pmap
);
18183 pmap_remove_options(map
->pmap
, entry
->vme_start
, entry
->vme_end
, 0);
18185 vm_map_unlock_read(map
);
18196 int c_freezer_swapout_page_count
;
18197 int c_freezer_compression_count
= 0;
18198 AbsoluteTime c_freezer_last_yield_ts
= 0;
18200 extern unsigned int memorystatus_freeze_private_shared_pages_ratio
;
18201 extern unsigned int memorystatus_freeze_shared_mb_per_process_max
;
18206 unsigned int *purgeable_count
,
18207 unsigned int *wired_count
,
18208 unsigned int *clean_count
,
18209 unsigned int *dirty_count
,
18210 __unused
unsigned int dirty_budget
,
18211 unsigned int *shared_count
,
18212 int *freezer_error_code
,
18213 boolean_t eval_only
)
18215 vm_map_entry_t entry2
= VM_MAP_ENTRY_NULL
;
18216 kern_return_t kr
= KERN_SUCCESS
;
18217 boolean_t evaluation_phase
= TRUE
;
18218 vm_object_t cur_shared_object
= NULL
;
18219 int cur_shared_obj_ref_cnt
= 0;
18220 unsigned int dirty_private_count
= 0, dirty_shared_count
= 0, obj_pages_snapshot
= 0;
18222 *purgeable_count
= *wired_count
= *clean_count
= *dirty_count
= *shared_count
= 0;
18225 * We need the exclusive lock here so that we can
18226 * block any page faults or lookups while we are
18227 * in the middle of freezing this vm map.
18231 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT
);
18233 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18234 if (vm_compressor_low_on_space()) {
18235 *freezer_error_code
= FREEZER_ERROR_NO_COMPRESSOR_SPACE
;
18238 if (vm_swap_low_on_space()) {
18239 *freezer_error_code
= FREEZER_ERROR_NO_SWAP_SPACE
;
18242 kr
= KERN_NO_SPACE
;
18246 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
== FALSE
) {
18248 * In-memory compressor backing the freezer. No disk.
18249 * So no need to do the evaluation phase.
18251 evaluation_phase
= FALSE
;
18253 if (eval_only
== TRUE
) {
18255 * We don't support 'eval_only' mode
18256 * in this non-swap config.
18258 *freezer_error_code
= FREEZER_ERROR_GENERIC
;
18259 kr
= KERN_INVALID_ARGUMENT
;
18263 c_freezer_compression_count
= 0;
18264 clock_get_uptime(&c_freezer_last_yield_ts
);
18268 for (entry2
= vm_map_first_entry(map
);
18269 entry2
!= vm_map_to_entry(map
);
18270 entry2
= entry2
->vme_next
) {
18272 vm_object_t src_object
= VME_OBJECT(entry2
);
18275 !entry2
->is_sub_map
&&
18276 !src_object
->phys_contiguous
) {
18277 /* If eligible, scan the entry, moving eligible pages over to our parent object */
18279 if (src_object
->internal
== TRUE
) {
18281 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
) {
18283 * Pages belonging to this object could be swapped to disk.
18284 * Make sure it's not a shared object because we could end
18285 * up just bringing it back in again.
18287 * We try to optimize somewhat by checking for objects that are mapped
18288 * more than once within our own map. But we don't do full searches,
18289 * we just look at the entries following our current entry.
18291 if (src_object
->ref_count
> 1) {
18292 if (src_object
!= cur_shared_object
) {
18293 obj_pages_snapshot
= (src_object
->resident_page_count
- src_object
->wired_page_count
) + vm_compressor_pager_get_count(src_object
->pager
);
18294 dirty_shared_count
+= obj_pages_snapshot
;
18296 cur_shared_object
= src_object
;
18297 cur_shared_obj_ref_cnt
= 1;
18300 cur_shared_obj_ref_cnt
++;
18301 if (src_object
->ref_count
== cur_shared_obj_ref_cnt
) {
18303 * Fall through to below and treat this object as private.
18304 * So deduct its pages from our shared total and add it to the
18308 dirty_shared_count
-= obj_pages_snapshot
;
18309 dirty_private_count
+= obj_pages_snapshot
;
18317 if (src_object
->ref_count
== 1) {
18318 dirty_private_count
+= (src_object
->resident_page_count
- src_object
->wired_page_count
) + vm_compressor_pager_get_count(src_object
->pager
);
18321 if (evaluation_phase
== TRUE
) {
18327 vm_object_compressed_freezer_pageout(src_object
);
18329 *wired_count
+= src_object
->wired_page_count
;
18331 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18332 if (vm_compressor_low_on_space()) {
18333 *freezer_error_code
= FREEZER_ERROR_NO_COMPRESSOR_SPACE
;
18336 if (vm_swap_low_on_space()) {
18337 *freezer_error_code
= FREEZER_ERROR_NO_SWAP_SPACE
;
18340 kr
= KERN_NO_SPACE
;
18347 if (evaluation_phase
) {
18349 unsigned int shared_pages_threshold
= (memorystatus_freeze_shared_mb_per_process_max
* 1024 * 1024ULL) / PAGE_SIZE_64
;
18351 if (dirty_shared_count
> shared_pages_threshold
) {
18352 *freezer_error_code
= FREEZER_ERROR_EXCESS_SHARED_MEMORY
;
18357 if (dirty_shared_count
&&
18358 ((dirty_private_count
/ dirty_shared_count
) < memorystatus_freeze_private_shared_pages_ratio
)) {
18359 *freezer_error_code
= FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO
;
18364 evaluation_phase
= FALSE
;
18365 dirty_shared_count
= dirty_private_count
= 0;
18367 c_freezer_compression_count
= 0;
18368 clock_get_uptime(&c_freezer_last_yield_ts
);
18380 *shared_count
= (unsigned int) ((dirty_shared_count
* PAGE_SIZE_64
) / (1024 * 1024ULL));
18384 vm_map_unlock(map
);
18386 if ((eval_only
== FALSE
) && (kr
== KERN_SUCCESS
)) {
18387 vm_object_compressed_freezer_done();
18389 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
) {
18391 * reset the counter tracking the # of swapped compressed pages
18392 * because we are now done with this freeze session and task.
18395 *dirty_count
= c_freezer_swapout_page_count
; //used to track pageouts
18396 c_freezer_swapout_page_count
= 0;
18405 * vm_map_entry_should_cow_for_true_share:
18407 * Determines if the map entry should be clipped and setup for copy-on-write
18408 * to avoid applying "true_share" to a large VM object when only a subset is
18411 * For now, we target only the map entries created for the Objective C
18412 * Garbage Collector, which initially have the following properties:
18413 * - alias == VM_MEMORY_MALLOC
18414 * - wired_count == 0
18416 * and a VM object with:
18418 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18420 * - vo_size == ANON_CHUNK_SIZE
18422 * Only non-kernel map entries.
18425 vm_map_entry_should_cow_for_true_share(
18426 vm_map_entry_t entry
)
18428 vm_object_t object
;
18430 if (entry
->is_sub_map
) {
18431 /* entry does not point at a VM object */
18435 if (entry
->needs_copy
) {
18436 /* already set for copy_on_write: done! */
18440 if (VME_ALIAS(entry
) != VM_MEMORY_MALLOC
&&
18441 VME_ALIAS(entry
) != VM_MEMORY_MALLOC_SMALL
) {
18442 /* not a malloc heap or Obj-C Garbage Collector heap */
18446 if (entry
->wired_count
) {
18447 /* wired: can't change the map entry... */
18448 vm_counters
.should_cow_but_wired
++;
18452 object
= VME_OBJECT(entry
);
18454 if (object
== VM_OBJECT_NULL
) {
18455 /* no object yet... */
18459 if (!object
->internal
) {
18460 /* not an internal object */
18464 if (object
->copy_strategy
!= MEMORY_OBJECT_COPY_SYMMETRIC
) {
18465 /* not the default copy strategy */
18469 if (object
->true_share
) {
18470 /* already true_share: too late to avoid it */
18474 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC
&&
18475 object
->vo_size
!= ANON_CHUNK_SIZE
) {
18476 /* ... not an object created for the ObjC Garbage Collector */
18480 if (VME_ALIAS(entry
) == VM_MEMORY_MALLOC_SMALL
&&
18481 object
->vo_size
!= 2048 * 4096) {
18482 /* ... not a "MALLOC_SMALL" heap */
18487 * All the criteria match: we have a large object being targeted for "true_share".
18488 * To limit the adverse side-effects linked with "true_share", tell the caller to
18489 * try and avoid setting up the entire object for "true_share" by clipping the
18490 * targeted range and setting it up for copy-on-write.
vm_map_offset_t
vm_map_round_page_mask(
	vm_map_offset_t	offset,
	vm_map_offset_t	mask)
{
	return VM_MAP_ROUND_PAGE(offset, mask);
}

vm_map_offset_t
vm_map_trunc_page_mask(
	vm_map_offset_t	offset,
	vm_map_offset_t	mask)
{
	return VM_MAP_TRUNC_PAGE(offset, mask);
}

boolean_t
vm_map_page_aligned(
	vm_map_offset_t	offset,
	vm_map_offset_t	mask)
{
	return ((offset) & mask) == 0;
}

int
vm_map_page_shift(
	vm_map_t map)
{
	return VM_MAP_PAGE_SHIFT(map);
}

int
vm_map_page_size(
	vm_map_t map)
{
	return VM_MAP_PAGE_SIZE(map);
}

vm_map_offset_t
vm_map_page_mask(
	vm_map_t map)
{
	return VM_MAP_PAGE_MASK(map);
}
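#if 0
/*
 * Worked example (added for clarity, not part of the original source),
 * assuming a 4KB page mask of 0xFFF: rounding 0x1234 up yields 0x2000,
 * truncating yields 0x1000, and 0x1234 is not page aligned.
 */
static void
example_page_rounding(void)
{
	assert(vm_map_round_page_mask(0x1234, 0xFFF) == 0x2000);
	assert(vm_map_trunc_page_mask(0x1234, 0xFFF) == 0x1000);
	assert(!vm_map_page_aligned(0x1234, 0xFFF));
}
#endif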
kern_return_t
vm_map_set_page_shift(
	vm_map_t	map,
	int		pageshift)
{
	if (map->hdr.nentries != 0) {
		/* too late to change page size */
		return KERN_FAILURE;
	}

	map->hdr.page_shift = pageshift;

	return KERN_SUCCESS;
}
kern_return_t
vm_map_query_volatile(
	vm_map_t	map,
	mach_vm_size_t	*volatile_virtual_size_p,
	mach_vm_size_t	*volatile_resident_size_p,
	mach_vm_size_t	*volatile_compressed_size_p,
	mach_vm_size_t	*volatile_pmap_size_p,
	mach_vm_size_t	*volatile_compressed_pmap_size_p)
{
	mach_vm_size_t	volatile_virtual_size;
	mach_vm_size_t	volatile_resident_count;
	mach_vm_size_t	volatile_compressed_count;
	mach_vm_size_t	volatile_pmap_count;
	mach_vm_size_t	volatile_compressed_pmap_count;
	mach_vm_size_t	resident_count;
	vm_map_entry_t	entry;
	vm_object_t	object;

	/* map should be locked by caller */

	volatile_virtual_size = 0;
	volatile_resident_count = 0;
	volatile_compressed_count = 0;
	volatile_pmap_count = 0;
	volatile_compressed_pmap_count = 0;

	for (entry = vm_map_first_entry(map);
	     entry != vm_map_to_entry(map);
	     entry = entry->vme_next) {
		mach_vm_size_t	pmap_resident_bytes, pmap_compressed_bytes;

		if (entry->is_sub_map) {
			continue;
		}
		if (! (entry->protection & VM_PROT_WRITE)) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == VM_OBJECT_NULL) {
			continue;
		}
		if (object->purgable != VM_PURGABLE_VOLATILE &&
		    object->purgable != VM_PURGABLE_EMPTY) {
			continue;
		}
		if (VME_OFFSET(entry)) {
			/*
			 * If the map entry has been split and the object now
			 * appears several times in the VM map, we don't want
			 * to count the object's resident_page_count more than
			 * once.  We count it only for the first one, starting
			 * at offset 0 and ignore the other VM map entries.
			 */
			continue;
		}
		resident_count = object->resident_page_count;
		if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
			resident_count = 0;
		} else {
			resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
		}

		volatile_virtual_size += entry->vme_end - entry->vme_start;
		volatile_resident_count += resident_count;
		if (object->pager) {
			volatile_compressed_count +=
				vm_compressor_pager_get_count(object->pager);
		}
		pmap_compressed_bytes = 0;
		pmap_resident_bytes =
			pmap_query_resident(map->pmap,
					    entry->vme_start,
					    entry->vme_end,
					    &pmap_compressed_bytes);
		volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
		volatile_compressed_pmap_count += (pmap_compressed_bytes
						   / PAGE_SIZE);
	}

	/* map is still locked on return */

	*volatile_virtual_size_p = volatile_virtual_size;
	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
	*volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
	*volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;

	return KERN_SUCCESS;
}
void
vm_map_sizes(vm_map_t map,
	     vm_map_size_t * psize,
	     vm_map_size_t * pfree,
	     vm_map_size_t * plargest_free)
{
	vm_map_entry_t	entry;
	vm_map_offset_t	prev;
	vm_map_size_t	free, total_free, largest_free;
	boolean_t	end;

	if (!map) {
		*psize = *pfree = *plargest_free = 0;
		return;
	}
	total_free = largest_free = 0;

	vm_map_lock_read(map);
	if (psize) *psize = map->max_offset - map->min_offset;

	prev = map->min_offset;
	for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
		end = (entry == vm_map_to_entry(map));

		if (end) free = entry->vme_end - prev;
		else     free = entry->vme_start - prev;

		total_free += free;
		if (free > largest_free) largest_free = free;

		if (end) break;
		prev = entry->vme_end;
	}
	vm_map_unlock_read(map);
	if (pfree)         *pfree = total_free;
	if (plargest_free) *plargest_free = largest_free;
}
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);
int vm_map_shadow_max(
	vm_map_t map)
{
	int		shadows, shadows_max;
	vm_map_entry_t	entry;
	vm_object_t	object, next_object;

	if (map == NULL)
		return 0;

	shadows_max = 0;

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	     entry != vm_map_to_entry(map);
	     entry = entry->vme_next) {
		if (entry->is_sub_map) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}
		vm_object_lock_shared(object);
		for (shadows = 0;
		     object->shadow != NULL;
		     shadows++, object = next_object) {
			next_object = object->shadow;
			vm_object_lock_shared(next_object);
			vm_object_unlock(object);
		}
		vm_object_unlock(object);
		if (shadows > shadows_max) {
			shadows_max = shadows;
		}
	}

	vm_map_unlock_read(map);

	return shadows_max;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
void vm_commit_pagezero_status(vm_map_t lmap) {
	pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
}
#if __x86_64__
void
vm_map_set_high_start(
	vm_map_t	map,
	vm_map_offset_t	high_start)
{
	map->vmmap_high_start = high_start;
}
#endif /* __x86_64__ */
#if PMAP_CS
kern_return_t
vm_map_entry_cs_associate(
	vm_map_t		map,
	vm_map_entry_t		entry,
	vm_map_kernel_flags_t	vmk_flags)
{
	vm_object_t		cs_object, cs_shadow;
	vm_object_offset_t	cs_offset;
	void			*cs_blobs;
	struct vnode		*cs_vnode;
	kern_return_t		cs_ret;

	if (map->pmap == NULL ||
	    entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
	    VME_OBJECT(entry) == VM_OBJECT_NULL ||
	    ! (entry->protection & VM_PROT_EXECUTE)) {
		return KERN_SUCCESS;
	}

	vm_map_lock_assert_exclusive(map);

	if (entry->used_for_jit) {
		cs_ret = pmap_cs_associate(map->pmap,
					   PMAP_CS_ASSOCIATE_JIT,
					   entry->vme_start,
					   entry->vme_end - entry->vme_start);
		goto done;
	}

	if (vmk_flags.vmkf_remap_prot_copy) {
		cs_ret = pmap_cs_associate(map->pmap,
					   PMAP_CS_ASSOCIATE_COW,
					   entry->vme_start,
					   entry->vme_end - entry->vme_start);
		goto done;
	}

	vm_object_lock_shared(VME_OBJECT(entry));
	cs_offset = VME_OFFSET(entry);
	for (cs_object = VME_OBJECT(entry);
	     (cs_object != VM_OBJECT_NULL &&
	      !cs_object->code_signed);
	     cs_object = cs_shadow) {
		cs_shadow = cs_object->shadow;
		if (cs_shadow != VM_OBJECT_NULL) {
			cs_offset += cs_object->vo_shadow_offset;
			vm_object_lock_shared(cs_shadow);
		}
		vm_object_unlock(cs_object);
	}
	if (cs_object == VM_OBJECT_NULL) {
		return KERN_SUCCESS;
	}

	cs_offset += cs_object->paging_offset;
	cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
	cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
					  &cs_blobs);
	assert(cs_ret == KERN_SUCCESS);
	cs_ret = cs_associate_blob_with_mapping(map->pmap,
						entry->vme_start,
						(entry->vme_end -
						 entry->vme_start),
						cs_offset,
						cs_blobs);
	vm_object_unlock(cs_object);
	cs_object = VM_OBJECT_NULL;

done:
	if (cs_ret == KERN_SUCCESS) {
		DTRACE_VM2(vm_map_entry_cs_associate_success,
			   vm_map_offset_t, entry->vme_start,
			   vm_map_offset_t, entry->vme_end);
		if (vm_map_executable_immutable) {
			/*
			 * Prevent this executable
			 * mapping from being unmapped
			 * or modified.
			 */
			entry->permanent = TRUE;
		}
		/*
		 * pmap says it will validate the
		 * code-signing validity of pages
		 * faulted in via this mapping, so
		 * this map entry should be marked so
		 * that vm_fault() bypasses code-signing
		 * validation for faults coming through
		 * this mapping.
		 */
		entry->pmap_cs_associated = TRUE;
	} else if (cs_ret == KERN_NOT_SUPPORTED) {
		/*
		 * pmap won't check the code-signing
		 * validity of pages faulted in via
		 * this mapping, so VM should keep
		 * doing it.
		 */
		DTRACE_VM3(vm_map_entry_cs_associate_off,
			   vm_map_offset_t, entry->vme_start,
			   vm_map_offset_t, entry->vme_end,
			   int, cs_ret);
	} else {
		/*
		 * A real error: do not allow
		 * execution in this mapping.
		 */
		DTRACE_VM3(vm_map_entry_cs_associate_failure,
			   vm_map_offset_t, entry->vme_start,
			   vm_map_offset_t, entry->vme_end,
			   int, cs_ret);
		entry->protection &= ~VM_PROT_EXECUTE;
		entry->max_protection &= ~VM_PROT_EXECUTE;
	}

	return cs_ret;
}
#endif /* PMAP_CS */
/*
 * FORKED CORPSE FOOTPRINT
 *
 * A forked corpse gets a copy of the original VM map but its pmap is mostly
 * empty since it never ran and never got to fault in any pages.
 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
 * a forked corpse would therefore return very little information.
 *
 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
 * to vm_map_fork() to collect footprint information from the original VM map
 * and its pmap, and store it in the forked corpse's VM map.  That information
 * is stored in place of the VM map's "hole list" since we'll never need to
 * look for holes in the corpse's map.
 *
 * The corpse's footprint info looks like this:
 *
 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
 * as follows:
 *
 *                     +---------------------------------------+
 *            header-> | cf_size                               |
 *                     +-------------------+-------------------+
 *                     | cf_last_region    | cf_last_zeroes    |
 *                     +-------------------+-------------------+
 *           region1-> | cfr_vaddr                             |
 *                     +-------------------+-------------------+
 *                     | cfr_num_pages     | d0 | d1 | d2 | d3 |
 *                     +---------------------------------------+
 *                     | d4 | d5 | ...                         |
 *                     +---------------------------------------+
 *                     | ...                                   |
 *                     +-------------------+-------------------+
 *                     | dy | dz | na | na | cfr_vaddr...      | <-region2
 *                     +-------------------+-------------------+
 *                     | cfr_vaddr (ctd)   | cfr_num_pages     |
 *                     +---------------------------------------+
 *                     | d0 | d1 ...                           |
 *                     +---------------------------------------+
 *                       ...
 *                     +---------------------------------------+
 *       last region-> | cfr_vaddr                             |
 *                     +---------------------------------------+
 *                     + cfr_num_pages     | d0 | d1 | d2 | d3 |
 *                     +---------------------------------------+
 *                     | dx | dy | dz | na | na | na | na | na |
 *                     +---------------------------------------+
 *
 * where:
 *	cf_size:	total size of the buffer (rounded to page size)
 *	cf_last_region:	offset in the buffer of the last "region" sub-header
 *	cf_last_zeroes:	number of trailing "zero" dispositions at the end
 *			of the last region
 *	cfr_vaddr:	virtual address of the start of the covered "region"
 *	cfr_num_pages:	number of pages in the covered "region"
 *	d*:		disposition of the page at that virtual address
 * Regions in the buffer are word-aligned.
 *
 * We estimate the size of the buffer based on the number of memory regions
 * and the virtual size of the address space.  While copying each memory
 * region during vm_map_fork(), we also collect the footprint info for that
 * region and store it in the buffer, packing it as much as possible
 * (coalescing contiguous memory regions to avoid having too many region
 * headers and avoiding long streaks of "zero" page dispositions by splitting
 * footprint "regions"), so the number of regions in the footprint buffer
 * might not match the number of memory regions in the address space.
 *
 * We also have to copy the original task's "nonvolatile" ledgers since that's
 * part of the footprint and will need to be reported to any tool asking for
 * the footprint information of the forked corpse.
 */
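/*
 * Illustrative sketch only (the authoritative definitions live in the VM map
 * headers): the buffer described above is walked through a header and
 * per-region sub-headers whose fields, as used by the code below, look
 * roughly like:
 *
 *	struct vm_map_corpse_footprint_header {
 *		vm_size_t	cf_size;	// total buffer size (page-rounded)
 *		uint32_t	cf_last_region;	// offset of the last region sub-header
 *		uint32_t	cf_last_zeroes;	// trailing "zero" dispositions in that
 *						// region (reused as "cf_hint_region",
 *						// the lookup hint, once collection is done)
 *	};
 *
 *	struct vm_map_corpse_footprint_region {
 *		vm_map_offset_t	cfr_vaddr;		// start of the covered range
 *		uint32_t	cfr_num_pages;		// pages covered by this region
 *		unsigned char	cfr_disposition[0];	// one disposition byte per page
 *	};
 */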
uint64_t vm_map_corpse_footprint_count = 0;
uint64_t vm_map_corpse_footprint_size_avg = 0;
uint64_t vm_map_corpse_footprint_size_max = 0;
uint64_t vm_map_corpse_footprint_full = 0;
uint64_t vm_map_corpse_footprint_no_buf = 0;
/*
 * vm_map_corpse_footprint_new_region:
 *	closes the current footprint "region" and creates a new one
 *
 * Returns NULL if there's not enough space in the buffer for a new region.
 */
static struct vm_map_corpse_footprint_region *
vm_map_corpse_footprint_new_region(
	struct vm_map_corpse_footprint_header *footprint_header)
{
	uintptr_t	footprint_edge;
	uint32_t	new_region_offset;
	struct vm_map_corpse_footprint_region	*footprint_region;
	struct vm_map_corpse_footprint_region	*new_footprint_region;

	footprint_edge = ((uintptr_t)footprint_header +
			  footprint_header->cf_size);
	footprint_region = ((struct vm_map_corpse_footprint_region *)
			    ((char *)footprint_header +
			     footprint_header->cf_last_region));
	assert((uintptr_t)footprint_region + sizeof (*footprint_region) <=
	       footprint_edge);

	/* get rid of trailing zeroes in the last region */
	assert(footprint_region->cfr_num_pages >=
	       footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -=
		footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	/* reuse this region if it's now empty */
	if (footprint_region->cfr_num_pages == 0) {
		return footprint_region;
	}

	/* compute offset of new region */
	new_region_offset = footprint_header->cf_last_region;
	new_region_offset += sizeof (*footprint_region);
	new_region_offset += footprint_region->cfr_num_pages;
	new_region_offset = roundup(new_region_offset, sizeof (int));

	/* check if we're going over the edge */
	if (((uintptr_t)footprint_header +
	     new_region_offset +
	     sizeof (*footprint_region)) >=
	    footprint_edge) {
		/* over the edge: no new region */
		return NULL;
	}

	/* adjust offset of last region in header */
	footprint_header->cf_last_region = new_region_offset;

	new_footprint_region = (struct vm_map_corpse_footprint_region *)
		((char *)footprint_header +
		 footprint_header->cf_last_region);
	new_footprint_region->cfr_vaddr = 0;
	new_footprint_region->cfr_num_pages = 0;
	/* caller needs to initialize new region */

	return new_footprint_region;
}
/*
 * vm_map_corpse_footprint_collect:
 *	collect footprint information for "old_entry" in "old_map" and
 *	stores it in "new_map"'s vmmap_footprint_info.
 */
kern_return_t
vm_map_corpse_footprint_collect(
	vm_map_t	old_map,
	vm_map_entry_t	old_entry,
	vm_map_t	new_map)
{
	vm_map_offset_t	va;
	int		disp;
	kern_return_t	kr;
	struct vm_map_corpse_footprint_header	*footprint_header;
	struct vm_map_corpse_footprint_region	*footprint_region;
	struct vm_map_corpse_footprint_region	*new_footprint_region;
	unsigned char	*next_disp_p;
	uintptr_t	footprint_edge;
	uint32_t	num_pages_tmp;

	va = old_entry->vme_start;

	vm_map_lock_assert_exclusive(old_map);
	vm_map_lock_assert_exclusive(new_map);

	assert(new_map->has_corpse_footprint);
	assert(!old_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    old_map->has_corpse_footprint) {
		/*
		 * This can only transfer footprint info from a
		 * map with a live pmap to a map with a corpse footprint.
		 */
		return KERN_NOT_SUPPORTED;
	}

	if (new_map->vmmap_corpse_footprint == NULL) {
		vm_offset_t	buf;
		vm_size_t	buf_size;

		buf = 0;
		buf_size = (sizeof (*footprint_header) +
			    (old_map->hdr.nentries
			     *
			     (sizeof (*footprint_region) +
			      + 3)) /* potential alignment for each region */
			    +
			    ((old_map->size / PAGE_SIZE)
			     *
			     sizeof (char))); /* disposition for each page */
//		printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
		buf_size = round_page(buf_size);

		/* limit buffer to 1 page to validate overflow detection */
//		buf_size = PAGE_SIZE;

		/* limit size to a somewhat sane amount */
#if CONFIG_EMBEDDED
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE	(256*1024)	/* 256KB */
#else /* CONFIG_EMBEDDED */
#define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE	(8*1024*1024)	/* 8MB */
#endif /* CONFIG_EMBEDDED */
		if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
			buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
		}

		/*
		 * Allocate the pageable buffer (with a trailing guard page).
		 * It will be zero-filled on demand.
		 */
		kr = kernel_memory_allocate(kernel_map,
					    &buf,
					    (buf_size
					     + PAGE_SIZE), /* trailing guard page */
					    0, /* mask */
					    KMA_PAGEABLE | KMA_GUARD_LAST,
					    VM_KERN_MEMORY_DIAG);
		if (kr != KERN_SUCCESS) {
			vm_map_corpse_footprint_no_buf++;
			return kr;
		}

		/* initialize header and 1st region */
		footprint_header = (struct vm_map_corpse_footprint_header *)buf;
		new_map->vmmap_corpse_footprint = footprint_header;

		footprint_header->cf_size = buf_size;
		footprint_header->cf_last_region =
			sizeof (*footprint_header);
		footprint_header->cf_last_zeroes = 0;

		footprint_region = (struct vm_map_corpse_footprint_region *)
			((char *)footprint_header +
			 footprint_header->cf_last_region);
		footprint_region->cfr_vaddr = 0;
		footprint_region->cfr_num_pages = 0;
	} else {
		/* retrieve header and last region */
		footprint_header = (struct vm_map_corpse_footprint_header *)
			new_map->vmmap_corpse_footprint;
		footprint_region = (struct vm_map_corpse_footprint_region *)
			((char *)footprint_header +
			 footprint_header->cf_last_region);
	}
	footprint_edge = ((uintptr_t)footprint_header +
			  footprint_header->cf_size);

	if ((footprint_region->cfr_vaddr +
	     (((vm_map_offset_t)footprint_region->cfr_num_pages) *
	      PAGE_SIZE))
	    != old_entry->vme_start) {
		uint64_t	num_pages_delta;
		uint32_t	region_offset_delta;

		/*
		 * Not the next contiguous virtual address:
		 * start a new region or store "zero" dispositions for
		 * the missing pages?
		 */
		/* size of gap in actual page dispositions */
		num_pages_delta = (((old_entry->vme_start -
				     footprint_region->cfr_vaddr) / PAGE_SIZE)
				   - footprint_region->cfr_num_pages);
		/* size of gap as a new footprint region header */
		region_offset_delta =
			(sizeof (*footprint_region) +
			 roundup((footprint_region->cfr_num_pages -
				  footprint_header->cf_last_zeroes),
				 sizeof (int)) -
			 (footprint_region->cfr_num_pages -
			  footprint_header->cf_last_zeroes));
//		printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
		if (region_offset_delta < num_pages_delta ||
		    os_add3_overflow(footprint_region->cfr_num_pages,
				     (uint32_t) num_pages_delta,
				     1,
				     &num_pages_tmp)) {
			/*
			 * Storing data for this gap would take more space
			 * than inserting a new footprint region header:
			 * let's start a new region and save space. If it's a
			 * tie, let's avoid using a new region, since that
			 * would require more region hops to find the right
			 * range during lookups.
			 *
			 * If the current region's cfr_num_pages would overflow
			 * if we added "zero" page dispositions for the gap,
			 * no choice but to start a new region.
			 */
//			printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
			new_footprint_region =
				vm_map_corpse_footprint_new_region(footprint_header);
			/* check that we're not going over the edge */
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			/* initialize new region as empty */
			footprint_region->cfr_vaddr = old_entry->vme_start;
			footprint_region->cfr_num_pages = 0;
		} else {
			/*
			 * Store "zero" page dispositions for the missing
			 * pages.
			 */
//			printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
			for (; num_pages_delta > 0; num_pages_delta--) {
				next_disp_p =
					((unsigned char *) footprint_region +
					 sizeof (*footprint_region) +
					 footprint_region->cfr_num_pages);
				/* check that we're not going over the edge */
				if ((uintptr_t)next_disp_p >= footprint_edge) {
					goto over_the_edge;
				}
				/* store "zero" disposition for this gap page */
				footprint_region->cfr_num_pages++;
				*next_disp_p = (unsigned char) 0;
				footprint_header->cf_last_zeroes++;
			}
		}
	}

	for (va = old_entry->vme_start;
	     va < old_entry->vme_end;
	     va += PAGE_SIZE) {
		vm_object_t	object;

		object = VME_OBJECT(old_entry);
		if (!old_entry->is_sub_map &&
		    old_entry->iokit_acct &&
		    object != VM_OBJECT_NULL &&
		    object->internal &&
		    object->purgable == VM_PURGABLE_DENY) {
			/*
			 * Non-purgeable IOKit memory: phys_footprint
			 * includes the entire virtual mapping.
			 * Since the forked corpse's VM map entry will not
			 * have "iokit_acct", pretend that this page's
			 * disposition is "present & internal", so that it
			 * shows up in the forked corpse's footprint.
			 */
			disp = (PMAP_QUERY_PAGE_PRESENT |
				PMAP_QUERY_PAGE_INTERNAL);
		} else {
			disp = 0;
			pmap_query_page_info(old_map->pmap,
					     va,
					     &disp);
		}

//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);

		if (disp == 0 && footprint_region->cfr_num_pages == 0) {
			/*
			 * Ignore "zero" dispositions at start of
			 * region: just move start of region.
			 */
			footprint_region->cfr_vaddr += PAGE_SIZE;
			continue;
		}

		/* would region's cfr_num_pages overflow? */
		if (os_add_overflow(footprint_region->cfr_num_pages, 1,
				    &num_pages_tmp)) {
			/* overflow: create a new region */
			new_footprint_region =
				vm_map_corpse_footprint_new_region(
					footprint_header);
			if (new_footprint_region == NULL) {
				goto over_the_edge;
			}
			footprint_region = new_footprint_region;
			footprint_region->cfr_vaddr = va;
			footprint_region->cfr_num_pages = 0;
		}

		next_disp_p = ((unsigned char *)footprint_region +
			       sizeof (*footprint_region) +
			       footprint_region->cfr_num_pages);
		/* check that we're not going over the edge */
		if ((uintptr_t)next_disp_p >= footprint_edge) {
			goto over_the_edge;
		}
		/* store this disposition */
		*next_disp_p = (unsigned char) disp;
		footprint_region->cfr_num_pages++;

		if (disp != 0) {
			/* non-zero disp: break the current zero streak */
			footprint_header->cf_last_zeroes = 0;
			/* done */
			continue;
		}

		/* zero disp: add to the current streak of zeroes */
		footprint_header->cf_last_zeroes++;
		if ((footprint_header->cf_last_zeroes +
		     roundup((footprint_region->cfr_num_pages -
			      footprint_header->cf_last_zeroes) &
			     (sizeof (int) - 1),
			     sizeof (int))) <
		    (sizeof (*footprint_header))) {
			/*
			 * There are not enough trailing "zero" dispositions
			 * (+ the extra padding we would need for the previous
			 * region); creating a new region would not save space
			 * at this point, so let's keep this "zero" disposition
			 * in this region and reconsider later.
			 */
			continue;
		}
		/*
		 * Create a new region to avoid having too many consecutive
		 * "zero" dispositions.
		 */
		new_footprint_region =
			vm_map_corpse_footprint_new_region(footprint_header);
		if (new_footprint_region == NULL) {
			goto over_the_edge;
		}
		footprint_region = new_footprint_region;
		/* initialize the new region as empty ... */
		footprint_region->cfr_num_pages = 0;
		/* ... and skip this "zero" disp */
		footprint_region->cfr_vaddr = va + PAGE_SIZE;
	}

	return KERN_SUCCESS;

over_the_edge:
//	printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
	vm_map_corpse_footprint_full++;
	return KERN_RESOURCE_SHORTAGE;
}
/*
 * vm_map_corpse_footprint_collect_done:
 *	completes the footprint collection by getting rid of any remaining
 *	trailing "zero" dispositions and trimming the unused part of the
 *	kernel buffer
 */
void
vm_map_corpse_footprint_collect_done(
	vm_map_t	new_map)
{
	struct vm_map_corpse_footprint_header	*footprint_header;
	struct vm_map_corpse_footprint_region	*footprint_region;
	vm_size_t	buf_size, actual_size;
	kern_return_t	kr;

	assert(new_map->has_corpse_footprint);
	if (!new_map->has_corpse_footprint ||
	    new_map->vmmap_corpse_footprint == NULL) {
		return;
	}

	footprint_header = (struct vm_map_corpse_footprint_header *)
		new_map->vmmap_corpse_footprint;
	buf_size = footprint_header->cf_size;

	footprint_region = (struct vm_map_corpse_footprint_region *)
		((char *)footprint_header +
		 footprint_header->cf_last_region);

	/* get rid of trailing zeroes in last region */
	assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
	footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
	footprint_header->cf_last_zeroes = 0;

	actual_size = (vm_size_t)(footprint_header->cf_last_region +
				  sizeof (*footprint_region) +
				  footprint_region->cfr_num_pages);

//	printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
	vm_map_corpse_footprint_size_avg =
		(((vm_map_corpse_footprint_size_avg *
		   vm_map_corpse_footprint_count) +
		  actual_size) /
		 (vm_map_corpse_footprint_count + 1));
	vm_map_corpse_footprint_count++;
	if (actual_size > vm_map_corpse_footprint_size_max) {
		vm_map_corpse_footprint_size_max = actual_size;
	}

	actual_size = round_page(actual_size);
	if (buf_size > actual_size) {
		kr = vm_deallocate(kernel_map,
				   ((vm_address_t)footprint_header +
				    actual_size +
				    PAGE_SIZE), /* trailing guard page */
				   (buf_size - actual_size));
		assertf(kr == KERN_SUCCESS,
			"trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
			footprint_header,
			(uint64_t) buf_size,
			(uint64_t) actual_size,
			kr);
		kr = vm_protect(kernel_map,
				((vm_address_t)footprint_header +
				 actual_size),
				PAGE_SIZE,
				FALSE, /* set_maximum */
				VM_PROT_NONE); /* new guard page */
		assertf(kr == KERN_SUCCESS,
			"guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
			footprint_header,
			(uint64_t) buf_size,
			(uint64_t) actual_size,
			kr);
	}

	footprint_header->cf_size = actual_size;
}
/*
 * vm_map_corpse_footprint_query_page_info:
 *	retrieves the disposition of the page at virtual address "vaddr"
 *	in the forked corpse's VM map
 *
 * This is the equivalent of pmap_query_page_info() for a forked corpse.
 */
kern_return_t
vm_map_corpse_footprint_query_page_info(
	vm_map_t	map,
	vm_map_offset_t	va,
	int		*disp)
{
	struct vm_map_corpse_footprint_header	*footprint_header;
	struct vm_map_corpse_footprint_region	*footprint_region;
	uint32_t	footprint_region_offset;
	vm_map_offset_t	region_start, region_end;
	int		disp_idx;
	kern_return_t	kr;

	if (!map->has_corpse_footprint) {
		*disp = 0;
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	footprint_header = map->vmmap_corpse_footprint;
	if (footprint_header == NULL) {
		*disp = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* start looking at the hint ("cf_hint_region") */
	footprint_region_offset = footprint_header->cf_hint_region;

lookup_again:
	if (footprint_region_offset < sizeof (*footprint_header)) {
		/* hint too low: start from 1st region */
		footprint_region_offset = sizeof (*footprint_header);
	}
	if (footprint_region_offset >= footprint_header->cf_last_region) {
		/* hint too high: re-start from 1st region */
		footprint_region_offset = sizeof (*footprint_header);
	}
	footprint_region = (struct vm_map_corpse_footprint_region *)
		((char *)footprint_header + footprint_region_offset);
	region_start = footprint_region->cfr_vaddr;
	region_end = (region_start +
		      ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
		       PAGE_SIZE));
	if (va < region_start &&
	    footprint_region_offset != sizeof (*footprint_header)) {
		/* our range starts before the hint region */

		/* reset the hint (in a racy way...) */
		footprint_header->cf_hint_region = sizeof (*footprint_header);
		/* lookup "va" again from 1st region */
		footprint_region_offset = sizeof (*footprint_header);
		goto lookup_again;
	}

	while (va >= region_end) {
		if (footprint_region_offset >= footprint_header->cf_last_region) {
			break;
		}
		/* skip the region's header */
		footprint_region_offset += sizeof (*footprint_region);
		/* skip the region's page dispositions */
		footprint_region_offset += footprint_region->cfr_num_pages;
		/* align to next word boundary */
		footprint_region_offset =
			roundup(footprint_region_offset,
				sizeof (int));
		footprint_region = (struct vm_map_corpse_footprint_region *)
			((char *)footprint_header + footprint_region_offset);
		region_start = footprint_region->cfr_vaddr;
		region_end = (region_start +
			      ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
			       PAGE_SIZE));
	}
	if (va < region_start || va >= region_end) {
		/* page not found */
		*disp = 0;
//		if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
		kr = KERN_SUCCESS;
		goto done;
	}

	/* "va" found: set the lookup hint for next lookup (in a racy way...) */
	footprint_header->cf_hint_region = footprint_region_offset;

	/* get page disposition for "va" in this region */
	disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
	*disp = (int) (footprint_region->cfr_disposition[disp_idx]);

	kr = KERN_SUCCESS;
done:
//	if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
	/* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
	DTRACE_VM4(footprint_query_page_info,
		   vm_map_t, map,
		   vm_map_offset_t, va,
		   int, *disp,
		   kern_return_t, kr);

	return kr;
}
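/*
 * Hypothetical usage sketch (illustrative only): a footprint query path can
 * fall back to the corpse footprint when the map has no live pmap worth
 * asking, e.g.:
 *
 *	if (map->has_corpse_footprint) {
 *		kr = vm_map_corpse_footprint_query_page_info(map, va, &disp);
 *	} else {
 *		kr = pmap_query_page_info(map->pmap, va, &disp);
 *	}
 */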
void
vm_map_corpse_footprint_destroy(
	vm_map_t	map)
{
	if (map->has_corpse_footprint &&
	    map->vmmap_corpse_footprint != 0) {
		struct vm_map_corpse_footprint_header	*footprint_header;
		vm_size_t	buf_size;
		kern_return_t	kr;

		footprint_header = map->vmmap_corpse_footprint;
		buf_size = footprint_header->cf_size;
		kr = vm_deallocate(kernel_map,
				   (vm_offset_t) map->vmmap_corpse_footprint,
				   ((vm_size_t) buf_size
				    + PAGE_SIZE)); /* trailing guard page */
		assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
		map->vmmap_corpse_footprint = 0;
		map->has_corpse_footprint = FALSE;
	}
}
/*
 * vm_map_copy_footprint_ledgers:
 *	copies any ledger that's relevant to the memory footprint of "old_task"
 *	into the forked corpse's task ("new_task")
 */
void
vm_map_copy_footprint_ledgers(
	task_t	old_task,
	task_t	new_task)
{
	vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
	vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
}
/*
 * vm_map_copy_ledger:
 *	copy a single ledger from "old_task" to "new_task"
 */
void
vm_map_copy_ledger(
	task_t	old_task,
	task_t	new_task,
	int	ledger_entry)
{
	ledger_amount_t	old_balance, new_balance, delta;

	assert(new_task->map->has_corpse_footprint);
	if (!new_task->map->has_corpse_footprint)
		return;

	/* turn off sanity checks for the ledger we're about to mess with */
	ledger_disable_panic_on_negative(new_task->ledger,
					 ledger_entry);

	/* adjust "new_task" to match "old_task" */
	ledger_get_balance(old_task->ledger,
			   ledger_entry,
			   &old_balance);
	ledger_get_balance(new_task->ledger,
			   ledger_entry,
			   &new_balance);
	if (new_balance == old_balance) {
		/* new == old: done */
	} else if (new_balance > old_balance) {
		/* new > old ==> new -= new - old */
		delta = new_balance - old_balance;
		ledger_debit(new_task->ledger,
			     ledger_entry,
			     delta);
	} else {
		/* new < old ==> new += old - new */
		delta = old_balance - new_balance;
		ledger_credit(new_task->ledger,