/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory mapping module.
 */
#include <task_swapper.h>
#include <mach_assert.h>
#include <libkern/OSAtomic.h>

#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/vm_attributes.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/vm_statistics.h>
#include <mach/memory_object.h>
#include <mach/mach_vm.h>
#include <machine/cpu_capabilities.h>

#include <kern/assert.h>
#include <kern/counters.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include <vm/vm_compressor_pager.h>
#include <vm/vm_init.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>

#include <mach/vm_map_server.h>
#include <mach/mach_host_server.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_protos.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_map_store.h>

extern u_int32_t random(void);	/* from <libkern/libkern.h> */
/*
 * Internal prototypes
 */

static void vm_map_simplify_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);	/* forward */

static boolean_t	vm_map_range_check(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_map_entry_t	*entry);

static vm_map_entry_t	_vm_map_entry_create(
	struct vm_map_header	*map_header, boolean_t map_locked);

static void		_vm_map_entry_dispose(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry);

static void		vm_map_pmap_enter(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_offset_t		end_addr,
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_prot_t		protection);

static void		_vm_map_clip_end(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		end);

static void		_vm_map_clip_start(
	struct vm_map_header	*map_header,
	vm_map_entry_t		entry,
	vm_map_offset_t		start);

static void		vm_map_entry_delete(
	vm_map_t	map,
	vm_map_entry_t	entry);

static kern_return_t	vm_map_delete(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	int		flags,
	vm_map_t	zap_map);

static kern_return_t	vm_map_copy_overwrite_unaligned(
	vm_map_t	dst_map,
	vm_map_entry_t	entry,
	vm_map_copy_t	copy,
	vm_map_address_t start,
	boolean_t	discard_on_success);

static kern_return_t	vm_map_copy_overwrite_aligned(
	vm_map_t	dst_map,
	vm_map_entry_t	tmp_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	pmap_t		pmap);

static kern_return_t	vm_map_copyin_kernel_buffer(
	vm_map_t	src_map,
	vm_map_address_t src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	vm_map_copy_t	*copy_result);	/* OUT */

static kern_return_t	vm_map_copyout_kernel_buffer(
	vm_map_t	map,
	vm_map_address_t *addr,	/* IN/OUT */
	vm_map_copy_t	copy,
	boolean_t	overwrite,
	boolean_t	consume_on_success);

static void		vm_map_fork_share(
	vm_map_t	old_map,
	vm_map_entry_t	old_entry,
	vm_map_t	new_map);

static boolean_t	vm_map_fork_copy(
	vm_map_t	old_map,
	vm_map_entry_t	*old_entry_p,
	vm_map_t	new_map);

void		vm_map_region_top_walk(
	vm_map_entry_t		entry,
	vm_region_top_info_t	top);

void		vm_map_region_walk(
	vm_map_t		map,
	vm_map_offset_t		va,
	vm_map_entry_t		entry,
	vm_object_offset_t	offset,
	vm_object_size_t	range,
	vm_region_extended_info_t extended,
	boolean_t		look_for_pages,
	mach_msg_type_number_t	count);

static kern_return_t	vm_map_wire_nested(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_prot_t		access_type,
	boolean_t		user_wire,
	pmap_t			map_pmap,
	vm_map_offset_t		pmap_addr);

static kern_return_t	vm_map_unwire_nested(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	boolean_t		user_wire,
	pmap_t			map_pmap,
	vm_map_offset_t		pmap_addr);

static kern_return_t	vm_map_overwrite_submap_recurse(
	vm_map_t		dst_map,
	vm_map_offset_t		dst_addr,
	vm_map_size_t		dst_size);

static kern_return_t	vm_map_copy_overwrite_nested(
	vm_map_t		dst_map,
	vm_map_offset_t		dst_addr,
	vm_map_copy_t		copy,
	boolean_t		interruptible,
	pmap_t			pmap,
	boolean_t		discard_on_success);

static kern_return_t	vm_map_remap_extract(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_size_t		size,
	boolean_t		copy,
	struct vm_map_header	*map_header,
	vm_prot_t		*cur_protection,
	vm_prot_t		*max_protection,
	vm_inherit_t		inheritance,
	boolean_t		pageable);

static kern_return_t	vm_map_remap_range_allocate(
	vm_map_t		map,
	vm_map_address_t	*address,
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_entry_t		*map_entry);

static void		vm_map_region_look_for_page(
	vm_map_t		map,
	vm_map_offset_t		va,
	vm_object_t		object,
	vm_object_offset_t	offset,
	int			max_refcnt,
	int			depth,
	vm_region_extended_info_t extended,
	mach_msg_type_number_t	count);

static int		vm_map_region_count_obj_refs(
	vm_map_entry_t		entry,
	vm_object_t		object);


static kern_return_t	vm_map_willneed(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);

static kern_return_t	vm_map_reuse_pages(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);

static kern_return_t	vm_map_reusable_pages(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);

static kern_return_t	vm_map_can_reuse(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end);
/*
 * Macros to copy a vm_map_entry. We must be careful to correctly
 * manage the wired page count. vm_map_entry_copy() creates a new
 * map entry to the same memory - the wired count in the new entry
 * must be set to zero. vm_map_entry_copy_full() creates a new
 * entry that is identical to the old entry.  This preserves the
 * wire count; it's used for map splitting and zone changing in
 * vm_map_copyout.
 */

#define vm_map_entry_copy(NEW,OLD)	\
MACRO_BEGIN				\
boolean_t _vmec_reserved = (NEW)->from_reserved_zone;	\
	*(NEW) = *(OLD);		\
	(NEW)->is_shared = FALSE;	\
	(NEW)->needs_wakeup = FALSE;	\
	(NEW)->in_transition = FALSE;	\
	(NEW)->wired_count = 0;		\
	(NEW)->user_wired_count = 0;	\
	(NEW)->permanent = FALSE;	\
	(NEW)->used_for_jit = FALSE;	\
	(NEW)->from_reserved_zone = _vmec_reserved;	\
MACRO_END

#define vm_map_entry_copy_full(NEW,OLD)			\
MACRO_BEGIN						\
boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;	\
(*(NEW) = *(OLD));					\
(NEW)->from_reserved_zone = _vmecf_reserved;		\
MACRO_END
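
/*
 * Illustrative sketch (not part of the original source, and compiled out):
 * the difference between the two copy macros above matters when the entry
 * being duplicated describes wired pages.  The helper name and the
 * "template"/"clone" parameters are assumptions made up for the example.
 */
#if 0
static void
vm_map_entry_copy_example(vm_map_entry_t template, vm_map_entry_t clone)
{
	/* A plain copy aliases the same memory but starts out unwired. */
	vm_map_entry_copy(clone, template);
	assert(clone->wired_count == 0 && clone->user_wired_count == 0);

	/*
	 * A full copy is used when an entry merely moves between map
	 * headers or zones: the wire counts (and every other field) are
	 * preserved except the zone bookkeeping bit.
	 */
	vm_map_entry_copy_full(clone, template);
	assert(clone->wired_count == template->wired_count);
}
#endif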
/*
 * Decide if we want to allow processes to execute from their data or stack areas.
 * override_nx() returns true if we do.  Data/stack execution can be enabled independently
 * for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 * or allow_stack_exec to enable data execution for that type of data area for that particular
 * ABI (or both by or'ing the flags together).  These are initialized in the architecture
 * specific pmap files since the default behavior varies according to architecture.  The
 * main reason it varies is because of the need to provide binary compatibility with old
 * applications that were written before these restrictions came into being.  In the old
 * days, an app could execute anything it could read, but this has slowly been tightened
 * up over time.  The default behavior is:
 *
 *	32-bit PPC apps		may execute from both stack and data areas
 *	32-bit Intel apps	may execute from data areas but not stack
 *	64-bit PPC/Intel apps	may not execute from either data or stack
 *
 * An application on any architecture may override these defaults by explicitly
 * adding PROT_EXEC permission to the page in question with the mprotect(2)
 * system call.  This code here just determines what happens when an app tries to
 * execute from a page that lacks execute permission.
 *
 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 * execution from data areas for a particular binary even if the arch normally permits it. As
 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 * to support some complicated use cases, notably browsers with out-of-process plugins that
 * are not all NX-safe.
 */
extern int allow_data_exec, allow_stack_exec;

int
override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
{
	int current_abi;

	/*
	 * Determine if the app is running in 32 or 64 bit mode.
	 */

	if (vm_map_is_64bit(map))
		current_abi = VM_ABI_64;
	else
		current_abi = VM_ABI_32;

	/*
	 * Determine if we should allow the execution based on whether it's a
	 * stack or data area and the current architecture.
	 */

	if (user_tag == VM_MEMORY_STACK)
		return allow_stack_exec & current_abi;

	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
}
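
/*
 * Illustrative sketch (not part of the original source, and compiled out):
 * how the ABI bit masks above combine.  The flag values assigned here are
 * assumptions for the sake of the example; in practice they come from the
 * architecture-specific pmap initialization or from sysctl.
 */
#if 0
static void
override_nx_example(vm_map_t map32, vm_map_t map64)
{
	allow_data_exec  = VM_ABI_32;	/* 32-bit data areas may execute */
	allow_stack_exec = 0;		/* no stack execution for anyone */

	/* 32-bit map, data-area tag: allowed (unless map_disallow_data_exec). */
	assert(override_nx(map32, VM_MEMORY_MALLOC) != 0);
	/* 32-bit map, stack tag: denied, stack execution is off. */
	assert(override_nx(map32, VM_MEMORY_STACK) == 0);
	/* 64-bit map, data-area tag: denied, VM_ABI_64 bit is not set. */
	assert(override_nx(map64, VM_MEMORY_MALLOC) == 0);
}
#endif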
/*
 *	Virtual memory maps provide for the mapping, protection,
 *	and sharing of virtual memory objects.  In addition,
 *	this module provides for an efficient virtual copy of
 *	memory from one map to another.
 *
 *	Synchronization is required prior to most operations.
 *
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a single hint is used to speed up lookups.
 *
 *	Sharing maps have been deleted from this version of Mach.
 *	All shared objects are now mapped directly into the respective
 *	maps.  This requires a change in the copy on write strategy;
 *	the asymmetric (delayed) strategy is used for shared temporary
 *	objects instead of the symmetric (shadow) strategy.  All maps
 *	are now "top level" maps (either task map, kernel map or submap
 *	of the kernel map).
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *	No attempt is currently made to "glue back together" two
 *	entries that used to be one.
 *
 *	The symmetric (shadow) copy strategy implements virtual copy
 *	by copying VM object references from one map to
 *	another, and then marking both regions as copy-on-write.
 *	It is important to note that only one writeable reference
 *	to a VM object region exists in any map when this strategy
 *	is used -- this means that shadow object creation can be
 *	delayed until a write operation occurs.  The asymmetric (delayed)
 *	strategy allows multiple maps to have writeable references to
 *	the same region of a vm object, and hence cannot delay creating
 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 *	Copying of permanent objects is completely different; see
 *	vm_object_copy_strategically() in vm_object.c.
 */
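
/*
 * Illustrative sketch (not part of the original source, and compiled out):
 * what "clipping" in the comment above means.  Removing [0x3000, 0x5000)
 * from a single entry covering [0x1000, 0x8000) first splits that entry at
 * the start and end values, so only the middle piece is touched.  The
 * clip helpers are the ones prototyped earlier in this file; the address
 * values are made up for the example.
 */
#if 0
static void
vm_map_clip_example(vm_map_t map, vm_map_entry_t entry)
{
	/* "entry" covers [0x1000, 0x8000) on entry to this function. */
	_vm_map_clip_start(&map->hdr, entry, 0x3000);
	/* now: [0x1000,0x3000) + [0x3000,0x8000), "entry" is the latter */
	_vm_map_clip_end(&map->hdr, entry, 0x5000);
	/* now: [0x3000,0x5000) + [0x5000,0x8000), "entry" is the former */

	/*
	 * "entry" describes exactly [0x3000, 0x5000) and can be unmapped
	 * or have its protection changed without disturbing neighbours.
	 */
}
#endif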
static zone_t	vm_map_zone;			/* zone for vm_map structures */
static zone_t	vm_map_entry_zone;		/* zone for vm_map_entry structures */
static zone_t	vm_map_entry_reserved_zone;	/* zone with reserve for non-blocking
						 * allocations */
static zone_t	vm_map_copy_zone;		/* zone for vm_map_copy structures */


/*
 *	Placeholder object for submap operations.  This object is dropped
 *	into the range by a call to vm_map_find, and removed when
 *	vm_map_submap creates the submap.
 */

vm_object_t	vm_submap_object;

static void	*map_data;
static vm_size_t map_data_size;
static void	*kentry_data;
static vm_size_t kentry_data_size;

#define	NO_COALESCE_LIMIT	((1024 * 128) - 1)

/* Skip acquiring locks if we're in the midst of a kernel core dump */
unsigned int not_in_kdp = 1;

unsigned int vm_map_set_cache_attr_count = 0;
kern_return_t
vm_map_set_cache_attr(
	vm_map_t	map,
	vm_map_offset_t	va)
{
	vm_map_entry_t	map_entry;
	vm_object_t	object;
	kern_return_t	kr = KERN_SUCCESS;

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, va, &map_entry) ||
	    map_entry->is_sub_map) {
		/*
		 * that memory is not properly mapped
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	object = map_entry->object.vm_object;

	if (object == VM_OBJECT_NULL) {
		/*
		 * there should be a VM object here at this point
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	vm_object_lock(object);
	object->set_cache_attr = TRUE;
	vm_object_unlock(object);

	vm_map_set_cache_attr_count++;
done:
	vm_map_unlock_read(map);

	return kr;
}
#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 */
kern_return_t
vm_map_apple_protected(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	struct pager_crypt_info	*crypt_info)
{
	boolean_t	map_locked;
	kern_return_t	kr;
	vm_map_entry_t	map_entry;
	memory_object_t	protected_mem_obj;
	vm_object_t	protected_object;
	vm_map_offset_t	map_addr;

	vm_map_lock_read(map);
	map_locked = TRUE;

	/* lookup the protected VM object */
	if (!vm_map_lookup_entry(map,
				 start,
				 &map_entry) ||
	    map_entry->vme_end < end ||
	    map_entry->is_sub_map) {
		/* that memory is not properly mapped */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	protected_object = map_entry->object.vm_object;
	if (protected_object == VM_OBJECT_NULL) {
		/* there should be a VM object here at this point */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* make sure protected object stays alive while map is unlocked */
	vm_object_reference(protected_object);

	vm_map_unlock_read(map);
	map_locked = FALSE;

	/*
	 * Lookup (and create if necessary) the protected memory object
	 * matching that VM object.
	 * If successful, this also grabs a reference on the memory object,
	 * to guarantee that it doesn't go away before we get a chance to map
	 * it.
	 */
	protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);

	/* release extra ref on protected object */
	vm_object_deallocate(protected_object);

	if (protected_mem_obj == NULL) {
		kr = KERN_FAILURE;
		goto done;
	}

	/* map this memory object in place of the current one */
	map_addr = start;
	kr = vm_map_enter_mem_object(map,
				     &map_addr,
				     end - start,
				     (mach_vm_offset_t) 0,
				     VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
				     (ipc_port_t) protected_mem_obj,
				     (map_entry->offset +
				      (start - map_entry->vme_start)),
				     TRUE,
				     map_entry->protection,
				     map_entry->max_protection,
				     map_entry->inheritance);
	assert(map_addr == start);
	/*
	 * Release the reference obtained by apple_protect_pager_setup().
	 * The mapping (if it succeeded) is now holding a reference on the
	 * memory object.
	 */
	memory_object_deallocate(protected_mem_obj);

done:
	if (map_locked) {
		vm_map_unlock_read(map);
	}
	return kr;
}
#endif	/* CONFIG_CODE_DECRYPTION */
lck_grp_t		vm_map_lck_grp;
lck_grp_attr_t		vm_map_lck_grp_attr;
lck_attr_t		vm_map_lck_attr;
/*
 *	vm_map_init:
 *
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from zones -- we must
 *	initialize those zones.
 *
 *	There are three zones of interest:
 *
 *	vm_map_zone:		used to allocate maps.
 *	vm_map_entry_zone:	used to allocate map entries.
 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
 *
 *	The kernel allocates map entries from a special zone that is initially
 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
 *	the kernel to allocate more memory to an entry zone when it became
 *	empty since the very act of allocating memory implies the creation
 *	of a new entry.
 */
void
vm_map_init(
	void)
{
	vm_size_t entry_zone_alloc_size;
	const char *mez_name = "VM map entries";

	vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
			    PAGE_SIZE, "maps");
	zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
#if	defined(__LP64__)
	entry_zone_alloc_size = PAGE_SIZE * 5;
#else
	entry_zone_alloc_size = PAGE_SIZE * 6;
#endif
	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
				  1024*1024, entry_zone_alloc_size,
				  mez_name);
	zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
	zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);

	vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
					   kentry_data_size * 64, kentry_data_size,
					   "Reserved VM map entries");
	zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);

	vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
				 16*1024, PAGE_SIZE, "VM map copies");
	zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);

	/*
	 *	Cram the map and kentry zones with initial data.
	 *	Set reserved_zone non-collectible to aid zone_gc().
	 */
	zone_change(vm_map_zone, Z_COLLECT, FALSE);

	zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
	zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
	zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
	zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);

	zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
	zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);

	lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
	lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
	lck_attr_setdefault(&vm_map_lck_attr);

#if CONFIG_FREEZE
	default_freezer_init();
#endif /* CONFIG_FREEZE */
}
void
vm_map_steal_memory(
	void)
{
	uint32_t kentry_initial_pages;

	map_data_size = round_page(10 * sizeof(struct _vm_map));
	map_data = pmap_steal_memory(map_data_size);

	/*
	 * kentry_initial_pages corresponds to the number of kernel map entries
	 * required during bootstrap until the asynchronous replenishment
	 * scheme is activated and/or entries are available from the general
	 * map entry pool.
	 */
#if	defined(__LP64__)
	kentry_initial_pages = 10;
#else
	kentry_initial_pages = 6;
#endif

#if CONFIG_GZALLOC
	/* If using the guard allocator, reserve more memory for the kernel
	 * reserved map entry pool.
	 */
	if (gzalloc_enabled())
		kentry_initial_pages *= 1024;
#endif

	kentry_data_size = kentry_initial_pages * PAGE_SIZE;
	kentry_data = pmap_steal_memory(kentry_data_size);
}
void vm_kernel_reserved_entry_init(void) {
	zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
}
/*
 *	vm_map_create:
 *
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(
	pmap_t		pmap,
	vm_map_offset_t	min,
	vm_map_offset_t	max,
	boolean_t	pageable)
{
	static int		color_seed = 0;
	register vm_map_t	result;

	result = (vm_map_t) zalloc(vm_map_zone);
	if (result == VM_MAP_NULL)
		panic("vm_map_create");

	vm_map_first_entry(result) = vm_map_to_entry(result);
	vm_map_last_entry(result)  = vm_map_to_entry(result);
	result->hdr.nentries = 0;
	result->hdr.entries_pageable = pageable;

	vm_map_store_init( &(result->hdr) );

	result->hdr.page_shift = PAGE_SHIFT;

	result->size = 0;
	result->user_wire_limit = MACH_VM_MAX_ADDRESS;	/* default limit is unlimited */
	result->user_wire_size  = 0;
	result->ref_count = 1;
#if	TASK_SWAPPER
	result->res_count = 1;
	result->sw_state = MAP_SW_IN;
#endif	/* TASK_SWAPPER */
	result->pmap = pmap;
	result->min_offset = min;
	result->max_offset = max;
	result->wiring_required = FALSE;
	result->no_zero_fill = FALSE;
	result->mapped_in_other_pmaps = FALSE;
	result->wait_for_space = FALSE;
	result->switch_protect = FALSE;
	result->disable_vmentry_reuse = FALSE;
	result->map_disallow_data_exec = FALSE;
	result->highest_entry_end = 0;
	result->first_free = vm_map_to_entry(result);
	result->hint = vm_map_to_entry(result);
	result->color_rr = (color_seed++) & vm_color_mask;
	result->jit_entry_exists = FALSE;
#if CONFIG_FREEZE
	result->default_freezer_handle = NULL;
#endif
	vm_map_lock_init(result);
	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);

	return(result);
}
/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion in the
 *	given map (or map copy).  No fields are filled.
 */
#define	vm_map_entry_create(map, map_locked)	_vm_map_entry_create(&(map)->hdr, map_locked)

#define	vm_map_copy_entry_create(copy, map_locked)				\
	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
unsigned reserved_zalloc_count, nonreserved_zalloc_count;

static vm_map_entry_t
_vm_map_entry_create(
	struct vm_map_header	*map_header, boolean_t	__unused map_locked)
{
	zone_t	zone;
	vm_map_entry_t	entry;

	zone = vm_map_entry_zone;

	assert(map_header->entries_pageable ? !map_locked : TRUE);

	if (map_header->entries_pageable) {
		entry = (vm_map_entry_t) zalloc(zone);
	}
	else {
		entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);

		if (entry == VM_MAP_ENTRY_NULL) {
			zone = vm_map_entry_reserved_zone;
			entry = (vm_map_entry_t) zalloc(zone);
			OSAddAtomic(1, &reserved_zalloc_count);
		} else
			OSAddAtomic(1, &nonreserved_zalloc_count);
	}

	if (entry == VM_MAP_ENTRY_NULL)
		panic("vm_map_entry_create");
	entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);

	vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
#if	MAP_ENTRY_CREATION_DEBUG
	entry->vme_creation_maphdr = map_header;
	fastbacktrace(&entry->vme_creation_bt[0],
		      (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
#endif
	return(entry);
}
/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 *
 *	write map lock held so no need to
 *	do anything special to insure correctness
 *	of the stores
 */
#define	vm_map_entry_dispose(map, entry)			\
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define	vm_map_copy_entry_dispose(map, entry) \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))

static void
_vm_map_entry_dispose(
	register struct vm_map_header	*map_header,
	register vm_map_entry_t		entry)
{
	register zone_t		zone;

	if (map_header->entries_pageable || !(entry->from_reserved_zone))
		zone = vm_map_entry_zone;
	else
		zone = vm_map_entry_reserved_zone;

	if (!map_header->entries_pageable) {
		if (zone == vm_map_entry_zone)
			OSAddAtomic(-1, &nonreserved_zalloc_count);
		else
			OSAddAtomic(-1, &reserved_zalloc_count);
	}

	zfree(zone, entry);
}
#if MACH_ASSERT
static boolean_t first_free_check = FALSE;
boolean_t
first_free_is_valid(
	vm_map_t	map)
{
	if (!first_free_check)
		return TRUE;

	return( first_free_is_valid_store( map ));
}
#endif /* MACH_ASSERT */

#define vm_map_copy_entry_link(copy, after_where, entry)		\
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry)				\
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
#if	MACH_ASSERT && TASK_SWAPPER
/*
 *	vm_map_res_reference:
 *
 *	Adds another valid residence count to the given map.
 *
 *	Map is locked so this function can be called from
 *	vm_map_swapin.
 *
 */
void vm_map_res_reference(register vm_map_t map)
{
	/* assert map is locked */
	assert(map->res_count >= 0);
	assert(map->ref_count >= map->res_count);
	if (map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		vm_map_lock(map);
		vm_map_swapin(map);
		lck_mtx_lock(&map->s_lock);
		++map->res_count;
		vm_map_unlock(map);
	} else
		++map->res_count;
}

/*
 *	vm_map_reference_swap:
 *
 *	Adds valid reference and residence counts to the given map.
 *
 *	The map may not be in memory (i.e. zero residence count).
 *
 */
void vm_map_reference_swap(register vm_map_t map)
{
	assert(map != VM_MAP_NULL);
	lck_mtx_lock(&map->s_lock);
	assert(map->res_count >= 0);
	assert(map->ref_count >= map->res_count);
	map->ref_count++;
	vm_map_res_reference(map);
	lck_mtx_unlock(&map->s_lock);
}

/*
 *	vm_map_res_deallocate:
 *
 *	Decrement residence count on a map; possibly causing swapout.
 *
 *	The map must be in memory (i.e. non-zero residence count).
 *
 *	The map is locked, so this function is callable from vm_map_deallocate.
 *
 */
void vm_map_res_deallocate(register vm_map_t map)
{
	assert(map->res_count > 0);
	if (--map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		vm_map_lock(map);
		vm_map_swapout(map);
		vm_map_unlock(map);
		lck_mtx_lock(&map->s_lock);
	}
	assert(map->ref_count >= map->res_count);
}
#endif	/* MACH_ASSERT && TASK_SWAPPER */
/*
 *	vm_map_destroy:
 *
 *	Actually destroy a map.
 */
void
vm_map_destroy(
	vm_map_t	map,
	int		flags)
{
	vm_map_lock(map);

	/* clean up regular map entries */
	(void) vm_map_delete(map, map->min_offset, map->max_offset,
			     flags, VM_MAP_NULL);
	/* clean up leftover special mappings (commpage, etc...) */
	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
			     flags, VM_MAP_NULL);

#if CONFIG_FREEZE
	if (map->default_freezer_handle) {
		default_freezer_handle_deallocate(map->default_freezer_handle);
		map->default_freezer_handle = NULL;
	}
#endif
	vm_map_unlock(map);

	assert(map->hdr.nentries == 0);

	if(map->pmap)
		pmap_destroy(map->pmap);

	zfree(vm_map_zone, map);
}
#if	TASK_SWAPPER
/*
 * vm_map_swapin/vm_map_swapout
 *
 * Swap a map in and out, either referencing or releasing its resources.
 * These functions are internal use only; however, they must be exported
 * because they may be called from macros, which are exported.
 *
 * In the case of swapout, there could be races on the residence count,
 * so if the residence count is up, we return, assuming that a
 * vm_map_deallocate() call in the near future will bring us back.
 *
 * Locking:
 *	-- We use the map write lock for synchronization among races.
 *	-- The map write lock, and not the simple s_lock, protects the
 *	   swap state of the map.
 *	-- If a map entry is a share map, then we hold both locks, in
 *	   hierarchical order.
 *
 * Synchronization Notes:
 *	1) If a vm_map_swapin() call happens while swapout in progress, it
 *	will block on the map lock and proceed when swapout is through.
 *	2) A vm_map_reference() call at this time is illegal, and will
 *	cause a panic.  vm_map_reference() is only allowed on resident
 *	maps, since it refuses to block.
 *	3) A vm_map_swapin() call during a swapin will block, and
 *	proceed when the first swapin is done, turning into a nop.
 *	This is the reason the res_count is not incremented until
 *	after the swapin is complete.
 *	4) There is a timing hole after the checks of the res_count, before
 *	the map lock is taken, during which a swapin may get the lock
 *	before a swapout about to happen.  If this happens, the swapin
 *	will detect the state and increment the reference count, causing
 *	the swapout to be a nop, thereby delaying it until a later
 *	vm_map_deallocate.  If the swapout gets the lock first, then
 *	the swapin will simply block until the swapout is done, and
 *	then proceed.
 *
 * Because vm_map_swapin() is potentially an expensive operation, it
 * should be used with caution.
 *
 * Invariants:
 *	1) A map with a residence count of zero is either swapped, or
 *	   being swapped.
 *	2) A map with a non-zero residence count is either resident,
 *	   or being swapped in.
 */

int vm_map_swap_enable = 1;
void vm_map_swapin (vm_map_t map)
{
	register vm_map_entry_t entry;

	if (!vm_map_swap_enable)	/* debug */
		return;

	/*
	 * Map is locked
	 * First deal with various races.
	 */
	if (map->sw_state == MAP_SW_IN)
		/*
		 * we raced with swapout and won.  Returning will incr.
		 * the res_count, turning the swapout into a nop.
		 */
		return;

	/*
	 * The residence count must be zero.  If we raced with another
	 * swapin, the state would have been IN; if we raced with a
	 * swapout (after another competing swapin), we must have lost
	 * the race to get here (see above comment), in which case
	 * res_count is still 0.
	 */
	assert(map->res_count == 0);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_OUT);

	/*
	 * We now operate upon each map entry.  If the entry is a sub-
	 * or share-map, we call vm_map_res_reference upon it.
	 * If the entry is an object, we call vm_object_res_reference
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_reference.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (entry->object.vm_object != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = entry->object.sub_map;
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_reference(lmap);
				lck_mtx_unlock(&lmap->s_lock);
			} else {
				vm_object_t object = entry->object.vm_object;
				vm_object_lock(object);
				/*
				 * This call may iterate through the
				 * shadow chain.
				 */
				vm_object_res_reference(object);
				vm_object_unlock(object);
			}
		}
		entry = entry->vme_next;
	}
	assert(map->sw_state == MAP_SW_OUT);
	map->sw_state = MAP_SW_IN;
}
void vm_map_swapout(vm_map_t map)
{
	register vm_map_entry_t entry;

	/*
	 * Map is locked
	 * First deal with various races.
	 * If we raced with a swapin and lost, the residence count
	 * will have been incremented to 1, and we simply return.
	 */
	lck_mtx_lock(&map->s_lock);
	if (map->res_count != 0) {
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	lck_mtx_unlock(&map->s_lock);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_IN);

	if (!vm_map_swap_enable)
		return;

	/*
	 * We now operate upon each map entry.  If the entry is a sub-
	 * or share-map, we call vm_map_res_deallocate upon it.
	 * If the entry is an object, we call vm_object_res_deallocate
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_deallocate.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (entry->object.vm_object != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = entry->object.sub_map;
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_deallocate(lmap);
				lck_mtx_unlock(&lmap->s_lock);
			} else {
				vm_object_t object = entry->object.vm_object;
				vm_object_lock(object);
				/*
				 * This call may take a long time,
				 * since it could actively push
				 * out pages (if we implement it
				 * that way).
				 */
				vm_object_res_deallocate(object);
				vm_object_unlock(object);
			}
		}
		entry = entry->vme_next;
	}
	assert(map->sw_state == MAP_SW_IN);
	map->sw_state = MAP_SW_OUT;
}

#endif	/* TASK_SWAPPER */
/*
 *	vm_map_lookup_entry:	[ internal use only ]
 *
 *	Calls into the vm map store layer to find the map
 *	entry containing (or immediately preceding) the
 *	specified address in the given map; the entry is returned
 *	in the "entry" parameter.  The boolean
 *	result indicates whether the address is
 *	actually contained in the map.
 */
boolean_t
vm_map_lookup_entry(
	register vm_map_t		map,
	register vm_map_offset_t	address,
	vm_map_entry_t			*entry)		/* OUT */
{
	return ( vm_map_store_lookup_entry( map, address, entry ));
}
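
/*
 * Illustrative sketch (not part of the original source, and compiled out):
 * the usual calling pattern for vm_map_lookup_entry(), mirroring
 * vm_map_set_cache_attr() earlier in this file.  The lookup only says which
 * entry contains (or precedes) "va"; the caller still holds the map lock
 * and validates the entry itself.  The helper name is an assumption.
 */
#if 0
static kern_return_t
vm_map_lookup_entry_example(vm_map_t map, vm_map_offset_t va)
{
	vm_map_entry_t	entry;
	kern_return_t	kr = KERN_SUCCESS;

	vm_map_lock_read(map);
	if (!vm_map_lookup_entry(map, va, &entry) || entry->is_sub_map) {
		/* va is unmapped, or only mapped through a submap */
		kr = KERN_INVALID_ARGUMENT;
	}
	vm_map_unlock_read(map);
	return kr;
}
#endif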
/*
 *	Routine:	vm_map_find_space
 *	Purpose:
 *		Allocate a range in the specified virtual address map,
 *		returning the entry allocated for that range.
 *		Used by kmem_alloc, etc.
 *
 *		The map must be NOT be locked. It will be returned locked
 *		on KERN_SUCCESS, unlocked on failure.
 *
 *		If an entry is allocated, the object/offset fields
 *		are initialized to zero.
 */
kern_return_t
vm_map_find_space(
	register vm_map_t	map,
	vm_map_offset_t		*address,	/* OUT */
	vm_map_size_t		size,
	vm_map_offset_t		mask,
	int			flags,
	vm_map_entry_t		*o_entry)	/* OUT */
{
	register vm_map_entry_t		entry, new_entry;
	register vm_map_offset_t	start;
	register vm_map_offset_t	end;

	if (size == 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

	if (flags & VM_FLAGS_GUARD_AFTER) {
		/* account for the back guard page in the size */
		size += VM_MAP_PAGE_SIZE(map);
	}

	new_entry = vm_map_entry_create(map, FALSE);

	/*
	 *	Look for the first possible address; if there's already
	 *	something at this address, we have to start after it.
	 */

	vm_map_lock(map);

	if( map->disable_vmentry_reuse == TRUE) {
		VM_MAP_HIGHEST_ENTRY(map, entry, start);
	} else {
		assert(first_free_is_valid(map));
		if ((entry = map->first_free) == vm_map_to_entry(map))
			start = map->min_offset;
		else
			start = entry->vme_end;
	}

	/*
	 *	In any case, the "entry" always precedes
	 *	the proposed new region throughout the loop:
	 */

	while (TRUE) {
		register vm_map_entry_t	next;

		/*
		 *	Find the end of the proposed new region.
		 *	Be sure we didn't go beyond the end, or
		 *	wrap around the address.
		 */

		if (flags & VM_FLAGS_GUARD_BEFORE) {
			/* reserve space for the front guard page */
			start += VM_MAP_PAGE_SIZE(map);
		}
		end = ((start + mask) & ~mask);

		if (end < start) {
			vm_map_entry_dispose(map, new_entry);
			vm_map_unlock(map);
			return(KERN_NO_SPACE);
		}
		start = end;
		end += size;

		if ((end > map->max_offset) || (end < start)) {
			vm_map_entry_dispose(map, new_entry);
			vm_map_unlock(map);
			return(KERN_NO_SPACE);
		}

		/*
		 *	If there are no more entries, we must win.
		 */

		next = entry->vme_next;
		if (next == vm_map_to_entry(map))
			break;

		/*
		 *	If there is another entry, it must be
		 *	after the end of the potential new region.
		 */

		if (next->vme_start >= end)
			break;

		/*
		 *	Didn't fit -- move to the next entry.
		 */

		entry = next;
		start = entry->vme_end;
	}

	/*
	 *	At this point,
	 *		"start" and "end" should define the endpoints of the
	 *			available new range, and
	 *		"entry" should refer to the region before the new
	 *			range, and
	 *
	 *		the map should be locked.
	 */

	if (flags & VM_FLAGS_GUARD_BEFORE) {
		/* go back for the front guard page */
		start -= VM_MAP_PAGE_SIZE(map);
	}
	*address = start;

	assert(start < end);
	new_entry->vme_start = start;
	new_entry->vme_end = end;
	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
				   VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
				   VM_MAP_PAGE_MASK(map)));

	new_entry->is_shared = FALSE;
	new_entry->is_sub_map = FALSE;
	new_entry->use_pmap = FALSE;
	new_entry->object.vm_object = VM_OBJECT_NULL;
	new_entry->offset = (vm_object_offset_t) 0;

	new_entry->needs_copy = FALSE;

	new_entry->inheritance = VM_INHERIT_DEFAULT;
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_ALL;
	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
	new_entry->wired_count = 0;
	new_entry->user_wired_count = 0;

	new_entry->in_transition = FALSE;
	new_entry->needs_wakeup = FALSE;
	new_entry->no_cache = FALSE;
	new_entry->permanent = FALSE;
	new_entry->superpage_size = FALSE;
	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}

	new_entry->used_for_jit = 0;

	new_entry->alias = 0;
	new_entry->zero_wired_pages = FALSE;

	VM_GET_FLAGS_ALIAS(flags, new_entry->alias);

	/*
	 *	Insert the new entry into the list
	 */

	vm_map_store_entry_link(map, entry, new_entry);

	map->size += size;

	/*
	 *	Update the lookup hint
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);

	*o_entry = new_entry;
	return(KERN_SUCCESS);
}
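
/*
 * Illustrative sketch (not part of the original source, and compiled out):
 * how a kernel allocator such as kmem_alloc() drives vm_map_find_space().
 * On success the map comes back locked with a zero-initialized entry
 * reserved for the range, so the caller installs its object and then
 * unlocks.  The helper name and the object/size parameters are assumptions
 * for the example.
 */
#if 0
static kern_return_t
vm_map_find_space_example(vm_map_t map, vm_object_t object,
			  vm_map_size_t size, vm_map_offset_t *addrp)
{
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(map, addrp, size, (vm_map_offset_t)0, 0, &entry);
	if (kr != KERN_SUCCESS)
		return kr;			/* map left unlocked on failure */

	entry->object.vm_object = object;	/* object/offset start out zero */
	entry->offset = (vm_object_offset_t)0;
	vm_map_unlock(map);			/* returned locked on success */
	return KERN_SUCCESS;
}
#endif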
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;

/*
 *	Routine:	vm_map_pmap_enter [internal only]
 *
 *	Description:
 *		Force pages from the specified object to be entered into
 *		the pmap at the specified address if they are present.
 *		As soon as a page is not found in the object, the scan ends.
 *
 *	Returns:
 *		Nothing.
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */
static void
vm_map_pmap_enter(
	vm_map_t		map,
	register vm_map_offset_t	addr,
	register vm_map_offset_t	end_addr,
	register vm_object_t		object,
	vm_object_offset_t		offset,
	vm_prot_t			protection)
{
	int			type_of_fault;
	kern_return_t		kr;

	if(map->pmap == 0)
		return;

	while (addr < end_addr) {
		register vm_page_t	m;

		vm_object_lock(object);

		m = vm_page_lookup(object, offset);
		/*
		 * ENCRYPTED SWAP:
		 * The user should never see encrypted data, so do not
		 * enter an encrypted page in the page table.
		 */
		if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
		    m->fictitious ||
		    (m->unusual && ( m->error || m->restart || m->absent))) {
			vm_object_unlock(object);
			return;
		}

		if (vm_map_pmap_enter_print) {
			printf("vm_map_pmap_enter:");
			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
			       map, (unsigned long long)addr, object, (unsigned long long)offset);
		}
		type_of_fault = DBG_CACHE_HIT_FAULT;
		kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
				    VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
				    &type_of_fault);

		vm_object_unlock(object);

		offset += PAGE_SIZE_64;
		addr += PAGE_SIZE;
	}
}
vm_map_pmap_is_empty(
1446 vm_map_offset_t start
,
1447 vm_map_offset_t end
);
1448 boolean_t
vm_map_pmap_is_empty(
1450 vm_map_offset_t start
,
1451 vm_map_offset_t end
)
1453 #ifdef MACHINE_PMAP_IS_EMPTY
1454 return pmap_is_empty(map
->pmap
, start
, end
);
1455 #else /* MACHINE_PMAP_IS_EMPTY */
1456 vm_map_offset_t offset
;
1459 if (map
->pmap
== NULL
) {
1463 for (offset
= start
;
1465 offset
+= PAGE_SIZE
) {
1466 phys_page
= pmap_find_phys(map
->pmap
, offset
);
1468 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1469 "page %d at 0x%llx\n",
1470 map
, (long long)start
, (long long)end
,
1471 phys_page
, (long long)offset
);
1476 #endif /* MACHINE_PMAP_IS_EMPTY */
#define MAX_TRIES_TO_GET_RANDOM_ADDRESS	1000
kern_return_t
vm_map_random_address_for_size(
	vm_map_t	map,
	vm_map_offset_t	*address,
	vm_map_size_t	size)
{
	kern_return_t	kr = KERN_SUCCESS;
	int		tries = 0;
	vm_map_offset_t	random_addr = 0;
	vm_map_offset_t	hole_end;

	vm_map_entry_t	next_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t	prev_entry = VM_MAP_ENTRY_NULL;
	vm_map_size_t	vm_hole_size = 0;
	vm_map_size_t	addr_space_size;

	addr_space_size = vm_map_max(map) - vm_map_min(map);

	assert(page_aligned(size));

	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
		random_addr = vm_map_trunc_page(
			vm_map_min(map) +(random_addr % addr_space_size),
			VM_MAP_PAGE_MASK(map));

		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
			if (prev_entry == vm_map_to_entry(map)) {
				next_entry = vm_map_first_entry(map);
			} else {
				next_entry = prev_entry->vme_next;
			}
			if (next_entry == vm_map_to_entry(map)) {
				hole_end = vm_map_max(map);
			} else {
				hole_end = next_entry->vme_start;
			}
			vm_hole_size = hole_end - random_addr;
			if (vm_hole_size >= size) {
				*address = random_addr;
				break;
			}
		}
		tries++;
	}

	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		kr = KERN_NO_SPACE;
	}

	return kr;
}
1533 * Routine: vm_map_enter
1536 * Allocate a range in the specified virtual address map.
1537 * The resulting range will refer to memory defined by
1538 * the given memory object and offset into that object.
1540 * Arguments are as defined in the vm_map call.
1542 int _map_enter_debug
= 0;
1543 static unsigned int vm_map_enter_restore_successes
= 0;
1544 static unsigned int vm_map_enter_restore_failures
= 0;
1548 vm_map_offset_t
*address
, /* IN/OUT */
1550 vm_map_offset_t mask
,
1553 vm_object_offset_t offset
,
1554 boolean_t needs_copy
,
1555 vm_prot_t cur_protection
,
1556 vm_prot_t max_protection
,
1557 vm_inherit_t inheritance
)
1559 vm_map_entry_t entry
, new_entry
;
1560 vm_map_offset_t start
, tmp_start
, tmp_offset
;
1561 vm_map_offset_t end
, tmp_end
;
1562 vm_map_offset_t tmp2_start
, tmp2_end
;
1563 vm_map_offset_t step
;
1564 kern_return_t result
= KERN_SUCCESS
;
1565 vm_map_t zap_old_map
= VM_MAP_NULL
;
1566 vm_map_t zap_new_map
= VM_MAP_NULL
;
1567 boolean_t map_locked
= FALSE
;
1568 boolean_t pmap_empty
= TRUE
;
1569 boolean_t new_mapping_established
= FALSE
;
1570 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
1571 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
1572 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
1573 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
1574 boolean_t is_submap
= ((flags
& VM_FLAGS_SUBMAP
) != 0);
1575 boolean_t permanent
= ((flags
& VM_FLAGS_PERMANENT
) != 0);
1576 boolean_t entry_for_jit
= ((flags
& VM_FLAGS_MAP_JIT
) != 0);
1577 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
1579 vm_map_offset_t effective_min_offset
, effective_max_offset
;
1581 boolean_t clear_map_aligned
= FALSE
;
1583 if (superpage_size
) {
1584 switch (superpage_size
) {
1586 * Note that the current implementation only supports
1587 * a single size for superpages, SUPERPAGE_SIZE, per
1588 * architecture. As soon as more sizes are supposed
1589 * to be supported, SUPERPAGE_SIZE has to be replaced
1590 * with a lookup of the size depending on superpage_size.
1593 case SUPERPAGE_SIZE_ANY
:
1594 /* handle it like 2 MB and round up to page size */
1595 size
= (size
+ 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1596 case SUPERPAGE_SIZE_2MB
:
1600 return KERN_INVALID_ARGUMENT
;
1602 mask
= SUPERPAGE_SIZE
-1;
1603 if (size
& (SUPERPAGE_SIZE
-1))
1604 return KERN_INVALID_ARGUMENT
;
1605 inheritance
= VM_INHERIT_NONE
; /* fork() children won't inherit superpages */
1612 /* submaps can not be purgeable */
1613 return KERN_INVALID_ARGUMENT
;
1615 if (object
== VM_OBJECT_NULL
) {
1616 /* submaps can not be created lazily */
1617 return KERN_INVALID_ARGUMENT
;
1620 if (flags
& VM_FLAGS_ALREADY
) {
1622 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1623 * is already present. For it to be meaningul, the requested
1624 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1625 * we shouldn't try and remove what was mapped there first
1626 * (!VM_FLAGS_OVERWRITE).
1628 if ((flags
& VM_FLAGS_ANYWHERE
) ||
1629 (flags
& VM_FLAGS_OVERWRITE
)) {
1630 return KERN_INVALID_ARGUMENT
;
1634 effective_min_offset
= map
->min_offset
;
1636 if (flags
& VM_FLAGS_BEYOND_MAX
) {
1638 * Allow an insertion beyond the map's max offset.
1640 if (vm_map_is_64bit(map
))
1641 effective_max_offset
= 0xFFFFFFFFFFFFF000ULL
;
1643 effective_max_offset
= 0x00000000FFFFF000ULL
;
1645 effective_max_offset
= map
->max_offset
;
1649 (offset
& PAGE_MASK_64
) != 0) {
1651 return KERN_INVALID_ARGUMENT
;
1654 VM_GET_FLAGS_ALIAS(flags
, alias
);
1656 #define RETURN(value) { result = value; goto BailOut; }
1658 assert(page_aligned(*address
));
1659 assert(page_aligned(size
));
1661 if (!VM_MAP_PAGE_ALIGNED(size
, VM_MAP_PAGE_MASK(map
))) {
1663 * In most cases, the caller rounds the size up to the
1665 * If we get a size that is explicitly not map-aligned here,
1666 * we'll have to respect the caller's wish and mark the
1667 * mapping as "not map-aligned" to avoid tripping the
1668 * map alignment checks later.
1670 clear_map_aligned
= TRUE
;
1674 * Only zero-fill objects are allowed to be purgable.
1675 * LP64todo - limit purgable objects to 32-bits for now
1679 (object
!= VM_OBJECT_NULL
&&
1680 (object
->vo_size
!= size
||
1681 object
->purgable
== VM_PURGABLE_DENY
))
1682 || size
> ANON_MAX_SIZE
)) /* LP64todo: remove when dp capable */
1683 return KERN_INVALID_ARGUMENT
;
1685 if (!anywhere
&& overwrite
) {
1687 * Create a temporary VM map to hold the old mappings in the
1688 * affected area while we create the new one.
1689 * This avoids releasing the VM map lock in
1690 * vm_map_entry_delete() and allows atomicity
1691 * when we want to replace some mappings with a new one.
1692 * It also allows us to restore the old VM mappings if the
1693 * new mapping fails.
1695 zap_old_map
= vm_map_create(PMAP_NULL
,
1698 map
->hdr
.entries_pageable
);
1699 vm_map_set_page_shift(zap_old_map
, VM_MAP_PAGE_SHIFT(map
));
1710 if (entry_for_jit
) {
1711 if (map
->jit_entry_exists
) {
1712 result
= KERN_INVALID_ARGUMENT
;
1716 * Get a random start address.
1718 result
= vm_map_random_address_for_size(map
, address
, size
);
1719 if (result
!= KERN_SUCCESS
) {
1727 * Calculate the first possible address.
1730 if (start
< effective_min_offset
)
1731 start
= effective_min_offset
;
1732 if (start
> effective_max_offset
)
1733 RETURN(KERN_NO_SPACE
);
1736 * Look for the first possible address;
1737 * if there's already something at this
1738 * address, we have to start after it.
1741 if( map
->disable_vmentry_reuse
== TRUE
) {
1742 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
1744 assert(first_free_is_valid(map
));
1746 entry
= map
->first_free
;
1748 if (entry
== vm_map_to_entry(map
)) {
1751 if (entry
->vme_next
== vm_map_to_entry(map
)){
1753 * Hole at the end of the map.
1757 if (start
< (entry
->vme_next
)->vme_start
) {
1758 start
= entry
->vme_end
;
1759 start
= vm_map_round_page(start
,
1760 VM_MAP_PAGE_MASK(map
));
1763 * Need to do a lookup.
1770 if (entry
== NULL
) {
1771 vm_map_entry_t tmp_entry
;
1772 if (vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
1773 assert(!entry_for_jit
);
1774 start
= tmp_entry
->vme_end
;
1775 start
= vm_map_round_page(start
,
1776 VM_MAP_PAGE_MASK(map
));
1783 * In any case, the "entry" always precedes
1784 * the proposed new region throughout the
1789 register vm_map_entry_t next
;
1792 * Find the end of the proposed new region.
1793 * Be sure we didn't go beyond the end, or
1794 * wrap around the address.
1797 end
= ((start
+ mask
) & ~mask
);
1798 end
= vm_map_round_page(end
,
1799 VM_MAP_PAGE_MASK(map
));
1801 RETURN(KERN_NO_SPACE
);
1803 assert(VM_MAP_PAGE_ALIGNED(start
,
1804 VM_MAP_PAGE_MASK(map
)));
1807 if ((end
> effective_max_offset
) || (end
< start
)) {
1808 if (map
->wait_for_space
) {
1809 if (size
<= (effective_max_offset
-
1810 effective_min_offset
)) {
1811 assert_wait((event_t
)map
,
1815 thread_block(THREAD_CONTINUE_NULL
);
1819 RETURN(KERN_NO_SPACE
);
1823 * If there are no more entries, we must win.
1826 next
= entry
->vme_next
;
1827 if (next
== vm_map_to_entry(map
))
1831 * If there is another entry, it must be
1832 * after the end of the potential new region.
1835 if (next
->vme_start
>= end
)
1839 * Didn't fit -- move to the next entry.
1843 start
= entry
->vme_end
;
1844 start
= vm_map_round_page(start
,
1845 VM_MAP_PAGE_MASK(map
));
1848 assert(VM_MAP_PAGE_ALIGNED(*address
,
1849 VM_MAP_PAGE_MASK(map
)));
1853 * the address doesn't itself violate
1854 * the mask requirement.
1859 if ((start
& mask
) != 0)
1860 RETURN(KERN_NO_SPACE
);
1863 * ... the address is within bounds
1868 if ((start
< effective_min_offset
) ||
1869 (end
> effective_max_offset
) ||
1871 RETURN(KERN_INVALID_ADDRESS
);
1874 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
1876 * Fixed mapping and "overwrite" flag: attempt to
1877 * remove all existing mappings in the specified
1878 * address range, saving them in our "zap_old_map".
1880 (void) vm_map_delete(map
, start
, end
,
1881 VM_MAP_REMOVE_SAVE_ENTRIES
,
1886 * ... the starting address isn't allocated
1889 if (vm_map_lookup_entry(map
, start
, &entry
)) {
1890 if (! (flags
& VM_FLAGS_ALREADY
)) {
1891 RETURN(KERN_NO_SPACE
);
1894 * Check if what's already there is what we want.
1897 tmp_offset
= offset
;
1898 if (entry
->vme_start
< start
) {
1899 tmp_start
-= start
- entry
->vme_start
;
1900 tmp_offset
-= start
- entry
->vme_start
;
1903 for (; entry
->vme_start
< end
;
1904 entry
= entry
->vme_next
) {
1906 * Check if the mapping's attributes
1907 * match the existing map entry.
1909 if (entry
== vm_map_to_entry(map
) ||
1910 entry
->vme_start
!= tmp_start
||
1911 entry
->is_sub_map
!= is_submap
||
1912 entry
->offset
!= tmp_offset
||
1913 entry
->needs_copy
!= needs_copy
||
1914 entry
->protection
!= cur_protection
||
1915 entry
->max_protection
!= max_protection
||
1916 entry
->inheritance
!= inheritance
||
1917 entry
->alias
!= alias
) {
1918 /* not the same mapping ! */
1919 RETURN(KERN_NO_SPACE
);
1922 * Check if the same object is being mapped.
1925 if (entry
->object
.sub_map
!=
1926 (vm_map_t
) object
) {
1927 /* not the same submap */
1928 RETURN(KERN_NO_SPACE
);
1931 if (entry
->object
.vm_object
!= object
) {
1932 /* not the same VM object... */
1935 obj2
= entry
->object
.vm_object
;
1936 if ((obj2
== VM_OBJECT_NULL
||
1938 (object
== VM_OBJECT_NULL
||
1939 object
->internal
)) {
1946 RETURN(KERN_NO_SPACE
);
1951 tmp_offset
+= entry
->vme_end
- entry
->vme_start
;
1952 tmp_start
+= entry
->vme_end
- entry
->vme_start
;
1953 if (entry
->vme_end
>= end
) {
1954 /* reached the end of our mapping */
1958 /* it all matches: let's use what's already there ! */
1959 RETURN(KERN_MEMORY_PRESENT
);
1963 * ... the next region doesn't overlap the
1967 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
1968 (entry
->vme_next
->vme_start
< end
))
1969 RETURN(KERN_NO_SPACE
);
1974 * "start" and "end" should define the endpoints of the
1975 * available new range, and
1976 * "entry" should refer to the region before the new
1979 * the map should be locked.
1983 * See whether we can avoid creating a new entry (and object) by
1984 * extending one of our neighbors. [So far, we only attempt to
1985 * extend from below.] Note that we can never extend/join
1986 * purgable objects because they need to remain distinct
1987 * entities in order to implement their "volatile object"
1991 if (purgable
|| entry_for_jit
) {
1992 if (object
== VM_OBJECT_NULL
) {
1993 object
= vm_object_allocate(size
);
1994 object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
1996 object
->purgable
= VM_PURGABLE_NONVOLATILE
;
1998 offset
= (vm_object_offset_t
)0;
2000 } else if ((is_submap
== FALSE
) &&
2001 (object
== VM_OBJECT_NULL
) &&
2002 (entry
!= vm_map_to_entry(map
)) &&
2003 (entry
->vme_end
== start
) &&
2004 (!entry
->is_shared
) &&
2005 (!entry
->is_sub_map
) &&
2006 ((alias
== VM_MEMORY_REALLOC
) || (entry
->alias
== alias
)) &&
2007 (entry
->inheritance
== inheritance
) &&
2008 (entry
->protection
== cur_protection
) &&
2009 (entry
->max_protection
== max_protection
) &&
2010 (entry
->behavior
== VM_BEHAVIOR_DEFAULT
) &&
2011 (entry
->in_transition
== 0) &&
2012 (entry
->no_cache
== no_cache
) &&
2014 * No coalescing if not map-aligned, to avoid propagating
2015 * that condition any further than needed:
2017 (!entry
->map_aligned
|| !clear_map_aligned
) &&
2018 ((entry
->vme_end
- entry
->vme_start
) + size
<=
2019 (alias
== VM_MEMORY_REALLOC
?
2021 NO_COALESCE_LIMIT
)) &&
2022 (entry
->wired_count
== 0)) { /* implies user_wired_count == 0 */
2023 if (vm_object_coalesce(entry
->object
.vm_object
,
2026 (vm_object_offset_t
) 0,
2027 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
),
2028 (vm_map_size_t
)(end
- entry
->vme_end
))) {
2031 * Coalesced the two objects - can extend
2032 * the previous map entry to include the
2035 map
->size
+= (end
- entry
->vme_end
);
2036 assert(entry
->vme_start
< end
);
2037 assert(VM_MAP_PAGE_ALIGNED(end
,
2038 VM_MAP_PAGE_MASK(map
)));
2039 entry
->vme_end
= end
;
2040 vm_map_store_update_first_free(map
, map
->first_free
);
2041 RETURN(KERN_SUCCESS
);
2045 step
= superpage_size
? SUPERPAGE_SIZE
: (end
- start
);
2048 for (tmp2_start
= start
; tmp2_start
<end
; tmp2_start
+= step
) {
2049 tmp2_end
= tmp2_start
+ step
;
2051 * Create a new entry
2052 * LP64todo - for now, we can only allocate 4GB internal objects
2053 * because the default pager can't page bigger ones. Remove this
2057 * The reserved "page zero" in each process's address space can
2058 * be arbitrarily large. Splitting it into separate 4GB objects and
2059 * therefore different VM map entries serves no purpose and just
2060 * slows down operations on the VM map, so let's not split the
2061 * allocation into 4GB chunks if the max protection is NONE. That
2062 * memory should never be accessible, so it will never get to the
2065 tmp_start
= tmp2_start
;
2066 if (object
== VM_OBJECT_NULL
&&
2067 size
> (vm_map_size_t
)ANON_CHUNK_SIZE
&&
2068 max_protection
!= VM_PROT_NONE
&&
2069 superpage_size
== 0)
2070 tmp_end
= tmp_start
+ (vm_map_size_t
)ANON_CHUNK_SIZE
;
2074 new_entry
= vm_map_entry_insert(map
, entry
, tmp_start
, tmp_end
,
2075 object
, offset
, needs_copy
,
2077 cur_protection
, max_protection
,
2078 VM_BEHAVIOR_DEFAULT
,
2079 (entry_for_jit
)? VM_INHERIT_NONE
: inheritance
,
2084 new_entry
->alias
= alias
;
2086 if (!(map
->jit_entry_exists
)){
2087 new_entry
->used_for_jit
= TRUE
;
2088 map
->jit_entry_exists
= TRUE
;
2094 boolean_t submap_is_64bit
;
2097 new_entry
->is_sub_map
= TRUE
;
2098 submap
= (vm_map_t
) object
;
2099 submap_is_64bit
= vm_map_is_64bit(submap
);
2100 use_pmap
= (alias
== VM_MEMORY_SHARED_PMAP
);
2101 #ifndef NO_NESTED_PMAP
2102 if (use_pmap
&& submap
->pmap
== NULL
) {
2103 ledger_t ledger
= map
->pmap
->ledger
;
2104 /* we need a sub pmap to nest... */
2105 submap
->pmap
= pmap_create(ledger
, 0,
2107 if (submap
->pmap
== NULL
) {
2108 /* let's proceed without nesting... */
2111 if (use_pmap
&& submap
->pmap
!= NULL
) {
2112 kr
= pmap_nest(map
->pmap
,
2116 tmp_end
- tmp_start
);
2117 if (kr
!= KERN_SUCCESS
) {
2118 printf("vm_map_enter: "
2119 "pmap_nest(0x%llx,0x%llx) "
2121 (long long)tmp_start
,
2125 /* we're now nested ! */
2126 new_entry
->use_pmap
= TRUE
;
2130 #endif /* NO_NESTED_PMAP */
2134 if (superpage_size
) {
2136 vm_object_t sp_object
;
2140 /* allocate one superpage */
2141 kr
= cpm_allocate(SUPERPAGE_SIZE
, &pages
, 0, SUPERPAGE_NBASEPAGES
-1, TRUE
, 0);
2142 if (kr
!= KERN_SUCCESS
) {
2143 new_mapping_established
= TRUE
; /* will cause deallocation of whole range */
2147 /* create one vm_object per superpage */
2148 sp_object
= vm_object_allocate((vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
2149 sp_object
->phys_contiguous
= TRUE
;
2150 sp_object
->vo_shadow_offset
= (vm_object_offset_t
)pages
->phys_page
*PAGE_SIZE
;
2151 entry
->object
.vm_object
= sp_object
;
2153 /* enter the base pages into the object */
2154 vm_object_lock(sp_object
);
2155 for (offset
= 0; offset
< SUPERPAGE_SIZE
; offset
+= PAGE_SIZE
) {
2157 pmap_zero_page(m
->phys_page
);
2158 pages
= NEXT_PAGE(m
);
2159 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
2160 vm_page_insert(m
, sp_object
, offset
);
2162 vm_object_unlock(sp_object
);
2164 } while (tmp_end
!= tmp2_end
&&
2165 (tmp_start
= tmp_end
) &&
2166 (tmp_end
= (tmp2_end
- tmp_end
> (vm_map_size_t
)ANON_CHUNK_SIZE
) ?
2167 tmp_end
+ (vm_map_size_t
)ANON_CHUNK_SIZE
: tmp2_end
));
2173 new_mapping_established
= TRUE
;
2175 /* Wire down the new entry if the user
2176 * requested all new map entries be wired.
2178 if ((map
->wiring_required
)||(superpage_size
)) {
2179 pmap_empty
= FALSE
; /* pmap won't be empty */
2180 kr
= vm_map_wire(map
, start
, end
,
2181 new_entry
->protection
, TRUE
);
2185 if ((object
!= VM_OBJECT_NULL
) &&
2186 (vm_map_pmap_enter_enable
) &&
2189 (size
< (128*1024))) {
2190 pmap_empty
= FALSE
; /* pmap won't be empty */
2192 if (override_nx(map
, alias
) && cur_protection
)
2193 cur_protection
|= VM_PROT_EXECUTE
;
2195 vm_map_pmap_enter(map
, start
, end
,
2196 object
, offset
, cur_protection
);
	if (result == KERN_SUCCESS) {
		vm_prot_t pager_prot;
		memory_object_t pager;

		if (pmap_empty &&
		    !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
			assert(vm_map_pmap_is_empty(map,
						    *address,
						    *address+size));
		}

		/*
		 * For "named" VM objects, let the pager know that the
		 * memory object is being mapped.  Some pagers need to keep
		 * track of this, to know when they can reclaim the memory
		 * object, for example.
		 * VM calls memory_object_map() for each mapping (specifying
		 * the protection of each mapping) and calls
		 * memory_object_last_unmap() when all the mappings are gone.
		 */
		pager_prot = max_protection;
		if (needs_copy) {
			/*
			 * Copy-On-Write mapping: won't modify
			 * the memory object.
			 */
			pager_prot &= ~VM_PROT_WRITE;
		}
		if (object != VM_OBJECT_NULL &&
		    object->pager != MEMORY_OBJECT_NULL) {
			vm_object_lock(object);
			pager = object->pager;
			if (object->named &&
			    pager != MEMORY_OBJECT_NULL) {
				assert(object->pager_ready);
				vm_object_mapping_wait(object, THREAD_UNINT);
				vm_object_mapping_begin(object);
				vm_object_unlock(object);

				kr = memory_object_map(pager, pager_prot);
				assert(kr == KERN_SUCCESS);

				vm_object_lock(object);
				vm_object_mapping_end(object);
			}
			vm_object_unlock(object);
		}
	}
2250 if (new_mapping_established
) {
2252 * We have to get rid of the new mappings since we
2253 * won't make them available to the user.
2254 * Try and do that atomically, to minimize the risk
2255 * that someone else create new mappings that range.
2257 zap_new_map
= vm_map_create(PMAP_NULL
,
2260 map
->hdr
.entries_pageable
);
2261 vm_map_set_page_shift(zap_new_map
,
2262 VM_MAP_PAGE_SHIFT(map
));
2267 (void) vm_map_delete(map
, *address
, *address
+size
,
2268 VM_MAP_REMOVE_SAVE_ENTRIES
,
2271 if (zap_old_map
!= VM_MAP_NULL
&&
2272 zap_old_map
->hdr
.nentries
!= 0) {
2273 vm_map_entry_t entry1
, entry2
;
2276 * The new mapping failed. Attempt to restore
2277 * the old mappings, saved in the "zap_old_map".
2284 /* first check if the coast is still clear */
2285 start
= vm_map_first_entry(zap_old_map
)->vme_start
;
2286 end
= vm_map_last_entry(zap_old_map
)->vme_end
;
2287 if (vm_map_lookup_entry(map
, start
, &entry1
) ||
2288 vm_map_lookup_entry(map
, end
, &entry2
) ||
2291 * Part of that range has already been
2292 * re-mapped: we can't restore the old
2295 vm_map_enter_restore_failures
++;
2298 * Transfer the saved map entries from
2299 * "zap_old_map" to the original "map",
2300 * inserting them all after "entry1".
2302 for (entry2
= vm_map_first_entry(zap_old_map
);
2303 entry2
!= vm_map_to_entry(zap_old_map
);
2304 entry2
= vm_map_first_entry(zap_old_map
)) {
2305 vm_map_size_t entry_size
;
2307 entry_size
= (entry2
->vme_end
-
2309 vm_map_store_entry_unlink(zap_old_map
,
2311 zap_old_map
->size
-= entry_size
;
2312 vm_map_store_entry_link(map
, entry1
, entry2
);
2313 map
->size
+= entry_size
;
2316 if (map
->wiring_required
) {
2318 * XXX TODO: we should rewire the
2322 vm_map_enter_restore_successes
++;
	/*
	 * Get rid of the "zap_maps" and all the map entries that
	 * they may still contain.
	 */
	if (zap_old_map != VM_MAP_NULL) {
		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_old_map = VM_MAP_NULL;
	}
	if (zap_new_map != VM_MAP_NULL) {
		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
		zap_new_map = VM_MAP_NULL;
	}
kern_return_t
vm_map_enter_mem_object(
	vm_map_t		target_map,
	vm_map_offset_t		*address,
	vm_map_size_t		initial_size,
	vm_map_offset_t		mask,
	int			flags,
	ipc_port_t		port,
	vm_object_offset_t	offset,
	boolean_t		copy,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	vm_map_address_t	map_addr;
	vm_map_size_t		map_size;
	vm_object_t		object;
	vm_object_size_t	size;
	kern_return_t		result;
	boolean_t		mask_cur_protection, mask_max_protection;
	vm_map_offset_t		offset_in_mapping;

	mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
	mask_max_protection = max_protection & VM_PROT_IS_MASK;
	cur_protection &= ~VM_PROT_IS_MASK;
	max_protection &= ~VM_PROT_IS_MASK;

	/*
	 * Check arguments for validity
	 */
	if ((target_map == VM_MAP_NULL) ||
	    (cur_protection & ~VM_PROT_ALL) ||
	    (max_protection & ~VM_PROT_ALL) ||
	    (inheritance > VM_INHERIT_LAST_VALID) ||
	    initial_size == 0)
		return KERN_INVALID_ARGUMENT;

	map_addr = vm_map_trunc_page(*address,
				     VM_MAP_PAGE_MASK(target_map));
	map_size = vm_map_round_page(initial_size,
				     VM_MAP_PAGE_MASK(target_map));
	size = vm_object_round_page(initial_size);
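/*
 * Illustrative user-level sketch, not code from this file: this routine
 * sits behind mach_vm_map()-style calls, typically with a named entry
 * created by mach_make_memory_entry_64().  Roughly (the variables
 * "src", "dst", "entry_port" and "entry_size" are placeholders):
 *
 *	mach_port_t		entry_port = MACH_PORT_NULL;
 *	memory_object_size_t	entry_size = 64 * 1024;
 *	mach_vm_address_t	dst = 0;
 *	kern_return_t		kr;
 *
 *	kr = mach_make_memory_entry_64(mach_task_self(), &entry_size,
 *				       src, VM_PROT_READ | VM_PROT_WRITE,
 *				       &entry_port, MACH_PORT_NULL);
 *	if (kr == KERN_SUCCESS) {
 *		kr = mach_vm_map(mach_task_self(), &dst, entry_size, 0,
 *				 VM_FLAGS_ANYWHERE, entry_port, 0, FALSE,
 *				 VM_PROT_READ | VM_PROT_WRITE,
 *				 VM_PROT_READ | VM_PROT_WRITE,
 *				 VM_INHERIT_DEFAULT);
 *	}
 *
 * A null target map, protection bits outside VM_PROT_ALL or an invalid
 * inheritance value are rejected by the checks above with
 * KERN_INVALID_ARGUMENT before any mapping work is done.
 */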
2393 * Find the vm object (if any) corresponding to this port.
2395 if (!IP_VALID(port
)) {
2396 object
= VM_OBJECT_NULL
;
2399 } else if (ip_kotype(port
) == IKOT_NAMED_ENTRY
) {
2400 vm_named_entry_t named_entry
;
2402 named_entry
= (vm_named_entry_t
) port
->ip_kobject
;
2404 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
2405 offset
+= named_entry
->data_offset
;
2408 /* a few checks to make sure user is obeying rules */
2410 if (offset
>= named_entry
->size
)
2411 return KERN_INVALID_RIGHT
;
2412 size
= named_entry
->size
- offset
;
2414 if (mask_max_protection
) {
2415 max_protection
&= named_entry
->protection
;
2417 if (mask_cur_protection
) {
2418 cur_protection
&= named_entry
->protection
;
2420 if ((named_entry
->protection
& max_protection
) !=
2422 return KERN_INVALID_RIGHT
;
2423 if ((named_entry
->protection
& cur_protection
) !=
2425 return KERN_INVALID_RIGHT
;
2426 if (offset
+ size
< offset
) {
2428 return KERN_INVALID_ARGUMENT
;
2430 if (named_entry
->size
< (offset
+ size
))
2431 return KERN_INVALID_ARGUMENT
;
2433 if (named_entry
->is_copy
) {
2434 /* for a vm_map_copy, we can only map it whole */
2435 if ((size
!= named_entry
->size
) &&
2436 (vm_map_round_page(size
,
2437 VM_MAP_PAGE_MASK(target_map
)) ==
2438 named_entry
->size
)) {
2439 /* XXX FBDP use the rounded size... */
2440 size
= vm_map_round_page(
2442 VM_MAP_PAGE_MASK(target_map
));
2446 size
!= named_entry
->size
) {
2447 return KERN_INVALID_ARGUMENT
;
2451 /* the callers parameter offset is defined to be the */
2452 /* offset from beginning of named entry offset in object */
2453 offset
= offset
+ named_entry
->offset
;
2455 if (! VM_MAP_PAGE_ALIGNED(size
,
2456 VM_MAP_PAGE_MASK(target_map
))) {
2458 * Let's not map more than requested;
2459 * vm_map_enter() will handle this "not map-aligned"
2465 named_entry_lock(named_entry
);
2466 if (named_entry
->is_sub_map
) {
2469 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
2470 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2473 submap
= named_entry
->backing
.map
;
2474 vm_map_lock(submap
);
2475 vm_map_reference(submap
);
2476 vm_map_unlock(submap
);
2477 named_entry_unlock(named_entry
);
2479 result
= vm_map_enter(target_map
,
2483 flags
| VM_FLAGS_SUBMAP
,
2484 (vm_object_t
) submap
,
2490 if (result
!= KERN_SUCCESS
) {
2491 vm_map_deallocate(submap
);
2494 * No need to lock "submap" just to check its
2495 * "mapped" flag: that flag is never reset
2496 * once it's been set and if we race, we'll
2497 * just end up setting it twice, which is OK.
2499 if (submap
->mapped_in_other_pmaps
== FALSE
&&
2500 vm_map_pmap(submap
) != PMAP_NULL
&&
2501 vm_map_pmap(submap
) !=
2502 vm_map_pmap(target_map
)) {
2504 * This submap is being mapped in a map
2505 * that uses a different pmap.
2506 * Set its "mapped_in_other_pmaps" flag
2507 * to indicate that we now need to
2508 * remove mappings from all pmaps rather
2509 * than just the submap's pmap.
2511 vm_map_lock(submap
);
2512 submap
->mapped_in_other_pmaps
= TRUE
;
2513 vm_map_unlock(submap
);
2515 *address
= map_addr
;
2519 } else if (named_entry
->is_pager
) {
2520 unsigned int access
;
2521 vm_prot_t protections
;
2522 unsigned int wimg_mode
;
2524 protections
= named_entry
->protection
& VM_PROT_ALL
;
2525 access
= GET_MAP_MEM(named_entry
->protection
);
2527 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
2528 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2531 object
= vm_object_enter(named_entry
->backing
.pager
,
2533 named_entry
->internal
,
2536 if (object
== VM_OBJECT_NULL
) {
2537 named_entry_unlock(named_entry
);
2538 return KERN_INVALID_OBJECT
;
2541 /* JMM - drop reference on pager here */
2543 /* create an extra ref for the named entry */
2544 vm_object_lock(object
);
2545 vm_object_reference_locked(object
);
2546 named_entry
->backing
.object
= object
;
2547 named_entry
->is_pager
= FALSE
;
2548 named_entry_unlock(named_entry
);
2550 wimg_mode
= object
->wimg_bits
;
2552 if (access
== MAP_MEM_IO
) {
2553 wimg_mode
= VM_WIMG_IO
;
2554 } else if (access
== MAP_MEM_COPYBACK
) {
2555 wimg_mode
= VM_WIMG_USE_DEFAULT
;
2556 } else if (access
== MAP_MEM_INNERWBACK
) {
2557 wimg_mode
= VM_WIMG_INNERWBACK
;
2558 } else if (access
== MAP_MEM_WTHRU
) {
2559 wimg_mode
= VM_WIMG_WTHRU
;
2560 } else if (access
== MAP_MEM_WCOMB
) {
2561 wimg_mode
= VM_WIMG_WCOMB
;
2564 /* wait for object (if any) to be ready */
2565 if (!named_entry
->internal
) {
2566 while (!object
->pager_ready
) {
2569 VM_OBJECT_EVENT_PAGER_READY
,
2571 vm_object_lock(object
);
2575 if (object
->wimg_bits
!= wimg_mode
)
2576 vm_object_change_wimg_mode(object
, wimg_mode
);
2578 object
->true_share
= TRUE
;
2580 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
)
2581 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
2582 vm_object_unlock(object
);
2584 } else if (named_entry
->is_copy
) {
2586 vm_map_copy_t copy_map
;
2587 vm_map_entry_t copy_entry
;
2588 vm_map_offset_t copy_addr
;
2590 if (flags
& ~(VM_FLAGS_FIXED
|
2592 VM_FLAGS_OVERWRITE
|
2593 VM_FLAGS_RETURN_DATA_ADDR
)) {
2594 named_entry_unlock(named_entry
);
2595 return KERN_INVALID_ARGUMENT
;
2598 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
2599 offset_in_mapping
= offset
- vm_object_trunc_page(offset
);
2600 offset
= vm_object_trunc_page(offset
);
2601 map_size
= vm_object_round_page(offset
+ offset_in_mapping
+ initial_size
) - offset
;
2604 copy_map
= named_entry
->backing
.copy
;
2605 assert(copy_map
->type
== VM_MAP_COPY_ENTRY_LIST
);
2606 if (copy_map
->type
!= VM_MAP_COPY_ENTRY_LIST
) {
2607 /* unsupported type; should not happen */
2608 printf("vm_map_enter_mem_object: "
2609 "memory_entry->backing.copy "
2610 "unsupported type 0x%x\n",
2612 named_entry_unlock(named_entry
);
2613 return KERN_INVALID_ARGUMENT
;
2616 /* reserve a contiguous range */
2617 kr
= vm_map_enter(target_map
,
2621 flags
& (VM_FLAGS_ANYWHERE
|
2622 VM_FLAGS_OVERWRITE
|
2623 VM_FLAGS_RETURN_DATA_ADDR
),
2630 if (kr
!= KERN_SUCCESS
) {
2631 named_entry_unlock(named_entry
);
2635 copy_addr
= map_addr
;
2637 for (copy_entry
= vm_map_copy_first_entry(copy_map
);
2638 copy_entry
!= vm_map_copy_to_entry(copy_map
);
2639 copy_entry
= copy_entry
->vme_next
) {
2640 int remap_flags
= 0;
2641 vm_map_t copy_submap
;
2642 vm_object_t copy_object
;
2643 vm_map_size_t copy_size
;
2644 vm_object_offset_t copy_offset
;
2646 copy_offset
= copy_entry
->offset
;
2647 copy_size
= (copy_entry
->vme_end
-
2648 copy_entry
->vme_start
);
2651 if (copy_addr
+ copy_size
>
2652 map_addr
+ map_size
) {
2653 /* over-mapping too much !? */
2654 kr
= KERN_INVALID_ARGUMENT
;
2659 /* take a reference on the object */
2660 if (copy_entry
->is_sub_map
) {
2661 remap_flags
|= VM_FLAGS_SUBMAP
;
2663 copy_entry
->object
.sub_map
;
2664 vm_map_lock(copy_submap
);
2665 vm_map_reference(copy_submap
);
2666 vm_map_unlock(copy_submap
);
2667 copy_object
= (vm_object_t
) copy_submap
;
2670 copy_entry
->object
.vm_object
;
2671 vm_object_reference(copy_object
);
2674 /* over-map the object into destination */
2675 remap_flags
|= flags
;
2676 remap_flags
|= VM_FLAGS_FIXED
;
2677 remap_flags
|= VM_FLAGS_OVERWRITE
;
2678 remap_flags
&= ~VM_FLAGS_ANYWHERE
;
2679 kr
= vm_map_enter(target_map
,
2682 (vm_map_offset_t
) 0,
2690 if (kr
!= KERN_SUCCESS
) {
2691 if (copy_entry
->is_sub_map
) {
2692 vm_map_deallocate(copy_submap
);
2694 vm_object_deallocate(copy_object
);
2701 copy_addr
+= copy_size
;
2704 if (kr
== KERN_SUCCESS
) {
2705 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
2706 *address
= map_addr
+ offset_in_mapping
;
2708 *address
= map_addr
;
2711 named_entry_unlock(named_entry
);
2713 if (kr
!= KERN_SUCCESS
) {
2714 if (! (flags
& VM_FLAGS_OVERWRITE
)) {
2715 /* deallocate the contiguous range */
2716 (void) vm_deallocate(target_map
,
2725 /* This is the case where we are going to map */
2726 /* an already mapped object. If the object is */
2727 /* not ready it is internal. An external */
2728 /* object cannot be mapped until it is ready */
2729 /* we can therefore avoid the ready check */
2731 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
2732 offset_in_mapping
= offset
- vm_object_trunc_page(offset
);
2733 offset
= vm_object_trunc_page(offset
);
2734 map_size
= vm_object_round_page(offset
+ offset_in_mapping
+ initial_size
) - offset
;
2737 object
= named_entry
->backing
.object
;
2738 assert(object
!= VM_OBJECT_NULL
);
2739 named_entry_unlock(named_entry
);
2740 vm_object_reference(object
);
2742 } else if (ip_kotype(port
) == IKOT_MEMORY_OBJECT
) {
2744 * JMM - This is temporary until we unify named entries
2745 * and raw memory objects.
2747 * Detected fake ip_kotype for a memory object. In
2748 * this case, the port isn't really a port at all, but
2749 * instead is just a raw memory object.
2751 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
2752 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
2755 object
= vm_object_enter((memory_object_t
)port
,
2756 size
, FALSE
, FALSE
, FALSE
);
2757 if (object
== VM_OBJECT_NULL
)
2758 return KERN_INVALID_OBJECT
;
2760 /* wait for object (if any) to be ready */
2761 if (object
!= VM_OBJECT_NULL
) {
2762 if (object
== kernel_object
) {
2763 printf("Warning: Attempt to map kernel object"
2764 " by a non-private kernel entity\n");
2765 return KERN_INVALID_OBJECT
;
2767 if (!object
->pager_ready
) {
2768 vm_object_lock(object
);
2770 while (!object
->pager_ready
) {
2771 vm_object_wait(object
,
2772 VM_OBJECT_EVENT_PAGER_READY
,
2774 vm_object_lock(object
);
2776 vm_object_unlock(object
);
2780 return KERN_INVALID_OBJECT
;
2783 if (object
!= VM_OBJECT_NULL
&&
2785 object
->pager
!= MEMORY_OBJECT_NULL
&&
2786 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
2787 memory_object_t pager
;
2788 vm_prot_t pager_prot
;
2792 * For "named" VM objects, let the pager know that the
2793 * memory object is being mapped. Some pagers need to keep
2794 * track of this, to know when they can reclaim the memory
2795 * object, for example.
2796 * VM calls memory_object_map() for each mapping (specifying
2797 * the protection of each mapping) and calls
2798 * memory_object_last_unmap() when all the mappings are gone.
2800 pager_prot
= max_protection
;
2803 * Copy-On-Write mapping: won't modify the
2806 pager_prot
&= ~VM_PROT_WRITE
;
2808 vm_object_lock(object
);
2809 pager
= object
->pager
;
2810 if (object
->named
&&
2811 pager
!= MEMORY_OBJECT_NULL
&&
2812 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
2813 assert(object
->pager_ready
);
2814 vm_object_mapping_wait(object
, THREAD_UNINT
);
2815 vm_object_mapping_begin(object
);
2816 vm_object_unlock(object
);
2818 kr
= memory_object_map(pager
, pager_prot
);
2819 assert(kr
== KERN_SUCCESS
);
2821 vm_object_lock(object
);
2822 vm_object_mapping_end(object
);
2824 vm_object_unlock(object
);
2828 * Perform the copy if requested
2832 vm_object_t new_object
;
2833 vm_object_offset_t new_offset
;
2835 result
= vm_object_copy_strategically(object
, offset
, size
,
2836 &new_object
, &new_offset
,
2840 if (result
== KERN_MEMORY_RESTART_COPY
) {
2842 boolean_t src_needs_copy
;
2846 * We currently ignore src_needs_copy.
2847 * This really is the issue of how to make
2848 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2849 * non-kernel users to use. Solution forthcoming.
2850 * In the meantime, since we don't allow non-kernel
2851 * memory managers to specify symmetric copy,
2852 * we won't run into problems here.
2854 new_object
= object
;
2855 new_offset
= offset
;
2856 success
= vm_object_copy_quickly(&new_object
,
2861 result
= KERN_SUCCESS
;
2864 * Throw away the reference to the
2865 * original object, as it won't be mapped.
2868 vm_object_deallocate(object
);
2870 if (result
!= KERN_SUCCESS
)
2873 object
= new_object
;
2874 offset
= new_offset
;
2877 result
= vm_map_enter(target_map
,
2878 &map_addr
, map_size
,
2879 (vm_map_offset_t
)mask
,
2883 cur_protection
, max_protection
, inheritance
);
2884 if (result
!= KERN_SUCCESS
)
2885 vm_object_deallocate(object
);
2887 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
2888 *address
= map_addr
+ offset_in_mapping
;
2890 *address
= map_addr
;
2899 vm_map_enter_mem_object_control(
2900 vm_map_t target_map
,
2901 vm_map_offset_t
*address
,
2902 vm_map_size_t initial_size
,
2903 vm_map_offset_t mask
,
2905 memory_object_control_t control
,
2906 vm_object_offset_t offset
,
2908 vm_prot_t cur_protection
,
2909 vm_prot_t max_protection
,
2910 vm_inherit_t inheritance
)
2912 vm_map_address_t map_addr
;
2913 vm_map_size_t map_size
;
2915 vm_object_size_t size
;
2916 kern_return_t result
;
2917 memory_object_t pager
;
2918 vm_prot_t pager_prot
;
2922 * Check arguments for validity
2924 if ((target_map
== VM_MAP_NULL
) ||
2925 (cur_protection
& ~VM_PROT_ALL
) ||
2926 (max_protection
& ~VM_PROT_ALL
) ||
2927 (inheritance
> VM_INHERIT_LAST_VALID
) ||
2929 return KERN_INVALID_ARGUMENT
;
2931 map_addr
= vm_map_trunc_page(*address
,
2932 VM_MAP_PAGE_MASK(target_map
));
2933 map_size
= vm_map_round_page(initial_size
,
2934 VM_MAP_PAGE_MASK(target_map
));
2935 size
= vm_object_round_page(initial_size
);
2937 object
= memory_object_control_to_vm_object(control
);
2939 if (object
== VM_OBJECT_NULL
)
2940 return KERN_INVALID_OBJECT
;
2942 if (object
== kernel_object
) {
2943 printf("Warning: Attempt to map kernel object"
2944 " by a non-private kernel entity\n");
2945 return KERN_INVALID_OBJECT
;
2948 vm_object_lock(object
);
2949 object
->ref_count
++;
2950 vm_object_res_reference(object
);
2953 * For "named" VM objects, let the pager know that the
2954 * memory object is being mapped. Some pagers need to keep
2955 * track of this, to know when they can reclaim the memory
2956 * object, for example.
2957 * VM calls memory_object_map() for each mapping (specifying
2958 * the protection of each mapping) and calls
2959 * memory_object_last_unmap() when all the mappings are gone.
2961 pager_prot
= max_protection
;
2963 pager_prot
&= ~VM_PROT_WRITE
;
2965 pager
= object
->pager
;
2966 if (object
->named
&&
2967 pager
!= MEMORY_OBJECT_NULL
&&
2968 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
) {
2969 assert(object
->pager_ready
);
2970 vm_object_mapping_wait(object
, THREAD_UNINT
);
2971 vm_object_mapping_begin(object
);
2972 vm_object_unlock(object
);
2974 kr
= memory_object_map(pager
, pager_prot
);
2975 assert(kr
== KERN_SUCCESS
);
2977 vm_object_lock(object
);
2978 vm_object_mapping_end(object
);
2980 vm_object_unlock(object
);
2983 * Perform the copy if requested
2987 vm_object_t new_object
;
2988 vm_object_offset_t new_offset
;
2990 result
= vm_object_copy_strategically(object
, offset
, size
,
2991 &new_object
, &new_offset
,
2995 if (result
== KERN_MEMORY_RESTART_COPY
) {
2997 boolean_t src_needs_copy
;
3001 * We currently ignore src_needs_copy.
3002 * This really is the issue of how to make
3003 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3004 * non-kernel users to use. Solution forthcoming.
3005 * In the meantime, since we don't allow non-kernel
3006 * memory managers to specify symmetric copy,
3007 * we won't run into problems here.
3009 new_object
= object
;
3010 new_offset
= offset
;
3011 success
= vm_object_copy_quickly(&new_object
,
3016 result
= KERN_SUCCESS
;
3019 * Throw away the reference to the
3020 * original object, as it won't be mapped.
3023 vm_object_deallocate(object
);
3025 if (result
!= KERN_SUCCESS
)
3028 object
= new_object
;
3029 offset
= new_offset
;
3032 result
= vm_map_enter(target_map
,
3033 &map_addr
, map_size
,
3034 (vm_map_offset_t
)mask
,
3038 cur_protection
, max_protection
, inheritance
);
3039 if (result
!= KERN_SUCCESS
)
3040 vm_object_deallocate(object
);
3041 *address
= map_addr
;
3050 extern pmap_paddr_t avail_start
, avail_end
;
/*
 *	vm_allocate_cpm
 *
 *	Allocate memory in the specified map, with the caveat that
 *	the memory is physically contiguous.  This call may fail
 *	if the system can't find sufficient contiguous memory.
 *	This call may cause or lead to heart-stopping amounts of
 *	paging activity.
 *
 *	Memory obtained from this call should be freed in the
 *	normal way, viz., via vm_deallocate.
 */
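/*
 * Illustrative sketch, not code from this file: as noted above, memory
 * obtained through this contiguous path is released like any other VM
 * allocation.  From user space that is simply
 *
 *	kr = mach_vm_deallocate(mach_task_self(), addr, size);
 *
 * where "addr" and "size" (placeholders here) describe the region that
 * was returned; there is no separate "contiguous free" call.
 */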
3066 vm_map_offset_t
*addr
,
3070 vm_object_t cpm_obj
;
3074 vm_map_offset_t va
, start
, end
, offset
;
3076 vm_map_offset_t prev_addr
= 0;
3077 #endif /* MACH_ASSERT */
3079 boolean_t anywhere
= ((VM_FLAGS_ANYWHERE
& flags
) != 0);
3083 return KERN_SUCCESS
;
3086 *addr
= vm_map_min(map
);
3088 *addr
= vm_map_trunc_page(*addr
,
3089 VM_MAP_PAGE_MASK(map
));
3090 size
= vm_map_round_page(size
,
3091 VM_MAP_PAGE_MASK(map
));
3094 * LP64todo - cpm_allocate should probably allow
3095 * allocations of >4GB, but not with the current
3096 * algorithm, so just cast down the size for now.
3098 if (size
> VM_MAX_ADDRESS
)
3099 return KERN_RESOURCE_SHORTAGE
;
3100 if ((kr
= cpm_allocate(CAST_DOWN(vm_size_t
, size
),
3101 &pages
, 0, 0, TRUE
, flags
)) != KERN_SUCCESS
)
3104 cpm_obj
= vm_object_allocate((vm_object_size_t
)size
);
3105 assert(cpm_obj
!= VM_OBJECT_NULL
);
3106 assert(cpm_obj
->internal
);
3107 assert(cpm_obj
->vo_size
== (vm_object_size_t
)size
);
3108 assert(cpm_obj
->can_persist
== FALSE
);
3109 assert(cpm_obj
->pager_created
== FALSE
);
3110 assert(cpm_obj
->pageout
== FALSE
);
3111 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
3114 * Insert pages into object.
3117 vm_object_lock(cpm_obj
);
3118 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
3120 pages
= NEXT_PAGE(m
);
3121 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
3123 assert(!m
->gobbled
);
3125 assert(!m
->pageout
);
3127 assert(VM_PAGE_WIRED(m
));
3130 * "m" is not supposed to be pageable, so it
3131 * should not be encrypted. It wouldn't be safe
3132 * to enter it in a new VM object while encrypted.
3134 ASSERT_PAGE_DECRYPTED(m
);
3136 assert(m
->phys_page
>=(avail_start
>>PAGE_SHIFT
) && m
->phys_page
<=(avail_end
>>PAGE_SHIFT
));
3139 vm_page_insert(m
, cpm_obj
, offset
);
3141 assert(cpm_obj
->resident_page_count
== size
/ PAGE_SIZE
);
3142 vm_object_unlock(cpm_obj
);
3145 * Hang onto a reference on the object in case a
3146 * multi-threaded application for some reason decides
3147 * to deallocate the portion of the address space into
3148 * which we will insert this object.
3150 * Unfortunately, we must insert the object now before
3151 * we can talk to the pmap module about which addresses
3152 * must be wired down. Hence, the race with a multi-
3155 vm_object_reference(cpm_obj
);
3158 * Insert object into map.
3168 (vm_object_offset_t
)0,
3172 VM_INHERIT_DEFAULT
);
3174 if (kr
!= KERN_SUCCESS
) {
3176 * A CPM object doesn't have can_persist set,
3177 * so all we have to do is deallocate it to
3178 * free up these pages.
3180 assert(cpm_obj
->pager_created
== FALSE
);
3181 assert(cpm_obj
->can_persist
== FALSE
);
3182 assert(cpm_obj
->pageout
== FALSE
);
3183 assert(cpm_obj
->shadow
== VM_OBJECT_NULL
);
3184 vm_object_deallocate(cpm_obj
); /* kill acquired ref */
3185 vm_object_deallocate(cpm_obj
); /* kill creation ref */
3189 * Inform the physical mapping system that the
3190 * range of addresses may not fault, so that
3191 * page tables and such can be locked down as well.
3195 pmap
= vm_map_pmap(map
);
3196 pmap_pageable(pmap
, start
, end
, FALSE
);
3199 * Enter each page into the pmap, to avoid faults.
3200 * Note that this loop could be coded more efficiently,
3201 * if the need arose, rather than looking up each page
3204 for (offset
= 0, va
= start
; offset
< size
;
3205 va
+= PAGE_SIZE
, offset
+= PAGE_SIZE
) {
3208 vm_object_lock(cpm_obj
);
3209 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
3210 assert(m
!= VM_PAGE_NULL
);
3212 vm_page_zero_fill(m
);
3214 type_of_fault
= DBG_ZERO_FILL_FAULT
;
3216 vm_fault_enter(m
, pmap
, va
, VM_PROT_ALL
, VM_PROT_WRITE
,
3217 VM_PAGE_WIRED(m
), FALSE
, FALSE
, FALSE
, NULL
,
3220 vm_object_unlock(cpm_obj
);
3225 * Verify ordering in address space.
3227 for (offset
= 0; offset
< size
; offset
+= PAGE_SIZE
) {
3228 vm_object_lock(cpm_obj
);
3229 m
= vm_page_lookup(cpm_obj
, (vm_object_offset_t
)offset
);
3230 vm_object_unlock(cpm_obj
);
3231 if (m
== VM_PAGE_NULL
)
3232 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3233 cpm_obj
, (uint64_t)offset
);
3237 assert(!m
->fictitious
);
3238 assert(!m
->private);
3241 assert(!m
->cleaning
);
3242 assert(!m
->laundry
);
3243 assert(!m
->precious
);
3244 assert(!m
->clustered
);
3246 if (m
->phys_page
!= prev_addr
+ 1) {
3247 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3248 (uint64_t)start
, (uint64_t)end
, (uint64_t)va
);
3249 printf("obj %p off 0x%llx\n", cpm_obj
, (uint64_t)offset
);
3250 printf("m %p prev_address 0x%llx\n", m
, (uint64_t)prev_addr
);
3251 panic("vm_allocate_cpm: pages not contig!");
3254 prev_addr
= m
->phys_page
;
3256 #endif /* MACH_ASSERT */
3258 vm_object_deallocate(cpm_obj
); /* kill extra ref */
3267 * Interface is defined in all cases, but unless the kernel
3268 * is built explicitly for this option, the interface does
3274 __unused vm_map_t map
,
3275 __unused vm_map_offset_t
*addr
,
3276 __unused vm_map_size_t size
,
3279 return KERN_FAILURE
;
3283 /* Not used without nested pmaps */
3284 #ifndef NO_NESTED_PMAP
3286 * Clip and unnest a portion of a nested submap mapping.
3293 vm_map_entry_t entry
,
3294 vm_map_offset_t start_unnest
,
3295 vm_map_offset_t end_unnest
)
3297 vm_map_offset_t old_start_unnest
= start_unnest
;
3298 vm_map_offset_t old_end_unnest
= end_unnest
;
3300 assert(entry
->is_sub_map
);
3301 assert(entry
->object
.sub_map
!= NULL
);
3304 * Query the platform for the optimal unnest range.
3305 * DRK: There's some duplication of effort here, since
3306 * callers may have adjusted the range to some extent. This
3307 * routine was introduced to support 1GiB subtree nesting
3308 * for x86 platforms, which can also nest on 2MiB boundaries
3309 * depending on size/alignment.
3311 if (pmap_adjust_unnest_parameters(map
->pmap
, &start_unnest
, &end_unnest
)) {
3312 log_unnest_badness(map
, old_start_unnest
, old_end_unnest
);
3315 if (entry
->vme_start
> start_unnest
||
3316 entry
->vme_end
< end_unnest
) {
3317 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3318 "bad nested entry: start=0x%llx end=0x%llx\n",
3319 (long long)start_unnest
, (long long)end_unnest
,
3320 (long long)entry
->vme_start
, (long long)entry
->vme_end
);
3323 if (start_unnest
> entry
->vme_start
) {
3324 _vm_map_clip_start(&map
->hdr
,
3327 vm_map_store_update_first_free(map
, map
->first_free
);
3329 if (entry
->vme_end
> end_unnest
) {
3330 _vm_map_clip_end(&map
->hdr
,
3333 vm_map_store_update_first_free(map
, map
->first_free
);
3336 pmap_unnest(map
->pmap
,
3338 entry
->vme_end
- entry
->vme_start
);
3339 if ((map
->mapped_in_other_pmaps
) && (map
->ref_count
)) {
3340 /* clean up parent map/maps */
3341 vm_map_submap_pmap_clean(
3342 map
, entry
->vme_start
,
3344 entry
->object
.sub_map
,
3347 entry
->use_pmap
= FALSE
;
3348 if (entry
->alias
== VM_MEMORY_SHARED_PMAP
) {
3349 entry
->alias
= VM_MEMORY_UNSHARED_PMAP
;
3352 #endif /* NO_NESTED_PMAP */
3355 * vm_map_clip_start: [ internal use only ]
3357 * Asserts that the given entry begins at or after
3358 * the specified address; if necessary,
3359 * it splits the entry into two.
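/*
 * Worked example (sketch, not from the original source): clipping the
 * entry [0x1000, 0x5000) at startaddr 0x3000 leaves two entries:
 *
 *	before:	[0x1000 ---------------------- 0x5000)
 *	after:	[0x1000 -- 0x3000)[0x3000 ---- 0x5000)
 *
 * The original entry keeps the back half (its vme_start moves up to
 * 0x3000 and its object offset grows by 0x2000), while the newly
 * created entry covers the front half and is linked in just before it.
 */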
3364 vm_map_entry_t entry
,
3365 vm_map_offset_t startaddr
)
3367 #ifndef NO_NESTED_PMAP
3368 if (entry
->use_pmap
&&
3369 startaddr
>= entry
->vme_start
) {
3370 vm_map_offset_t start_unnest
, end_unnest
;
3373 * Make sure "startaddr" is no longer in a nested range
3374 * before we clip. Unnest only the minimum range the platform
3376 * vm_map_clip_unnest may perform additional adjustments to
3379 start_unnest
= startaddr
& ~(pmap_nesting_size_min
- 1);
3380 end_unnest
= start_unnest
+ pmap_nesting_size_min
;
3381 vm_map_clip_unnest(map
, entry
, start_unnest
, end_unnest
);
3383 #endif /* NO_NESTED_PMAP */
3384 if (startaddr
> entry
->vme_start
) {
3385 if (entry
->object
.vm_object
&&
3386 !entry
->is_sub_map
&&
3387 entry
->object
.vm_object
->phys_contiguous
) {
3388 pmap_remove(map
->pmap
,
3389 (addr64_t
)(entry
->vme_start
),
3390 (addr64_t
)(entry
->vme_end
));
3392 _vm_map_clip_start(&map
->hdr
, entry
, startaddr
);
3393 vm_map_store_update_first_free(map
, map
->first_free
);
3398 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3400 if ((startaddr) > (entry)->vme_start) \
3401 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3405 * This routine is called only when it is known that
3406 * the entry must be split.
3410 register struct vm_map_header
*map_header
,
3411 register vm_map_entry_t entry
,
3412 register vm_map_offset_t start
)
3414 register vm_map_entry_t new_entry
;
3417 * Split off the front portion --
3418 * note that we must insert the new
3419 * entry BEFORE this one, so that
3420 * this entry has the specified starting
3424 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
3425 vm_map_entry_copy_full(new_entry
, entry
);
3427 assert(VM_MAP_PAGE_ALIGNED(start
,
3428 VM_MAP_HDR_PAGE_MASK(map_header
)));
3429 new_entry
->vme_end
= start
;
3430 assert(new_entry
->vme_start
< new_entry
->vme_end
);
3431 entry
->offset
+= (start
- entry
->vme_start
);
3432 assert(start
< entry
->vme_end
);
3433 assert(VM_MAP_PAGE_ALIGNED(start
,
3434 VM_MAP_HDR_PAGE_MASK(map_header
)));
3435 entry
->vme_start
= start
;
3437 _vm_map_store_entry_link(map_header
, entry
->vme_prev
, new_entry
);
3439 if (entry
->is_sub_map
)
3440 vm_map_reference(new_entry
->object
.sub_map
);
3442 vm_object_reference(new_entry
->object
.vm_object
);
3447 * vm_map_clip_end: [ internal use only ]
3449 * Asserts that the given entry ends at or before
3450 * the specified address; if necessary,
3451 * it splits the entry into two.
3456 vm_map_entry_t entry
,
3457 vm_map_offset_t endaddr
)
3459 if (endaddr
> entry
->vme_end
) {
3461 * Within the scope of this clipping, limit "endaddr" to
3462 * the end of this map entry...
3464 endaddr
= entry
->vme_end
;
3466 #ifndef NO_NESTED_PMAP
3467 if (entry
->use_pmap
) {
3468 vm_map_offset_t start_unnest
, end_unnest
;
3471 * Make sure the range between the start of this entry and
3472 * the new "endaddr" is no longer nested before we clip.
3473 * Unnest only the minimum range the platform can handle.
3474 * vm_map_clip_unnest may perform additional adjustments to
3477 start_unnest
= entry
->vme_start
;
3479 (endaddr
+ pmap_nesting_size_min
- 1) &
3480 ~(pmap_nesting_size_min
- 1);
3481 vm_map_clip_unnest(map
, entry
, start_unnest
, end_unnest
);
3483 #endif /* NO_NESTED_PMAP */
3484 if (endaddr
< entry
->vme_end
) {
3485 if (entry
->object
.vm_object
&&
3486 !entry
->is_sub_map
&&
3487 entry
->object
.vm_object
->phys_contiguous
) {
3488 pmap_remove(map
->pmap
,
3489 (addr64_t
)(entry
->vme_start
),
3490 (addr64_t
)(entry
->vme_end
));
3492 _vm_map_clip_end(&map
->hdr
, entry
, endaddr
);
3493 vm_map_store_update_first_free(map
, map
->first_free
);
3498 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3500 if ((endaddr) < (entry)->vme_end) \
3501 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3505 * This routine is called only when it is known that
3506 * the entry must be split.
3510 register struct vm_map_header
*map_header
,
3511 register vm_map_entry_t entry
,
3512 register vm_map_offset_t end
)
3514 register vm_map_entry_t new_entry
;
3517 * Create a new entry and insert it
3518 * AFTER the specified entry
3521 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
3522 vm_map_entry_copy_full(new_entry
, entry
);
3524 assert(entry
->vme_start
< end
);
3525 assert(VM_MAP_PAGE_ALIGNED(end
,
3526 VM_MAP_HDR_PAGE_MASK(map_header
)));
3527 new_entry
->vme_start
= entry
->vme_end
= end
;
3528 new_entry
->offset
+= (end
- entry
->vme_start
);
3529 assert(new_entry
->vme_start
< new_entry
->vme_end
);
3531 _vm_map_store_entry_link(map_header
, entry
, new_entry
);
3533 if (entry
->is_sub_map
)
3534 vm_map_reference(new_entry
->object
.sub_map
);
3536 vm_object_reference(new_entry
->object
.vm_object
);
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)	\
	MACRO_BEGIN				\
	if (start < vm_map_min(map))		\
		start = vm_map_min(map);	\
	if (end > vm_map_max(map))		\
		end = vm_map_max(map);		\
	MACRO_END
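/*
 * Example (sketch, not from the original source): for a map whose valid
 * range is [vm_map_min(map), vm_map_max(map)) = [0x1000, 0xF000),
 *
 *	start = 0x0800;  end = 0xFF00;
 *	VM_MAP_RANGE_CHECK(map, start, end);
 *	// now start == 0x1000 and end == 0xF000
 *
 * i.e. the macro silently clamps the request to the map's bounds rather
 * than failing.
 */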
/*
 *	vm_map_range_check:	[ internal use only ]
 *
 *	Check that the region defined by the specified start and
 *	end addresses is wholly contained within a single map
 *	entry or set of adjacent map entries of the specified map,
 *	i.e. the specified region contains no unmapped space.
 *	If any or all of the region is unmapped, FALSE is returned.
 *	Otherwise, TRUE is returned and if the output argument 'entry'
 *	is not NULL it points to the map entry containing the start
 *	of the region.
 *
 *	The map is locked for reading on entry and is left locked.
 */
3572 register vm_map_t map
,
3573 register vm_map_offset_t start
,
3574 register vm_map_offset_t end
,
3575 vm_map_entry_t
*entry
)
3578 register vm_map_offset_t prev
;
3581 * Basic sanity checks first
3583 if (start
< vm_map_min(map
) || end
> vm_map_max(map
) || start
> end
)
3587 * Check first if the region starts within a valid
3588 * mapping for the map.
3590 if (!vm_map_lookup_entry(map
, start
, &cur
))
3594 * Optimize for the case that the region is contained
3595 * in a single map entry.
3597 if (entry
!= (vm_map_entry_t
*) NULL
)
3599 if (end
<= cur
->vme_end
)
3603 * If the region is not wholly contained within a
3604 * single entry, walk the entries looking for holes.
3606 prev
= cur
->vme_end
;
3607 cur
= cur
->vme_next
;
3608 while ((cur
!= vm_map_to_entry(map
)) && (prev
== cur
->vme_start
)) {
3609 if (end
<= cur
->vme_end
)
3611 prev
= cur
->vme_end
;
3612 cur
= cur
->vme_next
;
3618 * vm_map_submap: [ kernel use only ]
3620 * Mark the given range as handled by a subordinate map.
3622 * This range must have been created with vm_map_find using
3623 * the vm_submap_object, and no other operations may have been
3624 * performed on this range prior to calling vm_map_submap.
3626 * Only a limited number of operations can be performed
 * within this range after calling vm_map_submap:
3629 * [Don't try vm_map_copyin!]
3631 * To remove a submapping, one must first remove the
3632 * range from the superior map, and then destroy the
3633 * submap (if desired). [Better yet, don't try it.]
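/*
 * Usage sketch, not code from this file ("parent_map", "submap", "start"
 * and "end" are hypothetical): once a range has been created over
 * vm_submap_object as described above, handing it to a subordinate map
 * looks roughly like
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_submap(parent_map,	// superior map
 *			   start, end,	// range backed by vm_submap_object
 *			   submap,	// subordinate map taking it over
 *			   0,		// offset of "start" within submap
 *			   FALSE);	// use_pmap: no nested pmap sharing
 *
 * Teardown follows the order given above: remove the range from the
 * superior map first, then destroy the submap.
 */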
3638 vm_map_offset_t start
,
3639 vm_map_offset_t end
,
3641 vm_map_offset_t offset
,
3642 #ifdef NO_NESTED_PMAP
3644 #endif /* NO_NESTED_PMAP */
3647 vm_map_entry_t entry
;
3648 register kern_return_t result
= KERN_INVALID_ARGUMENT
;
3649 register vm_object_t object
;
3653 if (! vm_map_lookup_entry(map
, start
, &entry
)) {
3654 entry
= entry
->vme_next
;
3657 if (entry
== vm_map_to_entry(map
) ||
3658 entry
->is_sub_map
) {
3660 return KERN_INVALID_ARGUMENT
;
3663 assert(!entry
->use_pmap
); /* we don't want to unnest anything here */
3664 vm_map_clip_start(map
, entry
, start
);
3665 vm_map_clip_end(map
, entry
, end
);
3667 if ((entry
->vme_start
== start
) && (entry
->vme_end
== end
) &&
3668 (!entry
->is_sub_map
) &&
3669 ((object
= entry
->object
.vm_object
) == vm_submap_object
) &&
3670 (object
->resident_page_count
== 0) &&
3671 (object
->copy
== VM_OBJECT_NULL
) &&
3672 (object
->shadow
== VM_OBJECT_NULL
) &&
3673 (!object
->pager_created
)) {
3674 entry
->offset
= (vm_object_offset_t
)offset
;
3675 entry
->object
.vm_object
= VM_OBJECT_NULL
;
3676 vm_object_deallocate(object
);
3677 entry
->is_sub_map
= TRUE
;
3678 entry
->object
.sub_map
= submap
;
3679 vm_map_reference(submap
);
3680 if (submap
->mapped_in_other_pmaps
== FALSE
&&
3681 vm_map_pmap(submap
) != PMAP_NULL
&&
3682 vm_map_pmap(submap
) != vm_map_pmap(map
)) {
3684 * This submap is being mapped in a map
3685 * that uses a different pmap.
3686 * Set its "mapped_in_other_pmaps" flag
3687 * to indicate that we now need to
3688 * remove mappings from all pmaps rather
3689 * than just the submap's pmap.
3691 submap
->mapped_in_other_pmaps
= TRUE
;
3694 #ifndef NO_NESTED_PMAP
3696 /* nest if platform code will allow */
3697 if(submap
->pmap
== NULL
) {
3698 ledger_t ledger
= map
->pmap
->ledger
;
3699 submap
->pmap
= pmap_create(ledger
,
3700 (vm_map_size_t
) 0, FALSE
);
3701 if(submap
->pmap
== PMAP_NULL
) {
3703 return(KERN_NO_SPACE
);
3706 result
= pmap_nest(map
->pmap
,
3707 (entry
->object
.sub_map
)->pmap
,
3710 (uint64_t)(end
- start
));
3712 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result
);
3713 entry
->use_pmap
= TRUE
;
3715 #else /* NO_NESTED_PMAP */
3716 pmap_remove(map
->pmap
, (addr64_t
)start
, (addr64_t
)end
);
3717 #endif /* NO_NESTED_PMAP */
3718 result
= KERN_SUCCESS
;
3728 * Sets the protection of the specified address
3729 * region in the target map. If "set_max" is
3730 * specified, the maximum protection is to be set;
3731 * otherwise, only the current protection is affected.
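/*
 * User-level view (sketch, not code from this file): this routine backs
 * mach_vm_protect(), whose set_maximum argument selects between the two
 * cases described above ("addr", "size" and "kr" are placeholders):
 *
 *	// lower only the current protection to read-only
 *	kr = mach_vm_protect(mach_task_self(), addr, size,
 *			     FALSE, VM_PROT_READ);
 *
 *	// clamp the maximum protection as well; later attempts to add
 *	// VM_PROT_WRITE back will normally fail with
 *	// KERN_PROTECTION_FAILURE
 *	kr = mach_vm_protect(mach_task_self(), addr, size,
 *			     TRUE, VM_PROT_READ);
 */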
3735 register vm_map_t map
,
3736 register vm_map_offset_t start
,
3737 register vm_map_offset_t end
,
3738 register vm_prot_t new_prot
,
3739 register boolean_t set_max
)
3741 register vm_map_entry_t current
;
3742 register vm_map_offset_t prev
;
3743 vm_map_entry_t entry
;
3747 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3748 map
, start
, end
, new_prot
, set_max
);
3752 /* LP64todo - remove this check when vm_map_commpage64()
3753 * no longer has to stuff in a map_entry for the commpage
3754 * above the map's max_offset.
3756 if (start
>= map
->max_offset
) {
3758 return(KERN_INVALID_ADDRESS
);
3763 * Lookup the entry. If it doesn't start in a valid
3764 * entry, return an error.
3766 if (! vm_map_lookup_entry(map
, start
, &entry
)) {
3768 return(KERN_INVALID_ADDRESS
);
3771 if (entry
->superpage_size
&& (start
& (SUPERPAGE_SIZE
-1))) { /* extend request to whole entry */
3772 start
= SUPERPAGE_ROUND_DOWN(start
);
3777 if (entry
->superpage_size
)
3778 end
= SUPERPAGE_ROUND_UP(end
);
3781 * Make a first pass to check for protection and address
3786 prev
= current
->vme_start
;
3787 while ((current
!= vm_map_to_entry(map
)) &&
3788 (current
->vme_start
< end
)) {
3791 * If there is a hole, return an error.
3793 if (current
->vme_start
!= prev
) {
3795 return(KERN_INVALID_ADDRESS
);
3798 new_max
= current
->max_protection
;
3799 if(new_prot
& VM_PROT_COPY
) {
3800 new_max
|= VM_PROT_WRITE
;
3801 if ((new_prot
& (new_max
| VM_PROT_COPY
)) != new_prot
) {
3803 return(KERN_PROTECTION_FAILURE
);
3806 if ((new_prot
& new_max
) != new_prot
) {
3808 return(KERN_PROTECTION_FAILURE
);
3813 prev
= current
->vme_end
;
3814 current
= current
->vme_next
;
3818 return(KERN_INVALID_ADDRESS
);
3822 * Go back and fix up protections.
3823 * Clip to start here if the range starts within
3828 if (current
!= vm_map_to_entry(map
)) {
3829 /* clip and unnest if necessary */
3830 vm_map_clip_start(map
, current
, start
);
3833 while ((current
!= vm_map_to_entry(map
)) &&
3834 (current
->vme_start
< end
)) {
3838 vm_map_clip_end(map
, current
, end
);
3840 assert(!current
->use_pmap
); /* clipping did unnest if needed */
3842 old_prot
= current
->protection
;
3844 if(new_prot
& VM_PROT_COPY
) {
3845 /* caller is asking specifically to copy the */
3846 /* mapped data, this implies that max protection */
3847 /* will include write. Caller must be prepared */
3848 /* for loss of shared memory communication in the */
3849 /* target area after taking this step */
3851 if (current
->is_sub_map
== FALSE
&& current
->object
.vm_object
== VM_OBJECT_NULL
){
3852 current
->object
.vm_object
= vm_object_allocate((vm_map_size_t
)(current
->vme_end
- current
->vme_start
));
3853 current
->offset
= 0;
3855 current
->needs_copy
= TRUE
;
3856 current
->max_protection
|= VM_PROT_WRITE
;
3860 current
->protection
=
3861 (current
->max_protection
=
3862 new_prot
& ~VM_PROT_COPY
) &
3865 current
->protection
= new_prot
& ~VM_PROT_COPY
;
3868 * Update physical map if necessary.
3869 * If the request is to turn off write protection,
3870 * we won't do it for real (in pmap). This is because
3871 * it would cause copy-on-write to fail. We've already
 * set the new protection in the map, so if a
3873 * write-protect fault occurred, it will be fixed up
3874 * properly, COW or not.
3876 if (current
->protection
!= old_prot
) {
3877 /* Look one level in we support nested pmaps */
3878 /* from mapped submaps which are direct entries */
3883 prot
= current
->protection
& ~VM_PROT_WRITE
;
3885 if (override_nx(map
, current
->alias
) && prot
)
3886 prot
|= VM_PROT_EXECUTE
;
3888 if (current
->is_sub_map
&& current
->use_pmap
) {
3889 pmap_protect(current
->object
.sub_map
->pmap
,
3894 pmap_protect(map
->pmap
,
3900 current
= current
->vme_next
;
3904 while ((current
!= vm_map_to_entry(map
)) &&
3905 (current
->vme_start
<= end
)) {
3906 vm_map_simplify_entry(map
, current
);
3907 current
= current
->vme_next
;
3911 return(KERN_SUCCESS
);
3917 * Sets the inheritance of the specified address
3918 * range in the target map. Inheritance
3919 * affects how the map will be shared with
3920 * child maps at the time of vm_map_fork.
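/*
 * User-level view (sketch, not code from this file): this routine backs
 * mach_vm_inherit().  For example, to keep a buffer out of any child
 * created later by fork() ("addr", "size" and "kr" are placeholders):
 *
 *	kr = mach_vm_inherit(mach_task_self(), addr, size,
 *			     VM_INHERIT_NONE);
 *
 * VM_INHERIT_SHARE and VM_INHERIT_COPY are the other common choices;
 * as the check below shows, VM_INHERIT_COPY is refused for ranges that
 * are backed by submaps.
 */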
3924 register vm_map_t map
,
3925 register vm_map_offset_t start
,
3926 register vm_map_offset_t end
,
3927 register vm_inherit_t new_inheritance
)
3929 register vm_map_entry_t entry
;
3930 vm_map_entry_t temp_entry
;
3934 VM_MAP_RANGE_CHECK(map
, start
, end
);
3936 if (vm_map_lookup_entry(map
, start
, &temp_entry
)) {
3940 temp_entry
= temp_entry
->vme_next
;
3944 /* first check entire range for submaps which can't support the */
3945 /* given inheritance. */
3946 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
3947 if(entry
->is_sub_map
) {
3948 if(new_inheritance
== VM_INHERIT_COPY
) {
3950 return(KERN_INVALID_ARGUMENT
);
3954 entry
= entry
->vme_next
;
3958 if (entry
!= vm_map_to_entry(map
)) {
3959 /* clip and unnest if necessary */
3960 vm_map_clip_start(map
, entry
, start
);
3963 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
3964 vm_map_clip_end(map
, entry
, end
);
3965 assert(!entry
->use_pmap
); /* clip did unnest if needed */
3967 entry
->inheritance
= new_inheritance
;
3969 entry
= entry
->vme_next
;
3973 return(KERN_SUCCESS
);
3977 * Update the accounting for the amount of wired memory in this map. If the user has
3978 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3981 static kern_return_t
3984 vm_map_entry_t entry
,
3985 boolean_t user_wire
)
3990 unsigned int total_wire_count
= vm_page_wire_count
+ vm_lopage_free_count
;
3993 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3997 if (entry
->user_wired_count
== 0) {
3998 size
= entry
->vme_end
- entry
->vme_start
;
4001 * Since this is the first time the user is wiring this map entry, check to see if we're
4002 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4003 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4004 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4005 * limit, then we fail.
4008 if(size
+ map
->user_wire_size
> MIN(map
->user_wire_limit
, vm_user_wire_limit
) ||
4009 size
+ ptoa_64(total_wire_count
) > vm_global_user_wire_limit
||
4010 size
+ ptoa_64(total_wire_count
) > max_mem
- vm_global_no_user_wire_amount
)
4011 return KERN_RESOURCE_SHORTAGE
;
4014 * The first time the user wires an entry, we also increment the wired_count and add this to
4015 * the total that has been wired in the map.
4018 if (entry
->wired_count
>= MAX_WIRE_COUNT
)
4019 return KERN_FAILURE
;
4021 entry
->wired_count
++;
4022 map
->user_wire_size
+= size
;
4025 if (entry
->user_wired_count
>= MAX_WIRE_COUNT
)
4026 return KERN_FAILURE
;
4028 entry
->user_wired_count
++;
4033 * The kernel's wiring the memory. Just bump the count and continue.
4036 if (entry
->wired_count
>= MAX_WIRE_COUNT
)
4037 panic("vm_map_wire: too many wirings");
4039 entry
->wired_count
++;
4042 return KERN_SUCCESS
;
/*
 * Update the memory wiring accounting now that the given map entry is being unwired.
 */
static void
subtract_wire_counts(
	vm_map_t	map,
	vm_map_entry_t	entry,
	boolean_t	user_wire)
{
	if (user_wire) {
		/*
		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
		 */
		if (entry->user_wired_count == 1) {
			/*
			 * We're removing the last user wire reference.  Decrement the wired_count and the total
			 * user wired memory for this map.
			 */
			assert(entry->wired_count >= 1);
			entry->wired_count--;
			map->user_wire_size -= entry->vme_end - entry->vme_start;
		}

		assert(entry->user_wired_count >= 1);
		entry->user_wired_count--;

	} else {
		/*
		 * The kernel is unwiring the memory.  Just update the count.
		 */
		assert(entry->wired_count >= 1);
		entry->wired_count--;
	}
}
4091 * Sets the pageability of the specified address range in the
4092 * target map as wired. Regions specified as not pageable require
4093 * locked-down physical memory and physical page maps. The
4094 * access_type variable indicates types of accesses that must not
4095 * generate page faults. This is checked against protection of
4096 * memory being locked-down.
4098 * The map must not be locked, but a reference must remain to the
4099 * map throughout the call.
4101 static kern_return_t
4103 register vm_map_t map
,
4104 register vm_map_offset_t start
,
4105 register vm_map_offset_t end
,
4106 register vm_prot_t access_type
,
4107 boolean_t user_wire
,
4109 vm_map_offset_t pmap_addr
)
4111 register vm_map_entry_t entry
;
4112 struct vm_map_entry
*first_entry
, tmp_entry
;
4114 register vm_map_offset_t s
,e
;
4116 boolean_t need_wakeup
;
4117 boolean_t main_map
= FALSE
;
4118 wait_interrupt_t interruptible_state
;
4119 thread_t cur_thread
;
4120 unsigned int last_timestamp
;
4124 if(map_pmap
== NULL
)
4126 last_timestamp
= map
->timestamp
;
4128 VM_MAP_RANGE_CHECK(map
, start
, end
);
4129 assert(page_aligned(start
));
4130 assert(page_aligned(end
));
4131 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
4132 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
4134 /* We wired what the caller asked for, zero pages */
4136 return KERN_SUCCESS
;
4139 need_wakeup
= FALSE
;
4140 cur_thread
= current_thread();
4145 if (vm_map_lookup_entry(map
, s
, &first_entry
)) {
4146 entry
= first_entry
;
4148 * vm_map_clip_start will be done later.
4149 * We don't want to unnest any nested submaps here !
4152 /* Start address is not in map */
4153 rc
= KERN_INVALID_ADDRESS
;
4157 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
4159 * At this point, we have wired from "start" to "s".
4160 * We still need to wire from "s" to "end".
4162 * "entry" hasn't been clipped, so it could start before "s"
4163 * and/or end after "end".
4166 /* "e" is how far we want to wire in this entry */
4172 * If another thread is wiring/unwiring this entry then
4173 * block after informing other thread to wake us up.
4175 if (entry
->in_transition
) {
4176 wait_result_t wait_result
;
4179 * We have not clipped the entry. Make sure that
4180 * the start address is in range so that the lookup
4181 * below will succeed.
4182 * "s" is the current starting point: we've already
4183 * wired from "start" to "s" and we still have
4184 * to wire from "s" to "end".
4187 entry
->needs_wakeup
= TRUE
;
4190 * wake up anybody waiting on entries that we have
4194 vm_map_entry_wakeup(map
);
4195 need_wakeup
= FALSE
;
4198 * User wiring is interruptible
4200 wait_result
= vm_map_entry_wait(map
,
4201 (user_wire
) ? THREAD_ABORTSAFE
:
4203 if (user_wire
&& wait_result
== THREAD_INTERRUPTED
) {
4205 * undo the wirings we have done so far
4206 * We do not clear the needs_wakeup flag,
4207 * because we cannot tell if we were the
4215 * Cannot avoid a lookup here. reset timestamp.
4217 last_timestamp
= map
->timestamp
;
4220 * The entry could have been clipped, look it up again.
 * The worst that can happen is that it may not exist anymore.
4223 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
 * User: undo everything up to the previous
4226 * entry. let vm_map_unwire worry about
4227 * checking the validity of the range.
4232 entry
= first_entry
;
4236 if (entry
->is_sub_map
) {
4237 vm_map_offset_t sub_start
;
4238 vm_map_offset_t sub_end
;
4239 vm_map_offset_t local_start
;
4240 vm_map_offset_t local_end
;
4243 vm_map_clip_start(map
, entry
, s
);
4244 vm_map_clip_end(map
, entry
, end
);
4246 sub_start
= entry
->offset
;
4247 sub_end
= entry
->vme_end
;
4248 sub_end
+= entry
->offset
- entry
->vme_start
;
4250 local_end
= entry
->vme_end
;
4251 if(map_pmap
== NULL
) {
4253 vm_object_offset_t offset
;
4256 vm_map_entry_t local_entry
;
4257 vm_map_version_t version
;
4258 vm_map_t lookup_map
;
4260 if(entry
->use_pmap
) {
4261 pmap
= entry
->object
.sub_map
->pmap
;
4262 /* ppc implementation requires that */
4263 /* submaps pmap address ranges line */
4264 /* up with parent map */
4266 pmap_addr
= sub_start
;
4274 if (entry
->wired_count
) {
4275 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
)
4279 * The map was not unlocked:
4280 * no need to goto re-lookup.
4281 * Just go directly to next entry.
4283 entry
= entry
->vme_next
;
4284 s
= entry
->vme_start
;
4289 /* call vm_map_lookup_locked to */
4290 /* cause any needs copy to be */
4292 local_start
= entry
->vme_start
;
4294 vm_map_lock_write_to_read(map
);
4295 if(vm_map_lookup_locked(
4296 &lookup_map
, local_start
,
4298 OBJECT_LOCK_EXCLUSIVE
,
4300 &offset
, &prot
, &wired
,
4304 vm_map_unlock_read(lookup_map
);
4305 vm_map_unwire(map
, start
,
4307 return(KERN_FAILURE
);
4309 vm_object_unlock(object
);
4310 if(real_map
!= lookup_map
)
4311 vm_map_unlock(real_map
);
4312 vm_map_unlock_read(lookup_map
);
4315 /* we unlocked, so must re-lookup */
4316 if (!vm_map_lookup_entry(map
,
4324 * entry could have been "simplified",
4327 entry
= local_entry
;
4328 assert(s
== local_start
);
4329 vm_map_clip_start(map
, entry
, s
);
4330 vm_map_clip_end(map
, entry
, end
);
4331 /* re-compute "e" */
4336 /* did we have a change of type? */
4337 if (!entry
->is_sub_map
) {
4338 last_timestamp
= map
->timestamp
;
4342 local_start
= entry
->vme_start
;
4346 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
)
4349 entry
->in_transition
= TRUE
;
4352 rc
= vm_map_wire_nested(entry
->object
.sub_map
,
4355 user_wire
, pmap
, pmap_addr
);
4359 * Find the entry again. It could have been clipped
4360 * after we unlocked the map.
4362 if (!vm_map_lookup_entry(map
, local_start
,
4364 panic("vm_map_wire: re-lookup failed");
4365 entry
= first_entry
;
4367 assert(local_start
== s
);
4368 /* re-compute "e" */
4373 last_timestamp
= map
->timestamp
;
4374 while ((entry
!= vm_map_to_entry(map
)) &&
4375 (entry
->vme_start
< e
)) {
4376 assert(entry
->in_transition
);
4377 entry
->in_transition
= FALSE
;
4378 if (entry
->needs_wakeup
) {
4379 entry
->needs_wakeup
= FALSE
;
4382 if (rc
!= KERN_SUCCESS
) {/* from vm_*_wire */
4383 subtract_wire_counts(map
, entry
, user_wire
);
4385 entry
= entry
->vme_next
;
4387 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
4391 /* no need to relookup again */
4392 s
= entry
->vme_start
;
4397 * If this entry is already wired then increment
4398 * the appropriate wire reference count.
4400 if (entry
->wired_count
) {
4402 * entry is already wired down, get our reference
4403 * after clipping to our range.
4405 vm_map_clip_start(map
, entry
, s
);
4406 vm_map_clip_end(map
, entry
, end
);
4408 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
)
4411 /* map was not unlocked: no need to relookup */
4412 entry
= entry
->vme_next
;
4413 s
= entry
->vme_start
;
4418 * Unwired entry or wire request transmitted via submap
4423 * Perform actions of vm_map_lookup that need the write
4424 * lock on the map: create a shadow object for a
4425 * copy-on-write region, or an object for a zero-fill
4428 size
= entry
->vme_end
- entry
->vme_start
;
4430 * If wiring a copy-on-write page, we need to copy it now
4431 * even if we're only (currently) requesting read access.
4432 * This is aggressive, but once it's wired we can't move it.
4434 if (entry
->needs_copy
) {
4435 vm_object_shadow(&entry
->object
.vm_object
,
4436 &entry
->offset
, size
);
4437 entry
->needs_copy
= FALSE
;
4438 } else if (entry
->object
.vm_object
== VM_OBJECT_NULL
) {
4439 entry
->object
.vm_object
= vm_object_allocate(size
);
4440 entry
->offset
= (vm_object_offset_t
)0;
4443 vm_map_clip_start(map
, entry
, s
);
4444 vm_map_clip_end(map
, entry
, end
);
4446 /* re-compute "e" */
4452 * Check for holes and protection mismatch.
4453 * Holes: Next entry should be contiguous unless this
4454 * is the end of the region.
4455 * Protection: Access requested must be allowed, unless
4456 * wiring is by protection class
4458 if ((entry
->vme_end
< end
) &&
4459 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
4460 (entry
->vme_next
->vme_start
> entry
->vme_end
))) {
4462 rc
= KERN_INVALID_ADDRESS
;
4465 if ((entry
->protection
& access_type
) != access_type
) {
4466 /* found a protection problem */
4467 rc
= KERN_PROTECTION_FAILURE
;
4471 assert(entry
->wired_count
== 0 && entry
->user_wired_count
== 0);
4473 if ((rc
= add_wire_counts(map
, entry
, user_wire
)) != KERN_SUCCESS
)
4476 entry
->in_transition
= TRUE
;
4479 * This entry might get split once we unlock the map.
4480 * In vm_fault_wire(), we need the current range as
4481 * defined by this entry. In order for this to work
4482 * along with a simultaneous clip operation, we make a
4483 * temporary copy of this entry and use that for the
4484 * wiring. Note that the underlying objects do not
4485 * change during a clip.
 * The in_transition state guarantees that the entry
 * (or entries for this range, if split occurred) will be
4492 * there when the map lock is acquired for the second time.
4496 if (!user_wire
&& cur_thread
!= THREAD_NULL
)
4497 interruptible_state
= thread_interrupt_level(THREAD_UNINT
);
4499 interruptible_state
= THREAD_UNINT
;
4502 rc
= vm_fault_wire(map
,
4503 &tmp_entry
, map_pmap
, pmap_addr
);
4505 rc
= vm_fault_wire(map
,
4506 &tmp_entry
, map
->pmap
,
4507 tmp_entry
.vme_start
);
4509 if (!user_wire
&& cur_thread
!= THREAD_NULL
)
4510 thread_interrupt_level(interruptible_state
);
4514 if (last_timestamp
+1 != map
->timestamp
) {
4516 * Find the entry again. It could have been clipped
4517 * after we unlocked the map.
4519 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
4521 panic("vm_map_wire: re-lookup failed");
4523 entry
= first_entry
;
4526 last_timestamp
= map
->timestamp
;
4528 while ((entry
!= vm_map_to_entry(map
)) &&
4529 (entry
->vme_start
< tmp_entry
.vme_end
)) {
4530 assert(entry
->in_transition
);
4531 entry
->in_transition
= FALSE
;
4532 if (entry
->needs_wakeup
) {
4533 entry
->needs_wakeup
= FALSE
;
4536 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
4537 subtract_wire_counts(map
, entry
, user_wire
);
4539 entry
= entry
->vme_next
;
4542 if (rc
!= KERN_SUCCESS
) { /* from vm_*_wire */
4546 s
= entry
->vme_start
;
4547 } /* end while loop through map entries */
4550 if (rc
== KERN_SUCCESS
) {
4551 /* repair any damage we may have made to the VM map */
4552 vm_map_simplify_range(map
, start
, end
);
4558 * wake up anybody waiting on entries we wired.
4561 vm_map_entry_wakeup(map
);
4563 if (rc
!= KERN_SUCCESS
) {
4564 /* undo what has been wired so far */
4565 vm_map_unwire(map
, start
, s
, user_wire
);
4574 register vm_map_t map
,
4575 register vm_map_offset_t start
,
4576 register vm_map_offset_t end
,
4577 register vm_prot_t access_type
,
4578 boolean_t user_wire
)
4583 kret
= vm_map_wire_nested(map
, start
, end
, access_type
,
4584 user_wire
, (pmap_t
)NULL
, 0);
4591 * Sets the pageability of the specified address range in the target
4592 * as pageable. Regions specified must have been wired previously.
4594 * The map must not be locked, but a reference must remain to the map
4595 * throughout the call.
4597 * Kernel will panic on failures. User unwire ignores holes and
 * unwired and in-transition entries to avoid losing memory by leaving
4601 static kern_return_t
4602 vm_map_unwire_nested(
4603 register vm_map_t map
,
4604 register vm_map_offset_t start
,
4605 register vm_map_offset_t end
,
4606 boolean_t user_wire
,
4608 vm_map_offset_t pmap_addr
)
4610 register vm_map_entry_t entry
;
4611 struct vm_map_entry
*first_entry
, tmp_entry
;
4612 boolean_t need_wakeup
;
4613 boolean_t main_map
= FALSE
;
4614 unsigned int last_timestamp
;
4617 if(map_pmap
== NULL
)
4619 last_timestamp
= map
->timestamp
;
4621 VM_MAP_RANGE_CHECK(map
, start
, end
);
4622 assert(page_aligned(start
));
4623 assert(page_aligned(end
));
4624 assert(VM_MAP_PAGE_ALIGNED(start
, VM_MAP_PAGE_MASK(map
)));
4625 assert(VM_MAP_PAGE_ALIGNED(end
, VM_MAP_PAGE_MASK(map
)));
4628 /* We unwired what the caller asked for: zero pages */
4630 return KERN_SUCCESS
;
4633 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
4634 entry
= first_entry
;
4636 * vm_map_clip_start will be done later.
4637 * We don't want to unnest any nested sub maps here !
4642 panic("vm_map_unwire: start not found");
4644 /* Start address is not in map. */
4646 return(KERN_INVALID_ADDRESS
);
4649 if (entry
->superpage_size
) {
4650 /* superpages are always wired */
4652 return KERN_INVALID_ADDRESS
;
4655 need_wakeup
= FALSE
;
4656 while ((entry
!= vm_map_to_entry(map
)) && (entry
->vme_start
< end
)) {
4657 if (entry
->in_transition
) {
4660 * Another thread is wiring down this entry. Note
4661 * that if it is not for the other thread we would
4662 * be unwiring an unwired entry. This is not
4663 * permitted. If we wait, we will be unwiring memory
4667 * Another thread is unwiring this entry. We did not
4668 * have a reference to it, because if we did, this
4669 * entry will not be getting unwired now.
4674 * This could happen: there could be some
4675 * overlapping vslock/vsunlock operations
4677 * We should probably just wait and retry,
4678 * but then we have to be careful that this
4679 * entry could get "simplified" after
4680 * "in_transition" gets unset and before
4681 * we re-lookup the entry, so we would
4682 * have to re-clip the entry to avoid
4683 * re-unwiring what we have already unwired...
4684 * See vm_map_wire_nested().
4686 * Or we could just ignore "in_transition"
4687 * here and proceed to decement the wired
4688 * count(s) on this entry. That should be fine
4689 * as long as "wired_count" doesn't drop all
4690 * the way to 0 (and we should panic if THAT
4693 panic("vm_map_unwire: in_transition entry");
4696 entry
= entry
->vme_next
;
4700 if (entry
->is_sub_map
) {
4701 vm_map_offset_t sub_start
;
4702 vm_map_offset_t sub_end
;
4703 vm_map_offset_t local_end
;
4706 vm_map_clip_start(map
, entry
, start
);
4707 vm_map_clip_end(map
, entry
, end
);
4709 sub_start
= entry
->offset
;
4710 sub_end
= entry
->vme_end
- entry
->vme_start
;
4711 sub_end
+= entry
->offset
;
4712 local_end
= entry
->vme_end
;
4713 if(map_pmap
== NULL
) {
4714 if(entry
->use_pmap
) {
4715 pmap
= entry
->object
.sub_map
->pmap
;
4716 pmap_addr
= sub_start
;
4721 if (entry
->wired_count
== 0 ||
4722 (user_wire
&& entry
->user_wired_count
== 0)) {
4724 panic("vm_map_unwire: entry is unwired");
4725 entry
= entry
->vme_next
;
4731 * Holes: Next entry should be contiguous unless
4732 * this is the end of the region.
4734 if (((entry
->vme_end
< end
) &&
4735 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
4736 (entry
->vme_next
->vme_start
4737 > entry
->vme_end
)))) {
4739 panic("vm_map_unwire: non-contiguous region");
4741 entry = entry->vme_next;
4746 subtract_wire_counts(map
, entry
, user_wire
);
4748 if (entry
->wired_count
!= 0) {
4749 entry
= entry
->vme_next
;
4753 entry
->in_transition
= TRUE
;
4754 tmp_entry
= *entry
;/* see comment in vm_map_wire() */
4757 * We can unlock the map now. The in_transition state
4758 * guarantees existance of the entry.
4761 vm_map_unwire_nested(entry
->object
.sub_map
,
4762 sub_start
, sub_end
, user_wire
, pmap
, pmap_addr
);
4765 if (last_timestamp
+1 != map
->timestamp
) {
4767 * Find the entry again. It could have been
4768 * clipped or deleted after we unlocked the map.
4770 if (!vm_map_lookup_entry(map
,
4771 tmp_entry
.vme_start
,
4774 panic("vm_map_unwire: re-lookup failed");
4775 entry
= first_entry
->vme_next
;
4777 entry
= first_entry
;
4779 last_timestamp
= map
->timestamp
;
4782 * clear transition bit for all constituent entries
4783 * that were in the original entry (saved in
4784 * tmp_entry). Also check for waiters.
4786 while ((entry
!= vm_map_to_entry(map
)) &&
4787 (entry
->vme_start
< tmp_entry
.vme_end
)) {
4788 assert(entry
->in_transition
);
4789 entry
->in_transition
= FALSE
;
4790 if (entry
->needs_wakeup
) {
4791 entry
->needs_wakeup
= FALSE
;
4794 entry
= entry
->vme_next
;
4799 vm_map_unwire_nested(entry
->object
.sub_map
,
4800 sub_start
, sub_end
, user_wire
, map_pmap
,
4804 if (last_timestamp
+1 != map
->timestamp
) {
4806 * Find the entry again. It could have been
4807 * clipped or deleted after we unlocked the map.
4809 if (!vm_map_lookup_entry(map
,
4810 tmp_entry
.vme_start
,
4813 panic("vm_map_unwire: re-lookup failed");
4814 entry
= first_entry
->vme_next
;
4816 entry
= first_entry
;
4818 last_timestamp
= map
->timestamp
;
4823 if ((entry
->wired_count
== 0) ||
4824 (user_wire
&& entry
->user_wired_count
== 0)) {
4826 panic("vm_map_unwire: entry is unwired");
4828 entry
= entry
->vme_next
;
4832 assert(entry
->wired_count
> 0 &&
4833 (!user_wire
|| entry
->user_wired_count
> 0));
4835 vm_map_clip_start(map
, entry
, start
);
4836 vm_map_clip_end(map
, entry
, end
);
4840 * Holes: Next entry should be contiguous unless
4841 * this is the end of the region.
4843 if (((entry
->vme_end
< end
) &&
4844 ((entry
->vme_next
== vm_map_to_entry(map
)) ||
4845 (entry
->vme_next
->vme_start
> entry
->vme_end
)))) {
4848 panic("vm_map_unwire: non-contiguous region");
4849 entry
= entry
->vme_next
;
4853 subtract_wire_counts(map
, entry
, user_wire
);
4855 if (entry
->wired_count
!= 0) {
4856 entry
= entry
->vme_next
;
4860 if(entry
->zero_wired_pages
) {
4861 entry
->zero_wired_pages
= FALSE
;
4864 entry
->in_transition
= TRUE
;
4865 tmp_entry
= *entry
; /* see comment in vm_map_wire() */
4868 * We can unlock the map now. The in_transition state
4869 * guarantees existance of the entry.
4873 vm_fault_unwire(map
,
4874 &tmp_entry
, FALSE
, map_pmap
, pmap_addr
);
4876 vm_fault_unwire(map
,
4877 &tmp_entry
, FALSE
, map
->pmap
,
4878 tmp_entry
.vme_start
);
4882 if (last_timestamp
+1 != map
->timestamp
) {
4884 * Find the entry again. It could have been clipped
4885 * or deleted after we unlocked the map.
4887 if (!vm_map_lookup_entry(map
, tmp_entry
.vme_start
,
4890 panic("vm_map_unwire: re-lookup failed");
4891 entry
= first_entry
->vme_next
;
4893 entry
= first_entry
;
4895 last_timestamp
= map
->timestamp
;
4898 * clear transition bit for all constituent entries that
4899 * were in the original entry (saved in tmp_entry). Also
4900 * check for waiters.
4902 while ((entry
!= vm_map_to_entry(map
)) &&
4903 (entry
->vme_start
< tmp_entry
.vme_end
)) {
4904 assert(entry
->in_transition
);
4905 entry
->in_transition
= FALSE
;
4906 if (entry
->needs_wakeup
) {
4907 entry
->needs_wakeup
= FALSE
;
4910 entry
= entry
->vme_next
;
4915 * We might have fragmented the address space when we wired this
4916 * range of addresses. Attempt to re-coalesce these VM map entries
4917 * with their neighbors now that they're no longer wired.
4918 * Under some circumstances, address space fragmentation can
4919 * prevent VM object shadow chain collapsing, which can cause
4922 vm_map_simplify_range(map
, start
, end
);
4926 * wake up anybody waiting on entries that we have unwired.
4929 vm_map_entry_wakeup(map
);
4930 return(KERN_SUCCESS
);
kern_return_t
vm_map_unwire(
	register vm_map_t		map,
	register vm_map_offset_t	start,
	register vm_map_offset_t	end,
	boolean_t			user_wire)
{
	return vm_map_unwire_nested(map, start, end,
				    user_wire, (pmap_t)NULL, 0);
}
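
/*
 * Illustrative sketch (not from the original source): every successful
 * vm_map_wire() of a user range is normally balanced by a matching
 * vm_map_unwire() over the same page-aligned bounds, e.g.:
 *
 *	(void) vm_map_unwire(map,
 *			     vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
 *			     vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
 *			     TRUE);		(user_wire)
 *
 * "addr" and "size" are assumed to describe the range that was wired.
 */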
/*
 *	vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
 */
static void
vm_map_entry_delete(
	register vm_map_t	map,
	register vm_map_entry_t	entry)
{
	register vm_map_offset_t	s, e;
	register vm_object_t		object;
	register vm_map_t		submap;

	s = entry->vme_start;
	e = entry->vme_end;
	assert(page_aligned(s));
	assert(page_aligned(e));
	if (entry->map_aligned == TRUE) {
		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
	}
	assert(entry->wired_count == 0);
	assert(entry->user_wired_count == 0);
	assert(!entry->permanent);

	if (entry->is_sub_map) {
		object = NULL;
		submap = entry->object.sub_map;
	} else {
		submap = NULL;
		object = entry->object.vm_object;
	}

	vm_map_store_entry_unlink(map, entry);
	map->size -= e - s;

	vm_map_entry_dispose(map, entry);

	vm_map_unlock(map);
	/*
	 *	Deallocate the object only after removing all
	 *	pmap entries pointing to its pages.
	 */
	if (submap)
		vm_map_deallocate(submap);
	else
		vm_object_deallocate(object);
}
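
/*
 * Illustrative note (not from the original source): the ordering above
 * matters.  The entry is unlinked and disposed of while the map lock is
 * still held, and only afterwards, with the map unlocked, is the backing
 * submap/object reference dropped, so that the object is not deallocated
 * while pmap entries could still point at its pages.  A caller-side sketch,
 * assuming the map is already write-locked:
 *
 *	vm_map_lock(map);
 *	...
 *	vm_map_entry_delete(map, entry);	(returns with the map unlocked)
 */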
void
vm_map_submap_pmap_clean(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_map_t	sub_map,
	vm_map_offset_t	offset)
{
	vm_map_offset_t	submap_start;
	vm_map_offset_t	submap_end;
	vm_map_size_t	remove_size;
	vm_map_entry_t	entry;

	submap_end = offset + (end - start);
	submap_start = offset;

	vm_map_lock_read(sub_map);
	if (vm_map_lookup_entry(sub_map, offset, &entry)) {

		remove_size = (entry->vme_end - entry->vme_start);
		if (offset > entry->vme_start)
			remove_size -= offset - entry->vme_start;

		if (submap_end < entry->vme_end) {
			remove_size -=
				entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				start,
				start + remove_size,
				entry->object.sub_map,
				entry->offset);
		} else {
			if ((map->mapped_in_other_pmaps) && (map->ref_count)
			    && (entry->object.vm_object != NULL)) {
				vm_object_pmap_protect(
					entry->object.vm_object,
					entry->offset+(offset-entry->vme_start),
					remove_size,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE);
			} else {
				pmap_remove(map->pmap,
					    (addr64_t)start,
					    (addr64_t)(start + remove_size));
			}
		}
	}

	entry = entry->vme_next;

	while ((entry != vm_map_to_entry(sub_map))
	       && (entry->vme_start < submap_end)) {
		remove_size = (entry->vme_end - entry->vme_start);
		if (submap_end < entry->vme_end) {
			remove_size -= entry->vme_end - submap_end;
		}
		if (entry->is_sub_map) {
			vm_map_submap_pmap_clean(
				sub_map,
				(start + entry->vme_start) - offset,
				((start + entry->vme_start) - offset) + remove_size,
				entry->object.sub_map,
				entry->offset);
		} else {
			if ((map->mapped_in_other_pmaps) && (map->ref_count)
			    && (entry->object.vm_object != NULL)) {
				vm_object_pmap_protect(
					entry->object.vm_object,
					entry->offset,
					remove_size,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE);
			} else {
				pmap_remove(map->pmap,
					    (addr64_t)((start + entry->vme_start)
						       - offset),
					    (addr64_t)(((start + entry->vme_start)
							- offset) + remove_size));
			}
		}
		entry = entry->vme_next;
	}
	vm_map_unlock_read(sub_map);
	return;
}
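
/*
 * Illustrative sketch (not from the original source): the address arithmetic
 * used above when translating a parent-map range into a submap range.  Given
 * a parent entry mapping [start, end) at submap offset "offset":
 *
 *	submap_start = offset;
 *	submap_end   = offset + (end - start);
 *
 * and a submap entry's address back in the parent pmap is
 * (start + entry->vme_start) - offset, which is the address the
 * pmap_remove() calls above operate on.
 */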
/*
 *	vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target map.
 *	Removes all user wirings. Unwires one kernel wiring if
 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
 *
 *	This routine is called with map locked and leaves map locked.
 */
static kern_return_t
vm_map_delete(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	int			flags,
	vm_map_t		zap_map)
{
	vm_map_entry_t		entry, next;
	struct vm_map_entry	*first_entry, tmp_entry;
	register vm_map_offset_t s;
	register vm_object_t	object;
	boolean_t		need_wakeup;
	unsigned int		last_timestamp = ~0; /* unlikely value */
	int			interruptible;

	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
		THREAD_ABORTSAFE : THREAD_UNINT;

	/*
	 * All our DMA I/O operations in IOKit are currently done by
	 * wiring through the map entries of the task requesting the I/O.
	 * Because of this, we must always wait for kernel wirings
	 * to go away on the entries before deleting them.
	 *
	 * Any caller who wants to actually remove a kernel wiring
	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
	 * properly remove one wiring instead of blasting through
	 * them all.
	 */
	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;

	/*
	 *	Find the start of the region, and clip it
	 */
	if (vm_map_lookup_entry(map, start, &first_entry)) {
		entry = first_entry;
		if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
			/* extend request to whole entry */
			start = SUPERPAGE_ROUND_DOWN(start);
		}
		if (start == entry->vme_start) {
			/*
			 * No need to clip.  We don't want to cause
			 * any unnecessary unnesting in this case...
			 */
		} else {
			vm_map_clip_start(map, entry, start);
		}

		/*
		 *	Fix the lookup hint now, rather than each
		 *	time through the loop.
		 */
		SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
	} else {
		entry = first_entry->vme_next;
	}

	if (entry->superpage_size)
		end = SUPERPAGE_ROUND_UP(end);

	need_wakeup = FALSE;
	/*
	 *	Step through all entries in this region
	 */
	s = entry->vme_start;
	while ((entry != vm_map_to_entry(map)) && (s < end)) {
		/*
		 * At this point, we have deleted all the memory entries
		 * between "start" and "s".  We still need to delete
		 * all memory entries between "s" and "end".
		 * While we were blocked and the map was unlocked, some
		 * new memory entries could have been re-allocated between
		 * "start" and "s" and we don't want to mess with those.
		 * Some of those entries could even have been re-assembled
		 * with an entry after "s" (in vm_map_simplify_entry()), so
		 * we may have to vm_map_clip_start() again.
		 */

		if (entry->vme_start >= s) {
			/*
			 * This entry starts on or after "s"
			 * so no need to clip its start.
			 */
		} else {
			/*
			 * This entry has been re-assembled by a
			 * vm_map_simplify_entry().  We need to
			 * re-clip its start.
			 */
			vm_map_clip_start(map, entry, s);
		}
		if (entry->vme_end <= end) {
			/*
			 * This entry is going away completely, so no need
			 * to clip and possibly cause an unnecessary unnesting.
			 */
		} else {
			vm_map_clip_end(map, entry, end);
		}

		if (entry->permanent) {
			panic("attempt to remove permanent VM map entry "
			      "%p [0x%llx:0x%llx]\n",
			      entry, (uint64_t) s, (uint64_t) end);
		}

		if (entry->in_transition) {
			wait_result_t wait_result;

			/*
			 * Another thread is wiring/unwiring this entry.
			 * Let the other thread know we are waiting.
			 */
			assert(s == entry->vme_start);
			entry->needs_wakeup = TRUE;

			/*
			 * wake up anybody waiting on entries that we have
			 * already unwired/deleted.
			 */
			if (need_wakeup) {
				vm_map_entry_wakeup(map);
				need_wakeup = FALSE;
			}

			wait_result = vm_map_entry_wait(map, interruptible);

			if (interruptible &&
			    wait_result == THREAD_INTERRUPTED) {
				/*
				 * We do not clear the needs_wakeup flag,
				 * since we cannot tell if we were the only one.
				 */
				return KERN_ABORTED;
			}

			/*
			 * The entry could have been clipped or it
			 * may not exist anymore.  Look it up again.
			 */
			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				assert((map != kernel_map) &&
				       (!entry->is_sub_map));
				/*
				 * User: use the next entry
				 */
				entry = first_entry->vme_next;
				s = entry->vme_start;
			} else {
				entry = first_entry;
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			last_timestamp = map->timestamp;
			continue;
		} /* end in_transition */

		if (entry->wired_count) {
			boolean_t	user_wire;

			user_wire = entry->user_wired_count > 0;

			/*
			 *	Remove a kernel wiring if requested
			 */
			if (flags & VM_MAP_REMOVE_KUNWIRE) {
				entry->wired_count--;
			}

			/*
			 *	Remove all user wirings for proper accounting
			 */
			if (entry->user_wired_count > 0) {
				while (entry->user_wired_count)
					subtract_wire_counts(map, entry, user_wire);
			}

			if (entry->wired_count != 0) {
				assert(map != kernel_map);
				/*
				 * Cannot continue.  Typical case is when
				 * a user thread has physical io pending on
				 * on this page.  Either wait for the
				 * kernel wiring to go away or return an
				 * error.
				 */
				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
					wait_result_t wait_result;

					assert(s == entry->vme_start);
					entry->needs_wakeup = TRUE;
					wait_result = vm_map_entry_wait(map,
									interruptible);

					if (interruptible &&
					    wait_result == THREAD_INTERRUPTED) {
						/*
						 * We do not clear the
						 * needs_wakeup flag, since we
						 * cannot tell if we were the
						 * only one.
						 */
						return KERN_ABORTED;
					}

					/*
					 * The entry could have been clipped or
					 * it may not exist anymore.  Look it
					 * up again.
					 */
					if (!vm_map_lookup_entry(map, s,
								 &first_entry)) {
						assert(map != kernel_map);
						/*
						 * User: use the next entry
						 */
						entry = first_entry->vme_next;
						s = entry->vme_start;
					} else {
						entry = first_entry;
						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
					}
					last_timestamp = map->timestamp;
					continue;
				} else {
					return KERN_FAILURE;
				}
			}

			entry->in_transition = TRUE;
			/*
			 * copy current entry.  see comment in vm_map_wire()
			 */
			tmp_entry = *entry;
			assert(s == entry->vme_start);

			/*
			 * We can unlock the map now. The in_transition
			 * state guarantees existence of the entry.
			 */
			vm_map_unlock(map);

			if (tmp_entry.is_sub_map) {
				vm_map_t sub_map;
				vm_map_offset_t sub_start, sub_end;
				pmap_t pmap;
				vm_map_offset_t pmap_addr;

				sub_map = tmp_entry.object.sub_map;
				sub_start = tmp_entry.offset;
				sub_end = sub_start + (tmp_entry.vme_end -
						       tmp_entry.vme_start);
				if (tmp_entry.use_pmap) {
					pmap = sub_map->pmap;
					pmap_addr = tmp_entry.vme_start;
				} else {
					pmap = map->pmap;
					pmap_addr = tmp_entry.vme_start;
				}
				(void) vm_map_unwire_nested(sub_map,
							    sub_start, sub_end,
							    user_wire,
							    pmap, pmap_addr);
			} else {
				if (tmp_entry.object.vm_object == kernel_object) {
					pmap_protect_options(
						map->pmap,
						tmp_entry.vme_start,
						tmp_entry.vme_end,
						VM_PROT_NONE,
						PMAP_OPTIONS_REMOVE,
						NULL);
				}
				vm_fault_unwire(map, &tmp_entry,
						tmp_entry.object.vm_object == kernel_object,
						map->pmap, tmp_entry.vme_start);
			}

			vm_map_lock(map);

			if (last_timestamp+1 != map->timestamp) {
				/*
				 * Find the entry again.  It could have
				 * been clipped after we unlocked the map.
				 */
				if (!vm_map_lookup_entry(map, s, &first_entry)){
					assert((map != kernel_map) &&
					       (!entry->is_sub_map));
					first_entry = first_entry->vme_next;
					s = first_entry->vme_start;
				} else {
					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				}
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
				first_entry = entry;
			}

			last_timestamp = map->timestamp;

			entry = first_entry;
			while ((entry != vm_map_to_entry(map)) &&
			       (entry->vme_start < tmp_entry.vme_end)) {
				assert(entry->in_transition);
				entry->in_transition = FALSE;
				if (entry->needs_wakeup) {
					entry->needs_wakeup = FALSE;
					need_wakeup = TRUE;
				}
				entry = entry->vme_next;
			}
			/*
			 * We have unwired the entry(s).  Go back and
			 * delete them.
			 */
			entry = first_entry;
			continue;
		}

		/* entry is unwired */
		assert(entry->wired_count == 0);
		assert(entry->user_wired_count == 0);

		assert(s == entry->vme_start);

		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
			/*
			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
			 * vm_map_delete(), some map entries might have been
			 * transferred to a "zap_map", which doesn't have a
			 * pmap.  The original pmap has already been flushed
			 * in the vm_map_delete() call targeting the original
			 * map, but when we get to destroying the "zap_map",
			 * we don't have any pmap to flush, so let's just skip
			 * all this.
			 */
		} else if (entry->is_sub_map) {
			if (entry->use_pmap) {
#ifndef NO_NESTED_PMAP
				pmap_unnest(map->pmap,
					    (addr64_t)entry->vme_start,
					    entry->vme_end - entry->vme_start);
#endif	/* NO_NESTED_PMAP */
				if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
					/* clean up parent map/maps */
					vm_map_submap_pmap_clean(
						map, entry->vme_start,
						entry->vme_end,
						entry->object.sub_map,
						entry->offset);
				}
			} else {
				vm_map_submap_pmap_clean(
					map, entry->vme_start, entry->vme_end,
					entry->object.sub_map,
					entry->offset);
			}
		} else if (entry->object.vm_object != kernel_object &&
			   entry->object.vm_object != compressor_object) {
			object = entry->object.vm_object;
			if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
				vm_object_pmap_protect_options(
					object, entry->offset,
					entry->vme_end - entry->vme_start,
					PMAP_NULL,
					entry->vme_start,
					VM_PROT_NONE,
					PMAP_OPTIONS_REMOVE);
			} else if ((entry->object.vm_object !=
				    VM_OBJECT_NULL) ||
				   (map->pmap == kernel_pmap)) {
				/* Remove translations associated
				 * with this range unless the entry
				 * does not have an object, or
				 * it's the kernel map or a descendant
				 * since the platform could potentially
				 * create "backdoor" mappings invisible
				 * to the VM. It is expected that
				 * objectless, non-kernel ranges
				 * do not have such VM invisible
				 * translations.
				 */
				pmap_remove_options(map->pmap,
						    (addr64_t)entry->vme_start,
						    (addr64_t)entry->vme_end,
						    PMAP_OPTIONS_REMOVE);
			}
		}

		/*
		 * All pmap mappings for this map entry must have been
		 * cleared by now.
		 */
		assert(vm_map_pmap_is_empty(map,
					    entry->vme_start,
					    entry->vme_end));

		next = entry->vme_next;
		s = next->vme_start;
		last_timestamp = map->timestamp;

		if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
		    zap_map != VM_MAP_NULL) {
			vm_map_size_t entry_size;
			/*
			 * The caller wants to save the affected VM map entries
			 * into the "zap_map".  The caller will take care of
			 * these entries.
			 */
			/* unlink the entry from "map" ... */
			vm_map_store_entry_unlink(map, entry);
			/* ... and add it to the end of the "zap_map" */
			vm_map_store_entry_link(zap_map,
						vm_map_last_entry(zap_map),
						entry);
			entry_size = entry->vme_end - entry->vme_start;
			map->size -= entry_size;
			zap_map->size += entry_size;
			/* we didn't unlock the map, so no timestamp increase */
		} else {
			vm_map_entry_delete(map, entry);
			/* vm_map_entry_delete unlocks the map */
			vm_map_lock(map);
		}

		entry = next;

		if (entry == vm_map_to_entry(map)) {
			break;
		}
		if (last_timestamp+1 != map->timestamp) {
			/*
			 * we are responsible for deleting everything
			 * from the give space, if someone has interfered
			 * we pick up where we left off, back fills should
			 * be all right for anyone except map_delete and
			 * we have to assume that the task has been fully
			 * disabled before we get here
			 */
			if (!vm_map_lookup_entry(map, s, &entry)){
				entry = entry->vme_next;
				s = entry->vme_start;
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			/*
			 * others can not only allocate behind us, we can
			 * also see coalesce while we don't have the map lock
			 */
			if (entry == vm_map_to_entry(map)) {
				break;
			}
		}
		last_timestamp = map->timestamp;
	}

	if (map->wait_for_space)
		thread_wakeup((event_t) map);
	/*
	 * wake up anybody waiting on entries that we have already deleted.
	 */
	if (need_wakeup)
		vm_map_entry_wakeup(map);

	return KERN_SUCCESS;
}
/*
 *	vm_map_remove:
 *
 *	Remove the given address range from the target map.
 *	This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove(
	register vm_map_t		map,
	register vm_map_offset_t	start,
	register vm_map_offset_t	end,
	register boolean_t		flags)
{
	register kern_return_t	result;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	/*
	 * For the zone_map, the kernel controls the allocation/freeing of memory.
	 * Any free to the zone_map should be within the bounds of the map and
	 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
	 * free to the zone_map into a no-op, there is a problem and we should
	 * panic.
	 */
	if ((map == zone_map) && (start == end))
		panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	vm_map_unlock(map);

	return(result);
}
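
/*
 * Illustrative sketch (not part of the original source): a typical
 * kernel-side caller tearing down a previously established mapping.
 * "map", "addr" and "size" are assumed to come from the earlier
 * allocation; VM_MAP_NO_FLAGS is used when no special removal behaviour
 * (kernel unwiring, saving entries, ...) is needed.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_remove(map,
 *			   vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
 *			   vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
 *			   VM_MAP_NO_FLAGS);
 *	assert(kr == KERN_SUCCESS);
 */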
/*
 *	Routine:	vm_map_copy_discard
 *
 *	Description:
 *		Dispose of a map copy object (returned by
 *		vm_map_copyin).
 */
void
vm_map_copy_discard(
	vm_map_copy_t	copy)
{
	if (copy == VM_MAP_COPY_NULL)
		return;

	switch (copy->type) {
	case VM_MAP_COPY_ENTRY_LIST:
		while (vm_map_copy_first_entry(copy) !=
		       vm_map_copy_to_entry(copy)) {
			vm_map_entry_t	entry = vm_map_copy_first_entry(copy);

			vm_map_copy_entry_unlink(copy, entry);
			if (entry->is_sub_map) {
				vm_map_deallocate(entry->object.sub_map);
			} else {
				vm_object_deallocate(entry->object.vm_object);
			}
			vm_map_copy_entry_dispose(copy, entry);
		}
		break;
	case VM_MAP_COPY_OBJECT:
		vm_object_deallocate(copy->cpy_object);
		break;
	case VM_MAP_COPY_KERNEL_BUFFER:
		/*
		 * The vm_map_copy_t and possibly the data buffer were
		 * allocated by a single call to kalloc(), i.e. the
		 * vm_map_copy_t was not allocated out of the zone.
		 */
		kfree(copy, copy->cpy_kalloc_size);
		return;
	}
	zfree(vm_map_copy_zone, copy);
}
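
/*
 * Illustrative sketch (not from the original source): the usual ownership
 * rule for vm_map_copy_t objects.  A copy returned by vm_map_copyin() is
 * consumed by a successful vm_map_copy_overwrite(); on failure the caller
 * still owns it and must discard it explicitly:
 *
 *	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, interruptible);
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);
 *		return kr;
 *	}
 */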
/*
 *	Routine:	vm_map_copy_copy
 *
 *	Description:
 *		Move the information in a map copy object to
 *		a new map copy object, leaving the old one
 *		empty.
 *
 *		This is used by kernel routines that need
 *		to look at out-of-line data (in copyin form)
 *		before deciding whether to return SUCCESS.
 *		If the routine returns FAILURE, the original
 *		copy object will be deallocated; therefore,
 *		these routines must make a copy of the copy
 *		object and leave the original empty so that
 *		deallocation will not fail.
 */
vm_map_copy_t
vm_map_copy_copy(
	vm_map_copy_t	copy)
{
	vm_map_copy_t	new_copy;

	if (copy == VM_MAP_COPY_NULL)
		return VM_MAP_COPY_NULL;

	/*
	 * Allocate a new copy object, and copy the information
	 * from the old one into it.
	 */
	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	*new_copy = *copy;

	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * The links in the entry chain must be
		 * changed to point to the new copy object.
		 */
		vm_map_copy_first_entry(copy)->vme_prev
			= vm_map_copy_to_entry(new_copy);
		vm_map_copy_last_entry(copy)->vme_next
			= vm_map_copy_to_entry(new_copy);
	}

	/*
	 * Change the old copy object into one that contains
	 * nothing to be deallocated.
	 */
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = VM_OBJECT_NULL;

	/*
	 * Return the new object.
	 */
	return new_copy;
}
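
/*
 * Illustrative sketch (not part of the original source): a routine that
 * inspects out-of-line data before deciding its return value keeps a
 * private copy, so that a later failure (which deallocates the copy handed
 * in) does not also tear down the entry list it is still using:
 *
 *	vm_map_copy_t	my_copy;
 *
 *	my_copy = vm_map_copy_copy(copy);
 *	... examine the data through "my_copy" ...
 *	if (something_wrong)			(hypothetical condition)
 *		return KERN_FAILURE;		(caller discards "copy", now empty)
 */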
static kern_return_t
vm_map_overwrite_submap_recurse(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_size_t	dst_size)
{
	vm_map_offset_t	dst_end;
	vm_map_entry_t	tmp_entry;
	vm_map_entry_t	entry;
	kern_return_t	result;
	boolean_t	encountered_sub_map = FALSE;

	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */
	dst_end = vm_map_round_page(dst_addr + dst_size,
				    VM_MAP_PAGE_MASK(dst_map));
	vm_map_lock(dst_map);

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return(KERN_INVALID_ADDRESS);
	}

	vm_map_clip_start(dst_map,
			  tmp_entry,
			  vm_map_trunc_page(dst_addr,
					    VM_MAP_PAGE_MASK(dst_map)));
	assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */

	for (entry = tmp_entry;;) {
		vm_map_entry_t	next;

		next = entry->vme_next;
		while (entry->is_sub_map) {
			vm_map_offset_t	sub_start;
			vm_map_offset_t	sub_end;
			vm_map_offset_t	local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			encountered_sub_map = TRUE;
			sub_start = entry->offset;

			if (entry->vme_end < dst_end)
				sub_end = entry->vme_end;
			else
				sub_end = dst_end;
			sub_end -= entry->vme_start;
			sub_end += entry->offset;
			local_end = entry->vme_end;
			vm_map_unlock(dst_map);

			result = vm_map_overwrite_submap_recurse(
				entry->object.sub_map,
				sub_start,
				sub_end - sub_start);

			if (result != KERN_SUCCESS)
				return result;
			if (dst_end <= entry->vme_end)
				return KERN_SUCCESS;
			vm_map_lock(dst_map);
			if (!vm_map_lookup_entry(dst_map, local_end,
						 &tmp_entry)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}
			entry = tmp_entry;
			next = entry->vme_next;
		}

		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 *	our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end) {
			vm_map_unlock(dst_map);
			return KERN_SUCCESS;
		}
		/*
		 *	check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}

		/*
		 *	Check for permanent objects in the destination.
		 */
		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
		    ((!entry->object.vm_object->internal) ||
		     (entry->object.vm_object->true_share))) {
			if (encountered_sub_map) {
				vm_map_unlock(dst_map);
				return(KERN_FAILURE);
			}
		}

		entry = next;
	}/* for */
	vm_map_unlock(dst_map);
	return(KERN_SUCCESS);
}
/*
 *	Routine:	vm_map_copy_overwrite
 *
 *	Description:
 *		Copy the memory described by the map copy
 *		object (copy; returned by vm_map_copyin) onto
 *		the specified destination region (dst_map, dst_addr).
 *		The destination must be writeable.
 *
 *		Unlike vm_map_copyout, this routine actually
 *		writes over previously-mapped memory.  If the
 *		previous mapping was to a permanent (user-supplied)
 *		memory object, it is preserved.
 *
 *		The attributes (protection and inheritance) of the
 *		destination region are preserved.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 *
 *	Implementation notes:
 *		To overwrite aligned temporary virtual memory, it is
 *		sufficient to remove the previous mapping and insert
 *		the new copy.  This replacement is done either on
 *		the whole region (if no permanent virtual memory
 *		objects are embedded in the destination region) or
 *		in individual map entries.
 *
 *		To overwrite permanent virtual memory, it is necessary
 *		to copy each page, as the external memory management
 *		interface currently does not provide any optimizations.
 *
 *		Unaligned memory also has to be copied.  It is possible
 *		to use 'vm_trickery' to copy the aligned data.  This is
 *		not done but not hard to implement.
 *
 *		Once a page of permanent memory has been overwritten,
 *		it is impossible to interrupt this function; otherwise,
 *		the call would be neither atomic nor location-independent.
 *		The kernel-state portion of a user thread must be
 *		interruptible.
 *
 *		It may be expensive to forward all requests that might
 *		overwrite permanent memory (vm_write, vm_copy) to
 *		uninterruptible kernel threads.  This routine may be
 *		called by interruptible threads; however, success is
 *		not guaranteed -- if the request cannot be performed
 *		atomically and interruptibly, an error indication is
 *		returned.
 */
static kern_return_t
vm_map_copy_overwrite_nested(
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_copy_t		copy,
	boolean_t		interruptible,
	pmap_t			pmap,
	boolean_t		discard_on_success)
{
	vm_map_offset_t		dst_end;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	kern_return_t		kr;
	boolean_t		aligned = TRUE;
	boolean_t		contains_permanent_objects = FALSE;
	boolean_t		encountered_sub_map = FALSE;
	vm_map_offset_t		base_addr;
	vm_map_size_t		copy_size;
	vm_map_size_t		total_size;

	/*
	 *	Check for null copy object.
	 */
	if (copy == VM_MAP_COPY_NULL)
		return(KERN_SUCCESS);

	/*
	 *	Check for special kernel buffer allocated
	 *	by new_ipc_kmsg_copyin.
	 */
	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return(vm_map_copyout_kernel_buffer(
			       dst_map, &dst_addr,
			       copy, TRUE, discard_on_success));
	}

	/*
	 *	Only works for entry lists at the moment.  Will
	 *	support page lists later.
	 */
	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);

	if (copy->size == 0) {
		if (discard_on_success)
			vm_map_copy_discard(copy);
		return(KERN_SUCCESS);
	}

	/*
	 *	Verify that the destination is all writeable
	 *	initially.  We have to trunc the destination
	 *	address and round the copy size or we'll end up
	 *	splitting entries in strange ways.
	 */
	if (!VM_MAP_PAGE_ALIGNED(copy->size,
				 VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(copy->offset,
				 VM_MAP_PAGE_MASK(dst_map)) ||
	    !VM_MAP_PAGE_ALIGNED(dst_addr,
				 VM_MAP_PAGE_MASK(dst_map)) ||
	    dst_map->hdr.page_shift != copy->cpy_hdr.page_shift) {
		aligned = FALSE;
		dst_end = vm_map_round_page(dst_addr + copy->size,
					    VM_MAP_PAGE_MASK(dst_map));
	} else {
		dst_end = dst_addr + copy->size;
	}

	vm_map_lock(dst_map);

	/* LP64todo - remove this check when vm_map_commpage64()
	 * no longer has to stuff in a map_entry for the commpage
	 * above the map's max_offset.
	 */
	if (dst_addr >= dst_map->max_offset) {
		vm_map_unlock(dst_map);
		return(KERN_INVALID_ADDRESS);
	}

start_pass_1:
	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
		vm_map_unlock(dst_map);
		return(KERN_INVALID_ADDRESS);
	}
	vm_map_clip_start(dst_map,
			  tmp_entry,
			  vm_map_trunc_page(dst_addr,
					    VM_MAP_PAGE_MASK(dst_map)));
	for (entry = tmp_entry;;) {
		vm_map_entry_t	next = entry->vme_next;

		while (entry->is_sub_map) {
			vm_map_offset_t	sub_start;
			vm_map_offset_t	sub_end;
			vm_map_offset_t	local_end;

			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				goto start_pass_1;
			}

			local_end = entry->vme_end;
			if (!(entry->needs_copy)) {
				/* if needs_copy we are a COW submap */
				/* in such a case we just replace so */
				/* there is no need for the follow-  */
				/* ing check.                        */
				encountered_sub_map = TRUE;
				sub_start = entry->offset;

				if (entry->vme_end < dst_end)
					sub_end = entry->vme_end;
				else
					sub_end = dst_end;
				sub_end -= entry->vme_start;
				sub_end += entry->offset;
				vm_map_unlock(dst_map);

				kr = vm_map_overwrite_submap_recurse(
					entry->object.sub_map,
					sub_start,
					sub_end - sub_start);
				if (kr != KERN_SUCCESS)
					return kr;
				vm_map_lock(dst_map);
			}

			if (dst_end <= entry->vme_end)
				goto start_overwrite;
			if (!vm_map_lookup_entry(dst_map, local_end,
						 &entry)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}
			next = entry->vme_next;
		}

		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 *	If the entry is in transition, we must wait
		 *	for it to exit that state.  Anything could happen
		 *	when we unlock the map, so start over.
		 */
		if (entry->in_transition) {
			/*
			 * Say that we are waiting, and wait for entry.
			 */
			entry->needs_wakeup = TRUE;
			vm_map_entry_wait(dst_map, THREAD_UNINT);

			goto start_pass_1;
		}

		/*
		 *	our range is contained completely within this map entry
		 */
		if (dst_end <= entry->vme_end)
			break;
		/*
		 *	check that range specified is contiguous region
		 */
		if ((next == vm_map_to_entry(dst_map)) ||
		    (next->vme_start != entry->vme_end)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}

		/*
		 *	Check for permanent objects in the destination.
		 */
		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
		    ((!entry->object.vm_object->internal) ||
		     (entry->object.vm_object->true_share))) {
			contains_permanent_objects = TRUE;
		}

		entry = next;
	}/* for */

start_overwrite:
	/*
	 *	If there are permanent objects in the destination, then
	 *	the copy cannot be interrupted.
	 */
	if (interruptible && contains_permanent_objects) {
		vm_map_unlock(dst_map);
		return(KERN_FAILURE);	/* XXX */
	}

	/*
	 *	Make a second pass, overwriting the data
	 *	At the beginning of each loop iteration,
	 *	the next entry to be overwritten is "tmp_entry"
	 *	(initially, the value returned from the lookup above),
	 *	and the starting address expected in that entry
	 *	is "start".
	 */

	total_size = copy->size;
	if (encountered_sub_map) {
		copy_size = 0;
		/* re-calculate tmp_entry since we've had the map */
		/* unlocked */
		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}
	} else {
		copy_size = copy->size;
	}

	base_addr = dst_addr;
	while (TRUE) {
		/* deconstruct the copy object and do in parts */
		/* only in sub_map, interruptable case */
		vm_map_entry_t	copy_entry;
		vm_map_entry_t	previous_prev = VM_MAP_ENTRY_NULL;
		vm_map_entry_t	next_copy = VM_MAP_ENTRY_NULL;
		int		nentries;
		int		remaining_entries = 0;
		vm_map_offset_t	new_offset = 0;

		for (entry = tmp_entry; copy_size == 0;) {
			vm_map_entry_t	next;

			next = entry->vme_next;

			/* tmp_entry and base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpase tmp_entry, and the copy_size */
			/* may reflect the distance between them */
			/* if the current entry is found to be in transition */
			/* we will start over at the beginning or the last */
			/* encounter of a submap as dictated by base_addr */
			/* we will zero copy_size accordingly. */
			if (entry->in_transition) {
				/*
				 * Say that we are waiting, and wait for entry.
				 */
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);

				if (!vm_map_lookup_entry(dst_map, base_addr,
							 &tmp_entry)) {
					vm_map_unlock(dst_map);
					return(KERN_INVALID_ADDRESS);
				}
				copy_size = 0;
				entry = tmp_entry;
				continue;
			}
			if (entry->is_sub_map) {
				vm_map_offset_t	sub_start;
				vm_map_offset_t	sub_end;
				vm_map_offset_t	local_end;

				if (entry->needs_copy) {
					/* if this is a COW submap */
					/* just back the range with a */
					/* anonymous entry */
					if (entry->vme_end < dst_end)
						sub_end = entry->vme_end;
					else
						sub_end = dst_end;
					if (entry->vme_start < base_addr)
						sub_start = base_addr;
					else
						sub_start = entry->vme_start;
					vm_map_clip_end(
						dst_map, entry, sub_end);
					vm_map_clip_start(
						dst_map, entry, sub_start);
					assert(!entry->use_pmap);
					entry->is_sub_map = FALSE;
					vm_map_deallocate(
						entry->object.sub_map);
					entry->object.sub_map = NULL;
					entry->is_shared = FALSE;
					entry->needs_copy = FALSE;
					/*
					 * We should propagate the protections
					 * of the submap entry here instead
					 * of forcing them to VM_PROT_ALL...
					 * Or better yet, we should inherit
					 * the protection of the copy_entry.
					 */
					entry->protection = VM_PROT_ALL;
					entry->max_protection = VM_PROT_ALL;
					entry->wired_count = 0;
					entry->user_wired_count = 0;
					if (entry->inheritance
					    == VM_INHERIT_SHARE)
						entry->inheritance = VM_INHERIT_COPY;
					continue;
				}
				/* first take care of any non-sub_map */
				/* entries to send */
				if (base_addr < entry->vme_start) {
					/* stuff to send */
					copy_size =
						entry->vme_start - base_addr;
					break;
				}
				sub_start = entry->offset;

				if (entry->vme_end < dst_end)
					sub_end = entry->vme_end;
				else
					sub_end = dst_end;
				sub_end -= entry->vme_start;
				sub_end += entry->offset;
				local_end = entry->vme_end;
				vm_map_unlock(dst_map);
				copy_size = sub_end - sub_start;

				/* adjust the copy object */
				if (total_size > copy_size) {
					vm_map_size_t	local_size = 0;
					vm_map_size_t	entry_size;

					nentries = 1;
					new_offset = copy->offset;
					copy_entry = vm_map_copy_first_entry(copy);
					while (copy_entry !=
					       vm_map_copy_to_entry(copy)){
						entry_size = copy_entry->vme_end -
							copy_entry->vme_start;
						if((local_size < copy_size) &&
						   ((local_size + entry_size)
						    >= copy_size)) {
							vm_map_copy_clip_end(copy,
									     copy_entry,
									     copy_entry->vme_start +
									     (copy_size - local_size));
							entry_size = copy_entry->vme_end -
								copy_entry->vme_start;
							local_size += entry_size;
							new_offset += entry_size;
						}
						if (local_size >= copy_size) {
							next_copy = copy_entry->vme_next;
							copy_entry->vme_next =
								vm_map_copy_to_entry(copy);
							previous_prev =
								copy->cpy_hdr.links.prev;
							copy->cpy_hdr.links.prev = copy_entry;
							copy->size = copy_size;
							remaining_entries =
								copy->cpy_hdr.nentries;
							remaining_entries -= nentries;
							copy->cpy_hdr.nentries = nentries;
							break;
						} else {
							local_size += entry_size;
							new_offset += entry_size;
							nentries++;
						}
						copy_entry = copy_entry->vme_next;
					}
				}

				if ((entry->use_pmap) && (pmap == NULL)) {
					kr = vm_map_copy_overwrite_nested(
						entry->object.sub_map,
						sub_start,
						copy,
						interruptible,
						entry->object.sub_map->pmap,
						TRUE);
				} else if (pmap != NULL) {
					kr = vm_map_copy_overwrite_nested(
						entry->object.sub_map,
						sub_start,
						copy,
						interruptible, pmap,
						TRUE);
				} else {
					kr = vm_map_copy_overwrite_nested(
						entry->object.sub_map,
						sub_start,
						copy,
						interruptible,
						dst_map->pmap,
						TRUE);
				}
				if (kr != KERN_SUCCESS) {
					if (next_copy != NULL) {
						copy->cpy_hdr.nentries +=
							remaining_entries;
						copy->cpy_hdr.links.prev->vme_next =
							next_copy;
						copy->cpy_hdr.links.prev
							= previous_prev;
						copy->size = total_size;
					}
					return kr;
				}
				if (dst_end <= local_end) {
					return(KERN_SUCCESS);
				}
				/* otherwise copy no longer exists, it was */
				/* destroyed after successful copy_overwrite */
				copy = (vm_map_copy_t)
					zalloc(vm_map_copy_zone);
				vm_map_copy_first_entry(copy) =
					vm_map_copy_last_entry(copy) =
					vm_map_copy_to_entry(copy);
				copy->type = VM_MAP_COPY_ENTRY_LIST;
				copy->offset = new_offset;

				/*
				 * this does not seem to deal with
				 * the VM map store (R&B tree)
				 */

				total_size -= copy_size;
				copy_size = 0;
				/* put back remainder of copy in container */
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries = remaining_entries;
					copy->cpy_hdr.links.next = next_copy;
					copy->cpy_hdr.links.prev = previous_prev;
					copy->size = total_size;
					next_copy->vme_prev =
						vm_map_copy_to_entry(copy);
					next_copy = NULL;
				}
				base_addr = local_end;
				vm_map_lock(dst_map);
				if (!vm_map_lookup_entry(dst_map,
							 local_end, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return(KERN_INVALID_ADDRESS);
				}
				entry = tmp_entry;
				continue;
			}
			if (dst_end <= entry->vme_end) {
				copy_size = dst_end - base_addr;
				break;
			}

			if ((next == vm_map_to_entry(dst_map)) ||
			    (next->vme_start != entry->vme_end)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}

			entry = next;
		}/* for */

		next_copy = NULL;
		nentries = 1;

		/* adjust the copy object */
		if (total_size > copy_size) {
			vm_map_size_t	local_size = 0;
			vm_map_size_t	entry_size;

			new_offset = copy->offset;
			copy_entry = vm_map_copy_first_entry(copy);
			while (copy_entry != vm_map_copy_to_entry(copy)) {
				entry_size = copy_entry->vme_end -
					copy_entry->vme_start;
				if ((local_size < copy_size) &&
				    ((local_size + entry_size)
				     >= copy_size)) {
					vm_map_copy_clip_end(copy, copy_entry,
							     copy_entry->vme_start +
							     (copy_size - local_size));
					entry_size = copy_entry->vme_end -
						copy_entry->vme_start;
					local_size += entry_size;
					new_offset += entry_size;
				}
				if (local_size >= copy_size) {
					next_copy = copy_entry->vme_next;
					copy_entry->vme_next =
						vm_map_copy_to_entry(copy);
					previous_prev =
						copy->cpy_hdr.links.prev;
					copy->cpy_hdr.links.prev = copy_entry;
					copy->size = copy_size;
					remaining_entries =
						copy->cpy_hdr.nentries;
					remaining_entries -= nentries;
					copy->cpy_hdr.nentries = nentries;
					break;
				} else {
					local_size += entry_size;
					new_offset += entry_size;
					nentries++;
				}
				copy_entry = copy_entry->vme_next;
			}
		}

		if (aligned) {
			pmap_t	local_pmap;

			if (pmap)
				local_pmap = pmap;
			else
				local_pmap = dst_map->pmap;

			if ((kr = vm_map_copy_overwrite_aligned(
				     dst_map, tmp_entry, copy,
				     base_addr, local_pmap)) != KERN_SUCCESS) {
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
						remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
						next_copy;
					copy->cpy_hdr.links.prev =
						previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
			vm_map_unlock(dst_map);
		} else {
			/*
			 * Performance gain:
			 *
			 * if the copy and dst address are misaligned but the same
			 * offset within the page we can copy_not_aligned the
			 * misaligned parts and copy aligned the rest.  If they are
			 * aligned but len is unaligned we simply need to copy
			 * the end bit unaligned.  We'll need to split the misaligned
			 * bits of the region in this case !
			 */
			/* ALWAYS UNLOCKS THE dst_map MAP */
			kr = vm_map_copy_overwrite_unaligned(
				dst_map,
				tmp_entry,
				copy,
				base_addr,
				discard_on_success);
			if (kr != KERN_SUCCESS) {
				if (next_copy != NULL) {
					copy->cpy_hdr.nentries +=
						remaining_entries;
					copy->cpy_hdr.links.prev->vme_next =
						next_copy;
					copy->cpy_hdr.links.prev =
						previous_prev;
					copy->size += copy_size;
				}
				return kr;
			}
		}
		total_size -= copy_size;
		if (total_size == 0)
			break;
		base_addr += copy_size;
		copy_size = 0;
		copy->offset = new_offset;
		if (next_copy != NULL) {
			copy->cpy_hdr.nentries = remaining_entries;
			copy->cpy_hdr.links.next = next_copy;
			copy->cpy_hdr.links.prev = previous_prev;
			next_copy->vme_prev = vm_map_copy_to_entry(copy);
			copy->size = total_size;
		}
		vm_map_lock(dst_map);
		while (TRUE) {
			if (!vm_map_lookup_entry(dst_map,
						 base_addr, &tmp_entry)) {
				vm_map_unlock(dst_map);
				return(KERN_INVALID_ADDRESS);
			}
			if (tmp_entry->in_transition) {
				entry->needs_wakeup = TRUE;
				vm_map_entry_wait(dst_map, THREAD_UNINT);
			} else {
				break;
			}
		}
		vm_map_clip_start(dst_map,
				  tmp_entry,
				  vm_map_trunc_page(base_addr,
						    VM_MAP_PAGE_MASK(dst_map)));

		entry = tmp_entry;
	} /* while */

	/*
	 *	Throw away the vm_map_copy object
	 */
	if (discard_on_success)
		vm_map_copy_discard(copy);

	return(KERN_SUCCESS);
}/* vm_map_copy_overwrite */
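
/*
 * Illustrative sketch (not from the original source): how a caller that does
 * not need the head/tail optimization below would drive the nested routine
 * directly.  The (pmap_t) NULL argument means "use dst_map's own pmap" and
 * TRUE asks the routine to consume the copy on success.
 *
 *	kr = vm_map_copy_overwrite_nested(dst_map,
 *					  dst_addr,
 *					  copy,
 *					  interruptible,
 *					  (pmap_t) NULL,
 *					  TRUE);
 */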
kern_return_t
vm_map_copy_overwrite(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy,
	boolean_t	interruptible)
{
	vm_map_size_t	head_size, tail_size;
	vm_map_copy_t	head_copy, tail_copy;
	vm_map_offset_t	head_addr, tail_addr;
	vm_map_entry_t	entry;
	kern_return_t	kr;

	head_size = 0;
	tail_size = 0;
	head_copy = NULL;
	tail_copy = NULL;
	head_addr = 0;
	tail_addr = 0;

	if (interruptible ||
	    copy == VM_MAP_COPY_NULL ||
	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * We can't split the "copy" map if we're interruptible
		 * or if we don't have a "copy" map...
		 */
	blunt_copy:
		return vm_map_copy_overwrite_nested(dst_map,
						    dst_addr,
						    copy,
						    interruptible,
						    (pmap_t) NULL,
						    TRUE);
	}

	if (copy->size < 3 * PAGE_SIZE) {
		/*
		 * Too small to bother with optimizing...
		 */
		goto blunt_copy;
	}

	if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
	    (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
		/*
		 * Incompatible mis-alignment of source and destination...
		 */
		goto blunt_copy;
	}

	/*
	 * Proper alignment or identical mis-alignment at the beginning.
	 * Let's try and do a small unaligned copy first (if needed)
	 * and then an aligned copy for the rest.
	 */
	if (!page_aligned(dst_addr)) {
		head_addr = dst_addr;
		head_size = (VM_MAP_PAGE_SIZE(dst_map) -
			     (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
	}
	if (!page_aligned(copy->offset + copy->size)) {
		/*
		 * Mis-alignment at the end.
		 * Do an aligned copy up to the last page and
		 * then an unaligned copy for the remaining bytes.
		 */
		tail_size = ((copy->offset + copy->size) &
			     VM_MAP_PAGE_MASK(dst_map));
		tail_addr = dst_addr + copy->size - tail_size;
	}

	if (head_size + tail_size == copy->size) {
		/*
		 * It's all unaligned, no optimization possible...
		 */
		goto blunt_copy;
	}

	/*
	 * Can't optimize if there are any submaps in the
	 * destination due to the way we free the "copy" map
	 * progressively in vm_map_copy_overwrite_nested()
	 * in that case.
	 */
	vm_map_lock_read(dst_map);
	if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
		vm_map_unlock_read(dst_map);
		goto blunt_copy;
	}
	for (;
	     (entry != vm_map_copy_to_entry(copy) &&
	      entry->vme_start < dst_addr + copy->size);
	     entry = entry->vme_next) {
		if (entry->is_sub_map) {
			vm_map_unlock_read(dst_map);
			goto blunt_copy;
		}
	}
	vm_map_unlock_read(dst_map);

	if (head_size) {
		/*
		 * Unaligned copy of the first "head_size" bytes, to reach
		 * a page boundary.
		 */

		/*
		 * Extract "head_copy" out of "copy".
		 */
		head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
		vm_map_copy_first_entry(head_copy) =
			vm_map_copy_to_entry(head_copy);
		vm_map_copy_last_entry(head_copy) =
			vm_map_copy_to_entry(head_copy);
		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
		head_copy->cpy_hdr.nentries = 0;
		head_copy->cpy_hdr.entries_pageable =
			copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&head_copy->cpy_hdr);

		head_copy->offset = copy->offset;
		head_copy->size = head_size;

		copy->offset += head_size;
		copy->size -= head_size;

		entry = vm_map_copy_first_entry(copy);
		vm_map_copy_clip_end(copy, entry, copy->offset);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(head_copy,
				       vm_map_copy_to_entry(head_copy),
				       entry);

		/*
		 * Do the unaligned copy.
		 */
		kr = vm_map_copy_overwrite_nested(dst_map,
						  head_addr,
						  head_copy,
						  interruptible,
						  (pmap_t) NULL,
						  FALSE);
		if (kr != KERN_SUCCESS)
			goto done;
	}

	if (tail_size) {
		/*
		 * Extract "tail_copy" out of "copy".
		 */
		tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
		vm_map_copy_first_entry(tail_copy) =
			vm_map_copy_to_entry(tail_copy);
		vm_map_copy_last_entry(tail_copy) =
			vm_map_copy_to_entry(tail_copy);
		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
		tail_copy->cpy_hdr.nentries = 0;
		tail_copy->cpy_hdr.entries_pageable =
			copy->cpy_hdr.entries_pageable;
		vm_map_store_init(&tail_copy->cpy_hdr);

		tail_copy->offset = copy->offset + copy->size - tail_size;
		tail_copy->size = tail_size;

		copy->size -= tail_size;

		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
		entry = vm_map_copy_last_entry(copy);
		vm_map_copy_entry_unlink(copy, entry);
		vm_map_copy_entry_link(tail_copy,
				       vm_map_copy_last_entry(tail_copy),
				       entry);
	}

	/*
	 * Copy most (or possibly all) of the data.
	 */
	kr = vm_map_copy_overwrite_nested(dst_map,
					  dst_addr + head_size,
					  copy,
					  interruptible,
					  (pmap_t) NULL,
					  FALSE);
	if (kr != KERN_SUCCESS) {
		goto done;
	}

	if (tail_size) {
		kr = vm_map_copy_overwrite_nested(dst_map,
						  tail_addr,
						  tail_copy,
						  interruptible,
						  (pmap_t) NULL,
						  FALSE);
	}

done:
	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
	if (kr == KERN_SUCCESS) {
		/*
		 * Discard all the copy maps.
		 */
		if (head_copy) {
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		vm_map_copy_discard(copy);
		if (tail_copy) {
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	} else {
		/*
		 * Re-assemble the original copy map.
		 */
		if (head_copy) {
			entry = vm_map_copy_first_entry(head_copy);
			vm_map_copy_entry_unlink(head_copy, entry);
			vm_map_copy_entry_link(copy,
					       vm_map_copy_to_entry(copy),
					       entry);
			copy->offset -= head_size;
			copy->size += head_size;
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		if (tail_copy) {
			entry = vm_map_copy_last_entry(tail_copy);
			vm_map_copy_entry_unlink(tail_copy, entry);
			vm_map_copy_entry_link(copy,
					       vm_map_copy_last_entry(copy),
					       entry);
			copy->size += tail_size;
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	}
	return kr;
}
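
/*
 * Illustrative worked example (not from the original source), assuming 4K
 * pages, dst_addr and copy->offset both 0x300 bytes into a page, and
 * copy->size = 3 * PAGE_SIZE + 0x80:
 *
 *	head_size = PAGE_SIZE - 0x300 = 0xd00	(unaligned lead-in)
 *	tail_size = (copy->offset + copy->size) & PAGE_MASK = 0x380
 *	middle    = copy->size - head_size - tail_size = 2 * PAGE_SIZE
 *
 * so the bulk of the transfer (two full pages) goes through the aligned
 * path and only 0xd00 + 0x380 bytes are copied unaligned.
 */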
/*
 *	Routine: vm_map_copy_overwrite_unaligned	[internal use only]
 *
 *	Description:
 *	Physically copy unaligned data
 *
 *	Implementation:
 *	Unaligned parts of pages have to be physically copied.  We use
 *	a modified form of vm_fault_copy (which understands none-aligned
 *	page offsets and sizes) to do the copy.  We attempt to copy as
 *	much memory in one go as possibly, however vm_fault_copy copies
 *	within 1 memory object so we have to find the smaller of "amount left"
 *	"source object data size" and "target object data size".  With
 *	unaligned data we don't need to split regions, therefore the source
 *	(copy) object should be one map entry, the target range may be split
 *	over multiple map entries however.  In any event we are pessimistic
 *	about these assumptions.
 *
 *	Assumptions:
 *	dst_map is locked on entry and is return locked on success,
 *	unlocked on error.
 */
static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t	dst_map,
	vm_map_entry_t	entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	boolean_t	discard_on_success)
{
	vm_map_entry_t		copy_entry;
	vm_map_entry_t		copy_entry_next;
	vm_map_version_t	version;
	vm_object_t		dst_object;
	vm_object_offset_t	dst_offset;
	vm_object_offset_t	src_offset;
	vm_object_offset_t	entry_offset;
	vm_map_offset_t		entry_end;
	vm_map_size_t		src_size,
				dst_size,
				copy_size,
				amount_left;
	kern_return_t		kr = KERN_SUCCESS;

	copy_entry = vm_map_copy_first_entry(copy);

	vm_map_lock_write_to_read(dst_map);

	src_offset = copy->offset - vm_object_trunc_page(copy->offset);
	amount_left = copy->size;
	/*
	 *	unaligned so we never clipped this entry, we need the offset into
	 *	the vm_object not just the data.
	 */
	while (amount_left > 0) {

		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock_read(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/* "start" must be within the current map entry */
		assert((start >= entry->vme_start) && (start < entry->vme_end));

		dst_offset = start - entry->vme_start;

		dst_size = entry->vme_end - start;

		src_size = copy_entry->vme_end -
			(copy_entry->vme_start + src_offset);

		if (dst_size < src_size) {
			/*
			 *	we can only copy dst_size bytes before
			 *	we have to get the next destination entry
			 */
			copy_size = dst_size;
		} else {
			/*
			 *	we can only copy src_size bytes before
			 *	we have to get the next source copy entry
			 */
			copy_size = src_size;
		}

		if (copy_size > amount_left) {
			copy_size = amount_left;
		}
		/*
		 *	Entry needs copy, create a shadow shadow object for
		 *	Copy on write region.
		 */
		if (entry->needs_copy &&
		    ((entry->protection & VM_PROT_WRITE) != 0)) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			vm_object_shadow(&entry->object.vm_object,
					 &entry->offset,
					 (vm_map_size_t)(entry->vme_end
							 - entry->vme_start));
			entry->needs_copy = FALSE;
			vm_map_lock_write_to_read(dst_map);
		}
		dst_object = entry->object.vm_object;
		/*
		 *	unlike with the virtual (aligned) copy we're going
		 *	to fault on it therefore we need a target object.
		 */
		if (dst_object == VM_OBJECT_NULL) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			dst_object = vm_object_allocate((vm_map_size_t)
							entry->vme_end - entry->vme_start);
			entry->object.vm_object = dst_object;
			entry->offset = 0;
			vm_map_lock_write_to_read(dst_map);
		}
		/*
		 *	Take an object reference and unlock map. The "entry" may
		 *	disappear or change when the map is unlocked.
		 */
		vm_object_reference(dst_object);
		version.main_timestamp = dst_map->timestamp;
		entry_offset = entry->offset;
		entry_end = entry->vme_end;
		vm_map_unlock_read(dst_map);
		/*
		 *	Copy as much as possible in one pass
		 */
		kr = vm_fault_copy(
			copy_entry->object.vm_object,
			copy_entry->offset + src_offset,
			&copy_size,
			dst_object,
			entry_offset + dst_offset,
			dst_map,
			&version,
			THREAD_UNINT );

		start += copy_size;
		src_offset += copy_size;
		amount_left -= copy_size;
		/*
		 *	Release the object reference
		 */
		vm_object_deallocate(dst_object);
		/*
		 *	If a hard error occurred, return it now
		 */
		if (kr != KERN_SUCCESS)
			return kr;

		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
		    || amount_left == 0) {
			/*
			 *	all done with this copy entry, dispose.
			 */
			copy_entry_next = copy_entry->vme_next;

			if (discard_on_success) {
				vm_map_copy_entry_unlink(copy, copy_entry);
				assert(!copy_entry->is_sub_map);
				vm_object_deallocate(
					copy_entry->object.vm_object);
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			if (copy_entry_next == vm_map_copy_to_entry(copy) &&
			    amount_left) {
				/*
				 *	not finished copying but run out of source
				 */
				return KERN_INVALID_ADDRESS;
			}

			copy_entry = copy_entry_next;

			src_offset = 0;
		}

		if (amount_left == 0)
			return KERN_SUCCESS;

		vm_map_lock_read(dst_map);
		if (version.main_timestamp == dst_map->timestamp) {
			if (start == entry_end) {
				/*
				 *	destination region is split.  Use the version
				 *	information to avoid a lookup in the normal
				 *	case.
				 */
				entry = entry->vme_next;
				/*
				 *	should be contiguous. Fail if we encounter
				 *	a hole in the destination.
				 */
				if (start != entry->vme_start) {
					vm_map_unlock_read(dst_map);
					return KERN_INVALID_ADDRESS;
				}
			}
		} else {
			/*
			 *	Map version check failed.
			 *	we must lookup the entry because somebody
			 *	might have changed the map behind our backs.
			 */
RetryLookup:
			if (!vm_map_lookup_entry(dst_map, start, &entry)) {
				vm_map_unlock_read(dst_map);
				return KERN_INVALID_ADDRESS;
			}
		}
	}/* while */

	return KERN_SUCCESS;
}/* vm_map_copy_overwrite_unaligned */
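
/*
 * Illustrative sketch (not from the original source): the size chosen for
 * each vm_fault_copy() pass above is the smallest of the three limits named
 * in the routine's description:
 *
 *	copy_size = MIN(dst_size,		space left in the dst entry
 *		    MIN(src_size,		space left in the copy entry
 *			amount_left));		bytes still to transfer
 */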
/*
 *	Routine: vm_map_copy_overwrite_aligned	[internal use only]
 *
 *	Description:
 *	Does all the vm_trickery possible for whole pages.
 *
 *	Implementation:
 *
 *	If there are no permanent objects in the destination,
 *	and the source and destination map entry zones match,
 *	and the destination map entry is not shared,
 *	then the map entries can be deleted and replaced
 *	with those from the copy.  The following code is the
 *	basic idea of what to do, but there are lots of annoying
 *	little details about getting protection and inheritance
 *	right.  Should add protection, inheritance, and sharing checks
 *	to the above pass and make sure that no wiring is involved.
 */

int vm_map_copy_overwrite_aligned_src_not_internal = 0;
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
int vm_map_copy_overwrite_aligned_src_large = 0;

static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t	dst_map,
	vm_map_entry_t	tmp_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	__unused pmap_t	pmap)
{
	vm_object_t	object;
	vm_map_entry_t	copy_entry;
	vm_map_size_t	copy_size;
	vm_map_size_t	size;
	vm_map_entry_t	entry;

	while ((copy_entry = vm_map_copy_first_entry(copy))
	       != vm_map_copy_to_entry(copy)) {
		copy_size = (copy_entry->vme_end - copy_entry->vme_start);

		entry = tmp_entry;
		assert(!entry->use_pmap); /* unnested when clipped earlier */
		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
		size = (entry->vme_end - entry->vme_start);
		/*
		 *	Make sure that no holes popped up in the
		 *	address map, and that the protection is
		 *	still valid, in case the map was unlocked
		 *	earlier.
		 */
		if ((entry->vme_start != start) || ((entry->is_sub_map)
						    && !entry->needs_copy)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}
		assert(entry != vm_map_to_entry(dst_map));

		/*
		 *	Check protection again
		 */
		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 *	Adjust to source size first
		 */
		if (copy_size < size) {
			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
			size = copy_size;
		}

		/*
		 *	Adjust to destination size
		 */
		if (size < copy_size) {
			vm_map_copy_clip_end(copy, copy_entry,
					     copy_entry->vme_start + size);
			copy_size = size;
		}

		assert((entry->vme_end - entry->vme_start) == size);
		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
		assert((copy_entry->vme_end - copy_entry->vme_start) == size);

		/*
		 *	If the destination contains temporary unshared memory,
		 *	we can perform the copy by throwing it away and
		 *	installing the source data.
		 */
		object = entry->object.vm_object;
		if ((!entry->is_shared &&
		     ((object == VM_OBJECT_NULL) ||
		      (object->internal && !object->true_share))) ||
		    entry->needs_copy) {
			vm_object_t		old_object = entry->object.vm_object;
			vm_object_offset_t	old_offset = entry->offset;
			vm_object_offset_t	offset;

			/*
			 * Ensure that the source and destination aren't
			 * identical
			 */
			if (old_object == copy_entry->object.vm_object &&
			    old_offset == copy_entry->offset) {
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_map_copy_entry_dispose(copy, copy_entry);

				if (old_object != VM_OBJECT_NULL)
					vm_object_deallocate(old_object);

				start = tmp_entry->vme_end;
				tmp_entry = tmp_entry->vme_next;
				continue;
			}

#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024)	/* 64 MB */
#define __TRADEOFF1_COPY_SIZE (128 * 1024)	/* 128 KB */
			if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
			    copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
			    copy_size <= __TRADEOFF1_COPY_SIZE) {
				/*
				 * Virtual vs. Physical copy tradeoff #1.
				 *
				 * Copying only a few pages out of a large
				 * object:  do a physical copy instead of
				 * a virtual copy, to avoid possibly keeping
				 * the entire large object alive because of
				 * those few copy-on-write pages.
				 */
				vm_map_copy_overwrite_aligned_src_large++;
				goto slow_copy;
			}

			if (entry->alias >= VM_MEMORY_MALLOC &&
			    entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
				vm_object_t new_object, new_shadow;

				/*
				 * We're about to map something over a mapping
				 * established by malloc()...
				 */
				new_object = copy_entry->object.vm_object;
				if (new_object != VM_OBJECT_NULL) {
					vm_object_lock_shared(new_object);
				}
				while (new_object != VM_OBJECT_NULL &&
				       !new_object->true_share &&
				       new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
				       new_object->internal) {
					new_shadow = new_object->shadow;
					if (new_shadow == VM_OBJECT_NULL) {
						break;
					}
					vm_object_lock_shared(new_shadow);
					vm_object_unlock(new_object);
					new_object = new_shadow;
				}
				if (new_object != VM_OBJECT_NULL) {
					if (!new_object->internal) {
						/*
						 * The new mapping is backed
						 * by an external object.  We
						 * don't want malloc'ed memory
						 * to be replaced with such a
						 * non-anonymous mapping, so
						 * let's go off the optimized
						 * path...
						 */
						vm_map_copy_overwrite_aligned_src_not_internal++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
					if (new_object->true_share ||
					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
						/*
						 * Same if there's a "true_share"
						 * object in the shadow chain, or
						 * an object with a non-default
						 * (SYMMETRIC) copy strategy.
						 */
						vm_map_copy_overwrite_aligned_src_not_symmetric++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
					vm_object_unlock(new_object);
				}
				/*
				 * The new mapping is still backed by
				 * anonymous (internal) memory, so it's
				 * OK to substitute it for the original
				 * malloc() mapping.
				 */
			}

			if (old_object != VM_OBJECT_NULL) {
				if (entry->is_sub_map) {
					if (entry->use_pmap) {
#ifndef NO_NESTED_PMAP
						pmap_unnest(dst_map->pmap,
							    (addr64_t)entry->vme_start,
							    entry
7208 entry
->vme_end
- entry
->vme_start
);
7209 #endif /* NO_NESTED_PMAP */
7210 if(dst_map
->mapped_in_other_pmaps
) {
7211 /* clean up parent */
7213 vm_map_submap_pmap_clean(
7214 dst_map
, entry
->vme_start
,
7216 entry
->object
.sub_map
,
7220 vm_map_submap_pmap_clean(
7221 dst_map
, entry
->vme_start
,
7223 entry
->object
.sub_map
,
7227 entry
->object
.sub_map
);
7229 if(dst_map
->mapped_in_other_pmaps
) {
7230 vm_object_pmap_protect_options(
7231 entry
->object
.vm_object
,
7238 PMAP_OPTIONS_REMOVE
);
7240 pmap_remove_options(
7242 (addr64_t
)(entry
->vme_start
),
7243 (addr64_t
)(entry
->vme_end
),
7244 PMAP_OPTIONS_REMOVE
);
7246 vm_object_deallocate(old_object
);
7250 entry
->is_sub_map
= FALSE
;
7251 entry
->object
= copy_entry
->object
;
7252 object
= entry
->object
.vm_object
;
7253 entry
->needs_copy
= copy_entry
->needs_copy
;
7254 entry
->wired_count
= 0;
7255 entry
->user_wired_count
= 0;
7256 offset
= entry
->offset
= copy_entry
->offset
;
7258 vm_map_copy_entry_unlink(copy
, copy_entry
);
7259 vm_map_copy_entry_dispose(copy
, copy_entry
);
7262 * we could try to push pages into the pmap at this point, BUT
7263 * this optimization only saved on average 2 us per page if ALL
7264 * the pages in the source were currently mapped
7265 * and ALL the pages in the dest were touched, if there were fewer
7266 * than 2/3 of the pages touched, this optimization actually cost more cycles
7267 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
7271 * Set up for the next iteration. The map
7272 * has not been unlocked, so the next
7273 * address should be at the end of this
7274 * entry, and the next map entry should be
7275 * the one following it.
7278 start
= tmp_entry
->vme_end
;
7279 tmp_entry
= tmp_entry
->vme_next
;
7281 vm_map_version_t version
;
7282 vm_object_t dst_object
;
7283 vm_object_offset_t dst_offset
;
7287 if (entry
->needs_copy
) {
7288 vm_object_shadow(&entry
->object
.vm_object
,
7292 entry
->needs_copy
= FALSE
;
7295 dst_object
= entry
->object
.vm_object
;
7296 dst_offset
= entry
->offset
;
7299 * Take an object reference, and record
7300 * the map version information so that the
7301 * map can be safely unlocked.
7304 if (dst_object
== VM_OBJECT_NULL
) {
7306 * We would usually have just taken the
7307 * optimized path above if the destination
7308 * object has not been allocated yet. But we
7309 * now disable that optimization if the copy
7310 * entry's object is not backed by anonymous
7311 * memory to avoid replacing malloc'ed
7312 * (i.e. re-usable) anonymous memory with a
7313 * not-so-anonymous mapping.
7314 * So we have to handle this case here and
7315 * allocate a new VM object for this map entry.
7317 dst_object
= vm_object_allocate(
7318 entry
->vme_end
- entry
->vme_start
);
7320 entry
->object
.vm_object
= dst_object
;
7321 entry
->offset
= dst_offset
;
7325 vm_object_reference(dst_object
);
7327 /* account for unlock bumping up timestamp */
7328 version
.main_timestamp
= dst_map
->timestamp
+ 1;
7330 vm_map_unlock(dst_map
);
7333 * Copy as much as possible in one pass
7338 copy_entry
->object
.vm_object
,
7348 * Release the object reference
7351 vm_object_deallocate(dst_object
);
7354 * If a hard error occurred, return it now
7357 if (r
!= KERN_SUCCESS
)
7360 if (copy_size
!= 0) {
7362 * Dispose of the copied region
7365 vm_map_copy_clip_end(copy
, copy_entry
,
7366 copy_entry
->vme_start
+ copy_size
);
7367 vm_map_copy_entry_unlink(copy
, copy_entry
);
7368 vm_object_deallocate(copy_entry
->object
.vm_object
);
7369 vm_map_copy_entry_dispose(copy
, copy_entry
);
7373 * Pick up in the destination map where we left off.
7375 * Use the version information to avoid a lookup
7376 * in the normal case.
7380 vm_map_lock(dst_map
);
7381 if (version
.main_timestamp
== dst_map
->timestamp
&&
7383 /* We can safely use saved tmp_entry value */
7385 vm_map_clip_end(dst_map
, tmp_entry
, start
);
7386 tmp_entry
= tmp_entry
->vme_next
;
7388 /* Must do lookup of tmp_entry */
7390 if (!vm_map_lookup_entry(dst_map
, start
, &tmp_entry
)) {
7391 vm_map_unlock(dst_map
);
7392 return(KERN_INVALID_ADDRESS
);
7394 vm_map_clip_start(dst_map
, tmp_entry
, start
);
7399 return(KERN_SUCCESS
);
7400 }/* vm_map_copy_overwrite_aligned */
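
/*
 * Illustrative sketch (not original code; kept under #if 0 so it is never
 * compiled): the "virtual vs. physical copy" tradeoff test used above,
 * pulled out into a hypothetical predicate.  The threshold macros mirror
 * __TRADEOFF1_OBJ_SIZE / __TRADEOFF1_COPY_SIZE defined inside
 * vm_map_copy_overwrite_aligned(); the helper name is invented.
 */
#if 0
#define __EXAMPLE_LARGE_OBJ_SIZE  (64 * 1024 * 1024)  /* 64 MB */
#define __EXAMPLE_SMALL_COPY_SIZE (128 * 1024)        /* 128 KB */

static boolean_t
example_prefer_physical_copy(
    vm_object_t    src_object,
    vm_map_size_t  copy_size)
{
    /*
     * Copying a small slice of a very large object: a physical copy
     * avoids keeping the whole object alive just for a few
     * copy-on-write pages.
     */
    return (src_object != VM_OBJECT_NULL &&
            src_object->vo_size >= __EXAMPLE_LARGE_OBJ_SIZE &&
            copy_size <= __EXAMPLE_SMALL_COPY_SIZE);
}
#endif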
/*
 *	Routine:	vm_map_copyin_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy in data to a kernel buffer from space in the
 *		source map.  The original space may be optionally
 *		deallocated.
 *
 *		If successful, returns a new copy object.
 */
static kern_return_t
vm_map_copyin_kernel_buffer(
    vm_map_t        src_map,
    vm_map_offset_t src_addr,
    vm_map_size_t   len,
    boolean_t       src_destroy,
    vm_map_copy_t   *copy_result)
{
    kern_return_t kr;
    vm_map_copy_t copy;
    vm_size_t     kalloc_size;

    if ((vm_size_t) len != len) {
        /* "len" is too big and doesn't fit in a "vm_size_t" */
        return KERN_RESOURCE_SHORTAGE;
    }
    kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
    assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);

    copy = (vm_map_copy_t) kalloc(kalloc_size);
    if (copy == VM_MAP_COPY_NULL) {
        return KERN_RESOURCE_SHORTAGE;
    }
    copy->type = VM_MAP_COPY_KERNEL_BUFFER;
    copy->size = len;
    copy->offset = 0;
    copy->cpy_kdata = (void *) (copy + 1);
    copy->cpy_kalloc_size = kalloc_size;

    kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
    if (kr != KERN_SUCCESS) {
        kfree(copy, kalloc_size);
        return kr;
    }
    if (src_destroy) {
        (void) vm_map_remove(
            src_map,
            vm_map_trunc_page(src_addr,
                              VM_MAP_PAGE_MASK(src_map)),
            vm_map_round_page(src_addr + len,
                              VM_MAP_PAGE_MASK(src_map)),
            (VM_MAP_REMOVE_INTERRUPTIBLE |
             VM_MAP_REMOVE_WAIT_FOR_KWIRE |
             (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0));
    }
    *copy_result = copy;
    return KERN_SUCCESS;
}
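
/*
 * Illustrative sketch (not original code; kept under #if 0 so it is never
 * compiled): the single-allocation layout used above, where the payload is
 * placed immediately after the header ("copy + 1") so that one kalloc/kfree
 * covers both.  The struct and helper below are invented for illustration.
 */
#if 0
struct example_hdr_and_payload {
    vm_size_t payload_size;
};

static void *
example_alloc_with_payload(vm_size_t payload_size)
{
    struct example_hdr_and_payload *hdr;

    hdr = kalloc(sizeof (*hdr) + payload_size);
    if (hdr == NULL)
        return NULL;
    hdr->payload_size = payload_size;
    /* payload starts right after the header, as with cpy_kdata above */
    return (void *)(hdr + 1);
}
#endif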
/*
 *	Routine:	vm_map_copyout_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy out data from a kernel buffer into space in the
 *		destination map. The space may be optionally dynamically
 *		allocated.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
static int vm_map_copyout_kernel_buffer_failures = 0;
static kern_return_t
vm_map_copyout_kernel_buffer(
    vm_map_t         map,
    vm_map_address_t *addr,  /* IN/OUT */
    vm_map_copy_t    copy,
    boolean_t        overwrite,
    boolean_t        consume_on_success)
{
    kern_return_t kr = KERN_SUCCESS;
    thread_t thread = current_thread();

    if (!overwrite) {

        /*
         * Allocate space in the target map for the data
         */
        *addr = 0;
        kr = vm_map_enter(map,
                          addr,
                          vm_map_round_page(copy->size,
                                            VM_MAP_PAGE_MASK(map)),
                          (vm_map_offset_t) 0,
                          VM_FLAGS_ANYWHERE,
                          VM_OBJECT_NULL,
                          (vm_object_offset_t) 0,
                          FALSE,
                          VM_PROT_DEFAULT,
                          VM_PROT_ALL,
                          VM_INHERIT_DEFAULT);
        if (kr != KERN_SUCCESS)
            return kr;
    }

    /*
     * Copyout the data from the kernel buffer to the target map.
     */
    if (thread->map == map) {

        /*
         * If the target map is the current map, just do
         * the copy.
         */
        assert((vm_size_t) copy->size == copy->size);
        if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
            kr = KERN_INVALID_ADDRESS;
        }
    }
    else {
        vm_map_t oldmap;

        /*
         * If the target map is another map, assume the
         * target's address space identity for the duration
         * of the copy.
         */
        vm_map_reference(map);
        oldmap = vm_map_switch(map);

        assert((vm_size_t) copy->size == copy->size);
        if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
            vm_map_copyout_kernel_buffer_failures++;
            kr = KERN_INVALID_ADDRESS;
        }

        (void) vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }

    if (kr != KERN_SUCCESS) {
        /* the copy failed, clean up */
        if (!overwrite) {
            /*
             * Deallocate the space we allocated in the target map.
             */
            (void) vm_map_remove(
                map,
                vm_map_trunc_page(*addr,
                                  VM_MAP_PAGE_MASK(map)),
                vm_map_round_page((*addr +
                                   vm_map_round_page(copy->size,
                                                     VM_MAP_PAGE_MASK(map))),
                                  VM_MAP_PAGE_MASK(map)),
                VM_MAP_NO_FLAGS);
            *addr = 0;
        }
    } else {
        /* copy was successful, discard the copy structure */
        if (consume_on_success) {
            kfree(copy, copy->cpy_kalloc_size);
        }
    }

    return kr;
}
/*
 *	Macro:		vm_map_copy_insert
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *	Side effects:
 *		The copy chain is destroyed.
 *	Warning:
 *		The arguments are evaluated multiple times.
 */
#define	vm_map_copy_insert(map, where, copy)				\
MACRO_BEGIN								\
	vm_map_store_copy_insert(map, where, copy);			\
	zfree(vm_map_copy_zone, copy);					\
MACRO_END
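
/*
 * Illustrative sketch (not original code; kept under #if 0 so it is never
 * compiled): because vm_map_copy_insert() evaluates its arguments more than
 * once, callers should pass plain lvalues, never expressions with side
 * effects.  The helper name is invented; "where" and "copy" stand for values
 * obtained elsewhere.
 */
#if 0
static void
example_copy_insert_usage(
    vm_map_t       map,
    vm_map_entry_t where,
    vm_map_copy_t  copy)
{
    /* fine: simple variables can safely be evaluated more than once */
    vm_map_copy_insert(map, where, copy);

    /*
     * NOT fine (the call would be evaluated more than once):
     * vm_map_copy_insert(map, where, fetch_next_copy());
     */
}
#endif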
static void
vm_map_copy_remap(
    vm_map_t        map,
    vm_map_entry_t  where,
    vm_map_copy_t   copy,
    vm_map_offset_t adjustment,
    vm_prot_t       cur_prot,
    vm_prot_t       max_prot,
    vm_inherit_t    inheritance)
{
    vm_map_entry_t copy_entry, new_entry;

    for (copy_entry = vm_map_copy_first_entry(copy);
         copy_entry != vm_map_copy_to_entry(copy);
         copy_entry = copy_entry->vme_next) {
        /* get a new VM map entry for the map */
        new_entry = vm_map_entry_create(map,
                                        !map->hdr.entries_pageable);
        /* copy the "copy entry" to the new entry */
        vm_map_entry_copy(new_entry, copy_entry);
        /* adjust "start" and "end" */
        new_entry->vme_start += adjustment;
        new_entry->vme_end += adjustment;
        /* clear some attributes */
        new_entry->inheritance = inheritance;
        new_entry->protection = cur_prot;
        new_entry->max_protection = max_prot;
        new_entry->behavior = VM_BEHAVIOR_DEFAULT;
        /* take an extra reference on the entry's "object" */
        if (new_entry->is_sub_map) {
            vm_map_lock(new_entry->object.sub_map);
            vm_map_reference(new_entry->object.sub_map);
            vm_map_unlock(new_entry->object.sub_map);
        } else {
            vm_object_reference(new_entry->object.vm_object);
        }
        /* insert the new entry in the map */
        vm_map_store_entry_link(map, where, new_entry);
        /* continue inserting the "copy entries" after the new entry */
        where = new_entry;
    }
}
/*
 *	Routine:	vm_map_copyout
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */

kern_return_t
vm_map_copyout(
    vm_map_t         dst_map,
    vm_map_address_t *dst_addr,  /* OUT */
    vm_map_copy_t    copy)
{
    return vm_map_copyout_internal(dst_map, dst_addr, copy,
                                   TRUE, /* consume_on_success */
                                   VM_PROT_DEFAULT,
                                   VM_PROT_ALL,
                                   VM_INHERIT_DEFAULT);
}
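
/*
 * Illustrative sketch (not original code; kept under #if 0 so it is never
 * compiled): the typical pairing of vm_map_copyin() and vm_map_copyout() to
 * move a region from one address space to another.  The helper name and its
 * error handling are invented; the routines it calls are the ones defined
 * in this file.
 */
#if 0
static kern_return_t
example_transfer_region(
    vm_map_t         src_map,
    vm_map_address_t src_addr,
    vm_map_size_t    len,
    vm_map_t         dst_map,
    vm_map_address_t *dst_addr)  /* OUT */
{
    vm_map_copy_t copy;
    kern_return_t kr;

    /* capture the source region (without destroying it) */
    kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
    if (kr != KERN_SUCCESS)
        return kr;

    /* drop it into newly-allocated space in the destination map */
    kr = vm_map_copyout(dst_map, dst_addr, copy);
    if (kr != KERN_SUCCESS) {
        /* on failure the copy object was not consumed; discard it */
        vm_map_copy_discard(copy);
    }
    return kr;
}
#endif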
7653 vm_map_copyout_internal(
7655 vm_map_address_t
*dst_addr
, /* OUT */
7657 boolean_t consume_on_success
,
7658 vm_prot_t cur_protection
,
7659 vm_prot_t max_protection
,
7660 vm_inherit_t inheritance
)
7663 vm_map_size_t adjustment
;
7664 vm_map_offset_t start
;
7665 vm_object_offset_t vm_copy_start
;
7666 vm_map_entry_t last
;
7667 vm_map_entry_t entry
;
7670 * Check for null copy object.
7673 if (copy
== VM_MAP_COPY_NULL
) {
7675 return(KERN_SUCCESS
);
7679 * Check for special copy object, created
7680 * by vm_map_copyin_object.
7683 if (copy
->type
== VM_MAP_COPY_OBJECT
) {
7684 vm_object_t object
= copy
->cpy_object
;
7686 vm_object_offset_t offset
;
7688 offset
= vm_object_trunc_page(copy
->offset
);
7689 size
= vm_map_round_page((copy
->size
+
7690 (vm_map_size_t
)(copy
->offset
-
7692 VM_MAP_PAGE_MASK(dst_map
));
7694 kr
= vm_map_enter(dst_map
, dst_addr
, size
,
7695 (vm_map_offset_t
) 0, VM_FLAGS_ANYWHERE
,
7696 object
, offset
, FALSE
,
7697 VM_PROT_DEFAULT
, VM_PROT_ALL
,
7698 VM_INHERIT_DEFAULT
);
7699 if (kr
!= KERN_SUCCESS
)
7701 /* Account for non-pagealigned copy object */
7702 *dst_addr
+= (vm_map_offset_t
)(copy
->offset
- offset
);
7703 if (consume_on_success
)
7704 zfree(vm_map_copy_zone
, copy
);
7705 return(KERN_SUCCESS
);
7709 * Check for special kernel buffer allocated
7710 * by new_ipc_kmsg_copyin.
7713 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
7714 return vm_map_copyout_kernel_buffer(dst_map
, dst_addr
,
7716 consume_on_success
);
7721 * Find space for the data
7724 vm_copy_start
= vm_map_trunc_page((vm_map_size_t
)copy
->offset
,
7725 VM_MAP_COPY_PAGE_MASK(copy
));
7726 size
= vm_map_round_page((vm_map_size_t
)copy
->offset
+ copy
->size
,
7727 VM_MAP_COPY_PAGE_MASK(copy
))
7733 vm_map_lock(dst_map
);
7734 if( dst_map
->disable_vmentry_reuse
== TRUE
) {
7735 VM_MAP_HIGHEST_ENTRY(dst_map
, entry
, start
);
7738 assert(first_free_is_valid(dst_map
));
7739 start
= ((last
= dst_map
->first_free
) == vm_map_to_entry(dst_map
)) ?
7740 vm_map_min(dst_map
) : last
->vme_end
;
7741 start
= vm_map_round_page(start
,
7742 VM_MAP_PAGE_MASK(dst_map
));
7746 vm_map_entry_t next
= last
->vme_next
;
7747 vm_map_offset_t end
= start
+ size
;
7749 if ((end
> dst_map
->max_offset
) || (end
< start
)) {
7750 if (dst_map
->wait_for_space
) {
7751 if (size
<= (dst_map
->max_offset
- dst_map
->min_offset
)) {
7752 assert_wait((event_t
) dst_map
,
7753 THREAD_INTERRUPTIBLE
);
7754 vm_map_unlock(dst_map
);
7755 thread_block(THREAD_CONTINUE_NULL
);
7759 vm_map_unlock(dst_map
);
7760 return(KERN_NO_SPACE
);
7763 if ((next
== vm_map_to_entry(dst_map
)) ||
7764 (next
->vme_start
>= end
))
7768 start
= last
->vme_end
;
7769 start
= vm_map_round_page(start
,
7770 VM_MAP_PAGE_MASK(dst_map
));
7773 adjustment
= start
- vm_copy_start
;
7774 if (! consume_on_success
) {
7776 * We're not allowed to consume "copy", so we'll have to
7777 * copy its map entries into the destination map below.
7778 * No need to re-allocate map entries from the correct
7779 * (pageable or not) zone, since we'll get new map entries
7780 * during the transfer.
7781 * We'll also adjust the map entries's "start" and "end"
7782 * during the transfer, to keep "copy"'s entries consistent
7783 * with its "offset".
7785 goto after_adjustments
;
7789 * Since we're going to just drop the map
7790 * entries from the copy into the destination
7791 * map, they must come from the same pool.
7794 if (copy
->cpy_hdr
.entries_pageable
!= dst_map
->hdr
.entries_pageable
) {
7796 * Mismatches occur when dealing with the default
7800 vm_map_entry_t next
, new;
7803 * Find the zone that the copies were allocated from
7806 entry
= vm_map_copy_first_entry(copy
);
7809 * Reinitialize the copy so that vm_map_copy_entry_link
7812 vm_map_store_copy_reset(copy
, entry
);
7813 copy
->cpy_hdr
.entries_pageable
= dst_map
->hdr
.entries_pageable
;
7818 while (entry
!= vm_map_copy_to_entry(copy
)) {
7819 new = vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
7820 vm_map_entry_copy_full(new, entry
);
7821 new->use_pmap
= FALSE
; /* clr address space specifics */
7822 vm_map_copy_entry_link(copy
,
7823 vm_map_copy_last_entry(copy
),
7825 next
= entry
->vme_next
;
7826 old_zone
= entry
->from_reserved_zone
? vm_map_entry_reserved_zone
: vm_map_entry_zone
;
7827 zfree(old_zone
, entry
);
7833 * Adjust the addresses in the copy chain, and
7834 * reset the region attributes.
7837 for (entry
= vm_map_copy_first_entry(copy
);
7838 entry
!= vm_map_copy_to_entry(copy
);
7839 entry
= entry
->vme_next
) {
7840 if (VM_MAP_PAGE_SHIFT(dst_map
) == PAGE_SHIFT
) {
7842 * We're injecting this copy entry into a map that
7843 * has the standard page alignment, so clear
7844 * "map_aligned" (which might have been inherited
7845 * from the original map entry).
7847 entry
->map_aligned
= FALSE
;
7850 entry
->vme_start
+= adjustment
;
7851 entry
->vme_end
+= adjustment
;
7853 if (entry
->map_aligned
) {
7854 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_start
,
7855 VM_MAP_PAGE_MASK(dst_map
)));
7856 assert(VM_MAP_PAGE_ALIGNED(entry
->vme_end
,
7857 VM_MAP_PAGE_MASK(dst_map
)));
7860 entry
->inheritance
= VM_INHERIT_DEFAULT
;
7861 entry
->protection
= VM_PROT_DEFAULT
;
7862 entry
->max_protection
= VM_PROT_ALL
;
7863 entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
7866 * If the entry is now wired,
7867 * map the pages into the destination map.
7869 if (entry
->wired_count
!= 0) {
7870 register vm_map_offset_t va
;
7871 vm_object_offset_t offset
;
7872 register vm_object_t object
;
7876 object
= entry
->object
.vm_object
;
7877 offset
= entry
->offset
;
7878 va
= entry
->vme_start
;
7880 pmap_pageable(dst_map
->pmap
,
7885 while (va
< entry
->vme_end
) {
7886 register vm_page_t m
;
7889 * Look up the page in the object.
7890 * Assert that the page will be found in the
7893 * the object was newly created by
7894 * vm_object_copy_slowly, and has
7895 * copies of all of the pages from
7898 * the object was moved from the old
7899 * map entry; because the old map
7900 * entry was wired, all of the pages
7901 * were in the top-level object.
7902 * (XXX not true if we wire pages for
7905 vm_object_lock(object
);
7907 m
= vm_page_lookup(object
, offset
);
7908 if (m
== VM_PAGE_NULL
|| !VM_PAGE_WIRED(m
) ||
7910 panic("vm_map_copyout: wiring %p", m
);
7914 * The page is assumed to be wired here, so it
7915 * shouldn't be encrypted. Otherwise, we
7916 * couldn't enter it in the page table, since
7917 * we don't want the user to see the encrypted
7920 ASSERT_PAGE_DECRYPTED(m
);
7922 prot
= entry
->protection
;
7924 if (override_nx(dst_map
, entry
->alias
) && prot
)
7925 prot
|= VM_PROT_EXECUTE
;
7927 type_of_fault
= DBG_CACHE_HIT_FAULT
;
7929 vm_fault_enter(m
, dst_map
->pmap
, va
, prot
, prot
,
7930 VM_PAGE_WIRED(m
), FALSE
, FALSE
, FALSE
, NULL
,
7933 vm_object_unlock(object
);
7935 offset
+= PAGE_SIZE_64
;
7944 * Correct the page alignment for the result
7947 *dst_addr
= start
+ (copy
->offset
- vm_copy_start
);
7950 * Update the hints and the map size
7953 if (consume_on_success
) {
7954 SAVE_HINT_MAP_WRITE(dst_map
, vm_map_copy_last_entry(copy
));
7956 SAVE_HINT_MAP_WRITE(dst_map
, last
);
7959 dst_map
->size
+= size
;
7965 if (consume_on_success
) {
7966 vm_map_copy_insert(dst_map
, last
, copy
);
7968 vm_map_copy_remap(dst_map
, last
, copy
, adjustment
,
7969 cur_protection
, max_protection
,
7973 vm_map_unlock(dst_map
);
7976 * XXX If wiring_required, call vm_map_pageable
7979 return(KERN_SUCCESS
);
/*
 *	Routine:	vm_map_copyin
 *
 *	Description:
 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
 *
 */

#undef vm_map_copyin

kern_return_t
vm_map_copyin(
    vm_map_t         src_map,
    vm_map_address_t src_addr,
    vm_map_size_t    len,
    boolean_t        src_destroy,
    vm_map_copy_t    *copy_result)  /* OUT */
{
    return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
                                FALSE, copy_result, FALSE));
}
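
/*
 * Illustrative sketch (not original code; kept under #if 0 so it is never
 * compiled): vm_map_copyin() above is vm_map_copyin_common() with
 * use_maxprot == FALSE.  Passing TRUE instead asks whether the memory could
 * ever be read (its maximum protection), which is how the fork path probes
 * regions.  The wrapper name below is invented.
 */
#if 0
static kern_return_t
example_copyin_maxprot(
    vm_map_t         src_map,
    vm_map_address_t src_addr,
    vm_map_size_t    len,
    vm_map_copy_t    *copy_result)  /* OUT */
{
    return vm_map_copyin_common(src_map, src_addr, len,
                                FALSE,  /* src_destroy */
                                FALSE,  /* src_volatile (unused) */
                                copy_result,
                                TRUE);  /* use_maxprot */
}
#endif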
/*
 *	Routine:	vm_map_copyin_common
 *
 *	Description:
 *		Copy the specified region (src_addr, len) from the
 *		source address space (src_map), possibly removing
 *		the region from the source address space (src_destroy).
 *
 *	Returns:
 *		A vm_map_copy_t object (copy_result), suitable for
 *		insertion into another address space (using vm_map_copyout),
 *		copying over another address space region (using
 *		vm_map_copy_overwrite).  If the copy is unused, it
 *		should be destroyed (using vm_map_copy_discard).
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */

typedef struct submap_map {
    vm_map_t        parent_map;
    vm_map_offset_t base_start;
    vm_map_offset_t base_end;
    vm_map_size_t   base_len;
    struct submap_map *next;
} submap_map_t;

kern_return_t
vm_map_copyin_common(
    vm_map_t           src_map,
    vm_map_address_t   src_addr,
    vm_map_size_t      len,
    boolean_t          src_destroy,
    __unused boolean_t src_volatile,
    vm_map_copy_t      *copy_result,  /* OUT */
    boolean_t          use_maxprot)
{
    vm_map_entry_t  tmp_entry;  /* Result of last map lookup --
                                 * in multi-level lookup, this
                                 * entry contains the actual
                                 * vaddr of this level */
    vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
    vm_map_offset_t src_start;  /* Start of current entry --
                                 * where copy is taking place now
                                 */
    vm_map_offset_t src_end;    /* End of entire region to be
                                 * copied */
    vm_map_offset_t src_base;
    vm_map_t        base_map = src_map;
    boolean_t       map_share=FALSE;
    submap_map_t    *parent_maps = NULL;

    vm_map_copy_t   copy;       /* Resulting copy */
    vm_map_address_t copy_addr;
8064 * Check for copies of zero bytes.
8068 *copy_result
= VM_MAP_COPY_NULL
;
8069 return(KERN_SUCCESS
);
8073 * Check that the end address doesn't overflow
8075 src_end
= src_addr
+ len
;
8076 if (src_end
< src_addr
)
8077 return KERN_INVALID_ADDRESS
;
8080 * If the copy is sufficiently small, use a kernel buffer instead
8081 * of making a virtual copy. The theory being that the cost of
8082 * setting up VM (and taking C-O-W faults) dominates the copy costs
8083 * for small regions.
8085 if ((len
< msg_ool_size_small
) && !use_maxprot
)
8086 return vm_map_copyin_kernel_buffer(src_map
, src_addr
, len
,
8087 src_destroy
, copy_result
);
8090 * Compute (page aligned) start and end of region
8092 src_start
= vm_map_trunc_page(src_addr
,
8093 VM_MAP_PAGE_MASK(src_map
));
8094 src_end
= vm_map_round_page(src_end
,
8095 VM_MAP_PAGE_MASK(src_map
));
8097 XPR(XPR_VM_MAP
, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map
, src_addr
, len
, src_destroy
, 0);
8100 * Allocate a header element for the list.
8102 * Use the start and end in the header to
8103 * remember the endpoints prior to rounding.
8106 copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
8107 vm_map_copy_first_entry(copy
) =
8108 vm_map_copy_last_entry(copy
) = vm_map_copy_to_entry(copy
);
8109 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
8110 copy
->cpy_hdr
.nentries
= 0;
8111 copy
->cpy_hdr
.entries_pageable
= TRUE
;
8113 copy
->cpy_hdr
.page_shift
= src_map
->hdr
.page_shift
;
8116 * The copy entries can be broken down for a variety of reasons,
8117 * so we can't guarantee that they will remain map-aligned...
8118 * Will need to adjust the first copy_entry's "vme_start" and
8119 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
8120 * rather than the original map's alignment.
8122 copy
->cpy_hdr
.page_shift
= PAGE_SHIFT
;
8125 vm_map_store_init( &(copy
->cpy_hdr
) );
8127 copy
->offset
= src_addr
;
8130 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
8134 vm_map_unlock(src_map); \
8135 if(src_map != base_map) \
8136 vm_map_deallocate(src_map); \
8137 if (new_entry != VM_MAP_ENTRY_NULL) \
8138 vm_map_copy_entry_dispose(copy,new_entry); \
8139 vm_map_copy_discard(copy); \
8141 submap_map_t *_ptr; \
8143 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
8144 parent_maps=parent_maps->next; \
8145 if (_ptr->parent_map != base_map) \
8146 vm_map_deallocate(_ptr->parent_map); \
8147 kfree(_ptr, sizeof(submap_map_t)); \
8154 * Find the beginning of the region.
8157 vm_map_lock(src_map
);
8159 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
))
8160 RETURN(KERN_INVALID_ADDRESS
);
8161 if(!tmp_entry
->is_sub_map
) {
8162 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
8164 /* set for later submap fix-up */
8165 copy_addr
= src_start
;
8168 * Go through entries until we get to the end.
8173 vm_map_entry_t src_entry
= tmp_entry
; /* Top-level entry */
8174 vm_map_size_t src_size
; /* Size of source
8175 * map entry (in both
8180 vm_object_t src_object
; /* Object to copy */
8181 vm_object_offset_t src_offset
;
8183 boolean_t src_needs_copy
; /* Should source map
8185 * for copy-on-write?
8188 boolean_t new_entry_needs_copy
; /* Will new entry be COW? */
8190 boolean_t was_wired
; /* Was source wired? */
8191 vm_map_version_t version
; /* Version before locks
8192 * dropped to make copy
8194 kern_return_t result
; /* Return value from
8195 * copy_strategically.
8197 while(tmp_entry
->is_sub_map
) {
8198 vm_map_size_t submap_len
;
8201 ptr
= (submap_map_t
*)kalloc(sizeof(submap_map_t
));
8202 ptr
->next
= parent_maps
;
8204 ptr
->parent_map
= src_map
;
8205 ptr
->base_start
= src_start
;
8206 ptr
->base_end
= src_end
;
8207 submap_len
= tmp_entry
->vme_end
- src_start
;
8208 if(submap_len
> (src_end
-src_start
))
8209 submap_len
= src_end
-src_start
;
8210 ptr
->base_len
= submap_len
;
8212 src_start
-= tmp_entry
->vme_start
;
8213 src_start
+= tmp_entry
->offset
;
8214 src_end
= src_start
+ submap_len
;
8215 src_map
= tmp_entry
->object
.sub_map
;
8216 vm_map_lock(src_map
);
8217 /* keep an outstanding reference for all maps in */
8218 /* the parents tree except the base map */
8219 vm_map_reference(src_map
);
8220 vm_map_unlock(ptr
->parent_map
);
8221 if (!vm_map_lookup_entry(
8222 src_map
, src_start
, &tmp_entry
))
8223 RETURN(KERN_INVALID_ADDRESS
);
8225 if(!tmp_entry
->is_sub_map
)
8226 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
8227 src_entry
= tmp_entry
;
8229 /* we are now in the lowest level submap... */
8231 if ((tmp_entry
->object
.vm_object
!= VM_OBJECT_NULL
) &&
8232 (tmp_entry
->object
.vm_object
->phys_contiguous
)) {
8233 /* This is not, supported for now.In future */
8234 /* we will need to detect the phys_contig */
8235 /* condition and then upgrade copy_slowly */
8236 /* to do physical copy from the device mem */
8237 /* based object. We can piggy-back off of */
8238 /* the was wired boolean to set-up the */
8239 /* proper handling */
8240 RETURN(KERN_PROTECTION_FAILURE
);
8243 * Create a new address map entry to hold the result.
8244 * Fill in the fields from the appropriate source entries.
8245 * We must unlock the source map to do this if we need
8246 * to allocate a map entry.
8248 if (new_entry
== VM_MAP_ENTRY_NULL
) {
8249 version
.main_timestamp
= src_map
->timestamp
;
8250 vm_map_unlock(src_map
);
8252 new_entry
= vm_map_copy_entry_create(copy
, !copy
->cpy_hdr
.entries_pageable
);
8254 vm_map_lock(src_map
);
8255 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
8256 if (!vm_map_lookup_entry(src_map
, src_start
,
8258 RETURN(KERN_INVALID_ADDRESS
);
8260 if (!tmp_entry
->is_sub_map
)
8261 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
8262 continue; /* restart w/ new tmp_entry */
8267 * Verify that the region can be read.
8269 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
8271 (src_entry
->max_protection
& VM_PROT_READ
) == 0)
8272 RETURN(KERN_PROTECTION_FAILURE
);
8275 * Clip against the endpoints of the entire region.
8278 vm_map_clip_end(src_map
, src_entry
, src_end
);
8280 src_size
= src_entry
->vme_end
- src_start
;
8281 src_object
= src_entry
->object
.vm_object
;
8282 src_offset
= src_entry
->offset
;
8283 was_wired
= (src_entry
->wired_count
!= 0);
8285 vm_map_entry_copy(new_entry
, src_entry
);
8286 new_entry
->use_pmap
= FALSE
; /* clr address space specifics */
8289 * Attempt non-blocking copy-on-write optimizations.
8293 (src_object
== VM_OBJECT_NULL
||
8294 (src_object
->internal
&& !src_object
->true_share
8297 * If we are destroying the source, and the object
8298 * is internal, we can move the object reference
8299 * from the source to the copy. The copy is
8300 * copy-on-write only if the source is.
8301 * We make another reference to the object, because
8302 * destroying the source entry will deallocate it.
8304 vm_object_reference(src_object
);
8307 * Copy is always unwired. vm_map_copy_entry
8308 * set its wired count to zero.
8311 goto CopySuccessful
;
8316 XPR(XPR_VM_MAP
, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
8317 src_object
, new_entry
, new_entry
->object
.vm_object
,
8319 if ((src_object
== VM_OBJECT_NULL
||
8320 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
)) &&
8321 vm_object_copy_quickly(
8322 &new_entry
->object
.vm_object
,
8326 &new_entry_needs_copy
)) {
8328 new_entry
->needs_copy
= new_entry_needs_copy
;
8331 * Handle copy-on-write obligations
8334 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
8337 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
8339 if (override_nx(src_map
, src_entry
->alias
) && prot
)
8340 prot
|= VM_PROT_EXECUTE
;
8342 vm_object_pmap_protect(
8346 (src_entry
->is_shared
?
8349 src_entry
->vme_start
,
8352 tmp_entry
->needs_copy
= TRUE
;
8356 * The map has never been unlocked, so it's safe
8357 * to move to the next entry rather than doing
8361 goto CopySuccessful
;
8365 * Take an object reference, so that we may
8366 * release the map lock(s).
8369 assert(src_object
!= VM_OBJECT_NULL
);
8370 vm_object_reference(src_object
);
8373 * Record the timestamp for later verification.
8377 version
.main_timestamp
= src_map
->timestamp
;
8378 vm_map_unlock(src_map
); /* Increments timestamp once! */
8386 vm_object_lock(src_object
);
8387 result
= vm_object_copy_slowly(
8392 &new_entry
->object
.vm_object
);
8393 new_entry
->offset
= 0;
8394 new_entry
->needs_copy
= FALSE
;
8397 else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
8398 (tmp_entry
->is_shared
|| map_share
)) {
8399 vm_object_t new_object
;
8401 vm_object_lock_shared(src_object
);
8402 new_object
= vm_object_copy_delayed(
8407 if (new_object
== VM_OBJECT_NULL
)
8410 new_entry
->object
.vm_object
= new_object
;
8411 new_entry
->needs_copy
= TRUE
;
8412 result
= KERN_SUCCESS
;
8415 result
= vm_object_copy_strategically(src_object
,
8418 &new_entry
->object
.vm_object
,
8420 &new_entry_needs_copy
);
8422 new_entry
->needs_copy
= new_entry_needs_copy
;
8425 if (result
!= KERN_SUCCESS
&&
8426 result
!= KERN_MEMORY_RESTART_COPY
) {
8427 vm_map_lock(src_map
);
8432 * Throw away the extra reference
8435 vm_object_deallocate(src_object
);
8438 * Verify that the map has not substantially
8439 * changed while the copy was being made.
8442 vm_map_lock(src_map
);
8444 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
)
8445 goto VerificationSuccessful
;
8448 * Simple version comparison failed.
8450 * Retry the lookup and verify that the
8451 * same object/offset are still present.
8453 * [Note: a memory manager that colludes with
8454 * the calling task can detect that we have
8455 * cheated. While the map was unlocked, the
8456 * mapping could have been changed and restored.]
8459 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
8460 RETURN(KERN_INVALID_ADDRESS
);
8463 src_entry
= tmp_entry
;
8464 vm_map_clip_start(src_map
, src_entry
, src_start
);
8466 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
8468 ((src_entry
->max_protection
& VM_PROT_READ
) == 0))
8469 goto VerificationFailed
;
8471 if (src_entry
->vme_end
< new_entry
->vme_end
) {
8472 assert(VM_MAP_PAGE_ALIGNED(src_entry
->vme_end
,
8473 VM_MAP_COPY_PAGE_MASK(copy
)));
8474 new_entry
->vme_end
= src_entry
->vme_end
;
8475 src_size
= new_entry
->vme_end
- src_start
;
8478 if ((src_entry
->object
.vm_object
!= src_object
) ||
8479 (src_entry
->offset
!= src_offset
) ) {
8482 * Verification failed.
8484 * Start over with this top-level entry.
8487 VerificationFailed
: ;
8489 vm_object_deallocate(new_entry
->object
.vm_object
);
8490 tmp_entry
= src_entry
;
8495 * Verification succeeded.
8498 VerificationSuccessful
: ;
8500 if (result
== KERN_MEMORY_RESTART_COPY
)
8510 * Link in the new copy entry.
8513 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
8517 * Determine whether the entire region
8520 src_base
= src_start
;
8521 src_start
= new_entry
->vme_end
;
8522 new_entry
= VM_MAP_ENTRY_NULL
;
8523 while ((src_start
>= src_end
) && (src_end
!= 0)) {
8524 if (src_map
!= base_map
) {
8528 assert(ptr
!= NULL
);
8529 parent_maps
= parent_maps
->next
;
8531 /* fix up the damage we did in that submap */
8532 vm_map_simplify_range(src_map
,
8536 vm_map_unlock(src_map
);
8537 vm_map_deallocate(src_map
);
8538 vm_map_lock(ptr
->parent_map
);
8539 src_map
= ptr
->parent_map
;
8540 src_base
= ptr
->base_start
;
8541 src_start
= ptr
->base_start
+ ptr
->base_len
;
8542 src_end
= ptr
->base_end
;
8543 if ((src_end
> src_start
) &&
8544 !vm_map_lookup_entry(
8545 src_map
, src_start
, &tmp_entry
))
8546 RETURN(KERN_INVALID_ADDRESS
);
8547 kfree(ptr
, sizeof(submap_map_t
));
8548 if(parent_maps
== NULL
)
8550 src_entry
= tmp_entry
->vme_prev
;
8554 if ((src_start
>= src_end
) && (src_end
!= 0))
8558 * Verify that there are no gaps in the region
8561 tmp_entry
= src_entry
->vme_next
;
8562 if ((tmp_entry
->vme_start
!= src_start
) ||
8563 (tmp_entry
== vm_map_to_entry(src_map
))) {
8565 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
&&
8566 (vm_map_round_page(src_entry
->vme_end
,
8567 VM_MAP_PAGE_MASK(src_map
)) ==
8569 vm_map_entry_t last_copy_entry
;
8570 vm_map_offset_t adjustment
;
8573 * This is the last entry in the range we
8574 * want and it happens to miss a few pages
8575 * because it is not map-aligned (must have
8576 * been imported from a differently-aligned
8578 * Let's say we're done, but first we have
8579 * to compensate for the alignment adjustment
8580 * we're about to do before returning.
8583 last_copy_entry
= vm_map_copy_last_entry(copy
);
8584 assert(last_copy_entry
!=
8585 vm_map_copy_to_entry(copy
));
8587 (vm_map_round_page((copy
->offset
+
8589 VM_MAP_PAGE_MASK(src_map
)) -
8590 vm_map_round_page((copy
->offset
+
8593 last_copy_entry
->vme_end
+= adjustment
;
8594 last_copy_entry
->map_aligned
= FALSE
;
8595 /* ... and we're done */
8599 RETURN(KERN_INVALID_ADDRESS
);
8604 * If the source should be destroyed, do it now, since the
8605 * copy was successful.
8608 (void) vm_map_delete(
8610 vm_map_trunc_page(src_addr
,
8611 VM_MAP_PAGE_MASK(src_map
)),
8613 ((src_map
== kernel_map
) ?
8614 VM_MAP_REMOVE_KUNWIRE
:
8618 /* fix up the damage we did in the base map */
8619 vm_map_simplify_range(
8621 vm_map_trunc_page(src_addr
,
8622 VM_MAP_PAGE_MASK(src_map
)),
8623 vm_map_round_page(src_end
,
8624 VM_MAP_PAGE_MASK(src_map
)));
8627 vm_map_unlock(src_map
);
8629 if (VM_MAP_PAGE_SHIFT(src_map
) != PAGE_SHIFT
) {
8630 assert(VM_MAP_COPY_PAGE_MASK(copy
) == PAGE_MASK
);
8632 /* adjust alignment of first copy_entry's "vme_start" */
8633 tmp_entry
= vm_map_copy_first_entry(copy
);
8634 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
8635 vm_map_offset_t adjustment
;
8637 (vm_map_trunc_page(copy
->offset
,
8639 vm_map_trunc_page(copy
->offset
,
8640 VM_MAP_PAGE_MASK(src_map
)));
8642 assert(page_aligned(adjustment
));
8643 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
8644 tmp_entry
->vme_start
+= adjustment
;
8645 tmp_entry
->offset
+= adjustment
;
8646 copy_addr
+= adjustment
;
8647 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
8651 /* adjust alignment of last copy_entry's "vme_end" */
8652 tmp_entry
= vm_map_copy_last_entry(copy
);
8653 if (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
8654 vm_map_offset_t adjustment
;
8656 (vm_map_round_page((copy
->offset
+
8658 VM_MAP_PAGE_MASK(src_map
)) -
8659 vm_map_round_page((copy
->offset
+
8663 assert(page_aligned(adjustment
));
8664 assert(adjustment
< VM_MAP_PAGE_SIZE(src_map
));
8665 tmp_entry
->vme_end
-= adjustment
;
8666 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
8671 /* Fix-up start and end points in copy. This is necessary */
8672 /* when the various entries in the copy object were picked */
8673 /* up from different sub-maps */
8675 tmp_entry
= vm_map_copy_first_entry(copy
);
8676 while (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
8677 assert(VM_MAP_PAGE_ALIGNED(
8678 copy_addr
+ (tmp_entry
->vme_end
-
8679 tmp_entry
->vme_start
),
8680 VM_MAP_COPY_PAGE_MASK(copy
)));
8681 assert(VM_MAP_PAGE_ALIGNED(
8683 VM_MAP_COPY_PAGE_MASK(copy
)));
8686 * The copy_entries will be injected directly into the
8687 * destination map and might not be "map aligned" there...
8689 tmp_entry
->map_aligned
= FALSE
;
8691 tmp_entry
->vme_end
= copy_addr
+
8692 (tmp_entry
->vme_end
- tmp_entry
->vme_start
);
8693 tmp_entry
->vme_start
= copy_addr
;
8694 assert(tmp_entry
->vme_start
< tmp_entry
->vme_end
);
8695 copy_addr
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
8696 tmp_entry
= (struct vm_map_entry
*)tmp_entry
->vme_next
;
8699 *copy_result
= copy
;
8700 return(KERN_SUCCESS
);
kern_return_t
vm_map_copy_extract(
    vm_map_t         src_map,
    vm_map_address_t src_addr,
    vm_map_size_t    len,
    vm_map_copy_t    *copy_result,  /* OUT */
    vm_prot_t        *cur_prot,     /* OUT */
    vm_prot_t        *max_prot)
{
    vm_map_offset_t src_start, src_end;
    vm_map_copy_t   copy;
    kern_return_t   kr;

    /*
     * Check for copies of zero bytes.
     */

    if (len == 0) {
        *copy_result = VM_MAP_COPY_NULL;
        return(KERN_SUCCESS);
    }

    /*
     * Check that the end address doesn't overflow
     */
    src_end = src_addr + len;
    if (src_end < src_addr)
        return KERN_INVALID_ADDRESS;

    /*
     * Compute (page aligned) start and end of region
     */
    src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
    src_end = vm_map_round_page(src_end, PAGE_MASK);

    /*
     * Allocate a header element for the list.
     *
     * Use the start and end in the header to
     * remember the endpoints prior to rounding.
     */

    copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
    vm_map_copy_first_entry(copy) =
        vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
    copy->type = VM_MAP_COPY_ENTRY_LIST;
    copy->cpy_hdr.nentries = 0;
    copy->cpy_hdr.entries_pageable = TRUE;

    vm_map_store_init(&copy->cpy_hdr);

    copy->offset = 0;
    copy->size = len;

    kr = vm_map_remap_extract(src_map,
                              src_addr,
                              len,
                              FALSE, /* copy */
                              &copy->cpy_hdr,
                              cur_prot,
                              max_prot,
                              VM_INHERIT_SHARE,
                              TRUE); /* pageable */
    if (kr != KERN_SUCCESS) {
        vm_map_copy_discard(copy);
        return kr;
    }

    *copy_result = copy;
    return KERN_SUCCESS;
}
/*
 *	vm_map_copyin_object:
 *
 *	Create a copy object from an object.
 *	Our caller donates an object reference.
 */

kern_return_t
vm_map_copyin_object(
    vm_object_t        object,
    vm_object_offset_t offset,  /* offset of region in object */
    vm_object_size_t   size,    /* size of region in object */
    vm_map_copy_t      *copy_result)  /* OUT */
{
    vm_map_copy_t copy;  /* Resulting copy */

    /*
     * We drop the object into a special copy object
     * that contains the object directly.
     */

    copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
    copy->type = VM_MAP_COPY_OBJECT;
    copy->cpy_object = object;
    copy->offset = offset;
    copy->size = size;

    *copy_result = copy;
    return(KERN_SUCCESS);
}
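
/*
 * Illustrative sketch (not original code; kept under #if 0 so it is never
 * compiled): vm_map_copyin_object() consumes ("is donated") the caller's
 * object reference, so a caller that allocates an object just to wrap it in
 * a copy hands its only reference over and must not deallocate it
 * afterwards.  The helper name is invented.
 */
#if 0
static kern_return_t
example_wrap_new_object(
    vm_object_size_t size,
    vm_map_copy_t    *copy_result)  /* OUT */
{
    vm_object_t object;

    object = vm_object_allocate(size);  /* one reference, ours */
    if (object == VM_OBJECT_NULL)
        return KERN_RESOURCE_SHORTAGE;

    /* the copy object now owns the reference we just took */
    return vm_map_copyin_object(object, 0, size, copy_result);
}
#endif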
8811 vm_map_entry_t old_entry
,
8815 vm_map_entry_t new_entry
;
8818 * New sharing code. New map entry
8819 * references original object. Internal
8820 * objects use asynchronous copy algorithm for
8821 * future copies. First make sure we have
8822 * the right object. If we need a shadow,
8823 * or someone else already has one, then
8824 * make a new shadow and share it.
8827 object
= old_entry
->object
.vm_object
;
8828 if (old_entry
->is_sub_map
) {
8829 assert(old_entry
->wired_count
== 0);
8830 #ifndef NO_NESTED_PMAP
8831 if(old_entry
->use_pmap
) {
8832 kern_return_t result
;
8834 result
= pmap_nest(new_map
->pmap
,
8835 (old_entry
->object
.sub_map
)->pmap
,
8836 (addr64_t
)old_entry
->vme_start
,
8837 (addr64_t
)old_entry
->vme_start
,
8838 (uint64_t)(old_entry
->vme_end
- old_entry
->vme_start
));
8840 panic("vm_map_fork_share: pmap_nest failed!");
8842 #endif /* NO_NESTED_PMAP */
8843 } else if (object
== VM_OBJECT_NULL
) {
8844 object
= vm_object_allocate((vm_map_size_t
)(old_entry
->vme_end
-
8845 old_entry
->vme_start
));
8846 old_entry
->offset
= 0;
8847 old_entry
->object
.vm_object
= object
;
8848 assert(!old_entry
->needs_copy
);
8849 } else if (object
->copy_strategy
!=
8850 MEMORY_OBJECT_COPY_SYMMETRIC
) {
8853 * We are already using an asymmetric
8854 * copy, and therefore we already have
8858 assert(! old_entry
->needs_copy
);
8860 else if (old_entry
->needs_copy
|| /* case 1 */
8861 object
->shadowed
|| /* case 2 */
8862 (!object
->true_share
&& /* case 3 */
8863 !old_entry
->is_shared
&&
8865 (vm_map_size_t
)(old_entry
->vme_end
-
8866 old_entry
->vme_start
)))) {
8869 * We need to create a shadow.
8870 * There are three cases here.
8871 * In the first case, we need to
8872 * complete a deferred symmetrical
8873 * copy that we participated in.
8874 * In the second and third cases,
8875 * we need to create the shadow so
8876 * that changes that we make to the
8877 * object do not interfere with
8878 * any symmetrical copies which
8879 * have occured (case 2) or which
8880 * might occur (case 3).
8882 * The first case is when we had
8883 * deferred shadow object creation
8884 * via the entry->needs_copy mechanism.
8885 * This mechanism only works when
8886 * only one entry points to the source
8887 * object, and we are about to create
8888 * a second entry pointing to the
8889 * same object. The problem is that
8890 * there is no way of mapping from
8891 * an object to the entries pointing
8892 * to it. (Deferred shadow creation
8893 * works with one entry because occurs
8894 * at fault time, and we walk from the
8895 * entry to the object when handling
8898 * The second case is when the object
8899 * to be shared has already been copied
8900 * with a symmetric copy, but we point
8901 * directly to the object without
8902 * needs_copy set in our entry. (This
8903 * can happen because different ranges
8904 * of an object can be pointed to by
8905 * different entries. In particular,
8906 * a single entry pointing to an object
8907 * can be split by a call to vm_inherit,
8908 * which, combined with task_create, can
8909 * result in the different entries
8910 * having different needs_copy values.)
8911 * The shadowed flag in the object allows
8912 * us to detect this case. The problem
8913 * with this case is that if this object
8914 * has or will have shadows, then we
8915 * must not perform an asymmetric copy
8916 * of this object, since such a copy
8917 * allows the object to be changed, which
8918 * will break the previous symmetrical
8919 * copies (which rely upon the object
8920 * not changing). In a sense, the shadowed
8921 * flag says "don't change this object".
8922 * We fix this by creating a shadow
8923 * object for this object, and sharing
8924 * that. This works because we are free
8925 * to change the shadow object (and thus
8926 * to use an asymmetric copy strategy);
8927 * this is also semantically correct,
8928 * since this object is temporary, and
8929 * therefore a copy of the object is
8930 * as good as the object itself. (This
8931 * is not true for permanent objects,
8932 * since the pager needs to see changes,
8933 * which won't happen if the changes
8934 * are made to a copy.)
8936 * The third case is when the object
8937 * to be shared has parts sticking
8938 * outside of the entry we're working
8939 * with, and thus may in the future
8940 * be subject to a symmetrical copy.
8941 * (This is a preemptive version of
8944 vm_object_shadow(&old_entry
->object
.vm_object
,
8946 (vm_map_size_t
) (old_entry
->vme_end
-
8947 old_entry
->vme_start
));
8950 * If we're making a shadow for other than
8951 * copy on write reasons, then we have
8952 * to remove write permission.
8955 if (!old_entry
->needs_copy
&&
8956 (old_entry
->protection
& VM_PROT_WRITE
)) {
8959 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
8961 if (override_nx(old_map
, old_entry
->alias
) && prot
)
8962 prot
|= VM_PROT_EXECUTE
;
8964 if (old_map
->mapped_in_other_pmaps
) {
8965 vm_object_pmap_protect(
8966 old_entry
->object
.vm_object
,
8968 (old_entry
->vme_end
-
8969 old_entry
->vme_start
),
8971 old_entry
->vme_start
,
8974 pmap_protect(old_map
->pmap
,
8975 old_entry
->vme_start
,
8981 old_entry
->needs_copy
= FALSE
;
8982 object
= old_entry
->object
.vm_object
;
8987 * If object was using a symmetric copy strategy,
8988 * change its copy strategy to the default
8989 * asymmetric copy strategy, which is copy_delay
8990 * in the non-norma case and copy_call in the
8991 * norma case. Bump the reference count for the
8995 if(old_entry
->is_sub_map
) {
8996 vm_map_lock(old_entry
->object
.sub_map
);
8997 vm_map_reference(old_entry
->object
.sub_map
);
8998 vm_map_unlock(old_entry
->object
.sub_map
);
9000 vm_object_lock(object
);
9001 vm_object_reference_locked(object
);
9002 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
9003 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
9005 vm_object_unlock(object
);
9009 * Clone the entry, using object ref from above.
9010 * Mark both entries as shared.
9013 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* Never the kernel
9014 * map or descendants */
9015 vm_map_entry_copy(new_entry
, old_entry
);
9016 old_entry
->is_shared
= TRUE
;
9017 new_entry
->is_shared
= TRUE
;
9020 * Insert the entry into the new map -- we
9021 * know we're inserting at the end of the new
9025 vm_map_store_entry_link(new_map
, vm_map_last_entry(new_map
), new_entry
);
9028 * Update the physical map
9031 if (old_entry
->is_sub_map
) {
9032 /* Bill Angell pmap support goes here */
9034 pmap_copy(new_map
->pmap
, old_map
->pmap
, new_entry
->vme_start
,
9035 old_entry
->vme_end
- old_entry
->vme_start
,
9036 old_entry
->vme_start
);
static boolean_t
vm_map_fork_copy(
    vm_map_t       old_map,
    vm_map_entry_t *old_entry_p,
    vm_map_t       new_map)
{
    vm_map_entry_t old_entry = *old_entry_p;
    vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
    vm_map_offset_t start = old_entry->vme_start;
    vm_map_copy_t copy;
    vm_map_entry_t last = vm_map_last_entry(new_map);

    vm_map_unlock(old_map);
    /*
     * Use maxprot version of copyin because we
     * care about whether this memory can ever
     * be accessed, not just whether it's accessible
     * right now.
     */
    if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
        != KERN_SUCCESS) {
        /*
         * The map might have changed while it
         * was unlocked, check it again.  Skip
         * any blank space or permanently
         * unreadable region.
         */
        vm_map_lock(old_map);
        if (!vm_map_lookup_entry(old_map, start, &last) ||
            (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
            last = last->vme_next;
        }
        *old_entry_p = last;

        /*
         * XXX	For some error returns, want to
         * XXX	skip to the next element.  Note
         *	that INVALID_ADDRESS and
         *	PROTECTION_FAILURE are handled above.
         */

        return FALSE;
    }

    /*
     * Insert the copy into the new map
     */

    vm_map_copy_insert(new_map, last, copy);

    /*
     * Pick up the traversal at the end of
     * the copied region.
     */

    vm_map_lock(old_map);
    start += entry_size;
    if (! vm_map_lookup_entry(old_map, start, &last)) {
        last = last->vme_next;
    } else {
        if (last->vme_start == start) {
            /*
             * No need to clip here and we don't
             * want to cause any unnecessary
             * unnesting...
             */
        } else {
            vm_map_clip_start(old_map, last, start);
        }
    }
    *old_entry_p = last;

    return TRUE;
}
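
/*
 * Illustrative sketch (not original code; kept under #if 0 so it is never
 * compiled): the per-entry inheritance values consumed by vm_map_fork()
 * below are normally set through the standard vm_map_inherit() interface.
 * The helper below is invented; it simply marks a region so that a forked
 * child shares it instead of receiving a copy.
 */
#if 0
static kern_return_t
example_share_with_children(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_size_t   size)
{
    /* VM_INHERIT_SHARE entries are handled by vm_map_fork_share() at fork time */
    return vm_map_inherit(map, start, start + size, VM_INHERIT_SHARE);
}
#endif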
9118 * Create and return a new map based on the old
9119 * map, according to the inheritance values on the
9120 * regions in that map.
9122 * The source map must not be locked.
9131 vm_map_entry_t old_entry
;
9132 vm_map_size_t new_size
= 0, entry_size
;
9133 vm_map_entry_t new_entry
;
9134 boolean_t src_needs_copy
;
9135 boolean_t new_entry_needs_copy
;
9137 new_pmap
= pmap_create(ledger
, (vm_map_size_t
) 0,
9138 #if defined(__i386__) || defined(__x86_64__)
9139 old_map
->pmap
->pm_task_map
!= TASK_MAP_32BIT
9141 #error Unknown architecture.
9145 vm_map_reference_swap(old_map
);
9146 vm_map_lock(old_map
);
9148 new_map
= vm_map_create(new_pmap
,
9149 old_map
->min_offset
,
9150 old_map
->max_offset
,
9151 old_map
->hdr
.entries_pageable
);
9152 /* inherit the parent map's page size */
9153 vm_map_set_page_shift(new_map
, VM_MAP_PAGE_SHIFT(old_map
));
9155 old_entry
= vm_map_first_entry(old_map
);
9156 old_entry
!= vm_map_to_entry(old_map
);
9159 entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
9161 switch (old_entry
->inheritance
) {
9162 case VM_INHERIT_NONE
:
9165 case VM_INHERIT_SHARE
:
9166 vm_map_fork_share(old_map
, old_entry
, new_map
);
9167 new_size
+= entry_size
;
9170 case VM_INHERIT_COPY
:
9173 * Inline the copy_quickly case;
9174 * upon failure, fall back on call
9175 * to vm_map_fork_copy.
9178 if(old_entry
->is_sub_map
)
9180 if ((old_entry
->wired_count
!= 0) ||
9181 ((old_entry
->object
.vm_object
!= NULL
) &&
9182 (old_entry
->object
.vm_object
->true_share
))) {
9183 goto slow_vm_map_fork_copy
;
9186 new_entry
= vm_map_entry_create(new_map
, FALSE
); /* never the kernel map or descendants */
9187 vm_map_entry_copy(new_entry
, old_entry
);
9188 /* clear address space specifics */
9189 new_entry
->use_pmap
= FALSE
;
9191 if (! vm_object_copy_quickly(
9192 &new_entry
->object
.vm_object
,
9194 (old_entry
->vme_end
-
9195 old_entry
->vme_start
),
9197 &new_entry_needs_copy
)) {
9198 vm_map_entry_dispose(new_map
, new_entry
);
9199 goto slow_vm_map_fork_copy
;
9203 * Handle copy-on-write obligations
9206 if (src_needs_copy
&& !old_entry
->needs_copy
) {
9209 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
9211 if (override_nx(old_map
, old_entry
->alias
) && prot
)
9212 prot
|= VM_PROT_EXECUTE
;
9214 vm_object_pmap_protect(
9215 old_entry
->object
.vm_object
,
9217 (old_entry
->vme_end
-
9218 old_entry
->vme_start
),
9219 ((old_entry
->is_shared
9220 || old_map
->mapped_in_other_pmaps
)
9223 old_entry
->vme_start
,
9226 old_entry
->needs_copy
= TRUE
;
9228 new_entry
->needs_copy
= new_entry_needs_copy
;
9231 * Insert the entry at the end
9235 vm_map_store_entry_link(new_map
, vm_map_last_entry(new_map
),
9237 new_size
+= entry_size
;
9240 slow_vm_map_fork_copy
:
9241 if (vm_map_fork_copy(old_map
, &old_entry
, new_map
)) {
9242 new_size
+= entry_size
;
9246 old_entry
= old_entry
->vme_next
;
9249 new_map
->size
= new_size
;
9250 vm_map_unlock(old_map
);
9251 vm_map_deallocate(old_map
);
/*
 * vm_map_exec:
 *
 *	Setup the "new_map" with the proper execution environment according
 *	to the type of executable (platform, 64bit, chroot environment).
 *	Map the comm page and shared region, etc...
 */
kern_return_t
vm_map_exec(
    vm_map_t   new_map,
    task_t     task,
    void       *fsroot,
    cpu_type_t cpu)
{
    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
         current_task(), new_map, task, fsroot, cpu));
    (void) vm_commpage_enter(new_map, task);
    (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
         current_task(), new_map, task, fsroot, cpu));
    return KERN_SUCCESS;
}
/*
 *	vm_map_lookup_locked:
 *
 *	Finds the VM object, offset, and
 *	protection for a given virtual address in the
 *	specified map, assuming a page fault of the
 *	type specified.
 *
 *	Returns the (object, offset, protection) for
 *	this address, whether it is wired down, and whether
 *	this map has the only reference to the data in question.
 *	In order to later verify this lookup, a "version"
 *	is returned.
 *
 *	The map MUST be locked by the caller and WILL be
 *	locked on exit.  In order to guarantee the
 *	existence of the returned object, it is returned
 *	locked.
 *
 *	If a lookup is requested with "write protection"
 *	specified, the map may be changed to perform virtual
 *	copying operations, although the data referenced will
 *	remain the same.
 */
kern_return_t
vm_map_lookup_locked(
    vm_map_t           *var_map,     /* IN/OUT */
    vm_map_offset_t    vaddr,
    vm_prot_t          fault_type,
    int                object_lock_type,
    vm_map_version_t   *out_version, /* OUT */
    vm_object_t        *object,      /* OUT */
    vm_object_offset_t *offset,      /* OUT */
    vm_prot_t          *out_prot,    /* OUT */
    boolean_t          *wired,       /* OUT */
    vm_object_fault_info_t fault_info, /* OUT */
    vm_map_t           *real_map)    /* OUT */
{
    vm_map_entry_t        entry;
    register vm_map_t     map = *var_map;
    vm_map_t              old_map = *var_map;
    vm_map_t              cow_sub_map_parent = VM_MAP_NULL;
    vm_map_offset_t       cow_parent_vaddr = 0;
    vm_map_offset_t       old_start = 0;
    vm_map_offset_t       old_end = 0;
    register vm_prot_t    prot;
    boolean_t             mask_protections;
    vm_prot_t             original_fault_type;

    /*
     * VM_PROT_MASK means that the caller wants us to use "fault_type"
     * as a mask against the mapping's actual protections, not as an
     * absolute value.
     */
    mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
    fault_type &= ~VM_PROT_IS_MASK;
    original_fault_type = fault_type;

    *real_map = map;

RetryLookup:
    fault_type = original_fault_type;
9345 * If the map has an interesting hint, try it before calling
9346 * full blown lookup routine.
9350 if ((entry
== vm_map_to_entry(map
)) ||
9351 (vaddr
< entry
->vme_start
) || (vaddr
>= entry
->vme_end
)) {
9352 vm_map_entry_t tmp_entry
;
9355 * Entry was either not a valid hint, or the vaddr
9356 * was not contained in the entry, so do a full lookup.
9358 if (!vm_map_lookup_entry(map
, vaddr
, &tmp_entry
)) {
9359 if((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
))
9360 vm_map_unlock(cow_sub_map_parent
);
9361 if((*real_map
!= map
)
9362 && (*real_map
!= cow_sub_map_parent
))
9363 vm_map_unlock(*real_map
);
9364 return KERN_INVALID_ADDRESS
;
9369 if(map
== old_map
) {
9370 old_start
= entry
->vme_start
;
9371 old_end
= entry
->vme_end
;
9375 * Handle submaps. Drop lock on upper map, submap is
9380 if (entry
->is_sub_map
) {
9381 vm_map_offset_t local_vaddr
;
9382 vm_map_offset_t end_delta
;
9383 vm_map_offset_t start_delta
;
9384 vm_map_entry_t submap_entry
;
9385 boolean_t mapped_needs_copy
=FALSE
;
9387 local_vaddr
= vaddr
;
9389 if ((entry
->use_pmap
&& !(fault_type
& VM_PROT_WRITE
))) {
9390 /* if real_map equals map we unlock below */
9391 if ((*real_map
!= map
) &&
9392 (*real_map
!= cow_sub_map_parent
))
9393 vm_map_unlock(*real_map
);
9394 *real_map
= entry
->object
.sub_map
;
9397 if(entry
->needs_copy
&& (fault_type
& VM_PROT_WRITE
)) {
9398 if (!mapped_needs_copy
) {
9399 if (vm_map_lock_read_to_write(map
)) {
9400 vm_map_lock_read(map
);
9404 vm_map_lock_read(entry
->object
.sub_map
);
9405 *var_map
= entry
->object
.sub_map
;
9406 cow_sub_map_parent
= map
;
9407 /* reset base to map before cow object */
9408 /* this is the map which will accept */
9409 /* the new cow object */
9410 old_start
= entry
->vme_start
;
9411 old_end
= entry
->vme_end
;
9412 cow_parent_vaddr
= vaddr
;
9413 mapped_needs_copy
= TRUE
;
9415 vm_map_lock_read(entry
->object
.sub_map
);
9416 *var_map
= entry
->object
.sub_map
;
9417 if((cow_sub_map_parent
!= map
) &&
9422 vm_map_lock_read(entry
->object
.sub_map
);
9423 *var_map
= entry
->object
.sub_map
;
9424 /* leave map locked if it is a target */
9425 /* cow sub_map above otherwise, just */
9426 /* follow the maps down to the object */
9427 /* here we unlock knowing we are not */
9428 /* revisiting the map. */
9429 if((*real_map
!= map
) && (map
!= cow_sub_map_parent
))
9430 vm_map_unlock_read(map
);
9435 /* calculate the offset in the submap for vaddr */
9436 local_vaddr
= (local_vaddr
- entry
->vme_start
) + entry
->offset
;
9439 if(!vm_map_lookup_entry(map
, local_vaddr
, &submap_entry
)) {
9440 if((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)){
9441 vm_map_unlock(cow_sub_map_parent
);
9443 if((*real_map
!= map
)
9444 && (*real_map
!= cow_sub_map_parent
)) {
9445 vm_map_unlock(*real_map
);
9448 return KERN_INVALID_ADDRESS
;
9451 /* find the attenuated shadow of the underlying object */
9452 /* on our target map */
9454 /* in english the submap object may extend beyond the */
9455 /* region mapped by the entry or, may only fill a portion */
9456 /* of it. For our purposes, we only care if the object */
9457 /* doesn't fill. In this case the area which will */
9458 /* ultimately be clipped in the top map will only need */
9459 /* to be as big as the portion of the underlying entry */
9460 /* which is mapped */
9461 start_delta
= submap_entry
->vme_start
> entry
->offset
?
9462 submap_entry
->vme_start
- entry
->offset
: 0;
9465 (entry
->offset
+ start_delta
+ (old_end
- old_start
)) <=
9466 submap_entry
->vme_end
?
9467 0 : (entry
->offset
+
9468 (old_end
- old_start
))
9469 - submap_entry
->vme_end
;
9471 old_start
+= start_delta
;
9472 old_end
-= end_delta
;
9474 if(submap_entry
->is_sub_map
) {
9475 entry
= submap_entry
;
9476 vaddr
= local_vaddr
;
9477 goto submap_recurse
;
9480 if(((fault_type
& VM_PROT_WRITE
) && cow_sub_map_parent
)) {
9482 vm_object_t sub_object
, copy_object
;
9483 vm_object_offset_t copy_offset
;
9484 vm_map_offset_t local_start
;
9485 vm_map_offset_t local_end
;
9486 boolean_t copied_slowly
= FALSE
;
9488 if (vm_map_lock_read_to_write(map
)) {
9489 vm_map_lock_read(map
);
9490 old_start
-= start_delta
;
9491 old_end
+= end_delta
;
9496 sub_object
= submap_entry
->object
.vm_object
;
9497 if (sub_object
== VM_OBJECT_NULL
) {
9501 (submap_entry
->vme_end
-
9502 submap_entry
->vme_start
));
9503 submap_entry
->object
.vm_object
= sub_object
;
9504 submap_entry
->offset
= 0;
9506 local_start
= local_vaddr
-
9507 (cow_parent_vaddr
- old_start
);
9508 local_end
= local_vaddr
+
9509 (old_end
- cow_parent_vaddr
);
9510 vm_map_clip_start(map
, submap_entry
, local_start
);
9511 vm_map_clip_end(map
, submap_entry
, local_end
);
9512 /* unnesting was done in vm_map_clip_start/end() */
9513 assert(!submap_entry
->use_pmap
);
9515 /* This is the COW case, lets connect */
9516 /* an entry in our space to the underlying */
9517 /* object in the submap, bypassing the */
9521 if(submap_entry
->wired_count
!= 0 ||
9522 (sub_object
->copy_strategy
==
9523 MEMORY_OBJECT_COPY_NONE
)) {
9524 vm_object_lock(sub_object
);
9525 vm_object_copy_slowly(sub_object
,
9526 submap_entry
->offset
,
9527 (submap_entry
->vme_end
-
9528 submap_entry
->vme_start
),
9531 copied_slowly
= TRUE
;
9534 /* set up shadow object */
9535 copy_object
= sub_object
;
9536 vm_object_reference(copy_object
);
9537 sub_object
->shadowed
= TRUE
;
9538 submap_entry
->needs_copy
= TRUE
;
9540 prot
= submap_entry
->protection
& ~VM_PROT_WRITE
;
9542 if (override_nx(old_map
, submap_entry
->alias
) && prot
)
9543 prot
|= VM_PROT_EXECUTE
;
9545 vm_object_pmap_protect(
9547 submap_entry
->offset
,
9548 submap_entry
->vme_end
-
9549 submap_entry
->vme_start
,
9550 (submap_entry
->is_shared
9551 || map
->mapped_in_other_pmaps
) ?
9552 PMAP_NULL
: map
->pmap
,
9553 submap_entry
->vme_start
,
9558 * Adjust the fault offset to the submap entry.
9560 copy_offset
= (local_vaddr
-
9561 submap_entry
->vme_start
+
9562 submap_entry
->offset
);
9564 /* This works diffently than the */
9565 /* normal submap case. We go back */
9566 /* to the parent of the cow map and*/
9567 /* clip out the target portion of */
9568 /* the sub_map, substituting the */
9569 /* new copy object, */
9572 local_start
= old_start
;
9573 local_end
= old_end
;
9574 map
= cow_sub_map_parent
;
9575 *var_map
= cow_sub_map_parent
;
9576 vaddr
= cow_parent_vaddr
;
9577 cow_sub_map_parent
= NULL
;
9579 if(!vm_map_lookup_entry(map
,
9581 vm_object_deallocate(
9583 vm_map_lock_write_to_read(map
);
9584 return KERN_INVALID_ADDRESS
;
9587 /* clip out the portion of space */
9588 /* mapped by the sub map which */
9589 /* corresponds to the underlying */
9593 * Clip (and unnest) the smallest nested chunk
9594 * possible around the faulting address...
9596 local_start
= vaddr
& ~(pmap_nesting_size_min
- 1);
9597 local_end
= local_start
+ pmap_nesting_size_min
;
9599 * ... but don't go beyond the "old_start" to "old_end"
9600 * range, to avoid spanning over another VM region
9601 * with a possibly different VM object and/or offset.
9603 if (local_start
< old_start
) {
9604 local_start
= old_start
;
9606 if (local_end
> old_end
) {
9607 local_end
= old_end
;
9610 * Adjust copy_offset to the start of the range.
9612 copy_offset
-= (vaddr
- local_start
);
9614 vm_map_clip_start(map
, entry
, local_start
);
9615 vm_map_clip_end(map
, entry
, local_end
);
9616 /* unnesting was done in vm_map_clip_start/end() */
9617 assert(!entry
->use_pmap
);
9619 /* substitute copy object for */
9620 /* shared map entry */
9621 vm_map_deallocate(entry
->object
.sub_map
);
9622 entry
->is_sub_map
= FALSE
;
9623 entry
->object
.vm_object
= copy_object
;
9625 /* propagate the submap entry's protections */
9626 entry
->protection
|= submap_entry
->protection
;
9627 entry
->max_protection
|= submap_entry
->max_protection
;
9630 entry
->offset
= local_start
- old_start
;
9631 entry
->needs_copy
= FALSE
;
9632 entry
->is_shared
= FALSE
;
9634 entry
->offset
= copy_offset
;
9635 entry
->needs_copy
= TRUE
;
9636 if(entry
->inheritance
== VM_INHERIT_SHARE
)
9637 entry
->inheritance
= VM_INHERIT_COPY
;
9639 entry
->is_shared
= TRUE
;
9641 if(entry
->inheritance
== VM_INHERIT_SHARE
)
9642 entry
->inheritance
= VM_INHERIT_COPY
;
9644 vm_map_lock_write_to_read(map
);
9646 if((cow_sub_map_parent
)
9647 && (cow_sub_map_parent
!= *real_map
)
9648 && (cow_sub_map_parent
!= map
)) {
9649 vm_map_unlock(cow_sub_map_parent
);
9651 entry
= submap_entry
;
9652 vaddr
= local_vaddr
;
9657 * Check whether this task is allowed to have
9661 prot
= entry
->protection
;
9663 if (override_nx(old_map
, entry
->alias
) && prot
) {
9665 * HACK -- if not a stack, then allow execution
9667 prot
|= VM_PROT_EXECUTE
;
9670 if (mask_protections
) {
9672 if (fault_type
== VM_PROT_NONE
) {
9673 goto protection_failure
;
9676 if ((fault_type
& (prot
)) != fault_type
) {
9678 if (*real_map
!= map
) {
9679 vm_map_unlock(*real_map
);
9683 if ((fault_type
& VM_PROT_EXECUTE
) && prot
)
9684 log_stack_execution_failure((addr64_t
)vaddr
, prot
);
9686 DTRACE_VM2(prot_fault
, int, 1, (uint64_t *), NULL
);
9687 return KERN_PROTECTION_FAILURE
;
9691 * If this page is not pageable, we have to get
9692 * it for all possible accesses.
9695 *wired
= (entry
->wired_count
!= 0);
9700 * If the entry was copy-on-write, we either ...
9703 if (entry
->needs_copy
) {
9705 * If we want to write the page, we may as well
9706 * handle that now since we've got the map locked.
9708 * If we don't need to write the page, we just
9709 * demote the permissions allowed.
9712 if ((fault_type
& VM_PROT_WRITE
) || *wired
) {
9714 * Make a new object, and place it in the
9715 * object chain. Note that no new references
9716 * have appeared -- one just moved from the
9717 * map to the new object.
9720 if (vm_map_lock_read_to_write(map
)) {
9721 vm_map_lock_read(map
);
9724 vm_object_shadow(&entry
->object
.vm_object
,
9726 (vm_map_size_t
) (entry
->vme_end
-
9729 entry
->object
.vm_object
->shadowed
= TRUE
;
9730 entry
->needs_copy
= FALSE
;
9731 vm_map_lock_write_to_read(map
);
9735 * We're attempting to read a copy-on-write
9736 * page -- don't allow writes.
9739 prot
&= (~VM_PROT_WRITE
);
9744 * Create an object if necessary.
9746 if (entry
->object
.vm_object
== VM_OBJECT_NULL
) {
9748 if (vm_map_lock_read_to_write(map
)) {
9749 vm_map_lock_read(map
);
9753 entry
->object
.vm_object
= vm_object_allocate(
9754 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
9756 vm_map_lock_write_to_read(map
);
9760 * Return the object/offset from this entry. If the entry
9761 * was copy-on-write or empty, it has been fixed up. Also
9762 * return the protection.
9765 *offset
= (vaddr
- entry
->vme_start
) + entry
->offset
;
9766 *object
= entry
->object
.vm_object
;
9770 fault_info
->interruptible
= THREAD_UNINT
; /* for now... */
9771 /* ... the caller will change "interruptible" if needed */
9772 fault_info
->cluster_size
= 0;
9773 fault_info
->user_tag
= entry
->alias
;
9774 fault_info
->behavior
= entry
->behavior
;
9775 fault_info
->lo_offset
= entry
->offset
;
9776 fault_info
->hi_offset
= (entry
->vme_end
- entry
->vme_start
) + entry
->offset
;
9777 fault_info
->no_cache
= entry
->no_cache
;
9778 fault_info
->stealth
= FALSE
;
9779 fault_info
->io_sync
= FALSE
;
9780 fault_info
->cs_bypass
= (entry
->used_for_jit
)? TRUE
: FALSE
;
9781 fault_info
->mark_zf_absent
= FALSE
;
9782 fault_info
->batch_pmap_op
= FALSE
;
9786 * Lock the object to prevent it from disappearing
9788 if (object_lock_type
== OBJECT_LOCK_EXCLUSIVE
)
9789 vm_object_lock(*object
);
9791 vm_object_lock_shared(*object
);
9794 * Save the version number
9797 out_version
->main_timestamp
= map
->timestamp
;
9799 return KERN_SUCCESS
;
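/*
 * Editor's sketch (not part of the original source): how a hypothetical
 * in-kernel caller consumes the results of vm_map_lookup_locked() above.
 * The variable names are made up; the argument order is assumed to match
 * the declaration earlier in this file.
 *
 *	kr = vm_map_lookup_locked(&map, vaddr,
 *				  VM_PROT_READ | VM_PROT_IS_MASK,
 *				  OBJECT_LOCK_EXCLUSIVE, &version, &object,
 *				  &offset, &prot, &wired, &fault_info,
 *				  &real_map);
 *	// On KERN_SUCCESS:
 *	//  - "map" is left read-locked and "object" is locked (exclusively
 *	//    here, since OBJECT_LOCK_EXCLUSIVE was requested);
 *	//  - "version" snapshots map->timestamp for a later vm_map_verify();
 *	//  - VM_PROT_IS_MASK asked that VM_PROT_READ be intersected with the
 *	//    entry's protections rather than treated as a hard requirement.
 */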
/*
 *	vm_map_verify:
 *
 *	Verifies that the map in question has not changed
 *	since the given version.  If successful, the map
 *	will not change until vm_map_verify_done() is called.
 */
boolean_t
vm_map_verify(
    register vm_map_t           map,
    register vm_map_version_t   *version)   /* REF */
{
    boolean_t   result;

    vm_map_lock_read(map);
    result = (map->timestamp == version->main_timestamp);

    if (!result)
        vm_map_unlock_read(map);

    return (result);
}
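/*
 * Editor's sketch (not part of the original source): the verify protocol as
 * a hypothetical caller would use it after dropping the map lock to do
 * blocking work ("version" obtained from a prior lookup):
 *
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed while it was unlocked: redo the lookup
 *	} else {
 *		// map->timestamp is unchanged and the map is read-locked
 *		// again; finish the operation, then release the lock with
 *		// vm_map_verify_done(map, &version), which (per the comment
 *		// below) is a macro that does vm_map_unlock_read(map).
 *	}
 */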
/*
 *	vm_map_verify_done:
 *
 *	Releases locks acquired by a vm_map_verify.
 *
 *	This is now a macro in vm/vm_map.h.  It does a
 *	vm_map_unlock_read on the map.
 */

/*
 *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
 *	Goes away after regular vm_region_recurse function migrates to
 *	64 bits
 *	vm_region_recurse: A form of vm_region which follows the
 *	submaps in a target map
 */
kern_return_t
vm_map_region_recurse_64(
    vm_map_t                    map,
    vm_map_offset_t             *address,       /* IN/OUT */
    vm_map_size_t               *size,          /* OUT */
    natural_t                   *nesting_depth, /* IN/OUT */
    vm_region_submap_info_64_t  submap_info,    /* IN/OUT */
    mach_msg_type_number_t      *count)         /* IN/OUT */
{
    mach_msg_type_number_t          original_count;
    vm_region_extended_info_data_t  extended;
    vm_map_entry_t                  tmp_entry;
    vm_map_offset_t                 user_address;
    unsigned int                    user_max_depth;

    /*
     * "curr_entry" is the VM map entry preceding or including the
     * address we're looking for.
     * "curr_map" is the map or sub-map containing "curr_entry".
     * "curr_address" is the equivalent of the top map's "user_address"
     * in the current map.
     * "curr_offset" is the cumulated offset of "curr_map" in the
     * target task's address space.
     * "curr_depth" is the depth of "curr_map" in the chain of
     * sub-maps.
     *
     * "curr_max_below" and "curr_max_above" limit the range (around
     * "curr_address") we should take into account in the current (sub)map.
     * They limit the range to what's visible through the map entries
     * we've traversed from the top map to the current map.
     */
    vm_map_entry_t      curr_entry;
    vm_map_address_t    curr_address;
    vm_map_offset_t     curr_offset;
    vm_map_t            curr_map;
    unsigned int        curr_depth;
    vm_map_offset_t     curr_max_below, curr_max_above;
    vm_map_offset_t     curr_skip;

    /*
     * "next_" is the same as "curr_" but for the VM region immediately
     * after the address we're looking for.  We need to keep track of this
     * too because we want to return info about that region if the
     * address we're looking for is not mapped.
     */
    vm_map_entry_t      next_entry;
    vm_map_offset_t     next_offset;
    vm_map_offset_t     next_address;
    vm_map_t            next_map;
    unsigned int        next_depth;
    vm_map_offset_t     next_max_below, next_max_above;
    vm_map_offset_t     next_skip;

    boolean_t           look_for_pages;
    vm_region_submap_short_info_64_t short_info;

    if (map == VM_MAP_NULL) {
        /* no address space to work on */
        return KERN_INVALID_ARGUMENT;
    }

    if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
        /*
         * "info" structure is not big enough and
         * could not be filled in.
         */
        return KERN_INVALID_ARGUMENT;
    }

    original_count = *count;

    if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
        *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
        look_for_pages = FALSE;
        short_info = (vm_region_submap_short_info_64_t) submap_info;
    } else {
        look_for_pages = TRUE;
        *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;

        if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
            *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
        }
    }

    user_address = *address;
    user_max_depth = *nesting_depth;

    curr_entry = NULL;
    curr_map = map;
    curr_address = user_address;
    curr_offset = 0;
    curr_skip = 0;
    curr_depth = 0;
    curr_max_above = ((vm_map_offset_t) -1) - curr_address;
    curr_max_below = curr_address;

    next_entry = NULL;
    next_map = NULL;
    next_offset = 0;
    next_skip = 0;
    next_depth = 0;
    next_max_above = (vm_map_offset_t) -1;
    next_max_below = (vm_map_offset_t) -1;

    vm_map_lock_read(curr_map);

    for (;;) {
        if (vm_map_lookup_entry(curr_map,
                                curr_address, &tmp_entry)) {
            /* tmp_entry contains the address we're looking for */
            curr_entry = tmp_entry;
        } else {
            vm_map_offset_t skip;
            /*
             * The address is not mapped.  "tmp_entry" is the
             * map entry preceding the address.  We want the next
             * one, if it exists.
             */
            curr_entry = tmp_entry->vme_next;

            if (curr_entry == vm_map_to_entry(curr_map) ||
                (curr_entry->vme_start >=
                 curr_address + curr_max_above)) {
                /* no next entry at this level: stop looking */
                vm_map_unlock_read(curr_map);
                curr_entry = NULL;
                break;
            }

            /* adjust current address and offset */
            skip = curr_entry->vme_start - curr_address;
            curr_address = curr_entry->vme_start;
            curr_offset += skip;
            curr_max_above -= skip;
        }

        /*
         * Is the next entry at this level closer to the address (or
         * deeper in the submap chain) than the one we had
         * so far?
         */
        tmp_entry = curr_entry->vme_next;
        if (tmp_entry == vm_map_to_entry(curr_map)) {
            /* no next entry at this level */
        } else if (tmp_entry->vme_start >=
                   curr_address + curr_max_above) {
            /*
             * tmp_entry is beyond the scope of what we mapped of
             * this submap in the upper level: ignore it.
             */
        } else if ((next_entry == NULL) ||
                   (tmp_entry->vme_start + curr_offset <=
                    next_entry->vme_start + next_offset)) {
            /*
             * We didn't have a "next_entry" or this one is
             * closer to the address we're looking for:
             * use this "tmp_entry" as the new "next_entry".
             */
            if (next_entry != NULL) {
                /* unlock the last "next_map" */
                if (next_map != curr_map && not_in_kdp) {
                    vm_map_unlock_read(next_map);
                }
            }
            next_entry = tmp_entry;
            next_map = curr_map;
            next_depth = curr_depth;
            next_address = next_entry->vme_start;
            next_skip = curr_skip;
            next_offset = curr_offset;
            next_offset += (next_address - curr_address);
            next_max_above = MIN(next_max_above, curr_max_above);
            next_max_above = MIN(next_max_above,
                                 next_entry->vme_end - next_address);
            next_max_below = MIN(next_max_below, curr_max_below);
            next_max_below = MIN(next_max_below,
                                 next_address - next_entry->vme_start);
        }

        /*
         * "curr_max_{above,below}" allow us to keep track of the
         * portion of the submap that is actually mapped at this level:
         * the rest of that submap is irrelevant to us, since it's not
         * mapped here.
         * The relevant portion of the map starts at
         * "curr_entry->offset" up to the size of "curr_entry".
         */
        curr_max_above = MIN(curr_max_above,
                             curr_entry->vme_end - curr_address);
        curr_max_below = MIN(curr_max_below,
                             curr_address - curr_entry->vme_start);

        if (!curr_entry->is_sub_map ||
            curr_depth >= user_max_depth) {
            /*
             * We hit a leaf map or we reached the maximum depth
             * we could, so stop looking.  Keep the current map
             * locked.
             */
            break;
        }

        /*
         * Get down to the next submap level.
         */

        /*
         * Lock the next level and unlock the current level,
         * unless we need to keep it locked to access the "next_entry"
         * later.
         */
        vm_map_lock_read(curr_entry->object.sub_map);

        if (curr_map == next_map) {
            /* keep "next_map" locked in case we need it */
        } else {
            /* release this map */
            vm_map_unlock_read(curr_map);
        }

        /*
         * Adjust the offset.  "curr_entry" maps the submap
         * at relative address "curr_entry->vme_start" in the
         * curr_map but skips the first "curr_entry->offset"
         * bytes of the submap.
         * "curr_offset" always represents the offset of a virtual
         * address in the curr_map relative to the absolute address
         * space (i.e. the top-level VM map).
         */
        curr_offset +=
            (curr_entry->offset - curr_entry->vme_start);
        curr_address = user_address + curr_offset;
        /* switch to the submap */
        curr_map = curr_entry->object.sub_map;
        curr_depth++;
    }

    if (curr_entry == NULL) {
        /* no VM region contains the address... */
        if (next_entry == NULL) {
            /* ... and no VM region follows it either */
            return KERN_INVALID_ADDRESS;
        }
        /* ... gather info about the next VM region */
        curr_entry = next_entry;
        curr_map = next_map;    /* still locked ... */
        curr_address = next_address;
        curr_skip = next_skip;
        curr_offset = next_offset;
        curr_depth = next_depth;
        curr_max_above = next_max_above;
        curr_max_below = next_max_below;
        if (curr_map == map) {
            user_address = curr_address;
        }
    } else {
        /* we won't need "next_entry" after all */
        if (next_entry != NULL) {
            /* release "next_map" */
            if (next_map != curr_map && not_in_kdp) {
                vm_map_unlock_read(next_map);
            }
        }
    }
    next_entry = NULL;
    next_map = NULL;
    next_offset = 0;
    next_skip = 0;
    next_depth = 0;
    next_max_below = -1;
    next_max_above = -1;

    *nesting_depth = curr_depth;
    *size = curr_max_above + curr_max_below;
    *address = user_address + curr_skip - curr_max_below;

// LP64todo: all the current tools are 32bit, obviously never worked for 64b
// so probably should be a real 32b ID vs. ptr.
// Current users just check for equality
#define INFO_MAKE_OBJECT_ID(p)	((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))

    if (look_for_pages) {
        submap_info->user_tag = curr_entry->alias;
        submap_info->offset = curr_entry->offset;
        submap_info->protection = curr_entry->protection;
        submap_info->inheritance = curr_entry->inheritance;
        submap_info->max_protection = curr_entry->max_protection;
        submap_info->behavior = curr_entry->behavior;
        submap_info->user_wired_count = curr_entry->user_wired_count;
        submap_info->is_submap = curr_entry->is_sub_map;
        submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
    } else {
        short_info->user_tag = curr_entry->alias;
        short_info->offset = curr_entry->offset;
        short_info->protection = curr_entry->protection;
        short_info->inheritance = curr_entry->inheritance;
        short_info->max_protection = curr_entry->max_protection;
        short_info->behavior = curr_entry->behavior;
        short_info->user_wired_count = curr_entry->user_wired_count;
        short_info->is_submap = curr_entry->is_sub_map;
        short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
    }

    extended.pages_resident = 0;
    extended.pages_swapped_out = 0;
    extended.pages_shared_now_private = 0;
    extended.pages_dirtied = 0;
    extended.pages_reusable = 0;
    extended.external_pager = 0;
    extended.shadow_depth = 0;

    if (!curr_entry->is_sub_map) {
        vm_map_offset_t range_start, range_end;
        range_start = MAX((curr_address - curr_max_below),
                          curr_entry->vme_start);
        range_end = MIN((curr_address + curr_max_above),
                        curr_entry->vme_end);
        vm_map_region_walk(curr_map,
                           range_start,
                           curr_entry,
                           (curr_entry->offset +
                            (range_start -
                             curr_entry->vme_start)),
                           range_end - range_start,
                           &extended,
                           look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
        if (extended.external_pager &&
            extended.ref_count == 2 &&
            extended.share_mode == SM_SHARED) {
            extended.share_mode = SM_PRIVATE;
        }
    } else {
        if (curr_entry->use_pmap) {
            extended.share_mode = SM_TRUESHARED;
        } else {
            extended.share_mode = SM_PRIVATE;
        }
        extended.ref_count =
            curr_entry->object.sub_map->ref_count;
    }

    if (look_for_pages) {
        submap_info->pages_resident = extended.pages_resident;
        submap_info->pages_swapped_out = extended.pages_swapped_out;
        submap_info->pages_shared_now_private =
            extended.pages_shared_now_private;
        submap_info->pages_dirtied = extended.pages_dirtied;
        submap_info->external_pager = extended.external_pager;
        submap_info->shadow_depth = extended.shadow_depth;
        submap_info->share_mode = extended.share_mode;
        submap_info->ref_count = extended.ref_count;

        if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
            submap_info->pages_reusable = extended.pages_reusable;
        }
    } else {
        short_info->external_pager = extended.external_pager;
        short_info->shadow_depth = extended.shadow_depth;
        short_info->share_mode = extended.share_mode;
        short_info->ref_count = extended.ref_count;
    }

    vm_map_unlock_read(curr_map);

    return KERN_SUCCESS;
}
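/*
 * Illustrative user-space sketch (editor's addition, not part of this file):
 * vm_map_region_recurse_64() backs the mach_vm_region_recurse() call.  A
 * minimal walker over a task's regions, assuming <mach/mach.h> and a task
 * port "task" (error handling reduced to a break):
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_size_t size;
 *	natural_t depth = 0;
 *	vm_region_submap_info_data_64_t info;
 *	mach_msg_type_number_t count;
 *
 *	for (;;) {
 *		count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *		kern_return_t kr = mach_vm_region_recurse(task, &addr, &size,
 *		    &depth, (vm_region_recurse_info_t)&info, &count);
 *		if (kr != KERN_SUCCESS)
 *			break;
 *		if (info.is_submap) {
 *			depth++;	// descend into the nested map
 *			continue;
 *		}
 *		// use addr, size, info.protection, info.user_tag, ...
 *		addr += size;
 *	}
 */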
/*
 *	vm_map_region:
 *
 *	User call to obtain information about a region in
 *	a task's address map. Currently, only one flavor is
 *	supported.
 *
 *	XXX The reserved and behavior fields cannot be filled
 *	    in until the vm merge from the IK is completed, and
 *	    vm_reserve is implemented.
 */
kern_return_t
vm_map_region(
    vm_map_t                map,
    vm_map_offset_t         *address,       /* IN/OUT */
    vm_map_size_t           *size,          /* OUT */
    vm_region_flavor_t      flavor,         /* IN */
    vm_region_info_t        info,           /* OUT */
    mach_msg_type_number_t  *count,         /* IN/OUT */
    mach_port_t             *object_name)   /* OUT */
{
    vm_map_entry_t  tmp_entry;
    vm_map_entry_t  entry;
    vm_map_offset_t start;

    if (map == VM_MAP_NULL)
        return (KERN_INVALID_ARGUMENT);

    switch (flavor) {

    case VM_REGION_BASIC_INFO:
        /* legacy for old 32-bit objects info */
    {
        vm_region_basic_info_t basic;

        if (*count < VM_REGION_BASIC_INFO_COUNT)
            return (KERN_INVALID_ARGUMENT);

        basic = (vm_region_basic_info_t) info;
        *count = VM_REGION_BASIC_INFO_COUNT;

        vm_map_lock_read(map);

        start = *address;
        if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
            if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
                vm_map_unlock_read(map);
                return (KERN_INVALID_ADDRESS);
            }
        } else {
            entry = tmp_entry;
        }

        start = entry->vme_start;

        basic->offset = (uint32_t)entry->offset;
        basic->protection = entry->protection;
        basic->inheritance = entry->inheritance;
        basic->max_protection = entry->max_protection;
        basic->behavior = entry->behavior;
        basic->user_wired_count = entry->user_wired_count;
        basic->reserved = entry->is_sub_map;
        *size = (entry->vme_end - start);

        if (object_name) *object_name = IP_NULL;
        if (entry->is_sub_map) {
            basic->shared = FALSE;
        } else {
            basic->shared = entry->is_shared;
        }

        vm_map_unlock_read(map);
        return (KERN_SUCCESS);
    }

    case VM_REGION_BASIC_INFO_64:
    {
        vm_region_basic_info_64_t basic;

        if (*count < VM_REGION_BASIC_INFO_COUNT_64)
            return (KERN_INVALID_ARGUMENT);

        basic = (vm_region_basic_info_64_t) info;
        *count = VM_REGION_BASIC_INFO_COUNT_64;

        vm_map_lock_read(map);

        start = *address;
        if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
            if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
                vm_map_unlock_read(map);
                return (KERN_INVALID_ADDRESS);
            }
        } else {
            entry = tmp_entry;
        }

        start = entry->vme_start;

        basic->offset = entry->offset;
        basic->protection = entry->protection;
        basic->inheritance = entry->inheritance;
        basic->max_protection = entry->max_protection;
        basic->behavior = entry->behavior;
        basic->user_wired_count = entry->user_wired_count;
        basic->reserved = entry->is_sub_map;
        *size = (entry->vme_end - start);

        if (object_name) *object_name = IP_NULL;
        if (entry->is_sub_map) {
            basic->shared = FALSE;
        } else {
            basic->shared = entry->is_shared;
        }

        vm_map_unlock_read(map);
        return (KERN_SUCCESS);
    }

    case VM_REGION_EXTENDED_INFO:
        if (*count < VM_REGION_EXTENDED_INFO_COUNT)
            return (KERN_INVALID_ARGUMENT);
        /*fallthru*/
    case VM_REGION_EXTENDED_INFO__legacy:
        if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
            return KERN_INVALID_ARGUMENT;

    {
        vm_region_extended_info_t extended;
        mach_msg_type_number_t original_count;

        extended = (vm_region_extended_info_t) info;

        vm_map_lock_read(map);

        start = *address;
        if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
            if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
                vm_map_unlock_read(map);
                return (KERN_INVALID_ADDRESS);
            }
        } else {
            entry = tmp_entry;
        }
        start = entry->vme_start;

        extended->protection = entry->protection;
        extended->user_tag = entry->alias;
        extended->pages_resident = 0;
        extended->pages_swapped_out = 0;
        extended->pages_shared_now_private = 0;
        extended->pages_dirtied = 0;
        extended->external_pager = 0;
        extended->shadow_depth = 0;

        original_count = *count;
        if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
            *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
        } else {
            extended->pages_reusable = 0;
            *count = VM_REGION_EXTENDED_INFO_COUNT;
        }

        vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count);

        if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
            extended->share_mode = SM_PRIVATE;

        if (object_name)
            *object_name = IP_NULL;
        *size = (entry->vme_end - start);

        vm_map_unlock_read(map);
        return (KERN_SUCCESS);
    }

    case VM_REGION_TOP_INFO:
    {
        vm_region_top_info_t top;

        if (*count < VM_REGION_TOP_INFO_COUNT)
            return (KERN_INVALID_ARGUMENT);

        top = (vm_region_top_info_t) info;
        *count = VM_REGION_TOP_INFO_COUNT;

        vm_map_lock_read(map);

        start = *address;
        if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
            if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
                vm_map_unlock_read(map);
                return (KERN_INVALID_ADDRESS);
            }
        } else {
            entry = tmp_entry;
        }
        start = entry->vme_start;

        top->private_pages_resident = 0;
        top->shared_pages_resident = 0;

        vm_map_region_top_walk(entry, top);

        if (object_name)
            *object_name = IP_NULL;
        *size = (entry->vme_end - start);

        vm_map_unlock_read(map);
        return (KERN_SUCCESS);
    }

    default:
        return (KERN_INVALID_ARGUMENT);
    }
}
#define OBJ_RESIDENT_COUNT(obj, entry_size)				\
	MIN((entry_size),						\
	    ((obj)->all_reusable ?					\
	     (obj)->wired_page_count :					\
	     (obj)->resident_page_count - (obj)->reusable_page_count))
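/*
 * Editor's note (not in the original source): OBJ_RESIDENT_COUNT() caps the
 * page count attributed to a mapping at the size of the mapping itself.
 * Worked example, assuming an entry spanning 100 pages of an object with
 * resident_page_count == 80, reusable_page_count == 30, all_reusable == 0:
 * the object contributes MIN(100, 80 - 30) == 50 pages.  If the object were
 * marked all_reusable, only its wired pages would be counted instead.
 */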
void
vm_map_region_top_walk(
    vm_map_entry_t          entry,
    vm_region_top_info_t    top)
{

    if (entry->object.vm_object == 0 || entry->is_sub_map) {
        top->share_mode = SM_EMPTY;
        top->ref_count = 0;
        return;
    }

    {
        struct vm_object *obj, *tmp_obj;
        int             ref_count;
        uint32_t        entry_size;

        entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);

        obj = entry->object.vm_object;

        vm_object_lock(obj);

        if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
            ref_count--;

        assert(obj->reusable_page_count <= obj->resident_page_count);

        if (obj->shadow) {
            if (ref_count == 1)
                top->private_pages_resident =
                    OBJ_RESIDENT_COUNT(obj, entry_size);
            else
                top->shared_pages_resident =
                    OBJ_RESIDENT_COUNT(obj, entry_size);
            top->ref_count = ref_count;
            top->share_mode = SM_COW;

            while ((tmp_obj = obj->shadow)) {
                vm_object_lock(tmp_obj);
                vm_object_unlock(obj);
                obj = tmp_obj;

                if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
                    ref_count--;

                assert(obj->reusable_page_count <= obj->resident_page_count);
                top->shared_pages_resident +=
                    OBJ_RESIDENT_COUNT(obj, entry_size);
                top->ref_count += ref_count - 1;
            }
        } else {
            if (entry->superpage_size) {
                top->share_mode = SM_LARGE_PAGE;
                top->shared_pages_resident = 0;
                top->private_pages_resident = entry_size;
            } else if (entry->needs_copy) {
                top->share_mode = SM_COW;
                top->shared_pages_resident =
                    OBJ_RESIDENT_COUNT(obj, entry_size);
            } else {
                if (ref_count == 1 ||
                    (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
                    top->share_mode = SM_PRIVATE;
                    top->private_pages_resident =
                        OBJ_RESIDENT_COUNT(obj, entry_size);
                } else {
                    top->share_mode = SM_SHARED;
                    top->shared_pages_resident =
                        OBJ_RESIDENT_COUNT(obj, entry_size);
                }
            }
            top->ref_count = ref_count;
        }
        /* XXX K64: obj_id will be truncated */
        top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);

        vm_object_unlock(obj);
    }
}
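/*
 * Editor's summary (not in the original source) of how the share mode is
 * classified above, as a quick reference:
 *   SM_EMPTY       - no VM object, or the entry is a submap
 *   SM_LARGE_PAGE  - superpage-backed entry
 *   SM_COW         - the object has a shadow chain, or the entry needs_copy
 *   SM_PRIVATE     - one reference (or two with an untrusted external pager)
 *   SM_SHARED      - everything else
 */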
void
vm_map_region_walk(
    vm_map_t                    map,
    vm_map_offset_t             va,
    vm_map_entry_t              entry,
    vm_object_offset_t          offset,
    vm_object_size_t            range,
    vm_region_extended_info_t   extended,
    boolean_t                   look_for_pages,
    mach_msg_type_number_t      count)
{
    register struct vm_object   *obj, *tmp_obj;
    register vm_map_offset_t    last_offset;
    register int                i;
    register int                ref_count;
    struct vm_object            *shadow_object;
    int                         shadow_depth;

    if ((entry->object.vm_object == 0) ||
        (entry->is_sub_map) ||
        (entry->object.vm_object->phys_contiguous &&
         !entry->superpage_size)) {
        extended->share_mode = SM_EMPTY;
        extended->ref_count = 0;
        return;
    }

    if (entry->superpage_size) {
        extended->shadow_depth = 0;
        extended->share_mode = SM_LARGE_PAGE;
        extended->ref_count = 1;
        extended->external_pager = 0;
        extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
        extended->shadow_depth = 0;
        return;
    }

    obj = entry->object.vm_object;

    vm_object_lock(obj);

    if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
        ref_count--;

    if (look_for_pages) {
        for (last_offset = offset + range;
             offset < last_offset;
             offset += PAGE_SIZE_64, va += PAGE_SIZE) {
            vm_map_region_look_for_page(map, va, obj,
                                        offset, ref_count,
                                        0, extended, count);
        }
    }

    shadow_object = obj->shadow;
    shadow_depth = 0;

    if (!(obj->pager_trusted) && !(obj->internal))
        extended->external_pager = 1;

    if (shadow_object != VM_OBJECT_NULL) {
        vm_object_lock(shadow_object);
        for (;
             shadow_object != VM_OBJECT_NULL;
             shadow_depth++) {
            vm_object_t next_shadow;

            if (!(shadow_object->pager_trusted) &&
                !(shadow_object->internal))
                extended->external_pager = 1;

            next_shadow = shadow_object->shadow;
            if (next_shadow) {
                vm_object_lock(next_shadow);
            }
            vm_object_unlock(shadow_object);
            shadow_object = next_shadow;
        }
    }
    extended->shadow_depth = shadow_depth;

    if (extended->shadow_depth || entry->needs_copy)
        extended->share_mode = SM_COW;
    else {
        if (ref_count == 1)
            extended->share_mode = SM_PRIVATE;
        else {
            if (obj->true_share)
                extended->share_mode = SM_TRUESHARED;
            else
                extended->share_mode = SM_SHARED;
        }
    }
    extended->ref_count = ref_count - extended->shadow_depth;

    for (i = 0; i < extended->shadow_depth; i++) {
        if ((tmp_obj = obj->shadow) == 0)
            break;
        vm_object_lock(tmp_obj);
        vm_object_unlock(obj);

        if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
            ref_count--;

        extended->ref_count += ref_count;
        obj = tmp_obj;
    }
    vm_object_unlock(obj);

    if (extended->share_mode == SM_SHARED) {
        register vm_map_entry_t cur;
        register vm_map_entry_t last;
        int my_refs;

        obj = entry->object.vm_object;
        last = vm_map_to_entry(map);
        my_refs = 0;

        if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
            ref_count--;
        for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
            my_refs += vm_map_region_count_obj_refs(cur, obj);

        if (my_refs == ref_count)
            extended->share_mode = SM_PRIVATE_ALIASED;
        else if (my_refs > 1)
            extended->share_mode = SM_SHARED_ALIASED;
    }
}
/* object is locked on entry and locked on return */
static void
vm_map_region_look_for_page(
    __unused vm_map_t           map,
    __unused vm_map_offset_t    va,
    vm_object_t                 object,
    vm_object_offset_t          offset,
    int                         max_refcnt,
    int                         depth,
    vm_region_extended_info_t   extended,
    mach_msg_type_number_t      count)
{
    register vm_page_t      p;
    register vm_object_t    shadow;
    register int            ref_count;
    vm_object_t             caller_object;
    kern_return_t           kr;

    shadow = object->shadow;
    caller_object = object;

    while (TRUE) {

        if (!(object->pager_trusted) && !(object->internal))
            extended->external_pager = 1;

        if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
            if (shadow && (max_refcnt == 1))
                extended->pages_shared_now_private++;

            if (!p->fictitious &&
                (p->dirty || pmap_is_modified(p->phys_page)))
                extended->pages_dirtied++;
            else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
                if (p->reusable || p->object->all_reusable) {
                    extended->pages_reusable++;
                }
            }

            extended->pages_resident++;

            if (object != caller_object)
                vm_object_unlock(object);

            return;
        }
#if MACH_PAGEMAP
        if (object->existence_map) {
            if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {

                extended->pages_swapped_out++;

                if (object != caller_object)
                    vm_object_unlock(object);

                return;
            }
        } else
#endif /* MACH_PAGEMAP */
        if (object->internal &&
            object->alive &&
            !object->terminating &&
            object->pager_ready) {

            if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
                if (VM_COMPRESSOR_PAGER_STATE_GET(object,
                                                  offset)
                    == VM_EXTERNAL_STATE_EXISTS) {
                    /* the pager has that page */
                    extended->pages_swapped_out++;
                    if (object != caller_object)
                        vm_object_unlock(object);
                    return;
                }
            } else {
                memory_object_t pager;

                vm_object_paging_begin(object);
                pager = object->pager;
                vm_object_unlock(object);

                kr = memory_object_data_request(
                    pager,
                    offset + object->paging_offset,
                    0, /* just poke the pager */
                    VM_PROT_READ,
                    NULL);

                vm_object_lock(object);
                vm_object_paging_end(object);

                if (kr == KERN_SUCCESS) {
                    /* the pager has that page */
                    extended->pages_swapped_out++;
                    if (object != caller_object)
                        vm_object_unlock(object);
                    return;
                }
            }
        }

        if (shadow) {
            vm_object_lock(shadow);

            if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
                ref_count--;

            if (++depth > extended->shadow_depth)
                extended->shadow_depth = depth;

            if (ref_count > max_refcnt)
                max_refcnt = ref_count;

            if (object != caller_object)
                vm_object_unlock(object);

            offset = offset + object->vo_shadow_offset;
            object = shadow;
            shadow = object->shadow;
            continue;
        }
        if (object != caller_object)
            vm_object_unlock(object);
        break;
    }
}
static int
vm_map_region_count_obj_refs(
    vm_map_entry_t  entry,
    vm_object_t     object)
{
    register int            ref_count;
    register vm_object_t    chk_obj;
    register vm_object_t    tmp_obj;

    if (entry->object.vm_object == 0)
        return (0);

    if (entry->is_sub_map)
        return (0);

    ref_count = 0;

    chk_obj = entry->object.vm_object;
    vm_object_lock(chk_obj);

    while (chk_obj) {
        if (chk_obj == object)
            ref_count++;
        tmp_obj = chk_obj->shadow;
        if (tmp_obj)
            vm_object_lock(tmp_obj);
        vm_object_unlock(chk_obj);

        chk_obj = tmp_obj;
    }
    return (ref_count);
}
/*
 *	Routine:	vm_map_simplify
 *
 *	Description:
 *		Attempt to simplify the map representation in
 *		the vicinity of the given starting address.
 *	Note:
 *		This routine is intended primarily to keep the
 *		kernel maps more compact -- they generally don't
 *		benefit from the "expand a map entry" technology
 *		at allocation time because the adjacent entry
 *		is often wired down.
 */
void
vm_map_simplify_entry(
    vm_map_t        map,
    vm_map_entry_t  this_entry)
{
    vm_map_entry_t  prev_entry;

    counter(c_vm_map_simplify_entry_called++);

    prev_entry = this_entry->vme_prev;

    if ((this_entry != vm_map_to_entry(map)) &&
        (prev_entry != vm_map_to_entry(map)) &&

        (prev_entry->vme_end == this_entry->vme_start) &&

        (prev_entry->is_sub_map == this_entry->is_sub_map) &&
        (prev_entry->object.vm_object == this_entry->object.vm_object) &&
        ((prev_entry->offset + (prev_entry->vme_end -
                                prev_entry->vme_start))
         == this_entry->offset) &&

        (prev_entry->map_aligned == this_entry->map_aligned) &&
        (prev_entry->inheritance == this_entry->inheritance) &&
        (prev_entry->protection == this_entry->protection) &&
        (prev_entry->max_protection == this_entry->max_protection) &&
        (prev_entry->behavior == this_entry->behavior) &&
        (prev_entry->alias == this_entry->alias) &&
        (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
        (prev_entry->no_cache == this_entry->no_cache) &&
        (prev_entry->wired_count == this_entry->wired_count) &&
        (prev_entry->user_wired_count == this_entry->user_wired_count) &&

        (prev_entry->needs_copy == this_entry->needs_copy) &&
        (prev_entry->permanent == this_entry->permanent) &&

        (prev_entry->use_pmap == FALSE) &&
        (this_entry->use_pmap == FALSE) &&
        (prev_entry->in_transition == FALSE) &&
        (this_entry->in_transition == FALSE) &&
        (prev_entry->needs_wakeup == FALSE) &&
        (this_entry->needs_wakeup == FALSE) &&
        (prev_entry->is_shared == FALSE) &&
        (this_entry->is_shared == FALSE)
        ) {
        vm_map_store_entry_unlink(map, prev_entry);
        assert(prev_entry->vme_start < this_entry->vme_end);
        if (prev_entry->map_aligned)
            assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
                                       VM_MAP_PAGE_MASK(map)));
        this_entry->vme_start = prev_entry->vme_start;
        this_entry->offset = prev_entry->offset;
        if (prev_entry->is_sub_map) {
            vm_map_deallocate(prev_entry->object.sub_map);
        } else {
            vm_object_deallocate(prev_entry->object.vm_object);
        }
        vm_map_entry_dispose(map, prev_entry);
        SAVE_HINT_MAP_WRITE(map, this_entry);
        counter(c_vm_map_simplified++);
    }
}

void
vm_map_simplify(
    vm_map_t        map,
    vm_map_offset_t start)
{
    vm_map_entry_t  this_entry;

    vm_map_lock(map);
    if (vm_map_lookup_entry(map, start, &this_entry)) {
        vm_map_simplify_entry(map, this_entry);
        vm_map_simplify_entry(map, this_entry->vme_next);
    }
    counter(c_vm_map_simplify_called++);
    vm_map_unlock(map);
}
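/*
 * Editor's example (not in the original source): two entries are coalesced
 * by vm_map_simplify_entry() only when they are exactly adjacent in both the
 * address space and the backing object.  For instance, with a single object:
 *
 *	prev_entry:  [0x1000, 0x2000)  offset 0x0
 *	this_entry:  [0x2000, 0x3000)  offset 0x1000
 *
 * prev_entry->offset + (vme_end - vme_start) == 0x1000 == this_entry->offset,
 * so (all other attributes being equal) the pair collapses into one entry
 * covering [0x1000, 0x3000) at offset 0x0.
 */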
void
vm_map_simplify_range(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t  entry;

    /*
     * The map should be locked (for "write") by the caller.
     */

    if (start >= end) {
        /* invalid address range */
        return;
    }

    start = vm_map_trunc_page(start,
                              VM_MAP_PAGE_MASK(map));
    end = vm_map_round_page(end,
                            VM_MAP_PAGE_MASK(map));

    if (!vm_map_lookup_entry(map, start, &entry)) {
        /* "start" is not mapped and "entry" ends before "start" */
        if (entry == vm_map_to_entry(map)) {
            /* start with first entry in the map */
            entry = vm_map_first_entry(map);
        } else {
            /* start with next entry */
            entry = entry->vme_next;
        }
    }

    while (entry != vm_map_to_entry(map) &&
           entry->vme_start <= end) {
        /* try and coalesce "entry" with its previous entry */
        vm_map_simplify_entry(map, entry);
        entry = entry->vme_next;
    }
}
/*
 *	Routine:	vm_map_machine_attribute
 *	Purpose:
 *		Provide machine-specific attributes to mappings,
 *		such as cachability etc. for machines that provide
 *		them.  NUMA architectures and machines with big/strange
 *		caches will use this.
 *	Note:
 *		Responsibilities for locking and checking are handled here,
 *		everything else in the pmap module.  If any non-volatile
 *		information must be kept, the pmap module should handle
 *		it itself.  [This assumes that attributes do not
 *		need to be inherited, which seems ok to me]
 */
kern_return_t
vm_map_machine_attribute(
    vm_map_t                    map,
    vm_map_offset_t             start,
    vm_map_offset_t             end,
    vm_machine_attribute_t      attribute,
    vm_machine_attribute_val_t* value)      /* IN/OUT */
{
    kern_return_t   ret;
    vm_map_size_t   sync_size;
    vm_map_entry_t  entry;

    if (start < vm_map_min(map) || end > vm_map_max(map))
        return KERN_INVALID_ADDRESS;

    /* Figure how much memory we need to flush (in page increments) */
    sync_size = end - start;

    vm_map_lock(map);

    if (attribute != MATTR_CACHE) {
        /* If we don't have to find physical addresses, we */
        /* don't have to do an explicit traversal here.    */
        ret = pmap_attribute(map->pmap, start, end - start,
                             attribute, value);
        vm_map_unlock(map);
        return ret;
    }

    ret = KERN_SUCCESS;     /* Assume it all worked */

    while (sync_size) {
        if (vm_map_lookup_entry(map, start, &entry)) {
            vm_map_size_t   sub_size;
            if ((entry->vme_end - start) > sync_size) {
                sub_size = sync_size;
                sync_size = 0;
            } else {
                sub_size = entry->vme_end - start;
                sync_size -= sub_size;
            }
            if (entry->is_sub_map) {
                vm_map_offset_t sub_start;
                vm_map_offset_t sub_end;

                sub_start = (start - entry->vme_start)
                    + entry->offset;
                sub_end = sub_start + sub_size;
                vm_map_machine_attribute(
                    entry->object.sub_map,
                    sub_start, sub_end,
                    attribute, value);
            } else {
                if (entry->object.vm_object) {
                    vm_page_t           m;
                    vm_object_t         object;
                    vm_object_t         base_object;
                    vm_object_t         last_object;
                    vm_object_offset_t  offset;
                    vm_object_offset_t  base_offset;
                    vm_map_size_t       range;
                    range = sub_size;
                    offset = (start - entry->vme_start)
                        + entry->offset;
                    base_offset = offset;
                    object = entry->object.vm_object;
                    base_object = object;
                    last_object = NULL;

                    vm_object_lock(object);

                    while (range) {
                        m = vm_page_lookup(object, offset);

                        if (m && !m->fictitious) {
                            ret = pmap_attribute_cache_sync(
                                m->phys_page,
                                PAGE_SIZE,
                                attribute, value);
                        } else if (object->shadow) {
                            offset = offset + object->vo_shadow_offset;
                            last_object = object;
                            object = object->shadow;
                            vm_object_lock(last_object->shadow);
                            vm_object_unlock(last_object);
                            continue;
                        }
                        range -= PAGE_SIZE;

                        if (base_object != object) {
                            vm_object_unlock(object);
                            vm_object_lock(base_object);
                            object = base_object;
                        }
                        /* Bump to the next page */
                        base_offset += PAGE_SIZE;
                        offset = base_offset;
                    }
                    vm_object_unlock(object);
                }
            }
            start += sub_size;
        } else {
            vm_map_unlock(map);
            return KERN_FAILURE;
        }
    }

    vm_map_unlock(map);

    return ret;
}
/*
 *	vm_map_behavior_set:
 *
 *	Sets the paging reference behavior of the specified address
 *	range in the target map.  Paging reference behavior affects
 *	how pagein operations resulting from faults on the map will be
 *	clustered.
 */
kern_return_t
vm_map_behavior_set(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_behavior_t   new_behavior)
{
    register vm_map_entry_t entry;
    vm_map_entry_t          temp_entry;

    XPR(XPR_VM_MAP,
        "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
        map, start, end, new_behavior, 0);

    if (start > end ||
        start < vm_map_min(map) ||
        end > vm_map_max(map)) {
        return KERN_NO_SPACE;
    }

    switch (new_behavior) {

    /*
     * This first block of behaviors all set a persistent state on the specified
     * memory range.  All we have to do here is to record the desired behavior
     * in the vm_map_entry_t's.
     */

    case VM_BEHAVIOR_DEFAULT:
    case VM_BEHAVIOR_RANDOM:
    case VM_BEHAVIOR_SEQUENTIAL:
    case VM_BEHAVIOR_RSEQNTL:
    case VM_BEHAVIOR_ZERO_WIRED_PAGES:
        vm_map_lock(map);

        /*
         * The entire address range must be valid for the map.
         * Note that vm_map_range_check() does a
         * vm_map_lookup_entry() internally and returns the
         * entry containing the start of the address range if
         * the entire range is valid.
         */
        if (vm_map_range_check(map, start, end, &temp_entry)) {
            entry = temp_entry;
            vm_map_clip_start(map, entry, start);
        } else {
            vm_map_unlock(map);
            return (KERN_INVALID_ADDRESS);
        }

        while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
            vm_map_clip_end(map, entry, end);
            assert(!entry->use_pmap);

            if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
                entry->zero_wired_pages = TRUE;
            } else {
                entry->behavior = new_behavior;
            }
            entry = entry->vme_next;
        }

        vm_map_unlock(map);
        break;

    /*
     * The rest of these are different from the above in that they cause
     * an immediate action to take place as opposed to setting a behavior that
     * affects future actions.
     */

    case VM_BEHAVIOR_WILLNEED:
        return vm_map_willneed(map, start, end);

    case VM_BEHAVIOR_DONTNEED:
        return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

    case VM_BEHAVIOR_FREE:
        return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

    case VM_BEHAVIOR_REUSABLE:
        return vm_map_reusable_pages(map, start, end);

    case VM_BEHAVIOR_REUSE:
        return vm_map_reuse_pages(map, start, end);

    case VM_BEHAVIOR_CAN_REUSE:
        return vm_map_can_reuse(map, start, end);

    default:
        return (KERN_INVALID_ARGUMENT);
    }

    return (KERN_SUCCESS);
}
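/*
 * Illustrative sketch (editor's addition, not part of this file): user-level
 * madvise(2) advice reaches vm_map_behavior_set() roughly as follows, with
 * the MADV_* to VM_BEHAVIOR_* translation happening in the BSD layer:
 *
 *	// user space
 *	#include <sys/mman.h>
 *	madvise(addr, len, MADV_WILLNEED);   // -> VM_BEHAVIOR_WILLNEED -> vm_map_willneed()
 *	madvise(addr, len, MADV_FREE);       // -> VM_BEHAVIOR_FREE     -> vm_map_msync(..., VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS)
 *	madvise(addr, len, MADV_SEQUENTIAL); // -> VM_BEHAVIOR_SEQUENTIAL, recorded per map entry
 */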
/*
 * Internals for madvise(MADV_WILLNEED) system call.
 *
 * The present implementation is to do a read-ahead if the mapping corresponds
 * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
 * and basically ignore the "advice" (which we are always free to do).
 */

static kern_return_t
vm_map_willneed(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t              entry;
    vm_object_t                 object;
    memory_object_t             pager;
    struct vm_object_fault_info fault_info;
    kern_return_t               kr;
    vm_object_size_t            len;
    vm_object_offset_t          offset;

    /*
     * Fill in static values in fault_info.  Several fields get ignored by the code
     * we call, but we'll fill them in anyway since uninitialized fields are bad
     * when it comes to future backwards compatibility.
     */
    fault_info.interruptible = THREAD_UNINT;    /* ignored value */
    fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
    fault_info.no_cache = FALSE;                /* ignored value */
    fault_info.stealth = TRUE;
    fault_info.io_sync = FALSE;
    fault_info.cs_bypass = FALSE;
    fault_info.mark_zf_absent = FALSE;
    fault_info.batch_pmap_op = FALSE;

    /*
     * The MADV_WILLNEED operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */
    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */
    if (!vm_map_range_check(map, start, end, &entry)) {
        vm_map_unlock_read(map);
        return KERN_INVALID_ADDRESS;
    }

    /*
     * Examine each vm_map_entry_t in the range.
     */
    for (; entry != vm_map_to_entry(map) && start < end; ) {

        /*
         * The first time through, the start address could be anywhere
         * within the vm_map_entry we found.  So adjust the offset to
         * correspond.  After that, the offset will always be zero to
         * correspond to the beginning of the current vm_map_entry.
         */
        offset = (start - entry->vme_start) + entry->offset;

        /*
         * Set the length so we don't go beyond the end of the
         * map_entry or beyond the end of the range we were given.
         * This range could span also multiple map entries all of which
         * map different files, so make sure we only do the right amount
         * of I/O for each object.  Note that it's possible for there
         * to be multiple map entries all referring to the same object
         * but with different page permissions, but it's not worth
         * trying to optimize that case.
         */
        len = MIN(entry->vme_end - start, end - start);

        if ((vm_size_t) len != len) {
            /* 32-bit overflow */
            len = (vm_size_t) (0 - PAGE_SIZE);
        }
        fault_info.cluster_size = (vm_size_t) len;
        fault_info.lo_offset    = offset;
        fault_info.hi_offset    = offset + len;
        fault_info.user_tag     = entry->alias;

        /*
         * If there's no read permission to this mapping, then just
         * skip it.
         */
        if ((entry->protection & VM_PROT_READ) == 0) {
            entry = entry->vme_next;
            start = entry->vme_start;
            continue;
        }

        /*
         * Find the file object backing this map entry.  If there is
         * none, then we simply ignore the "will need" advice for this
         * entry and go on to the next one.
         */
        if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
            entry = entry->vme_next;
            start = entry->vme_start;
            continue;
        }

        /*
         * The data_request() could take a long time, so let's
         * release the map lock to avoid blocking other threads.
         */
        vm_map_unlock_read(map);

        vm_object_paging_begin(object);
        pager = object->pager;
        vm_object_unlock(object);

        /*
         * Get the data from the object asynchronously.
         *
         * Note that memory_object_data_request() places limits on the
         * amount of I/O it will do.  Regardless of the len we
         * specified, it won't do more than MAX_UPL_TRANSFER and it
         * silently truncates the len to that size.  This isn't
         * necessarily bad since madvise shouldn't really be used to
         * page in unlimited amounts of data.  Other Unix variants
         * limit the willneed case as well.  If this turns out to be an
         * issue for developers, then we can always adjust the policy
         * here and still be backwards compatible since this is all
         * just "advice".
         */
        kr = memory_object_data_request(
            pager,
            offset + object->paging_offset,
            0,      /* ignored */
            VM_PROT_READ,
            (memory_object_fault_info_t)&fault_info);

        vm_object_lock(object);
        vm_object_paging_end(object);
        vm_object_unlock(object);

        /*
         * If we couldn't do the I/O for some reason, just give up on
         * the madvise.  We still return success to the user since
         * madvise isn't supposed to fail when the advice can't be
         * taken.
         */
        if (kr != KERN_SUCCESS) {
            return KERN_SUCCESS;
        }

        start += len;
        if (start >= end) {
            /* done */
            return KERN_SUCCESS;
        }

        /* look up next entry */
        vm_map_lock_read(map);
        if (!vm_map_lookup_entry(map, start, &entry)) {
            /*
             * There's a new hole in the address range.
             */
            vm_map_unlock_read(map);
            return KERN_INVALID_ADDRESS;
        }
    }

    vm_map_unlock_read(map);
    return KERN_SUCCESS;
}
static boolean_t
vm_map_entry_is_reusable(
    vm_map_entry_t entry)
{
    vm_object_t object;

    switch (entry->alias) {
    case VM_MEMORY_MALLOC:
    case VM_MEMORY_MALLOC_SMALL:
    case VM_MEMORY_MALLOC_LARGE:
    case VM_MEMORY_REALLOC:
    case VM_MEMORY_MALLOC_TINY:
    case VM_MEMORY_MALLOC_LARGE_REUSABLE:
    case VM_MEMORY_MALLOC_LARGE_REUSED:
        /*
         * This is a malloc() memory region: check if it's still
         * in its original state and can be re-used for more
         * malloc() allocations.
         */
        break;
    default:
        /*
         * Not a malloc() memory region: let the caller decide if
         * it's reusable.
         */
        return TRUE;
    }

    if (entry->is_shared ||
        entry->is_sub_map ||
        entry->in_transition ||
        entry->protection != VM_PROT_DEFAULT ||
        entry->max_protection != VM_PROT_ALL ||
        entry->inheritance != VM_INHERIT_DEFAULT ||
        entry->permanent ||
        entry->superpage_size != FALSE ||
        entry->zero_wired_pages ||
        entry->wired_count != 0 ||
        entry->user_wired_count != 0) {
        return FALSE;
    }

    object = entry->object.vm_object;
    if (object == VM_OBJECT_NULL) {
        return TRUE;
    }
    if (
        /*
         * Let's proceed even if the VM object is potentially
         * shared.
         * We check for this later when processing the actual
         * VM pages, so the contents will be safe if shared.
         *
         * But we can still mark this memory region as "reusable" to
         * acknowledge that the caller did let us know that the memory
         * could be re-used and should not be penalized for holding
         * on to it.  This allows its "resident size" to not include
         * the reusable range.
         */
        object->ref_count == 1 &&
        object->wired_page_count == 0 &&
        object->copy == VM_OBJECT_NULL &&
        object->shadow == VM_OBJECT_NULL &&
        object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
        object->internal &&
        !object->true_share &&
        object->wimg_bits == VM_WIMG_USE_DEFAULT &&
        !object->code_signed) {
        return TRUE;
    }
    return FALSE;
}
static kern_return_t
vm_map_reuse_pages(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t      entry;
    vm_object_t         object;
    vm_object_offset_t  start_offset, end_offset;

    /*
     * The MADV_REUSE operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */
    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */
    if (!vm_map_range_check(map, start, end, &entry)) {
        vm_map_unlock_read(map);
        vm_page_stats_reusable.reuse_pages_failure++;
        return KERN_INVALID_ADDRESS;
    }

    /*
     * Examine each vm_map_entry_t in the range.
     */
    for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
         entry = entry->vme_next) {
        /*
         * Sanity check on the VM map entry.
         */
        if (!vm_map_entry_is_reusable(entry)) {
            vm_map_unlock_read(map);
            vm_page_stats_reusable.reuse_pages_failure++;
            return KERN_INVALID_ADDRESS;
        }

        /*
         * The first time through, the start address could be anywhere
         * within the vm_map_entry we found.  So adjust the offset to
         * correspond.
         */
        if (entry->vme_start < start) {
            start_offset = start - entry->vme_start;
        } else {
            start_offset = 0;
        }
        end_offset = MIN(end, entry->vme_end) - entry->vme_start;
        start_offset += entry->offset;
        end_offset += entry->offset;

        object = entry->object.vm_object;
        if (object != VM_OBJECT_NULL) {
            /* tell pmap to not count this range as "reusable" */
            pmap_reusable(map->pmap,
                          MAX(start, entry->vme_start),
                          MIN(end, entry->vme_end),
                          FALSE);
            vm_object_lock(object);
            vm_object_reuse_pages(object, start_offset, end_offset,
                                  TRUE);
            vm_object_unlock(object);
        }

        if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
            /*
             * We do not hold the VM map exclusively here.
             * The "alias" field is not that critical, so it's
             * safe to update it here, as long as it is the only
             * one that can be modified while holding the VM map
             * "shared".
             */
            entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
        }
    }

    vm_map_unlock_read(map);
    vm_page_stats_reusable.reuse_pages_success++;
    return KERN_SUCCESS;
}
static kern_return_t
vm_map_reusable_pages(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t      entry;
    vm_object_t         object;
    vm_object_offset_t  start_offset, end_offset;

    /*
     * The MADV_REUSABLE operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */
    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */
    if (!vm_map_range_check(map, start, end, &entry)) {
        vm_map_unlock_read(map);
        vm_page_stats_reusable.reusable_pages_failure++;
        return KERN_INVALID_ADDRESS;
    }

    /*
     * Examine each vm_map_entry_t in the range.
     */
    for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
         entry = entry->vme_next) {
        int kill_pages = 0;

        /*
         * Sanity check on the VM map entry.
         */
        if (!vm_map_entry_is_reusable(entry)) {
            vm_map_unlock_read(map);
            vm_page_stats_reusable.reusable_pages_failure++;
            return KERN_INVALID_ADDRESS;
        }

        /*
         * The first time through, the start address could be anywhere
         * within the vm_map_entry we found.  So adjust the offset to
         * correspond.
         */
        if (entry->vme_start < start) {
            start_offset = start - entry->vme_start;
        } else {
            start_offset = 0;
        }
        end_offset = MIN(end, entry->vme_end) - entry->vme_start;
        start_offset += entry->offset;
        end_offset += entry->offset;

        object = entry->object.vm_object;
        if (object == VM_OBJECT_NULL)
            continue;

        vm_object_lock(object);
        if (object->ref_count == 1 && !object->shadow)
            kill_pages = 1;
        else
            kill_pages = -1;
        if (kill_pages != -1) {
            /* tell pmap to count this range as "reusable" */
            pmap_reusable(map->pmap,
                          MAX(start, entry->vme_start),
                          MIN(end, entry->vme_end),
                          TRUE);
            vm_object_deactivate_pages(object,
                                       start_offset,
                                       end_offset - start_offset,
                                       kill_pages,
                                       TRUE /*reusable_pages*/);
        } else {
            vm_page_stats_reusable.reusable_pages_shared++;
        }
        vm_object_unlock(object);

        if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
            entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
            /*
             * We do not hold the VM map exclusively here.
             * The "alias" field is not that critical, so it's
             * safe to update it here, as long as it is the only
             * one that can be modified while holding the VM map
             * "shared".
             */
            entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
        }
    }

    vm_map_unlock_read(map);
    vm_page_stats_reusable.reusable_pages_success++;
    return KERN_SUCCESS;
}
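/*
 * Illustrative user-space sketch (editor's addition, not part of this file):
 * the reusable-pages path above backs the Darwin-specific madvise flags that
 * malloc(3) uses to return memory without unmapping it:
 *
 *	madvise(buf, len, MADV_FREE_REUSABLE);  // vm_map_reusable_pages():
 *	                                        // pages may be reclaimed and
 *	                                        // stop counting against the task
 *	// ... later, before touching the buffer again:
 *	madvise(buf, len, MADV_FREE_REUSE);     // vm_map_reuse_pages(): undo the
 *	                                        // "reusable" accounting
 */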
static kern_return_t
vm_map_can_reuse(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */
	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 */
	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.can_reuse_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	     entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (! vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.can_reuse_failure++;
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.can_reuse_success++;
	return KERN_SUCCESS;
}
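/*
 * Illustrative sketch (not part of the original source): the routines above
 * implement the madvise-style "reuse" hints.  A dispatcher such as
 * vm_map_behavior_set() would typically route the VM_BEHAVIOR_* values to
 * them as shown below; the exact call site, and the routine names to the
 * extent they are elided from this excerpt, are assumptions based on the
 * released XNU sources.
 */
#if 0	/* example only, not compiled */
static kern_return_t
vm_map_reuse_dispatch_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_behavior_t	behavior)
{
	switch (behavior) {
	case VM_BEHAVIOR_REUSE:		/* pages are about to be re-used */
		return vm_map_reuse_pages(map, start, end);
	case VM_BEHAVIOR_REUSABLE:	/* pages may be discarded if needed */
		return vm_map_reusable_pages(map, start, end);
	case VM_BEHAVIOR_CAN_REUSE:	/* query: is the range eligible? */
		return vm_map_can_reuse(map, start, end);
	default:
		return KERN_INVALID_ARGUMENT;
	}
}
#endif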
/*
 *	Routine:	vm_map_entry_insert
 *
 *	Description:	This routine inserts a new vm_entry in a locked map.
 */
vm_map_entry_insert(
	vm_map_t		map,
	vm_map_entry_t		insp_entry,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		needs_copy,
	boolean_t		is_shared,
	boolean_t		in_transition,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_behavior_t		behavior,
	vm_inherit_t		inheritance,
	unsigned		wired_count,
	boolean_t		no_cache,
	boolean_t		permanent,
	unsigned int		superpage_size,
	boolean_t		clear_map_aligned)
{
	vm_map_entry_t	new_entry;

	assert(insp_entry != (vm_map_entry_t)0);

	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);

	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}
	if (clear_map_aligned &&
	    ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) {
		new_entry->map_aligned = FALSE;
	}

	new_entry->vme_start = start;
	new_entry->vme_end = end;
	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
				   VM_MAP_PAGE_MASK(map)));
	if (new_entry->map_aligned) {
		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
					   VM_MAP_PAGE_MASK(map)));
	}
	assert(new_entry->vme_start < new_entry->vme_end);

	new_entry->object.vm_object = object;
	new_entry->offset = offset;
	new_entry->is_shared = is_shared;
	new_entry->is_sub_map = FALSE;
	new_entry->needs_copy = needs_copy;
	new_entry->in_transition = in_transition;
	new_entry->needs_wakeup = FALSE;
	new_entry->inheritance = inheritance;
	new_entry->protection = cur_protection;
	new_entry->max_protection = max_protection;
	new_entry->behavior = behavior;
	new_entry->wired_count = wired_count;
	new_entry->user_wired_count = 0;
	new_entry->use_pmap = FALSE;
	new_entry->alias = 0;
	new_entry->zero_wired_pages = FALSE;
	new_entry->no_cache = no_cache;
	new_entry->permanent = permanent;
	if (superpage_size)
		new_entry->superpage_size = TRUE;
	else
		new_entry->superpage_size = FALSE;
	new_entry->used_for_jit = FALSE;

	/*
	 *	Insert the new entry into the list.
	 */
	vm_map_store_entry_link(map, insp_entry, new_entry);
	map->size += end - start;

	/*
	 *	Update the free space hint and the lookup hint.
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);
}
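/*
 * Illustrative sketch (not part of the original source): a minimal call to
 * vm_map_entry_insert() for a fresh anonymous range, using the parameter
 * order shown above.  The map must be locked for writing by the caller; the
 * attribute values below are assumptions chosen only to show the shape of
 * the call, and "insp_entry", "start" and "end" are assumed to be in scope.
 */
#if 0	/* example only, not compiled */
	vm_map_lock(map);
	vm_map_entry_insert(map, insp_entry, start, end,
			    VM_OBJECT_NULL,		/* object: none yet */
			    (vm_object_offset_t) 0,	/* offset */
			    FALSE,			/* needs_copy */
			    FALSE,			/* is_shared */
			    FALSE,			/* in_transition */
			    VM_PROT_DEFAULT, VM_PROT_ALL,
			    VM_BEHAVIOR_DEFAULT,
			    VM_INHERIT_DEFAULT,
			    0,				/* wired_count */
			    FALSE,			/* no_cache */
			    FALSE,			/* permanent */
			    0,				/* superpage_size */
			    FALSE);			/* clear_map_aligned */
	vm_map_unlock(map);
#endif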
/*
 *	Routine:	vm_map_remap_extract
 *
 *	Description:	This routine returns a vm_entry list from a map.
 */
11811 static kern_return_t
11812 vm_map_remap_extract(
11814 vm_map_offset_t addr
,
11815 vm_map_size_t size
,
11817 struct vm_map_header
*map_header
,
11818 vm_prot_t
*cur_protection
,
11819 vm_prot_t
*max_protection
,
11820 /* What, no behavior? */
11821 vm_inherit_t inheritance
,
11822 boolean_t pageable
)
11824 kern_return_t result
;
11825 vm_map_size_t mapped_size
;
11826 vm_map_size_t tmp_size
;
11827 vm_map_entry_t src_entry
; /* result of last map lookup */
11828 vm_map_entry_t new_entry
;
11829 vm_object_offset_t offset
;
11830 vm_map_offset_t map_address
;
11831 vm_map_offset_t src_start
; /* start of entry to map */
11832 vm_map_offset_t src_end
; /* end of region to be mapped */
11833 vm_object_t object
;
11834 vm_map_version_t version
;
11835 boolean_t src_needs_copy
;
11836 boolean_t new_entry_needs_copy
;
11838 assert(map
!= VM_MAP_NULL
);
11840 assert(size
== vm_map_round_page(size
, PAGE_MASK
));
11841 assert(inheritance
== VM_INHERIT_NONE
||
11842 inheritance
== VM_INHERIT_COPY
||
11843 inheritance
== VM_INHERIT_SHARE
);
11846 * Compute start and end of region.
11848 src_start
= vm_map_trunc_page(addr
, PAGE_MASK
);
11849 src_end
= vm_map_round_page(src_start
+ size
, PAGE_MASK
);
11853 * Initialize map_header.
11855 map_header
->links
.next
= (struct vm_map_entry
*)&map_header
->links
;
11856 map_header
->links
.prev
= (struct vm_map_entry
*)&map_header
->links
;
11857 map_header
->nentries
= 0;
11858 map_header
->entries_pageable
= pageable
;
11859 map_header
->page_shift
= PAGE_SHIFT
;
11861 vm_map_store_init( map_header
);
11863 *cur_protection
= VM_PROT_ALL
;
11864 *max_protection
= VM_PROT_ALL
;
11868 result
= KERN_SUCCESS
;
11871 * The specified source virtual space might correspond to
11872 * multiple map entries, need to loop on them.
11875 while (mapped_size
!= size
) {
11876 vm_map_size_t entry_size
;
11879 * Find the beginning of the region.
11881 if (! vm_map_lookup_entry(map
, src_start
, &src_entry
)) {
11882 result
= KERN_INVALID_ADDRESS
;
11886 if (src_start
< src_entry
->vme_start
||
11887 (mapped_size
&& src_start
!= src_entry
->vme_start
)) {
11888 result
= KERN_INVALID_ADDRESS
;
11892 tmp_size
= size
- mapped_size
;
11893 if (src_end
> src_entry
->vme_end
)
11894 tmp_size
-= (src_end
- src_entry
->vme_end
);
11896 entry_size
= (vm_map_size_t
)(src_entry
->vme_end
-
11897 src_entry
->vme_start
);
11899 if(src_entry
->is_sub_map
) {
11900 vm_map_reference(src_entry
->object
.sub_map
);
11901 object
= VM_OBJECT_NULL
;
11903 object
= src_entry
->object
.vm_object
;
11905 if (object
== VM_OBJECT_NULL
) {
11906 object
= vm_object_allocate(entry_size
);
11907 src_entry
->offset
= 0;
11908 src_entry
->object
.vm_object
= object
;
11909 } else if (object
->copy_strategy
!=
11910 MEMORY_OBJECT_COPY_SYMMETRIC
) {
11912 * We are already using an asymmetric
11913 * copy, and therefore we already have
11914 * the right object.
11916 assert(!src_entry
->needs_copy
);
11917 } else if (src_entry
->needs_copy
|| object
->shadowed
||
11918 (object
->internal
&& !object
->true_share
&&
11919 !src_entry
->is_shared
&&
11920 object
->vo_size
> entry_size
)) {
11922 vm_object_shadow(&src_entry
->object
.vm_object
,
11923 &src_entry
->offset
,
11926 if (!src_entry
->needs_copy
&&
11927 (src_entry
->protection
& VM_PROT_WRITE
)) {
11930 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
11932 if (override_nx(map
, src_entry
->alias
) && prot
)
11933 prot
|= VM_PROT_EXECUTE
;
11935 if(map
->mapped_in_other_pmaps
) {
11936 vm_object_pmap_protect(
11937 src_entry
->object
.vm_object
,
11941 src_entry
->vme_start
,
11944 pmap_protect(vm_map_pmap(map
),
11945 src_entry
->vme_start
,
11946 src_entry
->vme_end
,
11951 object
= src_entry
->object
.vm_object
;
11952 src_entry
->needs_copy
= FALSE
;
11956 vm_object_lock(object
);
11957 vm_object_reference_locked(object
); /* object ref. for new entry */
11958 if (object
->copy_strategy
==
11959 MEMORY_OBJECT_COPY_SYMMETRIC
) {
11960 object
->copy_strategy
=
11961 MEMORY_OBJECT_COPY_DELAY
;
11963 vm_object_unlock(object
);
11966 offset
= src_entry
->offset
+ (src_start
- src_entry
->vme_start
);
11968 new_entry
= _vm_map_entry_create(map_header
, !map_header
->entries_pageable
);
11969 vm_map_entry_copy(new_entry
, src_entry
);
11970 new_entry
->use_pmap
= FALSE
; /* clr address space specifics */
11972 new_entry
->map_aligned
= FALSE
;
11974 new_entry
->vme_start
= map_address
;
11975 new_entry
->vme_end
= map_address
+ tmp_size
;
11976 assert(new_entry
->vme_start
< new_entry
->vme_end
);
11977 new_entry
->inheritance
= inheritance
;
11978 new_entry
->offset
= offset
;
11981 * The new region has to be copied now if required.
11986 * Cannot allow an entry describing a JIT
11987 * region to be shared across address spaces.
11989 if (src_entry
->used_for_jit
== TRUE
) {
11990 result
= KERN_INVALID_ARGUMENT
;
11993 src_entry
->is_shared
= TRUE
;
11994 new_entry
->is_shared
= TRUE
;
11995 if (!(new_entry
->is_sub_map
))
11996 new_entry
->needs_copy
= FALSE
;
11998 } else if (src_entry
->is_sub_map
) {
11999 /* make this a COW sub_map if not already */
12000 new_entry
->needs_copy
= TRUE
;
12001 object
= VM_OBJECT_NULL
;
12002 } else if (src_entry
->wired_count
== 0 &&
12003 vm_object_copy_quickly(&new_entry
->object
.vm_object
,
12005 (new_entry
->vme_end
-
12006 new_entry
->vme_start
),
12008 &new_entry_needs_copy
)) {
12010 new_entry
->needs_copy
= new_entry_needs_copy
;
12011 new_entry
->is_shared
= FALSE
;
12014 * Handle copy_on_write semantics.
12016 if (src_needs_copy
&& !src_entry
->needs_copy
) {
12019 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
12021 if (override_nx(map
, src_entry
->alias
) && prot
)
12022 prot
|= VM_PROT_EXECUTE
;
12024 vm_object_pmap_protect(object
,
12027 ((src_entry
->is_shared
12028 || map
->mapped_in_other_pmaps
) ?
12029 PMAP_NULL
: map
->pmap
),
12030 src_entry
->vme_start
,
12033 src_entry
->needs_copy
= TRUE
;
12036 * Throw away the old object reference of the new entry.
12038 vm_object_deallocate(object
);
12041 new_entry
->is_shared
= FALSE
;
12044 * The map can be safely unlocked since we
12045 * already hold a reference on the object.
12047 * Record the timestamp of the map for later
12048 * verification, and unlock the map.
12050 version
.main_timestamp
= map
->timestamp
;
12051 vm_map_unlock(map
); /* Increments timestamp once! */
12054 * Perform the copy.
12056 if (src_entry
->wired_count
> 0) {
12057 vm_object_lock(object
);
12058 result
= vm_object_copy_slowly(
12063 &new_entry
->object
.vm_object
);
12065 new_entry
->offset
= 0;
12066 new_entry
->needs_copy
= FALSE
;
12068 result
= vm_object_copy_strategically(
12072 &new_entry
->object
.vm_object
,
12073 &new_entry
->offset
,
12074 &new_entry_needs_copy
);
12076 new_entry
->needs_copy
= new_entry_needs_copy
;
12080 * Throw away the old object reference of the new entry.
12082 vm_object_deallocate(object
);
12084 if (result
!= KERN_SUCCESS
&&
12085 result
!= KERN_MEMORY_RESTART_COPY
) {
12086 _vm_map_entry_dispose(map_header
, new_entry
);
12091 * Verify that the map has not substantially
12092 * changed while the copy was being made.
12096 if (version
.main_timestamp
+ 1 != map
->timestamp
) {
12098 * Simple version comparison failed.
12100 * Retry the lookup and verify that the
12101 * same object/offset are still present.
12103 vm_object_deallocate(new_entry
->
12105 _vm_map_entry_dispose(map_header
, new_entry
);
12106 if (result
== KERN_MEMORY_RESTART_COPY
)
12107 result
= KERN_SUCCESS
;
12111 if (result
== KERN_MEMORY_RESTART_COPY
) {
12112 vm_object_reference(object
);
12117 _vm_map_store_entry_link(map_header
,
12118 map_header
->links
.prev
, new_entry
);
12120 /*Protections for submap mapping are irrelevant here*/
12121 if( !src_entry
->is_sub_map
) {
12122 *cur_protection
&= src_entry
->protection
;
12123 *max_protection
&= src_entry
->max_protection
;
12125 map_address
+= tmp_size
;
12126 mapped_size
+= tmp_size
;
12127 src_start
+= tmp_size
;
12131 vm_map_unlock(map
);
12132 if (result
!= KERN_SUCCESS
) {
12134 * Free all allocated elements.
12136 for (src_entry
= map_header
->links
.next
;
12137 src_entry
!= (struct vm_map_entry
*)&map_header
->links
;
12138 src_entry
= new_entry
) {
12139 new_entry
= src_entry
->vme_next
;
12140 _vm_map_store_entry_unlink(map_header
, src_entry
);
12141 if (src_entry
->is_sub_map
) {
12142 vm_map_deallocate(src_entry
->object
.sub_map
);
12144 vm_object_deallocate(src_entry
->object
.vm_object
);
12146 _vm_map_entry_dispose(map_header
, src_entry
);
/*
 *	Routine:	vm_remap
 *
 *			Map portion of a task's address space.
 *			Mapped region must not overlap more than
 *			one vm memory object. Protections and
 *			inheritance attributes remain the same
 *			as in the original task and are	out parameters.
 *			Source and Target task can be identical
 *			Other attributes are identical as for vm_map()
 */
12165 vm_map_t target_map
,
12166 vm_map_address_t
*address
,
12167 vm_map_size_t size
,
12168 vm_map_offset_t mask
,
12171 vm_map_offset_t memory_address
,
12173 vm_prot_t
*cur_protection
,
12174 vm_prot_t
*max_protection
,
12175 vm_inherit_t inheritance
)
12177 kern_return_t result
;
12178 vm_map_entry_t entry
;
12179 vm_map_entry_t insp_entry
= VM_MAP_ENTRY_NULL
;
12180 vm_map_entry_t new_entry
;
12181 struct vm_map_header map_header
;
12182 vm_map_offset_t offset_in_mapping
;
12184 if (target_map
== VM_MAP_NULL
)
12185 return KERN_INVALID_ARGUMENT
;
12187 switch (inheritance
) {
12188 case VM_INHERIT_NONE
:
12189 case VM_INHERIT_COPY
:
12190 case VM_INHERIT_SHARE
:
12191 if (size
!= 0 && src_map
!= VM_MAP_NULL
)
12195 return KERN_INVALID_ARGUMENT
;
	/*
	 * If the user is requesting that we return the address of the
	 * first byte of the data (rather than the base of the page),
	 * then we use different rounding semantics: specifically,
	 * we assume that (memory_address, size) describes a region
	 * all of whose pages we must cover, rather than a base to be truncated
	 * down and a size to be added to that base.  So we figure out
	 * the highest page that the requested region includes and make
	 * sure that the size will cover it.
	 *
	 * The key example we're worried about is of the form:
	 *
	 *		memory_address = 0x1ff0, size = 0x20
	 *
	 * With the old semantics, we round down the memory_address to 0x1000
	 * and round up the size to 0x1000, resulting in our covering *only*
	 * page 0x1000.  With the new semantics, we'd realize that the region
	 * covers 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover
	 * both page 0x1000 and page 0x2000 in the region we remap.
	 */
12218 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
12219 offset_in_mapping
= memory_address
- vm_map_trunc_page(memory_address
, PAGE_MASK
);
12220 size
= vm_map_round_page(memory_address
+ size
- vm_map_trunc_page(memory_address
, PAGE_MASK
), PAGE_MASK
);
12222 size
= vm_map_round_page(size
, PAGE_MASK
);
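	/*
	 * Worked example of the rounding just above (not part of the original
	 * source): with 4KB pages, memory_address = 0x1ff0 and size = 0x20,
	 * the VM_FLAGS_RETURN_DATA_ADDR path computes
	 *	offset_in_mapping = 0x1ff0 - 0x1000 = 0xff0
	 *	size              = round(0x1ff0 + 0x20 - 0x1000) = 0x2000
	 * so both page 0x1000 and page 0x2000 are covered, whereas the plain
	 * path would have rounded the size to a single page.
	 */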
12225 result
= vm_map_remap_extract(src_map
, memory_address
,
12226 size
, copy
, &map_header
,
12230 target_map
->hdr
.entries_pageable
);
12232 if (result
!= KERN_SUCCESS
) {
12237 * Allocate/check a range of free virtual address
12238 * space for the target
12240 *address
= vm_map_trunc_page(*address
,
12241 VM_MAP_PAGE_MASK(target_map
));
12242 vm_map_lock(target_map
);
12243 result
= vm_map_remap_range_allocate(target_map
, address
, size
,
12244 mask
, flags
, &insp_entry
);
12246 for (entry
= map_header
.links
.next
;
12247 entry
!= (struct vm_map_entry
*)&map_header
.links
;
12248 entry
= new_entry
) {
12249 new_entry
= entry
->vme_next
;
12250 _vm_map_store_entry_unlink(&map_header
, entry
);
12251 if (result
== KERN_SUCCESS
) {
12252 entry
->vme_start
+= *address
;
12253 entry
->vme_end
+= *address
;
12254 assert(!entry
->map_aligned
);
12255 vm_map_store_entry_link(target_map
, insp_entry
, entry
);
12256 insp_entry
= entry
;
12258 if (!entry
->is_sub_map
) {
12259 vm_object_deallocate(entry
->object
.vm_object
);
12261 vm_map_deallocate(entry
->object
.sub_map
);
12263 _vm_map_entry_dispose(&map_header
, entry
);
12267 if( target_map
->disable_vmentry_reuse
== TRUE
) {
12268 if( target_map
->highest_entry_end
< insp_entry
->vme_end
){
12269 target_map
->highest_entry_end
= insp_entry
->vme_end
;
12273 if (result
== KERN_SUCCESS
) {
12274 target_map
->size
+= size
;
12275 SAVE_HINT_MAP_WRITE(target_map
, insp_entry
);
12277 vm_map_unlock(target_map
);
12279 if (result
== KERN_SUCCESS
&& target_map
->wiring_required
)
12280 result
= vm_map_wire(target_map
, *address
,
12281 *address
+ size
, *cur_protection
, TRUE
);
12284 * If requested, return the address of the data pointed to by the
12285 * request, rather than the base of the resulting page.
12287 if ((flags
& VM_FLAGS_RETURN_DATA_ADDR
) != 0) {
12288 *address
+= offset_in_mapping
;
/*
 *	Routine:	vm_map_remap_range_allocate
 *
 *	Allocate a range in the specified virtual address map.
 *	returns the address and the map entry just before the allocated
 *	range.
 *
 *	Map must be locked.
 */
12305 static kern_return_t
12306 vm_map_remap_range_allocate(
12308 vm_map_address_t
*address
, /* IN/OUT */
12309 vm_map_size_t size
,
12310 vm_map_offset_t mask
,
12312 vm_map_entry_t
*map_entry
) /* OUT */
12314 vm_map_entry_t entry
;
12315 vm_map_offset_t start
;
12316 vm_map_offset_t end
;
12323 if (flags
& VM_FLAGS_ANYWHERE
)
12326 * Calculate the first possible address.
12329 if (start
< map
->min_offset
)
12330 start
= map
->min_offset
;
12331 if (start
> map
->max_offset
)
12332 return(KERN_NO_SPACE
);
12335 * Look for the first possible address;
12336 * if there's already something at this
12337 * address, we have to start after it.
12340 if( map
->disable_vmentry_reuse
== TRUE
) {
12341 VM_MAP_HIGHEST_ENTRY(map
, entry
, start
);
12343 assert(first_free_is_valid(map
));
12344 if (start
== map
->min_offset
) {
12345 if ((entry
= map
->first_free
) != vm_map_to_entry(map
))
12346 start
= entry
->vme_end
;
12348 vm_map_entry_t tmp_entry
;
12349 if (vm_map_lookup_entry(map
, start
, &tmp_entry
))
12350 start
= tmp_entry
->vme_end
;
12353 start
= vm_map_round_page(start
,
12354 VM_MAP_PAGE_MASK(map
));
12358 * In any case, the "entry" always precedes
12359 * the proposed new region throughout the
12364 register vm_map_entry_t next
;
12367 * Find the end of the proposed new region.
12368 * Be sure we didn't go beyond the end, or
12369 * wrap around the address.
12372 end
= ((start
+ mask
) & ~mask
);
12373 end
= vm_map_round_page(end
,
12374 VM_MAP_PAGE_MASK(map
));
12376 return(KERN_NO_SPACE
);
12380 if ((end
> map
->max_offset
) || (end
< start
)) {
12381 if (map
->wait_for_space
) {
12382 if (size
<= (map
->max_offset
-
12383 map
->min_offset
)) {
12384 assert_wait((event_t
) map
, THREAD_INTERRUPTIBLE
);
12385 vm_map_unlock(map
);
12386 thread_block(THREAD_CONTINUE_NULL
);
12392 return(KERN_NO_SPACE
);
12396 * If there are no more entries, we must win.
12399 next
= entry
->vme_next
;
12400 if (next
== vm_map_to_entry(map
))
12404 * If there is another entry, it must be
12405 * after the end of the potential new region.
12408 if (next
->vme_start
>= end
)
12412 * Didn't fit -- move to the next entry.
12416 start
= entry
->vme_end
;
12420 vm_map_entry_t temp_entry
;
12424 * the address doesn't itself violate
12425 * the mask requirement.
12428 if ((start
& mask
) != 0)
12429 return(KERN_NO_SPACE
);
12433 * ... the address is within bounds
12436 end
= start
+ size
;
12438 if ((start
< map
->min_offset
) ||
12439 (end
> map
->max_offset
) ||
12441 return(KERN_INVALID_ADDRESS
);
12445 * If we're asked to overwrite whatever was mapped in that
12446 * range, first deallocate that range.
12448 if (flags
& VM_FLAGS_OVERWRITE
) {
12452 * We use a "zap_map" to avoid having to unlock
12453 * the "map" in vm_map_delete(), which would compromise
12454 * the atomicity of the "deallocate" and then "remap"
12457 zap_map
= vm_map_create(PMAP_NULL
,
12460 map
->hdr
.entries_pageable
);
12461 if (zap_map
== VM_MAP_NULL
) {
12462 return KERN_RESOURCE_SHORTAGE
;
12464 vm_map_set_page_shift(zap_map
, VM_MAP_PAGE_SHIFT(map
));
12466 kr
= vm_map_delete(map
, start
, end
,
12467 VM_MAP_REMOVE_SAVE_ENTRIES
,
12469 if (kr
== KERN_SUCCESS
) {
12470 vm_map_destroy(zap_map
,
12471 VM_MAP_REMOVE_NO_PMAP_CLEANUP
);
12472 zap_map
= VM_MAP_NULL
;
12477 * ... the starting address isn't allocated
12480 if (vm_map_lookup_entry(map
, start
, &temp_entry
))
12481 return(KERN_NO_SPACE
);
12483 entry
= temp_entry
;
12486 * ... the next region doesn't overlap the
12490 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
12491 (entry
->vme_next
->vme_start
< end
))
12492 return(KERN_NO_SPACE
);
12494 *map_entry
= entry
;
12495 return(KERN_SUCCESS
);
/*
 *	Set the address map for the current thread to the specified map
 */
vm_map_t
vm_map_switch(
	vm_map_t	map)
{
	int		mycpu;
	thread_t	thread = current_thread();
	vm_map_t	oldmap = thread->map;

	mp_disable_preemption();
	mycpu = cpu_number();

	/*
	 *	Deactivate the current map and activate the requested map
	 */
	PMAP_SWITCH_USER(thread, map, mycpu);

	mp_enable_preemption();
	return(oldmap);
}
/*
 *	Routine:	vm_map_write_user
 *
 *	Description:
 *		Copy out data from a kernel space into space in the
 *		destination map. The space must already exist in the
 *		destination map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault. i.e. kernel mode user
 *		threads.
 */
kern_return_t
vm_map_write_user(
	vm_map_t		map,
	void			*src_p,
	vm_map_address_t	dst_addr,
	vm_size_t		size)
{
	kern_return_t	kr = KERN_SUCCESS;

	if(current_map() == map) {
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t	oldmap;

		/* take on the identity of the target map while doing */

		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout(src_p, dst_addr, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
/*
 *	Routine:	vm_map_read_user
 *
 *	Description:
 *		Copy in data from a user space source map into the
 *		kernel map. The space must already exist in the
 *		kernel map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault. i.e. kernel mode user
 *		threads.
 */
kern_return_t
vm_map_read_user(
	vm_map_t		map,
	vm_map_address_t	src_addr,
	void			*dst_p,
	vm_size_t		size)
{
	kern_return_t	kr = KERN_SUCCESS;

	if(current_map() == map) {
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_t	oldmap;

		/* take on the identity of the target map while doing */

		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(src_addr, dst_p, size)) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
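/*
 * Illustrative sketch (not part of the original source): copying a small
 * kernel buffer into a (possibly non-current) task's map and reading it
 * back using the two routines above.  "target_map" and "uaddr" are assumed
 * to be already set up; both calls fault against target_map, not the
 * kernel map, by temporarily switching identity as shown above.
 */
#if 0	/* example only, not compiled */
	char			kbuf[64];
	vm_map_address_t	uaddr;		/* user address, assumed mapped */
	kern_return_t		kr;

	kr = vm_map_write_user(target_map, kbuf, uaddr, sizeof (kbuf));
	if (kr == KERN_SUCCESS) {
		kr = vm_map_read_user(target_map, uaddr, kbuf, sizeof (kbuf));
	}
#endif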
/*
 *	vm_map_check_protection:
 *
 *	Assert that the target map allows the specified
 *	privilege on the entire address region given.
 *	The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
			vm_map_offset_t end, vm_prot_t protection)
{
	vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	vm_map_lock(map);

	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
		vm_map_unlock(map);
		return (FALSE);
	}

	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		vm_map_unlock(map);
		return (FALSE);
	}
	entry = tmp_entry;

	while (start < end) {
		if (entry == vm_map_to_entry(map)) {
			vm_map_unlock(map);
			return (FALSE);
		}

		/*
		 *	No holes allowed!
		 */
		if (start < entry->vme_start) {
			vm_map_unlock(map);
			return (FALSE);
		}

		/*
		 * Check protection associated with entry.
		 */
		if ((entry->protection & protection) != protection) {
			vm_map_unlock(map);
			return (FALSE);
		}

		/* go to next entry */
		start = entry->vme_end;
		entry = entry->vme_next;
	}
	vm_map_unlock(map);
	return (TRUE);
}
kern_return_t
vm_map_purgable_control(
	vm_map_t	map,
	vm_map_offset_t	address,
	vm_purgable_t	control,
	int		*state)
{
	vm_map_entry_t	entry;
	vm_object_t	object;
	kern_return_t	kr;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	if (control != VM_PURGABLE_SET_STATE &&
	    control != VM_PURGABLE_GET_STATE &&
	    control != VM_PURGABLE_PURGE_ALL)
		return(KERN_INVALID_ARGUMENT);

	if (control == VM_PURGABLE_PURGE_ALL) {
		vm_purgeable_object_purge_all();
		return KERN_SUCCESS;
	}

	if (control == VM_PURGABLE_SET_STATE &&
	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
	     ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
		return(KERN_INVALID_ARGUMENT);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	if ((entry->protection & VM_PROT_WRITE) == 0) {
		/*
		 * Can't apply purgable controls to something you can't write.
		 */
		vm_map_unlock_read(map);
		return(KERN_PROTECTION_FAILURE);
	}

	object = entry->object.vm_object;
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or it can't be purgable.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

	if (entry->offset != 0 ||
	    entry->vme_end - entry->vme_start != object->vo_size) {
		/*
		 * Can only apply purgable controls to the whole (existing)
		 * object at once.
		 */
		vm_map_unlock_read(map);
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	vm_map_unlock_read(map);

	kr = vm_object_purgable_control(object, control, state);

	vm_object_unlock(object);

	return kr;
}
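/*
 * Illustrative sketch (not part of the original source): marking a whole
 * purgeable allocation volatile and later querying it through the routine
 * above.  "map" and "addr" are assumed to be in scope, and "addr" must be
 * the start of an entry that exactly covers its backing object, per the
 * checks above.
 */
#if 0	/* example only, not compiled */
	int		state;
	kern_return_t	kr;

	state = VM_PURGABLE_VOLATILE;
	kr = vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, &state);
	if (kr == KERN_SUCCESS) {
		/* later: find out whether the pages were reclaimed */
		kr = vm_map_purgable_control(map, addr,
					     VM_PURGABLE_GET_STATE, &state);
		/* state is now VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY */
	}
#endif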
kern_return_t
vm_map_page_query_internal(
	vm_map_t	target_map,
	vm_map_offset_t	offset,
	int		*disposition,
	int		*ref_count)
{
	kern_return_t			kr;
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count;

	count = VM_PAGE_INFO_BASIC_COUNT;
	kr = vm_map_page_info(target_map,
			      offset,
			      VM_PAGE_INFO_BASIC,
			      (vm_page_info_t) &info,
			      &count);
	if (kr == KERN_SUCCESS) {
		*disposition = info.disposition;
		*ref_count = info.ref_count;
	}

	return kr;
}
12788 vm_map_offset_t offset
,
12789 vm_page_info_flavor_t flavor
,
12790 vm_page_info_t info
,
12791 mach_msg_type_number_t
*count
)
12793 vm_map_entry_t map_entry
;
12794 vm_object_t object
;
12797 kern_return_t retval
= KERN_SUCCESS
;
12798 boolean_t top_object
;
12801 vm_page_info_basic_t basic_info
;
12803 vm_map_offset_t offset_in_page
;
12806 case VM_PAGE_INFO_BASIC
:
12807 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
) {
12809 * The "vm_page_info_basic_data" structure was not
12810 * properly padded, so allow the size to be off by
12811 * one to maintain backwards binary compatibility...
12813 if (*count
!= VM_PAGE_INFO_BASIC_COUNT
- 1)
12814 return KERN_INVALID_ARGUMENT
;
12818 return KERN_INVALID_ARGUMENT
;
12826 retval
= KERN_SUCCESS
;
12827 offset_in_page
= offset
& PAGE_MASK
;
12828 offset
= vm_map_trunc_page(offset
, PAGE_MASK
);
12830 vm_map_lock_read(map
);
12833 * First, find the map entry covering "offset", going down
12834 * submaps if necessary.
12837 if (!vm_map_lookup_entry(map
, offset
, &map_entry
)) {
12838 vm_map_unlock_read(map
);
12839 return KERN_INVALID_ADDRESS
;
12841 /* compute offset from this map entry's start */
12842 offset
-= map_entry
->vme_start
;
12843 /* compute offset into this map entry's object (or submap) */
12844 offset
+= map_entry
->offset
;
12846 if (map_entry
->is_sub_map
) {
12849 sub_map
= map_entry
->object
.sub_map
;
12850 vm_map_lock_read(sub_map
);
12851 vm_map_unlock_read(map
);
12855 ref_count
= MAX(ref_count
, map
->ref_count
);
12861 object
= map_entry
->object
.vm_object
;
12862 if (object
== VM_OBJECT_NULL
) {
12863 /* no object -> no page */
12864 vm_map_unlock_read(map
);
12868 vm_object_lock(object
);
12869 vm_map_unlock_read(map
);
12872 * Go down the VM object shadow chain until we find the page
12873 * we're looking for.
12876 ref_count
= MAX(ref_count
, object
->ref_count
);
12878 m
= vm_page_lookup(object
, offset
);
12880 if (m
!= VM_PAGE_NULL
) {
12881 disposition
|= VM_PAGE_QUERY_PAGE_PRESENT
;
12885 if (object
->existence_map
) {
12886 if (vm_external_state_get(object
->existence_map
,
12888 VM_EXTERNAL_STATE_EXISTS
) {
12890 * this page has been paged out
12892 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
12897 if (object
->internal
&&
12899 !object
->terminating
&&
12900 object
->pager_ready
) {
12902 if (COMPRESSED_PAGER_IS_ACTIVE
|| DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE
) {
12903 if (VM_COMPRESSOR_PAGER_STATE_GET(
12906 == VM_EXTERNAL_STATE_EXISTS
) {
12907 /* the pager has that page */
12908 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
12912 memory_object_t pager
;
12914 vm_object_paging_begin(object
);
12915 pager
= object
->pager
;
12916 vm_object_unlock(object
);
12919 * Ask the default pager if
12920 * it has this page.
12922 kr
= memory_object_data_request(
12924 offset
+ object
->paging_offset
,
12925 0, /* just poke the pager */
12929 vm_object_lock(object
);
12930 vm_object_paging_end(object
);
12932 if (kr
== KERN_SUCCESS
) {
12933 /* the default pager has it */
12934 disposition
|= VM_PAGE_QUERY_PAGE_PAGED_OUT
;
12940 if (object
->shadow
!= VM_OBJECT_NULL
) {
12941 vm_object_t shadow
;
12943 offset
+= object
->vo_shadow_offset
;
12944 shadow
= object
->shadow
;
12946 vm_object_lock(shadow
);
12947 vm_object_unlock(object
);
12950 top_object
= FALSE
;
12953 // if (!object->internal)
12955 // retval = KERN_FAILURE;
12956 // goto done_with_object;
12961 /* The ref_count is not strictly accurate, it measures the number */
12962 /* of entities holding a ref on the object, they may not be mapping */
12963 /* the object or may not be mapping the section holding the */
12964 /* target page but its still a ball park number and though an over- */
12965 /* count, it picks up the copy-on-write cases */
12967 /* We could also get a picture of page sharing from pmap_attributes */
12968 /* but this would under count as only faulted-in mappings would */
12971 if (top_object
== TRUE
&& object
->shadow
)
12972 disposition
|= VM_PAGE_QUERY_PAGE_COPIED
;
12974 if (! object
->internal
)
12975 disposition
|= VM_PAGE_QUERY_PAGE_EXTERNAL
;
12977 if (m
== VM_PAGE_NULL
)
12978 goto done_with_object
;
12980 if (m
->fictitious
) {
12981 disposition
|= VM_PAGE_QUERY_PAGE_FICTITIOUS
;
12982 goto done_with_object
;
12984 if (m
->dirty
|| pmap_is_modified(m
->phys_page
))
12985 disposition
|= VM_PAGE_QUERY_PAGE_DIRTY
;
12987 if (m
->reference
|| pmap_is_referenced(m
->phys_page
))
12988 disposition
|= VM_PAGE_QUERY_PAGE_REF
;
12990 if (m
->speculative
)
12991 disposition
|= VM_PAGE_QUERY_PAGE_SPECULATIVE
;
12993 if (m
->cs_validated
)
12994 disposition
|= VM_PAGE_QUERY_PAGE_CS_VALIDATED
;
12996 disposition
|= VM_PAGE_QUERY_PAGE_CS_TAINTED
;
12999 vm_object_unlock(object
);
13003 case VM_PAGE_INFO_BASIC
:
13004 basic_info
= (vm_page_info_basic_t
) info
;
13005 basic_info
->disposition
= disposition
;
13006 basic_info
->ref_count
= ref_count
;
13007 basic_info
->object_id
= (vm_object_id_t
) (uintptr_t)
13008 VM_KERNEL_ADDRPERM(object
);
13009 basic_info
->offset
=
13010 (memory_object_offset_t
) offset
+ offset_in_page
;
13011 basic_info
->depth
= depth
;
/*
 *	Synchronises the memory range specified with its backing store
 *	image by either flushing or cleaning the contents to the appropriate
 *	memory manager engaging in a memory object synchronize dialog with
 *	the manager.  The client doesn't return until the manager issues
 *	m_o_s_completed message.  MIG Magically converts user task parameter
 *	to the task's address map.
 *
 *	interpretation of sync_flags
 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
 *				  pages to manager.
 *
 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
 *				- discard pages, write dirty or precious
 *				  pages back to memory manager.
 *
 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
 *				- write dirty or precious pages back to
 *				  the memory manager.
 *
 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
 *				  is a hole in the region, and we would
 *				  have returned KERN_SUCCESS, return
 *				  KERN_INVALID_ADDRESS instead.
 *
 *	The memory object attributes have not yet been implemented, this
 *	function will have to deal with the invalidate attribute
 *
 *	KERN_INVALID_TASK		Bad task parameter
 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
 *	KERN_SUCCESS			The usual.
 *	KERN_INVALID_ADDRESS		There was a hole in the region.
 */
13059 vm_map_address_t address
,
13060 vm_map_size_t size
,
13061 vm_sync_t sync_flags
)
13064 msync_req_t new_msr
;
13065 queue_chain_t req_q
; /* queue of requests for this msync */
13066 vm_map_entry_t entry
;
13067 vm_map_size_t amount_left
;
13068 vm_object_offset_t offset
;
13069 boolean_t do_sync_req
;
13070 boolean_t had_hole
= FALSE
;
13071 memory_object_t pager
;
13073 if ((sync_flags
& VM_SYNC_ASYNCHRONOUS
) &&
13074 (sync_flags
& VM_SYNC_SYNCHRONOUS
))
13075 return(KERN_INVALID_ARGUMENT
);
13078 * align address and size on page boundaries
13080 size
= (vm_map_round_page(address
+ size
,
13081 VM_MAP_PAGE_MASK(map
)) -
13082 vm_map_trunc_page(address
,
13083 VM_MAP_PAGE_MASK(map
)));
13084 address
= vm_map_trunc_page(address
,
13085 VM_MAP_PAGE_MASK(map
));
13087 if (map
== VM_MAP_NULL
)
13088 return(KERN_INVALID_TASK
);
13091 return(KERN_SUCCESS
);
13093 queue_init(&req_q
);
13094 amount_left
= size
;
13096 while (amount_left
> 0) {
13097 vm_object_size_t flush_size
;
13098 vm_object_t object
;
13101 if (!vm_map_lookup_entry(map
,
13104 VM_MAP_PAGE_MASK(map
)),
13107 vm_map_size_t skip
;
13110 * hole in the address map.
13115 * Check for empty map.
13117 if (entry
== vm_map_to_entry(map
) &&
13118 entry
->vme_next
== entry
) {
13119 vm_map_unlock(map
);
13123 * Check that we don't wrap and that
13124 * we have at least one real map entry.
13126 if ((map
->hdr
.nentries
== 0) ||
13127 (entry
->vme_next
->vme_start
< address
)) {
13128 vm_map_unlock(map
);
13132 * Move up to the next entry if needed
13134 skip
= (entry
->vme_next
->vme_start
- address
);
13135 if (skip
>= amount_left
)
13138 amount_left
-= skip
;
13139 address
= entry
->vme_next
->vme_start
;
13140 vm_map_unlock(map
);
13144 offset
= address
- entry
->vme_start
;
13147 * do we have more to flush than is contained in this
13150 if (amount_left
+ entry
->vme_start
+ offset
> entry
->vme_end
) {
13151 flush_size
= entry
->vme_end
-
13152 (entry
->vme_start
+ offset
);
13154 flush_size
= amount_left
;
13156 amount_left
-= flush_size
;
13157 address
+= flush_size
;
13159 if (entry
->is_sub_map
== TRUE
) {
13160 vm_map_t local_map
;
13161 vm_map_offset_t local_offset
;
13163 local_map
= entry
->object
.sub_map
;
13164 local_offset
= entry
->offset
;
13165 vm_map_unlock(map
);
13170 sync_flags
) == KERN_INVALID_ADDRESS
) {
13175 object
= entry
->object
.vm_object
;
13178 * We can't sync this object if the object has not been
13181 if (object
== VM_OBJECT_NULL
) {
13182 vm_map_unlock(map
);
13185 offset
+= entry
->offset
;
13187 vm_object_lock(object
);
13189 if (sync_flags
& (VM_SYNC_KILLPAGES
| VM_SYNC_DEACTIVATE
)) {
13190 int kill_pages
= 0;
13191 boolean_t reusable_pages
= FALSE
;
13193 if (sync_flags
& VM_SYNC_KILLPAGES
) {
13194 if (object
->ref_count
== 1 && !object
->shadow
)
13199 if (kill_pages
!= -1)
13200 vm_object_deactivate_pages(object
, offset
,
13201 (vm_object_size_t
)flush_size
, kill_pages
, reusable_pages
);
13202 vm_object_unlock(object
);
13203 vm_map_unlock(map
);
13207 * We can't sync this object if there isn't a pager.
13208 * Don't bother to sync internal objects, since there can't
13209 * be any "permanent" storage for these objects anyway.
13211 if ((object
->pager
== MEMORY_OBJECT_NULL
) ||
13212 (object
->internal
) || (object
->private)) {
13213 vm_object_unlock(object
);
13214 vm_map_unlock(map
);
13218 * keep reference on the object until syncing is done
13220 vm_object_reference_locked(object
);
13221 vm_object_unlock(object
);
13223 vm_map_unlock(map
);
13225 do_sync_req
= vm_object_sync(object
,
13228 sync_flags
& VM_SYNC_INVALIDATE
,
13229 ((sync_flags
& VM_SYNC_SYNCHRONOUS
) ||
13230 (sync_flags
& VM_SYNC_ASYNCHRONOUS
)),
13231 sync_flags
& VM_SYNC_SYNCHRONOUS
);
13233 * only send a m_o_s if we returned pages or if the entry
13234 * is writable (ie dirty pages may have already been sent back)
13236 if (!do_sync_req
) {
13237 if ((sync_flags
& VM_SYNC_INVALIDATE
) && object
->resident_page_count
== 0) {
13239 * clear out the clustering and read-ahead hints
13241 vm_object_lock(object
);
13243 object
->pages_created
= 0;
13244 object
->pages_used
= 0;
13245 object
->sequential
= 0;
13246 object
->last_alloc
= 0;
13248 vm_object_unlock(object
);
13250 vm_object_deallocate(object
);
13253 msync_req_alloc(new_msr
);
13255 vm_object_lock(object
);
13256 offset
+= object
->paging_offset
;
13258 new_msr
->offset
= offset
;
13259 new_msr
->length
= flush_size
;
13260 new_msr
->object
= object
;
13261 new_msr
->flag
= VM_MSYNC_SYNCHRONIZING
;
13265 * We can't sync this object if there isn't a pager. The
13266 * pager can disappear anytime we're not holding the object
13267 * lock. So this has to be checked anytime we goto re_iterate.
13270 pager
= object
->pager
;
13272 if (pager
== MEMORY_OBJECT_NULL
) {
13273 vm_object_unlock(object
);
13274 vm_object_deallocate(object
);
13275 msync_req_free(new_msr
);
13280 queue_iterate(&object
->msr_q
, msr
, msync_req_t
, msr_q
) {
13282 * need to check for overlapping entry, if found, wait
13283 * on overlapping msr to be done, then reiterate
13286 if (msr
->flag
== VM_MSYNC_SYNCHRONIZING
&&
13287 ((offset
>= msr
->offset
&&
13288 offset
< (msr
->offset
+ msr
->length
)) ||
13289 (msr
->offset
>= offset
&&
13290 msr
->offset
< (offset
+ flush_size
))))
13292 assert_wait((event_t
) msr
,THREAD_INTERRUPTIBLE
);
13294 vm_object_unlock(object
);
13295 thread_block(THREAD_CONTINUE_NULL
);
13296 vm_object_lock(object
);
13300 }/* queue_iterate */
13302 queue_enter(&object
->msr_q
, new_msr
, msync_req_t
, msr_q
);
13304 vm_object_paging_begin(object
);
13305 vm_object_unlock(object
);
13307 queue_enter(&req_q
, new_msr
, msync_req_t
, req_q
);
13309 (void) memory_object_synchronize(
13313 sync_flags
& ~VM_SYNC_CONTIGUOUS
);
13315 vm_object_lock(object
);
13316 vm_object_paging_end(object
);
13317 vm_object_unlock(object
);
13321 * wait for memory_object_sychronize_completed messages from pager(s)
13324 while (!queue_empty(&req_q
)) {
13325 msr
= (msync_req_t
)queue_first(&req_q
);
13327 while(msr
->flag
!= VM_MSYNC_DONE
) {
13328 assert_wait((event_t
) msr
, THREAD_INTERRUPTIBLE
);
13330 thread_block(THREAD_CONTINUE_NULL
);
13333 queue_remove(&req_q
, msr
, msync_req_t
, req_q
);
13335 vm_object_deallocate(msr
->object
);
13336 msync_req_free(msr
);
13337 }/* queue_iterate */
13339 /* for proper msync() behaviour */
13340 if (had_hole
== TRUE
&& (sync_flags
& VM_SYNC_CONTIGUOUS
))
13341 return(KERN_INVALID_ADDRESS
);
13343 return(KERN_SUCCESS
);
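/*
 * Illustrative sketch (not part of the original source): typical flag
 * combinations for the routine above, mirroring msync(2) semantics.  The
 * routine's name is elided from this excerpt; "vm_map_msync" is assumed
 * from the released XNU sources, and "map", "addr" and "size" are assumed
 * to be in scope.
 */
#if 0	/* example only, not compiled */
	kern_return_t	kr;

	/* synchronous flush of dirty pages; fail if the range has holes */
	kr = vm_map_msync(map, addr, size,	/* assumed name, see above */
			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);

	/* asynchronous flush, also discarding clean pages afterwards */
	kr = vm_map_msync(map, addr, size,
			  VM_SYNC_ASYNCHRONOUS | VM_SYNC_INVALIDATE);
#endif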
/*
 *	Routine:	convert_port_entry_to_map
 *
 *	Convert from a port specifying an entry or a task
 *	to a map. Doesn't consume the port ref; produces a map ref,
 *	which may be null.  Unlike convert_port_to_map, the
 *	port may be task or a named entry backed.
 */
vm_map_t
convert_port_entry_to_map(
	ipc_port_t	port)
{
	vm_map_t		map;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
		while(TRUE) {
			if(ip_active(port) && (ip_kotype(port)
					       == IKOT_NAMED_ENTRY)) {
				named_entry =
					(vm_named_entry_t)port->ip_kobject;
				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
					try_failed_count++;
					mutex_pause(try_failed_count);
					continue;
				}
				named_entry->ref_count++;
				lck_mtx_unlock(&(named_entry)->Lock);

				if ((named_entry->is_sub_map) &&
				    (named_entry->protection
				     & VM_PROT_WRITE)) {
					map = named_entry->backing.map;
				} else {
					mach_destroy_memory_entry(port);
					return VM_MAP_NULL;
				}
				vm_map_reference_swap(map);
				mach_destroy_memory_entry(port);
				break;
			}
			else
				return VM_MAP_NULL;
		}
	}
	else
		map = convert_port_to_map(port);

	return map;
}
/*
 *	Routine:	convert_port_entry_to_object
 *
 *	Convert from a port specifying a named entry to an
 *	object. Doesn't consume the port ref; produces a map ref,
 *	which may be null.
 */
vm_object_t
convert_port_entry_to_object(
	ipc_port_t	port)
{
	vm_object_t		object = VM_OBJECT_NULL;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if (IP_VALID(port) &&
	    (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
	try_again:
		if (ip_active(port) &&
		    (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
			named_entry = (vm_named_entry_t)port->ip_kobject;
			if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
				try_failed_count++;
				mutex_pause(try_failed_count);
				goto try_again;
			}
			named_entry->ref_count++;
			lck_mtx_unlock(&(named_entry)->Lock);

			if (!(named_entry->is_sub_map) &&
			    !(named_entry->is_pager) &&
			    !(named_entry->is_copy) &&
			    (named_entry->protection & VM_PROT_WRITE)) {
				object = named_entry->backing.object;
				vm_object_reference(object);
			}
			mach_destroy_memory_entry(port);
		}
	}

	return object;
}
/*
 * Export routines to other components for the things we access locally through
 * macros.
 */
vm_map_t
current_map(void)
{
	return (current_map_fast());
}

/*
 *	vm_map_reference:
 *
 *	Most code internal to the osfmk will go through a
 *	macro defining this.  This is always here for the
 *	use of other kernel components.
 */
#undef vm_map_reference
void
vm_map_reference(
	register vm_map_t	map)
{
	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
	assert(map->res_count > 0);
	assert(map->ref_count >= map->res_count);
	map->ref_count++;
	lck_mtx_unlock(&map->s_lock);
}

/*
 *	vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(
	register vm_map_t	map)
{
	unsigned int		ref;

	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
	ref = --map->ref_count;
	if (ref > 0) {
		vm_map_res_deallocate(map);
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	assert(map->ref_count == 0);
	lck_mtx_unlock(&map->s_lock);

	/*
	 * The map residence count isn't decremented here because
	 * the vm_map_delete below will traverse the entire map,
	 * deleting entries, and the residence counts on objects
	 * and sharing maps will go away then.
	 */

	vm_map_destroy(map, VM_MAP_NO_FLAGS);
}
void
vm_map_disable_NX(vm_map_t map)
{
	if (map->pmap == NULL)
		return;

	pmap_disable_NX(map->pmap);
}

void
vm_map_disallow_data_exec(vm_map_t map)
{
	map->map_disallow_data_exec = TRUE;
}

/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
 * more descriptive.
 */
void
vm_map_set_32bit(vm_map_t map)
{
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
}

void
vm_map_set_64bit(vm_map_t map)
{
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
}

vm_map_offset_t
vm_compute_max_offset(unsigned is64)
{
	return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
}

vm_map_get_max_aslr_slide_pages(vm_map_t map)
{
	return (1 << (vm_map_is_64bit(map) ? 16 : 8));
}

boolean_t
vm_map_is_64bit(
	vm_map_t map)
{
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}

boolean_t
vm_map_has_hard_pagezero(
	vm_map_t	map,
	vm_map_offset_t	pagezero_size)
{
	/*
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	return (map->min_offset >= pagezero_size);
}

void
vm_map_set_4GB_pagezero(vm_map_t map)
{
#pragma unused(map)
}

void
vm_map_clear_4GB_pagezero(vm_map_t map)
{
#pragma unused(map)
}
/*
 * Raise a VM map's maximum offset.
 */
kern_return_t
vm_map_raise_max_offset(
	vm_map_t	map,
	vm_map_offset_t	new_max_offset)
{
	kern_return_t	ret;

	vm_map_lock(map);
	ret = KERN_INVALID_ADDRESS;

	if (new_max_offset >= map->max_offset) {
		if (!vm_map_is_64bit(map)) {
			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		} else {
			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		}
	}

	vm_map_unlock(map);
	return ret;
}

/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t	map,
	vm_map_offset_t	new_min_offset)
{
	vm_map_entry_t	first_entry;

	new_min_offset = vm_map_round_page(new_min_offset,
					   VM_MAP_PAGE_MASK(map));

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
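/*
 * Illustrative sketch (not part of the original source): reserving a hard
 * page zero by raising the map's minimum offset before any user mappings
 * exist, then verifying it with vm_map_has_hard_pagezero().  Using a single
 * page as the reservation size is an assumption for illustration only.
 */
#if 0	/* example only, not compiled */
	kern_return_t	kr;

	kr = vm_map_raise_min_offset(map, (vm_map_offset_t) PAGE_SIZE);
	if (kr == KERN_SUCCESS) {
		assert(vm_map_has_hard_pagezero(map,
						(vm_map_offset_t) PAGE_SIZE));
	}
#endif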
/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
 * don't have to reach over to the BSD data structures.
 */
void
vm_map_set_user_wire_limit(vm_map_t	map,
			   vm_size_t	limit)
{
	map->user_wire_limit = limit;
}


void vm_map_switch_protect(vm_map_t	map,
			   boolean_t	val)
{
	vm_map_lock(map);
	map->switch_protect=val;
	vm_map_unlock(map);
}

/*
 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
 * bump both counters.
 */
void
vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_credit(pmap->ledger, task_ledgers.iokit_mem, bytes);
	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}

void
vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
{
	pmap_t pmap = vm_map_pmap(map);

	ledger_debit(pmap->ledger, task_ledgers.iokit_mem, bytes);
	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}
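/*
 * Illustrative sketch (not part of the original source): the two ledger
 * helpers above are expected to be called in matched pairs with the same
 * byte count, so that iokit_mem and phys_footprint return to their previous
 * values once the mapping goes away.  "map" and "bytes" are assumed to be
 * in scope.
 */
#if 0	/* example only, not compiled */
	vm_map_iokit_mapped_region(map, bytes);		/* after mapping */
	/* ... region is in use ... */
	vm_map_iokit_unmapped_region(map, bytes);	/* when unmapping */
#endif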
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t vm_map_sign(vm_map_t map,
		 vm_map_offset_t start,
		 vm_map_offset_t end)
{
	vm_map_entry_t	entry;
	vm_page_t	m;
	vm_object_t	object;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	if((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ARGUMENT);
	}

	object = entry->object.vm_object;
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while(start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object, start - entry->vme_start + entry->offset);
		if (m==VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->busy ||
		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		m->cs_validated = TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->wpmapped = FALSE;
		refmod = pmap_disconnect(m->phys_page);

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
13831 kern_return_t
vm_map_freeze_walk(
13833 unsigned int *purgeable_count
,
13834 unsigned int *wired_count
,
13835 unsigned int *clean_count
,
13836 unsigned int *dirty_count
,
13837 unsigned int dirty_budget
,
13838 boolean_t
*has_shared
)
13840 vm_map_entry_t entry
;
13842 vm_map_lock_read(map
);
13844 *purgeable_count
= *wired_count
= *clean_count
= *dirty_count
= 0;
13845 *has_shared
= FALSE
;
13847 for (entry
= vm_map_first_entry(map
);
13848 entry
!= vm_map_to_entry(map
);
13849 entry
= entry
->vme_next
) {
13850 unsigned int purgeable
, clean
, dirty
, wired
;
13853 if ((entry
->object
.vm_object
== 0) ||
13854 (entry
->is_sub_map
) ||
13855 (entry
->object
.vm_object
->phys_contiguous
)) {
13859 default_freezer_pack(&purgeable
, &wired
, &clean
, &dirty
, dirty_budget
, &shared
, entry
->object
.vm_object
, NULL
);
13861 *purgeable_count
+= purgeable
;
13862 *wired_count
+= wired
;
13863 *clean_count
+= clean
;
13864 *dirty_count
+= dirty
;
13867 *has_shared
= TRUE
;
13870 /* Adjust pageout budget and finish up if reached */
13871 if (dirty_budget
) {
13872 dirty_budget
-= dirty
;
13873 if (dirty_budget
== 0) {
13879 vm_map_unlock_read(map
);
13881 return KERN_SUCCESS
;
13884 kern_return_t
vm_map_freeze(
13886 unsigned int *purgeable_count
,
13887 unsigned int *wired_count
,
13888 unsigned int *clean_count
,
13889 unsigned int *dirty_count
,
13890 unsigned int dirty_budget
,
13891 boolean_t
*has_shared
)
13893 vm_map_entry_t entry2
= VM_MAP_ENTRY_NULL
;
13894 kern_return_t kr
= KERN_SUCCESS
;
13895 boolean_t default_freezer_active
= TRUE
;
13897 *purgeable_count
= *wired_count
= *clean_count
= *dirty_count
= 0;
13898 *has_shared
= FALSE
;
13901 * We need the exclusive lock here so that we can
13902 * block any page faults or lookups while we are
13903 * in the middle of freezing this vm map.
13907 if (COMPRESSED_PAGER_IS_ACTIVE
|| DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE
) {
13908 default_freezer_active
= FALSE
;
13911 if (default_freezer_active
) {
13912 if (map
->default_freezer_handle
== NULL
) {
13913 map
->default_freezer_handle
= default_freezer_handle_allocate();
13916 if ((kr
= default_freezer_handle_init(map
->default_freezer_handle
)) != KERN_SUCCESS
) {
13918 * Can happen if default_freezer_handle passed in is NULL
13919 * Or, a table has already been allocated and associated
13920 * with this handle, i.e. the map is already frozen.
13926 for (entry2
= vm_map_first_entry(map
);
13927 entry2
!= vm_map_to_entry(map
);
13928 entry2
= entry2
->vme_next
) {
13930 vm_object_t src_object
= entry2
->object
.vm_object
;
13932 if (entry2
->object
.vm_object
&& !entry2
->is_sub_map
&& !entry2
->object
.vm_object
->phys_contiguous
) {
13933 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13934 if (default_freezer_active
) {
13935 unsigned int purgeable
, clean
, dirty
, wired
;
13938 default_freezer_pack(&purgeable
, &wired
, &clean
, &dirty
, dirty_budget
, &shared
,
13939 src_object
, map
->default_freezer_handle
);
13941 *purgeable_count
+= purgeable
;
13942 *wired_count
+= wired
;
13943 *clean_count
+= clean
;
13944 *dirty_count
+= dirty
;
13946 /* Adjust pageout budget and finish up if reached */
13947 if (dirty_budget
) {
13948 dirty_budget
-= dirty
;
13949 if (dirty_budget
== 0) {
13955 *has_shared
= TRUE
;
13959 * To the compressor.
13961 if (entry2
->object
.vm_object
->internal
== TRUE
) {
13962 vm_object_pageout(entry2
->object
.vm_object
);
13968 if (default_freezer_active
) {
13969 /* Finally, throw out the pages to swap */
13970 default_freezer_pageout(map
->default_freezer_handle
);
13974 vm_map_unlock(map
);
13983 kern_return_t kr
= KERN_SUCCESS
;
13985 if (COMPRESSED_PAGER_IS_ACTIVE
|| DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE
) {
13987 * We will on-demand thaw in the presence of the compressed pager.
13994 if (map
->default_freezer_handle
== NULL
) {
13996 * This map is not in a frozen state.
14002 kr
= default_freezer_unpack(map
->default_freezer_handle
);
14004 vm_map_unlock(map
);
/*
 * vm_map_entry_should_cow_for_true_share:
 *
 * Determines if the map entry should be clipped and setup for copy-on-write
 * to avoid applying "true_share" to a large VM object when only a subset is
 * targeted.
 *
 * For now, we target only the map entries created for the Objective C
 * Garbage Collector, which initially have the following properties:
 *	- alias == VM_MEMORY_MALLOC
 *	- wired_count == 0
 * and a VM object with:
 *	- copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
 *	- vo_size == ANON_CHUNK_SIZE
 */
boolean_t
vm_map_entry_should_cow_for_true_share(
	vm_map_entry_t	entry)
{
	vm_object_t	object;

	if (entry->is_sub_map) {
		/* entry does not point at a VM object */
		return FALSE;
	}

	if (entry->needs_copy) {
		/* already set for copy_on_write: done! */
		return FALSE;
	}

	if (entry->alias != VM_MEMORY_MALLOC) {
		/* not tagged as an ObjectiveC's Garbage Collector entry */
		return FALSE;
	}

	if (entry->wired_count) {
		/* wired: can't change the map entry... */
		return FALSE;
	}

	object = entry->object.vm_object;

	if (object == VM_OBJECT_NULL) {
		/* no object yet... */
		return FALSE;
	}

	if (!object->internal) {
		/* not an internal object */
		return FALSE;
	}

	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
		/* not the default copy strategy */
		return FALSE;
	}

	if (object->true_share) {
		/* already true_share: too late to avoid it */
		return FALSE;
	}

	if (object->vo_size != ANON_CHUNK_SIZE) {
		/* not an object created for the ObjC Garbage Collector */
		return FALSE;
	}

	/*
	 * All the criteria match: we have a large object being targeted for "true_share".
	 * To limit the adverse side-effects linked with "true_share", tell the caller to
	 * try and avoid setting up the entire object for "true_share" by clipping the
	 * targeted range and setting it up for copy-on-write.
	 */
	return TRUE;
}

vm_map_offset_t
vm_map_round_page_mask(
	vm_map_offset_t	offset,
	vm_map_offset_t	mask)
{
	return VM_MAP_ROUND_PAGE(offset, mask);
}

vm_map_offset_t
vm_map_trunc_page_mask(
	vm_map_offset_t	offset,
	vm_map_offset_t	mask)
{
	return VM_MAP_TRUNC_PAGE(offset, mask);
}

int
vm_map_page_shift(
	vm_map_t map)
{
	return VM_MAP_PAGE_SHIFT(map);
}

int
vm_map_page_size(
	vm_map_t map)
{
	return VM_MAP_PAGE_SIZE(map);
}

vm_map_offset_t
vm_map_page_mask(
	vm_map_t map)
{
	return VM_MAP_PAGE_MASK(map);
}

kern_return_t
vm_map_set_page_shift(
	vm_map_t	map,
	int		pageshift)
{
	if (map->hdr.nentries != 0) {
		/* too late to change page size */
		return KERN_FAILURE;
	}

	map->hdr.page_shift = pageshift;

	return KERN_SUCCESS;
}
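/*
 * Worked example (not part of the original source): with a 16KB-page map,
 * VM_MAP_PAGE_MASK(map) is 0x3fff, so
 *	vm_map_round_page_mask(0x1001, 0x3fff) == 0x4000
 *	vm_map_trunc_page_mask(0x1001, 0x3fff) == 0x0000
 * and vm_map_set_page_shift(map, 14) only succeeds while the map is still
 * empty (hdr.nentries == 0).
 */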
kern_return_t
vm_map_query_volatile(
	vm_map_t	map,
	mach_vm_size_t	*volatile_virtual_size_p,
	mach_vm_size_t	*volatile_resident_size_p,
	mach_vm_size_t	*volatile_pmap_size_p)
{
	mach_vm_size_t	volatile_virtual_size;
	mach_vm_size_t	volatile_resident_count;
	mach_vm_size_t	volatile_pmap_count;
	mach_vm_size_t	resident_count;
	vm_map_entry_t	entry;
	vm_object_t	object;

	/* map should be locked by caller */

	volatile_virtual_size = 0;
	volatile_resident_count = 0;
	volatile_pmap_count = 0;

	for (entry = vm_map_first_entry(map);
	     entry != vm_map_to_entry(map);
	     entry = entry->vme_next) {
		if (entry->is_sub_map) {
			continue;
		}
		if (! (entry->protection & VM_PROT_WRITE)) {
			continue;
		}
		object = entry->object.vm_object;
		if (object == VM_OBJECT_NULL) {
			continue;
		}
		if (object->purgable != VM_PURGABLE_VOLATILE) {
			continue;
		}
		if (entry->offset != 0) {
			/*
			 * If the map entry has been split and the object now
			 * appears several times in the VM map, we don't want
			 * to count the object's resident_page_count more than
			 * once.  We count it only for the first one, starting
			 * at offset 0 and ignore the other VM map entries.
			 */
			continue;
		}
		resident_count = object->resident_page_count;
		if ((entry->offset / PAGE_SIZE) >= resident_count) {
			resident_count = 0;
		} else {
			resident_count -= (entry->offset / PAGE_SIZE);
		}

		volatile_virtual_size += entry->vme_end - entry->vme_start;
		volatile_resident_count += resident_count;
		volatile_pmap_count += pmap_query_resident(map->pmap,
							   entry->vme_start,
							   entry->vme_end);
	}

	/* map is still locked on return */

	*volatile_virtual_size_p = volatile_virtual_size;
	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;

	return KERN_SUCCESS;
}
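/*
 * Illustrative sketch (not part of the original source): the caller owns
 * the map lock around vm_map_query_volatile(), per the comment above, and
 * receives the three sizes in bytes.  The choice of the read lock here is
 * an assumption for illustration.
 */
#if 0	/* example only, not compiled */
	mach_vm_size_t	vol_virtual, vol_resident, vol_pmap;

	vm_map_lock_read(map);
	(void) vm_map_query_volatile(map, &vol_virtual, &vol_resident,
				     &vol_pmap);
	vm_map_unlock_read(map);
#endif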