2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
63 * Virtual memory mapping module.
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
106 #include <ppc/mappings.h>
109 #include <vm/vm_protos.h>
110 #include <vm/vm_shared_region.h>
112 /* Internal prototypes
115 static void vm_map_simplify_range(
117 vm_map_offset_t start
,
118 vm_map_offset_t end
); /* forward */
120 static boolean_t
vm_map_range_check(
122 vm_map_offset_t start
,
124 vm_map_entry_t
*entry
);
126 static vm_map_entry_t
_vm_map_entry_create(
127 struct vm_map_header
*map_header
);
129 static void _vm_map_entry_dispose(
130 struct vm_map_header
*map_header
,
131 vm_map_entry_t entry
);
133 static void vm_map_pmap_enter(
135 vm_map_offset_t addr
,
136 vm_map_offset_t end_addr
,
138 vm_object_offset_t offset
,
139 vm_prot_t protection
);
141 static void _vm_map_clip_end(
142 struct vm_map_header
*map_header
,
143 vm_map_entry_t entry
,
144 vm_map_offset_t end
);
146 static void _vm_map_clip_start(
147 struct vm_map_header
*map_header
,
148 vm_map_entry_t entry
,
149 vm_map_offset_t start
);
151 static void vm_map_entry_delete(
153 vm_map_entry_t entry
);
155 static kern_return_t
vm_map_delete(
157 vm_map_offset_t start
,
162 static kern_return_t
vm_map_copy_overwrite_unaligned(
164 vm_map_entry_t entry
,
166 vm_map_address_t start
);
168 static kern_return_t
vm_map_copy_overwrite_aligned(
170 vm_map_entry_t tmp_entry
,
172 vm_map_offset_t start
,
175 static kern_return_t
vm_map_copyin_kernel_buffer(
177 vm_map_address_t src_addr
,
179 boolean_t src_destroy
,
180 vm_map_copy_t
*copy_result
); /* OUT */
182 static kern_return_t
vm_map_copyout_kernel_buffer(
184 vm_map_address_t
*addr
, /* IN/OUT */
186 boolean_t overwrite
);
188 static void vm_map_fork_share(
190 vm_map_entry_t old_entry
,
193 static boolean_t
vm_map_fork_copy(
195 vm_map_entry_t
*old_entry_p
,
198 void vm_map_region_top_walk(
199 vm_map_entry_t entry
,
200 vm_region_top_info_t top
);
202 void vm_map_region_walk(
205 vm_map_entry_t entry
,
206 vm_object_offset_t offset
,
207 vm_object_size_t range
,
208 vm_region_extended_info_t extended
,
209 boolean_t look_for_pages
);
211 static kern_return_t
vm_map_wire_nested(
213 vm_map_offset_t start
,
215 vm_prot_t access_type
,
218 vm_map_offset_t pmap_addr
);
220 static kern_return_t
vm_map_unwire_nested(
222 vm_map_offset_t start
,
226 vm_map_offset_t pmap_addr
);
228 static kern_return_t
vm_map_overwrite_submap_recurse(
230 vm_map_offset_t dst_addr
,
231 vm_map_size_t dst_size
);
233 static kern_return_t
vm_map_copy_overwrite_nested(
235 vm_map_offset_t dst_addr
,
237 boolean_t interruptible
,
240 static kern_return_t
vm_map_remap_extract(
242 vm_map_offset_t addr
,
245 struct vm_map_header
*map_header
,
246 vm_prot_t
*cur_protection
,
247 vm_prot_t
*max_protection
,
248 vm_inherit_t inheritance
,
251 static kern_return_t
vm_map_remap_range_allocate(
253 vm_map_address_t
*address
,
255 vm_map_offset_t mask
,
257 vm_map_entry_t
*map_entry
);
259 static void vm_map_region_look_for_page(
263 vm_object_offset_t offset
,
266 vm_region_extended_info_t extended
);
268 static int vm_map_region_count_obj_refs(
269 vm_map_entry_t entry
,
273 static kern_return_t
vm_map_willneed(
275 vm_map_offset_t start
,
276 vm_map_offset_t end
);
278 static kern_return_t
vm_map_reuse_pages(
280 vm_map_offset_t start
,
281 vm_map_offset_t end
);
283 static kern_return_t
vm_map_reusable_pages(
285 vm_map_offset_t start
,
286 vm_map_offset_t end
);
288 static kern_return_t
vm_map_can_reuse(
290 vm_map_offset_t start
,
291 vm_map_offset_t end
);
294 * Macros to copy a vm_map_entry. We must be careful to correctly
295 * manage the wired page count. vm_map_entry_copy() creates a new
296 * map entry to the same memory - the wired count in the new entry
297 * must be set to zero. vm_map_entry_copy_full() creates a new
298 * entry that is identical to the old entry. This preserves the
299 * wire count; it's used for map splitting and zone changing in
302 #define vm_map_entry_copy(NEW,OLD) \
305 (NEW)->is_shared = FALSE; \
306 (NEW)->needs_wakeup = FALSE; \
307 (NEW)->in_transition = FALSE; \
308 (NEW)->wired_count = 0; \
309 (NEW)->user_wired_count = 0; \
310 (NEW)->permanent = FALSE; \
313 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
316 * Decide if we want to allow processes to execute from their data or stack areas.
317 * override_nx() returns true if we do. Data/stack execution can be enabled independently
318 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
319 * or allow_stack_exec to enable data execution for that type of data area for that particular
320 * ABI (or both by or'ing the flags together). These are initialized in the architecture
321 * specific pmap files since the default behavior varies according to architecture. The
322 * main reason it varies is because of the need to provide binary compatibility with old
323 * applications that were written before these restrictions came into being. In the old
324 * days, an app could execute anything it could read, but this has slowly been tightened
325 * up over time. The default behavior is:
327 * 32-bit PPC apps may execute from both stack and data areas
328 * 32-bit Intel apps may exeucte from data areas but not stack
329 * 64-bit PPC/Intel apps may not execute from either data or stack
331 * An application on any architecture may override these defaults by explicitly
332 * adding PROT_EXEC permission to the page in question with the mprotect(2)
333 * system call. This code here just determines what happens when an app tries to
334 * execute from a page that lacks execute permission.
336 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
337 * default behavior for both 32 and 64 bit apps on a system-wide basis.
340 extern int allow_data_exec
, allow_stack_exec
;
343 override_nx(vm_map_t map
, uint32_t user_tag
) /* map unused on arm */
348 * Determine if the app is running in 32 or 64 bit mode.
351 if (vm_map_is_64bit(map
))
352 current_abi
= VM_ABI_64
;
354 current_abi
= VM_ABI_32
;
357 * Determine if we should allow the execution based on whether it's a
358 * stack or data area and the current architecture.
361 if (user_tag
== VM_MEMORY_STACK
)
362 return allow_stack_exec
& current_abi
;
364 return allow_data_exec
& current_abi
;
369 * Virtual memory maps provide for the mapping, protection,
370 * and sharing of virtual memory objects. In addition,
371 * this module provides for an efficient virtual copy of
372 * memory from one map to another.
374 * Synchronization is required prior to most operations.
376 * Maps consist of an ordered doubly-linked list of simple
377 * entries; a single hint is used to speed up lookups.
379 * Sharing maps have been deleted from this version of Mach.
380 * All shared objects are now mapped directly into the respective
381 * maps. This requires a change in the copy on write strategy;
382 * the asymmetric (delayed) strategy is used for shared temporary
383 * objects instead of the symmetric (shadow) strategy. All maps
384 * are now "top level" maps (either task map, kernel map or submap
385 * of the kernel map).
387 * Since portions of maps are specified by start/end addreses,
388 * which may not align with existing map entries, all
389 * routines merely "clip" entries to these start/end values.
390 * [That is, an entry is split into two, bordering at a
391 * start or end value.] Note that these clippings may not
392 * always be necessary (as the two resulting entries are then
393 * not changed); however, the clipping is done for convenience.
394 * No attempt is currently made to "glue back together" two
397 * The symmetric (shadow) copy strategy implements virtual copy
398 * by copying VM object references from one map to
399 * another, and then marking both regions as copy-on-write.
400 * It is important to note that only one writeable reference
401 * to a VM object region exists in any map when this strategy
402 * is used -- this means that shadow object creation can be
403 * delayed until a write operation occurs. The symmetric (delayed)
404 * strategy allows multiple maps to have writeable references to
405 * the same region of a vm object, and hence cannot delay creating
406 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
407 * Copying of permanent objects is completely different; see
408 * vm_object_copy_strategically() in vm_object.c.
411 static zone_t vm_map_zone
; /* zone for vm_map structures */
412 static zone_t vm_map_entry_zone
; /* zone for vm_map_entry structures */
413 static zone_t vm_map_kentry_zone
; /* zone for kernel entry structures */
414 static zone_t vm_map_copy_zone
; /* zone for vm_map_copy structures */
418 * Placeholder object for submap operations. This object is dropped
419 * into the range by a call to vm_map_find, and removed when
420 * vm_map_submap creates the submap.
423 vm_object_t vm_submap_object
;
425 static void *map_data
;
426 static vm_size_t map_data_size
;
427 static void *kentry_data
;
428 static vm_size_t kentry_data_size
;
429 static int kentry_count
= 2048; /* to init kentry_data_size */
431 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
434 /* Skip acquiring locks if we're in the midst of a kernel core dump */
435 unsigned int not_in_kdp
= 1;
437 #if CONFIG_CODE_DECRYPTION
439 * vm_map_apple_protected:
440 * This remaps the requested part of the object with an object backed by
441 * the decrypting pager.
442 * crypt_info contains entry points and session data for the crypt module.
443 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
444 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
447 vm_map_apple_protected(
449 vm_map_offset_t start
,
451 struct pager_crypt_info
*crypt_info
)
453 boolean_t map_locked
;
455 vm_map_entry_t map_entry
;
456 memory_object_t protected_mem_obj
;
457 vm_object_t protected_object
;
458 vm_map_offset_t map_addr
;
460 vm_map_lock_read(map
);
463 /* lookup the protected VM object */
464 if (!vm_map_lookup_entry(map
,
467 map_entry
->vme_end
< end
||
468 map_entry
->is_sub_map
) {
469 /* that memory is not properly mapped */
470 kr
= KERN_INVALID_ARGUMENT
;
473 protected_object
= map_entry
->object
.vm_object
;
474 if (protected_object
== VM_OBJECT_NULL
) {
475 /* there should be a VM object here at this point */
476 kr
= KERN_INVALID_ARGUMENT
;
480 /* make sure protected object stays alive while map is unlocked */
481 vm_object_reference(protected_object
);
483 vm_map_unlock_read(map
);
487 * Lookup (and create if necessary) the protected memory object
488 * matching that VM object.
489 * If successful, this also grabs a reference on the memory object,
490 * to guarantee that it doesn't go away before we get a chance to map
493 protected_mem_obj
= apple_protect_pager_setup(protected_object
, crypt_info
);
495 /* release extra ref on protected object */
496 vm_object_deallocate(protected_object
);
498 if (protected_mem_obj
== NULL
) {
503 /* map this memory object in place of the current one */
505 kr
= vm_map_enter_mem_object(map
,
508 (mach_vm_offset_t
) 0,
509 VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
,
510 (ipc_port_t
) protected_mem_obj
,
512 (start
- map_entry
->vme_start
)),
514 map_entry
->protection
,
515 map_entry
->max_protection
,
516 map_entry
->inheritance
);
517 assert(map_addr
== start
);
519 * Release the reference obtained by apple_protect_pager_setup().
520 * The mapping (if it succeeded) is now holding a reference on the
523 memory_object_deallocate(protected_mem_obj
);
527 vm_map_unlock_read(map
);
531 #endif /* CONFIG_CODE_DECRYPTION */
534 lck_grp_t vm_map_lck_grp
;
535 lck_grp_attr_t vm_map_lck_grp_attr
;
536 lck_attr_t vm_map_lck_attr
;
542 * Initialize the vm_map module. Must be called before
543 * any other vm_map routines.
545 * Map and entry structures are allocated from zones -- we must
546 * initialize those zones.
548 * There are three zones of interest:
550 * vm_map_zone: used to allocate maps.
551 * vm_map_entry_zone: used to allocate map entries.
552 * vm_map_kentry_zone: used to allocate map entries for the kernel.
554 * The kernel allocates map entries from a special zone that is initially
555 * "crammed" with memory. It would be difficult (perhaps impossible) for
556 * the kernel to allocate more memory to a entry zone when it became
557 * empty since the very act of allocating memory implies the creation
564 vm_map_zone
= zinit((vm_map_size_t
) sizeof(struct _vm_map
), 40*1024,
566 zone_change(vm_map_zone
, Z_NOENCRYPT
, TRUE
);
569 vm_map_entry_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_entry
),
570 1024*1024, PAGE_SIZE
*5,
571 "non-kernel map entries");
572 zone_change(vm_map_entry_zone
, Z_NOENCRYPT
, TRUE
);
574 vm_map_kentry_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_entry
),
575 kentry_data_size
, kentry_data_size
,
576 "kernel map entries");
577 zone_change(vm_map_kentry_zone
, Z_NOENCRYPT
, TRUE
);
579 vm_map_copy_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_copy
),
580 16*1024, PAGE_SIZE
, "map copies");
581 zone_change(vm_map_copy_zone
, Z_NOENCRYPT
, TRUE
);
584 * Cram the map and kentry zones with initial data.
585 * Set kentry_zone non-collectible to aid zone_gc().
587 zone_change(vm_map_zone
, Z_COLLECT
, FALSE
);
588 zone_change(vm_map_kentry_zone
, Z_COLLECT
, FALSE
);
589 zone_change(vm_map_kentry_zone
, Z_EXPAND
, FALSE
);
590 zone_change(vm_map_kentry_zone
, Z_FOREIGN
, TRUE
);
591 zcram(vm_map_zone
, map_data
, map_data_size
);
592 zcram(vm_map_kentry_zone
, kentry_data
, kentry_data_size
);
594 lck_grp_attr_setdefault(&vm_map_lck_grp_attr
);
595 lck_grp_init(&vm_map_lck_grp
, "vm_map", &vm_map_lck_grp_attr
);
596 lck_attr_setdefault(&vm_map_lck_attr
);
603 map_data_size
= round_page(10 * sizeof(struct _vm_map
));
604 map_data
= pmap_steal_memory(map_data_size
);
608 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
609 * physical page (i.e. that beyond the kernel image and page tables)
610 * individually; we guess at most one entry per eight pages in the
611 * real world. This works out to roughly .1 of 1% of physical memory,
612 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
615 kentry_count
= pmap_free_pages() / 8;
619 round_page(kentry_count
* sizeof(struct vm_map_entry
));
620 kentry_data
= pmap_steal_memory(kentry_data_size
);
626 * Creates and returns a new empty VM map with
627 * the given physical map structure, and having
628 * the given lower and upper address bounds.
637 static int color_seed
= 0;
638 register vm_map_t result
;
640 result
= (vm_map_t
) zalloc(vm_map_zone
);
641 if (result
== VM_MAP_NULL
)
642 panic("vm_map_create");
644 vm_map_first_entry(result
) = vm_map_to_entry(result
);
645 vm_map_last_entry(result
) = vm_map_to_entry(result
);
646 result
->hdr
.nentries
= 0;
647 result
->hdr
.entries_pageable
= pageable
;
650 result
->user_wire_limit
= MACH_VM_MAX_ADDRESS
; /* default limit is unlimited */
651 result
->user_wire_size
= 0;
652 result
->ref_count
= 1;
654 result
->res_count
= 1;
655 result
->sw_state
= MAP_SW_IN
;
656 #endif /* TASK_SWAPPER */
658 result
->min_offset
= min
;
659 result
->max_offset
= max
;
660 result
->wiring_required
= FALSE
;
661 result
->no_zero_fill
= FALSE
;
662 result
->mapped
= FALSE
;
663 result
->wait_for_space
= FALSE
;
664 result
->switch_protect
= FALSE
;
665 result
->first_free
= vm_map_to_entry(result
);
666 result
->hint
= vm_map_to_entry(result
);
667 result
->color_rr
= (color_seed
++) & vm_color_mask
;
668 vm_map_lock_init(result
);
669 lck_mtx_init_ext(&result
->s_lock
, &result
->s_lock_ext
, &vm_map_lck_grp
, &vm_map_lck_attr
);
675 * vm_map_entry_create: [ internal use only ]
677 * Allocates a VM map entry for insertion in the
678 * given map (or map copy). No fields are filled.
680 #define vm_map_entry_create(map) \
681 _vm_map_entry_create(&(map)->hdr)
683 #define vm_map_copy_entry_create(copy) \
684 _vm_map_entry_create(&(copy)->cpy_hdr)
686 static vm_map_entry_t
687 _vm_map_entry_create(
688 register struct vm_map_header
*map_header
)
690 register zone_t zone
;
691 register vm_map_entry_t entry
;
693 if (map_header
->entries_pageable
)
694 zone
= vm_map_entry_zone
;
696 zone
= vm_map_kentry_zone
;
698 entry
= (vm_map_entry_t
) zalloc(zone
);
699 if (entry
== VM_MAP_ENTRY_NULL
)
700 panic("vm_map_entry_create");
706 * vm_map_entry_dispose: [ internal use only ]
708 * Inverse of vm_map_entry_create.
710 * write map lock held so no need to
711 * do anything special to insure correctness
714 #define vm_map_entry_dispose(map, entry) \
716 if((entry) == (map)->first_free) \
717 (map)->first_free = vm_map_to_entry(map); \
718 if((entry) == (map)->hint) \
719 (map)->hint = vm_map_to_entry(map); \
720 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
723 #define vm_map_copy_entry_dispose(map, entry) \
724 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
727 _vm_map_entry_dispose(
728 register struct vm_map_header
*map_header
,
729 register vm_map_entry_t entry
)
731 register zone_t zone
;
733 if (map_header
->entries_pageable
)
734 zone
= vm_map_entry_zone
;
736 zone
= vm_map_kentry_zone
;
742 static boolean_t
first_free_is_valid(vm_map_t map
); /* forward */
743 static boolean_t first_free_check
= FALSE
;
748 vm_map_entry_t entry
, next
;
750 if (!first_free_check
)
753 entry
= vm_map_to_entry(map
);
754 next
= entry
->vme_next
;
755 while (vm_map_trunc_page(next
->vme_start
) == vm_map_trunc_page(entry
->vme_end
) ||
756 (vm_map_trunc_page(next
->vme_start
) == vm_map_trunc_page(entry
->vme_start
) &&
757 next
!= vm_map_to_entry(map
))) {
759 next
= entry
->vme_next
;
760 if (entry
== vm_map_to_entry(map
))
763 if (map
->first_free
!= entry
) {
764 printf("Bad first_free for map %p: %p should be %p\n",
765 map
, map
->first_free
, entry
);
770 #endif /* MACH_ASSERT */
775 * Updates the map->first_free pointer to the
776 * entry immediately before the first hole in the map.
777 * The map should be locked.
779 #define UPDATE_FIRST_FREE(map, new_first_free) \
782 vm_map_entry_t UFF_first_free; \
783 vm_map_entry_t UFF_next_entry; \
785 UFF_first_free = (new_first_free); \
786 UFF_next_entry = UFF_first_free->vme_next; \
787 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
788 vm_map_trunc_page(UFF_first_free->vme_end) || \
789 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
790 vm_map_trunc_page(UFF_first_free->vme_start) && \
791 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
792 UFF_first_free = UFF_next_entry; \
793 UFF_next_entry = UFF_first_free->vme_next; \
794 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
797 UFF_map->first_free = UFF_first_free; \
798 assert(first_free_is_valid(UFF_map)); \
802 * vm_map_entry_{un,}link:
804 * Insert/remove entries from maps (or map copies).
806 #define vm_map_entry_link(map, after_where, entry) \
809 vm_map_entry_t VMEL_entry; \
811 VMEL_entry = (entry); \
812 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
813 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
817 #define vm_map_copy_entry_link(copy, after_where, entry) \
818 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
820 #define _vm_map_entry_link(hdr, after_where, entry) \
823 (entry)->vme_prev = (after_where); \
824 (entry)->vme_next = (after_where)->vme_next; \
825 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
828 #define vm_map_entry_unlink(map, entry) \
831 vm_map_entry_t VMEU_entry; \
832 vm_map_entry_t VMEU_first_free; \
834 VMEU_entry = (entry); \
835 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
836 VMEU_first_free = VMEU_entry->vme_prev; \
838 VMEU_first_free = VMEU_map->first_free; \
839 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
840 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
843 #define vm_map_copy_entry_unlink(copy, entry) \
844 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
846 #define _vm_map_entry_unlink(hdr, entry) \
849 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
850 (entry)->vme_prev->vme_next = (entry)->vme_next; \
853 #if MACH_ASSERT && TASK_SWAPPER
855 * vm_map_res_reference:
857 * Adds another valid residence count to the given map.
859 * Map is locked so this function can be called from
863 void vm_map_res_reference(register vm_map_t map
)
865 /* assert map is locked */
866 assert(map
->res_count
>= 0);
867 assert(map
->ref_count
>= map
->res_count
);
868 if (map
->res_count
== 0) {
869 lck_mtx_unlock(&map
->s_lock
);
872 lck_mtx_lock(&map
->s_lock
);
880 * vm_map_reference_swap:
882 * Adds valid reference and residence counts to the given map.
884 * The map may not be in memory (i.e. zero residence count).
887 void vm_map_reference_swap(register vm_map_t map
)
889 assert(map
!= VM_MAP_NULL
);
890 lck_mtx_lock(&map
->s_lock
);
891 assert(map
->res_count
>= 0);
892 assert(map
->ref_count
>= map
->res_count
);
894 vm_map_res_reference(map
);
895 lck_mtx_unlock(&map
->s_lock
);
899 * vm_map_res_deallocate:
901 * Decrement residence count on a map; possibly causing swapout.
903 * The map must be in memory (i.e. non-zero residence count).
905 * The map is locked, so this function is callable from vm_map_deallocate.
908 void vm_map_res_deallocate(register vm_map_t map
)
910 assert(map
->res_count
> 0);
911 if (--map
->res_count
== 0) {
912 lck_mtx_unlock(&map
->s_lock
);
916 lck_mtx_lock(&map
->s_lock
);
918 assert(map
->ref_count
>= map
->res_count
);
920 #endif /* MACH_ASSERT && TASK_SWAPPER */
925 * Actually destroy a map.
934 /* clean up regular map entries */
935 (void) vm_map_delete(map
, map
->min_offset
, map
->max_offset
,
937 /* clean up leftover special mappings (commpage, etc...) */
940 * PPC51: ppc64 is limited to 51-bit addresses.
941 * Memory beyond this 51-bit limit is mapped specially at the
942 * pmap level, so do not interfere.
943 * On PPC64, the commpage is mapped beyond the addressable range
944 * via a special pmap hack, so ask pmap to clean it explicitly...
947 pmap_unmap_sharedpage(map
->pmap
);
949 /* ... and do not let regular pmap cleanup apply here */
950 flags
|= VM_MAP_REMOVE_NO_PMAP_CLEANUP
;
952 (void) vm_map_delete(map
, 0x0, 0xFFFFFFFFFFFFF000ULL
,
956 assert(map
->hdr
.nentries
== 0);
959 pmap_destroy(map
->pmap
);
961 zfree(vm_map_zone
, map
);
966 * vm_map_swapin/vm_map_swapout
968 * Swap a map in and out, either referencing or releasing its resources.
969 * These functions are internal use only; however, they must be exported
970 * because they may be called from macros, which are exported.
972 * In the case of swapout, there could be races on the residence count,
973 * so if the residence count is up, we return, assuming that a
974 * vm_map_deallocate() call in the near future will bring us back.
977 * -- We use the map write lock for synchronization among races.
978 * -- The map write lock, and not the simple s_lock, protects the
979 * swap state of the map.
980 * -- If a map entry is a share map, then we hold both locks, in
981 * hierarchical order.
983 * Synchronization Notes:
984 * 1) If a vm_map_swapin() call happens while swapout in progress, it
985 * will block on the map lock and proceed when swapout is through.
986 * 2) A vm_map_reference() call at this time is illegal, and will
987 * cause a panic. vm_map_reference() is only allowed on resident
988 * maps, since it refuses to block.
989 * 3) A vm_map_swapin() call during a swapin will block, and
990 * proceeed when the first swapin is done, turning into a nop.
991 * This is the reason the res_count is not incremented until
992 * after the swapin is complete.
993 * 4) There is a timing hole after the checks of the res_count, before
994 * the map lock is taken, during which a swapin may get the lock
995 * before a swapout about to happen. If this happens, the swapin
996 * will detect the state and increment the reference count, causing
997 * the swapout to be a nop, thereby delaying it until a later
998 * vm_map_deallocate. If the swapout gets the lock first, then
999 * the swapin will simply block until the swapout is done, and
1002 * Because vm_map_swapin() is potentially an expensive operation, it
1003 * should be used with caution.
1006 * 1) A map with a residence count of zero is either swapped, or
1008 * 2) A map with a non-zero residence count is either resident,
1009 * or being swapped in.
1012 int vm_map_swap_enable
= 1;
1014 void vm_map_swapin (vm_map_t map
)
1016 register vm_map_entry_t entry
;
1018 if (!vm_map_swap_enable
) /* debug */
1023 * First deal with various races.
1025 if (map
->sw_state
== MAP_SW_IN
)
1027 * we raced with swapout and won. Returning will incr.
1028 * the res_count, turning the swapout into a nop.
1033 * The residence count must be zero. If we raced with another
1034 * swapin, the state would have been IN; if we raced with a
1035 * swapout (after another competing swapin), we must have lost
1036 * the race to get here (see above comment), in which case
1037 * res_count is still 0.
1039 assert(map
->res_count
== 0);
1042 * There are no intermediate states of a map going out or
1043 * coming in, since the map is locked during the transition.
1045 assert(map
->sw_state
== MAP_SW_OUT
);
1048 * We now operate upon each map entry. If the entry is a sub-
1049 * or share-map, we call vm_map_res_reference upon it.
1050 * If the entry is an object, we call vm_object_res_reference
1051 * (this may iterate through the shadow chain).
1052 * Note that we hold the map locked the entire time,
1053 * even if we get back here via a recursive call in
1054 * vm_map_res_reference.
1056 entry
= vm_map_first_entry(map
);
1058 while (entry
!= vm_map_to_entry(map
)) {
1059 if (entry
->object
.vm_object
!= VM_OBJECT_NULL
) {
1060 if (entry
->is_sub_map
) {
1061 vm_map_t lmap
= entry
->object
.sub_map
;
1062 lck_mtx_lock(&lmap
->s_lock
);
1063 vm_map_res_reference(lmap
);
1064 lck_mtx_unlock(&lmap
->s_lock
);
1066 vm_object_t object
= entry
->object
.vm_object
;
1067 vm_object_lock(object
);
1069 * This call may iterate through the
1072 vm_object_res_reference(object
);
1073 vm_object_unlock(object
);
1076 entry
= entry
->vme_next
;
1078 assert(map
->sw_state
== MAP_SW_OUT
);
1079 map
->sw_state
= MAP_SW_IN
;
1082 void vm_map_swapout(vm_map_t map
)
1084 register vm_map_entry_t entry
;
1088 * First deal with various races.
1089 * If we raced with a swapin and lost, the residence count
1090 * will have been incremented to 1, and we simply return.
1092 lck_mtx_lock(&map
->s_lock
);
1093 if (map
->res_count
!= 0) {
1094 lck_mtx_unlock(&map
->s_lock
);
1097 lck_mtx_unlock(&map
->s_lock
);
1100 * There are no intermediate states of a map going out or
1101 * coming in, since the map is locked during the transition.
1103 assert(map
->sw_state
== MAP_SW_IN
);
1105 if (!vm_map_swap_enable
)
1109 * We now operate upon each map entry. If the entry is a sub-
1110 * or share-map, we call vm_map_res_deallocate upon it.
1111 * If the entry is an object, we call vm_object_res_deallocate
1112 * (this may iterate through the shadow chain).
1113 * Note that we hold the map locked the entire time,
1114 * even if we get back here via a recursive call in
1115 * vm_map_res_deallocate.
1117 entry
= vm_map_first_entry(map
);
1119 while (entry
!= vm_map_to_entry(map
)) {
1120 if (entry
->object
.vm_object
!= VM_OBJECT_NULL
) {
1121 if (entry
->is_sub_map
) {
1122 vm_map_t lmap
= entry
->object
.sub_map
;
1123 lck_mtx_lock(&lmap
->s_lock
);
1124 vm_map_res_deallocate(lmap
);
1125 lck_mtx_unlock(&lmap
->s_lock
);
1127 vm_object_t object
= entry
->object
.vm_object
;
1128 vm_object_lock(object
);
1130 * This call may take a long time,
1131 * since it could actively push
1132 * out pages (if we implement it
1135 vm_object_res_deallocate(object
);
1136 vm_object_unlock(object
);
1139 entry
= entry
->vme_next
;
1141 assert(map
->sw_state
== MAP_SW_IN
);
1142 map
->sw_state
= MAP_SW_OUT
;
1145 #endif /* TASK_SWAPPER */
1149 * SAVE_HINT_MAP_READ:
1151 * Saves the specified entry as the hint for
1152 * future lookups. only a read lock is held on map,
1153 * so make sure the store is atomic... OSCompareAndSwap
1154 * guarantees this... also, we don't care if we collide
1155 * and someone else wins and stores their 'hint'
1157 #define SAVE_HINT_MAP_READ(map,value) \
1159 OSCompareAndSwapPtr((map)->hint, value, &(map)->hint); \
1164 * SAVE_HINT_MAP_WRITE:
1166 * Saves the specified entry as the hint for
1167 * future lookups. write lock held on map,
1168 * so no one else can be writing or looking
1169 * until the lock is dropped, so it's safe
1170 * to just do an assignment
1172 #define SAVE_HINT_MAP_WRITE(map,value) \
1174 (map)->hint = (value); \
1178 * vm_map_lookup_entry: [ internal use only ]
1180 * Finds the map entry containing (or
1181 * immediately preceding) the specified address
1182 * in the given map; the entry is returned
1183 * in the "entry" parameter. The boolean
1184 * result indicates whether the address is
1185 * actually contained in the map.
1188 vm_map_lookup_entry(
1189 register vm_map_t map
,
1190 register vm_map_offset_t address
,
1191 vm_map_entry_t
*entry
) /* OUT */
1193 register vm_map_entry_t cur
;
1194 register vm_map_entry_t last
;
1197 * Start looking either from the head of the
1198 * list, or from the hint.
1202 if (cur
== vm_map_to_entry(map
))
1203 cur
= cur
->vme_next
;
1205 if (address
>= cur
->vme_start
) {
1207 * Go from hint to end of list.
1209 * But first, make a quick check to see if
1210 * we are already looking at the entry we
1211 * want (which is usually the case).
1212 * Note also that we don't need to save the hint
1213 * here... it is the same hint (unless we are
1214 * at the header, in which case the hint didn't
1215 * buy us anything anyway).
1217 last
= vm_map_to_entry(map
);
1218 if ((cur
!= last
) && (cur
->vme_end
> address
)) {
1225 * Go from start to hint, *inclusively*
1227 last
= cur
->vme_next
;
1228 cur
= vm_map_first_entry(map
);
1235 while (cur
!= last
) {
1236 if (cur
->vme_end
> address
) {
1237 if (address
>= cur
->vme_start
) {
1239 * Save this lookup for future
1244 SAVE_HINT_MAP_READ(map
, cur
);
1250 cur
= cur
->vme_next
;
1252 *entry
= cur
->vme_prev
;
1253 SAVE_HINT_MAP_READ(map
, *entry
);
1259 * Routine: vm_map_find_space
1261 * Allocate a range in the specified virtual address map,
1262 * returning the entry allocated for that range.
1263 * Used by kmem_alloc, etc.
1265 * The map must be NOT be locked. It will be returned locked
1266 * on KERN_SUCCESS, unlocked on failure.
1268 * If an entry is allocated, the object/offset fields
1269 * are initialized to zero.
1273 register vm_map_t map
,
1274 vm_map_offset_t
*address
, /* OUT */
1276 vm_map_offset_t mask
,
1278 vm_map_entry_t
*o_entry
) /* OUT */
1280 register vm_map_entry_t entry
, new_entry
;
1281 register vm_map_offset_t start
;
1282 register vm_map_offset_t end
;
1286 return KERN_INVALID_ARGUMENT
;
1289 if (flags
& VM_FLAGS_GUARD_AFTER
) {
1290 /* account for the back guard page in the size */
1291 size
+= PAGE_SIZE_64
;
1294 new_entry
= vm_map_entry_create(map
);
1297 * Look for the first possible address; if there's already
1298 * something at this address, we have to start after it.
1303 assert(first_free_is_valid(map
));
1304 if ((entry
= map
->first_free
) == vm_map_to_entry(map
))
1305 start
= map
->min_offset
;
1307 start
= entry
->vme_end
;
1310 * In any case, the "entry" always precedes
1311 * the proposed new region throughout the loop:
1315 register vm_map_entry_t next
;
1318 * Find the end of the proposed new region.
1319 * Be sure we didn't go beyond the end, or
1320 * wrap around the address.
1323 if (flags
& VM_FLAGS_GUARD_BEFORE
) {
1324 /* reserve space for the front guard page */
1325 start
+= PAGE_SIZE_64
;
1327 end
= ((start
+ mask
) & ~mask
);
1330 vm_map_entry_dispose(map
, new_entry
);
1332 return(KERN_NO_SPACE
);
1337 if ((end
> map
->max_offset
) || (end
< start
)) {
1338 vm_map_entry_dispose(map
, new_entry
);
1340 return(KERN_NO_SPACE
);
1344 * If there are no more entries, we must win.
1347 next
= entry
->vme_next
;
1348 if (next
== vm_map_to_entry(map
))
1352 * If there is another entry, it must be
1353 * after the end of the potential new region.
1356 if (next
->vme_start
>= end
)
1360 * Didn't fit -- move to the next entry.
1364 start
= entry
->vme_end
;
1369 * "start" and "end" should define the endpoints of the
1370 * available new range, and
1371 * "entry" should refer to the region before the new
1374 * the map should be locked.
1377 if (flags
& VM_FLAGS_GUARD_BEFORE
) {
1378 /* go back for the front guard page */
1379 start
-= PAGE_SIZE_64
;
1383 new_entry
->vme_start
= start
;
1384 new_entry
->vme_end
= end
;
1385 assert(page_aligned(new_entry
->vme_start
));
1386 assert(page_aligned(new_entry
->vme_end
));
1388 new_entry
->is_shared
= FALSE
;
1389 new_entry
->is_sub_map
= FALSE
;
1390 new_entry
->use_pmap
= FALSE
;
1391 new_entry
->object
.vm_object
= VM_OBJECT_NULL
;
1392 new_entry
->offset
= (vm_object_offset_t
) 0;
1394 new_entry
->needs_copy
= FALSE
;
1396 new_entry
->inheritance
= VM_INHERIT_DEFAULT
;
1397 new_entry
->protection
= VM_PROT_DEFAULT
;
1398 new_entry
->max_protection
= VM_PROT_ALL
;
1399 new_entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
1400 new_entry
->wired_count
= 0;
1401 new_entry
->user_wired_count
= 0;
1403 new_entry
->in_transition
= FALSE
;
1404 new_entry
->needs_wakeup
= FALSE
;
1405 new_entry
->no_cache
= FALSE
;
1406 new_entry
->permanent
= FALSE
;
1407 new_entry
->superpage_size
= 0;
1409 new_entry
->alias
= 0;
1410 new_entry
->zero_wired_pages
= FALSE
;
1412 VM_GET_FLAGS_ALIAS(flags
, new_entry
->alias
);
1415 * Insert the new entry into the list
1418 vm_map_entry_link(map
, entry
, new_entry
);
1423 * Update the lookup hint
1425 SAVE_HINT_MAP_WRITE(map
, new_entry
);
1427 *o_entry
= new_entry
;
1428 return(KERN_SUCCESS
);
1431 int vm_map_pmap_enter_print
= FALSE
;
1432 int vm_map_pmap_enter_enable
= FALSE
;
1435 * Routine: vm_map_pmap_enter [internal only]
1438 * Force pages from the specified object to be entered into
1439 * the pmap at the specified address if they are present.
1440 * As soon as a page not found in the object the scan ends.
1445 * In/out conditions:
1446 * The source map should not be locked on entry.
1451 register vm_map_offset_t addr
,
1452 register vm_map_offset_t end_addr
,
1453 register vm_object_t object
,
1454 vm_object_offset_t offset
,
1455 vm_prot_t protection
)
1463 while (addr
< end_addr
) {
1464 register vm_page_t m
;
1466 vm_object_lock(object
);
1468 m
= vm_page_lookup(object
, offset
);
1471 * The user should never see encrypted data, so do not
1472 * enter an encrypted page in the page table.
1474 if (m
== VM_PAGE_NULL
|| m
->busy
|| m
->encrypted
||
1476 (m
->unusual
&& ( m
->error
|| m
->restart
|| m
->absent
))) {
1477 vm_object_unlock(object
);
1481 if (vm_map_pmap_enter_print
) {
1482 printf("vm_map_pmap_enter:");
1483 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1484 map
, (unsigned long long)addr
, object
, (unsigned long long)offset
);
1486 type_of_fault
= DBG_CACHE_HIT_FAULT
;
1487 kr
= vm_fault_enter(m
, map
->pmap
, addr
, protection
,
1488 VM_PAGE_WIRED(m
), FALSE
, FALSE
,
1491 vm_object_unlock(object
);
1493 offset
+= PAGE_SIZE_64
;
1498 boolean_t
vm_map_pmap_is_empty(
1500 vm_map_offset_t start
,
1501 vm_map_offset_t end
);
1502 boolean_t
vm_map_pmap_is_empty(
1504 vm_map_offset_t start
,
1505 vm_map_offset_t end
)
1507 #ifdef MACHINE_PMAP_IS_EMPTY
1508 return pmap_is_empty(map
->pmap
, start
, end
);
1509 #else /* MACHINE_PMAP_IS_EMPTY */
1510 vm_map_offset_t offset
;
1513 if (map
->pmap
== NULL
) {
1517 for (offset
= start
;
1519 offset
+= PAGE_SIZE
) {
1520 phys_page
= pmap_find_phys(map
->pmap
, offset
);
1522 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1523 "page %d at 0x%llx\n",
1524 map
, (long long)start
, (long long)end
,
1525 phys_page
, (long long)offset
);
1530 #endif /* MACHINE_PMAP_IS_EMPTY */
1534 * Routine: vm_map_enter
1537 * Allocate a range in the specified virtual address map.
1538 * The resulting range will refer to memory defined by
1539 * the given memory object and offset into that object.
1541 * Arguments are as defined in the vm_map call.
1543 int _map_enter_debug
= 0;
1544 static unsigned int vm_map_enter_restore_successes
= 0;
1545 static unsigned int vm_map_enter_restore_failures
= 0;
1549 vm_map_offset_t
*address
, /* IN/OUT */
1551 vm_map_offset_t mask
,
1554 vm_object_offset_t offset
,
1555 boolean_t needs_copy
,
1556 vm_prot_t cur_protection
,
1557 vm_prot_t max_protection
,
1558 vm_inherit_t inheritance
)
1560 vm_map_entry_t entry
, new_entry
;
1561 vm_map_offset_t start
, tmp_start
, tmp_offset
;
1562 vm_map_offset_t end
, tmp_end
;
1563 vm_map_offset_t tmp2_start
, tmp2_end
;
1564 vm_map_offset_t step
;
1565 kern_return_t result
= KERN_SUCCESS
;
1566 vm_map_t zap_old_map
= VM_MAP_NULL
;
1567 vm_map_t zap_new_map
= VM_MAP_NULL
;
1568 boolean_t map_locked
= FALSE
;
1569 boolean_t pmap_empty
= TRUE
;
1570 boolean_t new_mapping_established
= FALSE
;
1571 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
1572 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
1573 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
1574 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
1575 boolean_t is_submap
= ((flags
& VM_FLAGS_SUBMAP
) != 0);
1576 boolean_t permanent
= ((flags
& VM_FLAGS_PERMANENT
) != 0);
1577 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
1579 vm_map_offset_t effective_min_offset
, effective_max_offset
;
1582 if (superpage_size
) {
1583 switch (superpage_size
) {
1585 * Note that the current implementation only supports
1586 * a single size for superpages, SUPERPAGE_SIZE, per
1587 * architecture. As soon as more sizes are supposed
1588 * to be supported, SUPERPAGE_SIZE has to be replaced
1589 * with a lookup of the size depending on superpage_size.
1592 case SUPERPAGE_SIZE_2MB
:
1596 return KERN_INVALID_ARGUMENT
;
1598 mask
= SUPERPAGE_SIZE
-1;
1599 if (size
& (SUPERPAGE_SIZE
-1))
1600 return KERN_INVALID_ARGUMENT
;
1601 inheritance
= VM_INHERIT_NONE
; /* fork() children won't inherit superpages */
1605 if (cur_protection
& VM_PROT_WRITE
) {
1606 if (cur_protection
& VM_PROT_EXECUTE
) {
1607 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__
);
1608 cur_protection
&= ~VM_PROT_EXECUTE
;
1611 #endif /* CONFIG_EMBEDDED */
1615 /* submaps can not be purgeable */
1616 return KERN_INVALID_ARGUMENT
;
1618 if (object
== VM_OBJECT_NULL
) {
1619 /* submaps can not be created lazily */
1620 return KERN_INVALID_ARGUMENT
;
1623 if (flags
& VM_FLAGS_ALREADY
) {
1625 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1626 * is already present. For it to be meaningul, the requested
1627 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1628 * we shouldn't try and remove what was mapped there first
1629 * (!VM_FLAGS_OVERWRITE).
1631 if ((flags
& VM_FLAGS_ANYWHERE
) ||
1632 (flags
& VM_FLAGS_OVERWRITE
)) {
1633 return KERN_INVALID_ARGUMENT
;
1637 if (flags
& VM_FLAGS_BELOW_MIN
) {
1639 * Allow an insertion below the map's min offset.
1641 effective_min_offset
= 0ULL;
1643 effective_min_offset
= map
->min_offset
;
1646 if (flags
& VM_FLAGS_BEYOND_MAX
) {
1648 * Allow an insertion beyond the map's max offset.
1650 if (vm_map_is_64bit(map
))
1651 effective_max_offset
= 0xFFFFFFFFFFFFF000ULL
;
1653 effective_max_offset
= 0x00000000FFFFF000ULL
;
1655 effective_max_offset
= map
->max_offset
;
1659 (offset
& PAGE_MASK_64
) != 0) {
1661 return KERN_INVALID_ARGUMENT
;
1664 VM_GET_FLAGS_ALIAS(flags
, alias
);
1666 #define RETURN(value) { result = value; goto BailOut; }
1668 assert(page_aligned(*address
));
1669 assert(page_aligned(size
));
1672 * Only zero-fill objects are allowed to be purgable.
1673 * LP64todo - limit purgable objects to 32-bits for now
1677 (object
!= VM_OBJECT_NULL
&&
1678 (object
->size
!= size
||
1679 object
->purgable
== VM_PURGABLE_DENY
))
1680 || size
> ANON_MAX_SIZE
)) /* LP64todo: remove when dp capable */
1681 return KERN_INVALID_ARGUMENT
;
1683 if (!anywhere
&& overwrite
) {
1685 * Create a temporary VM map to hold the old mappings in the
1686 * affected area while we create the new one.
1687 * This avoids releasing the VM map lock in
1688 * vm_map_entry_delete() and allows atomicity
1689 * when we want to replace some mappings with a new one.
1690 * It also allows us to restore the old VM mappings if the
1691 * new mapping fails.
1693 zap_old_map
= vm_map_create(PMAP_NULL
,
1696 map
->hdr
.entries_pageable
);
1708 * Calculate the first possible address.
1711 if (start
< effective_min_offset
)
1712 start
= effective_min_offset
;
1713 if (start
> effective_max_offset
)
1714 RETURN(KERN_NO_SPACE
);
1717 * Look for the first possible address;
1718 * if there's already something at this
1719 * address, we have to start after it.
1722 assert(first_free_is_valid(map
));
1723 if (start
== effective_min_offset
) {
1724 if ((entry
= map
->first_free
) != vm_map_to_entry(map
))
1725 start
= entry
->vme_end
;
1727 vm_map_entry_t tmp_entry
;
1728 if (vm_map_lookup_entry(map
, start
, &tmp_entry
))
1729 start
= tmp_entry
->vme_end
;
1734 * In any case, the "entry" always precedes
1735 * the proposed new region throughout the
1740 register vm_map_entry_t next
;
1743 * Find the end of the proposed new region.
1744 * Be sure we didn't go beyond the end, or
1745 * wrap around the address.
1748 end
= ((start
+ mask
) & ~mask
);
1750 RETURN(KERN_NO_SPACE
);
1754 if ((end
> effective_max_offset
) || (end
< start
)) {
1755 if (map
->wait_for_space
) {
1756 if (size
<= (effective_max_offset
-
1757 effective_min_offset
)) {
1758 assert_wait((event_t
)map
,
1762 thread_block(THREAD_CONTINUE_NULL
);
1766 RETURN(KERN_NO_SPACE
);
1770 * If there are no more entries, we must win.
1773 next
= entry
->vme_next
;
1774 if (next
== vm_map_to_entry(map
))
1778 * If there is another entry, it must be
1779 * after the end of the potential new region.
1782 if (next
->vme_start
>= end
)
1786 * Didn't fit -- move to the next entry.
1790 start
= entry
->vme_end
;
1796 * the address doesn't itself violate
1797 * the mask requirement.
1802 if ((start
& mask
) != 0)
1803 RETURN(KERN_NO_SPACE
);
1806 * ... the address is within bounds
1811 if ((start
< effective_min_offset
) ||
1812 (end
> effective_max_offset
) ||
1814 RETURN(KERN_INVALID_ADDRESS
);
1817 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
1819 * Fixed mapping and "overwrite" flag: attempt to
1820 * remove all existing mappings in the specified
1821 * address range, saving them in our "zap_old_map".
1823 (void) vm_map_delete(map
, start
, end
,
1824 VM_MAP_REMOVE_SAVE_ENTRIES
,
1829 * ... the starting address isn't allocated
1832 if (vm_map_lookup_entry(map
, start
, &entry
)) {
1833 if (! (flags
& VM_FLAGS_ALREADY
)) {
1834 RETURN(KERN_NO_SPACE
);
1837 * Check if what's already there is what we want.
1840 tmp_offset
= offset
;
1841 if (entry
->vme_start
< start
) {
1842 tmp_start
-= start
- entry
->vme_start
;
1843 tmp_offset
-= start
- entry
->vme_start
;
1846 for (; entry
->vme_start
< end
;
1847 entry
= entry
->vme_next
) {
1849 * Check if the mapping's attributes
1850 * match the existing map entry.
1852 if (entry
== vm_map_to_entry(map
) ||
1853 entry
->vme_start
!= tmp_start
||
1854 entry
->is_sub_map
!= is_submap
||
1855 entry
->offset
!= tmp_offset
||
1856 entry
->needs_copy
!= needs_copy
||
1857 entry
->protection
!= cur_protection
||
1858 entry
->max_protection
!= max_protection
||
1859 entry
->inheritance
!= inheritance
||
1860 entry
->alias
!= alias
) {
1861 /* not the same mapping ! */
1862 RETURN(KERN_NO_SPACE
);
1865 * Check if the same object is being mapped.
1868 if (entry
->object
.sub_map
!=
1869 (vm_map_t
) object
) {
1870 /* not the same submap */
1871 RETURN(KERN_NO_SPACE
);
1874 if (entry
->object
.vm_object
!= object
) {
1875 /* not the same VM object... */
1878 obj2
= entry
->object
.vm_object
;
1879 if ((obj2
== VM_OBJECT_NULL
||
1881 (object
== VM_OBJECT_NULL
||
1882 object
->internal
)) {
1889 RETURN(KERN_NO_SPACE
);
1894 tmp_offset
+= entry
->vme_end
- entry
->vme_start
;
1895 tmp_start
+= entry
->vme_end
- entry
->vme_start
;
1896 if (entry
->vme_end
>= end
) {
1897 /* reached the end of our mapping */
1901 /* it all matches: let's use what's already there ! */
1902 RETURN(KERN_MEMORY_PRESENT
);
1906 * ... the next region doesn't overlap the
1910 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
1911 (entry
->vme_next
->vme_start
< end
))
1912 RETURN(KERN_NO_SPACE
);
1917 * "start" and "end" should define the endpoints of the
1918 * available new range, and
1919 * "entry" should refer to the region before the new
1922 * the map should be locked.
1926 * See whether we can avoid creating a new entry (and object) by
1927 * extending one of our neighbors. [So far, we only attempt to
1928 * extend from below.] Note that we can never extend/join
1929 * purgable objects because they need to remain distinct
1930 * entities in order to implement their "volatile object"
1935 if (object
== VM_OBJECT_NULL
) {
1936 object
= vm_object_allocate(size
);
1937 object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
1938 object
->purgable
= VM_PURGABLE_NONVOLATILE
;
1939 offset
= (vm_object_offset_t
)0;
1941 } else if ((is_submap
== FALSE
) &&
1942 (object
== VM_OBJECT_NULL
) &&
1943 (entry
!= vm_map_to_entry(map
)) &&
1944 (entry
->vme_end
== start
) &&
1945 (!entry
->is_shared
) &&
1946 (!entry
->is_sub_map
) &&
1947 (entry
->alias
== alias
) &&
1948 (entry
->inheritance
== inheritance
) &&
1949 (entry
->protection
== cur_protection
) &&
1950 (entry
->max_protection
== max_protection
) &&
1951 (entry
->behavior
== VM_BEHAVIOR_DEFAULT
) &&
1952 (entry
->in_transition
== 0) &&
1953 (entry
->no_cache
== no_cache
) &&
1954 ((entry
->vme_end
- entry
->vme_start
) + size
<=
1955 (alias
== VM_MEMORY_REALLOC
?
1957 NO_COALESCE_LIMIT
)) &&
1958 (entry
->wired_count
== 0)) { /* implies user_wired_count == 0 */
1959 if (vm_object_coalesce(entry
->object
.vm_object
,
1962 (vm_object_offset_t
) 0,
1963 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
),
1964 (vm_map_size_t
)(end
- entry
->vme_end
))) {
1967 * Coalesced the two objects - can extend
1968 * the previous map entry to include the
1971 map
->size
+= (end
- entry
->vme_end
);
1972 entry
->vme_end
= end
;
1973 UPDATE_FIRST_FREE(map
, map
->first_free
);
1974 RETURN(KERN_SUCCESS
);
1978 step
= superpage_size
? SUPERPAGE_SIZE
: (end
- start
);
1981 for (tmp2_start
= start
; tmp2_start
<end
; tmp2_start
+= step
) {
1982 tmp2_end
= tmp2_start
+ step
;
1984 * Create a new entry
1985 * LP64todo - for now, we can only allocate 4GB internal objects
1986 * because the default pager can't page bigger ones. Remove this
1990 * The reserved "page zero" in each process's address space can
1991 * be arbitrarily large. Splitting it into separate 4GB objects and
1992 * therefore different VM map entries serves no purpose and just
1993 * slows down operations on the VM map, so let's not split the
1994 * allocation into 4GB chunks if the max protection is NONE. That
1995 * memory should never be accessible, so it will never get to the
1998 tmp_start
= tmp2_start
;
1999 if (object
== VM_OBJECT_NULL
&&
2000 size
> (vm_map_size_t
)ANON_CHUNK_SIZE
&&
2001 max_protection
!= VM_PROT_NONE
&&
2002 superpage_size
== 0)
2003 tmp_end
= tmp_start
+ (vm_map_size_t
)ANON_CHUNK_SIZE
;
2007 new_entry
= vm_map_entry_insert(map
, entry
, tmp_start
, tmp_end
,
2008 object
, offset
, needs_copy
,
2010 cur_protection
, max_protection
,
2011 VM_BEHAVIOR_DEFAULT
,
2012 inheritance
, 0, no_cache
,
2013 permanent
, superpage_size
);
2014 new_entry
->alias
= alias
;
2017 boolean_t submap_is_64bit
;
2020 new_entry
->is_sub_map
= TRUE
;
2021 submap
= (vm_map_t
) object
;
2022 submap_is_64bit
= vm_map_is_64bit(submap
);
2023 use_pmap
= (alias
== VM_MEMORY_SHARED_PMAP
);
2024 #ifndef NO_NESTED_PMAP
2025 if (use_pmap
&& submap
->pmap
== NULL
) {
2026 /* we need a sub pmap to nest... */
2027 submap
->pmap
= pmap_create(0, submap_is_64bit
);
2028 if (submap
->pmap
== NULL
) {
2029 /* let's proceed without nesting... */
2032 if (use_pmap
&& submap
->pmap
!= NULL
) {
2033 kr
= pmap_nest(map
->pmap
,
2037 tmp_end
- tmp_start
);
2038 if (kr
!= KERN_SUCCESS
) {
2039 printf("vm_map_enter: "
2040 "pmap_nest(0x%llx,0x%llx) "
2042 (long long)tmp_start
,
2046 /* we're now nested ! */
2047 new_entry
->use_pmap
= TRUE
;
2051 #endif /* NO_NESTED_PMAP */
2055 if (superpage_size
) {
2057 vm_object_t sp_object
;
2061 /* allocate one superpage */
2062 kr
= cpm_allocate(SUPERPAGE_SIZE
, &pages
, 0, SUPERPAGE_NBASEPAGES
-1, TRUE
, 0);
2063 if (kr
!= KERN_SUCCESS
) {
2064 new_mapping_established
= TRUE
; /* will cause deallocation of whole range */
2068 /* create one vm_object per superpage */
2069 sp_object
= vm_object_allocate((vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
2070 sp_object
->phys_contiguous
= TRUE
;
2071 sp_object
->shadow_offset
= (vm_object_offset_t
)pages
->phys_page
*PAGE_SIZE
;
2072 entry
->object
.vm_object
= sp_object
;
2074 /* enter the base pages into the object */
2075 vm_object_lock(sp_object
);
2076 for (offset
= 0; offset
< SUPERPAGE_SIZE
; offset
+= PAGE_SIZE
) {
2078 pmap_zero_page(m
->phys_page
);
2079 pages
= NEXT_PAGE(m
);
2080 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
2081 vm_page_insert(m
, sp_object
, offset
);
2083 vm_object_unlock(sp_object
);
2085 } while (tmp_end
!= tmp2_end
&&
2086 (tmp_start
= tmp_end
) &&
2087 (tmp_end
= (tmp2_end
- tmp_end
> (vm_map_size_t
)ANON_CHUNK_SIZE
) ?
2088 tmp_end
+ (vm_map_size_t
)ANON_CHUNK_SIZE
: tmp2_end
));
2094 new_mapping_established
= TRUE
;
2096 /* Wire down the new entry if the user
2097 * requested all new map entries be wired.
2099 if ((map
->wiring_required
)||(superpage_size
)) {
2100 pmap_empty
= FALSE
; /* pmap won't be empty */
2101 result
= vm_map_wire(map
, start
, end
,
2102 new_entry
->protection
, TRUE
);
2106 if ((object
        != VM_OBJECT_NULL) &&
        (vm_map_pmap_enter_enable) &&
        (size < (128*1024))) {
        pmap_empty = FALSE; /* pmap won't be empty */

        if (override_nx(map, alias) && cur_protection)
            cur_protection |= VM_PROT_EXECUTE;

        vm_map_pmap_enter(map, start, end,
                          object, offset, cur_protection);

    if (result == KERN_SUCCESS) {
        vm_prot_t pager_prot;
        memory_object_t pager;

            !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
            assert(vm_map_pmap_is_empty(map,

        /*
         * For "named" VM objects, let the pager know that the
         * memory object is being mapped.  Some pagers need to keep
         * track of this, to know when they can reclaim the memory
         * object, for example.
         * VM calls memory_object_map() for each mapping (specifying
         * the protection of each mapping) and calls
         * memory_object_last_unmap() when all the mappings are gone.
         */
        pager_prot = max_protection;
            /*
             * Copy-On-Write mapping: won't modify
             * the memory object.
             */
            pager_prot &= ~VM_PROT_WRITE;
            object != VM_OBJECT_NULL &&
            object->pager != MEMORY_OBJECT_NULL) {
            vm_object_lock(object);
            pager = object->pager;
            if (object->named &&
                pager != MEMORY_OBJECT_NULL) {
                assert(object->pager_ready);
                vm_object_mapping_wait(object, THREAD_UNINT);
                vm_object_mapping_begin(object);
                vm_object_unlock(object);

                kr = memory_object_map(pager, pager_prot);
                assert(kr == KERN_SUCCESS);

                vm_object_lock(object);
                vm_object_mapping_end(object);
            vm_object_unlock(object);

    if (new_mapping_established) {
        /*
         * We have to get rid of the new mappings since we
         * won't make them available to the user.
         * Try and do that atomically, to minimize the risk
         * that someone else creates new mappings in that range.
         */
        zap_new_map = vm_map_create(PMAP_NULL,
                                    map->hdr.entries_pageable);
        (void) vm_map_delete(map, *address, *address+size,
                             VM_MAP_REMOVE_SAVE_ENTRIES,
        if (zap_old_map != VM_MAP_NULL &&
            zap_old_map->hdr.nentries != 0) {
            vm_map_entry_t entry1, entry2;

            /*
             * The new mapping failed.  Attempt to restore
             * the old mappings, saved in the "zap_old_map".
             */

            /* first check if the coast is still clear */
            start = vm_map_first_entry(zap_old_map)->vme_start;
            end = vm_map_last_entry(zap_old_map)->vme_end;
            if (vm_map_lookup_entry(map, start, &entry1) ||
                vm_map_lookup_entry(map, end, &entry2) ||
                /*
                 * Part of that range has already been
                 * re-mapped:  we can't restore the old
                 */
                vm_map_enter_restore_failures++;
                /*
                 * Transfer the saved map entries from
                 * "zap_old_map" to the original "map",
                 * inserting them all after "entry1".
                 */
                for (entry2 = vm_map_first_entry(zap_old_map);
                     entry2 != vm_map_to_entry(zap_old_map);
                     entry2 = vm_map_first_entry(zap_old_map)) {
                    vm_map_size_t entry_size;

                    entry_size = (entry2->vme_end -
                    vm_map_entry_unlink(zap_old_map,
                    zap_old_map->size -= entry_size;
                    vm_map_entry_link(map, entry1, entry2);
                    map->size += entry_size;
                if (map->wiring_required) {
                    /*
                     * XXX TODO: we should rewire the
                     */
                vm_map_enter_restore_successes++;

    /*
     * Get rid of the "zap_maps" and all the map entries that
     * they may still contain.
     */
    if (zap_old_map != VM_MAP_NULL) {
        vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
        zap_old_map = VM_MAP_NULL;
    if (zap_new_map != VM_MAP_NULL) {
        vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
        zap_new_map = VM_MAP_NULL;
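
/*
 * Added note (not from the original sources): the routine that follows,
 * vm_map_enter_mem_object(), resolves a Mach port into the VM object (or
 * submap) it names -- a null port means anonymous zero-fill memory, an
 * IKOT_NAMED_ENTRY port wraps a named entry, and an IKOT_MEMORY_OBJECT
 * port is treated as a raw memory object -- and then forwards the request
 * to vm_map_enter() with a page-rounded address and size.
 */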
vm_map_enter_mem_object(
    vm_map_t            target_map,
    vm_map_offset_t     *address,
    vm_map_size_t       initial_size,
    vm_map_offset_t     mask,
    vm_object_offset_t  offset,
    vm_prot_t           cur_protection,
    vm_prot_t           max_protection,
    vm_inherit_t        inheritance)
    vm_map_address_t    map_addr;
    vm_map_size_t       map_size;
    vm_object_size_t    size;
    kern_return_t       result;

    /*
     * Check arguments for validity
     */
    if ((target_map == VM_MAP_NULL) ||
        (cur_protection & ~VM_PROT_ALL) ||
        (max_protection & ~VM_PROT_ALL) ||
        (inheritance > VM_INHERIT_LAST_VALID) ||
        return KERN_INVALID_ARGUMENT;

    map_addr = vm_map_trunc_page(*address);
    map_size = vm_map_round_page(initial_size);
    size = vm_object_round_page(initial_size);

    /*
     * Find the vm object (if any) corresponding to this port.
     */
    if (!IP_VALID(port)) {
        object = VM_OBJECT_NULL;
    } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
        vm_named_entry_t named_entry;

        named_entry = (vm_named_entry_t) port->ip_kobject;
        /* a few checks to make sure user is obeying rules */
        if (offset >= named_entry->size)
            return KERN_INVALID_RIGHT;
        size = named_entry->size - offset;
        if ((named_entry->protection & max_protection) !=
            return KERN_INVALID_RIGHT;
        if ((named_entry->protection & cur_protection) !=
            return KERN_INVALID_RIGHT;
        if (named_entry->size < (offset + size))
            return KERN_INVALID_ARGUMENT;

        /* the caller's parameter offset is defined to be the */
        /* offset from beginning of named entry offset in object */
        offset = offset + named_entry->offset;

        named_entry_lock(named_entry);
        if (named_entry->is_sub_map) {
            submap = named_entry->backing.map;
            vm_map_lock(submap);
            vm_map_reference(submap);
            vm_map_unlock(submap);
            named_entry_unlock(named_entry);

            result = vm_map_enter(target_map,
                                  flags | VM_FLAGS_SUBMAP,
                                  (vm_object_t) submap,
            if (result != KERN_SUCCESS) {
                vm_map_deallocate(submap);
                /*
                 * No need to lock "submap" just to check its
                 * "mapped" flag: that flag is never reset
                 * once it's been set and if we race, we'll
                 * just end up setting it twice, which is OK.
                 */
                if (submap->mapped == FALSE) {
                    /*
                     * This submap has never been mapped.
                     * Set its "mapped" flag now that it
                     * This happens only for the first ever
                     * mapping of a "submap".
                     */
                    vm_map_lock(submap);
                    submap->mapped = TRUE;
                    vm_map_unlock(submap);
                *address = map_addr;

        } else if (named_entry->is_pager) {
            unsigned int    access;
            vm_prot_t       protections;
            unsigned int    wimg_mode;
            boolean_t       cache_attr;

            protections = named_entry->protection & VM_PROT_ALL;
            access = GET_MAP_MEM(named_entry->protection);

            object = vm_object_enter(named_entry->backing.pager,
                                     named_entry->internal,
            if (object == VM_OBJECT_NULL) {
                named_entry_unlock(named_entry);
                return KERN_INVALID_OBJECT;

            /* JMM - drop reference on pager here */

            /* create an extra ref for the named entry */
            vm_object_lock(object);
            vm_object_reference_locked(object);
            named_entry->backing.object = object;
            named_entry->is_pager = FALSE;
            named_entry_unlock(named_entry);

            wimg_mode = object->wimg_bits;
            if (access == MAP_MEM_IO) {
                wimg_mode = VM_WIMG_IO;
            } else if (access == MAP_MEM_COPYBACK) {
                wimg_mode = VM_WIMG_USE_DEFAULT;
            } else if (access == MAP_MEM_WTHRU) {
                wimg_mode = VM_WIMG_WTHRU;
            } else if (access == MAP_MEM_WCOMB) {
                wimg_mode = VM_WIMG_WCOMB;
            if (wimg_mode == VM_WIMG_IO ||
                wimg_mode == VM_WIMG_WCOMB)

            /* wait for object (if any) to be ready */
            if (!named_entry->internal) {
                while (!object->pager_ready) {
                                   VM_OBJECT_EVENT_PAGER_READY,
                    vm_object_lock(object);

            if (object->wimg_bits != wimg_mode) {
                vm_object_paging_wait(object, THREAD_UNINT);

                object->wimg_bits = wimg_mode;
                queue_iterate(&object->memq, p, vm_page_t, listq) {
                    if (!p->fictitious) {
                        pmap_disconnect(p->phys_page);
                        pmap_sync_page_attributes_phys(p->phys_page);

            object->true_share = TRUE;
            if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
                object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
            vm_object_unlock(object);

            /* This is the case where we are going to map */
            /* an already mapped object.  If the object is */
            /* not ready it is internal.  An external */
            /* object cannot be mapped until it is ready */
            /* we can therefore avoid the ready check */
            object = named_entry->backing.object;
            assert(object != VM_OBJECT_NULL);
            named_entry_unlock(named_entry);
            vm_object_reference(object);

    } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
        /*
         * JMM - This is temporary until we unify named entries
         * and raw memory objects.
         *
         * Detected fake ip_kotype for a memory object.  In
         * this case, the port isn't really a port at all, but
         * instead is just a raw memory object.
         */
        object = vm_object_enter((memory_object_t)port,
                                 size, FALSE, FALSE, FALSE);
        if (object == VM_OBJECT_NULL)
            return KERN_INVALID_OBJECT;

        /* wait for object (if any) to be ready */
        if (object != VM_OBJECT_NULL) {
            if (object == kernel_object) {
                printf("Warning: Attempt to map kernel object"
                       " by a non-private kernel entity\n");
                return KERN_INVALID_OBJECT;
            if (!object->pager_ready) {
                vm_object_lock(object);
                while (!object->pager_ready) {
                    vm_object_wait(object,
                                   VM_OBJECT_EVENT_PAGER_READY,
                    vm_object_lock(object);
                vm_object_unlock(object);

        return KERN_INVALID_OBJECT;

    if (object != VM_OBJECT_NULL &&
        object->pager != MEMORY_OBJECT_NULL &&
        object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
        memory_object_t pager;
        vm_prot_t       pager_prot;

        /*
         * For "named" VM objects, let the pager know that the
         * memory object is being mapped.  Some pagers need to keep
         * track of this, to know when they can reclaim the memory
         * object, for example.
         * VM calls memory_object_map() for each mapping (specifying
         * the protection of each mapping) and calls
         * memory_object_last_unmap() when all the mappings are gone.
         */
        pager_prot = max_protection;
            /*
             * Copy-On-Write mapping: won't modify the
             */
            pager_prot &= ~VM_PROT_WRITE;
        vm_object_lock(object);
        pager = object->pager;
        if (object->named &&
            pager != MEMORY_OBJECT_NULL &&
            object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
            assert(object->pager_ready);
            vm_object_mapping_wait(object, THREAD_UNINT);
            vm_object_mapping_begin(object);
            vm_object_unlock(object);

            kr = memory_object_map(pager, pager_prot);
            assert(kr == KERN_SUCCESS);

            vm_object_lock(object);
            vm_object_mapping_end(object);
        vm_object_unlock(object);

    /*
     * Perform the copy if requested
     */
        vm_object_t         new_object;
        vm_object_offset_t  new_offset;

        result = vm_object_copy_strategically(object, offset, size,
                                              &new_object, &new_offset,
        if (result == KERN_MEMORY_RESTART_COPY) {
            boolean_t src_needs_copy;

            /*
             * We currently ignore src_needs_copy.
             * This really is the issue of how to make
             * MEMORY_OBJECT_COPY_SYMMETRIC safe for
             * non-kernel users to use. Solution forthcoming.
             * In the meantime, since we don't allow non-kernel
             * memory managers to specify symmetric copy,
             * we won't run into problems here.
             */
            new_object = object;
            new_offset = offset;
            success = vm_object_copy_quickly(&new_object,
            result = KERN_SUCCESS;

        /*
         * Throw away the reference to the
         * original object, as it won't be mapped.
         */
        vm_object_deallocate(object);

        if (result != KERN_SUCCESS)

        object = new_object;
        offset = new_offset;

    result = vm_map_enter(target_map,
                          &map_addr, map_size,
                          (vm_map_offset_t)mask,
                          cur_protection, max_protection, inheritance);
    if (result != KERN_SUCCESS)
        vm_object_deallocate(object);
    *address = map_addr;
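
/*
 * Added note: vm_map_enter_mem_object_control() is the sibling of the
 * routine above; instead of a port it takes a memory_object_control_t that
 * is resolved directly to a VM object via
 * memory_object_control_to_vm_object(), after which the mapping path
 * (pager notification, optional copy, vm_map_enter) is the same.
 */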
vm_map_enter_mem_object_control(
    vm_map_t                target_map,
    vm_map_offset_t         *address,
    vm_map_size_t           initial_size,
    vm_map_offset_t         mask,
    memory_object_control_t control,
    vm_object_offset_t      offset,
    vm_prot_t               cur_protection,
    vm_prot_t               max_protection,
    vm_inherit_t            inheritance)
    vm_map_address_t    map_addr;
    vm_map_size_t       map_size;
    vm_object_size_t    size;
    kern_return_t       result;
    memory_object_t     pager;
    vm_prot_t           pager_prot;

    /*
     * Check arguments for validity
     */
    if ((target_map == VM_MAP_NULL) ||
        (cur_protection & ~VM_PROT_ALL) ||
        (max_protection & ~VM_PROT_ALL) ||
        (inheritance > VM_INHERIT_LAST_VALID) ||
        return KERN_INVALID_ARGUMENT;

    map_addr = vm_map_trunc_page(*address);
    map_size = vm_map_round_page(initial_size);
    size = vm_object_round_page(initial_size);

    object = memory_object_control_to_vm_object(control);

    if (object == VM_OBJECT_NULL)
        return KERN_INVALID_OBJECT;

    if (object == kernel_object) {
        printf("Warning: Attempt to map kernel object"
               " by a non-private kernel entity\n");
        return KERN_INVALID_OBJECT;

    vm_object_lock(object);
    object->ref_count++;
    vm_object_res_reference(object);

    /*
     * For "named" VM objects, let the pager know that the
     * memory object is being mapped.  Some pagers need to keep
     * track of this, to know when they can reclaim the memory
     * object, for example.
     * VM calls memory_object_map() for each mapping (specifying
     * the protection of each mapping) and calls
     * memory_object_last_unmap() when all the mappings are gone.
     */
    pager_prot = max_protection;
        pager_prot &= ~VM_PROT_WRITE;
    pager = object->pager;
    if (object->named &&
        pager != MEMORY_OBJECT_NULL &&
        object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
        assert(object->pager_ready);
        vm_object_mapping_wait(object, THREAD_UNINT);
        vm_object_mapping_begin(object);
        vm_object_unlock(object);

        kr = memory_object_map(pager, pager_prot);
        assert(kr == KERN_SUCCESS);

        vm_object_lock(object);
        vm_object_mapping_end(object);
    vm_object_unlock(object);

    /*
     * Perform the copy if requested
     */
        vm_object_t         new_object;
        vm_object_offset_t  new_offset;

        result = vm_object_copy_strategically(object, offset, size,
                                              &new_object, &new_offset,
        if (result == KERN_MEMORY_RESTART_COPY) {
            boolean_t src_needs_copy;

            /*
             * We currently ignore src_needs_copy.
             * This really is the issue of how to make
             * MEMORY_OBJECT_COPY_SYMMETRIC safe for
             * non-kernel users to use. Solution forthcoming.
             * In the meantime, since we don't allow non-kernel
             * memory managers to specify symmetric copy,
             * we won't run into problems here.
             */
            new_object = object;
            new_offset = offset;
            success = vm_object_copy_quickly(&new_object,
            result = KERN_SUCCESS;

        /*
         * Throw away the reference to the
         * original object, as it won't be mapped.
         */
        vm_object_deallocate(object);

        if (result != KERN_SUCCESS)

        object = new_object;
        offset = new_offset;

    result = vm_map_enter(target_map,
                          &map_addr, map_size,
                          (vm_map_offset_t)mask,
                          cur_protection, max_protection, inheritance);
    if (result != KERN_SUCCESS)
        vm_object_deallocate(object);
    *address = map_addr;
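
/*
 * Added note: the CPM ("contiguous physical memory") allocator below pulls
 * wired, physically contiguous pages from cpm_allocate(), inserts them into
 * a fresh VM object, maps that object into the target map, and pre-enters
 * every page in the pmap so the range never takes a fault.
 */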
extern pmap_paddr_t avail_start, avail_end;

/*
 *  Allocate memory in the specified map, with the caveat that
 *  the memory is physically contiguous.  This call may fail
 *  if the system can't find sufficient contiguous memory.
 *  This call may cause or lead to heart-stopping amounts of
 *  paging activity.
 *
 *  Memory obtained from this call should be freed in the
 *  normal way, viz., via vm_deallocate.
 */
kern_return_t
vm_allocate_cpm(
    vm_map_offset_t *addr,
    vm_object_t     cpm_obj;
    vm_map_offset_t va, start, end, offset;
    vm_map_offset_t prev_addr;
#endif  /* MACH_ASSERT */

    boolean_t       anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);

    if (!vm_allocate_cpm_enabled)
        return KERN_FAILURE;

        return KERN_SUCCESS;

        *addr = vm_map_min(map);
    *addr = vm_map_trunc_page(*addr);
    size = vm_map_round_page(size);

    /*
     * LP64todo - cpm_allocate should probably allow
     * allocations of >4GB, but not with the current
     * algorithm, so just cast down the size for now.
     */
    if (size > VM_MAX_ADDRESS)
        return KERN_RESOURCE_SHORTAGE;
    if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
                           &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)

    cpm_obj = vm_object_allocate((vm_object_size_t)size);
    assert(cpm_obj != VM_OBJECT_NULL);
    assert(cpm_obj->internal);
    assert(cpm_obj->size == (vm_object_size_t)size);
    assert(cpm_obj->can_persist == FALSE);
    assert(cpm_obj->pager_created == FALSE);
    assert(cpm_obj->pageout == FALSE);
    assert(cpm_obj->shadow == VM_OBJECT_NULL);

    /*
     * Insert pages into object.
     */
    vm_object_lock(cpm_obj);
    for (offset = 0; offset < size; offset += PAGE_SIZE) {
        pages = NEXT_PAGE(m);
        *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;

        assert(!m->gobbled);
        assert(!m->pageout);
        assert(VM_PAGE_WIRED(m));
        /*
         * "m" is not supposed to be pageable, so it
         * should not be encrypted.  It wouldn't be safe
         * to enter it in a new VM object while encrypted.
         */
        ASSERT_PAGE_DECRYPTED(m);
        assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));

        vm_page_insert(m, cpm_obj, offset);
    assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
    vm_object_unlock(cpm_obj);

    /*
     * Hang onto a reference on the object in case a
     * multi-threaded application for some reason decides
     * to deallocate the portion of the address space into
     * which we will insert this object.
     *
     * Unfortunately, we must insert the object now before
     * we can talk to the pmap module about which addresses
     * must be wired down.  Hence, the race with a multi-
     */
    vm_object_reference(cpm_obj);

    /*
     * Insert object into map.
     */
                      (vm_object_offset_t)0,
                      VM_INHERIT_DEFAULT);

    if (kr != KERN_SUCCESS) {
        /*
         * A CPM object doesn't have can_persist set,
         * so all we have to do is deallocate it to
         * free up these pages.
         */
        assert(cpm_obj->pager_created == FALSE);
        assert(cpm_obj->can_persist == FALSE);
        assert(cpm_obj->pageout == FALSE);
        assert(cpm_obj->shadow == VM_OBJECT_NULL);
        vm_object_deallocate(cpm_obj); /* kill acquired ref */
        vm_object_deallocate(cpm_obj); /* kill creation ref */

    /*
     * Inform the physical mapping system that the
     * range of addresses may not fault, so that
     * page tables and such can be locked down as well.
     */
    pmap = vm_map_pmap(map);
    pmap_pageable(pmap, start, end, FALSE);

    /*
     * Enter each page into the pmap, to avoid faults.
     * Note that this loop could be coded more efficiently,
     * if the need arose, rather than looking up each page
     */
    for (offset = 0, va = start; offset < size;
         va += PAGE_SIZE, offset += PAGE_SIZE) {

        vm_object_lock(cpm_obj);
        m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
        assert(m != VM_PAGE_NULL);

        vm_page_zero_fill(m);

        type_of_fault = DBG_ZERO_FILL_FAULT;

        vm_fault_enter(m, pmap, va, VM_PROT_ALL,
                       VM_PAGE_WIRED(m), FALSE, FALSE,

        vm_object_unlock(cpm_obj);

    /*
     * Verify ordering in address space.
     */
    for (offset = 0; offset < size; offset += PAGE_SIZE) {
        vm_object_lock(cpm_obj);
        m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
        vm_object_unlock(cpm_obj);
        if (m == VM_PAGE_NULL)
            panic("vm_allocate_cpm:  obj 0x%x off 0x%x no page",

        assert(!m->fictitious);
        assert(!m->private);
        assert(!m->cleaning);
        assert(!m->precious);
        assert(!m->clustered);
        if (m->phys_page != prev_addr + 1) {
            printf("start 0x%x end 0x%x va 0x%x\n",
            printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
            printf("m 0x%x prev_address 0x%x\n", m,
            panic("vm_allocate_cpm:  pages not contig!");
        prev_addr = m->phys_page;
#endif  /* MACH_ASSERT */

    vm_object_deallocate(cpm_obj); /* kill extra ref */

/*
 *  Interface is defined in all cases, but unless the kernel
 *  is built explicitly for this option, the interface does
 */
kern_return_t
vm_allocate_cpm(
    __unused vm_map_t        map,
    __unused vm_map_offset_t *addr,
    __unused vm_map_size_t   size,
    return KERN_FAILURE;
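
/*
 * Added note: "nesting" lets a submap share its page tables with the parent
 * maps that map it (pmap_nest()); vm_map_clip_unnest() below undoes that
 * sharing for a sub-range before the entry is clipped, so the parent map
 * ends up with private page tables for the affected range.
 */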
/* Not used without nested pmaps */
#ifndef NO_NESTED_PMAP
/*
 *  Clip and unnest a portion of a nested submap mapping.
 */
static void
vm_map_clip_unnest(
    vm_map_entry_t  entry,
    vm_map_offset_t start_unnest,
    vm_map_offset_t end_unnest)
    vm_map_offset_t old_start_unnest = start_unnest;
    vm_map_offset_t old_end_unnest = end_unnest;

    assert(entry->is_sub_map);
    assert(entry->object.sub_map != NULL);

    /*
     * Query the platform for the optimal unnest range.
     * DRK: There's some duplication of effort here, since
     * callers may have adjusted the range to some extent.  This
     * routine was introduced to support 1GiB subtree nesting
     * for x86 platforms, which can also nest on 2MiB boundaries
     * depending on size/alignment.
     */
    if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
        log_unnest_badness(map, old_start_unnest, old_end_unnest);

    if (entry->vme_start > start_unnest ||
        entry->vme_end < end_unnest) {
        panic("vm_map_clip_unnest(0x%llx,0x%llx): "
              "bad nested entry: start=0x%llx end=0x%llx\n",
              (long long)start_unnest, (long long)end_unnest,
              (long long)entry->vme_start, (long long)entry->vme_end);

    if (start_unnest > entry->vme_start) {
        _vm_map_clip_start(&map->hdr,
        UPDATE_FIRST_FREE(map, map->first_free);
    if (entry->vme_end > end_unnest) {
        _vm_map_clip_end(&map->hdr,
        UPDATE_FIRST_FREE(map, map->first_free);

    pmap_unnest(map->pmap,
                entry->vme_end - entry->vme_start);
    if ((map->mapped) && (map->ref_count)) {
        /* clean up parent map/maps */
        vm_map_submap_pmap_clean(
            map, entry->vme_start,
            entry->object.sub_map,
    entry->use_pmap = FALSE;
#endif  /* NO_NESTED_PMAP */
/*
 *  vm_map_clip_start:  [ internal use only ]
 *
 *  Asserts that the given entry begins at or after
 *  the specified address; if necessary,
 *  it splits the entry into two.
 */
void
vm_map_clip_start(
    vm_map_entry_t  entry,
    vm_map_offset_t startaddr)
#ifndef NO_NESTED_PMAP
    if (entry->use_pmap &&
        startaddr >= entry->vme_start) {
        vm_map_offset_t start_unnest, end_unnest;

        /*
         * Make sure "startaddr" is no longer in a nested range
         * before we clip.  Unnest only the minimum range the platform
         * vm_map_clip_unnest may perform additional adjustments to
         */
        start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
        end_unnest = start_unnest + pmap_nesting_size_min;
        vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
#endif /* NO_NESTED_PMAP */
    if (startaddr > entry->vme_start) {
        if (entry->object.vm_object &&
            !entry->is_sub_map &&
            entry->object.vm_object->phys_contiguous) {
            pmap_remove(map->pmap,
                        (addr64_t)(entry->vme_start),
                        (addr64_t)(entry->vme_end));
        _vm_map_clip_start(&map->hdr, entry, startaddr);
        UPDATE_FIRST_FREE(map, map->first_free);

#define vm_map_copy_clip_start(copy, entry, startaddr) \
    if ((startaddr) > (entry)->vme_start) \
        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \

/*
 *  This routine is called only when it is known that
 *  the entry must be split.
 */
static void
_vm_map_clip_start(
    register struct vm_map_header   *map_header,
    register vm_map_entry_t         entry,
    register vm_map_offset_t        start)
    register vm_map_entry_t new_entry;

    /*
     *  Split off the front portion --
     *  note that we must insert the new
     *  entry BEFORE this one, so that
     *  this entry has the specified starting
     */
    new_entry = _vm_map_entry_create(map_header);
    vm_map_entry_copy_full(new_entry, entry);

    new_entry->vme_end = start;
    entry->offset += (start - entry->vme_start);
    entry->vme_start = start;

    _vm_map_entry_link(map_header, entry->vme_prev, new_entry);

    if (entry->is_sub_map)
        vm_map_reference(new_entry->object.sub_map);
        vm_object_reference(new_entry->object.vm_object);
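
/*
 * Added illustration: clipping splits one map entry at an address A.
 *
 *   before:   [ vme_start ............................ vme_end )
 *   after:    [ vme_start ...... A ) [ A ............. vme_end )
 *
 * _vm_map_clip_start() keeps the caller's entry as the right-hand piece
 * (its vme_start moves up to A); _vm_map_clip_end(), defined below, keeps
 * it as the left-hand piece instead.
 */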
/*
 *  vm_map_clip_end:    [ internal use only ]
 *
 *  Asserts that the given entry ends at or before
 *  the specified address; if necessary,
 *  it splits the entry into two.
 */
void
vm_map_clip_end(
    vm_map_entry_t  entry,
    vm_map_offset_t endaddr)
    if (endaddr > entry->vme_end) {
        /*
         * Within the scope of this clipping, limit "endaddr" to
         * the end of this map entry...
         */
        endaddr = entry->vme_end;
#ifndef NO_NESTED_PMAP
    if (entry->use_pmap) {
        vm_map_offset_t start_unnest, end_unnest;

        /*
         * Make sure the range between the start of this entry and
         * the new "endaddr" is no longer nested before we clip.
         * Unnest only the minimum range the platform can handle.
         * vm_map_clip_unnest may perform additional adjustments to
         */
        start_unnest = entry->vme_start;
            (endaddr + pmap_nesting_size_min - 1) &
            ~(pmap_nesting_size_min - 1);
        vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
#endif /* NO_NESTED_PMAP */
    if (endaddr < entry->vme_end) {
        if (entry->object.vm_object &&
            !entry->is_sub_map &&
            entry->object.vm_object->phys_contiguous) {
            pmap_remove(map->pmap,
                        (addr64_t)(entry->vme_start),
                        (addr64_t)(entry->vme_end));
        _vm_map_clip_end(&map->hdr, entry, endaddr);
        UPDATE_FIRST_FREE(map, map->first_free);

#define vm_map_copy_clip_end(copy, entry, endaddr) \
    if ((endaddr) < (entry)->vme_end) \
        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \

/*
 *  This routine is called only when it is known that
 *  the entry must be split.
 */
static void
_vm_map_clip_end(
    register struct vm_map_header   *map_header,
    register vm_map_entry_t         entry,
    register vm_map_offset_t        end)
    register vm_map_entry_t new_entry;

    /*
     *  Create a new entry and insert it
     *  AFTER the specified entry
     */
    new_entry = _vm_map_entry_create(map_header);
    vm_map_entry_copy_full(new_entry, entry);

    new_entry->vme_start = entry->vme_end = end;
    new_entry->offset += (end - entry->vme_start);

    _vm_map_entry_link(map_header, entry, new_entry);

    if (entry->is_sub_map)
        vm_map_reference(new_entry->object.sub_map);
        vm_object_reference(new_entry->object.vm_object);
/*
 *  VM_MAP_RANGE_CHECK: [ internal use only ]
 *
 *  Asserts that the starting and ending region
 *  addresses fall within the valid range of the map.
 */
#define VM_MAP_RANGE_CHECK(map, start, end) \
    if (start < vm_map_min(map))            \
        start = vm_map_min(map);            \
    if (end > vm_map_max(map))              \
        end = vm_map_max(map);              \

/*
 *  vm_map_range_check: [ internal use only ]
 *
 *  Check that the region defined by the specified start and
 *  end addresses is wholly contained within a single map
 *  entry or set of adjacent map entries of the specified map,
 *  i.e. the specified region contains no unmapped space.
 *  If any or all of the region is unmapped, FALSE is returned.
 *  Otherwise, TRUE is returned and if the output argument 'entry'
 *  is not NULL it points to the map entry containing the start
 *
 *  The map is locked for reading on entry and is left locked.
 */
static boolean_t
vm_map_range_check(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    vm_map_entry_t           *entry)
    register vm_map_offset_t prev;

    /*
     *  Basic sanity checks first
     */
    if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)

    /*
     *  Check first if the region starts within a valid
     *  mapping for the map.
     */
    if (!vm_map_lookup_entry(map, start, &cur))

    /*
     *  Optimize for the case that the region is contained
     *  in a single map entry.
     */
    if (entry != (vm_map_entry_t *) NULL)
    if (end <= cur->vme_end)

    /*
     *  If the region is not wholly contained within a
     *  single entry, walk the entries looking for holes.
     */
    prev = cur->vme_end;
    cur = cur->vme_next;
    while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
        if (end <= cur->vme_end)
        prev = cur->vme_end;
        cur = cur->vme_next;
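
/*
 * Added note: VM_MAP_RANGE_CHECK() silently clamps [start, end) to the
 * map's valid range, while vm_map_range_check() answers whether the range
 * is fully backed by adjacent entries; callers such as the wire/unwire
 * paths below use the former before walking entries.
 */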
/*
 *  vm_map_submap:      [ kernel use only ]
 *
 *  Mark the given range as handled by a subordinate map.
 *
 *  This range must have been created with vm_map_find using
 *  the vm_submap_object, and no other operations may have been
 *  performed on this range prior to calling vm_map_submap.
 *
 *  Only a limited number of operations can be performed
 *  within this range after calling vm_map_submap:
 *      [Don't try vm_map_copyin!]
 *
 *  To remove a submapping, one must first remove the
 *  range from the superior map, and then destroy the
 *  submap (if desired).  [Better yet, don't try it.]
 */
kern_return_t
vm_map_submap(
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_map_offset_t offset,
#ifdef NO_NESTED_PMAP
#endif  /* NO_NESTED_PMAP */
    vm_map_entry_t          entry;
    register kern_return_t  result = KERN_INVALID_ARGUMENT;
    register vm_object_t    object;

    if (! vm_map_lookup_entry(map, start, &entry)) {
        entry = entry->vme_next;

    if (entry == vm_map_to_entry(map) ||
        entry->is_sub_map) {
        return KERN_INVALID_ARGUMENT;

    assert(!entry->use_pmap); /* we don't want to unnest anything here */
    vm_map_clip_start(map, entry, start);
    vm_map_clip_end(map, entry, end);

    if ((entry->vme_start == start) && (entry->vme_end == end) &&
        (!entry->is_sub_map) &&
        ((object = entry->object.vm_object) == vm_submap_object) &&
        (object->resident_page_count == 0) &&
        (object->copy == VM_OBJECT_NULL) &&
        (object->shadow == VM_OBJECT_NULL) &&
        (!object->pager_created)) {
        entry->offset = (vm_object_offset_t)offset;
        entry->object.vm_object = VM_OBJECT_NULL;
        vm_object_deallocate(object);
        entry->is_sub_map = TRUE;
        entry->object.sub_map = submap;
        vm_map_reference(submap);
        submap->mapped = TRUE;

#ifndef NO_NESTED_PMAP
        /* nest if platform code will allow */
        if(submap->pmap == NULL) {
            submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
            if(submap->pmap == PMAP_NULL) {
                return(KERN_NO_SPACE);
        result = pmap_nest(map->pmap,
                           (entry->object.sub_map)->pmap,
                           (uint64_t)(end - start));
            panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
        entry->use_pmap = TRUE;
#else   /* NO_NESTED_PMAP */
        pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
#endif  /* NO_NESTED_PMAP */
        result = KERN_SUCCESS;
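
/*
 * Added note: submaps are a kernel-only facility; in practice they back
 * things like the shared region and the commpage, where one pre-built map
 * is exposed inside many task maps, optionally with nested page tables
 * (entry->use_pmap) when pmap_nest() is available on the platform.
 */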
/*
 *  Sets the protection of the specified address
 *  region in the target map.  If "set_max" is
 *  specified, the maximum protection is to be set;
 *  otherwise, only the current protection is affected.
 */
kern_return_t
vm_map_protect(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    register vm_prot_t       new_prot,
    register boolean_t       set_max)
    register vm_map_entry_t  current;
    register vm_map_offset_t prev;
    vm_map_entry_t           entry;

        "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
        map, start, end, new_prot, set_max);

    /* LP64todo - remove this check when vm_map_commpage64()
     * no longer has to stuff in a map_entry for the commpage
     * above the map's max_offset.
     */
    if (start >= map->max_offset) {
        return(KERN_INVALID_ADDRESS);

    /*
     *  Lookup the entry.  If it doesn't start in a valid
     *  entry, return an error.
     */
    if (! vm_map_lookup_entry(map, start, &entry)) {
        return(KERN_INVALID_ADDRESS);

    if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
        start = SUPERPAGE_ROUND_DOWN(start);

    if (entry->superpage_size)
        end = SUPERPAGE_ROUND_UP(end);

    /*
     *  Make a first pass to check for protection and address
     */
    prev = current->vme_start;
    while ((current != vm_map_to_entry(map)) &&
           (current->vme_start < end)) {

        /*
         * If there is a hole, return an error.
         */
        if (current->vme_start != prev) {
            return(KERN_INVALID_ADDRESS);

        new_max = current->max_protection;
        if(new_prot & VM_PROT_COPY) {
            new_max |= VM_PROT_WRITE;
            if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
                return(KERN_PROTECTION_FAILURE);
            if ((new_prot & new_max) != new_prot) {
                return(KERN_PROTECTION_FAILURE);

        if (new_prot & VM_PROT_WRITE) {
            if (new_prot & VM_PROT_EXECUTE) {
                printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
                new_prot &= ~VM_PROT_EXECUTE;

        prev = current->vme_end;
        current = current->vme_next;

        return(KERN_INVALID_ADDRESS);

    /*
     *  Go back and fix up protections.
     *  Clip to start here if the range starts within
     */
    if (current != vm_map_to_entry(map)) {
        /* clip and unnest if necessary */
        vm_map_clip_start(map, current, start);

    while ((current != vm_map_to_entry(map)) &&
           (current->vme_start < end)) {

        vm_map_clip_end(map, current, end);

        assert(!current->use_pmap); /* clipping did unnest if needed */

        old_prot = current->protection;

        if(new_prot & VM_PROT_COPY) {
            /* caller is asking specifically to copy the */
            /* mapped data, this implies that max protection */
            /* will include write.  Caller must be prepared */
            /* for loss of shared memory communication in the */
            /* target area after taking this step */
            current->needs_copy = TRUE;
            current->max_protection |= VM_PROT_WRITE;

            current->protection =
                (current->max_protection =
                 new_prot & ~VM_PROT_COPY) &
            current->protection = new_prot & ~VM_PROT_COPY;

        /*
         *  Update physical map if necessary.
         *  If the request is to turn off write protection,
         *  we won't do it for real (in pmap). This is because
         *  it would cause copy-on-write to fail.  We've already
         *  set the new protection in the map, so if a
         *  write-protect fault occurred, it will be fixed up
         *  properly, COW or not.
         */
        if (current->protection != old_prot) {
            /* Look one level in; we support nested pmaps */
            /* from mapped submaps which are direct entries */

            prot = current->protection & ~VM_PROT_WRITE;

            if (override_nx(map, current->alias) && prot)
                prot |= VM_PROT_EXECUTE;

            if (current->is_sub_map && current->use_pmap) {
                pmap_protect(current->object.sub_map->pmap,
                pmap_protect(map->pmap,

        current = current->vme_next;

    while ((current != vm_map_to_entry(map)) &&
           (current->vme_start <= end)) {
        vm_map_simplify_entry(map, current);
        current = current->vme_next;

    return(KERN_SUCCESS);
/*
 *  Sets the inheritance of the specified address
 *  range in the target map.  Inheritance
 *  affects how the map will be shared with
 *  child maps at the time of vm_map_fork.
 */
kern_return_t
vm_map_inherit(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    register vm_inherit_t    new_inheritance)
    register vm_map_entry_t  entry;
    vm_map_entry_t           temp_entry;

    VM_MAP_RANGE_CHECK(map, start, end);

    if (vm_map_lookup_entry(map, start, &temp_entry)) {
        temp_entry = temp_entry->vme_next;

    /* first check entire range for submaps which can't support the */
    /* given inheritance. */
    while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
        if(entry->is_sub_map) {
            if(new_inheritance == VM_INHERIT_COPY) {
                return(KERN_INVALID_ARGUMENT);

        entry = entry->vme_next;

    if (entry != vm_map_to_entry(map)) {
        /* clip and unnest if necessary */
        vm_map_clip_start(map, entry, start);

    while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
        vm_map_clip_end(map, entry, end);
        assert(!entry->use_pmap); /* clip did unnest if needed */

        entry->inheritance = new_inheritance;

        entry = entry->vme_next;

    return(KERN_SUCCESS);
/*
 * Update the accounting for the amount of wired memory in this map.  If the user has
 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
 */
static kern_return_t
add_wire_counts(
    vm_map_entry_t  entry,
    boolean_t       user_wire)

        /*
         * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
         */
        if (entry->user_wired_count == 0) {
            size = entry->vme_end - entry->vme_start;

            /*
             * Since this is the first time the user is wiring this map entry, check to see if we're
             * exceeding the user wire limits.  There is a per map limit which is the smaller of either
             * the process's rlimit or the global vm_user_wire_limit which caps this value.  There is also
             * a system-wide limit on the amount of memory all users can wire.  If the user is over either
             * limit, then we fail.
             */
            if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
               size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit ||
               size + ptoa_64(vm_page_wire_count) > max_mem - vm_global_no_user_wire_amount)
                return KERN_RESOURCE_SHORTAGE;

            /*
             * The first time the user wires an entry, we also increment the wired_count and add this to
             * the total that has been wired in the map.
             */
            if (entry->wired_count >= MAX_WIRE_COUNT)
                return KERN_FAILURE;

            entry->wired_count++;
            map->user_wire_size += size;

        if (entry->user_wired_count >= MAX_WIRE_COUNT)
            return KERN_FAILURE;

        entry->user_wired_count++;

        /*
         * The kernel's wiring the memory.  Just bump the count and continue.
         */
        if (entry->wired_count >= MAX_WIRE_COUNT)
            panic("vm_map_wire: too many wirings");

        entry->wired_count++;

    return KERN_SUCCESS;
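
/*
 * Added note: for a user wire request the checks above amount to
 *
 *   size + map->user_wire_size         <= MIN(map->user_wire_limit,
 *                                             vm_user_wire_limit)
 *   size + ptoa_64(vm_page_wire_count) <= vm_global_user_wire_limit
 *   size + ptoa_64(vm_page_wire_count) <= max_mem -
 *                                         vm_global_no_user_wire_amount
 *
 * i.e. a per-map cap, a global user cap, and a reserve of memory that can
 * never be user-wired; kernel wirings skip all three checks.
 */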
/*
 * Update the memory wiring accounting now that the given map entry is being unwired.
 */
static void
subtract_wire_counts(
    vm_map_entry_t  entry,
    boolean_t       user_wire)

        /*
         * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
         */
        if (entry->user_wired_count == 1) {

            /*
             * We're removing the last user wire reference.  Decrement the wired_count and the total
             * user wired memory for this map.
             */
            assert(entry->wired_count >= 1);
            entry->wired_count--;
            map->user_wire_size -= entry->vme_end - entry->vme_start;

        assert(entry->user_wired_count >= 1);
        entry->user_wired_count--;

        /*
         * The kernel is unwiring the memory.  Just update the count.
         */
        assert(entry->wired_count >= 1);
        entry->wired_count--;
/*
 *  Sets the pageability of the specified address range in the
 *  target map as wired.  Regions specified as not pageable require
 *  locked-down physical memory and physical page maps.  The
 *  access_type variable indicates types of accesses that must not
 *  generate page faults.  This is checked against protection of
 *  memory being locked-down.
 *
 *  The map must not be locked, but a reference must remain to the
 *  map throughout the call.
 */
static kern_return_t
vm_map_wire_nested(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    register vm_prot_t       access_type,
    boolean_t                user_wire,
    vm_map_offset_t          pmap_addr)
    register vm_map_entry_t  entry;
    struct vm_map_entry      *first_entry, tmp_entry;
    register vm_map_offset_t s,e;
    boolean_t                need_wakeup;
    boolean_t                main_map = FALSE;
    wait_interrupt_t         interruptible_state;
    thread_t                 cur_thread;
    unsigned int             last_timestamp;

    if(map_pmap == NULL)
    last_timestamp = map->timestamp;

    VM_MAP_RANGE_CHECK(map, start, end);
    assert(page_aligned(start));
    assert(page_aligned(end));
        /* We wired what the caller asked for, zero pages */
        return KERN_SUCCESS;

    need_wakeup = FALSE;
    cur_thread = current_thread();

    if (vm_map_lookup_entry(map, s, &first_entry)) {
        entry = first_entry;
        /*
         * vm_map_clip_start will be done later.
         * We don't want to unnest any nested submaps here !
         */
        /* Start address is not in map */
        rc = KERN_INVALID_ADDRESS;

    while ((entry != vm_map_to_entry(map)) && (s < end)) {
        /*
         * At this point, we have wired from "start" to "s".
         * We still need to wire from "s" to "end".
         *
         * "entry" hasn't been clipped, so it could start before "s"
         * and/or end after "end".
         */

        /* "e" is how far we want to wire in this entry */

        /*
         * If another thread is wiring/unwiring this entry then
         * block after informing other thread to wake us up.
         */
        if (entry->in_transition) {
            wait_result_t wait_result;

            /*
             * We have not clipped the entry.  Make sure that
             * the start address is in range so that the lookup
             * below will succeed.
             * "s" is the current starting point: we've already
             * wired from "start" to "s" and we still have
             * to wire from "s" to "end".
             */

            entry->needs_wakeup = TRUE;

            /*
             * wake up anybody waiting on entries that we have
             */
                vm_map_entry_wakeup(map);
                need_wakeup = FALSE;
            /*
             * User wiring is interruptible
             */
            wait_result = vm_map_entry_wait(map,
                                            (user_wire) ? THREAD_ABORTSAFE :
            if (user_wire && wait_result == THREAD_INTERRUPTED) {
                /*
                 * undo the wirings we have done so far
                 * We do not clear the needs_wakeup flag,
                 * because we cannot tell if we were the
                 */

            /*
             * Cannot avoid a lookup here. reset timestamp.
             */
            last_timestamp = map->timestamp;

            /*
             * The entry could have been clipped, look it up again.
             * Worst that can happen is, it may not exist anymore.
             */
            if (!vm_map_lookup_entry(map, s, &first_entry)) {
                panic("vm_map_wire: re-lookup failed");

                /*
                 * User: undo everything up to the previous
                 * entry.  let vm_map_unwire worry about
                 * checking the validity of the range.
                 */
            entry = first_entry;

        if (entry->is_sub_map) {
            vm_map_offset_t sub_start;
            vm_map_offset_t sub_end;
            vm_map_offset_t local_start;
            vm_map_offset_t local_end;

            vm_map_clip_start(map, entry, s);
            vm_map_clip_end(map, entry, end);

            sub_start = entry->offset;
            sub_end = entry->vme_end;
            sub_end += entry->offset - entry->vme_start;

            local_end = entry->vme_end;
            if(map_pmap == NULL) {
                vm_object_offset_t offset;
                vm_map_entry_t     local_entry;
                vm_map_version_t   version;
                vm_map_t           lookup_map;

                if(entry->use_pmap) {
                    pmap = entry->object.sub_map->pmap;
                    /* ppc implementation requires that */
                    /* submaps pmap address ranges line */
                    /* up with parent map */
                    pmap_addr = sub_start;

                if (entry->wired_count) {
                    if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)

                    /*
                     * The map was not unlocked:
                     * no need to goto re-lookup.
                     * Just go directly to next entry.
                     */
                    entry = entry->vme_next;
                    s = entry->vme_start;

                /* call vm_map_lookup_locked to */
                /* cause any needs copy to be */
                local_start = entry->vme_start;
                vm_map_lock_write_to_read(map);
                if(vm_map_lookup_locked(
                       &lookup_map, local_start,
                       OBJECT_LOCK_EXCLUSIVE,
                       &offset, &prot, &wired,
                    vm_map_unlock_read(lookup_map);
                    vm_map_unwire(map, start,
                    return(KERN_FAILURE);
                if(real_map != lookup_map)
                    vm_map_unlock(real_map);
                vm_map_unlock_read(lookup_map);
                vm_object_unlock(object);

                /* we unlocked, so must re-lookup */
                if (!vm_map_lookup_entry(map,

                /*
                 * entry could have been "simplified",
                 */
                entry = local_entry;
                assert(s == local_start);
                vm_map_clip_start(map, entry, s);
                vm_map_clip_end(map, entry, end);
                /* re-compute "e" */

                /* did we have a change of type? */
                if (!entry->is_sub_map) {
                    last_timestamp = map->timestamp;

            local_start = entry->vme_start;

            if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)

            entry->in_transition = TRUE;

            rc = vm_map_wire_nested(entry->object.sub_map,
                                    user_wire, pmap, pmap_addr);

            /*
             * Find the entry again.  It could have been clipped
             * after we unlocked the map.
             */
            if (!vm_map_lookup_entry(map, local_start,
                panic("vm_map_wire: re-lookup failed");
            entry = first_entry;

            assert(local_start == s);
            /* re-compute "e" */

            last_timestamp = map->timestamp;
            while ((entry != vm_map_to_entry(map)) &&
                   (entry->vme_start < e)) {
                assert(entry->in_transition);
                entry->in_transition = FALSE;
                if (entry->needs_wakeup) {
                    entry->needs_wakeup = FALSE;
                if (rc != KERN_SUCCESS) {/* from vm_*_wire */
                    subtract_wire_counts(map, entry, user_wire);
                entry = entry->vme_next;
            if (rc != KERN_SUCCESS) { /* from vm_*_wire */

            /* no need to relookup again */
            s = entry->vme_start;

        /*
         * If this entry is already wired then increment
         * the appropriate wire reference count.
         */
        if (entry->wired_count) {
            /*
             * entry is already wired down, get our reference
             * after clipping to our range.
             */
            vm_map_clip_start(map, entry, s);
            vm_map_clip_end(map, entry, end);

            if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)

            /* map was not unlocked: no need to relookup */
            entry = entry->vme_next;
            s = entry->vme_start;

        /*
         * Unwired entry or wire request transmitted via submap
         */

        /*
         * Perform actions of vm_map_lookup that need the write
         * lock on the map: create a shadow object for a
         * copy-on-write region, or an object for a zero-fill
         */
        size = entry->vme_end - entry->vme_start;
        /*
         * If wiring a copy-on-write page, we need to copy it now
         * even if we're only (currently) requesting read access.
         * This is aggressive, but once it's wired we can't move it.
         */
        if (entry->needs_copy) {
            vm_object_shadow(&entry->object.vm_object,
                             &entry->offset, size);
            entry->needs_copy = FALSE;
        } else if (entry->object.vm_object == VM_OBJECT_NULL) {
            entry->object.vm_object = vm_object_allocate(size);
            entry->offset = (vm_object_offset_t)0;

        vm_map_clip_start(map, entry, s);
        vm_map_clip_end(map, entry, end);

        /* re-compute "e" */

        /*
         * Check for holes and protection mismatch.
         * Holes: Next entry should be contiguous unless this
         *        is the end of the region.
         * Protection: Access requested must be allowed, unless
         *        wiring is by protection class
         */
        if ((entry->vme_end < end) &&
            ((entry->vme_next == vm_map_to_entry(map)) ||
             (entry->vme_next->vme_start > entry->vme_end))) {
            rc = KERN_INVALID_ADDRESS;
        if ((entry->protection & access_type) != access_type) {
            /* found a protection problem */
            rc = KERN_PROTECTION_FAILURE;

        assert(entry->wired_count == 0 && entry->user_wired_count == 0);

        if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)

        entry->in_transition = TRUE;

        /*
         * This entry might get split once we unlock the map.
         * In vm_fault_wire(), we need the current range as
         * defined by this entry.  In order for this to work
         * along with a simultaneous clip operation, we make a
         * temporary copy of this entry and use that for the
         * wiring.  Note that the underlying objects do not
         * change during a clip.
         */

        /*
         * The in_transition state guarantees that the entry
         * (or entries for this range, if a split occurred) will be
         * there when the map lock is acquired for the second time.
         */

        if (!user_wire && cur_thread != THREAD_NULL)
            interruptible_state = thread_interrupt_level(THREAD_UNINT);
            interruptible_state = THREAD_UNINT;

            rc = vm_fault_wire(map,
                               &tmp_entry, map_pmap, pmap_addr);
            rc = vm_fault_wire(map,
                               &tmp_entry, map->pmap,
                               tmp_entry.vme_start);

        if (!user_wire && cur_thread != THREAD_NULL)
            thread_interrupt_level(interruptible_state);

        if (last_timestamp+1 != map->timestamp) {
            /*
             * Find the entry again.  It could have been clipped
             * after we unlocked the map.
             */
            if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
                panic("vm_map_wire: re-lookup failed");

            entry = first_entry;

        last_timestamp = map->timestamp;

        while ((entry != vm_map_to_entry(map)) &&
               (entry->vme_start < tmp_entry.vme_end)) {
            assert(entry->in_transition);
            entry->in_transition = FALSE;
            if (entry->needs_wakeup) {
                entry->needs_wakeup = FALSE;
            if (rc != KERN_SUCCESS) { /* from vm_*_wire */
                subtract_wire_counts(map, entry, user_wire);
            entry = entry->vme_next;

        if (rc != KERN_SUCCESS) { /* from vm_*_wire */

        s = entry->vme_start;
    } /* end while loop through map entries */

    if (rc == KERN_SUCCESS) {
        /* repair any damage we may have made to the VM map */
        vm_map_simplify_range(map, start, end);

    /*
     * wake up anybody waiting on entries we wired.
     */
        vm_map_entry_wakeup(map);

    if (rc != KERN_SUCCESS) {
        /* undo what has been wired so far */
        vm_map_unwire(map, start, s, user_wire);
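
/*
 * Added note: vm_map_wire() is the exported wrapper; it range-checks the
 * request, pre-reserves pmap mappings where the platform requires it
 * (mapping_prealloc), and then calls vm_map_wire_nested() with no
 * alternate pmap, so the wiring lands in the map's own pmap.
 */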
kern_return_t
vm_map_wire(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    register vm_prot_t       access_type,
    boolean_t                user_wire)

    /*
     * the calls to mapping_prealloc and mapping_relpre
     * (along with the VM_MAP_RANGE_CHECK to ensure a
     * reasonable range was passed in) are
     * currently necessary because
     * we haven't enabled kernel pre-emption
     * and/or the pmap_enter cannot purge and re-use
     */
    VM_MAP_RANGE_CHECK(map, start, end);
    assert((unsigned int) (end - start) == (end - start));
    mapping_prealloc((unsigned int) (end - start));
    kret = vm_map_wire_nested(map, start, end, access_type,
                              user_wire, (pmap_t)NULL, 0);
/*
 *  Sets the pageability of the specified address range in the target
 *  as pageable.  Regions specified must have been wired previously.
 *
 *  The map must not be locked, but a reference must remain to the map
 *  throughout the call.
 *
 *  Kernel will panic on failures.  User unwire ignores holes and
 *  unwired and in-transition entries to avoid losing memory by leaving
 */
static kern_return_t
vm_map_unwire_nested(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    boolean_t                user_wire,
    vm_map_offset_t          pmap_addr)
    register vm_map_entry_t  entry;
    struct vm_map_entry      *first_entry, tmp_entry;
    boolean_t                need_wakeup;
    boolean_t                main_map = FALSE;
    unsigned int             last_timestamp;

    if(map_pmap == NULL)
    last_timestamp = map->timestamp;

    VM_MAP_RANGE_CHECK(map, start, end);
    assert(page_aligned(start));
    assert(page_aligned(end));

        /* We unwired what the caller asked for: zero pages */
        return KERN_SUCCESS;

    if (vm_map_lookup_entry(map, start, &first_entry)) {
        entry = first_entry;
        /*
         * vm_map_clip_start will be done later.
         * We don't want to unnest any nested sub maps here !
         */
            panic("vm_map_unwire: start not found");
        /* Start address is not in map. */
        return(KERN_INVALID_ADDRESS);

    if (entry->superpage_size) {
        /* superpages are always wired */
        return KERN_INVALID_ADDRESS;

    need_wakeup = FALSE;
    while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
        if (entry->in_transition) {
            /*
             * 1)
             * Another thread is wiring down this entry. Note
             * that if it is not for the other thread we would
             * be unwiring an unwired entry.  This is not
             * permitted.  If we wait, we will be unwiring memory
             *
             * 2)
             * Another thread is unwiring this entry.  We did not
             * have a reference to it, because if we did, this
             * entry will not be getting unwired now.
             */
                /*
                 * This could happen: there could be some
                 * overlapping vslock/vsunlock operations
                 *
                 * We should probably just wait and retry,
                 * but then we have to be careful that this
                 * entry could get "simplified" after
                 * "in_transition" gets unset and before
                 * we re-lookup the entry, so we would
                 * have to re-clip the entry to avoid
                 * re-unwiring what we have already unwired...
                 * See vm_map_wire_nested().
                 *
                 * Or we could just ignore "in_transition"
                 * here and proceed to decrement the wired
                 * count(s) on this entry.  That should be fine
                 * as long as "wired_count" doesn't drop all
                 * the way to 0 (and we should panic if THAT
                 */
                panic("vm_map_unwire: in_transition entry");

            entry = entry->vme_next;

        if (entry->is_sub_map) {
            vm_map_offset_t sub_start;
            vm_map_offset_t sub_end;
            vm_map_offset_t local_end;

            vm_map_clip_start(map, entry, start);
            vm_map_clip_end(map, entry, end);

            sub_start = entry->offset;
            sub_end = entry->vme_end - entry->vme_start;
            sub_end += entry->offset;
            local_end = entry->vme_end;
            if(map_pmap == NULL) {
                if(entry->use_pmap) {
                    pmap = entry->object.sub_map->pmap;
                    pmap_addr = sub_start;

                if (entry->wired_count == 0 ||
                    (user_wire && entry->user_wired_count == 0)) {
                        panic("vm_map_unwire: entry is unwired");
                    entry = entry->vme_next;

                /*
                 * Holes: Next entry should be contiguous unless
                 * this is the end of the region.
                 */
                if (((entry->vme_end < end) &&
                     ((entry->vme_next == vm_map_to_entry(map)) ||
                      (entry->vme_next->vme_start
                       > entry->vme_end)))) {
                        panic("vm_map_unwire: non-contiguous region");
                    entry = entry->vme_next;

                    subtract_wire_counts(map, entry, user_wire);

                if (entry->wired_count != 0) {
                    entry = entry->vme_next;

                entry->in_transition = TRUE;
                tmp_entry = *entry;/* see comment in vm_map_wire() */

                /*
                 * We can unlock the map now. The in_transition state
                 * guarantees existence of the entry.
                 */
                vm_map_unwire_nested(entry->object.sub_map,
                                     sub_start, sub_end, user_wire, pmap, pmap_addr);

                if (last_timestamp+1 != map->timestamp) {
                    /*
                     * Find the entry again.  It could have been
                     * clipped or deleted after we unlocked the map.
                     */
                    if (!vm_map_lookup_entry(map,
                                             tmp_entry.vme_start,
                        panic("vm_map_unwire: re-lookup failed");
                    entry = first_entry->vme_next;
                    entry = first_entry;
                last_timestamp = map->timestamp;

                /*
                 * clear transition bit for all constituent entries
                 * that were in the original entry (saved in
                 * tmp_entry).  Also check for waiters.
                 */
                while ((entry != vm_map_to_entry(map)) &&
                       (entry->vme_start < tmp_entry.vme_end)) {
                    assert(entry->in_transition);
                    entry->in_transition = FALSE;
                    if (entry->needs_wakeup) {
                        entry->needs_wakeup = FALSE;
                    entry = entry->vme_next;

                vm_map_unwire_nested(entry->object.sub_map,
                                     sub_start, sub_end, user_wire, map_pmap,

                if (last_timestamp+1 != map->timestamp) {
                    /*
                     * Find the entry again.  It could have been
                     * clipped or deleted after we unlocked the map.
                     */
                    if (!vm_map_lookup_entry(map,
                                             tmp_entry.vme_start,
                        panic("vm_map_unwire: re-lookup failed");
                    entry = first_entry->vme_next;
                    entry = first_entry;
                last_timestamp = map->timestamp;

        if ((entry->wired_count == 0) ||
            (user_wire && entry->user_wired_count == 0)) {
                panic("vm_map_unwire: entry is unwired");
            entry = entry->vme_next;

        assert(entry->wired_count > 0 &&
               (!user_wire || entry->user_wired_count > 0));

        vm_map_clip_start(map, entry, start);
        vm_map_clip_end(map, entry, end);

        /*
         * Holes: Next entry should be contiguous unless
         * this is the end of the region.
         */
        if (((entry->vme_end < end) &&
             ((entry->vme_next == vm_map_to_entry(map)) ||
              (entry->vme_next->vme_start > entry->vme_end)))) {
                panic("vm_map_unwire: non-contiguous region");
            entry = entry->vme_next;

            subtract_wire_counts(map, entry, user_wire);

        if (entry->wired_count != 0) {
            entry = entry->vme_next;

        if(entry->zero_wired_pages) {
            entry->zero_wired_pages = FALSE;

        entry->in_transition = TRUE;
        tmp_entry = *entry;     /* see comment in vm_map_wire() */

        /*
         * We can unlock the map now. The in_transition state
         * guarantees existence of the entry.
         */
            vm_fault_unwire(map,
                            &tmp_entry, FALSE, map_pmap, pmap_addr);
            vm_fault_unwire(map,
                            &tmp_entry, FALSE, map->pmap,
                            tmp_entry.vme_start);

        if (last_timestamp+1 != map->timestamp) {
            /*
             * Find the entry again.  It could have been clipped
             * or deleted after we unlocked the map.
             */
            if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
                panic("vm_map_unwire: re-lookup failed");
            entry = first_entry->vme_next;
            entry = first_entry;
        last_timestamp = map->timestamp;

        /*
         * clear transition bit for all constituent entries that
         * were in the original entry (saved in tmp_entry).  Also
         * check for waiters.
         */
        while ((entry != vm_map_to_entry(map)) &&
               (entry->vme_start < tmp_entry.vme_end)) {
            assert(entry->in_transition);
            entry->in_transition = FALSE;
            if (entry->needs_wakeup) {
                entry->needs_wakeup = FALSE;
            entry = entry->vme_next;

    /*
     * We might have fragmented the address space when we wired this
     * range of addresses.  Attempt to re-coalesce these VM map entries
     * with their neighbors now that they're no longer wired.
     * Under some circumstances, address space fragmentation can
     * prevent VM object shadow chain collapsing, which can cause
     */
    vm_map_simplify_range(map, start, end);

    /*
     * wake up anybody waiting on entries that we have unwired.
     */
        vm_map_entry_wakeup(map);
    return(KERN_SUCCESS);

kern_return_t
vm_map_unwire(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    boolean_t                user_wire)
    return vm_map_unwire_nested(map, start, end,
                                user_wire, (pmap_t)NULL, 0);
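
/*
 * Added note: unwiring is the mirror image of the wiring path above -- the
 * same clipping, in_transition marking and timestamp-based re-lookup dance
 * is repeated so that concurrent map operations never observe a
 * half-unwired entry.
 */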
/*
 *  vm_map_entry_delete:    [ internal use only ]
 *
 *  Deallocate the given entry from the target map.
 */
vm_map_entry_delete(
    register vm_map_t       map,
    register vm_map_entry_t entry)
    register vm_map_offset_t s, e;
    register vm_object_t     object;
    register vm_map_t        submap;

    s = entry->vme_start;
    assert(page_aligned(s));
    assert(page_aligned(e));
    assert(entry->wired_count == 0);
    assert(entry->user_wired_count == 0);
    assert(!entry->permanent);

    if (entry->is_sub_map) {
        submap = entry->object.sub_map;
        object = entry->object.vm_object;

    vm_map_entry_unlink(map, entry);

    vm_map_entry_dispose(map, entry);

    /*
     *  Deallocate the object only after removing all
     *  pmap entries pointing to its pages.
     */
        vm_map_deallocate(submap);
        vm_object_deallocate(object);
vm_map_submap_pmap_clean(
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_map_offset_t offset)
    vm_map_offset_t submap_start;
    vm_map_offset_t submap_end;
    vm_map_size_t   remove_size;
    vm_map_entry_t  entry;

    submap_end = offset + (end - start);
    submap_start = offset;

    vm_map_lock_read(sub_map);
    if(vm_map_lookup_entry(sub_map, offset, &entry)) {

        remove_size = (entry->vme_end - entry->vme_start);
        if(offset > entry->vme_start)
            remove_size -= offset - entry->vme_start;

        if(submap_end < entry->vme_end) {
                entry->vme_end - submap_end;
        if(entry->is_sub_map) {
            vm_map_submap_pmap_clean(
                start + remove_size,
                entry->object.sub_map,

            if((map->mapped) && (map->ref_count)
               && (entry->object.vm_object != NULL)) {
                vm_object_pmap_protect(
                    entry->object.vm_object,
                pmap_remove(map->pmap,
                            (addr64_t)(start + remove_size));

    entry = entry->vme_next;

    while((entry != vm_map_to_entry(sub_map))
          && (entry->vme_start < submap_end)) {
        remove_size = (entry->vme_end - entry->vme_start);
        if(submap_end < entry->vme_end) {
            remove_size -= entry->vme_end - submap_end;
        if(entry->is_sub_map) {
            vm_map_submap_pmap_clean(
                (start + entry->vme_start) - offset,
                ((start + entry->vme_start) - offset) + remove_size,
                entry->object.sub_map,
            if((map->mapped) && (map->ref_count)
               && (entry->object.vm_object != NULL)) {
                vm_object_pmap_protect(
                    entry->object.vm_object,
                pmap_remove(map->pmap,
                            (addr64_t)((start + entry->vme_start)
                            (addr64_t)(((start + entry->vme_start)
                                        - offset) + remove_size));
        entry = entry->vme_next;
    vm_map_unlock_read(sub_map);
/*
 *	vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target map.
 *	Removes all user wirings.  Unwires one kernel wiring if
 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
 *
 *	This routine is called with map locked and leaves map locked.
 */
static kern_return_t
vm_map_delete(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	int			flags,
	vm_map_t		zap_map)
{
	vm_map_entry_t		entry, next;
	struct	 vm_map_entry	*first_entry, tmp_entry;
	register vm_map_offset_t s;
	register vm_object_t	object;
	boolean_t		need_wakeup;
	unsigned int		last_timestamp = ~0; /* unlikely value */
	int			interruptible;

	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
		THREAD_ABORTSAFE : THREAD_UNINT;

	/*
	 * All our DMA I/O operations in IOKit are currently done by
	 * wiring through the map entries of the task requesting the I/O.
	 * Because of this, we must always wait for kernel wirings
	 * to go away on the entries before deleting them.
	 *
	 * Any caller who wants to actually remove a kernel wiring
	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
	 * properly remove one wiring instead of blasting through
	 * them all.
	 */
	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4835 * Find the start of the region, and clip it
4837 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
4838 entry
= first_entry
;
4839 if (entry
->superpage_size
&& (start
& ~SUPERPAGE_MASK
)) { /* extend request to whole entry */ start
= SUPERPAGE_ROUND_DOWN(start
);
4840 start
= SUPERPAGE_ROUND_DOWN(start
);
4843 if (start
== entry
->vme_start
) {
4845 * No need to clip. We don't want to cause
4846 * any unnecessary unnesting in this case...
4849 vm_map_clip_start(map
, entry
, start
);
4853 * Fix the lookup hint now, rather than each
4854 * time through the loop.
4856 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
4858 entry
= first_entry
->vme_next
;
4862 if (entry
->superpage_size
)
4863 end
= SUPERPAGE_ROUND_UP(end
);
4865 need_wakeup
= FALSE
;
4867 * Step through all entries in this region
4869 s
= entry
->vme_start
;
4870 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
4872 * At this point, we have deleted all the memory entries
4873 * between "start" and "s". We still need to delete
4874 * all memory entries between "s" and "end".
4875 * While we were blocked and the map was unlocked, some
4876 * new memory entries could have been re-allocated between
4877 * "start" and "s" and we don't want to mess with those.
4878 * Some of those entries could even have been re-assembled
4879 * with an entry after "s" (in vm_map_simplify_entry()), so
4880 * we may have to vm_map_clip_start() again.
4883 if (entry
->vme_start
>= s
) {
4885 * This entry starts on or after "s"
4886 * so no need to clip its start.
4890 * This entry has been re-assembled by a
4891 * vm_map_simplify_entry(). We need to
4892 * re-clip its start.
4894 vm_map_clip_start(map
, entry
, s
);
4896 if (entry
->vme_end
<= end
) {
4898 * This entry is going away completely, so no need
4899 * to clip and possibly cause an unnecessary unnesting.
4902 vm_map_clip_end(map
, entry
, end
);
4905 if (entry
->permanent
) {
4906 panic("attempt to remove permanent VM map entry "
4907 "%p [0x%llx:0x%llx]\n",
4908 entry
, (uint64_t) s
, (uint64_t) end
);
4912 if (entry
->in_transition
) {
4913 wait_result_t wait_result
;
4916 * Another thread is wiring/unwiring this entry.
4917 * Let the other thread know we are waiting.
4919 assert(s
== entry
->vme_start
);
4920 entry
->needs_wakeup
= TRUE
;
4923 * wake up anybody waiting on entries that we have
4924 * already unwired/deleted.
4927 vm_map_entry_wakeup(map
);
4928 need_wakeup
= FALSE
;
4931 wait_result
= vm_map_entry_wait(map
, interruptible
);
4933 if (interruptible
&&
4934 wait_result
== THREAD_INTERRUPTED
) {
4936 * We do not clear the needs_wakeup flag,
4937 * since we cannot tell if we were the only one.
4940 return KERN_ABORTED
;
4944 * The entry could have been clipped or it
4945 * may not exist anymore. Look it up again.
4947 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
4948 assert((map
!= kernel_map
) &&
4949 (!entry
->is_sub_map
));
4951 * User: use the next entry
4953 entry
= first_entry
->vme_next
;
4954 s
= entry
->vme_start
;
4956 entry
= first_entry
;
4957 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
4959 last_timestamp
= map
->timestamp
;
4961 } /* end in_transition */
4963 if (entry
->wired_count
) {
4964 boolean_t user_wire
;
4966 user_wire
= entry
->user_wired_count
> 0;
4969 * Remove a kernel wiring if requested
4971 if (flags
& VM_MAP_REMOVE_KUNWIRE
) {
4972 entry
->wired_count
--;
4976 * Remove all user wirings for proper accounting
4978 if (entry
->user_wired_count
> 0) {
4979 while (entry
->user_wired_count
)
4980 subtract_wire_counts(map
, entry
, user_wire
);
4983 if (entry
->wired_count
!= 0) {
4984 assert(map
!= kernel_map
);
				 * Cannot continue.  Typical case is when
				 * a user thread has physical io pending
				 * on this page.  Either wait for the
				 * kernel wiring to go away or return an
4992 if (flags
& VM_MAP_REMOVE_WAIT_FOR_KWIRE
) {
4993 wait_result_t wait_result
;
4995 assert(s
== entry
->vme_start
);
4996 entry
->needs_wakeup
= TRUE
;
4997 wait_result
= vm_map_entry_wait(map
,
5000 if (interruptible
&&
5001 wait_result
== THREAD_INTERRUPTED
) {
5003 * We do not clear the
5004 * needs_wakeup flag, since we
5005 * cannot tell if we were the
5009 return KERN_ABORTED
;
5013 * The entry could have been clipped or
5014 * it may not exist anymore. Look it
5017 if (!vm_map_lookup_entry(map
, s
,
5019 assert(map
!= kernel_map
);
5021 * User: use the next entry
5023 entry
= first_entry
->vme_next
;
5024 s
= entry
->vme_start
;
5026 entry
= first_entry
;
5027 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
5029 last_timestamp
= map
->timestamp
;
5033 return KERN_FAILURE
;
5037 entry
->in_transition
= TRUE
;
5039 * copy current entry. see comment in vm_map_wire()
5042 assert(s
== entry
->vme_start
);
			 * We can unlock the map now.  The in_transition
			 * state guarantees existence of the entry.
5050 if (tmp_entry
.is_sub_map
) {
5052 vm_map_offset_t sub_start
, sub_end
;
5054 vm_map_offset_t pmap_addr
;
5057 sub_map
= tmp_entry
.object
.sub_map
;
5058 sub_start
= tmp_entry
.offset
;
5059 sub_end
= sub_start
+ (tmp_entry
.vme_end
-
5060 tmp_entry
.vme_start
);
5061 if (tmp_entry
.use_pmap
) {
5062 pmap
= sub_map
->pmap
;
5063 pmap_addr
= tmp_entry
.vme_start
;
5066 pmap_addr
= tmp_entry
.vme_start
;
5068 (void) vm_map_unwire_nested(sub_map
,
5074 vm_fault_unwire(map
, &tmp_entry
,
5075 tmp_entry
.object
.vm_object
== kernel_object
,
5076 map
->pmap
, tmp_entry
.vme_start
);
5081 if (last_timestamp
+1 != map
->timestamp
) {
5083 * Find the entry again. It could have
5084 * been clipped after we unlocked the map.
5086 if (!vm_map_lookup_entry(map
, s
, &first_entry
)){
5087 assert((map
!= kernel_map
) &&
5088 (!entry
->is_sub_map
));
5089 first_entry
= first_entry
->vme_next
;
5090 s
= first_entry
->vme_start
;
5092 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
5095 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
5096 first_entry
= entry
;
5099 last_timestamp
= map
->timestamp
;
5101 entry
= first_entry
;
5102 while ((entry
!= vm_map_to_entry(map
)) &&
5103 (entry
->vme_start
< tmp_entry
.vme_end
)) {
5104 assert(entry
->in_transition
);
5105 entry
->in_transition
= FALSE
;
5106 if (entry
->needs_wakeup
) {
5107 entry
->needs_wakeup
= FALSE
;
5110 entry
= entry
->vme_next
;
5113 * We have unwired the entry(s). Go back and
5116 entry
= first_entry
;
5120 /* entry is unwired */
5121 assert(entry
->wired_count
== 0);
5122 assert(entry
->user_wired_count
== 0);
5124 assert(s
== entry
->vme_start
);
5126 if (flags
& VM_MAP_REMOVE_NO_PMAP_CLEANUP
) {
5128 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5129 * vm_map_delete(), some map entries might have been
5130 * transferred to a "zap_map", which doesn't have a
5131 * pmap. The original pmap has already been flushed
5132 * in the vm_map_delete() call targeting the original
5133 * map, but when we get to destroying the "zap_map",
5134 * we don't have any pmap to flush, so let's just skip
5137 } else if (entry
->is_sub_map
) {
5138 if (entry
->use_pmap
) {
5139 #ifndef NO_NESTED_PMAP
5140 pmap_unnest(map
->pmap
,
5141 (addr64_t
)entry
->vme_start
,
5142 entry
->vme_end
- entry
->vme_start
);
5143 #endif /* NO_NESTED_PMAP */
5144 if ((map
->mapped
) && (map
->ref_count
)) {
5145 /* clean up parent map/maps */
5146 vm_map_submap_pmap_clean(
5147 map
, entry
->vme_start
,
5149 entry
->object
.sub_map
,
5153 vm_map_submap_pmap_clean(
5154 map
, entry
->vme_start
, entry
->vme_end
,
5155 entry
->object
.sub_map
,
5158 } else if (entry
->object
.vm_object
!= kernel_object
) {
5159 object
= entry
->object
.vm_object
;
5160 if((map
->mapped
) && (map
->ref_count
)) {
5161 vm_object_pmap_protect(
5162 object
, entry
->offset
,
5163 entry
->vme_end
- entry
->vme_start
,
5168 pmap_remove(map
->pmap
,
5169 (addr64_t
)entry
->vme_start
,
5170 (addr64_t
)entry
->vme_end
);
5175 * All pmap mappings for this map entry must have been
5178 assert(vm_map_pmap_is_empty(map
,
5182 next
= entry
->vme_next
;
5183 s
= next
->vme_start
;
5184 last_timestamp
= map
->timestamp
;
5186 if ((flags
& VM_MAP_REMOVE_SAVE_ENTRIES
) &&
5187 zap_map
!= VM_MAP_NULL
) {
5188 vm_map_size_t entry_size
;
5190 * The caller wants to save the affected VM map entries
5191 * into the "zap_map". The caller will take care of
5194 /* unlink the entry from "map" ... */
5195 vm_map_entry_unlink(map
, entry
);
5196 /* ... and add it to the end of the "zap_map" */
5197 vm_map_entry_link(zap_map
,
5198 vm_map_last_entry(zap_map
),
5200 entry_size
= entry
->vme_end
- entry
->vme_start
;
5201 map
->size
-= entry_size
;
5202 zap_map
->size
+= entry_size
;
5203 /* we didn't unlock the map, so no timestamp increase */
5206 vm_map_entry_delete(map
, entry
);
5207 /* vm_map_entry_delete unlocks the map */
5213 if(entry
== vm_map_to_entry(map
)) {
5216 if (last_timestamp
+1 != map
->timestamp
) {
			 * We are responsible for deleting everything
			 * from the given space.  If someone has interfered,
			 * we pick up where we left off; back-fills should
			 * be all right for anyone except map_delete, and
			 * we have to assume that the task has been fully
			 * disabled before we get here.
5225 if (!vm_map_lookup_entry(map
, s
, &entry
)){
5226 entry
= entry
->vme_next
;
5227 s
= entry
->vme_start
;
5229 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
5232 * others can not only allocate behind us, we can
5233 * also see coalesce while we don't have the map lock
5235 if(entry
== vm_map_to_entry(map
)) {
5239 last_timestamp
= map
->timestamp
;
5242 if (map
->wait_for_space
)
5243 thread_wakeup((event_t
) map
);
5245 * wake up anybody waiting on entries that we have already deleted.
5248 vm_map_entry_wakeup(map
);
5250 return KERN_SUCCESS
;
}

/*
 *	vm_map_remove:
 *
 *	Remove the given address range from the target map.
 *	This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove(
	register vm_map_t		map,
	register vm_map_offset_t	start,
	register vm_map_offset_t	end,
	register boolean_t		flags)
{
	register kern_return_t	result;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	vm_map_unlock(map);

	return(result);
}
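
/*
 * Usage sketch (hypothetical caller, not taken from this file): a
 * typical removal of a page-aligned range with no special flags would
 * look like
 *
 *	kr = vm_map_remove(map,
 *			   vm_map_trunc_page(addr),
 *			   vm_map_round_page(addr + size),
 *			   VM_MAP_NO_FLAGS);
 *
 * The range check and the actual work are done by vm_map_delete()
 * with the map write-locked.
 */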
/*
 *	Routine:	vm_map_copy_discard
 *
 *	Description:
 *		Dispose of a map copy object (returned by
 *		vm_map_copyin).
 */
void
vm_map_copy_discard(
	vm_map_copy_t	copy)
{
	if (copy == VM_MAP_COPY_NULL)
		return;

	switch (copy->type) {
	case VM_MAP_COPY_ENTRY_LIST:
		while (vm_map_copy_first_entry(copy) !=
		       vm_map_copy_to_entry(copy)) {
			vm_map_entry_t	entry = vm_map_copy_first_entry(copy);

			vm_map_copy_entry_unlink(copy, entry);
			vm_object_deallocate(entry->object.vm_object);
			vm_map_copy_entry_dispose(copy, entry);
		}
		break;
	case VM_MAP_COPY_OBJECT:
		vm_object_deallocate(copy->cpy_object);
		break;
	case VM_MAP_COPY_KERNEL_BUFFER:

		/*
		 * The vm_map_copy_t and possibly the data buffer were
		 * allocated by a single call to kalloc(), i.e. the
		 * vm_map_copy_t was not allocated out of the zone.
		 */
		kfree(copy, copy->cpy_kalloc_size);
		return;
	}
	zfree(vm_map_copy_zone, copy);
}
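
/*
 * Added note: for the VM_MAP_COPY_KERNEL_BUFFER case above, the copy
 * header and its data were obtained from a single kalloc() (see
 * vm_map_copyin_kernel_buffer() below), which is why the whole thing is
 * released with kfree() rather than returned to vm_map_copy_zone.
 */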
/*
 *	Routine:	vm_map_copy_copy
 *
 *	Description:
 *		Move the information in a map copy object to
 *		a new map copy object, leaving the old one
 *		empty.
 *
 *		This is used by kernel routines that need
 *		to look at out-of-line data (in copyin form)
 *		before deciding whether to return SUCCESS.
 *		If the routine returns FAILURE, the original
 *		copy object will be deallocated; therefore,
 *		these routines must make a copy of the copy
 *		object and leave the original empty so that
 *		deallocation will not fail.
 */
vm_map_copy_t
vm_map_copy_copy(
	vm_map_copy_t	copy)
{
	vm_map_copy_t	new_copy;

	if (copy == VM_MAP_COPY_NULL)
		return VM_MAP_COPY_NULL;

	/*
	 * Allocate a new copy object, and copy the information
	 * from the old one into it.
	 */

	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	*new_copy = *copy;

	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * The links in the entry chain must be
		 * changed to point to the new copy object.
		 */
		vm_map_copy_first_entry(copy)->vme_prev
			= vm_map_copy_to_entry(new_copy);
		vm_map_copy_last_entry(copy)->vme_next
			= vm_map_copy_to_entry(new_copy);
	}

	/*
	 * Change the old copy object into one that contains
	 * nothing to be deallocated.
	 */
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = VM_OBJECT_NULL;

	/*
	 * Return the new object.
	 */
	return new_copy;
}
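
/*
 * Usage sketch (hypothetical, illustrating the pattern described above):
 * take a private copy of the copy object before an operation that may
 * fail, so the original can still be returned or discarded cleanly.
 * some_operation_that_may_consume() is a placeholder, not a real routine.
 *
 *	vm_map_copy_t saved = vm_map_copy_copy(copy);
 *	kr = some_operation_that_may_consume(saved);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(saved);
 */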
5376 static kern_return_t
5377 vm_map_overwrite_submap_recurse(
5379 vm_map_offset_t dst_addr
,
5380 vm_map_size_t dst_size
)
5382 vm_map_offset_t dst_end
;
5383 vm_map_entry_t tmp_entry
;
5384 vm_map_entry_t entry
;
5385 kern_return_t result
;
5386 boolean_t encountered_sub_map
= FALSE
;
5391 * Verify that the destination is all writeable
5392 * initially. We have to trunc the destination
5393 * address and round the copy size or we'll end up
5394 * splitting entries in strange ways.
5397 dst_end
= vm_map_round_page(dst_addr
+ dst_size
);
5398 vm_map_lock(dst_map
);
5401 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
5402 vm_map_unlock(dst_map
);
5403 return(KERN_INVALID_ADDRESS
);
5406 vm_map_clip_start(dst_map
, tmp_entry
, vm_map_trunc_page(dst_addr
));
5407 assert(!tmp_entry
->use_pmap
); /* clipping did unnest if needed */
5409 for (entry
= tmp_entry
;;) {
5410 vm_map_entry_t next
;
5412 next
= entry
->vme_next
;
5413 while(entry
->is_sub_map
) {
5414 vm_map_offset_t sub_start
;
5415 vm_map_offset_t sub_end
;
5416 vm_map_offset_t local_end
;
5418 if (entry
->in_transition
) {
5420 * Say that we are waiting, and wait for entry.
5422 entry
->needs_wakeup
= TRUE
;
5423 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5428 encountered_sub_map
= TRUE
;
5429 sub_start
= entry
->offset
;
5431 if(entry
->vme_end
< dst_end
)
5432 sub_end
= entry
->vme_end
;
5435 sub_end
-= entry
->vme_start
;
5436 sub_end
+= entry
->offset
;
5437 local_end
= entry
->vme_end
;
5438 vm_map_unlock(dst_map
);
5440 result
= vm_map_overwrite_submap_recurse(
5441 entry
->object
.sub_map
,
5443 sub_end
- sub_start
);
5445 if(result
!= KERN_SUCCESS
)
5447 if (dst_end
<= entry
->vme_end
)
5448 return KERN_SUCCESS
;
5449 vm_map_lock(dst_map
);
5450 if(!vm_map_lookup_entry(dst_map
, local_end
,
5452 vm_map_unlock(dst_map
);
5453 return(KERN_INVALID_ADDRESS
);
5456 next
= entry
->vme_next
;
5459 if ( ! (entry
->protection
& VM_PROT_WRITE
)) {
5460 vm_map_unlock(dst_map
);
5461 return(KERN_PROTECTION_FAILURE
);
5465 * If the entry is in transition, we must wait
5466 * for it to exit that state. Anything could happen
5467 * when we unlock the map, so start over.
5469 if (entry
->in_transition
) {
5472 * Say that we are waiting, and wait for entry.
5474 entry
->needs_wakeup
= TRUE
;
5475 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5481 * our range is contained completely within this map entry
5483 if (dst_end
<= entry
->vme_end
) {
5484 vm_map_unlock(dst_map
);
5485 return KERN_SUCCESS
;
5488 * check that range specified is contiguous region
5490 if ((next
== vm_map_to_entry(dst_map
)) ||
5491 (next
->vme_start
!= entry
->vme_end
)) {
5492 vm_map_unlock(dst_map
);
5493 return(KERN_INVALID_ADDRESS
);
5497 * Check for permanent objects in the destination.
5499 if ((entry
->object
.vm_object
!= VM_OBJECT_NULL
) &&
5500 ((!entry
->object
.vm_object
->internal
) ||
5501 (entry
->object
.vm_object
->true_share
))) {
5502 if(encountered_sub_map
) {
5503 vm_map_unlock(dst_map
);
5504 return(KERN_FAILURE
);
5511 vm_map_unlock(dst_map
);
5512 return(KERN_SUCCESS
);
/*
 *	Routine:	vm_map_copy_overwrite
 *
 *	Description:
 *		Copy the memory described by the map copy
 *		object (copy; returned by vm_map_copyin) onto
 *		the specified destination region (dst_map, dst_addr).
 *		The destination must be writeable.
 *
 *		Unlike vm_map_copyout, this routine actually
 *		writes over previously-mapped memory.  If the
 *		previous mapping was to a permanent (user-supplied)
 *		memory object, it is preserved.
 *
 *		The attributes (protection and inheritance) of the
 *		destination region are preserved.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 *
 *	Implementation notes:
 *		To overwrite aligned temporary virtual memory, it is
 *		sufficient to remove the previous mapping and insert
 *		the new copy.  This replacement is done either on
 *		the whole region (if no permanent virtual memory
 *		objects are embedded in the destination region) or
 *		in individual map entries.
 *
 *		To overwrite permanent virtual memory, it is necessary
 *		to copy each page, as the external memory management
 *		interface currently does not provide any optimizations.
 *
 *		Unaligned memory also has to be copied.  It is possible
 *		to use 'vm_trickery' to copy the aligned data.  This is
 *		not done but not hard to implement.
 *
 *		Once a page of permanent memory has been overwritten,
 *		it is impossible to interrupt this function; otherwise,
 *		the call would be neither atomic nor location-independent.
 *		The kernel-state portion of a user thread must be
 *		interruptible.
 *
 *		It may be expensive to forward all requests that might
 *		overwrite permanent memory (vm_write, vm_copy) to
 *		uninterruptible kernel threads.  This routine may be
 *		called by interruptible threads; however, success is
 *		not guaranteed -- if the request cannot be performed
 *		atomically and interruptibly, an error indication is
 *		returned.
 */
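
/*
 * Added note: the two passes referred to above are implemented by
 * vm_map_copy_overwrite_aligned() and vm_map_copy_overwrite_unaligned()
 * further down in this file.  Page-aligned entries are replaced
 * wholesale, while unaligned portions are physically copied via
 * vm_fault_copy().
 */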
static kern_return_t
vm_map_copy_overwrite_nested(
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_copy_t		copy,
	boolean_t		interruptible,
	pmap_t			pmap)
{
	vm_map_offset_t		dst_end;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	kern_return_t		kr;
	boolean_t		aligned = TRUE;
	boolean_t		contains_permanent_objects = FALSE;
	boolean_t		encountered_sub_map = FALSE;
	vm_map_offset_t		base_addr;
	vm_map_size_t		copy_size;
	vm_map_size_t		total_size;
5587 * Check for null copy object.
5590 if (copy
== VM_MAP_COPY_NULL
)
5591 return(KERN_SUCCESS
);
5594 * Check for special kernel buffer allocated
5595 * by new_ipc_kmsg_copyin.
5598 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
5599 return(vm_map_copyout_kernel_buffer(
5605 * Only works for entry lists at the moment. Will
5606 * support page lists later.
5609 assert(copy
->type
== VM_MAP_COPY_ENTRY_LIST
);
5611 if (copy
->size
== 0) {
5612 vm_map_copy_discard(copy
);
5613 return(KERN_SUCCESS
);
5617 * Verify that the destination is all writeable
5618 * initially. We have to trunc the destination
5619 * address and round the copy size or we'll end up
5620 * splitting entries in strange ways.
5623 if (!page_aligned(copy
->size
) ||
5624 !page_aligned (copy
->offset
) ||
5625 !page_aligned (dst_addr
))
5628 dst_end
= vm_map_round_page(dst_addr
+ copy
->size
);
5630 dst_end
= dst_addr
+ copy
->size
;
5633 vm_map_lock(dst_map
);
5635 /* LP64todo - remove this check when vm_map_commpage64()
5636 * no longer has to stuff in a map_entry for the commpage
5637 * above the map's max_offset.
5639 if (dst_addr
>= dst_map
->max_offset
) {
5640 vm_map_unlock(dst_map
);
5641 return(KERN_INVALID_ADDRESS
);
5645 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
5646 vm_map_unlock(dst_map
);
5647 return(KERN_INVALID_ADDRESS
);
5649 vm_map_clip_start(dst_map
, tmp_entry
, vm_map_trunc_page(dst_addr
));
5650 for (entry
= tmp_entry
;;) {
5651 vm_map_entry_t next
= entry
->vme_next
;
5653 while(entry
->is_sub_map
) {
5654 vm_map_offset_t sub_start
;
5655 vm_map_offset_t sub_end
;
5656 vm_map_offset_t local_end
;
5658 if (entry
->in_transition
) {
5661 * Say that we are waiting, and wait for entry.
5663 entry
->needs_wakeup
= TRUE
;
5664 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5669 local_end
= entry
->vme_end
;
5670 if (!(entry
->needs_copy
)) {
5671 /* if needs_copy we are a COW submap */
5672 /* in such a case we just replace so */
5673 /* there is no need for the follow- */
5675 encountered_sub_map
= TRUE
;
5676 sub_start
= entry
->offset
;
5678 if(entry
->vme_end
< dst_end
)
5679 sub_end
= entry
->vme_end
;
5682 sub_end
-= entry
->vme_start
;
5683 sub_end
+= entry
->offset
;
5684 vm_map_unlock(dst_map
);
5686 kr
= vm_map_overwrite_submap_recurse(
5687 entry
->object
.sub_map
,
5689 sub_end
- sub_start
);
5690 if(kr
!= KERN_SUCCESS
)
5692 vm_map_lock(dst_map
);
5695 if (dst_end
<= entry
->vme_end
)
5696 goto start_overwrite
;
5697 if(!vm_map_lookup_entry(dst_map
, local_end
,
5699 vm_map_unlock(dst_map
);
5700 return(KERN_INVALID_ADDRESS
);
5702 next
= entry
->vme_next
;
5705 if ( ! (entry
->protection
& VM_PROT_WRITE
)) {
5706 vm_map_unlock(dst_map
);
5707 return(KERN_PROTECTION_FAILURE
);
5711 * If the entry is in transition, we must wait
5712 * for it to exit that state. Anything could happen
5713 * when we unlock the map, so start over.
5715 if (entry
->in_transition
) {
5718 * Say that we are waiting, and wait for entry.
5720 entry
->needs_wakeup
= TRUE
;
5721 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5727 * our range is contained completely within this map entry
5729 if (dst_end
<= entry
->vme_end
)
5732 * check that range specified is contiguous region
5734 if ((next
== vm_map_to_entry(dst_map
)) ||
5735 (next
->vme_start
!= entry
->vme_end
)) {
5736 vm_map_unlock(dst_map
);
5737 return(KERN_INVALID_ADDRESS
);
5742 * Check for permanent objects in the destination.
5744 if ((entry
->object
.vm_object
!= VM_OBJECT_NULL
) &&
5745 ((!entry
->object
.vm_object
->internal
) ||
5746 (entry
->object
.vm_object
->true_share
))) {
5747 contains_permanent_objects
= TRUE
;
5755 * If there are permanent objects in the destination, then
5756 * the copy cannot be interrupted.
5759 if (interruptible
&& contains_permanent_objects
) {
5760 vm_map_unlock(dst_map
);
5761 return(KERN_FAILURE
); /* XXX */
5766 * Make a second pass, overwriting the data
5767 * At the beginning of each loop iteration,
5768 * the next entry to be overwritten is "tmp_entry"
5769 * (initially, the value returned from the lookup above),
5770 * and the starting address expected in that entry
5774 total_size
= copy
->size
;
5775 if(encountered_sub_map
) {
5777 /* re-calculate tmp_entry since we've had the map */
5779 if (!vm_map_lookup_entry( dst_map
, dst_addr
, &tmp_entry
)) {
5780 vm_map_unlock(dst_map
);
5781 return(KERN_INVALID_ADDRESS
);
5784 copy_size
= copy
->size
;
5787 base_addr
= dst_addr
;
5789 /* deconstruct the copy object and do in parts */
5790 /* only in sub_map, interruptable case */
5791 vm_map_entry_t copy_entry
;
5792 vm_map_entry_t previous_prev
= VM_MAP_ENTRY_NULL
;
5793 vm_map_entry_t next_copy
= VM_MAP_ENTRY_NULL
;
5795 int remaining_entries
= 0;
5796 vm_map_offset_t new_offset
= 0;
5798 for (entry
= tmp_entry
; copy_size
== 0;) {
5799 vm_map_entry_t next
;
5801 next
= entry
->vme_next
;
			/* tmp_entry and base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpace tmp_entry, and the copy_size */
			/* may reflect the distance between them. */
			/* If the current entry is found to be in transition, */
			/* we will start over at the beginning or at the last */
			/* encounter of a submap, as dictated by base_addr; */
			/* we will zero copy_size accordingly. */
5811 if (entry
->in_transition
) {
5813 * Say that we are waiting, and wait for entry.
5815 entry
->needs_wakeup
= TRUE
;
5816 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5818 if(!vm_map_lookup_entry(dst_map
, base_addr
,
5820 vm_map_unlock(dst_map
);
5821 return(KERN_INVALID_ADDRESS
);
5827 if(entry
->is_sub_map
) {
5828 vm_map_offset_t sub_start
;
5829 vm_map_offset_t sub_end
;
5830 vm_map_offset_t local_end
;
5832 if (entry
->needs_copy
) {
				/* if this is a COW submap, */
				/* just back the range with an */
				/* anonymous entry */
5836 if(entry
->vme_end
< dst_end
)
5837 sub_end
= entry
->vme_end
;
5840 if(entry
->vme_start
< base_addr
)
5841 sub_start
= base_addr
;
5843 sub_start
= entry
->vme_start
;
5845 dst_map
, entry
, sub_end
);
5847 dst_map
, entry
, sub_start
);
5848 assert(!entry
->use_pmap
);
5849 entry
->is_sub_map
= FALSE
;
5851 entry
->object
.sub_map
);
5852 entry
->object
.sub_map
= NULL
;
5853 entry
->is_shared
= FALSE
;
5854 entry
->needs_copy
= FALSE
;
5858 * We should propagate the protections
5859 * of the submap entry here instead
5860 * of forcing them to VM_PROT_ALL...
5861 * Or better yet, we should inherit
5862 * the protection of the copy_entry.
5864 entry
->protection
= VM_PROT_ALL
;
5865 entry
->max_protection
= VM_PROT_ALL
;
5866 entry
->wired_count
= 0;
5867 entry
->user_wired_count
= 0;
5868 if(entry
->inheritance
5869 == VM_INHERIT_SHARE
)
5870 entry
->inheritance
= VM_INHERIT_COPY
;
5873 /* first take care of any non-sub_map */
5874 /* entries to send */
5875 if(base_addr
< entry
->vme_start
) {
5878 entry
->vme_start
- base_addr
;
5881 sub_start
= entry
->offset
;
5883 if(entry
->vme_end
< dst_end
)
5884 sub_end
= entry
->vme_end
;
5887 sub_end
-= entry
->vme_start
;
5888 sub_end
+= entry
->offset
;
5889 local_end
= entry
->vme_end
;
5890 vm_map_unlock(dst_map
);
5891 copy_size
= sub_end
- sub_start
;
5893 /* adjust the copy object */
5894 if (total_size
> copy_size
) {
5895 vm_map_size_t local_size
= 0;
5896 vm_map_size_t entry_size
;
5899 new_offset
= copy
->offset
;
5900 copy_entry
= vm_map_copy_first_entry(copy
);
5902 vm_map_copy_to_entry(copy
)){
5903 entry_size
= copy_entry
->vme_end
-
5904 copy_entry
->vme_start
;
5905 if((local_size
< copy_size
) &&
5906 ((local_size
+ entry_size
)
5908 vm_map_copy_clip_end(copy
,
5910 copy_entry
->vme_start
+
5911 (copy_size
- local_size
));
5912 entry_size
= copy_entry
->vme_end
-
5913 copy_entry
->vme_start
;
5914 local_size
+= entry_size
;
5915 new_offset
+= entry_size
;
5917 if(local_size
>= copy_size
) {
5918 next_copy
= copy_entry
->vme_next
;
5919 copy_entry
->vme_next
=
5920 vm_map_copy_to_entry(copy
);
5922 copy
->cpy_hdr
.links
.prev
;
5923 copy
->cpy_hdr
.links
.prev
= copy_entry
;
5924 copy
->size
= copy_size
;
5926 copy
->cpy_hdr
.nentries
;
5927 remaining_entries
-= nentries
;
5928 copy
->cpy_hdr
.nentries
= nentries
;
5931 local_size
+= entry_size
;
5932 new_offset
+= entry_size
;
5935 copy_entry
= copy_entry
->vme_next
;
5939 if((entry
->use_pmap
) && (pmap
== NULL
)) {
5940 kr
= vm_map_copy_overwrite_nested(
5941 entry
->object
.sub_map
,
5945 entry
->object
.sub_map
->pmap
);
5946 } else if (pmap
!= NULL
) {
5947 kr
= vm_map_copy_overwrite_nested(
5948 entry
->object
.sub_map
,
5951 interruptible
, pmap
);
5953 kr
= vm_map_copy_overwrite_nested(
5954 entry
->object
.sub_map
,
5960 if(kr
!= KERN_SUCCESS
) {
5961 if(next_copy
!= NULL
) {
5962 copy
->cpy_hdr
.nentries
+=
5964 copy
->cpy_hdr
.links
.prev
->vme_next
=
5966 copy
->cpy_hdr
.links
.prev
5968 copy
->size
= total_size
;
5972 if (dst_end
<= local_end
) {
5973 return(KERN_SUCCESS
);
5975 /* otherwise copy no longer exists, it was */
5976 /* destroyed after successful copy_overwrite */
5977 copy
= (vm_map_copy_t
)
5978 zalloc(vm_map_copy_zone
);
5979 vm_map_copy_first_entry(copy
) =
5980 vm_map_copy_last_entry(copy
) =
5981 vm_map_copy_to_entry(copy
);
5982 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
5983 copy
->offset
= new_offset
;
5985 total_size
-= copy_size
;
5987 /* put back remainder of copy in container */
5988 if(next_copy
!= NULL
) {
5989 copy
->cpy_hdr
.nentries
= remaining_entries
;
5990 copy
->cpy_hdr
.links
.next
= next_copy
;
5991 copy
->cpy_hdr
.links
.prev
= previous_prev
;
5992 copy
->size
= total_size
;
5993 next_copy
->vme_prev
=
5994 vm_map_copy_to_entry(copy
);
5997 base_addr
= local_end
;
5998 vm_map_lock(dst_map
);
5999 if(!vm_map_lookup_entry(dst_map
,
6000 local_end
, &tmp_entry
)) {
6001 vm_map_unlock(dst_map
);
6002 return(KERN_INVALID_ADDRESS
);
6007 if (dst_end
<= entry
->vme_end
) {
6008 copy_size
= dst_end
- base_addr
;
6012 if ((next
== vm_map_to_entry(dst_map
)) ||
6013 (next
->vme_start
!= entry
->vme_end
)) {
6014 vm_map_unlock(dst_map
);
6015 return(KERN_INVALID_ADDRESS
);
6024 /* adjust the copy object */
6025 if (total_size
> copy_size
) {
6026 vm_map_size_t local_size
= 0;
6027 vm_map_size_t entry_size
;
6029 new_offset
= copy
->offset
;
6030 copy_entry
= vm_map_copy_first_entry(copy
);
6031 while(copy_entry
!= vm_map_copy_to_entry(copy
)) {
6032 entry_size
= copy_entry
->vme_end
-
6033 copy_entry
->vme_start
;
6034 if((local_size
< copy_size
) &&
6035 ((local_size
+ entry_size
)
6037 vm_map_copy_clip_end(copy
, copy_entry
,
6038 copy_entry
->vme_start
+
6039 (copy_size
- local_size
));
6040 entry_size
= copy_entry
->vme_end
-
6041 copy_entry
->vme_start
;
6042 local_size
+= entry_size
;
6043 new_offset
+= entry_size
;
6045 if(local_size
>= copy_size
) {
6046 next_copy
= copy_entry
->vme_next
;
6047 copy_entry
->vme_next
=
6048 vm_map_copy_to_entry(copy
);
6050 copy
->cpy_hdr
.links
.prev
;
6051 copy
->cpy_hdr
.links
.prev
= copy_entry
;
6052 copy
->size
= copy_size
;
6054 copy
->cpy_hdr
.nentries
;
6055 remaining_entries
-= nentries
;
6056 copy
->cpy_hdr
.nentries
= nentries
;
6059 local_size
+= entry_size
;
6060 new_offset
+= entry_size
;
6063 copy_entry
= copy_entry
->vme_next
;
6073 local_pmap
= dst_map
->pmap
;
6075 if ((kr
= vm_map_copy_overwrite_aligned(
6076 dst_map
, tmp_entry
, copy
,
6077 base_addr
, local_pmap
)) != KERN_SUCCESS
) {
6078 if(next_copy
!= NULL
) {
6079 copy
->cpy_hdr
.nentries
+=
6081 copy
->cpy_hdr
.links
.prev
->vme_next
=
6083 copy
->cpy_hdr
.links
.prev
=
6085 copy
->size
+= copy_size
;
6089 vm_map_unlock(dst_map
);
6094 * if the copy and dst address are misaligned but the same
6095 * offset within the page we can copy_not_aligned the
6096 * misaligned parts and copy aligned the rest. If they are
6097 * aligned but len is unaligned we simply need to copy
6098 * the end bit unaligned. We'll need to split the misaligned
6099 * bits of the region in this case !
6101 /* ALWAYS UNLOCKS THE dst_map MAP */
6102 if ((kr
= vm_map_copy_overwrite_unaligned( dst_map
,
6103 tmp_entry
, copy
, base_addr
)) != KERN_SUCCESS
) {
6104 if(next_copy
!= NULL
) {
6105 copy
->cpy_hdr
.nentries
+=
6107 copy
->cpy_hdr
.links
.prev
->vme_next
=
6109 copy
->cpy_hdr
.links
.prev
=
6111 copy
->size
+= copy_size
;
6116 total_size
-= copy_size
;
6119 base_addr
+= copy_size
;
6121 copy
->offset
= new_offset
;
6122 if(next_copy
!= NULL
) {
6123 copy
->cpy_hdr
.nentries
= remaining_entries
;
6124 copy
->cpy_hdr
.links
.next
= next_copy
;
6125 copy
->cpy_hdr
.links
.prev
= previous_prev
;
6126 next_copy
->vme_prev
= vm_map_copy_to_entry(copy
);
6127 copy
->size
= total_size
;
6129 vm_map_lock(dst_map
);
6131 if (!vm_map_lookup_entry(dst_map
,
6132 base_addr
, &tmp_entry
)) {
6133 vm_map_unlock(dst_map
);
6134 return(KERN_INVALID_ADDRESS
);
6136 if (tmp_entry
->in_transition
) {
6137 entry
->needs_wakeup
= TRUE
;
6138 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
6143 vm_map_clip_start(dst_map
, tmp_entry
, vm_map_trunc_page(base_addr
));
	/*
	 *	Throw away the vm_map_copy object
	 */
	vm_map_copy_discard(copy);

	return(KERN_SUCCESS);
}/* vm_map_copy_overwrite */

kern_return_t
vm_map_copy_overwrite(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy,
	boolean_t	interruptible)
{
	return vm_map_copy_overwrite_nested(
		dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
}
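
/*
 * Usage sketch (hypothetical caller): the usual pairing is a copyin
 * followed by an overwrite of an existing, writeable destination range:
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, TRUE);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 *
 * On success the copy object is consumed, as noted in the description
 * above.
 */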
/*
 *	Routine:	vm_map_copy_overwrite_unaligned	[internal use only]
 *
 *	Description:
 *		Physically copy unaligned data.
 *
 *	Implementation:
 *		Unaligned parts of pages have to be physically copied.  We
 *		use a modified form of vm_fault_copy (which understands
 *		non-aligned page offsets and sizes) to do the copy.  We
 *		attempt to copy as much memory in one go as possible;
 *		however, vm_fault_copy copies within one memory object, so
 *		we have to find the smallest of "amount left", "source
 *		object data size" and "target object data size".  With
 *		unaligned data we don't need to split regions, therefore
 *		the source (copy) object should be one map entry; the
 *		target range may be split over multiple map entries,
 *		however.  In any event we are pessimistic about these
 *		assumptions.
 *
 *	Assumptions:
 *		dst_map is locked on entry and is returned locked on
 *		success, unlocked on error.
 */
static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t	dst_map,
	vm_map_entry_t	entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start)
6198 vm_map_entry_t copy_entry
= vm_map_copy_first_entry(copy
);
6199 vm_map_version_t version
;
6200 vm_object_t dst_object
;
6201 vm_object_offset_t dst_offset
;
6202 vm_object_offset_t src_offset
;
6203 vm_object_offset_t entry_offset
;
6204 vm_map_offset_t entry_end
;
6205 vm_map_size_t src_size
,
6209 kern_return_t kr
= KERN_SUCCESS
;
6211 vm_map_lock_write_to_read(dst_map
);
6213 src_offset
= copy
->offset
- vm_object_trunc_page(copy
->offset
);
6214 amount_left
= copy
->size
;
6216 * unaligned so we never clipped this entry, we need the offset into
6217 * the vm_object not just the data.
6219 while (amount_left
> 0) {
6221 if (entry
== vm_map_to_entry(dst_map
)) {
6222 vm_map_unlock_read(dst_map
);
6223 return KERN_INVALID_ADDRESS
;
6226 /* "start" must be within the current map entry */
6227 assert ((start
>=entry
->vme_start
) && (start
<entry
->vme_end
));
6229 dst_offset
= start
- entry
->vme_start
;
6231 dst_size
= entry
->vme_end
- start
;
6233 src_size
= copy_entry
->vme_end
-
6234 (copy_entry
->vme_start
+ src_offset
);
6236 if (dst_size
< src_size
) {
6238 * we can only copy dst_size bytes before
6239 * we have to get the next destination entry
6241 copy_size
= dst_size
;
6244 * we can only copy src_size bytes before
6245 * we have to get the next source copy entry
6247 copy_size
= src_size
;
6250 if (copy_size
> amount_left
) {
6251 copy_size
= amount_left
;
		 *	Entry needs copy; create a shadow object for the
		 *	copy-on-write region.
		 */
6257 if (entry
->needs_copy
&&
6258 ((entry
->protection
& VM_PROT_WRITE
) != 0))
6260 if (vm_map_lock_read_to_write(dst_map
)) {
6261 vm_map_lock_read(dst_map
);
6264 vm_object_shadow(&entry
->object
.vm_object
,
6266 (vm_map_size_t
)(entry
->vme_end
6267 - entry
->vme_start
));
6268 entry
->needs_copy
= FALSE
;
6269 vm_map_lock_write_to_read(dst_map
);
6271 dst_object
= entry
->object
.vm_object
;
6273 * unlike with the virtual (aligned) copy we're going
6274 * to fault on it therefore we need a target object.
6276 if (dst_object
== VM_OBJECT_NULL
) {
6277 if (vm_map_lock_read_to_write(dst_map
)) {
6278 vm_map_lock_read(dst_map
);
6281 dst_object
= vm_object_allocate((vm_map_size_t
)
6282 entry
->vme_end
- entry
->vme_start
);
6283 entry
->object
.vm_object
= dst_object
;
6285 vm_map_lock_write_to_read(dst_map
);
6288 * Take an object reference and unlock map. The "entry" may
6289 * disappear or change when the map is unlocked.
6291 vm_object_reference(dst_object
);
6292 version
.main_timestamp
= dst_map
->timestamp
;
6293 entry_offset
= entry
->offset
;
6294 entry_end
= entry
->vme_end
;
6295 vm_map_unlock_read(dst_map
);
6297 * Copy as much as possible in one pass
6300 copy_entry
->object
.vm_object
,
6301 copy_entry
->offset
+ src_offset
,
6304 entry_offset
+ dst_offset
,
6310 src_offset
+= copy_size
;
6311 amount_left
-= copy_size
;
6313 * Release the object reference
6315 vm_object_deallocate(dst_object
);
6317 * If a hard error occurred, return it now
6319 if (kr
!= KERN_SUCCESS
)
6322 if ((copy_entry
->vme_start
+ src_offset
) == copy_entry
->vme_end
6323 || amount_left
== 0)
6326 * all done with this copy entry, dispose.
6328 vm_map_copy_entry_unlink(copy
, copy_entry
);
6329 vm_object_deallocate(copy_entry
->object
.vm_object
);
6330 vm_map_copy_entry_dispose(copy
, copy_entry
);
6332 if ((copy_entry
= vm_map_copy_first_entry(copy
))
6333 == vm_map_copy_to_entry(copy
) && amount_left
) {
6335 * not finished copying but run out of source
6337 return KERN_INVALID_ADDRESS
;
6342 if (amount_left
== 0)
6343 return KERN_SUCCESS
;
6345 vm_map_lock_read(dst_map
);
6346 if (version
.main_timestamp
== dst_map
->timestamp
) {
6347 if (start
== entry_end
) {
6349 * destination region is split. Use the version
6350 * information to avoid a lookup in the normal
6353 entry
= entry
->vme_next
;
6355 * should be contiguous. Fail if we encounter
6356 * a hole in the destination.
6358 if (start
!= entry
->vme_start
) {
6359 vm_map_unlock_read(dst_map
);
6360 return KERN_INVALID_ADDRESS
;
6365 * Map version check failed.
6366 * we must lookup the entry because somebody
6367 * might have changed the map behind our backs.
6370 if (!vm_map_lookup_entry(dst_map
, start
, &entry
))
6372 vm_map_unlock_read(dst_map
);
6373 return KERN_INVALID_ADDRESS
;
6378 return KERN_SUCCESS
;
6379 }/* vm_map_copy_overwrite_unaligned */
/*
 *	Routine:	vm_map_copy_overwrite_aligned	[internal use only]
 *
 *	Description:
 *		Does all the vm_trickery possible for whole pages.
 *
 *	Implementation:
 *		If there are no permanent objects in the destination,
 *		and the source and destination map entry zones match,
 *		and the destination map entry is not shared,
 *		then the map entries can be deleted and replaced
 *		with those from the copy.  The following code is the
 *		basic idea of what to do, but there are lots of annoying
 *		little details about getting protection and inheritance
 *		right.  Should add protection, inheritance, and sharing checks
 *		to the above pass and make sure that no wiring is involved.
 */
static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t	dst_map,
	vm_map_entry_t	tmp_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	__unused pmap_t	pmap)
6409 vm_map_entry_t copy_entry
;
6410 vm_map_size_t copy_size
;
6412 vm_map_entry_t entry
;
6414 while ((copy_entry
= vm_map_copy_first_entry(copy
))
6415 != vm_map_copy_to_entry(copy
))
6417 copy_size
= (copy_entry
->vme_end
- copy_entry
->vme_start
);
6420 assert(!entry
->use_pmap
); /* unnested when clipped earlier */
6421 if (entry
== vm_map_to_entry(dst_map
)) {
6422 vm_map_unlock(dst_map
);
6423 return KERN_INVALID_ADDRESS
;
6425 size
= (entry
->vme_end
- entry
->vme_start
);
6427 * Make sure that no holes popped up in the
6428 * address map, and that the protection is
6429 * still valid, in case the map was unlocked
6433 if ((entry
->vme_start
!= start
) || ((entry
->is_sub_map
)
6434 && !entry
->needs_copy
)) {
6435 vm_map_unlock(dst_map
);
6436 return(KERN_INVALID_ADDRESS
);
6438 assert(entry
!= vm_map_to_entry(dst_map
));
6441 * Check protection again
6444 if ( ! (entry
->protection
& VM_PROT_WRITE
)) {
6445 vm_map_unlock(dst_map
);
6446 return(KERN_PROTECTION_FAILURE
);
6450 * Adjust to source size first
6453 if (copy_size
< size
) {
6454 vm_map_clip_end(dst_map
, entry
, entry
->vme_start
+ copy_size
);
6459 * Adjust to destination size
6462 if (size
< copy_size
) {
6463 vm_map_copy_clip_end(copy
, copy_entry
,
6464 copy_entry
->vme_start
+ size
);
6468 assert((entry
->vme_end
- entry
->vme_start
) == size
);
6469 assert((tmp_entry
->vme_end
- tmp_entry
->vme_start
) == size
);
6470 assert((copy_entry
->vme_end
- copy_entry
->vme_start
) == size
);
6473 * If the destination contains temporary unshared memory,
6474 * we can perform the copy by throwing it away and
6475 * installing the source data.
6478 object
= entry
->object
.vm_object
;
6479 if ((!entry
->is_shared
&&
6480 ((object
== VM_OBJECT_NULL
) ||
6481 (object
->internal
&& !object
->true_share
))) ||
6482 entry
->needs_copy
) {
6483 vm_object_t old_object
= entry
->object
.vm_object
;
6484 vm_object_offset_t old_offset
= entry
->offset
;
6485 vm_object_offset_t offset
;
6488 * Ensure that the source and destination aren't
6491 if (old_object
== copy_entry
->object
.vm_object
&&
6492 old_offset
== copy_entry
->offset
) {
6493 vm_map_copy_entry_unlink(copy
, copy_entry
);
6494 vm_map_copy_entry_dispose(copy
, copy_entry
);
6496 if (old_object
!= VM_OBJECT_NULL
)
6497 vm_object_deallocate(old_object
);
6499 start
= tmp_entry
->vme_end
;
6500 tmp_entry
= tmp_entry
->vme_next
;
6504 if (old_object
!= VM_OBJECT_NULL
) {
6505 if(entry
->is_sub_map
) {
6506 if(entry
->use_pmap
) {
6507 #ifndef NO_NESTED_PMAP
6508 pmap_unnest(dst_map
->pmap
,
6509 (addr64_t
)entry
->vme_start
,
6510 entry
->vme_end
- entry
->vme_start
);
6511 #endif /* NO_NESTED_PMAP */
6512 if(dst_map
->mapped
) {
6513 /* clean up parent */
6515 vm_map_submap_pmap_clean(
6516 dst_map
, entry
->vme_start
,
6518 entry
->object
.sub_map
,
6522 vm_map_submap_pmap_clean(
6523 dst_map
, entry
->vme_start
,
6525 entry
->object
.sub_map
,
6529 entry
->object
.sub_map
);
6531 if(dst_map
->mapped
) {
6532 vm_object_pmap_protect(
6533 entry
->object
.vm_object
,
6541 pmap_remove(dst_map
->pmap
,
6542 (addr64_t
)(entry
->vme_start
),
6543 (addr64_t
)(entry
->vme_end
));
6545 vm_object_deallocate(old_object
);
6549 entry
->is_sub_map
= FALSE
;
6550 entry
->object
= copy_entry
->object
;
6551 object
= entry
->object
.vm_object
;
6552 entry
->needs_copy
= copy_entry
->needs_copy
;
6553 entry
->wired_count
= 0;
6554 entry
->user_wired_count
= 0;
6555 offset
= entry
->offset
= copy_entry
->offset
;
6557 vm_map_copy_entry_unlink(copy
, copy_entry
);
6558 vm_map_copy_entry_dispose(copy
, copy_entry
);
6561 * we could try to push pages into the pmap at this point, BUT
6562 * this optimization only saved on average 2 us per page if ALL
6563 * the pages in the source were currently mapped
6564 * and ALL the pages in the dest were touched, if there were fewer
6565 * than 2/3 of the pages touched, this optimization actually cost more cycles
6566 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
6570 * Set up for the next iteration. The map
6571 * has not been unlocked, so the next
6572 * address should be at the end of this
6573 * entry, and the next map entry should be
6574 * the one following it.
6577 start
= tmp_entry
->vme_end
;
6578 tmp_entry
= tmp_entry
->vme_next
;
6580 vm_map_version_t version
;
6581 vm_object_t dst_object
= entry
->object
.vm_object
;
6582 vm_object_offset_t dst_offset
= entry
->offset
;
6586 * Take an object reference, and record
6587 * the map version information so that the
6588 * map can be safely unlocked.
6591 vm_object_reference(dst_object
);
6593 /* account for unlock bumping up timestamp */
6594 version
.main_timestamp
= dst_map
->timestamp
+ 1;
6596 vm_map_unlock(dst_map
);
6599 * Copy as much as possible in one pass
6604 copy_entry
->object
.vm_object
,
6614 * Release the object reference
6617 vm_object_deallocate(dst_object
);
6620 * If a hard error occurred, return it now
6623 if (r
!= KERN_SUCCESS
)
6626 if (copy_size
!= 0) {
6628 * Dispose of the copied region
6631 vm_map_copy_clip_end(copy
, copy_entry
,
6632 copy_entry
->vme_start
+ copy_size
);
6633 vm_map_copy_entry_unlink(copy
, copy_entry
);
6634 vm_object_deallocate(copy_entry
->object
.vm_object
);
6635 vm_map_copy_entry_dispose(copy
, copy_entry
);
6639 * Pick up in the destination map where we left off.
6641 * Use the version information to avoid a lookup
6642 * in the normal case.
6646 vm_map_lock(dst_map
);
6647 if (version
.main_timestamp
== dst_map
->timestamp
) {
6648 /* We can safely use saved tmp_entry value */
6650 vm_map_clip_end(dst_map
, tmp_entry
, start
);
6651 tmp_entry
= tmp_entry
->vme_next
;
6653 /* Must do lookup of tmp_entry */
6655 if (!vm_map_lookup_entry(dst_map
, start
, &tmp_entry
)) {
6656 vm_map_unlock(dst_map
);
6657 return(KERN_INVALID_ADDRESS
);
6659 vm_map_clip_start(dst_map
, tmp_entry
, start
);
6664 return(KERN_SUCCESS
);
6665 }/* vm_map_copy_overwrite_aligned */
/*
 *	Routine:	vm_map_copyin_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy in data to a kernel buffer from space in the
 *		source map.  The original space may be optionally
 *		deallocated.
 *
 *		If successful, returns a new copy object.
 */
static kern_return_t
vm_map_copyin_kernel_buffer(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	vm_map_copy_t	*copy_result)
{
	kern_return_t	kr;
	vm_map_copy_t	copy;
	vm_size_t	kalloc_size;

	if ((vm_size_t) len != len) {
		/* "len" is too big and doesn't fit in a "vm_size_t" */
		return KERN_RESOURCE_SHORTAGE;
	}
	kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
	assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);

	copy = (vm_map_copy_t) kalloc(kalloc_size);
	if (copy == VM_MAP_COPY_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
	copy->size = len;
	copy->offset = 0;
	copy->cpy_kdata = (void *) (copy + 1);
	copy->cpy_kalloc_size = kalloc_size;

	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
	if (kr != KERN_SUCCESS) {
		kfree(copy, kalloc_size);
		return kr;
	}
	if (src_destroy) {
		(void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
				     vm_map_round_page(src_addr + len),
				     VM_MAP_REMOVE_INTERRUPTIBLE |
				     VM_MAP_REMOVE_WAIT_FOR_KWIRE |
				     (src_map == kernel_map) ?
				     VM_MAP_REMOVE_KUNWIRE : 0);
	}
	*copy_result = copy;
	return KERN_SUCCESS;
}
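
/*
 * Added note: the kernel-buffer copy is laid out as a single kalloc()
 * block -- the struct vm_map_copy header immediately followed by the
 * raw data -- which is why cpy_kdata is set to (copy + 1) above and the
 * whole object is later released with a single kfree() in
 * vm_map_copy_discard().
 */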
/*
 *	Routine:	vm_map_copyout_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy out data from a kernel buffer into space in the
 *		destination map.  The space may be optionally dynamically
 *		allocated.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
static int vm_map_copyout_kernel_buffer_failures = 0;
static kern_return_t
vm_map_copyout_kernel_buffer(
	vm_map_t		map,
	vm_map_address_t	*addr,	/* IN/OUT */
	vm_map_copy_t		copy,
	boolean_t		overwrite)
{
	kern_return_t	kr = KERN_SUCCESS;
	thread_t	thread = current_thread();

	if (!overwrite) {

		/*
		 * Allocate space in the target map for the data
		 */
		kr = vm_map_enter(map,
				  addr,
				  vm_map_round_page(copy->size),
				  (vm_map_offset_t) 0,
				  VM_FLAGS_ANYWHERE,
				  VM_OBJECT_NULL,
				  (vm_object_offset_t) 0,
				  FALSE,
				  VM_PROT_DEFAULT,
				  VM_PROT_ALL,
				  VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS)
			return kr;
	}

	/*
	 * Copyout the data from the kernel buffer to the target map.
	 */
	if (thread->map == map) {

		/*
		 * If the target map is the current map, just do
		 * the copy.
		 */
		assert((vm_size_t) copy->size == copy->size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	}
	else {
		vm_map_t oldmap;

		/*
		 * If the target map is another map, assume the
		 * target's address space identity for the duration
		 * of the copy.
		 */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);

		assert((vm_size_t) copy->size == copy->size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
			vm_map_copyout_kernel_buffer_failures++;
			kr = KERN_INVALID_ADDRESS;
		}

		(void) vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}

	if (kr != KERN_SUCCESS) {
		/* the copy failed, clean up */
		if (!overwrite) {
			/*
			 * Deallocate the space we allocated in the target map.
			 */
			(void) vm_map_remove(map,
					     vm_map_trunc_page(*addr),
					     vm_map_round_page(*addr +
							       vm_map_round_page(copy->size)),
					     VM_MAP_NO_FLAGS);
		}
	} else {
		/* copy was successful, discard the copy structure */
		kfree(copy, copy->cpy_kalloc_size);
	}

	return kr;
}
/*
 *	Macro:		vm_map_copy_insert
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *	Side effects:
 *		The copy chain is destroyed.
 *	Warning:
 *		The arguments are evaluated multiple times.
 */
#define	vm_map_copy_insert(map, where, copy)				\
MACRO_BEGIN								\
	vm_map_t VMCI_map;						\
	vm_map_entry_t VMCI_where;					\
	vm_map_copy_t VMCI_copy;					\
	VMCI_map = (map);						\
	VMCI_where = (where);						\
	VMCI_copy = (copy);						\
	((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
		->vme_next = (VMCI_where->vme_next);			\
	((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy))	\
		->vme_prev = VMCI_where;				\
	VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries;		\
	UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free);		\
	zfree(vm_map_copy_zone, VMCI_copy);				\
MACRO_END
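
/*
 * Added note: since the macro arguments are expanded more than once (see
 * the warning above), callers should pass only simple expressions without
 * side effects, e.g. vm_map_copy_insert(dst_map, last, copy) as done in
 * vm_map_copyout() below.
 */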
/*
 *	Routine:	vm_map_copyout
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy)
{
	vm_map_size_t		size;
	vm_map_size_t		adjustment;
	vm_map_offset_t		start;
	vm_object_offset_t	vm_copy_start;
	vm_map_entry_t		last;
	vm_map_entry_t		entry;
6875 * Check for null copy object.
6878 if (copy
== VM_MAP_COPY_NULL
) {
6880 return(KERN_SUCCESS
);
6884 * Check for special copy object, created
6885 * by vm_map_copyin_object.
6888 if (copy
->type
== VM_MAP_COPY_OBJECT
) {
6889 vm_object_t object
= copy
->cpy_object
;
6891 vm_object_offset_t offset
;
6893 offset
= vm_object_trunc_page(copy
->offset
);
6894 size
= vm_map_round_page(copy
->size
+
6895 (vm_map_size_t
)(copy
->offset
- offset
));
6897 kr
= vm_map_enter(dst_map
, dst_addr
, size
,
6898 (vm_map_offset_t
) 0, VM_FLAGS_ANYWHERE
,
6899 object
, offset
, FALSE
,
6900 VM_PROT_DEFAULT
, VM_PROT_ALL
,
6901 VM_INHERIT_DEFAULT
);
6902 if (kr
!= KERN_SUCCESS
)
6904 /* Account for non-pagealigned copy object */
6905 *dst_addr
+= (vm_map_offset_t
)(copy
->offset
- offset
);
6906 zfree(vm_map_copy_zone
, copy
);
6907 return(KERN_SUCCESS
);
6911 * Check for special kernel buffer allocated
6912 * by new_ipc_kmsg_copyin.
6915 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
6916 return(vm_map_copyout_kernel_buffer(dst_map
, dst_addr
,
6921 * Find space for the data
6924 vm_copy_start
= vm_object_trunc_page(copy
->offset
);
6925 size
= vm_map_round_page((vm_map_size_t
)copy
->offset
+ copy
->size
)
6930 vm_map_lock(dst_map
);
6931 assert(first_free_is_valid(dst_map
));
6932 start
= ((last
= dst_map
->first_free
) == vm_map_to_entry(dst_map
)) ?
6933 vm_map_min(dst_map
) : last
->vme_end
;
6936 vm_map_entry_t next
= last
->vme_next
;
6937 vm_map_offset_t end
= start
+ size
;
6939 if ((end
> dst_map
->max_offset
) || (end
< start
)) {
6940 if (dst_map
->wait_for_space
) {
6941 if (size
<= (dst_map
->max_offset
- dst_map
->min_offset
)) {
6942 assert_wait((event_t
) dst_map
,
6943 THREAD_INTERRUPTIBLE
);
6944 vm_map_unlock(dst_map
);
6945 thread_block(THREAD_CONTINUE_NULL
);
6949 vm_map_unlock(dst_map
);
6950 return(KERN_NO_SPACE
);
6953 if ((next
== vm_map_to_entry(dst_map
)) ||
6954 (next
->vme_start
>= end
))
6958 start
= last
->vme_end
;
6962 * Since we're going to just drop the map
6963 * entries from the copy into the destination
6964 * map, they must come from the same pool.
6967 if (copy
->cpy_hdr
.entries_pageable
!= dst_map
->hdr
.entries_pageable
) {
			 * Mismatches occur when dealing with the default
			 * pager.
			 */
			vm_map_entry_t	next, new;
6976 * Find the zone that the copies were allocated from
6978 old_zone
= (copy
->cpy_hdr
.entries_pageable
)
6980 : vm_map_kentry_zone
;
6981 entry
= vm_map_copy_first_entry(copy
);
6984 * Reinitialize the copy so that vm_map_copy_entry_link
6987 copy
->cpy_hdr
.nentries
= 0;
6988 copy
->cpy_hdr
.entries_pageable
= dst_map
->hdr
.entries_pageable
;
6989 vm_map_copy_first_entry(copy
) =
6990 vm_map_copy_last_entry(copy
) =
6991 vm_map_copy_to_entry(copy
);
6996 while (entry
!= vm_map_copy_to_entry(copy
)) {
6997 new = vm_map_copy_entry_create(copy
);
6998 vm_map_entry_copy_full(new, entry
);
6999 new->use_pmap
= FALSE
; /* clr address space specifics */
7000 vm_map_copy_entry_link(copy
,
7001 vm_map_copy_last_entry(copy
),
7003 next
= entry
->vme_next
;
7004 zfree(old_zone
, entry
);
7010 * Adjust the addresses in the copy chain, and
7011 * reset the region attributes.
7014 adjustment
= start
- vm_copy_start
;
7015 for (entry
= vm_map_copy_first_entry(copy
);
7016 entry
!= vm_map_copy_to_entry(copy
);
7017 entry
= entry
->vme_next
) {
7018 entry
->vme_start
+= adjustment
;
7019 entry
->vme_end
+= adjustment
;
7021 entry
->inheritance
= VM_INHERIT_DEFAULT
;
7022 entry
->protection
= VM_PROT_DEFAULT
;
7023 entry
->max_protection
= VM_PROT_ALL
;
7024 entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
7027 * If the entry is now wired,
7028 * map the pages into the destination map.
7030 if (entry
->wired_count
!= 0) {
7031 register vm_map_offset_t va
;
7032 vm_object_offset_t offset
;
7033 register vm_object_t object
;
7037 object
= entry
->object
.vm_object
;
7038 offset
= entry
->offset
;
7039 va
= entry
->vme_start
;
7041 pmap_pageable(dst_map
->pmap
,
7046 while (va
< entry
->vme_end
) {
7047 register vm_page_t m
;
7050 * Look up the page in the object.
7051 * Assert that the page will be found in the
7054 * the object was newly created by
7055 * vm_object_copy_slowly, and has
7056 * copies of all of the pages from
7059 * the object was moved from the old
7060 * map entry; because the old map
7061 * entry was wired, all of the pages
7062 * were in the top-level object.
7063 * (XXX not true if we wire pages for
7066 vm_object_lock(object
);
7068 m
= vm_page_lookup(object
, offset
);
7069 if (m
== VM_PAGE_NULL
|| !VM_PAGE_WIRED(m
) ||
7071 panic("vm_map_copyout: wiring %p", m
);
7075 * The page is assumed to be wired here, so it
7076 * shouldn't be encrypted. Otherwise, we
7077 * couldn't enter it in the page table, since
7078 * we don't want the user to see the encrypted
7081 ASSERT_PAGE_DECRYPTED(m
);
7083 prot
= entry
->protection
;
7085 if (override_nx(dst_map
, entry
->alias
) && prot
)
7086 prot
|= VM_PROT_EXECUTE
;
7088 type_of_fault
= DBG_CACHE_HIT_FAULT
;
7090 vm_fault_enter(m
, dst_map
->pmap
, va
, prot
,
7091 VM_PAGE_WIRED(m
), FALSE
, FALSE
,
7094 vm_object_unlock(object
);
7096 offset
+= PAGE_SIZE_64
;
7103 * Correct the page alignment for the result
7106 *dst_addr
= start
+ (copy
->offset
- vm_copy_start
);
7109 * Update the hints and the map size
7112 SAVE_HINT_MAP_WRITE(dst_map
, vm_map_copy_last_entry(copy
));
7114 dst_map
->size
+= size
;
7120 vm_map_copy_insert(dst_map
, last
, copy
);
7122 vm_map_unlock(dst_map
);
7125 * XXX If wiring_required, call vm_map_pageable
7128 return(KERN_SUCCESS
);
/*
 *	Routine:	vm_map_copyin
 *
 *	Description:
 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
 *
 */

#undef vm_map_copyin

kern_return_t
vm_map_copyin(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	boolean_t		src_destroy,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
				    FALSE, copy_result, FALSE));
}
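
/*
 * Usage sketch (hypothetical caller): a copyin/copyout round trip moves
 * a region between address spaces without overwriting existing mappings:
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */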
/*
 *	Routine:	vm_map_copyin_common
 *
 *	Description:
 *		Copy the specified region (src_addr, len) from the
 *		source address space (src_map), possibly removing
 *		the region from the source address space (src_destroy).
 *
 *	Returns:
 *		A vm_map_copy_t object (copy_result), suitable for
 *		insertion into another address space (using vm_map_copyout),
 *		copying over another address space region (using
 *		vm_map_copy_overwrite).  If the copy is unused, it
 *		should be destroyed (using vm_map_copy_discard).
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */

typedef struct submap_map {
	vm_map_t	parent_map;
	vm_map_offset_t	base_start;
	vm_map_offset_t	base_end;
	vm_map_size_t	base_len;
	struct submap_map *next;
} submap_map_t;
7181 vm_map_copyin_common(
7183 vm_map_address_t src_addr
,
7185 boolean_t src_destroy
,
7186 __unused boolean_t src_volatile
,
7187 vm_map_copy_t
*copy_result
, /* OUT */
7188 boolean_t use_maxprot
)
7190 vm_map_entry_t tmp_entry
; /* Result of last map lookup --
7191 * in multi-level lookup, this
7192 * entry contains the actual
7196 vm_map_entry_t new_entry
= VM_MAP_ENTRY_NULL
; /* Map entry for copy */
7198 vm_map_offset_t src_start
; /* Start of current entry --
7199 * where copy is taking place now
7201 vm_map_offset_t src_end
; /* End of entire region to be
7203 vm_map_offset_t src_base
;
7204 vm_map_t base_map
= src_map
;
7205 boolean_t map_share
=FALSE
;
7206 submap_map_t
*parent_maps
= NULL
;
7209 vm_map_copy_t copy
; /* Resulting copy */
7210 vm_map_address_t copy_addr
;
7213 * Check for copies of zero bytes.
7217 *copy_result
= VM_MAP_COPY_NULL
;
7218 return(KERN_SUCCESS
);
7222 * Check that the end address doesn't overflow
7224 src_end
= src_addr
+ len
;
7225 if (src_end
< src_addr
)
7226 return KERN_INVALID_ADDRESS
;
7229 * If the copy is sufficiently small, use a kernel buffer instead
7230 * of making a virtual copy. The theory being that the cost of
7231 * setting up VM (and taking C-O-W faults) dominates the copy costs
7232 * for small regions.
7234 if ((len
< msg_ool_size_small
) && !use_maxprot
)
7235 return vm_map_copyin_kernel_buffer(src_map
, src_addr
, len
,
7236 src_destroy
, copy_result
);
7239 * Compute (page aligned) start and end of region
7241 src_start
= vm_map_trunc_page(src_addr
);
7242 src_end
= vm_map_round_page(src_end
);
7244 XPR(XPR_VM_MAP
, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map
, src_addr
, len
, src_destroy
, 0);
7247 * Allocate a header element for the list.
7249 * Use the start and end in the header to
7250 * remember the endpoints prior to rounding.
7253 copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
7254 vm_map_copy_first_entry(copy
) =
7255 vm_map_copy_last_entry(copy
) = vm_map_copy_to_entry(copy
);
7256 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
7257 copy
->cpy_hdr
.nentries
= 0;
7258 copy
->cpy_hdr
.entries_pageable
= TRUE
;
7260 copy
->offset
= src_addr
;
7263 new_entry
= vm_map_copy_entry_create(copy
);
7267 vm_map_unlock(src_map); \
7268 if(src_map != base_map) \
7269 vm_map_deallocate(src_map); \
7270 if (new_entry != VM_MAP_ENTRY_NULL) \
7271 vm_map_copy_entry_dispose(copy,new_entry); \
7272 vm_map_copy_discard(copy); \
7274 submap_map_t *_ptr; \
7276 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7277 parent_maps=parent_maps->next; \
7278 if (_ptr->parent_map != base_map) \
7279 vm_map_deallocate(_ptr->parent_map); \
7280 kfree(_ptr, sizeof(submap_map_t)); \
7287 * Find the beginning of the region.
7290 vm_map_lock(src_map
);
7292 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
))
7293 RETURN(KERN_INVALID_ADDRESS
);
7294 if(!tmp_entry
->is_sub_map
) {
7295 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
7297 /* set for later submap fix-up */
7298 copy_addr
= src_start
;
7301 * Go through entries until we get to the end.
7306 vm_map_entry_t src_entry
= tmp_entry
; /* Top-level entry */
7307 vm_map_size_t src_size
; /* Size of source
7308 * map entry (in both
7313 vm_object_t src_object
; /* Object to copy */
7314 vm_object_offset_t src_offset
;
7316 boolean_t src_needs_copy
; /* Should source map
7318 * for copy-on-write?
7321 boolean_t new_entry_needs_copy
; /* Will new entry be COW? */
7323 boolean_t was_wired
; /* Was source wired? */
7324 vm_map_version_t version
; /* Version before locks
7325 * dropped to make copy
7327 kern_return_t result
; /* Return value from
7328 * copy_strategically.
7330 while(tmp_entry
->is_sub_map
) {
7331 vm_map_size_t submap_len
;
7334 ptr
= (submap_map_t
*)kalloc(sizeof(submap_map_t
));
7335 ptr
->next
= parent_maps
;
7337 ptr
->parent_map
= src_map
;
7338 ptr
->base_start
= src_start
;
7339 ptr
->base_end
= src_end
;
7340 submap_len
= tmp_entry
->vme_end
- src_start
;
7341 if(submap_len
> (src_end
-src_start
))
7342 submap_len
= src_end
-src_start
;
7343 ptr
->base_len
= submap_len
;
7345 src_start
-= tmp_entry
->vme_start
;
7346 src_start
+= tmp_entry
->offset
;
7347 src_end
= src_start
+ submap_len
;
7348 src_map
= tmp_entry
->object
.sub_map
;
7349 vm_map_lock(src_map
);
7350 /* keep an outstanding reference for all maps in */
7351 /* the parents tree except the base map */
7352 vm_map_reference(src_map
);
7353 vm_map_unlock(ptr
->parent_map
);
7354 if (!vm_map_lookup_entry(
7355 src_map
, src_start
, &tmp_entry
))
7356 RETURN(KERN_INVALID_ADDRESS
);
7358 if(!tmp_entry
->is_sub_map
)
7359 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
7360 src_entry
= tmp_entry
;
7362 /* we are now in the lowest level submap... */
7364 if ((tmp_entry
->object
.vm_object
!= VM_OBJECT_NULL
) &&
7365 (tmp_entry
->object
.vm_object
->phys_contiguous
)) {
7366 /* This is not, supported for now.In future */
7367 /* we will need to detect the phys_contig */
7368 /* condition and then upgrade copy_slowly */
7369 /* to do physical copy from the device mem */
7370 /* based object. We can piggy-back off of */
7371 /* the was wired boolean to set-up the */
7372 /* proper handling */
7373 RETURN(KERN_PROTECTION_FAILURE
);
7376 * Create a new address map entry to hold the result.
7377 * Fill in the fields from the appropriate source entries.
7378 * We must unlock the source map to do this if we need
7379 * to allocate a map entry.
7381 if (new_entry
== VM_MAP_ENTRY_NULL
) {
7382 version
.main_timestamp
= src_map
->timestamp
;
7383 vm_map_unlock(src_map
);
7385 new_entry
= vm_map_copy_entry_create(copy
);
7387 vm_map_lock(src_map
);
7388 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
7389 if (!vm_map_lookup_entry(src_map
, src_start
,
7391 RETURN(KERN_INVALID_ADDRESS
);
7393 if (!tmp_entry
->is_sub_map
)
7394 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
7395 continue; /* restart w/ new tmp_entry */
7400 * Verify that the region can be read.
7402 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
7404 (src_entry
->max_protection
& VM_PROT_READ
) == 0)
7405 RETURN(KERN_PROTECTION_FAILURE
);
7408 * Clip against the endpoints of the entire region.
7411 vm_map_clip_end(src_map
, src_entry
, src_end
);
7413 src_size
= src_entry
->vme_end
- src_start
;
7414 src_object
= src_entry
->object
.vm_object
;
7415 src_offset
= src_entry
->offset
;
7416 was_wired
= (src_entry
->wired_count
!= 0);
7418 vm_map_entry_copy(new_entry
, src_entry
);
7419 new_entry
->use_pmap
= FALSE
; /* clr address space specifics */
7422 * Attempt non-blocking copy-on-write optimizations.
7426 (src_object
== VM_OBJECT_NULL
||
7427 (src_object
->internal
&& !src_object
->true_share
7430 * If we are destroying the source, and the object
7431 * is internal, we can move the object reference
7432 * from the source to the copy. The copy is
7433 * copy-on-write only if the source is.
7434 * We make another reference to the object, because
7435 * destroying the source entry will deallocate it.
7437 vm_object_reference(src_object
);
7440 * Copy is always unwired. vm_map_copy_entry
7441 * set its wired count to zero.
7444 goto CopySuccessful
;
7449 XPR(XPR_VM_MAP
, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7450 src_object
, new_entry
, new_entry
->object
.vm_object
,
7452 if ((src_object
== VM_OBJECT_NULL
||
7453 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
)) &&
7454 vm_object_copy_quickly(
7455 &new_entry
->object
.vm_object
,
7459 &new_entry_needs_copy
)) {
7461 new_entry
->needs_copy
= new_entry_needs_copy
;
7464 * Handle copy-on-write obligations
7467 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
7470 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
7472 if (override_nx(src_map
, src_entry
->alias
) && prot
)
7473 prot
|= VM_PROT_EXECUTE
;
7475 vm_object_pmap_protect(
7479 (src_entry
->is_shared
?
7482 src_entry
->vme_start
,
7485 tmp_entry
->needs_copy
= TRUE
;
7489 * The map has never been unlocked, so it's safe
7490 * to move to the next entry rather than doing
7494 goto CopySuccessful
;
7498 * Take an object reference, so that we may
7499 * release the map lock(s).
7502 assert(src_object
!= VM_OBJECT_NULL
);
7503 vm_object_reference(src_object
);
7506 * Record the timestamp for later verification.
7510 version
.main_timestamp
= src_map
->timestamp
;
7511 vm_map_unlock(src_map
); /* Increments timestamp once! */
7519 vm_object_lock(src_object
);
7520 result
= vm_object_copy_slowly(
7525 &new_entry
->object
.vm_object
);
7526 new_entry
->offset
= 0;
7527 new_entry
->needs_copy
= FALSE
;
7530 else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
7531 (tmp_entry
->is_shared
|| map_share
)) {
7532 vm_object_t new_object
;
7534 vm_object_lock_shared(src_object
);
7535 new_object
= vm_object_copy_delayed(
7540 if (new_object
== VM_OBJECT_NULL
)
7543 new_entry
->object
.vm_object
= new_object
;
7544 new_entry
->needs_copy
= TRUE
;
7545 result
= KERN_SUCCESS
;
7548 result
= vm_object_copy_strategically(src_object
,
7551 &new_entry
->object
.vm_object
,
7553 &new_entry_needs_copy
);
7555 new_entry
->needs_copy
= new_entry_needs_copy
;
7558 if (result
!= KERN_SUCCESS
&&
7559 result
!= KERN_MEMORY_RESTART_COPY
) {
7560 vm_map_lock(src_map
);
7565 * Throw away the extra reference
7568 vm_object_deallocate(src_object
);
7571 * Verify that the map has not substantially
7572 * changed while the copy was being made.
7575 vm_map_lock(src_map
);
7577 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
)
7578 goto VerificationSuccessful
;
7581 * Simple version comparison failed.
7583 * Retry the lookup and verify that the
7584 * same object/offset are still present.
7586 * [Note: a memory manager that colludes with
7587 * the calling task can detect that we have
7588 * cheated. While the map was unlocked, the
7589 * mapping could have been changed and restored.]
7592 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
7593 RETURN(KERN_INVALID_ADDRESS
);
7596 src_entry
= tmp_entry
;
7597 vm_map_clip_start(src_map
, src_entry
, src_start
);
7599 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
7601 ((src_entry
->max_protection
& VM_PROT_READ
) == 0))
7602 goto VerificationFailed
;
7604 if (src_entry
->vme_end
< new_entry
->vme_end
)
7605 src_size
= (new_entry
->vme_end
= src_entry
->vme_end
) - src_start
;
7607 if ((src_entry
->object
.vm_object
!= src_object
) ||
7608 (src_entry
->offset
!= src_offset
) ) {
7611 * Verification failed.
7613 * Start over with this top-level entry.
7616 VerificationFailed
: ;
7618 vm_object_deallocate(new_entry
->object
.vm_object
);
7619 tmp_entry
= src_entry
;
7624 * Verification succeeded.
7627 VerificationSuccessful
: ;
7629 if (result
== KERN_MEMORY_RESTART_COPY
)
7639 * Link in the new copy entry.
7642 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
7646 * Determine whether the entire region
7649 src_base
= src_start
;
7650 src_start
= new_entry
->vme_end
;
7651 new_entry
= VM_MAP_ENTRY_NULL
;
7652 while ((src_start
>= src_end
) && (src_end
!= 0)) {
7653 if (src_map
!= base_map
) {
7657 assert(ptr
!= NULL
);
7658 parent_maps
= parent_maps
->next
;
7660 /* fix up the damage we did in that submap */
7661 vm_map_simplify_range(src_map
,
7665 vm_map_unlock(src_map
);
7666 vm_map_deallocate(src_map
);
7667 vm_map_lock(ptr
->parent_map
);
7668 src_map
= ptr
->parent_map
;
7669 src_base
= ptr
->base_start
;
7670 src_start
= ptr
->base_start
+ ptr
->base_len
;
7671 src_end
= ptr
->base_end
;
7672 if ((src_end
> src_start
) &&
7673 !vm_map_lookup_entry(
7674 src_map
, src_start
, &tmp_entry
))
7675 RETURN(KERN_INVALID_ADDRESS
);
7676 kfree(ptr
, sizeof(submap_map_t
));
7677 if(parent_maps
== NULL
)
7679 src_entry
= tmp_entry
->vme_prev
;
7683 if ((src_start
>= src_end
) && (src_end
!= 0))
7687 * Verify that there are no gaps in the region
7690 tmp_entry
= src_entry
->vme_next
;
7691 if ((tmp_entry
->vme_start
!= src_start
) ||
7692 (tmp_entry
== vm_map_to_entry(src_map
)))
7693 RETURN(KERN_INVALID_ADDRESS
);
7697 * If the source should be destroyed, do it now, since the
7698 * copy was successful.
7701 (void) vm_map_delete(src_map
,
7702 vm_map_trunc_page(src_addr
),
7704 (src_map
== kernel_map
) ?
7705 VM_MAP_REMOVE_KUNWIRE
:
7709 /* fix up the damage we did in the base map */
7710 vm_map_simplify_range(src_map
,
7711 vm_map_trunc_page(src_addr
),
7712 vm_map_round_page(src_end
));
7715 vm_map_unlock(src_map
);
7717 /* Fix-up start and end points in copy. This is necessary */
7718 /* when the various entries in the copy object were picked */
7719 /* up from different sub-maps */
7721 tmp_entry
= vm_map_copy_first_entry(copy
);
7722 while (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
7723 tmp_entry
->vme_end
= copy_addr
+
7724 (tmp_entry
->vme_end
- tmp_entry
->vme_start
);
7725 tmp_entry
->vme_start
= copy_addr
;
7726 copy_addr
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
7727 tmp_entry
= (struct vm_map_entry
*)tmp_entry
->vme_next
;
7730 *copy_result
= copy
;
7731 return(KERN_SUCCESS
);
7737 * vm_map_copyin_object:
7739 * Create a copy object from an object.
7740 * Our caller donates an object reference.
7744 vm_map_copyin_object(
7746 vm_object_offset_t offset
, /* offset of region in object */
7747 vm_object_size_t size
, /* size of region in object */
7748 vm_map_copy_t
*copy_result
) /* OUT */
7750 vm_map_copy_t copy
; /* Resulting copy */
7753 * We drop the object into a special copy object
7754 * that contains the object directly.
7757 copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
7758 copy
->type
= VM_MAP_COPY_OBJECT
;
7759 copy
->cpy_object
= object
;
7760 copy
->offset
= offset
;
7763 *copy_result
= copy
;
7764 return(KERN_SUCCESS
);
7770 vm_map_entry_t old_entry
,
7774 vm_map_entry_t new_entry
;
7777 * New sharing code. New map entry
7778 * references original object. Internal
7779 * objects use asynchronous copy algorithm for
7780 * future copies. First make sure we have
7781 * the right object. If we need a shadow,
7782 * or someone else already has one, then
7783 * make a new shadow and share it.
7786 object
= old_entry
->object
.vm_object
;
7787 if (old_entry
->is_sub_map
) {
7788 assert(old_entry
->wired_count
== 0);
7789 #ifndef NO_NESTED_PMAP
7790 if(old_entry
->use_pmap
) {
7791 kern_return_t result
;
7793 result
= pmap_nest(new_map
->pmap
,
7794 (old_entry
->object
.sub_map
)->pmap
,
7795 (addr64_t
)old_entry
->vme_start
,
7796 (addr64_t
)old_entry
->vme_start
,
7797 (uint64_t)(old_entry
->vme_end
- old_entry
->vme_start
));
7799 panic("vm_map_fork_share: pmap_nest failed!");
7801 #endif /* NO_NESTED_PMAP */
7802 } else if (object
== VM_OBJECT_NULL
) {
7803 object
= vm_object_allocate((vm_map_size_t
)(old_entry
->vme_end
-
7804 old_entry
->vme_start
));
7805 old_entry
->offset
= 0;
7806 old_entry
->object
.vm_object
= object
;
7807 assert(!old_entry
->needs_copy
);
7808 } else if (object
->copy_strategy
!=
7809 MEMORY_OBJECT_COPY_SYMMETRIC
) {
7812 * We are already using an asymmetric
7813 * copy, and therefore we already have
7817 assert(! old_entry
->needs_copy
);
7819 else if (old_entry
->needs_copy
|| /* case 1 */
7820 object
->shadowed
|| /* case 2 */
7821 (!object
->true_share
&& /* case 3 */
7822 !old_entry
->is_shared
&&
7824 (vm_map_size_t
)(old_entry
->vme_end
-
7825 old_entry
->vme_start
)))) {
7828 * We need to create a shadow.
7829 * There are three cases here.
7830 * In the first case, we need to
7831 * complete a deferred symmetrical
7832 * copy that we participated in.
7833 * In the second and third cases,
7834 * we need to create the shadow so
7835 * that changes that we make to the
7836 * object do not interfere with
7837 * any symmetrical copies which
7838 * have occured (case 2) or which
7839 * might occur (case 3).
7841 * The first case is when we had
7842 * deferred shadow object creation
7843 * via the entry->needs_copy mechanism.
7844 * This mechanism only works when
7845 * only one entry points to the source
7846 * object, and we are about to create
7847 * a second entry pointing to the
7848 * same object. The problem is that
7849 * there is no way of mapping from
7850 * an object to the entries pointing
7851 * to it. (Deferred shadow creation
7852 * works with one entry because occurs
7853 * at fault time, and we walk from the
7854 * entry to the object when handling
7857 * The second case is when the object
7858 * to be shared has already been copied
7859 * with a symmetric copy, but we point
7860 * directly to the object without
7861 * needs_copy set in our entry. (This
7862 * can happen because different ranges
7863 * of an object can be pointed to by
7864 * different entries. In particular,
7865 * a single entry pointing to an object
7866 * can be split by a call to vm_inherit,
7867 * which, combined with task_create, can
7868 * result in the different entries
7869 * having different needs_copy values.)
7870 * The shadowed flag in the object allows
7871 * us to detect this case. The problem
7872 * with this case is that if this object
7873 * has or will have shadows, then we
7874 * must not perform an asymmetric copy
7875 * of this object, since such a copy
7876 * allows the object to be changed, which
7877 * will break the previous symmetrical
7878 * copies (which rely upon the object
7879 * not changing). In a sense, the shadowed
7880 * flag says "don't change this object".
7881 * We fix this by creating a shadow
7882 * object for this object, and sharing
7883 * that. This works because we are free
7884 * to change the shadow object (and thus
7885 * to use an asymmetric copy strategy);
7886 * this is also semantically correct,
7887 * since this object is temporary, and
7888 * therefore a copy of the object is
7889 * as good as the object itself. (This
7890 * is not true for permanent objects,
7891 * since the pager needs to see changes,
7892 * which won't happen if the changes
7893 * are made to a copy.)
7895 * The third case is when the object
7896 * to be shared has parts sticking
7897 * outside of the entry we're working
7898 * with, and thus may in the future
7899 * be subject to a symmetrical copy.
7900 * (This is a preemptive version of
7904 vm_object_shadow(&old_entry
->object
.vm_object
,
7906 (vm_map_size_t
) (old_entry
->vme_end
-
7907 old_entry
->vme_start
));
7910 * If we're making a shadow for other than
7911 * copy on write reasons, then we have
7912 * to remove write permission.
7915 if (!old_entry
->needs_copy
&&
7916 (old_entry
->protection
& VM_PROT_WRITE
)) {
7919 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
7921 if (override_nx(old_map
, old_entry
->alias
) && prot
)
7922 prot
|= VM_PROT_EXECUTE
;
7924 if (old_map
->mapped
) {
7925 vm_object_pmap_protect(
7926 old_entry
->object
.vm_object
,
7928 (old_entry
->vme_end
-
7929 old_entry
->vme_start
),
7931 old_entry
->vme_start
,
7934 pmap_protect(old_map
->pmap
,
7935 old_entry
->vme_start
,
7941 old_entry
->needs_copy
= FALSE
;
7942 object
= old_entry
->object
.vm_object
;
7946 * If object was using a symmetric copy strategy,
7947 * change its copy strategy to the default
7948 * asymmetric copy strategy, which is copy_delay
7949 * in the non-norma case and copy_call in the
7950 * norma case. Bump the reference count for the
7954 if(old_entry
->is_sub_map
) {
7955 vm_map_lock(old_entry
->object
.sub_map
);
7956 vm_map_reference(old_entry
->object
.sub_map
);
7957 vm_map_unlock(old_entry
->object
.sub_map
);
7959 vm_object_lock(object
);
7960 vm_object_reference_locked(object
);
7961 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
7962 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
7964 vm_object_unlock(object
);
7968 * Clone the entry, using object ref from above.
7969 * Mark both entries as shared.
7972 new_entry
= vm_map_entry_create(new_map
);
7973 vm_map_entry_copy(new_entry
, old_entry
);
7974 old_entry
->is_shared
= TRUE
;
7975 new_entry
->is_shared
= TRUE
;
7978 * Insert the entry into the new map -- we
7979 * know we're inserting at the end of the new
7983 vm_map_entry_link(new_map
, vm_map_last_entry(new_map
), new_entry
);
7986 * Update the physical map
7989 if (old_entry
->is_sub_map
) {
7990 /* Bill Angell pmap support goes here */
7992 pmap_copy(new_map
->pmap
, old_map
->pmap
, new_entry
->vme_start
,
7993 old_entry
->vme_end
- old_entry
->vme_start
,
7994 old_entry
->vme_start
);
8001 vm_map_entry_t
*old_entry_p
,
8004 vm_map_entry_t old_entry
= *old_entry_p
;
8005 vm_map_size_t entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
8006 vm_map_offset_t start
= old_entry
->vme_start
;
8008 vm_map_entry_t last
= vm_map_last_entry(new_map
);
8010 vm_map_unlock(old_map
);
8012 * Use maxprot version of copyin because we
8013 * care about whether this memory can ever
8014 * be accessed, not just whether it's accessible
8017 if (vm_map_copyin_maxprot(old_map
, start
, entry_size
, FALSE
, ©
)
8020 * The map might have changed while it
8021 * was unlocked, check it again. Skip
8022 * any blank space or permanently
8023 * unreadable region.
8025 vm_map_lock(old_map
);
8026 if (!vm_map_lookup_entry(old_map
, start
, &last
) ||
8027 (last
->max_protection
& VM_PROT_READ
) == VM_PROT_NONE
) {
8028 last
= last
->vme_next
;
8030 *old_entry_p
= last
;
8033 * XXX For some error returns, want to
8034 * XXX skip to the next element. Note
8035 * that INVALID_ADDRESS and
8036 * PROTECTION_FAILURE are handled above.
8043 * Insert the copy into the new map
8046 vm_map_copy_insert(new_map
, last
, copy
);
8049 * Pick up the traversal at the end of
8050 * the copied region.
8053 vm_map_lock(old_map
);
8054 start
+= entry_size
;
8055 if (! vm_map_lookup_entry(old_map
, start
, &last
)) {
8056 last
= last
->vme_next
;
8058 if (last
->vme_start
== start
) {
8060 * No need to clip here and we don't
8061 * want to cause any unnecessary
8065 vm_map_clip_start(old_map
, last
, start
);
8068 *old_entry_p
= last
;
8076 * Create and return a new map based on the old
8077 * map, according to the inheritance values on the
8078 * regions in that map.
8080 * The source map must not be locked.
8088 vm_map_entry_t old_entry
;
8089 vm_map_size_t new_size
= 0, entry_size
;
8090 vm_map_entry_t new_entry
;
8091 boolean_t src_needs_copy
;
8092 boolean_t new_entry_needs_copy
;
8094 new_pmap
= pmap_create((vm_map_size_t
) 0,
8095 #if defined(__i386__) || defined(__x86_64__)
8096 old_map
->pmap
->pm_task_map
!= TASK_MAP_32BIT
8101 #if defined(__i386__)
8102 if (old_map
->pmap
->pm_task_map
== TASK_MAP_64BIT_SHARED
)
8103 pmap_set_4GB_pagezero(new_pmap
);
8106 vm_map_reference_swap(old_map
);
8107 vm_map_lock(old_map
);
8109 new_map
= vm_map_create(new_pmap
,
8110 old_map
->min_offset
,
8111 old_map
->max_offset
,
8112 old_map
->hdr
.entries_pageable
);
8115 old_entry
= vm_map_first_entry(old_map
);
8116 old_entry
!= vm_map_to_entry(old_map
);
8119 entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
8121 switch (old_entry
->inheritance
) {
8122 case VM_INHERIT_NONE
:
8125 case VM_INHERIT_SHARE
:
8126 vm_map_fork_share(old_map
, old_entry
, new_map
);
8127 new_size
+= entry_size
;
8130 case VM_INHERIT_COPY
:
8133 * Inline the copy_quickly case;
8134 * upon failure, fall back on call
8135 * to vm_map_fork_copy.
8138 if(old_entry
->is_sub_map
)
8140 if ((old_entry
->wired_count
!= 0) ||
8141 ((old_entry
->object
.vm_object
!= NULL
) &&
8142 (old_entry
->object
.vm_object
->true_share
))) {
8143 goto slow_vm_map_fork_copy
;
8146 new_entry
= vm_map_entry_create(new_map
);
8147 vm_map_entry_copy(new_entry
, old_entry
);
8148 /* clear address space specifics */
8149 new_entry
->use_pmap
= FALSE
;
8151 if (! vm_object_copy_quickly(
8152 &new_entry
->object
.vm_object
,
8154 (old_entry
->vme_end
-
8155 old_entry
->vme_start
),
8157 &new_entry_needs_copy
)) {
8158 vm_map_entry_dispose(new_map
, new_entry
);
8159 goto slow_vm_map_fork_copy
;
8163 * Handle copy-on-write obligations
8166 if (src_needs_copy
&& !old_entry
->needs_copy
) {
8169 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
8171 if (override_nx(old_map
, old_entry
->alias
) && prot
)
8172 prot
|= VM_PROT_EXECUTE
;
8174 vm_object_pmap_protect(
8175 old_entry
->object
.vm_object
,
8177 (old_entry
->vme_end
-
8178 old_entry
->vme_start
),
8179 ((old_entry
->is_shared
8183 old_entry
->vme_start
,
8186 old_entry
->needs_copy
= TRUE
;
8188 new_entry
->needs_copy
= new_entry_needs_copy
;
8191 * Insert the entry at the end
8195 vm_map_entry_link(new_map
, vm_map_last_entry(new_map
),
8197 new_size
+= entry_size
;
8200 slow_vm_map_fork_copy
:
8201 if (vm_map_fork_copy(old_map
, &old_entry
, new_map
)) {
8202 new_size
+= entry_size
;
8206 old_entry
= old_entry
->vme_next
;
8209 new_map
->size
= new_size
;
8210 vm_map_unlock(old_map
);
8211 vm_map_deallocate(old_map
);
8219 * Setup the "new_map" with the proper execution environment according
8220 * to the type of executable (platform, 64bit, chroot environment).
8221 * Map the comm page and shared region, etc...
8230 SHARED_REGION_TRACE_DEBUG(
8231 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8232 current_task(), new_map
, task
, fsroot
, cpu
));
8233 (void) vm_commpage_enter(new_map
, task
);
8234 (void) vm_shared_region_enter(new_map
, task
, fsroot
, cpu
);
8235 SHARED_REGION_TRACE_DEBUG(
8236 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8237 current_task(), new_map
, task
, fsroot
, cpu
));
8238 return KERN_SUCCESS
;
8242 * vm_map_lookup_locked:
8244 * Finds the VM object, offset, and
8245 * protection for a given virtual address in the
8246 * specified map, assuming a page fault of the
8249 * Returns the (object, offset, protection) for
8250 * this address, whether it is wired down, and whether
8251 * this map has the only reference to the data in question.
8252 * In order to later verify this lookup, a "version"
8255 * The map MUST be locked by the caller and WILL be
8256 * locked on exit. In order to guarantee the
8257 * existence of the returned object, it is returned
8260 * If a lookup is requested with "write protection"
8261 * specified, the map may be changed to perform virtual
8262 * copying operations, although the data referenced will
8266 vm_map_lookup_locked(
8267 vm_map_t
*var_map
, /* IN/OUT */
8268 vm_map_offset_t vaddr
,
8269 vm_prot_t fault_type
,
8270 int object_lock_type
,
8271 vm_map_version_t
*out_version
, /* OUT */
8272 vm_object_t
*object
, /* OUT */
8273 vm_object_offset_t
*offset
, /* OUT */
8274 vm_prot_t
*out_prot
, /* OUT */
8275 boolean_t
*wired
, /* OUT */
8276 vm_object_fault_info_t fault_info
, /* OUT */
8279 vm_map_entry_t entry
;
8280 register vm_map_t map
= *var_map
;
8281 vm_map_t old_map
= *var_map
;
8282 vm_map_t cow_sub_map_parent
= VM_MAP_NULL
;
8283 vm_map_offset_t cow_parent_vaddr
= 0;
8284 vm_map_offset_t old_start
= 0;
8285 vm_map_offset_t old_end
= 0;
8286 register vm_prot_t prot
;
8292 * If the map has an interesting hint, try it before calling
8293 * full blown lookup routine.
8297 if ((entry
== vm_map_to_entry(map
)) ||
8298 (vaddr
< entry
->vme_start
) || (vaddr
>= entry
->vme_end
)) {
8299 vm_map_entry_t tmp_entry
;
8302 * Entry was either not a valid hint, or the vaddr
8303 * was not contained in the entry, so do a full lookup.
8305 if (!vm_map_lookup_entry(map
, vaddr
, &tmp_entry
)) {
8306 if((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
))
8307 vm_map_unlock(cow_sub_map_parent
);
8308 if((*real_map
!= map
)
8309 && (*real_map
!= cow_sub_map_parent
))
8310 vm_map_unlock(*real_map
);
8311 return KERN_INVALID_ADDRESS
;
8316 if(map
== old_map
) {
8317 old_start
= entry
->vme_start
;
8318 old_end
= entry
->vme_end
;
8322 * Handle submaps. Drop lock on upper map, submap is
8327 if (entry
->is_sub_map
) {
8328 vm_map_offset_t local_vaddr
;
8329 vm_map_offset_t end_delta
;
8330 vm_map_offset_t start_delta
;
8331 vm_map_entry_t submap_entry
;
8332 boolean_t mapped_needs_copy
=FALSE
;
8334 local_vaddr
= vaddr
;
8336 if ((entry
->use_pmap
&& !(fault_type
& VM_PROT_WRITE
))) {
8337 /* if real_map equals map we unlock below */
8338 if ((*real_map
!= map
) &&
8339 (*real_map
!= cow_sub_map_parent
))
8340 vm_map_unlock(*real_map
);
8341 *real_map
= entry
->object
.sub_map
;
8344 if(entry
->needs_copy
&& (fault_type
& VM_PROT_WRITE
)) {
8345 if (!mapped_needs_copy
) {
8346 if (vm_map_lock_read_to_write(map
)) {
8347 vm_map_lock_read(map
);
8348 /* XXX FBDP: entry still valid ? */
8349 if(*real_map
== entry
->object
.sub_map
)
8353 vm_map_lock_read(entry
->object
.sub_map
);
8354 cow_sub_map_parent
= map
;
8355 /* reset base to map before cow object */
8356 /* this is the map which will accept */
8357 /* the new cow object */
8358 old_start
= entry
->vme_start
;
8359 old_end
= entry
->vme_end
;
8360 cow_parent_vaddr
= vaddr
;
8361 mapped_needs_copy
= TRUE
;
8363 vm_map_lock_read(entry
->object
.sub_map
);
8364 if((cow_sub_map_parent
!= map
) &&
8369 vm_map_lock_read(entry
->object
.sub_map
);
8370 /* leave map locked if it is a target */
8371 /* cow sub_map above otherwise, just */
8372 /* follow the maps down to the object */
8373 /* here we unlock knowing we are not */
8374 /* revisiting the map. */
8375 if((*real_map
!= map
) && (map
!= cow_sub_map_parent
))
8376 vm_map_unlock_read(map
);
8379 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8380 *var_map
= map
= entry
->object
.sub_map
;
8382 /* calculate the offset in the submap for vaddr */
8383 local_vaddr
= (local_vaddr
- entry
->vme_start
) + entry
->offset
;
8386 if(!vm_map_lookup_entry(map
, local_vaddr
, &submap_entry
)) {
8387 if((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)){
8388 vm_map_unlock(cow_sub_map_parent
);
8390 if((*real_map
!= map
)
8391 && (*real_map
!= cow_sub_map_parent
)) {
8392 vm_map_unlock(*real_map
);
8395 return KERN_INVALID_ADDRESS
;
8398 /* find the attenuated shadow of the underlying object */
8399 /* on our target map */
8401 /* in english the submap object may extend beyond the */
8402 /* region mapped by the entry or, may only fill a portion */
8403 /* of it. For our purposes, we only care if the object */
8404 /* doesn't fill. In this case the area which will */
8405 /* ultimately be clipped in the top map will only need */
8406 /* to be as big as the portion of the underlying entry */
8407 /* which is mapped */
8408 start_delta
= submap_entry
->vme_start
> entry
->offset
?
8409 submap_entry
->vme_start
- entry
->offset
: 0;
8412 (entry
->offset
+ start_delta
+ (old_end
- old_start
)) <=
8413 submap_entry
->vme_end
?
8414 0 : (entry
->offset
+
8415 (old_end
- old_start
))
8416 - submap_entry
->vme_end
;
8418 old_start
+= start_delta
;
8419 old_end
-= end_delta
;
8421 if(submap_entry
->is_sub_map
) {
8422 entry
= submap_entry
;
8423 vaddr
= local_vaddr
;
8424 goto submap_recurse
;
8427 if(((fault_type
& VM_PROT_WRITE
) && cow_sub_map_parent
)) {
8429 vm_object_t sub_object
, copy_object
;
8430 vm_object_offset_t copy_offset
;
8431 vm_map_offset_t local_start
;
8432 vm_map_offset_t local_end
;
8433 boolean_t copied_slowly
= FALSE
;
8435 if (vm_map_lock_read_to_write(map
)) {
8436 vm_map_lock_read(map
);
8437 old_start
-= start_delta
;
8438 old_end
+= end_delta
;
8443 sub_object
= submap_entry
->object
.vm_object
;
8444 if (sub_object
== VM_OBJECT_NULL
) {
8448 (submap_entry
->vme_end
-
8449 submap_entry
->vme_start
));
8450 submap_entry
->object
.vm_object
= sub_object
;
8451 submap_entry
->offset
= 0;
8453 local_start
= local_vaddr
-
8454 (cow_parent_vaddr
- old_start
);
8455 local_end
= local_vaddr
+
8456 (old_end
- cow_parent_vaddr
);
8457 vm_map_clip_start(map
, submap_entry
, local_start
);
8458 vm_map_clip_end(map
, submap_entry
, local_end
);
8459 /* unnesting was done in vm_map_clip_start/end() */
8460 assert(!submap_entry
->use_pmap
);
8462 /* This is the COW case, lets connect */
8463 /* an entry in our space to the underlying */
8464 /* object in the submap, bypassing the */
8468 if(submap_entry
->wired_count
!= 0 ||
8469 (sub_object
->copy_strategy
==
8470 MEMORY_OBJECT_COPY_NONE
)) {
8471 vm_object_lock(sub_object
);
8472 vm_object_copy_slowly(sub_object
,
8473 submap_entry
->offset
,
8474 (submap_entry
->vme_end
-
8475 submap_entry
->vme_start
),
8478 copied_slowly
= TRUE
;
8481 /* set up shadow object */
8482 copy_object
= sub_object
;
8483 vm_object_reference(copy_object
);
8484 sub_object
->shadowed
= TRUE
;
8485 submap_entry
->needs_copy
= TRUE
;
8487 prot
= submap_entry
->protection
& ~VM_PROT_WRITE
;
8489 if (override_nx(map
, submap_entry
->alias
) && prot
)
8490 prot
|= VM_PROT_EXECUTE
;
8492 vm_object_pmap_protect(
8494 submap_entry
->offset
,
8495 submap_entry
->vme_end
-
8496 submap_entry
->vme_start
,
8497 (submap_entry
->is_shared
8499 PMAP_NULL
: map
->pmap
,
8500 submap_entry
->vme_start
,
8505 * Adjust the fault offset to the submap entry.
8507 copy_offset
= (local_vaddr
-
8508 submap_entry
->vme_start
+
8509 submap_entry
->offset
);
8511 /* This works diffently than the */
8512 /* normal submap case. We go back */
8513 /* to the parent of the cow map and*/
8514 /* clip out the target portion of */
8515 /* the sub_map, substituting the */
8516 /* new copy object, */
8519 local_start
= old_start
;
8520 local_end
= old_end
;
8521 map
= cow_sub_map_parent
;
8522 *var_map
= cow_sub_map_parent
;
8523 vaddr
= cow_parent_vaddr
;
8524 cow_sub_map_parent
= NULL
;
8526 if(!vm_map_lookup_entry(map
,
8528 vm_object_deallocate(
8530 vm_map_lock_write_to_read(map
);
8531 return KERN_INVALID_ADDRESS
;
8534 /* clip out the portion of space */
8535 /* mapped by the sub map which */
8536 /* corresponds to the underlying */
8540 * Clip (and unnest) the smallest nested chunk
8541 * possible around the faulting address...
8543 local_start
= vaddr
& ~(pmap_nesting_size_min
- 1);
8544 local_end
= local_start
+ pmap_nesting_size_min
;
8546 * ... but don't go beyond the "old_start" to "old_end"
8547 * range, to avoid spanning over another VM region
8548 * with a possibly different VM object and/or offset.
8550 if (local_start
< old_start
) {
8551 local_start
= old_start
;
8553 if (local_end
> old_end
) {
8554 local_end
= old_end
;
8557 * Adjust copy_offset to the start of the range.
8559 copy_offset
-= (vaddr
- local_start
);
8561 vm_map_clip_start(map
, entry
, local_start
);
8562 vm_map_clip_end(map
, entry
, local_end
);
8563 /* unnesting was done in vm_map_clip_start/end() */
8564 assert(!entry
->use_pmap
);
8566 /* substitute copy object for */
8567 /* shared map entry */
8568 vm_map_deallocate(entry
->object
.sub_map
);
8569 entry
->is_sub_map
= FALSE
;
8570 entry
->object
.vm_object
= copy_object
;
8572 /* propagate the submap entry's protections */
8573 entry
->protection
|= submap_entry
->protection
;
8574 entry
->max_protection
|= submap_entry
->max_protection
;
8577 entry
->offset
= local_start
- old_start
;
8578 entry
->needs_copy
= FALSE
;
8579 entry
->is_shared
= FALSE
;
8581 entry
->offset
= copy_offset
;
8582 entry
->needs_copy
= TRUE
;
8583 if(entry
->inheritance
== VM_INHERIT_SHARE
)
8584 entry
->inheritance
= VM_INHERIT_COPY
;
8586 entry
->is_shared
= TRUE
;
8588 if(entry
->inheritance
== VM_INHERIT_SHARE
)
8589 entry
->inheritance
= VM_INHERIT_COPY
;
8591 vm_map_lock_write_to_read(map
);
8593 if((cow_sub_map_parent
)
8594 && (cow_sub_map_parent
!= *real_map
)
8595 && (cow_sub_map_parent
!= map
)) {
8596 vm_map_unlock(cow_sub_map_parent
);
8598 entry
= submap_entry
;
8599 vaddr
= local_vaddr
;
8604 * Check whether this task is allowed to have
8608 prot
= entry
->protection
;
8610 if (override_nx(map
, entry
->alias
) && prot
) {
8612 * HACK -- if not a stack, then allow execution
8614 prot
|= VM_PROT_EXECUTE
;
8617 if ((fault_type
& (prot
)) != fault_type
) {
8618 if (*real_map
!= map
) {
8619 vm_map_unlock(*real_map
);
8623 if ((fault_type
& VM_PROT_EXECUTE
) && prot
)
8624 log_stack_execution_failure((addr64_t
)vaddr
, prot
);
8626 DTRACE_VM2(prot_fault
, int, 1, (uint64_t *), NULL
);
8627 return KERN_PROTECTION_FAILURE
;
8631 * If this page is not pageable, we have to get
8632 * it for all possible accesses.
8635 *wired
= (entry
->wired_count
!= 0);
8640 * If the entry was copy-on-write, we either ...
8643 if (entry
->needs_copy
) {
8645 * If we want to write the page, we may as well
8646 * handle that now since we've got the map locked.
8648 * If we don't need to write the page, we just
8649 * demote the permissions allowed.
8652 if ((fault_type
& VM_PROT_WRITE
) || *wired
) {
8654 * Make a new object, and place it in the
8655 * object chain. Note that no new references
8656 * have appeared -- one just moved from the
8657 * map to the new object.
8660 if (vm_map_lock_read_to_write(map
)) {
8661 vm_map_lock_read(map
);
8664 vm_object_shadow(&entry
->object
.vm_object
,
8666 (vm_map_size_t
) (entry
->vme_end
-
8669 entry
->object
.vm_object
->shadowed
= TRUE
;
8670 entry
->needs_copy
= FALSE
;
8671 vm_map_lock_write_to_read(map
);
8675 * We're attempting to read a copy-on-write
8676 * page -- don't allow writes.
8679 prot
&= (~VM_PROT_WRITE
);
8684 * Create an object if necessary.
8686 if (entry
->object
.vm_object
== VM_OBJECT_NULL
) {
8688 if (vm_map_lock_read_to_write(map
)) {
8689 vm_map_lock_read(map
);
8693 entry
->object
.vm_object
= vm_object_allocate(
8694 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
8696 vm_map_lock_write_to_read(map
);
8700 * Return the object/offset from this entry. If the entry
8701 * was copy-on-write or empty, it has been fixed up. Also
8702 * return the protection.
8705 *offset
= (vaddr
- entry
->vme_start
) + entry
->offset
;
8706 *object
= entry
->object
.vm_object
;
8710 fault_info
->interruptible
= THREAD_UNINT
; /* for now... */
8711 /* ... the caller will change "interruptible" if needed */
8712 fault_info
->cluster_size
= 0;
8713 fault_info
->user_tag
= entry
->alias
;
8714 fault_info
->behavior
= entry
->behavior
;
8715 fault_info
->lo_offset
= entry
->offset
;
8716 fault_info
->hi_offset
= (entry
->vme_end
- entry
->vme_start
) + entry
->offset
;
8717 fault_info
->no_cache
= entry
->no_cache
;
8718 fault_info
->stealth
= FALSE
;
8719 fault_info
->mark_zf_absent
= FALSE
;
8723 * Lock the object to prevent it from disappearing
8725 if (object_lock_type
== OBJECT_LOCK_EXCLUSIVE
)
8726 vm_object_lock(*object
);
8728 vm_object_lock_shared(*object
);
8731 * Save the version number
8734 out_version
->main_timestamp
= map
->timestamp
;
8736 return KERN_SUCCESS
;
8743 * Verifies that the map in question has not changed
8744 * since the given version. If successful, the map
8745 * will not change until vm_map_verify_done() is called.
8749 register vm_map_t map
,
8750 register vm_map_version_t
*version
) /* REF */
8754 vm_map_lock_read(map
);
8755 result
= (map
->timestamp
== version
->main_timestamp
);
8758 vm_map_unlock_read(map
);
8764 * vm_map_verify_done:
8766 * Releases locks acquired by a vm_map_verify.
8768 * This is now a macro in vm/vm_map.h. It does a
8769 * vm_map_unlock_read on the map.
8774 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8775 * Goes away after regular vm_region_recurse function migrates to
8777 * vm_region_recurse: A form of vm_region which follows the
8778 * submaps in a target map
8783 vm_map_region_recurse_64(
8785 vm_map_offset_t
*address
, /* IN/OUT */
8786 vm_map_size_t
*size
, /* OUT */
8787 natural_t
*nesting_depth
, /* IN/OUT */
8788 vm_region_submap_info_64_t submap_info
, /* IN/OUT */
8789 mach_msg_type_number_t
*count
) /* IN/OUT */
8791 vm_region_extended_info_data_t extended
;
8792 vm_map_entry_t tmp_entry
;
8793 vm_map_offset_t user_address
;
8794 unsigned int user_max_depth
;
8797 * "curr_entry" is the VM map entry preceding or including the
8798 * address we're looking for.
8799 * "curr_map" is the map or sub-map containing "curr_entry".
8800 * "curr_offset" is the cumulated offset of "curr_map" in the
8801 * target task's address space.
8802 * "curr_depth" is the depth of "curr_map" in the chain of
8804 * "curr_max_offset" is the maximum offset we should take into
8805 * account in the current map. It may be smaller than the current
8806 * map's "max_offset" because we might not have mapped it all in
8807 * the upper level map.
8809 vm_map_entry_t curr_entry
;
8810 vm_map_offset_t curr_offset
;
8812 unsigned int curr_depth
;
8813 vm_map_offset_t curr_max_offset
;
8816 * "next_" is the same as "curr_" but for the VM region immediately
8817 * after the address we're looking for. We need to keep track of this
8818 * too because we want to return info about that region if the
8819 * address we're looking for is not mapped.
8821 vm_map_entry_t next_entry
;
8822 vm_map_offset_t next_offset
;
8824 unsigned int next_depth
;
8825 vm_map_offset_t next_max_offset
;
8827 boolean_t look_for_pages
;
8828 vm_region_submap_short_info_64_t short_info
;
8830 if (map
== VM_MAP_NULL
) {
8831 /* no address space to work on */
8832 return KERN_INVALID_ARGUMENT
;
8835 if (*count
< VM_REGION_SUBMAP_INFO_COUNT_64
) {
8836 if (*count
< VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
) {
8838 * "info" structure is not big enough and
8841 return KERN_INVALID_ARGUMENT
;
8843 look_for_pages
= FALSE
;
8844 *count
= VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
;
8845 short_info
= (vm_region_submap_short_info_64_t
) submap_info
;
8849 look_for_pages
= TRUE
;
8850 *count
= VM_REGION_SUBMAP_INFO_COUNT_64
;
8855 user_address
= *address
;
8856 user_max_depth
= *nesting_depth
;
8862 curr_max_offset
= curr_map
->max_offset
;
8868 next_max_offset
= curr_max_offset
;
8871 vm_map_lock_read(curr_map
);
8875 if (vm_map_lookup_entry(curr_map
,
8876 user_address
- curr_offset
,
8878 /* tmp_entry contains the address we're looking for */
8879 curr_entry
= tmp_entry
;
8882 * The address is not mapped. "tmp_entry" is the
8883 * map entry preceding the address. We want the next
8884 * one, if it exists.
8886 curr_entry
= tmp_entry
->vme_next
;
8887 if (curr_entry
== vm_map_to_entry(curr_map
) ||
8888 curr_entry
->vme_start
>= curr_max_offset
) {
8889 /* no next entry at this level: stop looking */
8891 vm_map_unlock_read(curr_map
);
8897 curr_max_offset
= 0;
8903 * Is the next entry at this level closer to the address (or
8904 * deeper in the submap chain) than the one we had
8907 tmp_entry
= curr_entry
->vme_next
;
8908 if (tmp_entry
== vm_map_to_entry(curr_map
)) {
8909 /* no next entry at this level */
8910 } else if (tmp_entry
->vme_start
>= curr_max_offset
) {
8912 * tmp_entry is beyond the scope of what we mapped of
8913 * this submap in the upper level: ignore it.
8915 } else if ((next_entry
== NULL
) ||
8916 (tmp_entry
->vme_start
+ curr_offset
<=
8917 next_entry
->vme_start
+ next_offset
)) {
8919 * We didn't have a "next_entry" or this one is
8920 * closer to the address we're looking for:
8921 * use this "tmp_entry" as the new "next_entry".
8923 if (next_entry
!= NULL
) {
8924 /* unlock the last "next_map" */
8925 if (next_map
!= curr_map
&& not_in_kdp
) {
8926 vm_map_unlock_read(next_map
);
8929 next_entry
= tmp_entry
;
8930 next_map
= curr_map
;
8931 next_offset
= curr_offset
;
8932 next_depth
= curr_depth
;
8933 next_max_offset
= curr_max_offset
;
8936 if (!curr_entry
->is_sub_map
||
8937 curr_depth
>= user_max_depth
) {
8939 * We hit a leaf map or we reached the maximum depth
8940 * we could, so stop looking. Keep the current map
8947 * Get down to the next submap level.
8951 * Lock the next level and unlock the current level,
8952 * unless we need to keep it locked to access the "next_entry"
8956 vm_map_lock_read(curr_entry
->object
.sub_map
);
8958 if (curr_map
== next_map
) {
8959 /* keep "next_map" locked in case we need it */
8961 /* release this map */
8963 vm_map_unlock_read(curr_map
);
8967 * Adjust the offset. "curr_entry" maps the submap
8968 * at relative address "curr_entry->vme_start" in the
8969 * curr_map but skips the first "curr_entry->offset"
8970 * bytes of the submap.
8971 * "curr_offset" always represents the offset of a virtual
8972 * address in the curr_map relative to the absolute address
8973 * space (i.e. the top-level VM map).
8976 (curr_entry
->vme_start
- curr_entry
->offset
);
8977 /* switch to the submap */
8978 curr_map
= curr_entry
->object
.sub_map
;
8981 * "curr_max_offset" allows us to keep track of the
8982 * portion of the submap that is actually mapped at this level:
8983 * the rest of that submap is irrelevant to us, since it's not
8985 * The relevant portion of the map starts at
8986 * "curr_entry->offset" up to the size of "curr_entry".
8989 curr_entry
->vme_end
- curr_entry
->vme_start
+
8994 if (curr_entry
== NULL
) {
8995 /* no VM region contains the address... */
8996 if (next_entry
== NULL
) {
8997 /* ... and no VM region follows it either */
8998 return KERN_INVALID_ADDRESS
;
9000 /* ... gather info about the next VM region */
9001 curr_entry
= next_entry
;
9002 curr_map
= next_map
; /* still locked ... */
9003 curr_offset
= next_offset
;
9004 curr_depth
= next_depth
;
9005 curr_max_offset
= next_max_offset
;
9007 /* we won't need "next_entry" after all */
9008 if (next_entry
!= NULL
) {
9009 /* release "next_map" */
9010 if (next_map
!= curr_map
&& not_in_kdp
) {
9011 vm_map_unlock_read(next_map
);
9019 next_max_offset
= 0;
9021 *nesting_depth
= curr_depth
;
9022 *size
= curr_entry
->vme_end
- curr_entry
->vme_start
;
9023 *address
= curr_entry
->vme_start
+ curr_offset
;
9025 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9026 // so probably should be a real 32b ID vs. ptr.
9027 // Current users just check for equality
9028 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9030 if (look_for_pages
) {
9031 submap_info
->user_tag
= curr_entry
->alias
;
9032 submap_info
->offset
= curr_entry
->offset
;
9033 submap_info
->protection
= curr_entry
->protection
;
9034 submap_info
->inheritance
= curr_entry
->inheritance
;
9035 submap_info
->max_protection
= curr_entry
->max_protection
;
9036 submap_info
->behavior
= curr_entry
->behavior
;
9037 submap_info
->user_wired_count
= curr_entry
->user_wired_count
;
9038 submap_info
->is_submap
= curr_entry
->is_sub_map
;
9039 submap_info
->object_id
= INFO_MAKE_OBJECT_ID(curr_entry
->object
.vm_object
);
9041 short_info
->user_tag
= curr_entry
->alias
;
9042 short_info
->offset
= curr_entry
->offset
;
9043 short_info
->protection
= curr_entry
->protection
;
9044 short_info
->inheritance
= curr_entry
->inheritance
;
9045 short_info
->max_protection
= curr_entry
->max_protection
;
9046 short_info
->behavior
= curr_entry
->behavior
;
9047 short_info
->user_wired_count
= curr_entry
->user_wired_count
;
9048 short_info
->is_submap
= curr_entry
->is_sub_map
;
9049 short_info
->object_id
= INFO_MAKE_OBJECT_ID(curr_entry
->object
.vm_object
);
9052 extended
.pages_resident
= 0;
9053 extended
.pages_swapped_out
= 0;
9054 extended
.pages_shared_now_private
= 0;
9055 extended
.pages_dirtied
= 0;
9056 extended
.external_pager
= 0;
9057 extended
.shadow_depth
= 0;
9060 if (!curr_entry
->is_sub_map
) {
9061 vm_map_region_walk(curr_map
,
9062 curr_entry
->vme_start
,
9065 (curr_entry
->vme_end
-
9066 curr_entry
->vme_start
),
9069 if (extended
.external_pager
&&
9070 extended
.ref_count
== 2 &&
9071 extended
.share_mode
== SM_SHARED
) {
9072 extended
.share_mode
= SM_PRIVATE
;
9075 if (curr_entry
->use_pmap
) {
9076 extended
.share_mode
= SM_TRUESHARED
;
9078 extended
.share_mode
= SM_PRIVATE
;
9080 extended
.ref_count
=
9081 curr_entry
->object
.sub_map
->ref_count
;
9085 if (look_for_pages
) {
9086 submap_info
->pages_resident
= extended
.pages_resident
;
9087 submap_info
->pages_swapped_out
= extended
.pages_swapped_out
;
9088 submap_info
->pages_shared_now_private
=
9089 extended
.pages_shared_now_private
;
9090 submap_info
->pages_dirtied
= extended
.pages_dirtied
;
9091 submap_info
->external_pager
= extended
.external_pager
;
9092 submap_info
->shadow_depth
= extended
.shadow_depth
;
9093 submap_info
->share_mode
= extended
.share_mode
;
9094 submap_info
->ref_count
= extended
.ref_count
;
9096 short_info
->external_pager
= extended
.external_pager
;
9097 short_info
->shadow_depth
= extended
.shadow_depth
;
9098 short_info
->share_mode
= extended
.share_mode
;
9099 short_info
->ref_count
= extended
.ref_count
;
9103 vm_map_unlock_read(curr_map
);
9106 return KERN_SUCCESS
;
9112 * User call to obtain information about a region in
9113 * a task's address map. Currently, only one flavor is
9116 * XXX The reserved and behavior fields cannot be filled
9117 * in until the vm merge from the IK is completed, and
9118 * vm_reserve is implemented.
9124 vm_map_offset_t
*address
, /* IN/OUT */
9125 vm_map_size_t
*size
, /* OUT */
9126 vm_region_flavor_t flavor
, /* IN */
9127 vm_region_info_t info
, /* OUT */
9128 mach_msg_type_number_t
*count
, /* IN/OUT */
9129 mach_port_t
*object_name
) /* OUT */
9131 vm_map_entry_t tmp_entry
;
9132 vm_map_entry_t entry
;
9133 vm_map_offset_t start
;
9135 if (map
== VM_MAP_NULL
)
9136 return(KERN_INVALID_ARGUMENT
);
9140 case VM_REGION_BASIC_INFO
:
9141 /* legacy for old 32-bit objects info */
9143 vm_region_basic_info_t basic
;
9145 if (*count
< VM_REGION_BASIC_INFO_COUNT
)
9146 return(KERN_INVALID_ARGUMENT
);
9148 basic
= (vm_region_basic_info_t
) info
;
9149 *count
= VM_REGION_BASIC_INFO_COUNT
;
9151 vm_map_lock_read(map
);
9154 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
9155 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
9156 vm_map_unlock_read(map
);
9157 return(KERN_INVALID_ADDRESS
);
9163 start
= entry
->vme_start
;
9165 basic
->offset
= (uint32_t)entry
->offset
;
9166 basic
->protection
= entry
->protection
;
9167 basic
->inheritance
= entry
->inheritance
;
9168 basic
->max_protection
= entry
->max_protection
;
9169 basic
->behavior
= entry
->behavior
;
9170 basic
->user_wired_count
= entry
->user_wired_count
;
9171 basic
->reserved
= entry
->is_sub_map
;
9173 *size
= (entry
->vme_end
- start
);
9175 if (object_name
) *object_name
= IP_NULL
;
9176 if (entry
->is_sub_map
) {
9177 basic
->shared
= FALSE
;
9179 basic
->shared
= entry
->is_shared
;
9182 vm_map_unlock_read(map
);
9183 return(KERN_SUCCESS
);
9186 case VM_REGION_BASIC_INFO_64
:
9188 vm_region_basic_info_64_t basic
;
9190 if (*count
< VM_REGION_BASIC_INFO_COUNT_64
)
9191 return(KERN_INVALID_ARGUMENT
);
9193 basic
= (vm_region_basic_info_64_t
) info
;
9194 *count
= VM_REGION_BASIC_INFO_COUNT_64
;
9196 vm_map_lock_read(map
);
9199 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
9200 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
9201 vm_map_unlock_read(map
);
9202 return(KERN_INVALID_ADDRESS
);
9208 start
= entry
->vme_start
;
9210 basic
->offset
= entry
->offset
;
9211 basic
->protection
= entry
->protection
;
9212 basic
->inheritance
= entry
->inheritance
;
9213 basic
->max_protection
= entry
->max_protection
;
9214 basic
->behavior
= entry
->behavior
;
9215 basic
->user_wired_count
= entry
->user_wired_count
;
9216 basic
->reserved
= entry
->is_sub_map
;
9218 *size
= (entry
->vme_end
- start
);
9220 if (object_name
) *object_name
= IP_NULL
;
9221 if (entry
->is_sub_map
) {
9222 basic
->shared
= FALSE
;
9224 basic
->shared
= entry
->is_shared
;
9227 vm_map_unlock_read(map
);
9228 return(KERN_SUCCESS
);
9230 case VM_REGION_EXTENDED_INFO
:
9232 vm_region_extended_info_t extended
;
9234 if (*count
< VM_REGION_EXTENDED_INFO_COUNT
)
9235 return(KERN_INVALID_ARGUMENT
);
9237 extended
= (vm_region_extended_info_t
) info
;
9238 *count
= VM_REGION_EXTENDED_INFO_COUNT
;
9240 vm_map_lock_read(map
);
9243 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
9244 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
9245 vm_map_unlock_read(map
);
9246 return(KERN_INVALID_ADDRESS
);
9251 start
= entry
->vme_start
;
9253 extended
->protection
= entry
->protection
;
9254 extended
->user_tag
= entry
->alias
;
9255 extended
->pages_resident
= 0;
9256 extended
->pages_swapped_out
= 0;
9257 extended
->pages_shared_now_private
= 0;
9258 extended
->pages_dirtied
= 0;
9259 extended
->external_pager
= 0;
9260 extended
->shadow_depth
= 0;
9262 vm_map_region_walk(map
, start
, entry
, entry
->offset
, entry
->vme_end
- start
, extended
, TRUE
);
9264 if (extended
->external_pager
&& extended
->ref_count
== 2 && extended
->share_mode
== SM_SHARED
)
9265 extended
->share_mode
= SM_PRIVATE
;
9268 *object_name
= IP_NULL
;
9270 *size
= (entry
->vme_end
- start
);
9272 vm_map_unlock_read(map
);
9273 return(KERN_SUCCESS
);
9275 case VM_REGION_TOP_INFO
:
9277 vm_region_top_info_t top
;
9279 if (*count
< VM_REGION_TOP_INFO_COUNT
)
9280 return(KERN_INVALID_ARGUMENT
);
9282 top
= (vm_region_top_info_t
) info
;
9283 *count
= VM_REGION_TOP_INFO_COUNT
;
9285 vm_map_lock_read(map
);
9288 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
9289 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
9290 vm_map_unlock_read(map
);
9291 return(KERN_INVALID_ADDRESS
);
9297 start
= entry
->vme_start
;
9299 top
->private_pages_resident
= 0;
9300 top
->shared_pages_resident
= 0;
9302 vm_map_region_top_walk(entry
, top
);
9305 *object_name
= IP_NULL
;
9307 *size
= (entry
->vme_end
- start
);
9309 vm_map_unlock_read(map
);
9310 return(KERN_SUCCESS
);
9313 return(KERN_INVALID_ARGUMENT
);
9317 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9319 ((obj)->all_reusable ? \
9320 (obj)->wired_page_count : \
9321 (obj)->resident_page_count - (obj)->reusable_page_count))
9324 vm_map_region_top_walk(
9325 vm_map_entry_t entry
,
9326 vm_region_top_info_t top
)
9329 if (entry
->object
.vm_object
== 0 || entry
->is_sub_map
) {
9330 top
->share_mode
= SM_EMPTY
;
9337 struct vm_object
*obj
, *tmp_obj
;
9339 uint32_t entry_size
;
9341 entry_size
= (uint32_t) ((entry
->vme_end
- entry
->vme_start
) / PAGE_SIZE_64
);
9343 obj
= entry
->object
.vm_object
;
9345 vm_object_lock(obj
);
9347 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
9350 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
9353 top
->private_pages_resident
=
9354 OBJ_RESIDENT_COUNT(obj
, entry_size
);
9356 top
->shared_pages_resident
=
9357 OBJ_RESIDENT_COUNT(obj
, entry_size
);
9358 top
->ref_count
= ref_count
;
9359 top
->share_mode
= SM_COW
;
9361 while ((tmp_obj
= obj
->shadow
)) {
9362 vm_object_lock(tmp_obj
);
9363 vm_object_unlock(obj
);
9366 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
9369 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
9370 top
->shared_pages_resident
+=
9371 OBJ_RESIDENT_COUNT(obj
, entry_size
);
9372 top
->ref_count
+= ref_count
- 1;
9375 if (entry
->needs_copy
) {
9376 top
->share_mode
= SM_COW
;
9377 top
->shared_pages_resident
=
9378 OBJ_RESIDENT_COUNT(obj
, entry_size
);
9380 if (ref_count
== 1 ||
9381 (ref_count
== 2 && !(obj
->pager_trusted
) && !(obj
->internal
))) {
9382 top
->share_mode
= SM_PRIVATE
;
9383 top
->private_pages_resident
=
9384 OBJ_RESIDENT_COUNT(obj
,
9387 top
->share_mode
= SM_SHARED
;
9388 top
->shared_pages_resident
=
9389 OBJ_RESIDENT_COUNT(obj
,
9393 top
->ref_count
= ref_count
;
9395 /* XXX K64: obj_id will be truncated */
9396 top
->obj_id
= (unsigned int) (uintptr_t)obj
;
9398 vm_object_unlock(obj
);
void
vm_map_region_walk(
    vm_map_t                    map,
    vm_map_offset_t             va,
    vm_map_entry_t              entry,
    vm_object_offset_t          offset,
    vm_object_size_t            range,
    vm_region_extended_info_t   extended,
    boolean_t                   look_for_pages)
{
    register struct vm_object   *obj, *tmp_obj;
    register vm_map_offset_t    last_offset;
    register int                i;
    register int                ref_count;
    struct vm_object            *shadow_object;
    int                         shadow_depth;

    if ((entry->object.vm_object == 0) ||
        (entry->is_sub_map) ||
        (entry->object.vm_object->phys_contiguous)) {
        extended->share_mode = SM_EMPTY;
        extended->ref_count = 0;
        return;
    }

    obj = entry->object.vm_object;

    vm_object_lock(obj);

    if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
        ref_count--;

    if (look_for_pages) {
        for (last_offset = offset + range;
             offset < last_offset;
             offset += PAGE_SIZE_64, va += PAGE_SIZE)
            vm_map_region_look_for_page(map, va, obj,
                                        offset, ref_count,
                                        0, extended);
    } else {
        shadow_object = obj->shadow;
        shadow_depth = 0;

        if ( !(obj->pager_trusted) && !(obj->internal))
            extended->external_pager = 1;

        if (shadow_object != VM_OBJECT_NULL) {
            vm_object_lock(shadow_object);
            for (;
                 shadow_object != VM_OBJECT_NULL;
                 shadow_depth++) {
                vm_object_t next_shadow;

                if ( !(shadow_object->pager_trusted) &&
                     !(shadow_object->internal))
                    extended->external_pager = 1;

                next_shadow = shadow_object->shadow;
                if (next_shadow) {
                    vm_object_lock(next_shadow);
                }
                vm_object_unlock(shadow_object);
                shadow_object = next_shadow;
            }
        }
        extended->shadow_depth = shadow_depth;
    }

    if (extended->shadow_depth || entry->needs_copy)
        extended->share_mode = SM_COW;
    else {
        if (ref_count == 1)
            extended->share_mode = SM_PRIVATE;
        else {
            if (obj->true_share)
                extended->share_mode = SM_TRUESHARED;
            else
                extended->share_mode = SM_SHARED;
        }
    }
    extended->ref_count = ref_count - extended->shadow_depth;

    for (i = 0; i < extended->shadow_depth; i++) {
        if ((tmp_obj = obj->shadow) == 0)
            break;
        vm_object_lock(tmp_obj);
        vm_object_unlock(obj);

        if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
            ref_count--;

        extended->ref_count += ref_count;
        obj = tmp_obj;
    }
    vm_object_unlock(obj);

    if (extended->share_mode == SM_SHARED) {
        register vm_map_entry_t cur;
        register vm_map_entry_t last;
        int                     my_refs;

        obj = entry->object.vm_object;
        last = vm_map_to_entry(map);
        my_refs = 0;

        if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
            ref_count--;
        for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
            my_refs += vm_map_region_count_obj_refs(cur, obj);

        if (my_refs == ref_count)
            extended->share_mode = SM_PRIVATE_ALIASED;
        else if (my_refs > 1)
            extended->share_mode = SM_SHARED_ALIASED;
    }
}
/* object is locked on entry and locked on return */

static void
vm_map_region_look_for_page(
    __unused vm_map_t           map,
    __unused vm_map_offset_t    va,
    vm_object_t                 object,
    vm_object_offset_t          offset,
    int                         max_refcnt,
    int                         depth,
    vm_region_extended_info_t   extended)
{
    register vm_page_t      p;
    register vm_object_t    shadow;
    register int            ref_count;
    vm_object_t             caller_object;
#if MACH_PAGEMAP
    kern_return_t           kr;
#endif
    shadow = object->shadow;
    caller_object = object;

    while (TRUE) {

        if ( !(object->pager_trusted) && !(object->internal))
            extended->external_pager = 1;

        if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
            if (shadow && (max_refcnt == 1))
                extended->pages_shared_now_private++;

            if (!p->fictitious &&
                (p->dirty || pmap_is_modified(p->phys_page)))
                extended->pages_dirtied++;

            extended->pages_resident++;

            if (object != caller_object)
                vm_object_unlock(object);

            return;
        }
#if MACH_PAGEMAP
        if (object->existence_map) {
            if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {

                extended->pages_swapped_out++;

                if (object != caller_object)
                    vm_object_unlock(object);

                return;
            }
        } else if (object->internal &&
                   object->alive &&
                   !object->terminating &&
                   object->pager_ready) {

            memory_object_t pager;

            vm_object_paging_begin(object);
            pager = object->pager;
            vm_object_unlock(object);

            kr = memory_object_data_request(
                pager,
                offset + object->paging_offset,
                0, /* just poke the pager */
                VM_PROT_READ,
                NULL);

            vm_object_lock(object);
            vm_object_paging_end(object);

            if (kr == KERN_SUCCESS) {
                /* the pager has that page */
                extended->pages_swapped_out++;
                if (object != caller_object)
                    vm_object_unlock(object);
                return;
            }
        }
#endif /* MACH_PAGEMAP */

        if (shadow) {
            vm_object_lock(shadow);

            if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
                ref_count--;

            if (++depth > extended->shadow_depth)
                extended->shadow_depth = depth;

            if (ref_count > max_refcnt)
                max_refcnt = ref_count;

            if (object != caller_object)
                vm_object_unlock(object);

            offset = offset + object->shadow_offset;
            object = shadow;
            shadow = object->shadow;
            continue;
        }
        if (object != caller_object)
            vm_object_unlock(object);
        break;
    }
}
static int
vm_map_region_count_obj_refs(
    vm_map_entry_t  entry,
    vm_object_t     object)
{
    register int            ref_count;
    register vm_object_t    chk_obj;
    register vm_object_t    tmp_obj;

    if (entry->object.vm_object == 0)
        return(0);

    if (entry->is_sub_map)
        return(0);
    else {
        ref_count = 0;

        chk_obj = entry->object.vm_object;
        vm_object_lock(chk_obj);

        while (chk_obj) {
            if (chk_obj == object)
                ref_count++;
            tmp_obj = chk_obj->shadow;
            if (tmp_obj)
                vm_object_lock(tmp_obj);
            vm_object_unlock(chk_obj);

            chk_obj = tmp_obj;
        }
    }
    return(ref_count);
}
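/*
 * Illustrative sketch (not part of this file): the region walkers above back
 * the vm_region()/mach_vm_region() info flavors such as VM_REGION_TOP_INFO.
 * A minimal user-space caller, assuming only the public Mach headers, might
 * look like the following; the function name is hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <mach/mach.h>
#include <stdio.h>

static void
show_region_top_info(mach_vm_address_t addr)
{
    mach_vm_address_t           address = addr;
    mach_vm_size_t              size = 0;
    vm_region_top_info_data_t   info;
    mach_msg_type_number_t      count = VM_REGION_TOP_INFO_COUNT;
    mach_port_t                 object_name = MACH_PORT_NULL;

    kern_return_t kr = mach_vm_region(mach_task_self(), &address, &size,
                                      VM_REGION_TOP_INFO,
                                      (vm_region_info_t) &info,
                                      &count, &object_name);
    if (kr == KERN_SUCCESS)
        printf("region 0x%llx..0x%llx share_mode=%d private=%u shared=%u\n",
               (unsigned long long) address,
               (unsigned long long) (address + size),
               info.share_mode,
               info.private_pages_resident,
               info.shared_pages_resident);
}
#endif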
/*
 *  Routine:    vm_map_simplify
 *
 *  Description:
 *      Attempt to simplify the map representation in
 *      the vicinity of the given starting address.
 *
 *      This routine is intended primarily to keep the
 *      kernel maps more compact -- they generally don't
 *      benefit from the "expand a map entry" technology
 *      at allocation time because the adjacent entry
 *      is often wired down.
 */
void
vm_map_simplify_entry(
    vm_map_t        map,
    vm_map_entry_t  this_entry)
{
    vm_map_entry_t  prev_entry;

    counter(c_vm_map_simplify_entry_called++);

    prev_entry = this_entry->vme_prev;

    if ((this_entry != vm_map_to_entry(map)) &&
        (prev_entry != vm_map_to_entry(map)) &&

        (prev_entry->vme_end == this_entry->vme_start) &&

        (prev_entry->is_sub_map == this_entry->is_sub_map) &&

        (prev_entry->object.vm_object == this_entry->object.vm_object) &&
        ((prev_entry->offset + (prev_entry->vme_end -
                                prev_entry->vme_start))
         == this_entry->offset) &&

        (prev_entry->inheritance == this_entry->inheritance) &&
        (prev_entry->protection == this_entry->protection) &&
        (prev_entry->max_protection == this_entry->max_protection) &&
        (prev_entry->behavior == this_entry->behavior) &&
        (prev_entry->alias == this_entry->alias) &&
        (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
        (prev_entry->no_cache == this_entry->no_cache) &&
        (prev_entry->wired_count == this_entry->wired_count) &&
        (prev_entry->user_wired_count == this_entry->user_wired_count) &&

        (prev_entry->needs_copy == this_entry->needs_copy) &&
        (prev_entry->permanent == this_entry->permanent) &&

        (prev_entry->use_pmap == FALSE) &&
        (this_entry->use_pmap == FALSE) &&
        (prev_entry->in_transition == FALSE) &&
        (this_entry->in_transition == FALSE) &&
        (prev_entry->needs_wakeup == FALSE) &&
        (this_entry->needs_wakeup == FALSE) &&
        (prev_entry->is_shared == FALSE) &&
        (this_entry->is_shared == FALSE)) {
        _vm_map_entry_unlink(&map->hdr, prev_entry);
        this_entry->vme_start = prev_entry->vme_start;
        this_entry->offset = prev_entry->offset;
        if (prev_entry->is_sub_map) {
            vm_map_deallocate(prev_entry->object.sub_map);
        } else {
            vm_object_deallocate(prev_entry->object.vm_object);
        }
        vm_map_entry_dispose(map, prev_entry);
        SAVE_HINT_MAP_WRITE(map, this_entry);
        counter(c_vm_map_simplified++);
    }
}
void
vm_map_simplify(
    vm_map_t        map,
    vm_map_offset_t start)
{
    vm_map_entry_t  this_entry;

    vm_map_lock(map);
    if (vm_map_lookup_entry(map, start, &this_entry)) {
        vm_map_simplify_entry(map, this_entry);
        vm_map_simplify_entry(map, this_entry->vme_next);
    }
    counter(c_vm_map_simplify_called++);
    vm_map_unlock(map);
}
void
vm_map_simplify_range(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t  entry;

    /*
     * The map should be locked (for "write") by the caller.
     */

    if (start >= end) {
        /* invalid address range */
        return;
    }

    start = vm_map_trunc_page(start);
    end = vm_map_round_page(end);

    if (!vm_map_lookup_entry(map, start, &entry)) {
        /* "start" is not mapped and "entry" ends before "start" */
        if (entry == vm_map_to_entry(map)) {
            /* start with first entry in the map */
            entry = vm_map_first_entry(map);
        } else {
            /* start with next entry */
            entry = entry->vme_next;
        }
    }

    while (entry != vm_map_to_entry(map) &&
           entry->vme_start <= end) {
        /* try and coalesce "entry" with its previous entry */
        vm_map_simplify_entry(map, entry);
        entry = entry->vme_next;
    }
}
/*
 *  Routine:    vm_map_machine_attribute
 *  Purpose:
 *      Provide machine-specific attributes to mappings,
 *      such as cachability etc. for machines that provide
 *      them.  NUMA architectures and machines with big/strange
 *      caches will use this.
 *  Note:
 *      Responsibilities for locking and checking are handled here,
 *      everything else in the pmap module.  If any non-volatile
 *      information must be kept, the pmap module should handle
 *      it itself.  [This assumes that attributes do not
 *      need to be inherited, which seems ok to me]
 */
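/*
 * Illustrative sketch (not part of this file): user code reaches this path
 * through the Mach vm_machine_attribute() call.  Assuming only the public
 * headers, flushing the cache for a range of the caller's own map might
 * look like this; the helper name is hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <mach/mach.h>
#include <mach/vm_attributes.h>

static kern_return_t
flush_cache_range(vm_address_t addr, vm_size_t len)
{
    vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

    /* MATTR_CACHE is the attribute that forces the explicit per-page
     * object traversal performed by vm_map_machine_attribute(). */
    return vm_machine_attribute(mach_task_self(), addr, len,
                                MATTR_CACHE, &value);
}
#endif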
kern_return_t
vm_map_machine_attribute(
    vm_map_t                    map,
    vm_map_offset_t             start,
    vm_map_offset_t             end,
    vm_machine_attribute_t      attribute,
    vm_machine_attribute_val_t  *value)     /* IN/OUT */
{
    kern_return_t   ret;
    vm_map_size_t   sync_size;
    vm_map_entry_t  entry;

    if (start < vm_map_min(map) || end > vm_map_max(map))
        return KERN_INVALID_ADDRESS;

    /* Figure how much memory we need to flush (in page increments) */
    sync_size = end - start;

    vm_map_lock(map);

    if (attribute != MATTR_CACHE) {
        /* If we don't have to find physical addresses, we */
        /* don't have to do an explicit traversal here.    */
        ret = pmap_attribute(map->pmap, start, end-start,
                             attribute, value);
        vm_map_unlock(map);
        return ret;
    }

    ret = KERN_SUCCESS;     /* Assume it all worked */

    while (sync_size) {
        if (vm_map_lookup_entry(map, start, &entry)) {
            vm_map_size_t   sub_size;

            if ((entry->vme_end - start) > sync_size) {
                sub_size = sync_size;
                sync_size = 0;
            } else {
                sub_size = entry->vme_end - start;
                sync_size -= sub_size;
            }
            if (entry->is_sub_map) {
                vm_map_offset_t sub_start;
                vm_map_offset_t sub_end;

                sub_start = (start - entry->vme_start)
                    + entry->offset;
                sub_end = sub_start + sub_size;
                vm_map_machine_attribute(
                    entry->object.sub_map,
                    sub_start, sub_end,
                    attribute, value);
            } else {
                if (entry->object.vm_object) {
                    vm_page_t           m;
                    vm_object_t         object;
                    vm_object_t         base_object;
                    vm_object_t         last_object;
                    vm_object_offset_t  offset;
                    vm_object_offset_t  base_offset;
                    vm_map_size_t       range;

                    range = sub_size;
                    offset = (start - entry->vme_start)
                        + entry->offset;
                    base_offset = offset;
                    object = entry->object.vm_object;
                    base_object = object;
                    last_object = NULL;

                    vm_object_lock(object);

                    while (range) {
                        m = vm_page_lookup(object, offset);

                        if (m && !m->fictitious) {
                            ret = pmap_attribute_cache_sync(
                                m->phys_page,
                                PAGE_SIZE,
                                attribute, value);
                        } else if (object->shadow) {
                            offset = offset + object->shadow_offset;
                            last_object = object;
                            object = object->shadow;
                            vm_object_lock(last_object->shadow);
                            vm_object_unlock(last_object);
                            continue;
                        }
                        range -= PAGE_SIZE;

                        if (base_object != object) {
                            vm_object_unlock(object);
                            vm_object_lock(base_object);
                            object = base_object;
                        }
                        /* Bump to the next page */
                        base_offset += PAGE_SIZE;
                        offset = base_offset;
                    }
                    vm_object_unlock(object);
                }
            }
            start += sub_size;
        } else {
            vm_map_unlock(map);
            return KERN_FAILURE;
        }
    }

    vm_map_unlock(map);

    return ret;
}
/*
 *  vm_map_behavior_set:
 *
 *  Sets the paging reference behavior of the specified address
 *  range in the target map.  Paging reference behavior affects
 *  how pagein operations resulting from faults on the map will be
 *  clustered.
 */
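/*
 * Illustrative sketch (not part of this file): the persistent behaviors set
 * here are normally requested from user space with madvise(2), e.g.
 * MADV_SEQUENTIAL or MADV_RANDOM, which the BSD layer translates into a
 * vm_map_behavior_set() call on the calling task's map.
 */
#if 0   /* user-space example, for illustration only */
#include <sys/mman.h>

static void
hint_sequential(void *addr, size_t len)
{
    /* Ask for aggressive read-ahead and early reclaim on this range. */
    (void) madvise(addr, len, MADV_SEQUENTIAL);
}
#endif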
kern_return_t
vm_map_behavior_set(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_behavior_t   new_behavior)
{
    register vm_map_entry_t entry;
    vm_map_entry_t          temp_entry;

    XPR(XPR_VM_MAP,
        "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
        map, start, end, new_behavior, 0);

    switch (new_behavior) {

    /*
     * This first block of behaviors all set a persistent state on the specified
     * memory range.  All we have to do here is to record the desired behavior
     * in the vm_map_entry_t's.
     */

    case VM_BEHAVIOR_DEFAULT:
    case VM_BEHAVIOR_RANDOM:
    case VM_BEHAVIOR_SEQUENTIAL:
    case VM_BEHAVIOR_RSEQNTL:
    case VM_BEHAVIOR_ZERO_WIRED_PAGES:
        vm_map_lock(map);

        /*
         * The entire address range must be valid for the map.
         * Note that vm_map_range_check() does a
         * vm_map_lookup_entry() internally and returns the
         * entry containing the start of the address range if
         * the entire range is valid.
         */
        if (vm_map_range_check(map, start, end, &temp_entry)) {
            entry = temp_entry;
            vm_map_clip_start(map, entry, start);
        } else {
            vm_map_unlock(map);
            return(KERN_INVALID_ADDRESS);
        }

        while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
            vm_map_clip_end(map, entry, end);
            assert(!entry->use_pmap);

            if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
                entry->zero_wired_pages = TRUE;
            } else {
                entry->behavior = new_behavior;
            }
            entry = entry->vme_next;
        }

        vm_map_unlock(map);
        break;

    /*
     * The rest of these are different from the above in that they cause
     * an immediate action to take place as opposed to setting a behavior that
     * affects future actions.
     */

    case VM_BEHAVIOR_WILLNEED:
        return vm_map_willneed(map, start, end);

    case VM_BEHAVIOR_DONTNEED:
        return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

    case VM_BEHAVIOR_FREE:
        return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

    case VM_BEHAVIOR_REUSABLE:
        return vm_map_reusable_pages(map, start, end);

    case VM_BEHAVIOR_REUSE:
        return vm_map_reuse_pages(map, start, end);

    case VM_BEHAVIOR_CAN_REUSE:
        return vm_map_can_reuse(map, start, end);

    default:
        return(KERN_INVALID_ARGUMENT);
    }

    return(KERN_SUCCESS);
}
/*
 * Internals for madvise(MADV_WILLNEED) system call.
 *
 * The present implementation is to do a read-ahead if the mapping corresponds
 * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
 * and basically ignore the "advice" (which we are always free to do).
 */
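/*
 * Illustrative sketch (not part of this file): a typical caller maps a file
 * and then issues madvise(MADV_WILLNEED) to trigger the asynchronous
 * read-ahead performed below.  The helper name is hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

static void *
map_and_prefetch(const char *path, size_t *lenp)
{
    struct stat st;
    int fd = open(path, O_RDONLY);
    if (fd < 0)
        return NULL;
    if (fstat(fd, &st) < 0) {
        close(fd);
        return NULL;
    }
    void *p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd);
    if (p == MAP_FAILED)
        return NULL;
    /* Kick off read-ahead; failure is deliberately ignored (it's advice). */
    (void) madvise(p, (size_t)st.st_size, MADV_WILLNEED);
    *lenp = (size_t)st.st_size;
    return p;
}
#endif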
static kern_return_t
vm_map_willneed(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end
)
{
    vm_map_entry_t              entry;
    vm_object_t                 object;
    memory_object_t             pager;
    struct vm_object_fault_info fault_info;
    kern_return_t               kr;
    vm_object_size_t            len;
    vm_object_offset_t          offset;

    /*
     * Fill in static values in fault_info.  Several fields get ignored by the code
     * we call, but we'll fill them in anyway since uninitialized fields are bad
     * when it comes to future backwards compatibility.
     */

    fault_info.interruptible = THREAD_UNINT;        /* ignored value */
    fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
    fault_info.no_cache      = FALSE;               /* ignored value */
    fault_info.stealth       = TRUE;
    fault_info.mark_zf_absent = FALSE;

    /*
     * The MADV_WILLNEED operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */

    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */

    if (vm_map_range_check(map, start, end, &entry)) {

        /*
         * Examine each vm_map_entry_t in the range.
         */

        for (; entry->vme_start < end; start += len, entry = entry->vme_next) {

            /*
             * The first time through, the start address could be anywhere within the
             * vm_map_entry we found.  So adjust the offset to correspond.  After that,
             * the offset will always be zero to correspond to the beginning of the current
             * vm_map_entry.
             */

            offset = (start - entry->vme_start) + entry->offset;

            /*
             * Set the length so we don't go beyond the end of the map_entry or beyond the
             * end of the range we were given.  This range could span also multiple map
             * entries all of which map different files, so make sure we only do the right
             * amount of I/O for each object.  Note that it's possible for there to be
             * multiple map entries all referring to the same object but with different
             * page permissions, but it's not worth trying to optimize that case.
             */

            len = MIN(entry->vme_end - start, end - start);

            if ((vm_size_t) len != len) {
                /* 32-bit overflow */
                len = (vm_size_t) (0 - PAGE_SIZE);
            }
            fault_info.cluster_size = (vm_size_t) len;
            fault_info.lo_offset    = offset;
            fault_info.hi_offset    = offset + len;
            fault_info.user_tag     = entry->alias;

            /*
             * If there's no read permission to this mapping, then just skip it.
             */

            if ((entry->protection & VM_PROT_READ) == 0) {
                continue;
            }

            /*
             * Find the file object backing this map entry.  If there is none,
             * then we simply ignore the "will need" advice for this entry and
             * go on to the next one.
             */

            if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
                continue;
            }

            vm_object_paging_begin(object);
            pager = object->pager;
            vm_object_unlock(object);

            /*
             * Get the data from the object asynchronously.
             *
             * Note that memory_object_data_request() places limits on the amount
             * of I/O it will do.  Regardless of the len we specified, it won't do
             * more than MAX_UPL_TRANSFER and it silently truncates the len to that
             * size.  This isn't necessarily bad since madvise shouldn't really be
             * used to page in unlimited amounts of data.  Other Unix variants limit
             * the willneed case as well.  If this turns out to be an issue for
             * developers, then we can always adjust the policy here and still be
             * backwards compatible since this is all just "advice".
             */

            kr = memory_object_data_request(
                pager,
                offset + object->paging_offset,
                0,
                VM_PROT_READ,
                (memory_object_fault_info_t)&fault_info);

            vm_object_lock(object);
            vm_object_paging_end(object);
            vm_object_unlock(object);

            /*
             * If we couldn't do the I/O for some reason, just give up on the
             * madvise.  We still return success to the user since madvise isn't
             * supposed to fail when the advice can't be taken.
             */

            if (kr != KERN_SUCCESS) {
                break;
            }
        }

        kr = KERN_SUCCESS;
    } else
        kr = KERN_INVALID_ADDRESS;

    vm_map_unlock_read(map);
    return kr;
}
static boolean_t
vm_map_entry_is_reusable(
    vm_map_entry_t entry)
{
    vm_object_t object;

    if (entry->is_shared ||
        entry->is_sub_map ||
        entry->in_transition ||
        entry->protection != VM_PROT_DEFAULT ||
        entry->max_protection != VM_PROT_ALL ||
        entry->inheritance != VM_INHERIT_DEFAULT ||
        entry->permanent ||
        entry->superpage_size != 0 ||
        entry->zero_wired_pages ||
        entry->wired_count != 0 ||
        entry->user_wired_count != 0) {
        return FALSE;
    }

    object = entry->object.vm_object;
    if (object == VM_OBJECT_NULL) {
        return TRUE;
    }
    if (object->ref_count == 1 &&
        object->wired_page_count == 0 &&
        object->copy == VM_OBJECT_NULL &&
        object->shadow == VM_OBJECT_NULL &&
        object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
        object->internal &&
        !object->true_share &&
        object->wimg_bits == VM_WIMG_DEFAULT &&
        !object->code_signed) {
        return TRUE;
    }
    return FALSE;
}
static kern_return_t
vm_map_reuse_pages(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t      entry;
    vm_object_t         object;
    vm_object_offset_t  start_offset, end_offset;

    /*
     * The MADV_REUSE operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */

    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */

    if (!vm_map_range_check(map, start, end, &entry)) {
        vm_map_unlock_read(map);
        vm_page_stats_reusable.reuse_pages_failure++;
        return KERN_INVALID_ADDRESS;
    }

    /*
     * Examine each vm_map_entry_t in the range.
     */
    for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
         entry = entry->vme_next) {
        /*
         * Sanity check on the VM map entry.
         */
        if (! vm_map_entry_is_reusable(entry)) {
            vm_map_unlock_read(map);
            vm_page_stats_reusable.reuse_pages_failure++;
            return KERN_INVALID_ADDRESS;
        }

        /*
         * The first time through, the start address could be anywhere
         * within the vm_map_entry we found.  So adjust the offset to
         * correspond.
         */
        if (entry->vme_start < start) {
            start_offset = start - entry->vme_start;
        } else {
            start_offset = 0;
        }
        end_offset = MIN(end, entry->vme_end) - entry->vme_start;
        start_offset += entry->offset;
        end_offset += entry->offset;

        object = entry->object.vm_object;
        if (object != VM_OBJECT_NULL) {
            vm_object_lock(object);
            vm_object_reuse_pages(object, start_offset, end_offset,
                                  TRUE);
            vm_object_unlock(object);
        }

        if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
            /*
             * We do not hold the VM map exclusively here.
             * The "alias" field is not that critical, so it's
             * safe to update it here, as long as it is the only
             * one that can be modified while holding the VM map
             * "shared".
             */
            entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
        }
    }

    vm_map_unlock_read(map);
    vm_page_stats_reusable.reuse_pages_success++;
    return KERN_SUCCESS;
}
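/*
 * Illustrative sketch (not part of this file): vm_map_reusable_pages() and
 * vm_map_reuse_pages() back the Darwin-specific madvise advice values
 * MADV_FREE_REUSABLE and MADV_FREE_REUSE, which a malloc-style allocator
 * can use to hand idle pages back to the system and later reclaim the range.
 * The helper names below are hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <sys/mman.h>

static void
allocator_retire_range(void *addr, size_t len)
{
    /* Pages keep their mapping but may be reclaimed at any time. */
    (void) madvise(addr, len, MADV_FREE_REUSABLE);
}

static void
allocator_reuse_range(void *addr, size_t len)
{
    /* Tell the kernel the range is in use again before touching it. */
    (void) madvise(addr, len, MADV_FREE_REUSE);
}
#endif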
static kern_return_t
vm_map_reusable_pages(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t      entry;
    vm_object_t         object;
    vm_object_offset_t  start_offset, end_offset;

    /*
     * The MADV_REUSABLE operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */

    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */

    if (!vm_map_range_check(map, start, end, &entry)) {
        vm_map_unlock_read(map);
        vm_page_stats_reusable.reusable_pages_failure++;
        return KERN_INVALID_ADDRESS;
    }

    /*
     * Examine each vm_map_entry_t in the range.
     */
    for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
         entry = entry->vme_next) {
        int kill_pages = 0;

        /*
         * Sanity check on the VM map entry.
         */
        if (! vm_map_entry_is_reusable(entry)) {
            vm_map_unlock_read(map);
            vm_page_stats_reusable.reusable_pages_failure++;
            return KERN_INVALID_ADDRESS;
        }

        /*
         * The first time through, the start address could be anywhere
         * within the vm_map_entry we found.  So adjust the offset to
         * correspond.
         */
        if (entry->vme_start < start) {
            start_offset = start - entry->vme_start;
        } else {
            start_offset = 0;
        }
        end_offset = MIN(end, entry->vme_end) - entry->vme_start;
        start_offset += entry->offset;
        end_offset += entry->offset;

        object = entry->object.vm_object;
        if (object == VM_OBJECT_NULL)
            continue;

        vm_object_lock(object);
        if (object->ref_count == 1 && !object->shadow)
            kill_pages = 1;
        else
            kill_pages = -1;
        if (kill_pages != -1) {
            vm_object_deactivate_pages(object,
                                       start_offset,
                                       end_offset - start_offset,
                                       kill_pages,
                                       TRUE /*reusable_pages*/);
        } else {
            vm_page_stats_reusable.reusable_pages_shared++;
        }
        vm_object_unlock(object);

        if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
            entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
            /*
             * We do not hold the VM map exclusively here.
             * The "alias" field is not that critical, so it's
             * safe to update it here, as long as it is the only
             * one that can be modified while holding the VM map
             * "shared".
             */
            entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
        }
    }

    vm_map_unlock_read(map);
    vm_page_stats_reusable.reusable_pages_success++;
    return KERN_SUCCESS;
}
static kern_return_t
vm_map_can_reuse(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t  entry;

    /*
     * The MADV_REUSABLE operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */

    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */

    if (!vm_map_range_check(map, start, end, &entry)) {
        vm_map_unlock_read(map);
        vm_page_stats_reusable.can_reuse_failure++;
        return KERN_INVALID_ADDRESS;
    }

    /*
     * Examine each vm_map_entry_t in the range.
     */
    for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
         entry = entry->vme_next) {
        /*
         * Sanity check on the VM map entry.
         */
        if (! vm_map_entry_is_reusable(entry)) {
            vm_map_unlock_read(map);
            vm_page_stats_reusable.can_reuse_failure++;
            return KERN_INVALID_ADDRESS;
        }
    }

    vm_map_unlock_read(map);
    vm_page_stats_reusable.can_reuse_success++;
    return KERN_SUCCESS;
}
#include <mach_kdb.h>
#if MACH_KDB
#include <ddb/db_output.h>
#include <vm/vm_print.h>

#define printf  db_printf

/*
 * Forward declarations for internal functions.
 */
extern void vm_map_links_print(
    struct vm_map_links *links);

extern void vm_map_header_print(
    struct vm_map_header *header);

extern void vm_map_entry_print(
    vm_map_entry_t      entry);

extern void vm_follow_entry(
    vm_map_entry_t      entry);

extern void vm_follow_map(
    vm_map_t            map);

/*
 *  vm_map_links_print: [ debug ]
 */
void
vm_map_links_print(
    struct vm_map_links *links)
{
    iprintf("prev = %08X  next = %08X  start = %016llX  end = %016llX\n",
            links->prev,
            links->next,
            (unsigned long long)links->start,
            (unsigned long long)links->end);
}

/*
 *  vm_map_header_print:    [ debug ]
 */
void
vm_map_header_print(
    struct vm_map_header *header)
{
    vm_map_links_print(&header->links);
    iprintf("nentries = %08X, %sentries_pageable\n",
            header->nentries,
            (header->entries_pageable ? "" : "!"));
}

/*
 *  vm_follow_entry:    [ debug ]
 */
void
vm_follow_entry(
    vm_map_entry_t entry)
{
    int shadows;

    iprintf("map entry %08X\n", entry);

    shadows = vm_follow_object(entry->object.vm_object);

    iprintf("Total objects : %d\n", shadows);
}

/*
 *  vm_map_entry_print: [ debug ]
 */
void
vm_map_entry_print(
    register vm_map_entry_t entry)
{
    static const char *inheritance_name[4] =
        { "share", "copy", "none", "?"};
    static const char *behavior_name[4] =
        { "dflt", "rand", "seqtl", "rseqntl" };

    iprintf("map entry %08X - prev = %08X  next = %08X\n", entry, entry->vme_prev, entry->vme_next);

    vm_map_links_print(&entry->links);

    iprintf("start = %016llX  end = %016llX - prot=%x/%x/%s\n",
            (unsigned long long)entry->vme_start,
            (unsigned long long)entry->vme_end,
            entry->protection,
            entry->max_protection,
            inheritance_name[(entry->inheritance & 0x3)]);

    iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
            behavior_name[(entry->behavior & 0x3)],
            entry->wired_count,
            entry->user_wired_count);
    iprintf("%sin_transition, %sneeds_wakeup\n",
            (entry->in_transition ? "" : "!"),
            (entry->needs_wakeup ? "" : "!"));

    if (entry->is_sub_map) {
        iprintf("submap = %08X - offset = %016llX\n",
                entry->object.sub_map,
                (unsigned long long)entry->offset);
    } else {
        iprintf("object = %08X  offset = %016llX - ",
                entry->object.vm_object,
                (unsigned long long)entry->offset);
        printf("%sis_shared, %sneeds_copy\n",
               (entry->is_shared ? "" : "!"),
               (entry->needs_copy ? "" : "!"));
    }
}

/*
 *  vm_follow_map:  [ debug ]
 */
void
vm_follow_map(
    vm_map_t map)
{
    register vm_map_entry_t entry;

    iprintf("task map %08X\n", map);

    for (entry = vm_map_first_entry(map);
         entry && entry != vm_map_to_entry(map);
         entry = entry->vme_next) {
        vm_follow_entry(entry);
    }
}

/*
 *  vm_map_print:   [ debug ]
 */
void
vm_map_print(
    db_addr_t inmap)
{
    register vm_map_entry_t entry;
    vm_map_t map;
#if TASK_SWAPPER
    char *swstate;
#endif /* TASK_SWAPPER */

    map = (vm_map_t)(long)
        inmap;  /* Make sure we have the right type */

    iprintf("task map %08X\n", map);

    vm_map_header_print(&map->hdr);

    iprintf("pmap = %08X  size = %08X  ref = %d  hint = %08X  first_free = %08X\n",
            map->pmap,
            map->size,
            map->ref_count,
            map->hint,
            map->first_free);

    iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
            (map->wait_for_space ? "" : "!"),
            (map->wiring_required ? "" : "!"),
            map->timestamp);

#if TASK_SWAPPER
    switch (map->sw_state) {
    case MAP_SW_IN:
        swstate = "SW_IN";
        break;
    case MAP_SW_OUT:
        swstate = "SW_OUT";
        break;
    default:
        swstate = "????";
        break;
    }
    iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
#endif /* TASK_SWAPPER */

    for (entry = vm_map_first_entry(map);
         entry && entry != vm_map_to_entry(map);
         entry = entry->vme_next) {
        vm_map_entry_print(entry);
    }
}

/*
 *  Routine:    vm_map_copy_print
 *  Purpose:
 *      Pretty-print a copy object for ddb.
 */
void
vm_map_copy_print(
    db_addr_t incopy)
{
    vm_map_copy_t copy;
    vm_map_entry_t entry;

    copy = (vm_map_copy_t)(long)
        incopy; /* Make sure we have the right type */

    printf("copy object 0x%x\n", copy);

    iprintf("type=%d", copy->type);
    switch (copy->type) {
    case VM_MAP_COPY_ENTRY_LIST:
        printf("[entry_list]");
        break;

    case VM_MAP_COPY_OBJECT:
        printf("[object]");
        break;

    case VM_MAP_COPY_KERNEL_BUFFER:
        printf("[kernel_buffer]");
        break;

    default:
        printf("[bad type]");
        break;
    }
    printf(", offset=0x%llx", (unsigned long long)copy->offset);
    printf(", size=0x%x\n", copy->size);

    switch (copy->type) {
    case VM_MAP_COPY_ENTRY_LIST:
        vm_map_header_print(&copy->cpy_hdr);
        for (entry = vm_map_copy_first_entry(copy);
             entry && entry != vm_map_copy_to_entry(copy);
             entry = entry->vme_next) {
            vm_map_entry_print(entry);
        }
        break;

    case VM_MAP_COPY_OBJECT:
        iprintf("object=0x%x\n", copy->cpy_object);
        break;

    case VM_MAP_COPY_KERNEL_BUFFER:
        iprintf("kernel buffer=0x%x", copy->cpy_kdata);
        printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
        break;
    }
}

/*
 *  db_vm_map_total_size(map)   [ debug ]
 *
 *  return the total virtual size (in bytes) of the map
 */
vm_map_size_t
db_vm_map_total_size(
    db_addr_t inmap)
{
    vm_map_entry_t  entry;
    vm_map_size_t   total;
    vm_map_t        map;

    map = (vm_map_t)(long)
        inmap;  /* Make sure we have the right type */

    total = 0;
    for (entry = vm_map_first_entry(map);
         entry != vm_map_to_entry(map);
         entry = entry->vme_next) {
        total += entry->vme_end - entry->vme_start;
    }

    return total;
}

#endif /* MACH_KDB */
/*
 *  Routine:    vm_map_entry_insert
 *
 *  Description:    This routine inserts a new vm_entry in a locked map.
 */
vm_map_entry_t
vm_map_entry_insert(
    vm_map_t            map,
    vm_map_entry_t      insp_entry,
    vm_map_offset_t     start,
    vm_map_offset_t     end,
    vm_object_t         object,
    vm_object_offset_t  offset,
    boolean_t           needs_copy,
    boolean_t           is_shared,
    boolean_t           in_transition,
    vm_prot_t           cur_protection,
    vm_prot_t           max_protection,
    vm_behavior_t       behavior,
    vm_inherit_t        inheritance,
    unsigned            wired_count,
    boolean_t           no_cache,
    boolean_t           permanent,
    unsigned int        superpage_size)
{
    vm_map_entry_t  new_entry;

    assert(insp_entry != (vm_map_entry_t)0);

    new_entry = vm_map_entry_create(map);

    new_entry->vme_start = start;
    new_entry->vme_end = end;
    assert(page_aligned(new_entry->vme_start));
    assert(page_aligned(new_entry->vme_end));

    new_entry->object.vm_object = object;
    new_entry->offset = offset;
    new_entry->is_shared = is_shared;
    new_entry->is_sub_map = FALSE;
    new_entry->needs_copy = needs_copy;
    new_entry->in_transition = in_transition;
    new_entry->needs_wakeup = FALSE;
    new_entry->inheritance = inheritance;
    new_entry->protection = cur_protection;
    new_entry->max_protection = max_protection;
    new_entry->behavior = behavior;
    new_entry->wired_count = wired_count;
    new_entry->user_wired_count = 0;
    new_entry->use_pmap = FALSE;
    new_entry->alias = 0;
    new_entry->zero_wired_pages = FALSE;
    new_entry->no_cache = no_cache;
    new_entry->permanent = permanent;
    new_entry->superpage_size = superpage_size;

    /*
     *  Insert the new entry into the list.
     */

    vm_map_entry_link(map, insp_entry, new_entry);
    map->size += end - start;

    /*
     *  Update the free space hint and the lookup hint.
     */

    SAVE_HINT_MAP_WRITE(map, new_entry);
    return new_entry;
}
/*
 *  Routine:    vm_map_remap_extract
 *
 *  Description:    This routine returns a vm_entry list from a map.
 */
static kern_return_t
vm_map_remap_extract(
    vm_map_t                map,
    vm_map_offset_t         addr,
    vm_map_size_t           size,
    boolean_t               copy,
    struct vm_map_header    *map_header,
    vm_prot_t               *cur_protection,
    vm_prot_t               *max_protection,
    /* What, no behavior? */
    vm_inherit_t            inheritance,
    boolean_t               pageable)
{
    kern_return_t       result;
    vm_map_size_t       mapped_size;
    vm_map_size_t       tmp_size;
    vm_map_entry_t      src_entry;     /* result of last map lookup */
    vm_map_entry_t      new_entry;
    vm_object_offset_t  offset;
    vm_map_offset_t     map_address;
    vm_map_offset_t     src_start;     /* start of entry to map */
    vm_map_offset_t     src_end;       /* end of region to be mapped */
    vm_object_t         object;
    vm_map_version_t    version;
    boolean_t           src_needs_copy;
    boolean_t           new_entry_needs_copy;

    assert(map != VM_MAP_NULL);
    assert(size != 0 && size == vm_map_round_page(size));
    assert(inheritance == VM_INHERIT_NONE ||
           inheritance == VM_INHERIT_COPY ||
           inheritance == VM_INHERIT_SHARE);

    /*
     *  Compute start and end of region.
     */
    src_start = vm_map_trunc_page(addr);
    src_end = vm_map_round_page(src_start + size);

    /*
     *  Initialize map_header.
     */
    map_header->links.next = (struct vm_map_entry *)&map_header->links;
    map_header->links.prev = (struct vm_map_entry *)&map_header->links;
    map_header->nentries = 0;
    map_header->entries_pageable = pageable;

    *cur_protection = VM_PROT_ALL;
    *max_protection = VM_PROT_ALL;

    map_address = 0;
    mapped_size = 0;
    result = KERN_SUCCESS;

    /*
     *  The specified source virtual space might correspond to
     *  multiple map entries, need to loop on them.
     */
    vm_map_lock(map);
    while (mapped_size != size) {
        vm_map_size_t   entry_size;

        /*
         *  Find the beginning of the region.
         */
        if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
            result = KERN_INVALID_ADDRESS;
            break;
        }

        if (src_start < src_entry->vme_start ||
            (mapped_size && src_start != src_entry->vme_start)) {
            result = KERN_INVALID_ADDRESS;
            break;
        }

        tmp_size = size - mapped_size;
        if (src_end > src_entry->vme_end)
            tmp_size -= (src_end - src_entry->vme_end);

        entry_size = (vm_map_size_t)(src_entry->vme_end -
                                     src_entry->vme_start);

        if(src_entry->is_sub_map) {
            vm_map_reference(src_entry->object.sub_map);
            object = VM_OBJECT_NULL;
        } else {
            object = src_entry->object.vm_object;

            if (object == VM_OBJECT_NULL) {
                object = vm_object_allocate(entry_size);
                src_entry->offset = 0;
                src_entry->object.vm_object = object;
            } else if (object->copy_strategy !=
                       MEMORY_OBJECT_COPY_SYMMETRIC) {
                /*
                 *  We are already using an asymmetric
                 *  copy, and therefore we already have
                 *  the right object.
                 */
                assert(!src_entry->needs_copy);
            } else if (src_entry->needs_copy || object->shadowed ||
                       (object->internal && !object->true_share &&
                        !src_entry->is_shared &&
                        object->size > entry_size)) {

                vm_object_shadow(&src_entry->object.vm_object,
                                 &src_entry->offset,
                                 entry_size);

                if (!src_entry->needs_copy &&
                    (src_entry->protection & VM_PROT_WRITE)) {
                    vm_prot_t prot;

                    prot = src_entry->protection & ~VM_PROT_WRITE;

                    if (override_nx(map, src_entry->alias) && prot)
                        prot |= VM_PROT_EXECUTE;

                    if (map->mapped) {
                        vm_object_pmap_protect(
                            src_entry->object.vm_object,
                            src_entry->offset,
                            entry_size,
                            PMAP_NULL,
                            src_entry->vme_start,
                            prot);
                    } else {
                        pmap_protect(vm_map_pmap(map),
                                     src_entry->vme_start,
                                     src_entry->vme_end,
                                     prot);
                    }
                }

                object = src_entry->object.vm_object;
                src_entry->needs_copy = FALSE;
            }

            vm_object_lock(object);
            vm_object_reference_locked(object); /* object ref. for new entry */
            if (object->copy_strategy ==
                MEMORY_OBJECT_COPY_SYMMETRIC) {
                object->copy_strategy =
                    MEMORY_OBJECT_COPY_DELAY;
            }
            vm_object_unlock(object);
        }

        offset = src_entry->offset + (src_start - src_entry->vme_start);

        new_entry = _vm_map_entry_create(map_header);
        vm_map_entry_copy(new_entry, src_entry);
        new_entry->use_pmap = FALSE; /* clr address space specifics */

        new_entry->vme_start = map_address;
        new_entry->vme_end = map_address + tmp_size;
        new_entry->inheritance = inheritance;
        new_entry->offset = offset;

        /*
         * The new region has to be copied now if required.
         */
    RestartCopy:
        if (!copy) {
            src_entry->is_shared = TRUE;
            new_entry->is_shared = TRUE;
            if (!(new_entry->is_sub_map))
                new_entry->needs_copy = FALSE;

        } else if (src_entry->is_sub_map) {
            /* make this a COW sub_map if not already */
            new_entry->needs_copy = TRUE;
            object = VM_OBJECT_NULL;
        } else if (src_entry->wired_count == 0 &&
                   vm_object_copy_quickly(&new_entry->object.vm_object,
                                          offset,
                                          (new_entry->vme_end -
                                           new_entry->vme_start),
                                          &src_needs_copy,
                                          &new_entry_needs_copy)) {

            new_entry->needs_copy = new_entry_needs_copy;
            new_entry->is_shared = FALSE;

            /*
             * Handle copy_on_write semantics.
             */
            if (src_needs_copy && !src_entry->needs_copy) {
                vm_prot_t prot;

                prot = src_entry->protection & ~VM_PROT_WRITE;

                if (override_nx(map, src_entry->alias) && prot)
                    prot |= VM_PROT_EXECUTE;

                vm_object_pmap_protect(object,
                                       offset,
                                       entry_size,
                                       ((src_entry->is_shared
                                         || map->mapped) ?
                                        PMAP_NULL : map->pmap),
                                       src_entry->vme_start,
                                       prot);

                src_entry->needs_copy = TRUE;
            }
            /*
             * Throw away the old object reference of the new entry.
             */
            vm_object_deallocate(object);

        } else {    /* slow copy */
            new_entry->is_shared = FALSE;

            /*
             * The map can be safely unlocked since we
             * already hold a reference on the object.
             *
             * Record the timestamp of the map for later
             * verification, and unlock the map.
             */
            version.main_timestamp = map->timestamp;
            vm_map_unlock(map);     /* Increments timestamp once! */

            /*
             * Perform the copy.
             */
            if (src_entry->wired_count > 0) {
                vm_object_lock(object);
                result = vm_object_copy_slowly(
                    object,
                    offset,
                    entry_size,
                    THREAD_UNINT,
                    &new_entry->object.vm_object);

                new_entry->offset = 0;
                new_entry->needs_copy = FALSE;
            } else {
                result = vm_object_copy_strategically(
                    object,
                    offset,
                    entry_size,
                    &new_entry->object.vm_object,
                    &new_entry->offset,
                    &new_entry_needs_copy);

                new_entry->needs_copy = new_entry_needs_copy;
            }

            /*
             * Throw away the old object reference of the new entry.
             */
            vm_object_deallocate(object);

            if (result != KERN_SUCCESS &&
                result != KERN_MEMORY_RESTART_COPY) {
                _vm_map_entry_dispose(map_header, new_entry);
                break;
            }

            /*
             * Verify that the map has not substantially
             * changed while the copy was being made.
             */

            vm_map_lock(map);
            if (version.main_timestamp + 1 != map->timestamp) {
                /*
                 * Simple version comparison failed.
                 *
                 * Retry the lookup and verify that the
                 * same object/offset are still present.
                 */
                vm_object_deallocate(new_entry->
                                     object.vm_object);
                _vm_map_entry_dispose(map_header, new_entry);
                if (result == KERN_MEMORY_RESTART_COPY)
                    result = KERN_SUCCESS;
                continue;
            }

            if (result == KERN_MEMORY_RESTART_COPY) {
                vm_object_reference(object);
                goto RestartCopy;
            }
        }

        _vm_map_entry_link(map_header,
                           map_header->links.prev, new_entry);

        *cur_protection &= src_entry->protection;
        *max_protection &= src_entry->max_protection;

        map_address += tmp_size;
        mapped_size += tmp_size;
        src_start += tmp_size;

    } /* end while */

    vm_map_unlock(map);
    if (result != KERN_SUCCESS) {
        /*
         * Free all allocated elements.
         */
        for (src_entry = map_header->links.next;
             src_entry != (struct vm_map_entry *)&map_header->links;
             src_entry = new_entry) {
            new_entry = src_entry->vme_next;
            _vm_map_entry_unlink(map_header, src_entry);
            vm_object_deallocate(src_entry->object.vm_object);
            _vm_map_entry_dispose(map_header, src_entry);
        }
    }
    return result;
}
/*
 *  Routine:    vm_remap
 *
 *      Map portion of a task's address space.
 *      Mapped region must not overlap more than
 *      one vm memory object. Protections and
 *      inheritance attributes remain the same
 *      as in the original task and are out parameters.
 *      Source and Target task can be identical
 *      Other attributes are identical as for vm_map()
 */
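/*
 * Illustrative sketch (not part of this file): user space reaches this
 * routine through mach_vm_remap().  Assuming only the public Mach headers,
 * creating a shared alias of a region within the caller's own task might
 * look like this; the helper name is hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
alias_region(mach_vm_address_t src, mach_vm_size_t size,
             mach_vm_address_t *alias_out)
{
    vm_prot_t cur_prot, max_prot;
    mach_vm_address_t target = 0;

    /* copy == FALSE shares the source pages instead of copying them. */
    kern_return_t kr = mach_vm_remap(mach_task_self(), &target, size, 0,
                                     VM_FLAGS_ANYWHERE,
                                     mach_task_self(), src, FALSE,
                                     &cur_prot, &max_prot,
                                     VM_INHERIT_SHARE);
    if (kr == KERN_SUCCESS)
        *alias_out = target;
    return kr;
}
#endif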
kern_return_t
vm_map_remap(
    vm_map_t            target_map,
    vm_map_address_t    *address,
    vm_map_size_t       size,
    vm_map_offset_t     mask,
    int                 flags,
    vm_map_t            src_map,
    vm_map_offset_t     memory_address,
    boolean_t           copy,
    vm_prot_t           *cur_protection,
    vm_prot_t           *max_protection,
    vm_inherit_t        inheritance)
{
    kern_return_t           result;
    vm_map_entry_t          entry;
    vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
    vm_map_entry_t          new_entry;
    struct vm_map_header    map_header;

    if (target_map == VM_MAP_NULL)
        return KERN_INVALID_ARGUMENT;

    switch (inheritance) {
    case VM_INHERIT_NONE:
    case VM_INHERIT_COPY:
    case VM_INHERIT_SHARE:
        if (size != 0 && src_map != VM_MAP_NULL)
            break;
        /*FALL THRU*/
    default:
        return KERN_INVALID_ARGUMENT;
    }

    size = vm_map_round_page(size);

    result = vm_map_remap_extract(src_map, memory_address,
                                  size, copy, &map_header,
                                  cur_protection,
                                  max_protection,
                                  inheritance,
                                  target_map->hdr.entries_pageable);

    if (result != KERN_SUCCESS) {
        return result;
    }

    /*
     * Allocate/check a range of free virtual address
     * space for the target
     */
    *address = vm_map_trunc_page(*address);
    vm_map_lock(target_map);
    result = vm_map_remap_range_allocate(target_map, address, size,
                                         mask, flags, &insp_entry);

    for (entry = map_header.links.next;
         entry != (struct vm_map_entry *)&map_header.links;
         entry = new_entry) {
        new_entry = entry->vme_next;
        _vm_map_entry_unlink(&map_header, entry);
        if (result == KERN_SUCCESS) {
            entry->vme_start += *address;
            entry->vme_end += *address;
            vm_map_entry_link(target_map, insp_entry, entry);
            insp_entry = entry;
        } else {
            if (!entry->is_sub_map) {
                vm_object_deallocate(entry->object.vm_object);
            } else {
                vm_map_deallocate(entry->object.sub_map);
            }
            _vm_map_entry_dispose(&map_header, entry);
        }
    }

    if (result == KERN_SUCCESS) {
        target_map->size += size;
        SAVE_HINT_MAP_WRITE(target_map, insp_entry);
    }
    vm_map_unlock(target_map);

    if (result == KERN_SUCCESS && target_map->wiring_required)
        result = vm_map_wire(target_map, *address,
                             *address + size, *cur_protection, TRUE);
    return result;
}
/*
 *  Routine:    vm_map_remap_range_allocate
 *
 *  Description:
 *      Allocate a range in the specified virtual address map.
 *      returns the address and the map entry just before the allocated
 *      range
 *
 *  Map must be locked.
 */

static kern_return_t
vm_map_remap_range_allocate(
    vm_map_t            map,
    vm_map_address_t    *address,   /* IN/OUT */
    vm_map_size_t       size,
    vm_map_offset_t     mask,
    int                 flags,
    vm_map_entry_t      *map_entry) /* OUT */
{
    vm_map_entry_t  entry;
    vm_map_offset_t start;
    vm_map_offset_t end;
    kern_return_t   kr;

StartAgain: ;

    start = *address;

    if (flags & VM_FLAGS_ANYWHERE)
    {
        /*
         *  Calculate the first possible address.
         */

        if (start < map->min_offset)
            start = map->min_offset;
        if (start > map->max_offset)
            return(KERN_NO_SPACE);

        /*
         *  Look for the first possible address;
         *  if there's already something at this
         *  address, we have to start after it.
         */

        assert(first_free_is_valid(map));
        if (start == map->min_offset) {
            if ((entry = map->first_free) != vm_map_to_entry(map))
                start = entry->vme_end;
        } else {
            vm_map_entry_t tmp_entry;
            if (vm_map_lookup_entry(map, start, &tmp_entry))
                start = tmp_entry->vme_end;
            entry = tmp_entry;
        }

        /*
         *  In any case, the "entry" always precedes
         *  the proposed new region throughout the
         *  loop:
         */

        while (TRUE) {
            register vm_map_entry_t next;

            /*
             *  Find the end of the proposed new region.
             *  Be sure we didn't go beyond the end, or
             *  wrap around the address.
             */

            end = ((start + mask) & ~mask);
            if (end < start)
                return(KERN_NO_SPACE);
            start = end;
            end += size;

            if ((end > map->max_offset) || (end < start)) {
                if (map->wait_for_space) {
                    if (size <= (map->max_offset -
                                 map->min_offset)) {
                        assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
                        vm_map_unlock(map);
                        thread_block(THREAD_CONTINUE_NULL);
                        vm_map_lock(map);
                        goto StartAgain;
                    }
                }

                return(KERN_NO_SPACE);
            }

            /*
             *  If there are no more entries, we must win.
             */

            next = entry->vme_next;
            if (next == vm_map_to_entry(map))
                break;

            /*
             *  If there is another entry, it must be
             *  after the end of the potential new region.
             */

            if (next->vme_start >= end)
                break;

            /*
             *  Didn't fit -- move to the next entry.
             */

            entry = next;
            start = entry->vme_end;
        }
        *address = start;
    } else {
        vm_map_entry_t temp_entry;

        /*
         *  Verify that:
         *      the address doesn't itself violate
         *      the mask requirement.
         */

        if ((start & mask) != 0)
            return(KERN_NO_SPACE);


        /*
         *  ...     the address is within bounds
         */

        end = start + size;

        if ((start < map->min_offset) ||
            (end > map->max_offset) ||
            (start >= end)) {
            return(KERN_INVALID_ADDRESS);
        }

        /*
         * If we're asked to overwrite whatever was mapped in that
         * range, first deallocate that range.
         */
        if (flags & VM_FLAGS_OVERWRITE) {
            vm_map_t zap_map;

            /*
             * We use a "zap_map" to avoid having to unlock
             * the "map" in vm_map_delete(), which would compromise
             * the atomicity of the "deallocate" and then "remap"
             * operations.
             */
            zap_map = vm_map_create(PMAP_NULL,
                                    start,
                                    end,
                                    map->hdr.entries_pageable);
            if (zap_map == VM_MAP_NULL) {
                return KERN_RESOURCE_SHORTAGE;
            }

            kr = vm_map_delete(map, start, end,
                               VM_MAP_REMOVE_SAVE_ENTRIES,
                               zap_map);
            if (kr == KERN_SUCCESS) {
                vm_map_destroy(zap_map,
                               VM_MAP_REMOVE_NO_PMAP_CLEANUP);
                zap_map = VM_MAP_NULL;
            }
        }

        /*
         *  ...     the starting address isn't allocated
         */

        if (vm_map_lookup_entry(map, start, &temp_entry))
            return(KERN_NO_SPACE);

        entry = temp_entry;

        /*
         *  ...     the next region doesn't overlap the
         *          end point.
         */

        if ((entry->vme_next != vm_map_to_entry(map)) &&
            (entry->vme_next->vme_start < end))
            return(KERN_NO_SPACE);
    }
    *map_entry = entry;
    return(KERN_SUCCESS);
}
/*
 *  Routine:    vm_map_switch
 *
 *  Description:
 *      Set the address map for the current thread to the specified map
 */

vm_map_t
vm_map_switch(
    vm_map_t map)
{
    int         mycpu;
    thread_t    thread = current_thread();
    vm_map_t    oldmap = thread->map;

    mp_disable_preemption();
    mycpu = cpu_number();

    /*
     *  Deactivate the current map and activate the requested map
     */
    PMAP_SWITCH_USER(thread, map, mycpu);

    mp_enable_preemption();
    return(oldmap);
}
/*
 *  Routine:    vm_map_write_user
 *
 *  Description:
 *      Copy out data from a kernel space into space in the
 *      destination map. The space must already exist in the
 *      destination map.
 *      NOTE:  This routine should only be called by threads
 *      which can block on a page fault. i.e. kernel mode user
 *      threads.
 *
 */
kern_return_t
vm_map_write_user(
    vm_map_t            map,
    void                *src_p,
    vm_map_address_t    dst_addr,
    vm_size_t           size)
{
    kern_return_t kr = KERN_SUCCESS;

    if(current_map() == map) {
        if (copyout(src_p, dst_addr, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_t oldmap;

        /* take on the identity of the target map while doing */
        /* the transfer */

        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyout(src_p, dst_addr, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}
/*
 *  Routine:    vm_map_read_user
 *
 *  Description:
 *      Copy in data from a user space source map into the
 *      kernel map. The space must already exist in the
 *      kernel map.
 *      NOTE:  This routine should only be called by threads
 *      which can block on a page fault. i.e. kernel mode user
 *      threads.
 *
 */
kern_return_t
vm_map_read_user(
    vm_map_t            map,
    vm_map_address_t    src_addr,
    void                *dst_p,
    vm_size_t           size)
{
    kern_return_t kr = KERN_SUCCESS;

    if(current_map() == map) {
        if (copyin(src_addr, dst_p, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_t oldmap;

        /* take on the identity of the target map while doing */
        /* the transfer */

        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyin(src_addr, dst_p, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}
/*
 *  vm_map_check_protection:
 *
 *  Assert that the target map allows the specified
 *  privilege on the entire address region given.
 *  The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
                        vm_map_offset_t end, vm_prot_t protection)
{
    vm_map_entry_t entry;
    vm_map_entry_t tmp_entry;

    vm_map_lock(map);

    if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
    {
        vm_map_unlock(map);
        return (FALSE);
    }

    if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
        vm_map_unlock(map);
        return(FALSE);
    }

    entry = tmp_entry;

    while (start < end) {
        if (entry == vm_map_to_entry(map)) {
            vm_map_unlock(map);
            return(FALSE);
        }

        /*
         *  No holes allowed!
         */

        if (start < entry->vme_start) {
            vm_map_unlock(map);
            return(FALSE);
        }

        /*
         * Check protection associated with entry.
         */

        if ((entry->protection & protection) != protection) {
            vm_map_unlock(map);
            return(FALSE);
        }

        /* go to next entry */

        start = entry->vme_end;
        entry = entry->vme_next;
    }
    vm_map_unlock(map);
    return(TRUE);
}
kern_return_t
vm_map_purgable_control(
    vm_map_t        map,
    vm_map_offset_t address,
    vm_purgable_t   control,
    int             *state)
{
    vm_map_entry_t  entry;
    vm_object_t     object;
    kern_return_t   kr;

    /*
     * Vet all the input parameters and current type and state of the
     * underlying object.  Return with an error if anything is amiss.
     */
    if (map == VM_MAP_NULL)
        return(KERN_INVALID_ARGUMENT);

    if (control != VM_PURGABLE_SET_STATE &&
        control != VM_PURGABLE_GET_STATE &&
        control != VM_PURGABLE_PURGE_ALL)
        return(KERN_INVALID_ARGUMENT);

    if (control == VM_PURGABLE_PURGE_ALL) {
        vm_purgeable_object_purge_all();
        return KERN_SUCCESS;
    }

    if (control == VM_PURGABLE_SET_STATE &&
        (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
         ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
        return(KERN_INVALID_ARGUMENT);

    vm_map_lock_read(map);

    if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {

        /*
         * Must pass a valid non-submap address.
         */
        vm_map_unlock_read(map);
        return(KERN_INVALID_ADDRESS);
    }

    if ((entry->protection & VM_PROT_WRITE) == 0) {
        /*
         * Can't apply purgable controls to something you can't write.
         */
        vm_map_unlock_read(map);
        return(KERN_PROTECTION_FAILURE);
    }

    object = entry->object.vm_object;
    if (object == VM_OBJECT_NULL) {
        /*
         * Object must already be present or it can't be purgable.
         */
        vm_map_unlock_read(map);
        return KERN_INVALID_ARGUMENT;
    }

    vm_object_lock(object);

    if (entry->offset != 0 ||
        entry->vme_end - entry->vme_start != object->size) {
        /*
         * Can only apply purgable controls to the whole (existing)
         * object at once.
         */
        vm_map_unlock_read(map);
        vm_object_unlock(object);
        return KERN_INVALID_ARGUMENT;
    }

    vm_map_unlock_read(map);

    kr = vm_object_purgable_control(object, control, state);

    vm_object_unlock(object);

    return kr;
}
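/*
 * Illustrative sketch (not part of this file): purgeable state is driven from
 * user space through vm_purgable_control() on memory that was allocated with
 * VM_FLAGS_PURGABLE.  The helper name is hypothetical, and the user-level
 * call names/constants are assumed to be as published in the Mach headers.
 */
#if 0   /* user-space example, for illustration only */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
make_volatile(mach_vm_address_t *addr, mach_vm_size_t size)
{
    kern_return_t kr;
    int state;

    /* Purgeable memory must be allocated with VM_FLAGS_PURGABLE. */
    kr = mach_vm_allocate(mach_task_self(), addr, size,
                          VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
    if (kr != KERN_SUCCESS)
        return kr;

    /* Mark it volatile: the kernel may now empty it under memory pressure. */
    state = VM_PURGABLE_VOLATILE;
    return vm_purgable_control(mach_task_self(), (vm_address_t)*addr,
                               VM_PURGABLE_SET_STATE, &state);
}
#endif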
kern_return_t
vm_map_page_query_internal(
    vm_map_t        target_map,
    vm_map_offset_t offset,
    int             *disposition,
    int             *ref_count)
{
    kern_return_t               kr;
    vm_page_info_basic_data_t   info;
    mach_msg_type_number_t      count;

    count = VM_PAGE_INFO_BASIC_COUNT;
    kr = vm_map_page_info(target_map,
                          offset,
                          VM_PAGE_INFO_BASIC,
                          (vm_page_info_t) &info,
                          &count);
    if (kr == KERN_SUCCESS) {
        *disposition = info.disposition;
        *ref_count = info.ref_count;
    } else {
        *disposition = 0;
        *ref_count = 0;
    }

    return kr;
}

kern_return_t
vm_map_page_info(
    vm_map_t                map,
    vm_map_offset_t         offset,
    vm_page_info_flavor_t   flavor,
    vm_page_info_t          info,
    mach_msg_type_number_t  *count)
{
    vm_map_entry_t          map_entry;
    vm_object_t             object;
    vm_page_t               m;
    kern_return_t           kr;
    kern_return_t           retval = KERN_SUCCESS;
    boolean_t               top_object;
    int                     disposition;
    int                     ref_count;
    vm_object_id_t          object_id;
    vm_page_info_basic_t    basic_info;
    int                     depth;

    switch (flavor) {
    case VM_PAGE_INFO_BASIC:
        if (*count != VM_PAGE_INFO_BASIC_COUNT) {
            return KERN_INVALID_ARGUMENT;
        }
        break;
    default:
        return KERN_INVALID_ARGUMENT;
    }

    disposition = 0;
    ref_count = 0;
    object_id = 0;
    top_object = TRUE;
    depth = 0;

    retval = KERN_SUCCESS;
    offset = vm_map_trunc_page(offset);

    vm_map_lock_read(map);

retry_lookup:
    /*
     * First, find the map entry covering "offset", going down
     * submaps if necessary.
     */
    if (!vm_map_lookup_entry(map, offset, &map_entry)) {
        vm_map_unlock_read(map);
        return KERN_INVALID_ADDRESS;
    }
    /* compute offset from this map entry's start */
    offset -= map_entry->vme_start;
    /* compute offset into this map entry's object (or submap) */
    offset += map_entry->offset;

    if (map_entry->is_sub_map) {
        vm_map_t sub_map;

        sub_map = map_entry->object.sub_map;
        vm_map_lock_read(sub_map);
        vm_map_unlock_read(map);

        map = sub_map;

        ref_count = MAX(ref_count, map->ref_count);
        goto retry_lookup;
    }

    object = map_entry->object.vm_object;
    if (object == VM_OBJECT_NULL) {
        /* no object -> no page */
        vm_map_unlock_read(map);
        goto done;
    }

    vm_object_lock(object);
    vm_map_unlock_read(map);

    /*
     * Go down the VM object shadow chain until we find the page
     * we're looking for.
     */
    for (;;) {
        ref_count = MAX(ref_count, object->ref_count);

        m = vm_page_lookup(object, offset);

        if (m != VM_PAGE_NULL) {
            disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
            break;
        } else {
#if MACH_PAGEMAP
            if (object->existence_map) {
                if (vm_external_state_get(object->existence_map,
                                          offset) ==
                    VM_EXTERNAL_STATE_EXISTS) {
                    /*
                     * this page has been paged out
                     */
                    disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
                    break;
                }
            } else
#endif /* MACH_PAGEMAP */
            if (object->internal &&
                object->alive &&
                !object->terminating &&
                object->pager_ready) {

                memory_object_t pager;

                vm_object_paging_begin(object);
                pager = object->pager;
                vm_object_unlock(object);

                /*
                 * Ask the default pager if
                 * it has this page.
                 */
                kr = memory_object_data_request(
                    pager,
                    offset + object->paging_offset,
                    0, /* just poke the pager */
                    VM_PROT_READ,
                    NULL);

                vm_object_lock(object);
                vm_object_paging_end(object);

                if (kr == KERN_SUCCESS) {
                    /* the default pager has it */
                    disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
                    break;
                }
            }

            if (object->shadow != VM_OBJECT_NULL) {
                vm_object_t shadow;

                offset += object->shadow_offset;
                shadow = object->shadow;

                vm_object_lock(shadow);
                vm_object_unlock(object);

                object = shadow;
                top_object = FALSE;
                depth++;
            } else {
//              if (!object->internal)
//                  break;
//              retval = KERN_FAILURE;
//              goto done_with_object;
                break;
            }
        }
    }
    /* The ref_count is not strictly accurate, it measures the number   */
    /* of entities holding a ref on the object, they may not be mapping */
    /* the object or may not be mapping the section holding the         */
    /* target page but its still a ball park number and though an over- */
    /* count, it picks up the copy-on-write cases                       */

    /* We could also get a picture of page sharing from pmap_attributes */
    /* but this would under count as only faulted-in mappings would     */
    /* show up.                                                         */

    if (top_object == TRUE && object->shadow)
        disposition |= VM_PAGE_QUERY_PAGE_COPIED;

    if (! object->internal)
        disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;

    if (m == VM_PAGE_NULL)
        goto done_with_object;

    if (m->fictitious) {
        disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
        goto done_with_object;
    }
    if (m->dirty || pmap_is_modified(m->phys_page))
        disposition |= VM_PAGE_QUERY_PAGE_DIRTY;

    if (m->reference || pmap_is_referenced(m->phys_page))
        disposition |= VM_PAGE_QUERY_PAGE_REF;

    if (m->speculative)
        disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;

    if (m->cs_validated)
        disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
    if (m->cs_tainted)
        disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;

done_with_object:
    vm_object_unlock(object);
done:

    switch (flavor) {
    case VM_PAGE_INFO_BASIC:
        basic_info = (vm_page_info_basic_t) info;
        basic_info->disposition = disposition;
        basic_info->ref_count = ref_count;
        basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
        basic_info->offset = (memory_object_offset_t) offset;
        basic_info->depth = depth;
        break;
    }

    return retval;
}
/*
 *  Routine:    vm_map_msync
 *
 *  Description:
 *      Synchronises the memory range specified with its backing store
 *      image by either flushing or cleaning the contents to the appropriate
 *      memory manager engaging in a memory object synchronize dialog with
 *      the manager.  The client doesn't return until the manager issues
 *      m_o_s_completed message.  MIG Magically converts user task parameter
 *      to the task's address map.
 *
 *  interpretation of sync_flags
 *      VM_SYNC_INVALIDATE  - discard pages, only return precious
 *                            pages to manager.
 *
 *      VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
 *                          - discard pages, write dirty or precious
 *                            pages back to memory manager.
 *
 *      VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
 *                          - write dirty or precious pages back to
 *                            the memory manager.
 *
 *      VM_SYNC_CONTIGUOUS  - does everything normally, but if there
 *                            is a hole in the region, and we would
 *                            have returned KERN_SUCCESS, return
 *                            KERN_INVALID_ADDRESS instead.
 *
 *  NOTE
 *      The memory object attributes have not yet been implemented, this
 *      function will have to deal with the invalidate attribute
 *
 *  RETURNS
 *      KERN_INVALID_TASK       Bad task parameter
 *      KERN_INVALID_ARGUMENT   both sync and async were specified.
 *      KERN_SUCCESS            The usual.
 *      KERN_INVALID_ADDRESS    There was a hole in the region.
 */
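/*
 * Illustrative sketch (not part of this file): the most common entry point
 * into this routine is the POSIX msync(2) call, whose MS_* flags the BSD
 * layer maps onto the VM_SYNC_* flags described above.  The helper name is
 * hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <sys/mman.h>

static int
flush_mapping(void *addr, size_t len)
{
    /*
     * MS_SYNC waits for the write-back to complete; MS_INVALIDATE also
     * discards cached pages for the range.
     */
    return msync(addr, len, MS_SYNC | MS_INVALIDATE);
}
#endif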
kern_return_t
vm_map_msync(
	vm_map_t		map,
	vm_map_address_t	address,
	vm_map_size_t		size,
	vm_sync_t		sync_flags)
{
	msync_req_t		msr;
	msync_req_t		new_msr;
	queue_chain_t		req_q;		/* queue of requests for this msync */
	vm_map_entry_t		entry;
	vm_map_size_t		amount_left;
	vm_object_offset_t	offset;
	boolean_t		do_sync_req;
	boolean_t		had_hole = FALSE;
	memory_object_t		pager;

	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
	    (sync_flags & VM_SYNC_SYNCHRONOUS))
		return(KERN_INVALID_ARGUMENT);

	/*
	 * align address and size on page boundaries
	 */
	size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
	address = vm_map_trunc_page(address);

	if (map == VM_MAP_NULL)
		return(KERN_INVALID_TASK);

	if (size == 0)
		return(KERN_SUCCESS);

	queue_init(&req_q);
	amount_left = size;

	while (amount_left > 0) {
		vm_object_size_t	flush_size;
		vm_object_t		object;

		vm_map_lock(map);
		if (!vm_map_lookup_entry(map,
					 vm_map_trunc_page(address), &entry)) {

			vm_map_size_t	skip;

			/*
			 * hole in the address map.
			 */
			had_hole = TRUE;

			/*
			 * Check for empty map.
			 */
			if (entry == vm_map_to_entry(map) &&
			    entry->vme_next == entry) {
				vm_map_unlock(map);
				break;
			}
			/*
			 * Check that we don't wrap and that
			 * we have at least one real map entry.
			 */
			if ((map->hdr.nentries == 0) ||
			    (entry->vme_next->vme_start < address)) {
				vm_map_unlock(map);
				break;
			}
			/*
			 * Move up to the next entry if needed
			 */
			skip = (entry->vme_next->vme_start - address);
			if (skip >= amount_left)
				amount_left = 0;
			else
				amount_left -= skip;
			address = entry->vme_next->vme_start;
			vm_map_unlock(map);
			continue;
		}

		offset = address - entry->vme_start;

		/*
		 * do we have more to flush than is contained in this
		 * entry ?
		 */
		if (amount_left + entry->vme_start + offset > entry->vme_end) {
			flush_size = entry->vme_end -
				(entry->vme_start + offset);
		} else {
			flush_size = amount_left;
		}
		amount_left -= flush_size;
		address += flush_size;

		if (entry->is_sub_map == TRUE) {
			vm_map_t	local_map;
			vm_map_offset_t	local_offset;

			local_map = entry->object.sub_map;
			local_offset = entry->offset;
			vm_map_unlock(map);
			if (vm_map_msync(
				    local_map,
				    local_offset,
				    flush_size,
				    sync_flags) == KERN_INVALID_ADDRESS) {
				had_hole = TRUE;
			}
			continue;
		}
		object = entry->object.vm_object;

		/*
		 * We can't sync this object if the object has not been
		 * created yet
		 */
		if (object == VM_OBJECT_NULL) {
			vm_map_unlock(map);
			continue;
		}
		offset += entry->offset;

		vm_object_lock(object);

		if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
			int kill_pages = 0;
			boolean_t reusable_pages = FALSE;

			if (sync_flags & VM_SYNC_KILLPAGES) {
				if (object->ref_count == 1 && !object->shadow)
					kill_pages = 1;
				else
					kill_pages = -1;
			}
			if (kill_pages != -1)
				vm_object_deactivate_pages(object, offset,
							   (vm_object_size_t)flush_size, kill_pages, reusable_pages);
			vm_object_unlock(object);
			vm_map_unlock(map);
			continue;
		}
		/*
		 * We can't sync this object if there isn't a pager.
		 * Don't bother to sync internal objects, since there can't
		 * be any "permanent" storage for these objects anyway.
		 */
		if ((object->pager == MEMORY_OBJECT_NULL) ||
		    (object->internal) || (object->private)) {
			vm_object_unlock(object);
			vm_map_unlock(map);
			continue;
		}
		/*
		 * keep reference on the object until syncing is done
		 */
		vm_object_reference_locked(object);
		vm_object_unlock(object);

		vm_map_unlock(map);

		do_sync_req = vm_object_sync(object,
					     offset,
					     flush_size,
					     sync_flags & VM_SYNC_INVALIDATE,
					     ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
					      (sync_flags & VM_SYNC_ASYNCHRONOUS)),
					     sync_flags & VM_SYNC_SYNCHRONOUS);
		/*
		 * only send a m_o_s if we returned pages or if the entry
		 * is writable (ie dirty pages may have already been sent back)
		 */
		if (!do_sync_req) {
			if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
				/*
				 * clear out the clustering and read-ahead hints
				 */
				vm_object_lock(object);

				object->pages_created = 0;
				object->pages_used = 0;
				object->sequential = 0;
				object->last_alloc = 0;

				vm_object_unlock(object);
			}
			vm_object_deallocate(object);
			continue;
		}
		msync_req_alloc(new_msr);

		vm_object_lock(object);
		offset += object->paging_offset;

		new_msr->offset = offset;
		new_msr->length = flush_size;
		new_msr->object = object;
		new_msr->flag = VM_MSYNC_SYNCHRONIZING;
	re_iterate:

		/*
		 * We can't sync this object if there isn't a pager.  The
		 * pager can disappear anytime we're not holding the object
		 * lock.  So this has to be checked anytime we goto re_iterate.
		 */

		pager = object->pager;

		if (pager == MEMORY_OBJECT_NULL) {
			vm_object_unlock(object);
			vm_object_deallocate(object);
			continue;
		}

		queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
			/*
			 * need to check for overlapping entry, if found, wait
			 * on overlapping msr to be done, then reiterate
			 */
			msr_lock(msr);
			if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
			    ((offset >= msr->offset &&
			      offset < (msr->offset + msr->length)) ||
			     (msr->offset >= offset &&
			      msr->offset < (offset + flush_size))))
			{
				assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
				msr_unlock(msr);
				vm_object_unlock(object);
				thread_block(THREAD_CONTINUE_NULL);
				vm_object_lock(object);
				goto re_iterate;
			}
			msr_unlock(msr);
		}/* queue_iterate */

		queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);

		vm_object_paging_begin(object);
		vm_object_unlock(object);

		queue_enter(&req_q, new_msr, msync_req_t, req_q);

		(void) memory_object_synchronize(
			pager,
			offset,
			flush_size,
			sync_flags & ~VM_SYNC_CONTIGUOUS);

		vm_object_lock(object);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}/* while */

	/*
	 * wait for memory_object_synchronize_completed messages from pager(s)
	 */

	while (!queue_empty(&req_q)) {
		msr = (msync_req_t)queue_first(&req_q);
		msr_lock(msr);
		while (msr->flag != VM_MSYNC_DONE) {
			assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
			msr_unlock(msr);
			thread_block(THREAD_CONTINUE_NULL);
			msr_lock(msr);
		}/* while */
		msr_unlock(msr);
		queue_remove(&req_q, msr, msync_req_t, req_q);
		vm_object_deallocate(msr->object);
		msync_req_free(msr);
	}/* while */

	/* for proper msync() behaviour */
	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
		return(KERN_INVALID_ADDRESS);

	return(KERN_SUCCESS);
}/* vm_map_msync */
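/*
 * Illustrative sketch (not part of the original source): one way a kernel
 * caller might drive vm_map_msync() for a synchronous flush that also
 * reports holes, per the sync_flags interpretation documented above.
 * The wrapper name and its use are assumptions for illustration only,
 * so the block is compiled out.
 */
#if 0
static kern_return_t
example_msync_flush(				/* hypothetical helper */
	vm_map_t		map,
	vm_map_address_t	addr,
	vm_map_size_t		len)
{
	/*
	 * VM_SYNC_SYNCHRONOUS: write dirty/precious pages back and wait.
	 * VM_SYNC_CONTIGUOUS: turn a hole in the range into
	 * KERN_INVALID_ADDRESS instead of KERN_SUCCESS.
	 */
	return vm_map_msync(map, addr, len,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}
#endif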
/*
 *	Routine:	convert_port_entry_to_map
 *	Purpose:
 *		Convert from a port specifying an entry or a task
 *		to a map. Doesn't consume the port ref; produces a map ref,
 *		which may be null.  Unlike convert_port_to_map, the
 *		port may be task or a named entry backed.
 *	Conditions:
 *		Nothing locked.
 */


vm_map_t
convert_port_entry_to_map(
	ipc_port_t	port)
{
	vm_map_t		map;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
		while (TRUE) {
			ip_lock(port);
			if (ip_active(port) && (ip_kotype(port)
						== IKOT_NAMED_ENTRY)) {
				named_entry =
					(vm_named_entry_t)port->ip_kobject;
				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
					ip_unlock(port);

					try_failed_count++;
					mutex_pause(try_failed_count);
					continue;
				}
				named_entry->ref_count++;
				lck_mtx_unlock(&(named_entry)->Lock);
				ip_unlock(port);
				if ((named_entry->is_sub_map) &&
				    (named_entry->protection
				     & VM_PROT_WRITE)) {
					map = named_entry->backing.map;
				} else {
					mach_destroy_memory_entry(port);
					return VM_MAP_NULL;
				}
				vm_map_reference_swap(map);
				mach_destroy_memory_entry(port);
				break;
			}
			else
				return VM_MAP_NULL;
		}
	}
	else
		map = convert_port_to_map(port);

	return map;
}
/*
 *	Routine:	convert_port_entry_to_object
 *	Purpose:
 *		Convert from a port specifying a named entry to an
 *		object. Doesn't consume the port ref; produces an object ref,
 *		which may be null.
 *	Conditions:
 *		Nothing locked.
 */


vm_object_t
convert_port_entry_to_object(
	ipc_port_t	port)
{
	vm_object_t		object;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
		while (TRUE) {
			ip_lock(port);
			if (ip_active(port) && (ip_kotype(port)
						== IKOT_NAMED_ENTRY)) {
				named_entry =
					(vm_named_entry_t)port->ip_kobject;
				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
					ip_unlock(port);

					try_failed_count++;
					mutex_pause(try_failed_count);
					continue;
				}
				named_entry->ref_count++;
				lck_mtx_unlock(&(named_entry)->Lock);
				ip_unlock(port);
				if ((!named_entry->is_sub_map) &&
				    (!named_entry->is_pager) &&
				    (named_entry->protection
				     & VM_PROT_WRITE)) {
					object = named_entry->backing.object;
				} else {
					mach_destroy_memory_entry(port);
					return (vm_object_t)NULL;
				}
				vm_object_reference(named_entry->backing.object);
				mach_destroy_memory_entry(port);
				break;
			}
			else
				return (vm_object_t)NULL;
		}
	} else
		return (vm_object_t)NULL;

	return object;
}
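/*
 * Illustrative sketch (not part of the original source): both converters
 * above leave the caller's send right on the port intact and hand back a
 * new reference (map or object) that the caller must drop when done.
 * The helper below is hypothetical and compiled out.
 */
#if 0
static void
example_use_entry_port(ipc_port_t port)		/* hypothetical helper */
{
	vm_map_t	map;
	vm_object_t	object;

	map = convert_port_entry_to_map(port);
	if (map != VM_MAP_NULL) {
		/* ... use the map ... */
		vm_map_deallocate(map);		/* drop the map ref we were given */
	}

	object = convert_port_entry_to_object(port);
	if (object != VM_OBJECT_NULL) {
		/* ... use the object ... */
		vm_object_deallocate(object);	/* drop the object ref we were given */
	}
	/* the port ref itself is not consumed by either converter */
}
#endif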
/*
 * Export routines to other components for the things we access locally through
 * macros.
 */
#undef current_map
vm_map_t
current_map(void)
{
	return (current_map_fast());
}

/*
 *	vm_map_reference:
 *
 *	Most code internal to the osfmk will go through a
 *	macro defining this.  This is always here for the
 *	use of other kernel components.
 */
#undef vm_map_reference
void
vm_map_reference(
	register vm_map_t	map)
{
	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
#if	TASK_SWAPPER
	assert(map->res_count > 0);
	assert(map->ref_count >= map->res_count);
	map->res_count++;
#endif
	map->ref_count++;
	lck_mtx_unlock(&map->s_lock);
}
/*
 *	vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(
	register vm_map_t	map)
{
	unsigned int		ref;

	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
	ref = --map->ref_count;
	if (ref > 0) {
		vm_map_res_deallocate(map);
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	assert(map->ref_count == 0);
	lck_mtx_unlock(&map->s_lock);

#if	TASK_SWAPPER
	/*
	 * The map residence count isn't decremented here because
	 * the vm_map_delete below will traverse the entire map,
	 * deleting entries, and the residence counts on objects
	 * and sharing maps will go away then.
	 */
#endif

	vm_map_destroy(map, VM_MAP_NO_FLAGS);
}
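/*
 * Illustrative sketch (not part of the original source): reference counting
 * discipline for external components using the two routines above. The
 * helper is hypothetical and compiled out.
 */
#if 0
static void
example_hold_map(vm_map_t map)			/* hypothetical helper */
{
	vm_map_reference(map);		/* take a ref so the map can't go away */
	/* ... operate on the map ... */
	vm_map_deallocate(map);		/* drop the ref; destroys the map if it was the last one */
}
#endif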
void
vm_map_disable_NX(vm_map_t map)
{
	if (map == NULL)
		return;
	if (map->pmap == NULL)
		return;

	pmap_disable_NX(map->pmap);
}
/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
 * more descriptive.
 */
void
vm_map_set_32bit(vm_map_t map)
{
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
}


void
vm_map_set_64bit(vm_map_t map)
{
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
}

vm_map_offset_t
vm_compute_max_offset(unsigned is64)
{
	return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
}

boolean_t
vm_map_is_64bit(
		vm_map_t map)
{
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}
boolean_t
vm_map_has_4GB_pagezero(
		vm_map_t map)
{
	/*
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	return (map->min_offset >= 0x100000000ULL);
}

void
vm_map_set_4GB_pagezero(vm_map_t map)
{
#ifdef	__i386__
	pmap_set_4GB_pagezero(map->pmap);
#else
#pragma unused(map)
#endif
}

void
vm_map_clear_4GB_pagezero(vm_map_t map)
{
#ifdef	__i386__
	pmap_clear_4GB_pagezero(map->pmap);
#else
#pragma unused(map)
#endif
}
/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t	map,
	vm_map_offset_t	new_min_offset)
{
	vm_map_entry_t	first_entry;

	new_min_offset = vm_map_round_page(new_min_offset);

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
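/*
 * Illustrative sketch (not part of the original source): how a loader-style
 * caller might reserve a one-page "page zero" by raising the map's minimum
 * offset and checking the two documented failure modes. Hypothetical
 * helper, compiled out.
 */
#if 0
static kern_return_t
example_reserve_pagezero(vm_map_t map)		/* hypothetical helper */
{
	kern_return_t kr;

	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)PAGE_SIZE);
	if (kr == KERN_NO_SPACE) {
		/* something is already mapped below the new minimum */
	} else if (kr == KERN_INVALID_ADDRESS) {
		/* attempted to lower the minimum offset; not allowed */
	}
	return kr;
}
#endif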
/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
 * don't have to reach over to the BSD data structures.
 */

void
vm_map_set_user_wire_limit(vm_map_t	map,
			   vm_size_t	limit)
{
	map->user_wire_limit = limit;
}
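/*
 * Illustrative sketch (not part of the original source): the comment above
 * says this mirrors the BSD MEMLOCK rlimit, so a BSD-side caller would copy
 * that value into the map roughly as below. The helper and variable names
 * are assumptions; compiled out.
 */
#if 0
static void
example_sync_memlock_limit(vm_map_t map, uint64_t memlock_limit)	/* hypothetical */
{
	/* memlock_limit would come from the process's RLIMIT_MEMLOCK value */
	vm_map_set_user_wire_limit(map, (vm_size_t)memlock_limit);
}
#endif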
void vm_map_switch_protect(vm_map_t	map,
			   boolean_t	val)
{
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t vm_map_sign(vm_map_t map,
		 vm_map_offset_t start,
		 vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ARGUMENT);
	}

	object = entry->object.vm_object;
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object, start - entry->vme_start + entry->offset);
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->busy ||
		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		m->cs_validated = TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->wpmapped = FALSE;
		refmod = pmap_disconnect(m->phys_page);

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
			m->dirty = TRUE;
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
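/*
 * Illustrative sketch (not part of the original source): vm_map_sign() is
 * only built under CONFIG_DYNAMIC_CODE_SIGNING and expects [start, end) to
 * fall within a single, already-populated map entry. Hypothetical helper,
 * compiled out.
 */
#if 0
static kern_return_t
example_sign_region(vm_map_t map, vm_map_offset_t start, vm_map_size_t len)
{
	/* align both bounds to page boundaries before signing */
	return vm_map_sign(map,
			   vm_map_trunc_page(start),
			   vm_map_round_page(start + len));
}
#endif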