1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
104
106 #include <vm/vm_shared_region.h>
107 #include <vm/vm_map_store.h>
108
109 /* Internal prototypes
110 */
111
112 static void vm_map_simplify_range(
113 vm_map_t map,
114 vm_map_offset_t start,
115 vm_map_offset_t end); /* forward */
116
117 static boolean_t vm_map_range_check(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end,
121 vm_map_entry_t *entry);
122
123 static vm_map_entry_t _vm_map_entry_create(
124 struct vm_map_header *map_header);
125
126 static void _vm_map_entry_dispose(
127 struct vm_map_header *map_header,
128 vm_map_entry_t entry);
129
130 static void vm_map_pmap_enter(
131 vm_map_t map,
132 vm_map_offset_t addr,
133 vm_map_offset_t end_addr,
134 vm_object_t object,
135 vm_object_offset_t offset,
136 vm_prot_t protection);
137
138 static void _vm_map_clip_end(
139 struct vm_map_header *map_header,
140 vm_map_entry_t entry,
141 vm_map_offset_t end);
142
143 static void _vm_map_clip_start(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t start);
147
148 static void vm_map_entry_delete(
149 vm_map_t map,
150 vm_map_entry_t entry);
151
152 static kern_return_t vm_map_delete(
153 vm_map_t map,
154 vm_map_offset_t start,
155 vm_map_offset_t end,
156 int flags,
157 vm_map_t zap_map);
158
159 static kern_return_t vm_map_copy_overwrite_unaligned(
160 vm_map_t dst_map,
161 vm_map_entry_t entry,
162 vm_map_copy_t copy,
163 vm_map_address_t start);
164
165 static kern_return_t vm_map_copy_overwrite_aligned(
166 vm_map_t dst_map,
167 vm_map_entry_t tmp_entry,
168 vm_map_copy_t copy,
169 vm_map_offset_t start,
170 pmap_t pmap);
171
172 static kern_return_t vm_map_copyin_kernel_buffer(
173 vm_map_t src_map,
174 vm_map_address_t src_addr,
175 vm_map_size_t len,
176 boolean_t src_destroy,
177 vm_map_copy_t *copy_result); /* OUT */
178
179 static kern_return_t vm_map_copyout_kernel_buffer(
180 vm_map_t map,
181 vm_map_address_t *addr, /* IN/OUT */
182 vm_map_copy_t copy,
183 boolean_t overwrite);
184
185 static void vm_map_fork_share(
186 vm_map_t old_map,
187 vm_map_entry_t old_entry,
188 vm_map_t new_map);
189
190 static boolean_t vm_map_fork_copy(
191 vm_map_t old_map,
192 vm_map_entry_t *old_entry_p,
193 vm_map_t new_map);
194
195 void vm_map_region_top_walk(
196 vm_map_entry_t entry,
197 vm_region_top_info_t top);
198
199 void vm_map_region_walk(
200 vm_map_t map,
201 vm_map_offset_t va,
202 vm_map_entry_t entry,
203 vm_object_offset_t offset,
204 vm_object_size_t range,
205 vm_region_extended_info_t extended,
206 boolean_t look_for_pages);
207
208 static kern_return_t vm_map_wire_nested(
209 vm_map_t map,
210 vm_map_offset_t start,
211 vm_map_offset_t end,
212 vm_prot_t access_type,
213 boolean_t user_wire,
214 pmap_t map_pmap,
215 vm_map_offset_t pmap_addr);
216
217 static kern_return_t vm_map_unwire_nested(
218 vm_map_t map,
219 vm_map_offset_t start,
220 vm_map_offset_t end,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr);
224
225 static kern_return_t vm_map_overwrite_submap_recurse(
226 vm_map_t dst_map,
227 vm_map_offset_t dst_addr,
228 vm_map_size_t dst_size);
229
230 static kern_return_t vm_map_copy_overwrite_nested(
231 vm_map_t dst_map,
232 vm_map_offset_t dst_addr,
233 vm_map_copy_t copy,
234 boolean_t interruptible,
235 pmap_t pmap,
236 boolean_t discard_on_success);
237
238 static kern_return_t vm_map_remap_extract(
239 vm_map_t map,
240 vm_map_offset_t addr,
241 vm_map_size_t size,
242 boolean_t copy,
243 struct vm_map_header *map_header,
244 vm_prot_t *cur_protection,
245 vm_prot_t *max_protection,
246 vm_inherit_t inheritance,
247 boolean_t pageable);
248
249 static kern_return_t vm_map_remap_range_allocate(
250 vm_map_t map,
251 vm_map_address_t *address,
252 vm_map_size_t size,
253 vm_map_offset_t mask,
254 int flags,
255 vm_map_entry_t *map_entry);
256
257 static void vm_map_region_look_for_page(
258 vm_map_t map,
259 vm_map_offset_t va,
260 vm_object_t object,
261 vm_object_offset_t offset,
262 int max_refcnt,
263 int depth,
264 vm_region_extended_info_t extended);
265
266 static int vm_map_region_count_obj_refs(
267 vm_map_entry_t entry,
268 vm_object_t object);
269
270
271 static kern_return_t vm_map_willneed(
272 vm_map_t map,
273 vm_map_offset_t start,
274 vm_map_offset_t end);
275
276 static kern_return_t vm_map_reuse_pages(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end);
280
281 static kern_return_t vm_map_reusable_pages(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_can_reuse(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291 #if CONFIG_FREEZE
292 struct default_freezer_table;
293 __private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
294 __private_extern__ void default_freezer_mapping_free(void**, boolean_t all);
295 #endif
296
297 /*
298 * Macros to copy a vm_map_entry. We must be careful to correctly
299 * manage the wired page count. vm_map_entry_copy() creates a new
300 * map entry to the same memory - the wired count in the new entry
301 * must be set to zero. vm_map_entry_copy_full() creates a new
302 * entry that is identical to the old entry. This preserves the
303 * wire count; it's used for map splitting and zone changing in
304 * vm_map_copyout.
305 */
306 #define vm_map_entry_copy(NEW,OLD) \
307 MACRO_BEGIN \
308 *(NEW) = *(OLD); \
309 (NEW)->is_shared = FALSE; \
310 (NEW)->needs_wakeup = FALSE; \
311 (NEW)->in_transition = FALSE; \
312 (NEW)->wired_count = 0; \
313 (NEW)->user_wired_count = 0; \
314 (NEW)->permanent = FALSE; \
315 MACRO_END
316
317 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
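/*
 * A minimal sketch of how the two flavors are used (the map write lock
 * is assumed held; "map_header", "where", "old_entry", "new_map" and
 * "new_entry" are placeholder names, simplified from the clip and fork
 * paths in this file).  Splitting an entry must keep the wired counts
 * in both halves, so it uses the full copy; mapping the same memory
 * into another map must start out unwired, so it uses the zeroing copy.
 */
#if 0
	vm_map_entry_t	left;

	/* split "entry" at "where": both halves keep their wired counts */
	left = _vm_map_entry_create(map_header);
	vm_map_entry_copy_full(left, entry);
	left->vme_end = where;
	entry->offset += (where - entry->vme_start);
	entry->vme_start = where;
	_vm_map_store_entry_link(map_header, entry->vme_prev, left);

	/* share old_entry's memory in a new map: the new entry is unwired */
	new_entry = vm_map_entry_create(new_map);
	vm_map_entry_copy(new_entry, old_entry);
	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
#endif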
318
319 /*
320 * Decide if we want to allow processes to execute from their data or stack areas.
321 * override_nx() returns true if we do. Data/stack execution can be enabled independently
322 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
323 * or allow_stack_exec to enable data execution for that type of data area for that particular
324 * ABI (or both by or'ing the flags together). These are initialized in the architecture
325 * specific pmap files since the default behavior varies according to architecture. The
326 * main reason it varies is because of the need to provide binary compatibility with old
327 * applications that were written before these restrictions came into being. In the old
328 * days, an app could execute anything it could read, but this has slowly been tightened
329 * up over time. The default behavior is:
330 *
331 * 32-bit PPC apps may execute from both stack and data areas
332 * 32-bit Intel apps may execute from data areas but not stack
333 * 64-bit PPC/Intel apps may not execute from either data or stack
334 *
335 * An application on any architecture may override these defaults by explicitly
336 * adding PROT_EXEC permission to the page in question with the mprotect(2)
337 * system call. This code here just determines what happens when an app tries to
338 * execute from a page that lacks execute permission.
339 *
340 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
341 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
342 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
343 * execution from data areas for a particular binary even if the arch normally permits it. As
344 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
345 * to support some complicated use cases, notably browsers with out-of-process plugins that
346 * are not all NX-safe.
347 */
348
349 extern int allow_data_exec, allow_stack_exec;
350
351 int
352 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
353 {
354 int current_abi;
355
356 /*
357 * Determine if the app is running in 32 or 64 bit mode.
358 */
359
360 if (vm_map_is_64bit(map))
361 current_abi = VM_ABI_64;
362 else
363 current_abi = VM_ABI_32;
364
365 /*
366 * Determine if we should allow the execution based on whether it's a
367 * stack or data area and the current architecture.
368 */
369
370 if (user_tag == VM_MEMORY_STACK)
371 return allow_stack_exec & current_abi;
372
373 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
374 }
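/*
 * A hypothetical caller-side sketch (not the actual fault path; the
 * helper name and arguments are placeholders): a page lacking execute
 * permission is allowed to execute only if the policy above says so.
 */
#if 0
static boolean_t
example_exec_permitted(vm_map_t map, vm_map_entry_t entry, vm_prot_t prot)
{
	if (prot & VM_PROT_EXECUTE)
		return TRUE;		/* page is already executable */
	/* data/stack page: defer to the per-ABI policy and map setting */
	return override_nx(map, entry->alias) ? TRUE : FALSE;
}
#endif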
375
376
377 /*
378 * Virtual memory maps provide for the mapping, protection,
379 * and sharing of virtual memory objects. In addition,
380 * this module provides for an efficient virtual copy of
381 * memory from one map to another.
382 *
383 * Synchronization is required prior to most operations.
384 *
385 * Maps consist of an ordered doubly-linked list of simple
386 * entries; a single hint is used to speed up lookups.
387 *
388 * Sharing maps have been deleted from this version of Mach.
389 * All shared objects are now mapped directly into the respective
390 * maps. This requires a change in the copy on write strategy;
391 * the asymmetric (delayed) strategy is used for shared temporary
392 * objects instead of the symmetric (shadow) strategy. All maps
393 * are now "top level" maps (either task map, kernel map or submap
394 * of the kernel map).
395 *
396 * Since portions of maps are specified by start/end addresses,
397 * which may not align with existing map entries, all
398 * routines merely "clip" entries to these start/end values.
399 * [That is, an entry is split into two, bordering at a
400 * start or end value.] Note that these clippings may not
401 * always be necessary (as the two resulting entries are then
402 * not changed); however, the clipping is done for convenience.
403 * No attempt is currently made to "glue back together" two
404 * abutting entries.
405 *
406 * The symmetric (shadow) copy strategy implements virtual copy
407 * by copying VM object references from one map to
408 * another, and then marking both regions as copy-on-write.
409 * It is important to note that only one writeable reference
410 * to a VM object region exists in any map when this strategy
411 * is used -- this means that shadow object creation can be
412 * delayed until a write operation occurs. The asymmetric (delayed)
413 * strategy allows multiple maps to have writeable references to
414 * the same region of a vm object, and hence cannot delay creating
415 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
416 * Copying of permanent objects is completely different; see
417 * vm_object_copy_strategically() in vm_object.c.
418 */
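/*
 * The ordered entry list is walked with a sentinel; a minimal sketch of
 * the idiom used throughout this file (the map lock is assumed held):
 */
#if 0
	vm_map_entry_t	entry;

	for (entry = vm_map_first_entry(map);
	     entry != vm_map_to_entry(map);
	     entry = entry->vme_next) {
		/* [entry->vme_start, entry->vme_end) is one mapped range */
	}
#endif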
419
420 static zone_t vm_map_zone; /* zone for vm_map structures */
421 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
422 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
423 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
424
425
426 /*
427 * Placeholder object for submap operations. This object is dropped
428 * into the range by a call to vm_map_find, and removed when
429 * vm_map_submap creates the submap.
430 */
431
432 vm_object_t vm_submap_object;
433
434 static void *map_data;
435 static vm_size_t map_data_size;
436 static void *kentry_data;
437 static vm_size_t kentry_data_size;
438 static int kentry_count = 2048; /* to init kentry_data_size */
439
440 #if CONFIG_EMBEDDED
441 #define NO_COALESCE_LIMIT 0
442 #else
443 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
444 #endif
445
446 /* Skip acquiring locks if we're in the midst of a kernel core dump */
447 unsigned int not_in_kdp = 1;
448
449 unsigned int vm_map_set_cache_attr_count = 0;
450
451 kern_return_t
452 vm_map_set_cache_attr(
453 vm_map_t map,
454 vm_map_offset_t va)
455 {
456 vm_map_entry_t map_entry;
457 vm_object_t object;
458 kern_return_t kr = KERN_SUCCESS;
459
460 vm_map_lock_read(map);
461
462 if (!vm_map_lookup_entry(map, va, &map_entry) ||
463 map_entry->is_sub_map) {
464 /*
465 * that memory is not properly mapped
466 */
467 kr = KERN_INVALID_ARGUMENT;
468 goto done;
469 }
470 object = map_entry->object.vm_object;
471
472 if (object == VM_OBJECT_NULL) {
473 /*
474 * there should be a VM object here at this point
475 */
476 kr = KERN_INVALID_ARGUMENT;
477 goto done;
478 }
479 vm_object_lock(object);
480 object->set_cache_attr = TRUE;
481 vm_object_unlock(object);
482
483 vm_map_set_cache_attr_count++;
484 done:
485 vm_map_unlock_read(map);
486
487 return kr;
488 }
489
490
491 #if CONFIG_CODE_DECRYPTION
492 /*
493 * vm_map_apple_protected:
494 * This remaps the requested part of the object with an object backed by
495 * the decrypting pager.
496 * crypt_info contains entry points and session data for the crypt module.
497 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
498 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
499 */
500 kern_return_t
501 vm_map_apple_protected(
502 vm_map_t map,
503 vm_map_offset_t start,
504 vm_map_offset_t end,
505 struct pager_crypt_info *crypt_info)
506 {
507 boolean_t map_locked;
508 kern_return_t kr;
509 vm_map_entry_t map_entry;
510 memory_object_t protected_mem_obj;
511 vm_object_t protected_object;
512 vm_map_offset_t map_addr;
513
514 vm_map_lock_read(map);
515 map_locked = TRUE;
516
517 /* lookup the protected VM object */
518 if (!vm_map_lookup_entry(map,
519 start,
520 &map_entry) ||
521 map_entry->vme_end < end ||
522 map_entry->is_sub_map) {
523 /* that memory is not properly mapped */
524 kr = KERN_INVALID_ARGUMENT;
525 goto done;
526 }
527 protected_object = map_entry->object.vm_object;
528 if (protected_object == VM_OBJECT_NULL) {
529 /* there should be a VM object here at this point */
530 kr = KERN_INVALID_ARGUMENT;
531 goto done;
532 }
533
534 /* make sure protected object stays alive while map is unlocked */
535 vm_object_reference(protected_object);
536
537 vm_map_unlock_read(map);
538 map_locked = FALSE;
539
540 /*
541 * Lookup (and create if necessary) the protected memory object
542 * matching that VM object.
543 * If successful, this also grabs a reference on the memory object,
544 * to guarantee that it doesn't go away before we get a chance to map
545 * it.
546 */
547 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
548
549 /* release extra ref on protected object */
550 vm_object_deallocate(protected_object);
551
552 if (protected_mem_obj == NULL) {
553 kr = KERN_FAILURE;
554 goto done;
555 }
556
557 /* map this memory object in place of the current one */
558 map_addr = start;
559 kr = vm_map_enter_mem_object(map,
560 &map_addr,
561 end - start,
562 (mach_vm_offset_t) 0,
563 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
564 (ipc_port_t) protected_mem_obj,
565 (map_entry->offset +
566 (start - map_entry->vme_start)),
567 TRUE,
568 map_entry->protection,
569 map_entry->max_protection,
570 map_entry->inheritance);
571 assert(map_addr == start);
572 /*
573 * Release the reference obtained by apple_protect_pager_setup().
574 * The mapping (if it succeeded) is now holding a reference on the
575 * memory object.
576 */
577 memory_object_deallocate(protected_mem_obj);
578
579 done:
580 if (map_locked) {
581 vm_map_unlock_read(map);
582 }
583 return kr;
584 }
585 #endif /* CONFIG_CODE_DECRYPTION */
586
587
588 lck_grp_t vm_map_lck_grp;
589 lck_grp_attr_t vm_map_lck_grp_attr;
590 lck_attr_t vm_map_lck_attr;
591
592
593 /*
594 * vm_map_init:
595 *
596 * Initialize the vm_map module. Must be called before
597 * any other vm_map routines.
598 *
599 * Map and entry structures are allocated from zones -- we must
600 * initialize those zones.
601 *
602 * There are three zones of interest:
603 *
604 * vm_map_zone: used to allocate maps.
605 * vm_map_entry_zone: used to allocate map entries.
606 * vm_map_kentry_zone: used to allocate map entries for the kernel.
607 *
608 * The kernel allocates map entries from a special zone that is initially
609 * "crammed" with memory. It would be difficult (perhaps impossible) for
610 * the kernel to allocate more memory to an entry zone when it became
611 * empty since the very act of allocating memory implies the creation
612 * of a new entry.
613 */
614 void
615 vm_map_init(
616 void)
617 {
618 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
619 PAGE_SIZE, "maps");
620 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
621
622 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
623 1024*1024, PAGE_SIZE*5,
624 "non-kernel map entries");
625 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
626
627 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
628 kentry_data_size, kentry_data_size,
629 "kernel map entries");
630 zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE);
631
632 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
633 16*1024, PAGE_SIZE, "map copies");
634 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
635
636 /*
637 * Cram the map and kentry zones with initial data.
638 * Set kentry_zone non-collectible to aid zone_gc().
639 */
640 zone_change(vm_map_zone, Z_COLLECT, FALSE);
641 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
642 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
643 zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
644 zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
645 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
646
647 zcram(vm_map_zone, map_data, map_data_size);
648 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
649
650 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
651 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
652 lck_attr_setdefault(&vm_map_lck_attr);
653 }
654
655 void
656 vm_map_steal_memory(
657 void)
658 {
659 map_data_size = round_page(10 * sizeof(struct _vm_map));
660 map_data = pmap_steal_memory(map_data_size);
661
662 #if 0
663 /*
664 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
665 * physical page (i.e. that beyond the kernel image and page tables)
666 * individually; we guess at most one entry per eight pages in the
667 * real world. This works out to roughly .1 of 1% of physical memory,
668 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
669 */
670 #endif
671 kentry_count = pmap_free_pages() / 8;
672
673
674 kentry_data_size =
675 round_page(kentry_count * sizeof(struct vm_map_entry));
676 kentry_data = pmap_steal_memory(kentry_data_size);
677 }
678
679 /*
680 * vm_map_create:
681 *
682 * Creates and returns a new empty VM map with
683 * the given physical map structure, and having
684 * the given lower and upper address bounds.
685 */
686 vm_map_t
687 vm_map_create(
688 pmap_t pmap,
689 vm_map_offset_t min,
690 vm_map_offset_t max,
691 boolean_t pageable)
692 {
693 static int color_seed = 0;
694 register vm_map_t result;
695
696 result = (vm_map_t) zalloc(vm_map_zone);
697 if (result == VM_MAP_NULL)
698 panic("vm_map_create");
699
700 vm_map_first_entry(result) = vm_map_to_entry(result);
701 vm_map_last_entry(result) = vm_map_to_entry(result);
702 result->hdr.nentries = 0;
703 result->hdr.entries_pageable = pageable;
704
705 vm_map_store_init( &(result->hdr) );
706
707 result->size = 0;
708 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
709 result->user_wire_size = 0;
710 result->ref_count = 1;
711 #if TASK_SWAPPER
712 result->res_count = 1;
713 result->sw_state = MAP_SW_IN;
714 #endif /* TASK_SWAPPER */
715 result->pmap = pmap;
716 result->min_offset = min;
717 result->max_offset = max;
718 result->wiring_required = FALSE;
719 result->no_zero_fill = FALSE;
720 result->mapped = FALSE;
721 result->wait_for_space = FALSE;
722 result->switch_protect = FALSE;
723 result->disable_vmentry_reuse = FALSE;
724 result->map_disallow_data_exec = FALSE;
725 result->highest_entry_end = 0;
726 result->first_free = vm_map_to_entry(result);
727 result->hint = vm_map_to_entry(result);
728 result->color_rr = (color_seed++) & vm_color_mask;
729 result->jit_entry_exists = FALSE;
730 #if CONFIG_FREEZE
731 result->default_freezer_toc = NULL;
732 #endif
733 vm_map_lock_init(result);
734 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
735
736 return(result);
737 }
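/*
 * A hypothetical creation sketch (simplified; not the actual task setup
 * path): pair a fresh pmap with an empty, pageable 64-bit user map.
 */
#if 0
	pmap_t		new_pmap;
	vm_map_t	new_map;

	new_pmap = pmap_create((vm_map_size_t) 0, TRUE);	/* 64-bit */
	new_map = vm_map_create(new_pmap,
				(vm_map_offset_t) MACH_VM_MIN_ADDRESS,
				(vm_map_offset_t) MACH_VM_MAX_ADDRESS,
				TRUE);		/* entries_pageable */
#endif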
738
739 /*
740 * vm_map_entry_create: [ internal use only ]
741 *
742 * Allocates a VM map entry for insertion in the
743 * given map (or map copy). No fields are filled.
744 */
745 #define vm_map_entry_create(map) \
746 _vm_map_entry_create(&(map)->hdr)
747
748 #define vm_map_copy_entry_create(copy) \
749 _vm_map_entry_create(&(copy)->cpy_hdr)
750
751 static vm_map_entry_t
752 _vm_map_entry_create(
753 register struct vm_map_header *map_header)
754 {
755 register zone_t zone;
756 register vm_map_entry_t entry;
757
758 if (map_header->entries_pageable)
759 zone = vm_map_entry_zone;
760 else
761 zone = vm_map_kentry_zone;
762
763 entry = (vm_map_entry_t) zalloc(zone);
764 if (entry == VM_MAP_ENTRY_NULL)
765 panic("vm_map_entry_create");
766 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
767
768 return(entry);
769 }
770
771 /*
772 * vm_map_entry_dispose: [ internal use only ]
773 *
774 * Inverse of vm_map_entry_create.
775 *
776 * write map lock held so no need to
777 * do anything special to insure correctness
778 * of the stores
779 */
780 #define vm_map_entry_dispose(map, entry) \
781 vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE); \
782 _vm_map_entry_dispose(&(map)->hdr, (entry))
783
784 #define vm_map_copy_entry_dispose(copy, entry) \
785 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
786
787 static void
788 _vm_map_entry_dispose(
789 register struct vm_map_header *map_header,
790 register vm_map_entry_t entry)
791 {
792 register zone_t zone;
793
794 if (map_header->entries_pageable)
795 zone = vm_map_entry_zone;
796 else
797 zone = vm_map_kentry_zone;
798
799 zfree(zone, entry);
800 }
801
802 #if MACH_ASSERT
803 static boolean_t first_free_check = FALSE;
804 boolean_t
805 first_free_is_valid(
806 vm_map_t map)
807 {
808 if (!first_free_check)
809 return TRUE;
810
811 return( first_free_is_valid_store( map ));
812 }
813 #endif /* MACH_ASSERT */
814
815
816 #define vm_map_copy_entry_link(copy, after_where, entry) \
817 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
818
819 #define vm_map_copy_entry_unlink(copy, entry) \
820 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
821
822 #if MACH_ASSERT && TASK_SWAPPER
823 /*
824 * vm_map_res_reference:
825 *
826 * Adds another valid residence count to the given map.
827 *
828 * Map is locked so this function can be called from
829 * vm_map_swapin.
830 *
831 */
832 void vm_map_res_reference(register vm_map_t map)
833 {
834 /* assert map is locked */
835 assert(map->res_count >= 0);
836 assert(map->ref_count >= map->res_count);
837 if (map->res_count == 0) {
838 lck_mtx_unlock(&map->s_lock);
839 vm_map_lock(map);
840 vm_map_swapin(map);
841 lck_mtx_lock(&map->s_lock);
842 ++map->res_count;
843 vm_map_unlock(map);
844 } else
845 ++map->res_count;
846 }
847
848 /*
849 * vm_map_reference_swap:
850 *
851 * Adds valid reference and residence counts to the given map.
852 *
853 * The map may not be in memory (i.e. zero residence count).
854 *
855 */
856 void vm_map_reference_swap(register vm_map_t map)
857 {
858 assert(map != VM_MAP_NULL);
859 lck_mtx_lock(&map->s_lock);
860 assert(map->res_count >= 0);
861 assert(map->ref_count >= map->res_count);
862 map->ref_count++;
863 vm_map_res_reference(map);
864 lck_mtx_unlock(&map->s_lock);
865 }
866
867 /*
868 * vm_map_res_deallocate:
869 *
870 * Decrement residence count on a map; possibly causing swapout.
871 *
872 * The map must be in memory (i.e. non-zero residence count).
873 *
874 * The map is locked, so this function is callable from vm_map_deallocate.
875 *
876 */
877 void vm_map_res_deallocate(register vm_map_t map)
878 {
879 assert(map->res_count > 0);
880 if (--map->res_count == 0) {
881 lck_mtx_unlock(&map->s_lock);
882 vm_map_lock(map);
883 vm_map_swapout(map);
884 vm_map_unlock(map);
885 lck_mtx_lock(&map->s_lock);
886 }
887 assert(map->ref_count >= map->res_count);
888 }
889 #endif /* MACH_ASSERT && TASK_SWAPPER */
890
891 /*
892 * vm_map_destroy:
893 *
894 * Actually destroy a map.
895 */
896 void
897 vm_map_destroy(
898 vm_map_t map,
899 int flags)
900 {
901 vm_map_lock(map);
902
903 /* clean up regular map entries */
904 (void) vm_map_delete(map, map->min_offset, map->max_offset,
905 flags, VM_MAP_NULL);
906 /* clean up leftover special mappings (commpage, etc...) */
907 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
908 flags, VM_MAP_NULL);
909
910 #if CONFIG_FREEZE
911 if (map->default_freezer_toc){
912 default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
913 }
914 #endif
915 vm_map_unlock(map);
916
917 assert(map->hdr.nentries == 0);
918
919 if(map->pmap)
920 pmap_destroy(map->pmap);
921
922 zfree(vm_map_zone, map);
923 }
924
925 #if TASK_SWAPPER
926 /*
927 * vm_map_swapin/vm_map_swapout
928 *
929 * Swap a map in and out, either referencing or releasing its resources.
930 * These functions are internal use only; however, they must be exported
931 * because they may be called from macros, which are exported.
932 *
933 * In the case of swapout, there could be races on the residence count,
934 * so if the residence count is up, we return, assuming that a
935 * vm_map_deallocate() call in the near future will bring us back.
936 *
937 * Locking:
938 * -- We use the map write lock for synchronization among races.
939 * -- The map write lock, and not the simple s_lock, protects the
940 * swap state of the map.
941 * -- If a map entry is a share map, then we hold both locks, in
942 * hierarchical order.
943 *
944 * Synchronization Notes:
945 * 1) If a vm_map_swapin() call happens while swapout in progress, it
946 * will block on the map lock and proceed when swapout is through.
947 * 2) A vm_map_reference() call at this time is illegal, and will
948 * cause a panic. vm_map_reference() is only allowed on resident
949 * maps, since it refuses to block.
950 * 3) A vm_map_swapin() call during a swapin will block, and
951 * proceed when the first swapin is done, turning into a nop.
952 * This is the reason the res_count is not incremented until
953 * after the swapin is complete.
954 * 4) There is a timing hole after the checks of the res_count, before
955 * the map lock is taken, during which a swapin may get the lock
956 * before a swapout about to happen. If this happens, the swapin
957 * will detect the state and increment the reference count, causing
958 * the swapout to be a nop, thereby delaying it until a later
959 * vm_map_deallocate. If the swapout gets the lock first, then
960 * the swapin will simply block until the swapout is done, and
961 * then proceed.
962 *
963 * Because vm_map_swapin() is potentially an expensive operation, it
964 * should be used with caution.
965 *
966 * Invariants:
967 * 1) A map with a residence count of zero is either swapped, or
968 * being swapped.
969 * 2) A map with a non-zero residence count is either resident,
970 * or being swapped in.
971 */
972
973 int vm_map_swap_enable = 1;
974
975 void vm_map_swapin (vm_map_t map)
976 {
977 register vm_map_entry_t entry;
978
979 if (!vm_map_swap_enable) /* debug */
980 return;
981
982 /*
983 * Map is locked
984 * First deal with various races.
985 */
986 if (map->sw_state == MAP_SW_IN)
987 /*
988 * we raced with swapout and won. Returning will incr.
989 * the res_count, turning the swapout into a nop.
990 */
991 return;
992
993 /*
994 * The residence count must be zero. If we raced with another
995 * swapin, the state would have been IN; if we raced with a
996 * swapout (after another competing swapin), we must have lost
997 * the race to get here (see above comment), in which case
998 * res_count is still 0.
999 */
1000 assert(map->res_count == 0);
1001
1002 /*
1003 * There are no intermediate states of a map going out or
1004 * coming in, since the map is locked during the transition.
1005 */
1006 assert(map->sw_state == MAP_SW_OUT);
1007
1008 /*
1009 * We now operate upon each map entry. If the entry is a sub-
1010 * or share-map, we call vm_map_res_reference upon it.
1011 * If the entry is an object, we call vm_object_res_reference
1012 * (this may iterate through the shadow chain).
1013 * Note that we hold the map locked the entire time,
1014 * even if we get back here via a recursive call in
1015 * vm_map_res_reference.
1016 */
1017 entry = vm_map_first_entry(map);
1018
1019 while (entry != vm_map_to_entry(map)) {
1020 if (entry->object.vm_object != VM_OBJECT_NULL) {
1021 if (entry->is_sub_map) {
1022 vm_map_t lmap = entry->object.sub_map;
1023 lck_mtx_lock(&lmap->s_lock);
1024 vm_map_res_reference(lmap);
1025 lck_mtx_unlock(&lmap->s_lock);
1026 } else {
1027 vm_object_t object = entry->object.vm_object;
1028 vm_object_lock(object);
1029 /*
1030 * This call may iterate through the
1031 * shadow chain.
1032 */
1033 vm_object_res_reference(object);
1034 vm_object_unlock(object);
1035 }
1036 }
1037 entry = entry->vme_next;
1038 }
1039 assert(map->sw_state == MAP_SW_OUT);
1040 map->sw_state = MAP_SW_IN;
1041 }
1042
1043 void vm_map_swapout(vm_map_t map)
1044 {
1045 register vm_map_entry_t entry;
1046
1047 /*
1048 * Map is locked
1049 * First deal with various races.
1050 * If we raced with a swapin and lost, the residence count
1051 * will have been incremented to 1, and we simply return.
1052 */
1053 lck_mtx_lock(&map->s_lock);
1054 if (map->res_count != 0) {
1055 lck_mtx_unlock(&map->s_lock);
1056 return;
1057 }
1058 lck_mtx_unlock(&map->s_lock);
1059
1060 /*
1061 * There are no intermediate states of a map going out or
1062 * coming in, since the map is locked during the transition.
1063 */
1064 assert(map->sw_state == MAP_SW_IN);
1065
1066 if (!vm_map_swap_enable)
1067 return;
1068
1069 /*
1070 * We now operate upon each map entry. If the entry is a sub-
1071 * or share-map, we call vm_map_res_deallocate upon it.
1072 * If the entry is an object, we call vm_object_res_deallocate
1073 * (this may iterate through the shadow chain).
1074 * Note that we hold the map locked the entire time,
1075 * even if we get back here via a recursive call in
1076 * vm_map_res_deallocate.
1077 */
1078 entry = vm_map_first_entry(map);
1079
1080 while (entry != vm_map_to_entry(map)) {
1081 if (entry->object.vm_object != VM_OBJECT_NULL) {
1082 if (entry->is_sub_map) {
1083 vm_map_t lmap = entry->object.sub_map;
1084 lck_mtx_lock(&lmap->s_lock);
1085 vm_map_res_deallocate(lmap);
1086 lck_mtx_unlock(&lmap->s_lock);
1087 } else {
1088 vm_object_t object = entry->object.vm_object;
1089 vm_object_lock(object);
1090 /*
1091 * This call may take a long time,
1092 * since it could actively push
1093 * out pages (if we implement it
1094 * that way).
1095 */
1096 vm_object_res_deallocate(object);
1097 vm_object_unlock(object);
1098 }
1099 }
1100 entry = entry->vme_next;
1101 }
1102 assert(map->sw_state == MAP_SW_IN);
1103 map->sw_state = MAP_SW_OUT;
1104 }
1105
1106 #endif /* TASK_SWAPPER */
1107
1108 /*
1109 * vm_map_lookup_entry: [ internal use only ]
1110 *
1111 * Calls into the vm map store layer to find the map
1112 * entry containing (or immediately preceding) the
1113 * specified address in the given map; the entry is returned
1114 * in the "entry" parameter. The boolean
1115 * result indicates whether the address is
1116 * actually contained in the map.
1117 */
1118 boolean_t
1119 vm_map_lookup_entry(
1120 register vm_map_t map,
1121 register vm_map_offset_t address,
1122 vm_map_entry_t *entry) /* OUT */
1123 {
1124 return ( vm_map_store_lookup_entry( map, address, entry ));
1125 }
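/*
 * A minimal usage sketch ("map" and "addr" are placeholders; the read
 * lock is taken around the lookup so the returned entry stays valid):
 */
#if 0
	vm_map_entry_t	entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* addr lies within [entry->vme_start, entry->vme_end) */
	} else {
		/* no entry contains addr; "entry" precedes the hole */
	}
	vm_map_unlock_read(map);
#endif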
1126
1127 /*
1128 * Routine: vm_map_find_space
1129 * Purpose:
1130 * Allocate a range in the specified virtual address map,
1131 * returning the entry allocated for that range.
1132 * Used by kmem_alloc, etc.
1133 *
1134 * The map must NOT be locked. It will be returned locked
1135 * on KERN_SUCCESS, unlocked on failure.
1136 *
1137 * If an entry is allocated, the object/offset fields
1138 * are initialized to zero.
1139 */
1140 kern_return_t
1141 vm_map_find_space(
1142 register vm_map_t map,
1143 vm_map_offset_t *address, /* OUT */
1144 vm_map_size_t size,
1145 vm_map_offset_t mask,
1146 int flags,
1147 vm_map_entry_t *o_entry) /* OUT */
1148 {
1149 register vm_map_entry_t entry, new_entry;
1150 register vm_map_offset_t start;
1151 register vm_map_offset_t end;
1152
1153 if (size == 0) {
1154 *address = 0;
1155 return KERN_INVALID_ARGUMENT;
1156 }
1157
1158 if (flags & VM_FLAGS_GUARD_AFTER) {
1159 /* account for the back guard page in the size */
1160 size += PAGE_SIZE_64;
1161 }
1162
1163 new_entry = vm_map_entry_create(map);
1164
1165 /*
1166 * Look for the first possible address; if there's already
1167 * something at this address, we have to start after it.
1168 */
1169
1170 vm_map_lock(map);
1171
1172 if( map->disable_vmentry_reuse == TRUE) {
1173 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1174 } else {
1175 assert(first_free_is_valid(map));
1176 if ((entry = map->first_free) == vm_map_to_entry(map))
1177 start = map->min_offset;
1178 else
1179 start = entry->vme_end;
1180 }
1181
1182 /*
1183 * In any case, the "entry" always precedes
1184 * the proposed new region throughout the loop:
1185 */
1186
1187 while (TRUE) {
1188 register vm_map_entry_t next;
1189
1190 /*
1191 * Find the end of the proposed new region.
1192 * Be sure we didn't go beyond the end, or
1193 * wrap around the address.
1194 */
1195
1196 if (flags & VM_FLAGS_GUARD_BEFORE) {
1197 /* reserve space for the front guard page */
1198 start += PAGE_SIZE_64;
1199 }
1200 end = ((start + mask) & ~mask);
1201
1202 if (end < start) {
1203 vm_map_entry_dispose(map, new_entry);
1204 vm_map_unlock(map);
1205 return(KERN_NO_SPACE);
1206 }
1207 start = end;
1208 end += size;
1209
1210 if ((end > map->max_offset) || (end < start)) {
1211 vm_map_entry_dispose(map, new_entry);
1212 vm_map_unlock(map);
1213 return(KERN_NO_SPACE);
1214 }
1215
1216 /*
1217 * If there are no more entries, we must win.
1218 */
1219
1220 next = entry->vme_next;
1221 if (next == vm_map_to_entry(map))
1222 break;
1223
1224 /*
1225 * If there is another entry, it must be
1226 * after the end of the potential new region.
1227 */
1228
1229 if (next->vme_start >= end)
1230 break;
1231
1232 /*
1233 * Didn't fit -- move to the next entry.
1234 */
1235
1236 entry = next;
1237 start = entry->vme_end;
1238 }
1239
1240 /*
1241 * At this point,
1242 * "start" and "end" should define the endpoints of the
1243 * available new range, and
1244 * "entry" should refer to the region before the new
1245 * range, and
1246 *
1247 * the map should be locked.
1248 */
1249
1250 if (flags & VM_FLAGS_GUARD_BEFORE) {
1251 /* go back for the front guard page */
1252 start -= PAGE_SIZE_64;
1253 }
1254 *address = start;
1255
1256 assert(start < end);
1257 new_entry->vme_start = start;
1258 new_entry->vme_end = end;
1259 assert(page_aligned(new_entry->vme_start));
1260 assert(page_aligned(new_entry->vme_end));
1261
1262 new_entry->is_shared = FALSE;
1263 new_entry->is_sub_map = FALSE;
1264 new_entry->use_pmap = FALSE;
1265 new_entry->object.vm_object = VM_OBJECT_NULL;
1266 new_entry->offset = (vm_object_offset_t) 0;
1267
1268 new_entry->needs_copy = FALSE;
1269
1270 new_entry->inheritance = VM_INHERIT_DEFAULT;
1271 new_entry->protection = VM_PROT_DEFAULT;
1272 new_entry->max_protection = VM_PROT_ALL;
1273 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1274 new_entry->wired_count = 0;
1275 new_entry->user_wired_count = 0;
1276
1277 new_entry->in_transition = FALSE;
1278 new_entry->needs_wakeup = FALSE;
1279 new_entry->no_cache = FALSE;
1280 new_entry->permanent = FALSE;
1281 new_entry->superpage_size = 0;
1282
1283 new_entry->alias = 0;
1284 new_entry->zero_wired_pages = FALSE;
1285
1286 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1287
1288 /*
1289 * Insert the new entry into the list
1290 */
1291
1292 vm_map_store_entry_link(map, entry, new_entry);
1293
1294 map->size += size;
1295
1296 /*
1297 * Update the lookup hint
1298 */
1299 SAVE_HINT_MAP_WRITE(map, new_entry);
1300
1301 *o_entry = new_entry;
1302 return(KERN_SUCCESS);
1303 }
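/*
 * A minimal calling sketch (simplified from the kernel allocators;
 * "size" and "object" are supplied by the caller).  The map comes back
 * locked on success and must be unlocked once the entry is filled in.
 */
#if 0
	vm_map_offset_t	map_addr;
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(kernel_map, &map_addr, size,
			       (vm_map_offset_t) 0, 0, &entry);
	if (kr == KERN_SUCCESS) {
		entry->object.vm_object = object;
		entry->offset = (vm_object_offset_t) 0;
		vm_map_unlock(kernel_map);
	}
#endif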
1304
1305 int vm_map_pmap_enter_print = FALSE;
1306 int vm_map_pmap_enter_enable = FALSE;
1307
1308 /*
1309 * Routine: vm_map_pmap_enter [internal only]
1310 *
1311 * Description:
1312 * Force pages from the specified object to be entered into
1313 * the pmap at the specified address if they are present.
1314 * As soon as a page is not found in the object, the scan ends.
1315 *
1316 * Returns:
1317 * Nothing.
1318 *
1319 * In/out conditions:
1320 * The source map should not be locked on entry.
1321 */
1322 static void
1323 vm_map_pmap_enter(
1324 vm_map_t map,
1325 register vm_map_offset_t addr,
1326 register vm_map_offset_t end_addr,
1327 register vm_object_t object,
1328 vm_object_offset_t offset,
1329 vm_prot_t protection)
1330 {
1331 int type_of_fault;
1332 kern_return_t kr;
1333
1334 if(map->pmap == 0)
1335 return;
1336
1337 while (addr < end_addr) {
1338 register vm_page_t m;
1339
1340 vm_object_lock(object);
1341
1342 m = vm_page_lookup(object, offset);
1343 /*
1344 * ENCRYPTED SWAP:
1345 * The user should never see encrypted data, so do not
1346 * enter an encrypted page in the page table.
1347 */
1348 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1349 m->fictitious ||
1350 (m->unusual && ( m->error || m->restart || m->absent))) {
1351 vm_object_unlock(object);
1352 return;
1353 }
1354
1355 if (vm_map_pmap_enter_print) {
1356 printf("vm_map_pmap_enter:");
1357 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1358 map, (unsigned long long)addr, object, (unsigned long long)offset);
1359 }
1360 type_of_fault = DBG_CACHE_HIT_FAULT;
1361 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1362 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1363 &type_of_fault);
1364
1365 vm_object_unlock(object);
1366
1367 offset += PAGE_SIZE_64;
1368 addr += PAGE_SIZE;
1369 }
1370 }
1371
1372 boolean_t vm_map_pmap_is_empty(
1373 vm_map_t map,
1374 vm_map_offset_t start,
1375 vm_map_offset_t end);
1376 boolean_t vm_map_pmap_is_empty(
1377 vm_map_t map,
1378 vm_map_offset_t start,
1379 vm_map_offset_t end)
1380 {
1381 #ifdef MACHINE_PMAP_IS_EMPTY
1382 return pmap_is_empty(map->pmap, start, end);
1383 #else /* MACHINE_PMAP_IS_EMPTY */
1384 vm_map_offset_t offset;
1385 ppnum_t phys_page;
1386
1387 if (map->pmap == NULL) {
1388 return TRUE;
1389 }
1390
1391 for (offset = start;
1392 offset < end;
1393 offset += PAGE_SIZE) {
1394 phys_page = pmap_find_phys(map->pmap, offset);
1395 if (phys_page) {
1396 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1397 "page %d at 0x%llx\n",
1398 map, (long long)start, (long long)end,
1399 phys_page, (long long)offset);
1400 return FALSE;
1401 }
1402 }
1403 return TRUE;
1404 #endif /* MACHINE_PMAP_IS_EMPTY */
1405 }
1406
1407 /*
1408 * Routine: vm_map_enter
1409 *
1410 * Description:
1411 * Allocate a range in the specified virtual address map.
1412 * The resulting range will refer to memory defined by
1413 * the given memory object and offset into that object.
1414 *
1415 * Arguments are as defined in the vm_map call.
1416 */
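/*
 * A hypothetical caller sketch (cf. the vm_allocate path; simplified):
 * an anonymous, zero-filled mapping placed anywhere in the map.
 */
#if 0
	vm_map_offset_t	map_addr = 0;
	kern_return_t	kr;

	kr = vm_map_enter(map, &map_addr, size, (vm_map_offset_t) 0,
			  VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0,
			  FALSE,			/* needs_copy */
			  VM_PROT_DEFAULT, VM_PROT_ALL,
			  VM_INHERIT_DEFAULT);
#endif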
1417 int _map_enter_debug = 0;
1418 static unsigned int vm_map_enter_restore_successes = 0;
1419 static unsigned int vm_map_enter_restore_failures = 0;
1420 kern_return_t
1421 vm_map_enter(
1422 vm_map_t map,
1423 vm_map_offset_t *address, /* IN/OUT */
1424 vm_map_size_t size,
1425 vm_map_offset_t mask,
1426 int flags,
1427 vm_object_t object,
1428 vm_object_offset_t offset,
1429 boolean_t needs_copy,
1430 vm_prot_t cur_protection,
1431 vm_prot_t max_protection,
1432 vm_inherit_t inheritance)
1433 {
1434 vm_map_entry_t entry, new_entry;
1435 vm_map_offset_t start, tmp_start, tmp_offset;
1436 vm_map_offset_t end, tmp_end;
1437 vm_map_offset_t tmp2_start, tmp2_end;
1438 vm_map_offset_t step;
1439 kern_return_t result = KERN_SUCCESS;
1440 vm_map_t zap_old_map = VM_MAP_NULL;
1441 vm_map_t zap_new_map = VM_MAP_NULL;
1442 boolean_t map_locked = FALSE;
1443 boolean_t pmap_empty = TRUE;
1444 boolean_t new_mapping_established = FALSE;
1445 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1446 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1447 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1448 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1449 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1450 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1451 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1452 char alias;
1453 vm_map_offset_t effective_min_offset, effective_max_offset;
1454 kern_return_t kr;
1455
1456 if (superpage_size) {
1457 switch (superpage_size) {
1458 /*
1459 * Note that the current implementation only supports
1460 * a single size for superpages, SUPERPAGE_SIZE, per
1461 * architecture. As soon as more sizes are to be
1462 * supported, SUPERPAGE_SIZE has to be replaced
1463 * with a lookup of the size based on superpage_size.
1464 */
1465 #ifdef __x86_64__
1466 case SUPERPAGE_SIZE_ANY:
1467 /* handle it like 2 MB and round up to page size */
1468 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1469 case SUPERPAGE_SIZE_2MB:
1470 break;
1471 #endif
1472 default:
1473 return KERN_INVALID_ARGUMENT;
1474 }
1475 mask = SUPERPAGE_SIZE-1;
1476 if (size & (SUPERPAGE_SIZE-1))
1477 return KERN_INVALID_ARGUMENT;
1478 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1479 }
1480
1481
1482 #if CONFIG_EMBEDDED
1483 if (cur_protection & VM_PROT_WRITE){
1484 if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
1485 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1486 cur_protection &= ~VM_PROT_EXECUTE;
1487 }
1488 }
1489 #endif /* CONFIG_EMBEDDED */
1490
1491 if (is_submap) {
1492 if (purgable) {
1493 /* submaps can not be purgeable */
1494 return KERN_INVALID_ARGUMENT;
1495 }
1496 if (object == VM_OBJECT_NULL) {
1497 /* submaps can not be created lazily */
1498 return KERN_INVALID_ARGUMENT;
1499 }
1500 }
1501 if (flags & VM_FLAGS_ALREADY) {
1502 /*
1503 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1504 * is already present. For it to be meaningful, the requested
1505 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1506 * we shouldn't try and remove what was mapped there first
1507 * (!VM_FLAGS_OVERWRITE).
1508 */
1509 if ((flags & VM_FLAGS_ANYWHERE) ||
1510 (flags & VM_FLAGS_OVERWRITE)) {
1511 return KERN_INVALID_ARGUMENT;
1512 }
1513 }
1514
1515 effective_min_offset = map->min_offset;
1516
1517 if (flags & VM_FLAGS_BEYOND_MAX) {
1518 /*
1519 * Allow an insertion beyond the map's max offset.
1520 */
1521 if (vm_map_is_64bit(map))
1522 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1523 else
1524 effective_max_offset = 0x00000000FFFFF000ULL;
1525 } else {
1526 effective_max_offset = map->max_offset;
1527 }
1528
1529 if (size == 0 ||
1530 (offset & PAGE_MASK_64) != 0) {
1531 *address = 0;
1532 return KERN_INVALID_ARGUMENT;
1533 }
1534
1535 VM_GET_FLAGS_ALIAS(flags, alias);
1536
1537 #define RETURN(value) { result = value; goto BailOut; }
1538
1539 assert(page_aligned(*address));
1540 assert(page_aligned(size));
1541
1542 /*
1543 * Only zero-fill objects are allowed to be purgable.
1544 * LP64todo - limit purgable objects to 32-bits for now
1545 */
1546 if (purgable &&
1547 (offset != 0 ||
1548 (object != VM_OBJECT_NULL &&
1549 (object->vo_size != size ||
1550 object->purgable == VM_PURGABLE_DENY))
1551 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1552 return KERN_INVALID_ARGUMENT;
1553
1554 if (!anywhere && overwrite) {
1555 /*
1556 * Create a temporary VM map to hold the old mappings in the
1557 * affected area while we create the new one.
1558 * This avoids releasing the VM map lock in
1559 * vm_map_entry_delete() and allows atomicity
1560 * when we want to replace some mappings with a new one.
1561 * It also allows us to restore the old VM mappings if the
1562 * new mapping fails.
1563 */
1564 zap_old_map = vm_map_create(PMAP_NULL,
1565 *address,
1566 *address + size,
1567 map->hdr.entries_pageable);
1568 }
1569
1570 StartAgain: ;
1571
1572 start = *address;
1573
1574 if (anywhere) {
1575 vm_map_lock(map);
1576 map_locked = TRUE;
1577
1578 if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
1579 result = KERN_INVALID_ARGUMENT;
1580 goto BailOut;
1581 }
1582
1583 /*
1584 * Calculate the first possible address.
1585 */
1586
1587 if (start < effective_min_offset)
1588 start = effective_min_offset;
1589 if (start > effective_max_offset)
1590 RETURN(KERN_NO_SPACE);
1591
1592 /*
1593 * Look for the first possible address;
1594 * if there's already something at this
1595 * address, we have to start after it.
1596 */
1597
1598 if( map->disable_vmentry_reuse == TRUE) {
1599 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1600 } else {
1601 assert(first_free_is_valid(map));
1602
1603 entry = map->first_free;
1604
1605 if (entry == vm_map_to_entry(map)) {
1606 entry = NULL;
1607 } else {
1608 if (entry->vme_next == vm_map_to_entry(map)){
1609 /*
1610 * Hole at the end of the map.
1611 */
1612 entry = NULL;
1613 } else {
1614 if (start < (entry->vme_next)->vme_start ) {
1615 start = entry->vme_end;
1616 } else {
1617 /*
1618 * Need to do a lookup.
1619 */
1620 entry = NULL;
1621 }
1622 }
1623 }
1624
1625 if (entry == NULL) {
1626 vm_map_entry_t tmp_entry;
1627 if (vm_map_lookup_entry(map, start, &tmp_entry))
1628 start = tmp_entry->vme_end;
1629 entry = tmp_entry;
1630 }
1631 }
1632
1633 /*
1634 * In any case, the "entry" always precedes
1635 * the proposed new region throughout the
1636 * loop:
1637 */
1638
1639 while (TRUE) {
1640 register vm_map_entry_t next;
1641
1642 /*
1643 * Find the end of the proposed new region.
1644 * Be sure we didn't go beyond the end, or
1645 * wrap around the address.
1646 */
1647
1648 end = ((start + mask) & ~mask);
1649 if (end < start)
1650 RETURN(KERN_NO_SPACE);
1651 start = end;
1652 end += size;
1653
1654 if ((end > effective_max_offset) || (end < start)) {
1655 if (map->wait_for_space) {
1656 if (size <= (effective_max_offset -
1657 effective_min_offset)) {
1658 assert_wait((event_t)map,
1659 THREAD_ABORTSAFE);
1660 vm_map_unlock(map);
1661 map_locked = FALSE;
1662 thread_block(THREAD_CONTINUE_NULL);
1663 goto StartAgain;
1664 }
1665 }
1666 RETURN(KERN_NO_SPACE);
1667 }
1668
1669 /*
1670 * If there are no more entries, we must win.
1671 */
1672
1673 next = entry->vme_next;
1674 if (next == vm_map_to_entry(map))
1675 break;
1676
1677 /*
1678 * If there is another entry, it must be
1679 * after the end of the potential new region.
1680 */
1681
1682 if (next->vme_start >= end)
1683 break;
1684
1685 /*
1686 * Didn't fit -- move to the next entry.
1687 */
1688
1689 entry = next;
1690 start = entry->vme_end;
1691 }
1692 *address = start;
1693 } else {
1694 /*
1695 * Verify that:
1696 * the address doesn't itself violate
1697 * the mask requirement.
1698 */
1699
1700 vm_map_lock(map);
1701 map_locked = TRUE;
1702 if ((start & mask) != 0)
1703 RETURN(KERN_NO_SPACE);
1704
1705 /*
1706 * ... the address is within bounds
1707 */
1708
1709 end = start + size;
1710
1711 if ((start < effective_min_offset) ||
1712 (end > effective_max_offset) ||
1713 (start >= end)) {
1714 RETURN(KERN_INVALID_ADDRESS);
1715 }
1716
1717 if (overwrite && zap_old_map != VM_MAP_NULL) {
1718 /*
1719 * Fixed mapping and "overwrite" flag: attempt to
1720 * remove all existing mappings in the specified
1721 * address range, saving them in our "zap_old_map".
1722 */
1723 (void) vm_map_delete(map, start, end,
1724 VM_MAP_REMOVE_SAVE_ENTRIES,
1725 zap_old_map);
1726 }
1727
1728 /*
1729 * ... the starting address isn't allocated
1730 */
1731
1732 if (vm_map_lookup_entry(map, start, &entry)) {
1733 if (! (flags & VM_FLAGS_ALREADY)) {
1734 RETURN(KERN_NO_SPACE);
1735 }
1736 /*
1737 * Check if what's already there is what we want.
1738 */
1739 tmp_start = start;
1740 tmp_offset = offset;
1741 if (entry->vme_start < start) {
1742 tmp_start -= start - entry->vme_start;
1743 tmp_offset -= start - entry->vme_start;
1744
1745 }
1746 for (; entry->vme_start < end;
1747 entry = entry->vme_next) {
1748 /*
1749 * Check if the mapping's attributes
1750 * match the existing map entry.
1751 */
1752 if (entry == vm_map_to_entry(map) ||
1753 entry->vme_start != tmp_start ||
1754 entry->is_sub_map != is_submap ||
1755 entry->offset != tmp_offset ||
1756 entry->needs_copy != needs_copy ||
1757 entry->protection != cur_protection ||
1758 entry->max_protection != max_protection ||
1759 entry->inheritance != inheritance ||
1760 entry->alias != alias) {
1761 /* not the same mapping ! */
1762 RETURN(KERN_NO_SPACE);
1763 }
1764 /*
1765 * Check if the same object is being mapped.
1766 */
1767 if (is_submap) {
1768 if (entry->object.sub_map !=
1769 (vm_map_t) object) {
1770 /* not the same submap */
1771 RETURN(KERN_NO_SPACE);
1772 }
1773 } else {
1774 if (entry->object.vm_object != object) {
1775 /* not the same VM object... */
1776 vm_object_t obj2;
1777
1778 obj2 = entry->object.vm_object;
1779 if ((obj2 == VM_OBJECT_NULL ||
1780 obj2->internal) &&
1781 (object == VM_OBJECT_NULL ||
1782 object->internal)) {
1783 /*
1784 * ... but both are
1785 * anonymous memory,
1786 * so equivalent.
1787 */
1788 } else {
1789 RETURN(KERN_NO_SPACE);
1790 }
1791 }
1792 }
1793
1794 tmp_offset += entry->vme_end - entry->vme_start;
1795 tmp_start += entry->vme_end - entry->vme_start;
1796 if (entry->vme_end >= end) {
1797 /* reached the end of our mapping */
1798 break;
1799 }
1800 }
1801 /* it all matches: let's use what's already there ! */
1802 RETURN(KERN_MEMORY_PRESENT);
1803 }
1804
1805 /*
1806 * ... the next region doesn't overlap the
1807 * end point.
1808 */
1809
1810 if ((entry->vme_next != vm_map_to_entry(map)) &&
1811 (entry->vme_next->vme_start < end))
1812 RETURN(KERN_NO_SPACE);
1813 }
1814
1815 /*
1816 * At this point,
1817 * "start" and "end" should define the endpoints of the
1818 * available new range, and
1819 * "entry" should refer to the region before the new
1820 * range, and
1821 *
1822 * the map should be locked.
1823 */
1824
1825 /*
1826 * See whether we can avoid creating a new entry (and object) by
1827 * extending one of our neighbors. [So far, we only attempt to
1828 * extend from below.] Note that we can never extend/join
1829 * purgable objects because they need to remain distinct
1830 * entities in order to implement their "volatile object"
1831 * semantics.
1832 */
1833
1834 if (purgable) {
1835 if (object == VM_OBJECT_NULL) {
1836 object = vm_object_allocate(size);
1837 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1838 object->purgable = VM_PURGABLE_NONVOLATILE;
1839 offset = (vm_object_offset_t)0;
1840 }
1841 } else if ((is_submap == FALSE) &&
1842 (object == VM_OBJECT_NULL) &&
1843 (entry != vm_map_to_entry(map)) &&
1844 (entry->vme_end == start) &&
1845 (!entry->is_shared) &&
1846 (!entry->is_sub_map) &&
1847 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1848 (entry->inheritance == inheritance) &&
1849 (entry->protection == cur_protection) &&
1850 (entry->max_protection == max_protection) &&
1851 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1852 (entry->in_transition == 0) &&
1853 (entry->no_cache == no_cache) &&
1854 ((entry->vme_end - entry->vme_start) + size <=
1855 (alias == VM_MEMORY_REALLOC ?
1856 ANON_CHUNK_SIZE :
1857 NO_COALESCE_LIMIT)) &&
1858 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1859 if (vm_object_coalesce(entry->object.vm_object,
1860 VM_OBJECT_NULL,
1861 entry->offset,
1862 (vm_object_offset_t) 0,
1863 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1864 (vm_map_size_t)(end - entry->vme_end))) {
1865
1866 /*
1867 * Coalesced the two objects - can extend
1868 * the previous map entry to include the
1869 * new range.
1870 */
1871 map->size += (end - entry->vme_end);
1872 assert(entry->vme_start < end);
1873 entry->vme_end = end;
1874 vm_map_store_update_first_free(map, map->first_free);
1875 RETURN(KERN_SUCCESS);
1876 }
1877 }
1878
1879 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1880 new_entry = NULL;
1881
1882 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1883 tmp2_end = tmp2_start + step;
1884 /*
1885 * Create a new entry
1886 * LP64todo - for now, we can only allocate 4GB internal objects
1887 * because the default pager can't page bigger ones. Remove this
1888 * when it can.
1889 *
1890 * XXX FBDP
1891 * The reserved "page zero" in each process's address space can
1892 * be arbitrarily large. Splitting it into separate 4GB objects and
1893 * therefore different VM map entries serves no purpose and just
1894 * slows down operations on the VM map, so let's not split the
1895 * allocation into 4GB chunks if the max protection is NONE. That
1896 * memory should never be accessible, so it will never get to the
1897 * default pager.
1898 */
1899 tmp_start = tmp2_start;
1900 if (object == VM_OBJECT_NULL &&
1901 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1902 max_protection != VM_PROT_NONE &&
1903 superpage_size == 0)
1904 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1905 else
1906 tmp_end = tmp2_end;
1907 do {
1908 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1909 object, offset, needs_copy,
1910 FALSE, FALSE,
1911 cur_protection, max_protection,
1912 VM_BEHAVIOR_DEFAULT,
1913 (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance,
1914 0, no_cache,
1915 permanent, superpage_size);
1916 new_entry->alias = alias;
1917 if (flags & VM_FLAGS_MAP_JIT){
1918 if (!(map->jit_entry_exists)){
1919 new_entry->used_for_jit = TRUE;
1920 map->jit_entry_exists = TRUE;
1921 }
1922 }
1923
1924 if (is_submap) {
1925 vm_map_t submap;
1926 boolean_t submap_is_64bit;
1927 boolean_t use_pmap;
1928
1929 new_entry->is_sub_map = TRUE;
1930 submap = (vm_map_t) object;
1931 submap_is_64bit = vm_map_is_64bit(submap);
1932 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1933 #ifndef NO_NESTED_PMAP
1934 if (use_pmap && submap->pmap == NULL) {
1935 /* we need a sub pmap to nest... */
1936 submap->pmap = pmap_create(0, submap_is_64bit);
1937 if (submap->pmap == NULL) {
1938 /* let's proceed without nesting... */
1939 }
1940 }
1941 if (use_pmap && submap->pmap != NULL) {
1942 kr = pmap_nest(map->pmap,
1943 submap->pmap,
1944 tmp_start,
1945 tmp_start,
1946 tmp_end - tmp_start);
1947 if (kr != KERN_SUCCESS) {
1948 printf("vm_map_enter: "
1949 "pmap_nest(0x%llx,0x%llx) "
1950 "error 0x%x\n",
1951 (long long)tmp_start,
1952 (long long)tmp_end,
1953 kr);
1954 } else {
1955 /* we're now nested! */
1956 new_entry->use_pmap = TRUE;
1957 pmap_empty = FALSE;
1958 }
1959 }
1960 #endif /* NO_NESTED_PMAP */
1961 }
1962 entry = new_entry;
1963
1964 if (superpage_size) {
1965 vm_page_t pages, m;
1966 vm_object_t sp_object;
1967
1968 entry->offset = 0;
1969
1970 /* allocate one superpage */
1971 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
1972 if (kr != KERN_SUCCESS) {
1973 new_mapping_established = TRUE; /* will cause deallocation of whole range */
1974 RETURN(kr);
1975 }
1976
1977 /* create one vm_object per superpage */
1978 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
1979 sp_object->phys_contiguous = TRUE;
1980 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
1981 entry->object.vm_object = sp_object;
1982
1983 /* enter the base pages into the object */
1984 vm_object_lock(sp_object);
1985 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
1986 m = pages;
1987 pmap_zero_page(m->phys_page);
1988 pages = NEXT_PAGE(m);
1989 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
1990 vm_page_insert(m, sp_object, offset);
1991 }
1992 vm_object_unlock(sp_object);
1993 }
1994 } while (tmp_end != tmp2_end &&
1995 (tmp_start = tmp_end) &&
1996 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
1997 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
1998 }
1999
2000 vm_map_unlock(map);
2001 map_locked = FALSE;
2002
2003 new_mapping_established = TRUE;
2004
2005 /* Wire down the new entry if the user
2006 * requested all new map entries be wired.
2007 */
2008 if ((map->wiring_required)||(superpage_size)) {
2009 pmap_empty = FALSE; /* pmap won't be empty */
2010 result = vm_map_wire(map, start, end,
2011 new_entry->protection, TRUE);
2012 RETURN(result);
2013 }
2014
2015 if ((object != VM_OBJECT_NULL) &&
2016 (vm_map_pmap_enter_enable) &&
2017 (!anywhere) &&
2018 (!needs_copy) &&
2019 (size < (128*1024))) {
2020 pmap_empty = FALSE; /* pmap won't be empty */
2021
2022 if (override_nx(map, alias) && cur_protection)
2023 cur_protection |= VM_PROT_EXECUTE;
2024
2025 vm_map_pmap_enter(map, start, end,
2026 object, offset, cur_protection);
2027 }
2028
2029 BailOut: ;
2030 if (result == KERN_SUCCESS) {
2031 vm_prot_t pager_prot;
2032 memory_object_t pager;
2033
2034 if (pmap_empty &&
2035 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2036 assert(vm_map_pmap_is_empty(map,
2037 *address,
2038 *address+size));
2039 }
2040
2041 /*
2042 * For "named" VM objects, let the pager know that the
2043 * memory object is being mapped. Some pagers need to keep
2044 * track of this, to know when they can reclaim the memory
2045 * object, for example.
2046 * VM calls memory_object_map() for each mapping (specifying
2047 * the protection of each mapping) and calls
2048 * memory_object_last_unmap() when all the mappings are gone.
2049 */
2050 pager_prot = max_protection;
2051 if (needs_copy) {
2052 /*
2053 * Copy-On-Write mapping: won't modify
2054 * the memory object.
2055 */
2056 pager_prot &= ~VM_PROT_WRITE;
2057 }
2058 if (!is_submap &&
2059 object != VM_OBJECT_NULL &&
2060 object->named &&
2061 object->pager != MEMORY_OBJECT_NULL) {
2062 vm_object_lock(object);
2063 pager = object->pager;
2064 if (object->named &&
2065 pager != MEMORY_OBJECT_NULL) {
2066 assert(object->pager_ready);
2067 vm_object_mapping_wait(object, THREAD_UNINT);
2068 vm_object_mapping_begin(object);
2069 vm_object_unlock(object);
2070
2071 kr = memory_object_map(pager, pager_prot);
2072 assert(kr == KERN_SUCCESS);
2073
2074 vm_object_lock(object);
2075 vm_object_mapping_end(object);
2076 }
2077 vm_object_unlock(object);
2078 }
2079 } else {
2080 if (new_mapping_established) {
2081 /*
2082 * We have to get rid of the new mappings since we
2083 * won't make them available to the user.
2084 * Try to do that atomically, to minimize the risk
2085 * that someone else creates new mappings in that range.
2086 */
2087 zap_new_map = vm_map_create(PMAP_NULL,
2088 *address,
2089 *address + size,
2090 map->hdr.entries_pageable);
2091 if (!map_locked) {
2092 vm_map_lock(map);
2093 map_locked = TRUE;
2094 }
2095 (void) vm_map_delete(map, *address, *address+size,
2096 VM_MAP_REMOVE_SAVE_ENTRIES,
2097 zap_new_map);
2098 }
2099 if (zap_old_map != VM_MAP_NULL &&
2100 zap_old_map->hdr.nentries != 0) {
2101 vm_map_entry_t entry1, entry2;
2102
2103 /*
2104 * The new mapping failed. Attempt to restore
2105 * the old mappings, saved in the "zap_old_map".
2106 */
2107 if (!map_locked) {
2108 vm_map_lock(map);
2109 map_locked = TRUE;
2110 }
2111
2112 /* first check if the coast is still clear */
2113 start = vm_map_first_entry(zap_old_map)->vme_start;
2114 end = vm_map_last_entry(zap_old_map)->vme_end;
2115 if (vm_map_lookup_entry(map, start, &entry1) ||
2116 vm_map_lookup_entry(map, end, &entry2) ||
2117 entry1 != entry2) {
2118 /*
2119 * Part of that range has already been
2120 * re-mapped: we can't restore the old
2121 * mappings...
2122 */
2123 vm_map_enter_restore_failures++;
2124 } else {
2125 /*
2126 * Transfer the saved map entries from
2127 * "zap_old_map" to the original "map",
2128 * inserting them all after "entry1".
2129 */
2130 for (entry2 = vm_map_first_entry(zap_old_map);
2131 entry2 != vm_map_to_entry(zap_old_map);
2132 entry2 = vm_map_first_entry(zap_old_map)) {
2133 vm_map_size_t entry_size;
2134
2135 entry_size = (entry2->vme_end -
2136 entry2->vme_start);
2137 vm_map_store_entry_unlink(zap_old_map,
2138 entry2);
2139 zap_old_map->size -= entry_size;
2140 vm_map_store_entry_link(map, entry1, entry2);
2141 map->size += entry_size;
2142 entry1 = entry2;
2143 }
2144 if (map->wiring_required) {
2145 /*
2146 * XXX TODO: we should rewire the
2147 * old pages here...
2148 */
2149 }
2150 vm_map_enter_restore_successes++;
2151 }
2152 }
2153 }
2154
2155 if (map_locked) {
2156 vm_map_unlock(map);
2157 }
2158
2159 /*
2160 * Get rid of the "zap_maps" and all the map entries that
2161 * they may still contain.
2162 */
2163 if (zap_old_map != VM_MAP_NULL) {
2164 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2165 zap_old_map = VM_MAP_NULL;
2166 }
2167 if (zap_new_map != VM_MAP_NULL) {
2168 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2169 zap_new_map = VM_MAP_NULL;
2170 }
2171
2172 return result;
2173
2174 #undef RETURN
2175 }
2176
2177 kern_return_t
2178 vm_map_enter_mem_object(
2179 vm_map_t target_map,
2180 vm_map_offset_t *address,
2181 vm_map_size_t initial_size,
2182 vm_map_offset_t mask,
2183 int flags,
2184 ipc_port_t port,
2185 vm_object_offset_t offset,
2186 boolean_t copy,
2187 vm_prot_t cur_protection,
2188 vm_prot_t max_protection,
2189 vm_inherit_t inheritance)
2190 {
2191 vm_map_address_t map_addr;
2192 vm_map_size_t map_size;
2193 vm_object_t object;
2194 vm_object_size_t size;
2195 kern_return_t result;
2196 boolean_t mask_cur_protection, mask_max_protection;
2197
2198 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2199 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2200 cur_protection &= ~VM_PROT_IS_MASK;
2201 max_protection &= ~VM_PROT_IS_MASK;
2202
2203 /*
2204 * Check arguments for validity
2205 */
2206 if ((target_map == VM_MAP_NULL) ||
2207 (cur_protection & ~VM_PROT_ALL) ||
2208 (max_protection & ~VM_PROT_ALL) ||
2209 (inheritance > VM_INHERIT_LAST_VALID) ||
2210 initial_size == 0)
2211 return KERN_INVALID_ARGUMENT;
2212
2213 map_addr = vm_map_trunc_page(*address);
2214 map_size = vm_map_round_page(initial_size);
2215 size = vm_object_round_page(initial_size);
2216
2217 /*
2218 * Find the vm object (if any) corresponding to this port.
2219 */
2220 if (!IP_VALID(port)) {
2221 object = VM_OBJECT_NULL;
2222 offset = 0;
2223 copy = FALSE;
2224 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2225 vm_named_entry_t named_entry;
2226
2227 named_entry = (vm_named_entry_t) port->ip_kobject;
2228 /* a few checks to make sure user is obeying rules */
2229 if (size == 0) {
2230 if (offset >= named_entry->size)
2231 return KERN_INVALID_RIGHT;
2232 size = named_entry->size - offset;
2233 }
2234 if (mask_max_protection) {
2235 max_protection &= named_entry->protection;
2236 }
2237 if (mask_cur_protection) {
2238 cur_protection &= named_entry->protection;
2239 }
2240 if ((named_entry->protection & max_protection) !=
2241 max_protection)
2242 return KERN_INVALID_RIGHT;
2243 if ((named_entry->protection & cur_protection) !=
2244 cur_protection)
2245 return KERN_INVALID_RIGHT;
2246 if (named_entry->size < (offset + size))
2247 return KERN_INVALID_ARGUMENT;
2248
2249 /* the caller's "offset" parameter is relative to the start of the */
2250 /* named entry; convert it to an offset into the backing object */
2251 offset = offset + named_entry->offset;
2252
2253 named_entry_lock(named_entry);
2254 if (named_entry->is_sub_map) {
2255 vm_map_t submap;
2256
2257 submap = named_entry->backing.map;
2258 vm_map_lock(submap);
2259 vm_map_reference(submap);
2260 vm_map_unlock(submap);
2261 named_entry_unlock(named_entry);
2262
2263 result = vm_map_enter(target_map,
2264 &map_addr,
2265 map_size,
2266 mask,
2267 flags | VM_FLAGS_SUBMAP,
2268 (vm_object_t) submap,
2269 offset,
2270 copy,
2271 cur_protection,
2272 max_protection,
2273 inheritance);
2274 if (result != KERN_SUCCESS) {
2275 vm_map_deallocate(submap);
2276 } else {
2277 /*
2278 * No need to lock "submap" just to check its
2279 * "mapped" flag: that flag is never reset
2280 * once it's been set and if we race, we'll
2281 * just end up setting it twice, which is OK.
2282 */
2283 if (submap->mapped == FALSE) {
2284 /*
2285 * This submap has never been mapped.
2286 * Set its "mapped" flag now that it
2287 * has been mapped.
2288 * This happens only for the first ever
2289 * mapping of a "submap".
2290 */
2291 vm_map_lock(submap);
2292 submap->mapped = TRUE;
2293 vm_map_unlock(submap);
2294 }
2295 *address = map_addr;
2296 }
2297 return result;
2298
2299 } else if (named_entry->is_pager) {
2300 unsigned int access;
2301 vm_prot_t protections;
2302 unsigned int wimg_mode;
2303
2304 protections = named_entry->protection & VM_PROT_ALL;
2305 access = GET_MAP_MEM(named_entry->protection);
2306
2307 object = vm_object_enter(named_entry->backing.pager,
2308 named_entry->size,
2309 named_entry->internal,
2310 FALSE,
2311 FALSE);
2312 if (object == VM_OBJECT_NULL) {
2313 named_entry_unlock(named_entry);
2314 return KERN_INVALID_OBJECT;
2315 }
2316
2317 /* JMM - drop reference on pager here */
2318
2319 /* create an extra ref for the named entry */
2320 vm_object_lock(object);
2321 vm_object_reference_locked(object);
2322 named_entry->backing.object = object;
2323 named_entry->is_pager = FALSE;
2324 named_entry_unlock(named_entry);
2325
2326 wimg_mode = object->wimg_bits;
2327
2328 if (access == MAP_MEM_IO) {
2329 wimg_mode = VM_WIMG_IO;
2330 } else if (access == MAP_MEM_COPYBACK) {
2331 wimg_mode = VM_WIMG_USE_DEFAULT;
2332 } else if (access == MAP_MEM_WTHRU) {
2333 wimg_mode = VM_WIMG_WTHRU;
2334 } else if (access == MAP_MEM_WCOMB) {
2335 wimg_mode = VM_WIMG_WCOMB;
2336 }
2337
2338 /* wait for object (if any) to be ready */
2339 if (!named_entry->internal) {
2340 while (!object->pager_ready) {
2341 vm_object_wait(
2342 object,
2343 VM_OBJECT_EVENT_PAGER_READY,
2344 THREAD_UNINT);
2345 vm_object_lock(object);
2346 }
2347 }
2348
2349 if (object->wimg_bits != wimg_mode)
2350 vm_object_change_wimg_mode(object, wimg_mode);
2351
2352 object->true_share = TRUE;
2353
2354 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2355 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2356 vm_object_unlock(object);
2357 } else {
2358 /* This is the case where we are going to map */
2359 /* an already mapped object. If the object is */
2360 /* not ready it is internal. An external */
2361 /* object cannot be mapped until it is ready, */
2362 /* so we can avoid the ready check */
2363 /* in this case. */
2364 object = named_entry->backing.object;
2365 assert(object != VM_OBJECT_NULL);
2366 named_entry_unlock(named_entry);
2367 vm_object_reference(object);
2368 }
2369 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2370 /*
2371 * JMM - This is temporary until we unify named entries
2372 * and raw memory objects.
2373 *
2374 * Detected fake ip_kotype for a memory object. In
2375 * this case, the port isn't really a port at all, but
2376 * instead is just a raw memory object.
2377 */
2378
2379 object = vm_object_enter((memory_object_t)port,
2380 size, FALSE, FALSE, FALSE);
2381 if (object == VM_OBJECT_NULL)
2382 return KERN_INVALID_OBJECT;
2383
2384 /* wait for object (if any) to be ready */
2385 if (object != VM_OBJECT_NULL) {
2386 if (object == kernel_object) {
2387 printf("Warning: Attempt to map kernel object"
2388 " by a non-private kernel entity\n");
2389 return KERN_INVALID_OBJECT;
2390 }
2391 if (!object->pager_ready) {
2392 vm_object_lock(object);
2393
2394 while (!object->pager_ready) {
2395 vm_object_wait(object,
2396 VM_OBJECT_EVENT_PAGER_READY,
2397 THREAD_UNINT);
2398 vm_object_lock(object);
2399 }
2400 vm_object_unlock(object);
2401 }
2402 }
2403 } else {
2404 return KERN_INVALID_OBJECT;
2405 }
2406
2407 if (object != VM_OBJECT_NULL &&
2408 object->named &&
2409 object->pager != MEMORY_OBJECT_NULL &&
2410 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2411 memory_object_t pager;
2412 vm_prot_t pager_prot;
2413 kern_return_t kr;
2414
2415 /*
2416 * For "named" VM objects, let the pager know that the
2417 * memory object is being mapped. Some pagers need to keep
2418 * track of this, to know when they can reclaim the memory
2419 * object, for example.
2420 * VM calls memory_object_map() for each mapping (specifying
2421 * the protection of each mapping) and calls
2422 * memory_object_last_unmap() when all the mappings are gone.
2423 */
2424 pager_prot = max_protection;
2425 if (copy) {
2426 /*
2427 * Copy-On-Write mapping: won't modify the
2428 * memory object.
2429 */
2430 pager_prot &= ~VM_PROT_WRITE;
2431 }
2432 vm_object_lock(object);
2433 pager = object->pager;
2434 if (object->named &&
2435 pager != MEMORY_OBJECT_NULL &&
2436 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2437 assert(object->pager_ready);
2438 vm_object_mapping_wait(object, THREAD_UNINT);
2439 vm_object_mapping_begin(object);
2440 vm_object_unlock(object);
2441
2442 kr = memory_object_map(pager, pager_prot);
2443 assert(kr == KERN_SUCCESS);
2444
2445 vm_object_lock(object);
2446 vm_object_mapping_end(object);
2447 }
2448 vm_object_unlock(object);
2449 }
2450
2451 /*
2452 * Perform the copy if requested
2453 */
2454
2455 if (copy) {
2456 vm_object_t new_object;
2457 vm_object_offset_t new_offset;
2458
2459 result = vm_object_copy_strategically(object, offset, size,
2460 &new_object, &new_offset,
2461 &copy);
2462
2463
2464 if (result == KERN_MEMORY_RESTART_COPY) {
2465 boolean_t success;
2466 boolean_t src_needs_copy;
2467
2468 /*
2469 * XXX
2470 * We currently ignore src_needs_copy.
2471 * This really is the issue of how to make
2472 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2473 * non-kernel users to use. Solution forthcoming.
2474 * In the meantime, since we don't allow non-kernel
2475 * memory managers to specify symmetric copy,
2476 * we won't run into problems here.
2477 */
2478 new_object = object;
2479 new_offset = offset;
2480 success = vm_object_copy_quickly(&new_object,
2481 new_offset, size,
2482 &src_needs_copy,
2483 &copy);
2484 assert(success);
2485 result = KERN_SUCCESS;
2486 }
2487 /*
2488 * Throw away the reference to the
2489 * original object, as it won't be mapped.
2490 */
2491
2492 vm_object_deallocate(object);
2493
2494 if (result != KERN_SUCCESS)
2495 return result;
2496
2497 object = new_object;
2498 offset = new_offset;
2499 }
2500
2501 result = vm_map_enter(target_map,
2502 &map_addr, map_size,
2503 (vm_map_offset_t)mask,
2504 flags,
2505 object, offset,
2506 copy,
2507 cur_protection, max_protection, inheritance);
2508 if (result != KERN_SUCCESS)
2509 vm_object_deallocate(object);
2510 *address = map_addr;
2511 return result;
2512 }
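/*
 * Illustrative sketch (not from the original source): mapping anonymous,
 * zero-filled memory via vm_map_enter_mem_object() by passing an invalid
 * port.  As the !IP_VALID(port) branch above shows, this degenerates to
 * VM_OBJECT_NULL / offset 0 / copy FALSE.  "target" is assumed to be a
 * valid, referenced vm_map_t; the example is kept out of the build.
 */
#if 0	/* example only */
static kern_return_t
example_enter_anonymous(vm_map_t target, vm_map_size_t size)
{
	vm_map_offset_t	addr = 0;

	return vm_map_enter_mem_object(target,
				       &addr,
				       size,
				       (vm_map_offset_t)0,	/* mask: no alignment constraint */
				       VM_FLAGS_ANYWHERE,
				       IP_NULL,			/* no backing port */
				       (vm_object_offset_t)0,
				       FALSE,			/* copy */
				       VM_PROT_DEFAULT,
				       VM_PROT_ALL,
				       VM_INHERIT_DEFAULT);
}
#endif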
2513
2514
2515
2516
2517 kern_return_t
2518 vm_map_enter_mem_object_control(
2519 vm_map_t target_map,
2520 vm_map_offset_t *address,
2521 vm_map_size_t initial_size,
2522 vm_map_offset_t mask,
2523 int flags,
2524 memory_object_control_t control,
2525 vm_object_offset_t offset,
2526 boolean_t copy,
2527 vm_prot_t cur_protection,
2528 vm_prot_t max_protection,
2529 vm_inherit_t inheritance)
2530 {
2531 vm_map_address_t map_addr;
2532 vm_map_size_t map_size;
2533 vm_object_t object;
2534 vm_object_size_t size;
2535 kern_return_t result;
2536 memory_object_t pager;
2537 vm_prot_t pager_prot;
2538 kern_return_t kr;
2539
2540 /*
2541 * Check arguments for validity
2542 */
2543 if ((target_map == VM_MAP_NULL) ||
2544 (cur_protection & ~VM_PROT_ALL) ||
2545 (max_protection & ~VM_PROT_ALL) ||
2546 (inheritance > VM_INHERIT_LAST_VALID) ||
2547 initial_size == 0)
2548 return KERN_INVALID_ARGUMENT;
2549
2550 map_addr = vm_map_trunc_page(*address);
2551 map_size = vm_map_round_page(initial_size);
2552 size = vm_object_round_page(initial_size);
2553
2554 object = memory_object_control_to_vm_object(control);
2555
2556 if (object == VM_OBJECT_NULL)
2557 return KERN_INVALID_OBJECT;
2558
2559 if (object == kernel_object) {
2560 printf("Warning: Attempt to map kernel object"
2561 " by a non-private kernel entity\n");
2562 return KERN_INVALID_OBJECT;
2563 }
2564
2565 vm_object_lock(object);
2566 object->ref_count++;
2567 vm_object_res_reference(object);
2568
2569 /*
2570 * For "named" VM objects, let the pager know that the
2571 * memory object is being mapped. Some pagers need to keep
2572 * track of this, to know when they can reclaim the memory
2573 * object, for example.
2574 * VM calls memory_object_map() for each mapping (specifying
2575 * the protection of each mapping) and calls
2576 * memory_object_last_unmap() when all the mappings are gone.
2577 */
2578 pager_prot = max_protection;
2579 if (copy) {
2580 pager_prot &= ~VM_PROT_WRITE;
2581 }
2582 pager = object->pager;
2583 if (object->named &&
2584 pager != MEMORY_OBJECT_NULL &&
2585 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2586 assert(object->pager_ready);
2587 vm_object_mapping_wait(object, THREAD_UNINT);
2588 vm_object_mapping_begin(object);
2589 vm_object_unlock(object);
2590
2591 kr = memory_object_map(pager, pager_prot);
2592 assert(kr == KERN_SUCCESS);
2593
2594 vm_object_lock(object);
2595 vm_object_mapping_end(object);
2596 }
2597 vm_object_unlock(object);
2598
2599 /*
2600 * Perform the copy if requested
2601 */
2602
2603 if (copy) {
2604 vm_object_t new_object;
2605 vm_object_offset_t new_offset;
2606
2607 result = vm_object_copy_strategically(object, offset, size,
2608 &new_object, &new_offset,
2609 &copy);
2610
2611
2612 if (result == KERN_MEMORY_RESTART_COPY) {
2613 boolean_t success;
2614 boolean_t src_needs_copy;
2615
2616 /*
2617 * XXX
2618 * We currently ignore src_needs_copy.
2619 * This really is the issue of how to make
2620 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2621 * non-kernel users to use. Solution forthcoming.
2622 * In the meantime, since we don't allow non-kernel
2623 * memory managers to specify symmetric copy,
2624 * we won't run into problems here.
2625 */
2626 new_object = object;
2627 new_offset = offset;
2628 success = vm_object_copy_quickly(&new_object,
2629 new_offset, size,
2630 &src_needs_copy,
2631 &copy);
2632 assert(success);
2633 result = KERN_SUCCESS;
2634 }
2635 /*
2636 * Throw away the reference to the
2637 * original object, as it won't be mapped.
2638 */
2639
2640 vm_object_deallocate(object);
2641
2642 if (result != KERN_SUCCESS)
2643 return result;
2644
2645 object = new_object;
2646 offset = new_offset;
2647 }
2648
2649 result = vm_map_enter(target_map,
2650 &map_addr, map_size,
2651 (vm_map_offset_t)mask,
2652 flags,
2653 object, offset,
2654 copy,
2655 cur_protection, max_protection, inheritance);
2656 if (result != KERN_SUCCESS)
2657 vm_object_deallocate(object);
2658 *address = map_addr;
2659
2660 return result;
2661 }
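/*
 * Note: this variant differs from vm_map_enter_mem_object() only in how
 * the backing VM object is obtained: the caller hands in a
 * memory_object_control_t directly, so there is no port lookup and no
 * named-entry / submap handling; the pager notification and optional
 * copy steps above mirror the port-based path.
 */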
2662
2663
2664 #if VM_CPM
2665
2666 #ifdef MACH_ASSERT
2667 extern pmap_paddr_t avail_start, avail_end;
2668 #endif
2669
2670 /*
2671 * Allocate memory in the specified map, with the caveat that
2672 * the memory is physically contiguous. This call may fail
2673 * if the system can't find sufficient contiguous memory.
2674 * This call may cause or lead to heart-stopping amounts of
2675 * paging activity.
2676 *
2677 * Memory obtained from this call should be freed in the
2678 * normal way, viz., via vm_deallocate.
2679 */
2680 kern_return_t
2681 vm_map_enter_cpm(
2682 vm_map_t map,
2683 vm_map_offset_t *addr,
2684 vm_map_size_t size,
2685 int flags)
2686 {
2687 vm_object_t cpm_obj;
2688 pmap_t pmap;
2689 vm_page_t m, pages;
2690 kern_return_t kr;
2691 vm_map_offset_t va, start, end, offset;
2692 #if MACH_ASSERT
2693 vm_map_offset_t prev_addr;
2694 #endif /* MACH_ASSERT */
2695
2696 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2697
2698 if (!vm_allocate_cpm_enabled)
2699 return KERN_FAILURE;
2700
2701 if (size == 0) {
2702 *addr = 0;
2703 return KERN_SUCCESS;
2704 }
2705 if (anywhere)
2706 *addr = vm_map_min(map);
2707 else
2708 *addr = vm_map_trunc_page(*addr);
2709 size = vm_map_round_page(size);
2710
2711 /*
2712 * LP64todo - cpm_allocate should probably allow
2713 * allocations of >4GB, but not with the current
2714 * algorithm, so just cast down the size for now.
2715 */
2716 if (size > VM_MAX_ADDRESS)
2717 return KERN_RESOURCE_SHORTAGE;
2718 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2719 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2720 return kr;
2721
2722 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2723 assert(cpm_obj != VM_OBJECT_NULL);
2724 assert(cpm_obj->internal);
2725 assert(cpm_obj->size == (vm_object_size_t)size);
2726 assert(cpm_obj->can_persist == FALSE);
2727 assert(cpm_obj->pager_created == FALSE);
2728 assert(cpm_obj->pageout == FALSE);
2729 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2730
2731 /*
2732 * Insert pages into object.
2733 */
2734
2735 vm_object_lock(cpm_obj);
2736 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2737 m = pages;
2738 pages = NEXT_PAGE(m);
2739 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2740
2741 assert(!m->gobbled);
2742 assert(!m->wanted);
2743 assert(!m->pageout);
2744 assert(!m->tabled);
2745 assert(VM_PAGE_WIRED(m));
2746 /*
2747 * ENCRYPTED SWAP:
2748 * "m" is not supposed to be pageable, so it
2749 * should not be encrypted. It wouldn't be safe
2750 * to enter it in a new VM object while encrypted.
2751 */
2752 ASSERT_PAGE_DECRYPTED(m);
2753 assert(m->busy);
2754 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2755
2756 m->busy = FALSE;
2757 vm_page_insert(m, cpm_obj, offset);
2758 }
2759 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2760 vm_object_unlock(cpm_obj);
2761
2762 /*
2763 * Hang onto a reference on the object in case a
2764 * multi-threaded application for some reason decides
2765 * to deallocate the portion of the address space into
2766 * which we will insert this object.
2767 *
2768 * Unfortunately, we must insert the object now before
2769 * we can talk to the pmap module about which addresses
2770 * must be wired down. Hence, the race with a multi-
2771 * threaded app.
2772 */
2773 vm_object_reference(cpm_obj);
2774
2775 /*
2776 * Insert object into map.
2777 */
2778
2779 kr = vm_map_enter(
2780 map,
2781 addr,
2782 size,
2783 (vm_map_offset_t)0,
2784 flags,
2785 cpm_obj,
2786 (vm_object_offset_t)0,
2787 FALSE,
2788 VM_PROT_ALL,
2789 VM_PROT_ALL,
2790 VM_INHERIT_DEFAULT);
2791
2792 if (kr != KERN_SUCCESS) {
2793 /*
2794 * A CPM object doesn't have can_persist set,
2795 * so all we have to do is deallocate it to
2796 * free up these pages.
2797 */
2798 assert(cpm_obj->pager_created == FALSE);
2799 assert(cpm_obj->can_persist == FALSE);
2800 assert(cpm_obj->pageout == FALSE);
2801 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2802 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2803 vm_object_deallocate(cpm_obj); /* kill creation ref */
2804 }
2805
2806 /*
2807 * Inform the physical mapping system that the
2808 * range of addresses may not fault, so that
2809 * page tables and such can be locked down as well.
2810 */
2811 start = *addr;
2812 end = start + size;
2813 pmap = vm_map_pmap(map);
2814 pmap_pageable(pmap, start, end, FALSE);
2815
2816 /*
2817 * Enter each page into the pmap, to avoid faults.
2818 * Note that this loop could be coded more efficiently,
2819 * if the need arose, rather than looking up each page
2820 * again.
2821 */
2822 for (offset = 0, va = start; offset < size;
2823 va += PAGE_SIZE, offset += PAGE_SIZE) {
2824 int type_of_fault;
2825
2826 vm_object_lock(cpm_obj);
2827 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2828 assert(m != VM_PAGE_NULL);
2829
2830 vm_page_zero_fill(m);
2831
2832 type_of_fault = DBG_ZERO_FILL_FAULT;
2833
2834 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2835 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
2836 &type_of_fault);
2837
2838 vm_object_unlock(cpm_obj);
2839 }
2840
2841 #if MACH_ASSERT
2842 /*
2843 * Verify ordering in address space.
2844 */
2845 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2846 vm_object_lock(cpm_obj);
2847 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2848 vm_object_unlock(cpm_obj);
2849 if (m == VM_PAGE_NULL)
2850 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2851 cpm_obj, offset);
2852 assert(m->tabled);
2853 assert(!m->busy);
2854 assert(!m->wanted);
2855 assert(!m->fictitious);
2856 assert(!m->private);
2857 assert(!m->absent);
2858 assert(!m->error);
2859 assert(!m->cleaning);
2860 assert(!m->precious);
2861 assert(!m->clustered);
2862 if (offset != 0) {
2863 if (m->phys_page != prev_addr + 1) {
2864 printf("start 0x%x end 0x%x va 0x%x\n",
2865 start, end, va);
2866 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2867 printf("m 0x%x prev_address 0x%x\n", m,
2868 prev_addr);
2869 panic("vm_allocate_cpm: pages not contig!");
2870 }
2871 }
2872 prev_addr = m->phys_page;
2873 }
2874 #endif /* MACH_ASSERT */
2875
2876 vm_object_deallocate(cpm_obj); /* kill extra ref */
2877
2878 return kr;
2879 }
2880
2881
2882 #else /* VM_CPM */
2883
2884 /*
2885 * Interface is defined in all cases, but unless the kernel
2886 * is built explicitly for this option, the interface does
2887 * nothing.
2888 */
2889
2890 kern_return_t
2891 vm_map_enter_cpm(
2892 __unused vm_map_t map,
2893 __unused vm_map_offset_t *addr,
2894 __unused vm_map_size_t size,
2895 __unused int flags)
2896 {
2897 return KERN_FAILURE;
2898 }
2899 #endif /* VM_CPM */
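/*
 * Illustrative sketch (not from the original source): requesting a
 * physically contiguous allocation.  Unless the kernel is built with
 * VM_CPM this resolves to the stub above and returns KERN_FAILURE, and
 * even with VM_CPM it may fail if contiguous memory cannot be found.
 * Memory obtained this way is released with vm_deallocate(), as noted
 * in the comment block preceding vm_map_enter_cpm().
 */
#if 0	/* example only */
static kern_return_t
example_contiguous_alloc(vm_map_t map, vm_map_size_t size, vm_map_offset_t *addrp)
{
	*addrp = 0;
	return vm_map_enter_cpm(map, addrp, size, VM_FLAGS_ANYWHERE);
}
#endif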
2900
2901 /* Not used without nested pmaps */
2902 #ifndef NO_NESTED_PMAP
2903 /*
2904 * Clip and unnest a portion of a nested submap mapping.
2905 */
2906
2907
2908 static void
2909 vm_map_clip_unnest(
2910 vm_map_t map,
2911 vm_map_entry_t entry,
2912 vm_map_offset_t start_unnest,
2913 vm_map_offset_t end_unnest)
2914 {
2915 vm_map_offset_t old_start_unnest = start_unnest;
2916 vm_map_offset_t old_end_unnest = end_unnest;
2917
2918 assert(entry->is_sub_map);
2919 assert(entry->object.sub_map != NULL);
2920
2921 /*
2922 * Query the platform for the optimal unnest range.
2923 * DRK: There's some duplication of effort here, since
2924 * callers may have adjusted the range to some extent. This
2925 * routine was introduced to support 1GiB subtree nesting
2926 * for x86 platforms, which can also nest on 2MiB boundaries
2927 * depending on size/alignment.
2928 */
2929 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
2930 log_unnest_badness(map, old_start_unnest, old_end_unnest);
2931 }
2932
2933 if (entry->vme_start > start_unnest ||
2934 entry->vme_end < end_unnest) {
2935 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2936 "bad nested entry: start=0x%llx end=0x%llx\n",
2937 (long long)start_unnest, (long long)end_unnest,
2938 (long long)entry->vme_start, (long long)entry->vme_end);
2939 }
2940
2941 if (start_unnest > entry->vme_start) {
2942 _vm_map_clip_start(&map->hdr,
2943 entry,
2944 start_unnest);
2945 vm_map_store_update_first_free(map, map->first_free);
2946 }
2947 if (entry->vme_end > end_unnest) {
2948 _vm_map_clip_end(&map->hdr,
2949 entry,
2950 end_unnest);
2951 vm_map_store_update_first_free(map, map->first_free);
2952 }
2953
2954 pmap_unnest(map->pmap,
2955 entry->vme_start,
2956 entry->vme_end - entry->vme_start);
2957 if ((map->mapped) && (map->ref_count)) {
2958 /* clean up parent map/maps */
2959 vm_map_submap_pmap_clean(
2960 map, entry->vme_start,
2961 entry->vme_end,
2962 entry->object.sub_map,
2963 entry->offset);
2964 }
2965 entry->use_pmap = FALSE;
2966 }
2967 #endif /* NO_NESTED_PMAP */
2968
2969 /*
2970 * vm_map_clip_start: [ internal use only ]
2971 *
2972 * Asserts that the given entry begins at or after
2973 * the specified address; if necessary,
2974 * it splits the entry into two.
2975 */
2976 void
2977 vm_map_clip_start(
2978 vm_map_t map,
2979 vm_map_entry_t entry,
2980 vm_map_offset_t startaddr)
2981 {
2982 #ifndef NO_NESTED_PMAP
2983 if (entry->use_pmap &&
2984 startaddr >= entry->vme_start) {
2985 vm_map_offset_t start_unnest, end_unnest;
2986
2987 /*
2988 * Make sure "startaddr" is no longer in a nested range
2989 * before we clip. Unnest only the minimum range the platform
2990 * can handle.
2991 * vm_map_clip_unnest may perform additional adjustments to
2992 * the unnest range.
2993 */
2994 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2995 end_unnest = start_unnest + pmap_nesting_size_min;
2996 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2997 }
2998 #endif /* NO_NESTED_PMAP */
2999 if (startaddr > entry->vme_start) {
3000 if (entry->object.vm_object &&
3001 !entry->is_sub_map &&
3002 entry->object.vm_object->phys_contiguous) {
3003 pmap_remove(map->pmap,
3004 (addr64_t)(entry->vme_start),
3005 (addr64_t)(entry->vme_end));
3006 }
3007 _vm_map_clip_start(&map->hdr, entry, startaddr);
3008 vm_map_store_update_first_free(map, map->first_free);
3009 }
3010 }
3011
3012
3013 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3014 MACRO_BEGIN \
3015 if ((startaddr) > (entry)->vme_start) \
3016 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3017 MACRO_END
3018
3019 /*
3020 * This routine is called only when it is known that
3021 * the entry must be split.
3022 */
3023 static void
3024 _vm_map_clip_start(
3025 register struct vm_map_header *map_header,
3026 register vm_map_entry_t entry,
3027 register vm_map_offset_t start)
3028 {
3029 register vm_map_entry_t new_entry;
3030
3031 /*
3032 * Split off the front portion --
3033 * note that we must insert the new
3034 * entry BEFORE this one, so that
3035 * this entry has the specified starting
3036 * address.
3037 */
3038
3039 new_entry = _vm_map_entry_create(map_header);
3040 vm_map_entry_copy_full(new_entry, entry);
3041
3042 new_entry->vme_end = start;
3043 assert(new_entry->vme_start < new_entry->vme_end);
3044 entry->offset += (start - entry->vme_start);
3045 assert(start < entry->vme_end);
3046 entry->vme_start = start;
3047
3048 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3049
3050 if (entry->is_sub_map)
3051 vm_map_reference(new_entry->object.sub_map);
3052 else
3053 vm_object_reference(new_entry->object.vm_object);
3054 }
3055
3056
3057 /*
3058 * vm_map_clip_end: [ internal use only ]
3059 *
3060 * Asserts that the given entry ends at or before
3061 * the specified address; if necessary,
3062 * it splits the entry into two.
3063 */
3064 void
3065 vm_map_clip_end(
3066 vm_map_t map,
3067 vm_map_entry_t entry,
3068 vm_map_offset_t endaddr)
3069 {
3070 if (endaddr > entry->vme_end) {
3071 /*
3072 * Within the scope of this clipping, limit "endaddr" to
3073 * the end of this map entry...
3074 */
3075 endaddr = entry->vme_end;
3076 }
3077 #ifndef NO_NESTED_PMAP
3078 if (entry->use_pmap) {
3079 vm_map_offset_t start_unnest, end_unnest;
3080
3081 /*
3082 * Make sure the range between the start of this entry and
3083 * the new "endaddr" is no longer nested before we clip.
3084 * Unnest only the minimum range the platform can handle.
3085 * vm_map_clip_unnest may perform additional adjustments to
3086 * the unnest range.
3087 */
3088 start_unnest = entry->vme_start;
3089 end_unnest =
3090 (endaddr + pmap_nesting_size_min - 1) &
3091 ~(pmap_nesting_size_min - 1);
3092 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3093 }
3094 #endif /* NO_NESTED_PMAP */
3095 if (endaddr < entry->vme_end) {
3096 if (entry->object.vm_object &&
3097 !entry->is_sub_map &&
3098 entry->object.vm_object->phys_contiguous) {
3099 pmap_remove(map->pmap,
3100 (addr64_t)(entry->vme_start),
3101 (addr64_t)(entry->vme_end));
3102 }
3103 _vm_map_clip_end(&map->hdr, entry, endaddr);
3104 vm_map_store_update_first_free(map, map->first_free);
3105 }
3106 }
3107
3108
3109 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3110 MACRO_BEGIN \
3111 if ((endaddr) < (entry)->vme_end) \
3112 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3113 MACRO_END
3114
3115 /*
3116 * This routine is called only when it is known that
3117 * the entry must be split.
3118 */
3119 static void
3120 _vm_map_clip_end(
3121 register struct vm_map_header *map_header,
3122 register vm_map_entry_t entry,
3123 register vm_map_offset_t end)
3124 {
3125 register vm_map_entry_t new_entry;
3126
3127 /*
3128 * Create a new entry and insert it
3129 * AFTER the specified entry
3130 */
3131
3132 new_entry = _vm_map_entry_create(map_header);
3133 vm_map_entry_copy_full(new_entry, entry);
3134
3135 assert(entry->vme_start < end);
3136 new_entry->vme_start = entry->vme_end = end;
3137 new_entry->offset += (end - entry->vme_start);
3138 assert(new_entry->vme_start < new_entry->vme_end);
3139
3140 _vm_map_store_entry_link(map_header, entry, new_entry);
3141
3142 if (entry->is_sub_map)
3143 vm_map_reference(new_entry->object.sub_map);
3144 else
3145 vm_object_reference(new_entry->object.vm_object);
3146 }
3147
3148
3149 /*
3150 * VM_MAP_RANGE_CHECK: [ internal use only ]
3151 *
3152 * Asserts that the starting and ending region
3153 * addresses fall within the valid range of the map.
3154 */
3155 #define VM_MAP_RANGE_CHECK(map, start, end) \
3156 MACRO_BEGIN \
3157 if (start < vm_map_min(map)) \
3158 start = vm_map_min(map); \
3159 if (end > vm_map_max(map)) \
3160 end = vm_map_max(map); \
3161 if (start > end) \
3162 start = end; \
3163 MACRO_END
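/*
 * Worked example (illustrative values only): with vm_map_min(map) == 0x1000
 * and vm_map_max(map) == 0x9000, a request for [0x0, 0xA000) is clamped to
 * [0x1000, 0x9000); a request lying entirely above the map's maximum
 * collapses to the empty range [vm_map_max(map), vm_map_max(map)).
 */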
3164
3165 /*
3166 * vm_map_range_check: [ internal use only ]
3167 *
3168 * Check that the region defined by the specified start and
3169 * end addresses is wholly contained within a single map
3170 * entry or set of adjacent map entries of the specified map,
3171 * i.e. the specified region contains no unmapped space.
3172 * If any or all of the region is unmapped, FALSE is returned.
3173 * Otherwise, TRUE is returned and if the output argument 'entry'
3174 * is not NULL it points to the map entry containing the start
3175 * of the region.
3176 *
3177 * The map is locked for reading on entry and is left locked.
3178 */
3179 static boolean_t
3180 vm_map_range_check(
3181 register vm_map_t map,
3182 register vm_map_offset_t start,
3183 register vm_map_offset_t end,
3184 vm_map_entry_t *entry)
3185 {
3186 vm_map_entry_t cur;
3187 register vm_map_offset_t prev;
3188
3189 /*
3190 * Basic sanity checks first
3191 */
3192 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3193 return (FALSE);
3194
3195 /*
3196 * Check first if the region starts within a valid
3197 * mapping for the map.
3198 */
3199 if (!vm_map_lookup_entry(map, start, &cur))
3200 return (FALSE);
3201
3202 /*
3203 * Optimize for the case that the region is contained
3204 * in a single map entry.
3205 */
3206 if (entry != (vm_map_entry_t *) NULL)
3207 *entry = cur;
3208 if (end <= cur->vme_end)
3209 return (TRUE);
3210
3211 /*
3212 * If the region is not wholly contained within a
3213 * single entry, walk the entries looking for holes.
3214 */
3215 prev = cur->vme_end;
3216 cur = cur->vme_next;
3217 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3218 if (end <= cur->vme_end)
3219 return (TRUE);
3220 prev = cur->vme_end;
3221 cur = cur->vme_next;
3222 }
3223 return (FALSE);
3224 }
3225
3226 /*
3227 * vm_map_submap: [ kernel use only ]
3228 *
3229 * Mark the given range as handled by a subordinate map.
3230 *
3231 * This range must have been created with vm_map_find using
3232 * the vm_submap_object, and no other operations may have been
3233 * performed on this range prior to calling vm_map_submap.
3234 *
3235 * Only a limited number of operations can be performed
3236 * within this range after calling vm_map_submap:
3237 * vm_fault
3238 * [Don't try vm_map_copyin!]
3239 *
3240 * To remove a submapping, one must first remove the
3241 * range from the superior map, and then destroy the
3242 * submap (if desired). [Better yet, don't try it.]
3243 */
3244 kern_return_t
3245 vm_map_submap(
3246 vm_map_t map,
3247 vm_map_offset_t start,
3248 vm_map_offset_t end,
3249 vm_map_t submap,
3250 vm_map_offset_t offset,
3251 #ifdef NO_NESTED_PMAP
3252 __unused
3253 #endif /* NO_NESTED_PMAP */
3254 boolean_t use_pmap)
3255 {
3256 vm_map_entry_t entry;
3257 register kern_return_t result = KERN_INVALID_ARGUMENT;
3258 register vm_object_t object;
3259
3260 vm_map_lock(map);
3261
3262 if (! vm_map_lookup_entry(map, start, &entry)) {
3263 entry = entry->vme_next;
3264 }
3265
3266 if (entry == vm_map_to_entry(map) ||
3267 entry->is_sub_map) {
3268 vm_map_unlock(map);
3269 return KERN_INVALID_ARGUMENT;
3270 }
3271
3272 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3273 vm_map_clip_start(map, entry, start);
3274 vm_map_clip_end(map, entry, end);
3275
3276 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3277 (!entry->is_sub_map) &&
3278 ((object = entry->object.vm_object) == vm_submap_object) &&
3279 (object->resident_page_count == 0) &&
3280 (object->copy == VM_OBJECT_NULL) &&
3281 (object->shadow == VM_OBJECT_NULL) &&
3282 (!object->pager_created)) {
3283 entry->offset = (vm_object_offset_t)offset;
3284 entry->object.vm_object = VM_OBJECT_NULL;
3285 vm_object_deallocate(object);
3286 entry->is_sub_map = TRUE;
3287 entry->object.sub_map = submap;
3288 vm_map_reference(submap);
3289 submap->mapped = TRUE;
3290
3291 #ifndef NO_NESTED_PMAP
3292 if (use_pmap) {
3293 /* nest if platform code will allow */
3294 if(submap->pmap == NULL) {
3295 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3296 if(submap->pmap == PMAP_NULL) {
3297 vm_map_unlock(map);
3298 return(KERN_NO_SPACE);
3299 }
3300 }
3301 result = pmap_nest(map->pmap,
3302 (entry->object.sub_map)->pmap,
3303 (addr64_t)start,
3304 (addr64_t)start,
3305 (uint64_t)(end - start));
3306 if(result)
3307 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3308 entry->use_pmap = TRUE;
3309 }
3310 #else /* NO_NESTED_PMAP */
3311 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3312 #endif /* NO_NESTED_PMAP */
3313 result = KERN_SUCCESS;
3314 }
3315 vm_map_unlock(map);
3316
3317 return(result);
3318 }
3319
3320 /*
3321 * vm_map_protect:
3322 *
3323 * Sets the protection of the specified address
3324 * region in the target map. If "set_max" is
3325 * specified, the maximum protection is to be set;
3326 * otherwise, only the current protection is affected.
3327 */
3328 kern_return_t
3329 vm_map_protect(
3330 register vm_map_t map,
3331 register vm_map_offset_t start,
3332 register vm_map_offset_t end,
3333 register vm_prot_t new_prot,
3334 register boolean_t set_max)
3335 {
3336 register vm_map_entry_t current;
3337 register vm_map_offset_t prev;
3338 vm_map_entry_t entry;
3339 vm_prot_t new_max;
3340
3341 XPR(XPR_VM_MAP,
3342 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3343 map, start, end, new_prot, set_max);
3344
3345 vm_map_lock(map);
3346
3347 /* LP64todo - remove this check when vm_map_commpage64()
3348 * no longer has to stuff in a map_entry for the commpage
3349 * above the map's max_offset.
3350 */
3351 if (start >= map->max_offset) {
3352 vm_map_unlock(map);
3353 return(KERN_INVALID_ADDRESS);
3354 }
3355
3356 while(1) {
3357 /*
3358 * Lookup the entry. If it doesn't start in a valid
3359 * entry, return an error.
3360 */
3361 if (! vm_map_lookup_entry(map, start, &entry)) {
3362 vm_map_unlock(map);
3363 return(KERN_INVALID_ADDRESS);
3364 }
3365
3366 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3367 start = SUPERPAGE_ROUND_DOWN(start);
3368 continue;
3369 }
3370 break;
3371 }
3372 if (entry->superpage_size)
3373 end = SUPERPAGE_ROUND_UP(end);
3374
3375 /*
3376 * Make a first pass to check for protection and address
3377 * violations.
3378 */
3379
3380 current = entry;
3381 prev = current->vme_start;
3382 while ((current != vm_map_to_entry(map)) &&
3383 (current->vme_start < end)) {
3384
3385 /*
3386 * If there is a hole, return an error.
3387 */
3388 if (current->vme_start != prev) {
3389 vm_map_unlock(map);
3390 return(KERN_INVALID_ADDRESS);
3391 }
3392
3393 new_max = current->max_protection;
3394 if(new_prot & VM_PROT_COPY) {
3395 new_max |= VM_PROT_WRITE;
3396 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3397 vm_map_unlock(map);
3398 return(KERN_PROTECTION_FAILURE);
3399 }
3400 } else {
3401 if ((new_prot & new_max) != new_prot) {
3402 vm_map_unlock(map);
3403 return(KERN_PROTECTION_FAILURE);
3404 }
3405 }
3406
3407 #if CONFIG_EMBEDDED
3408 if (new_prot & VM_PROT_WRITE) {
3409 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3410 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3411 new_prot &= ~VM_PROT_EXECUTE;
3412 }
3413 }
3414 #endif
3415
3416 prev = current->vme_end;
3417 current = current->vme_next;
3418 }
3419 if (end > prev) {
3420 vm_map_unlock(map);
3421 return(KERN_INVALID_ADDRESS);
3422 }
3423
3424 /*
3425 * Go back and fix up protections.
3426 * Clip to start here if the range starts within
3427 * the entry.
3428 */
3429
3430 current = entry;
3431 if (current != vm_map_to_entry(map)) {
3432 /* clip and unnest if necessary */
3433 vm_map_clip_start(map, current, start);
3434 }
3435
3436 while ((current != vm_map_to_entry(map)) &&
3437 (current->vme_start < end)) {
3438
3439 vm_prot_t old_prot;
3440
3441 vm_map_clip_end(map, current, end);
3442
3443 assert(!current->use_pmap); /* clipping did unnest if needed */
3444
3445 old_prot = current->protection;
3446
3447 if(new_prot & VM_PROT_COPY) {
3448 /* caller is asking specifically to copy the */
3449 /* mapped data; this implies that max protection */
3450 /* will include write. Caller must be prepared */
3451 /* for loss of shared memory communication in the */
3452 /* target area after taking this step */
3453
3454 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3455 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3456 current->offset = 0;
3457 }
3458 current->needs_copy = TRUE;
3459 current->max_protection |= VM_PROT_WRITE;
3460 }
3461
3462 if (set_max)
3463 current->protection =
3464 (current->max_protection =
3465 new_prot & ~VM_PROT_COPY) &
3466 old_prot;
3467 else
3468 current->protection = new_prot & ~VM_PROT_COPY;
3469
3470 /*
3471 * Update physical map if necessary.
3472 * If the request is to turn off write protection,
3473 * we won't do it for real (in pmap). This is because
3474 * it would cause copy-on-write to fail. We've already
3475 * set the new protection in the map, so if a
3476 * write-protect fault occurs, it will be fixed up
3477 * properly, COW or not.
3478 */
3479 if (current->protection != old_prot) {
3480 /* Look one level in: we support nested pmaps */
3481 /* from mapped submaps which are direct entries */
3482 /* in our map */
3483
3484 vm_prot_t prot;
3485
3486 prot = current->protection & ~VM_PROT_WRITE;
3487
3488 if (override_nx(map, current->alias) && prot)
3489 prot |= VM_PROT_EXECUTE;
3490
3491 if (current->is_sub_map && current->use_pmap) {
3492 pmap_protect(current->object.sub_map->pmap,
3493 current->vme_start,
3494 current->vme_end,
3495 prot);
3496 } else {
3497 pmap_protect(map->pmap,
3498 current->vme_start,
3499 current->vme_end,
3500 prot);
3501 }
3502 }
3503 current = current->vme_next;
3504 }
3505
3506 current = entry;
3507 while ((current != vm_map_to_entry(map)) &&
3508 (current->vme_start <= end)) {
3509 vm_map_simplify_entry(map, current);
3510 current = current->vme_next;
3511 }
3512
3513 vm_map_unlock(map);
3514 return(KERN_SUCCESS);
3515 }
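/*
 * Illustrative sketch (not from the original source): downgrading the
 * current protection of a page-aligned range to read-only while leaving
 * the maximum protection untouched (set_max == FALSE).  "map", "start"
 * and "end" are assumed to describe an existing, hole-free mapping.
 */
#if 0	/* example only */
static kern_return_t
example_make_readonly(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	return vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
}
#endif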
3516
3517 /*
3518 * vm_map_inherit:
3519 *
3520 * Sets the inheritance of the specified address
3521 * range in the target map. Inheritance
3522 * affects how the map will be shared with
3523 * child maps at the time of vm_map_fork.
3524 */
3525 kern_return_t
3526 vm_map_inherit(
3527 register vm_map_t map,
3528 register vm_map_offset_t start,
3529 register vm_map_offset_t end,
3530 register vm_inherit_t new_inheritance)
3531 {
3532 register vm_map_entry_t entry;
3533 vm_map_entry_t temp_entry;
3534
3535 vm_map_lock(map);
3536
3537 VM_MAP_RANGE_CHECK(map, start, end);
3538
3539 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3540 entry = temp_entry;
3541 }
3542 else {
3543 temp_entry = temp_entry->vme_next;
3544 entry = temp_entry;
3545 }
3546
3547 /* first check entire range for submaps which can't support the */
3548 /* given inheritance. */
3549 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3550 if(entry->is_sub_map) {
3551 if(new_inheritance == VM_INHERIT_COPY) {
3552 vm_map_unlock(map);
3553 return(KERN_INVALID_ARGUMENT);
3554 }
3555 }
3556
3557 entry = entry->vme_next;
3558 }
3559
3560 entry = temp_entry;
3561 if (entry != vm_map_to_entry(map)) {
3562 /* clip and unnest if necessary */
3563 vm_map_clip_start(map, entry, start);
3564 }
3565
3566 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3567 vm_map_clip_end(map, entry, end);
3568 assert(!entry->use_pmap); /* clip did unnest if needed */
3569
3570 entry->inheritance = new_inheritance;
3571
3572 entry = entry->vme_next;
3573 }
3574
3575 vm_map_unlock(map);
3576 return(KERN_SUCCESS);
3577 }
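/*
 * Illustrative sketch (not from the original source): marking a range so
 * that it is not propagated to child maps at vm_map_fork() time.  Note
 * the check above: submap entries reject VM_INHERIT_COPY, but
 * VM_INHERIT_NONE and VM_INHERIT_SHARE are accepted for any entry.
 */
#if 0	/* example only */
static kern_return_t
example_dont_inherit(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_NONE);
}
#endif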
3578
3579 /*
3580 * Update the accounting for the amount of wired memory in this map. If the user has
3581 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3582 */
3583
3584 static kern_return_t
3585 add_wire_counts(
3586 vm_map_t map,
3587 vm_map_entry_t entry,
3588 boolean_t user_wire)
3589 {
3590 vm_map_size_t size;
3591
3592 if (user_wire) {
3593 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
3594
3595 /*
3596 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3597 * this map entry.
3598 */
3599
3600 if (entry->user_wired_count == 0) {
3601 size = entry->vme_end - entry->vme_start;
3602
3603 /*
3604 * Since this is the first time the user is wiring this map entry, check to see if we're
3605 * exceeding the user wire limits. There is a per map limit which is the smaller of either
3606 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
3607 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3608 * limit, then we fail.
3609 */
3610
3611 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3612 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3613 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3614 return KERN_RESOURCE_SHORTAGE;
3615
3616 /*
3617 * The first time the user wires an entry, we also increment the wired_count and add this to
3618 * the total that has been wired in the map.
3619 */
3620
3621 if (entry->wired_count >= MAX_WIRE_COUNT)
3622 return KERN_FAILURE;
3623
3624 entry->wired_count++;
3625 map->user_wire_size += size;
3626 }
3627
3628 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3629 return KERN_FAILURE;
3630
3631 entry->user_wired_count++;
3632
3633 } else {
3634
3635 /*
3636 * The kernel's wiring the memory. Just bump the count and continue.
3637 */
3638
3639 if (entry->wired_count >= MAX_WIRE_COUNT)
3640 panic("vm_map_wire: too many wirings");
3641
3642 entry->wired_count++;
3643 }
3644
3645 return KERN_SUCCESS;
3646 }
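/*
 * Illustrative sketch (not from the original source) of the user-wire
 * limit test performed above, with the three constraints spelled out
 * separately: the per-map cap (the smaller of the map's own limit and
 * the global vm_user_wire_limit), the system-wide user cap, and the
 * amount of physical memory that must always remain unwired.
 */
#if 0	/* example only */
static boolean_t
example_user_wire_fits(vm_map_t map, vm_map_size_t size, unsigned int total_wire_count)
{
	if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit))
		return FALSE;	/* per-map limit exceeded */
	if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit)
		return FALSE;	/* system-wide user wiring limit exceeded */
	if (size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
		return FALSE;	/* would leave too little unwired memory */
	return TRUE;
}
#endif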
3647
3648 /*
3649 * Update the memory wiring accounting now that the given map entry is being unwired.
3650 */
3651
3652 static void
3653 subtract_wire_counts(
3654 vm_map_t map,
3655 vm_map_entry_t entry,
3656 boolean_t user_wire)
3657 {
3658
3659 if (user_wire) {
3660
3661 /*
3662 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3663 */
3664
3665 if (entry->user_wired_count == 1) {
3666
3667 /*
3668 * We're removing the last user wire reference. Decrement the wired_count and the total
3669 * user wired memory for this map.
3670 */
3671
3672 assert(entry->wired_count >= 1);
3673 entry->wired_count--;
3674 map->user_wire_size -= entry->vme_end - entry->vme_start;
3675 }
3676
3677 assert(entry->user_wired_count >= 1);
3678 entry->user_wired_count--;
3679
3680 } else {
3681
3682 /*
3683 * The kernel is unwiring the memory. Just update the count.
3684 */
3685
3686 assert(entry->wired_count >= 1);
3687 entry->wired_count--;
3688 }
3689 }
3690
3691 /*
3692 * vm_map_wire:
3693 *
3694 * Sets the pageability of the specified address range in the
3695 * target map as wired. Regions specified as not pageable require
3696 * locked-down physical memory and physical page maps. The
3697 * access_type variable indicates types of accesses that must not
3698 * generate page faults. This is checked against protection of
3699 * memory being locked-down.
3700 *
3701 * The map must not be locked, but a reference must remain to the
3702 * map throughout the call.
3703 */
3704 static kern_return_t
3705 vm_map_wire_nested(
3706 register vm_map_t map,
3707 register vm_map_offset_t start,
3708 register vm_map_offset_t end,
3709 register vm_prot_t access_type,
3710 boolean_t user_wire,
3711 pmap_t map_pmap,
3712 vm_map_offset_t pmap_addr)
3713 {
3714 register vm_map_entry_t entry;
3715 struct vm_map_entry *first_entry, tmp_entry;
3716 vm_map_t real_map;
3717 register vm_map_offset_t s,e;
3718 kern_return_t rc;
3719 boolean_t need_wakeup;
3720 boolean_t main_map = FALSE;
3721 wait_interrupt_t interruptible_state;
3722 thread_t cur_thread;
3723 unsigned int last_timestamp;
3724 vm_map_size_t size;
3725
3726 vm_map_lock(map);
3727 if(map_pmap == NULL)
3728 main_map = TRUE;
3729 last_timestamp = map->timestamp;
3730
3731 VM_MAP_RANGE_CHECK(map, start, end);
3732 assert(page_aligned(start));
3733 assert(page_aligned(end));
3734 if (start == end) {
3735 /* We wired what the caller asked for, zero pages */
3736 vm_map_unlock(map);
3737 return KERN_SUCCESS;
3738 }
3739
3740 need_wakeup = FALSE;
3741 cur_thread = current_thread();
3742
3743 s = start;
3744 rc = KERN_SUCCESS;
3745
3746 if (vm_map_lookup_entry(map, s, &first_entry)) {
3747 entry = first_entry;
3748 /*
3749 * vm_map_clip_start will be done later.
3750 * We don't want to unnest any nested submaps here !
3751 */
3752 } else {
3753 /* Start address is not in map */
3754 rc = KERN_INVALID_ADDRESS;
3755 goto done;
3756 }
3757
3758 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3759 /*
3760 * At this point, we have wired from "start" to "s".
3761 * We still need to wire from "s" to "end".
3762 *
3763 * "entry" hasn't been clipped, so it could start before "s"
3764 * and/or end after "end".
3765 */
3766
3767 /* "e" is how far we want to wire in this entry */
3768 e = entry->vme_end;
3769 if (e > end)
3770 e = end;
3771
3772 /*
3773 * If another thread is wiring/unwiring this entry then
3774 * block after informing other thread to wake us up.
3775 */
3776 if (entry->in_transition) {
3777 wait_result_t wait_result;
3778
3779 /*
3780 * We have not clipped the entry. Make sure that
3781 * the start address is in range so that the lookup
3782 * below will succeed.
3783 * "s" is the current starting point: we've already
3784 * wired from "start" to "s" and we still have
3785 * to wire from "s" to "end".
3786 */
3787
3788 entry->needs_wakeup = TRUE;
3789
3790 /*
3791 * wake up anybody waiting on entries that we have
3792 * already wired.
3793 */
3794 if (need_wakeup) {
3795 vm_map_entry_wakeup(map);
3796 need_wakeup = FALSE;
3797 }
3798 /*
3799 * User wiring is interruptible
3800 */
3801 wait_result = vm_map_entry_wait(map,
3802 (user_wire) ? THREAD_ABORTSAFE :
3803 THREAD_UNINT);
3804 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3805 /*
3806 * undo the wirings we have done so far
3807 * We do not clear the needs_wakeup flag,
3808 * because we cannot tell if we were the
3809 * only one waiting.
3810 */
3811 rc = KERN_FAILURE;
3812 goto done;
3813 }
3814
3815 /*
3816 * Cannot avoid a lookup here. reset timestamp.
3817 */
3818 last_timestamp = map->timestamp;
3819
3820 /*
3821 * The entry could have been clipped, look it up again.
3822 * Worst that can happen is that it may not exist anymore.
3823 */
3824 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3825 /*
3826 * User: undo everything up to the previous
3827 * entry. Let vm_map_unwire worry about
3828 * checking the validity of the range.
3829 */
3830 rc = KERN_FAILURE;
3831 goto done;
3832 }
3833 entry = first_entry;
3834 continue;
3835 }
3836
3837 if (entry->is_sub_map) {
3838 vm_map_offset_t sub_start;
3839 vm_map_offset_t sub_end;
3840 vm_map_offset_t local_start;
3841 vm_map_offset_t local_end;
3842 pmap_t pmap;
3843
3844 vm_map_clip_start(map, entry, s);
3845 vm_map_clip_end(map, entry, end);
3846
3847 sub_start = entry->offset;
3848 sub_end = entry->vme_end;
3849 sub_end += entry->offset - entry->vme_start;
3850
3851 local_end = entry->vme_end;
3852 if(map_pmap == NULL) {
3853 vm_object_t object;
3854 vm_object_offset_t offset;
3855 vm_prot_t prot;
3856 boolean_t wired;
3857 vm_map_entry_t local_entry;
3858 vm_map_version_t version;
3859 vm_map_t lookup_map;
3860
3861 if(entry->use_pmap) {
3862 pmap = entry->object.sub_map->pmap;
3863 /* ppc implementation requires that */
3864 /* submap's pmap address ranges line */
3865 /* up with parent map */
3866 #ifdef notdef
3867 pmap_addr = sub_start;
3868 #endif
3869 pmap_addr = s;
3870 } else {
3871 pmap = map->pmap;
3872 pmap_addr = s;
3873 }
3874
3875 if (entry->wired_count) {
3876 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3877 goto done;
3878
3879 /*
3880 * The map was not unlocked:
3881 * no need to goto re-lookup.
3882 * Just go directly to next entry.
3883 */
3884 entry = entry->vme_next;
3885 s = entry->vme_start;
3886 continue;
3887
3888 }
3889
3890 /* call vm_map_lookup_locked to */
3891 /* cause any needs_copy to be */
3892 /* evaluated */
3893 local_start = entry->vme_start;
3894 lookup_map = map;
3895 vm_map_lock_write_to_read(map);
3896 if(vm_map_lookup_locked(
3897 &lookup_map, local_start,
3898 access_type,
3899 OBJECT_LOCK_EXCLUSIVE,
3900 &version, &object,
3901 &offset, &prot, &wired,
3902 NULL,
3903 &real_map)) {
3904
3905 vm_map_unlock_read(lookup_map);
3906 vm_map_unwire(map, start,
3907 s, user_wire);
3908 return(KERN_FAILURE);
3909 }
3910 if(real_map != lookup_map)
3911 vm_map_unlock(real_map);
3912 vm_map_unlock_read(lookup_map);
3913 vm_map_lock(map);
3914 vm_object_unlock(object);
3915
3916 /* we unlocked, so must re-lookup */
3917 if (!vm_map_lookup_entry(map,
3918 local_start,
3919 &local_entry)) {
3920 rc = KERN_FAILURE;
3921 goto done;
3922 }
3923
3924 /*
3925 * entry could have been "simplified",
3926 * so re-clip
3927 */
3928 entry = local_entry;
3929 assert(s == local_start);
3930 vm_map_clip_start(map, entry, s);
3931 vm_map_clip_end(map, entry, end);
3932 /* re-compute "e" */
3933 e = entry->vme_end;
3934 if (e > end)
3935 e = end;
3936
3937 /* did we have a change of type? */
3938 if (!entry->is_sub_map) {
3939 last_timestamp = map->timestamp;
3940 continue;
3941 }
3942 } else {
3943 local_start = entry->vme_start;
3944 pmap = map_pmap;
3945 }
3946
3947 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3948 goto done;
3949
3950 entry->in_transition = TRUE;
3951
3952 vm_map_unlock(map);
3953 rc = vm_map_wire_nested(entry->object.sub_map,
3954 sub_start, sub_end,
3955 access_type,
3956 user_wire, pmap, pmap_addr);
3957 vm_map_lock(map);
3958
3959 /*
3960 * Find the entry again. It could have been clipped
3961 * after we unlocked the map.
3962 */
3963 if (!vm_map_lookup_entry(map, local_start,
3964 &first_entry))
3965 panic("vm_map_wire: re-lookup failed");
3966 entry = first_entry;
3967
3968 assert(local_start == s);
3969 /* re-compute "e" */
3970 e = entry->vme_end;
3971 if (e > end)
3972 e = end;
3973
3974 last_timestamp = map->timestamp;
3975 while ((entry != vm_map_to_entry(map)) &&
3976 (entry->vme_start < e)) {
3977 assert(entry->in_transition);
3978 entry->in_transition = FALSE;
3979 if (entry->needs_wakeup) {
3980 entry->needs_wakeup = FALSE;
3981 need_wakeup = TRUE;
3982 }
3983 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
3984 subtract_wire_counts(map, entry, user_wire);
3985 }
3986 entry = entry->vme_next;
3987 }
3988 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3989 goto done;
3990 }
3991
3992 /* no need to relookup again */
3993 s = entry->vme_start;
3994 continue;
3995 }
3996
3997 /*
3998 * If this entry is already wired then increment
3999 * the appropriate wire reference count.
4000 */
4001 if (entry->wired_count) {
4002 /*
4003 * entry is already wired down, get our reference
4004 * after clipping to our range.
4005 */
4006 vm_map_clip_start(map, entry, s);
4007 vm_map_clip_end(map, entry, end);
4008
4009 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4010 goto done;
4011
4012 /* map was not unlocked: no need to relookup */
4013 entry = entry->vme_next;
4014 s = entry->vme_start;
4015 continue;
4016 }
4017
4018 /*
4019 * Unwired entry or wire request transmitted via submap
4020 */
4021
4022
4023 /*
4024 * Perform actions of vm_map_lookup that need the write
4025 * lock on the map: create a shadow object for a
4026 * copy-on-write region, or an object for a zero-fill
4027 * region.
4028 */
4029 size = entry->vme_end - entry->vme_start;
4030 /*
4031 * If wiring a copy-on-write page, we need to copy it now
4032 * even if we're only (currently) requesting read access.
4033 * This is aggressive, but once it's wired we can't move it.
4034 */
4035 if (entry->needs_copy) {
4036 vm_object_shadow(&entry->object.vm_object,
4037 &entry->offset, size);
4038 entry->needs_copy = FALSE;
4039 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4040 entry->object.vm_object = vm_object_allocate(size);
4041 entry->offset = (vm_object_offset_t)0;
4042 }
4043
4044 vm_map_clip_start(map, entry, s);
4045 vm_map_clip_end(map, entry, end);
4046
4047 /* re-compute "e" */
4048 e = entry->vme_end;
4049 if (e > end)
4050 e = end;
4051
4052 /*
4053 * Check for holes and protection mismatch.
4054 * Holes: Next entry should be contiguous unless this
4055 * is the end of the region.
4056 * Protection: Access requested must be allowed, unless
4057 * wiring is by protection class
4058 */
4059 if ((entry->vme_end < end) &&
4060 ((entry->vme_next == vm_map_to_entry(map)) ||
4061 (entry->vme_next->vme_start > entry->vme_end))) {
4062 /* found a hole */
4063 rc = KERN_INVALID_ADDRESS;
4064 goto done;
4065 }
4066 if ((entry->protection & access_type) != access_type) {
4067 /* found a protection problem */
4068 rc = KERN_PROTECTION_FAILURE;
4069 goto done;
4070 }
4071
4072 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4073
4074 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4075 goto done;
4076
4077 entry->in_transition = TRUE;
4078
4079 /*
4080 * This entry might get split once we unlock the map.
4081 * In vm_fault_wire(), we need the current range as
4082 * defined by this entry. In order for this to work
4083 * along with a simultaneous clip operation, we make a
4084 * temporary copy of this entry and use that for the
4085 * wiring. Note that the underlying objects do not
4086 * change during a clip.
4087 */
4088 tmp_entry = *entry;
4089
4090 /*
4091 * The in_transition state guarantees that the entry
4092 * (or entries for this range, if a split occurred) will be
4093 * there when the map lock is acquired for the second time.
4094 */
4095 vm_map_unlock(map);
4096
4097 if (!user_wire && cur_thread != THREAD_NULL)
4098 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4099 else
4100 interruptible_state = THREAD_UNINT;
4101
4102 if(map_pmap)
4103 rc = vm_fault_wire(map,
4104 &tmp_entry, map_pmap, pmap_addr);
4105 else
4106 rc = vm_fault_wire(map,
4107 &tmp_entry, map->pmap,
4108 tmp_entry.vme_start);
4109
4110 if (!user_wire && cur_thread != THREAD_NULL)
4111 thread_interrupt_level(interruptible_state);
4112
4113 vm_map_lock(map);
4114
4115 if (last_timestamp+1 != map->timestamp) {
4116 /*
4117 * Find the entry again. It could have been clipped
4118 * after we unlocked the map.
4119 */
4120 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4121 &first_entry))
4122 panic("vm_map_wire: re-lookup failed");
4123
4124 entry = first_entry;
4125 }
4126
4127 last_timestamp = map->timestamp;
4128
4129 while ((entry != vm_map_to_entry(map)) &&
4130 (entry->vme_start < tmp_entry.vme_end)) {
4131 assert(entry->in_transition);
4132 entry->in_transition = FALSE;
4133 if (entry->needs_wakeup) {
4134 entry->needs_wakeup = FALSE;
4135 need_wakeup = TRUE;
4136 }
4137 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4138 subtract_wire_counts(map, entry, user_wire);
4139 }
4140 entry = entry->vme_next;
4141 }
4142
4143 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4144 goto done;
4145 }
4146
4147 s = entry->vme_start;
4148 } /* end while loop through map entries */
4149
4150 done:
4151 if (rc == KERN_SUCCESS) {
4152 /* repair any damage we may have made to the VM map */
4153 vm_map_simplify_range(map, start, end);
4154 }
4155
4156 vm_map_unlock(map);
4157
4158 /*
4159 * wake up anybody waiting on entries we wired.
4160 */
4161 if (need_wakeup)
4162 vm_map_entry_wakeup(map);
4163
4164 if (rc != KERN_SUCCESS) {
4165 /* undo what has been wired so far */
4166 vm_map_unwire(map, start, s, user_wire);
4167 }
4168
4169 return rc;
4170
4171 }
4172
4173 kern_return_t
4174 vm_map_wire(
4175 register vm_map_t map,
4176 register vm_map_offset_t start,
4177 register vm_map_offset_t end,
4178 register vm_prot_t access_type,
4179 boolean_t user_wire)
4180 {
4181
4182 kern_return_t kret;
4183
4184 kret = vm_map_wire_nested(map, start, end, access_type,
4185 user_wire, (pmap_t)NULL, 0);
4186 return kret;
4187 }
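/*
 * Illustrative sketch only (kept out of the build with #if 0): one way a
 * kernel-internal caller might use the vm_map_wire()/vm_map_unwire() pair
 * to keep a page-aligned range resident across an I/O operation.  The
 * function and variable names below are hypothetical, not part of this
 * file's interface.
 */
#if 0
static kern_return_t
example_wire_range_for_io(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	kern_return_t		kr;

	/* kernel wiring (user_wire == FALSE): waits are not interruptible */
	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_WRITE, FALSE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform the I/O against the now-resident range ... */

	/* drop the kernel wiring taken above */
	return vm_map_unwire(map, start, end, FALSE);
}
#endif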
4188
4189 /*
4190 * vm_map_unwire:
4191 *
4192 * Sets the pageability of the specified address range in the target
4193 * map to pageable. Regions specified must have been wired previously.
4194 *
4195 * The map must not be locked, but a reference must remain to the map
4196 * throughout the call.
4197 *
4198 * The kernel will panic on failures. User unwire ignores holes and
4199 * unwired and in-transition entries to avoid losing memory by leaving
4200 * it unwired.
4201 */
4202 static kern_return_t
4203 vm_map_unwire_nested(
4204 register vm_map_t map,
4205 register vm_map_offset_t start,
4206 register vm_map_offset_t end,
4207 boolean_t user_wire,
4208 pmap_t map_pmap,
4209 vm_map_offset_t pmap_addr)
4210 {
4211 register vm_map_entry_t entry;
4212 struct vm_map_entry *first_entry, tmp_entry;
4213 boolean_t need_wakeup;
4214 boolean_t main_map = FALSE;
4215 unsigned int last_timestamp;
4216
4217 vm_map_lock(map);
4218 if(map_pmap == NULL)
4219 main_map = TRUE;
4220 last_timestamp = map->timestamp;
4221
4222 VM_MAP_RANGE_CHECK(map, start, end);
4223 assert(page_aligned(start));
4224 assert(page_aligned(end));
4225
4226 if (start == end) {
4227 /* We unwired what the caller asked for: zero pages */
4228 vm_map_unlock(map);
4229 return KERN_SUCCESS;
4230 }
4231
4232 if (vm_map_lookup_entry(map, start, &first_entry)) {
4233 entry = first_entry;
4234 /*
4235 * vm_map_clip_start will be done later.
4236 * We don't want to unnest any nested sub maps here !
4237 */
4238 }
4239 else {
4240 if (!user_wire) {
4241 panic("vm_map_unwire: start not found");
4242 }
4243 /* Start address is not in map. */
4244 vm_map_unlock(map);
4245 return(KERN_INVALID_ADDRESS);
4246 }
4247
4248 if (entry->superpage_size) {
4249 /* superpages are always wired */
4250 vm_map_unlock(map);
4251 return KERN_INVALID_ADDRESS;
4252 }
4253
4254 need_wakeup = FALSE;
4255 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4256 if (entry->in_transition) {
4257 /*
4258 * 1)
4259 * Another thread is wiring down this entry. Note
4260 * that if it were not for the other thread, we would
4261 * be unwiring an unwired entry. This is not
4262 * permitted. If we wait, we will be unwiring memory
4263 * we did not wire.
4264 *
4265 * 2)
4266 * Another thread is unwiring this entry. We did not
4267 * have a reference to it, because if we did, this
4268 * entry would not be getting unwired now.
4269 */
4270 if (!user_wire) {
4271 /*
4272 * XXX FBDP
4273 * This could happen: there could be some
4274 * overlapping vslock/vsunlock operations
4275 * going on.
4276 * We should probably just wait and retry,
4277 * but then we have to be careful that this
4278 * entry could get "simplified" after
4279 * "in_transition" gets unset and before
4280 * we re-lookup the entry, so we would
4281 * have to re-clip the entry to avoid
4282 * re-unwiring what we have already unwired...
4283 * See vm_map_wire_nested().
4284 *
4285 * Or we could just ignore "in_transition"
4286 * here and proceed to decrement the wired
4287 * count(s) on this entry. That should be fine
4288 * as long as "wired_count" doesn't drop all
4289 * the way to 0 (and we should panic if THAT
4290 * happens).
4291 */
4292 panic("vm_map_unwire: in_transition entry");
4293 }
4294
4295 entry = entry->vme_next;
4296 continue;
4297 }
4298
4299 if (entry->is_sub_map) {
4300 vm_map_offset_t sub_start;
4301 vm_map_offset_t sub_end;
4302 vm_map_offset_t local_end;
4303 pmap_t pmap;
4304
4305 vm_map_clip_start(map, entry, start);
4306 vm_map_clip_end(map, entry, end);
4307
4308 sub_start = entry->offset;
4309 sub_end = entry->vme_end - entry->vme_start;
4310 sub_end += entry->offset;
4311 local_end = entry->vme_end;
4312 if(map_pmap == NULL) {
4313 if(entry->use_pmap) {
4314 pmap = entry->object.sub_map->pmap;
4315 pmap_addr = sub_start;
4316 } else {
4317 pmap = map->pmap;
4318 pmap_addr = start;
4319 }
4320 if (entry->wired_count == 0 ||
4321 (user_wire && entry->user_wired_count == 0)) {
4322 if (!user_wire)
4323 panic("vm_map_unwire: entry is unwired");
4324 entry = entry->vme_next;
4325 continue;
4326 }
4327
4328 /*
4329 * Check for holes
4330 * Holes: Next entry should be contiguous unless
4331 * this is the end of the region.
4332 */
4333 if (((entry->vme_end < end) &&
4334 ((entry->vme_next == vm_map_to_entry(map)) ||
4335 (entry->vme_next->vme_start
4336 > entry->vme_end)))) {
4337 if (!user_wire)
4338 panic("vm_map_unwire: non-contiguous region");
4339 /*
4340 entry = entry->vme_next;
4341 continue;
4342 */
4343 }
4344
4345 subtract_wire_counts(map, entry, user_wire);
4346
4347 if (entry->wired_count != 0) {
4348 entry = entry->vme_next;
4349 continue;
4350 }
4351
4352 entry->in_transition = TRUE;
4353 tmp_entry = *entry;/* see comment in vm_map_wire() */
4354
4355 /*
4356 * We can unlock the map now. The in_transition state
4357 * guarantees the existence of the entry.
4358 */
4359 vm_map_unlock(map);
4360 vm_map_unwire_nested(entry->object.sub_map,
4361 sub_start, sub_end, user_wire, pmap, pmap_addr);
4362 vm_map_lock(map);
4363
4364 if (last_timestamp+1 != map->timestamp) {
4365 /*
4366 * Find the entry again. It could have been
4367 * clipped or deleted after we unlocked the map.
4368 */
4369 if (!vm_map_lookup_entry(map,
4370 tmp_entry.vme_start,
4371 &first_entry)) {
4372 if (!user_wire)
4373 panic("vm_map_unwire: re-lookup failed");
4374 entry = first_entry->vme_next;
4375 } else
4376 entry = first_entry;
4377 }
4378 last_timestamp = map->timestamp;
4379
4380 /*
4381 * clear transition bit for all constituent entries
4382 * that were in the original entry (saved in
4383 * tmp_entry). Also check for waiters.
4384 */
4385 while ((entry != vm_map_to_entry(map)) &&
4386 (entry->vme_start < tmp_entry.vme_end)) {
4387 assert(entry->in_transition);
4388 entry->in_transition = FALSE;
4389 if (entry->needs_wakeup) {
4390 entry->needs_wakeup = FALSE;
4391 need_wakeup = TRUE;
4392 }
4393 entry = entry->vme_next;
4394 }
4395 continue;
4396 } else {
4397 vm_map_unlock(map);
4398 vm_map_unwire_nested(entry->object.sub_map,
4399 sub_start, sub_end, user_wire, map_pmap,
4400 pmap_addr);
4401 vm_map_lock(map);
4402
4403 if (last_timestamp+1 != map->timestamp) {
4404 /*
4405 * Find the entry again. It could have been
4406 * clipped or deleted after we unlocked the map.
4407 */
4408 if (!vm_map_lookup_entry(map,
4409 tmp_entry.vme_start,
4410 &first_entry)) {
4411 if (!user_wire)
4412 panic("vm_map_unwire: re-lookup failed");
4413 entry = first_entry->vme_next;
4414 } else
4415 entry = first_entry;
4416 }
4417 last_timestamp = map->timestamp;
4418 }
4419 }
4420
4421
4422 if ((entry->wired_count == 0) ||
4423 (user_wire && entry->user_wired_count == 0)) {
4424 if (!user_wire)
4425 panic("vm_map_unwire: entry is unwired");
4426
4427 entry = entry->vme_next;
4428 continue;
4429 }
4430
4431 assert(entry->wired_count > 0 &&
4432 (!user_wire || entry->user_wired_count > 0));
4433
4434 vm_map_clip_start(map, entry, start);
4435 vm_map_clip_end(map, entry, end);
4436
4437 /*
4438 * Check for holes
4439 * Holes: Next entry should be contiguous unless
4440 * this is the end of the region.
4441 */
4442 if (((entry->vme_end < end) &&
4443 ((entry->vme_next == vm_map_to_entry(map)) ||
4444 (entry->vme_next->vme_start > entry->vme_end)))) {
4445
4446 if (!user_wire)
4447 panic("vm_map_unwire: non-contiguous region");
4448 entry = entry->vme_next;
4449 continue;
4450 }
4451
4452 subtract_wire_counts(map, entry, user_wire);
4453
4454 if (entry->wired_count != 0) {
4455 entry = entry->vme_next;
4456 continue;
4457 }
4458
4459 if(entry->zero_wired_pages) {
4460 entry->zero_wired_pages = FALSE;
4461 }
4462
4463 entry->in_transition = TRUE;
4464 tmp_entry = *entry; /* see comment in vm_map_wire() */
4465
4466 /*
4467 * We can unlock the map now. The in_transition state
4468 * guarantees the existence of the entry.
4469 */
4470 vm_map_unlock(map);
4471 if(map_pmap) {
4472 vm_fault_unwire(map,
4473 &tmp_entry, FALSE, map_pmap, pmap_addr);
4474 } else {
4475 vm_fault_unwire(map,
4476 &tmp_entry, FALSE, map->pmap,
4477 tmp_entry.vme_start);
4478 }
4479 vm_map_lock(map);
4480
4481 if (last_timestamp+1 != map->timestamp) {
4482 /*
4483 * Find the entry again. It could have been clipped
4484 * or deleted after we unlocked the map.
4485 */
4486 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4487 &first_entry)) {
4488 if (!user_wire)
4489 panic("vm_map_unwire: re-lookup failed");
4490 entry = first_entry->vme_next;
4491 } else
4492 entry = first_entry;
4493 }
4494 last_timestamp = map->timestamp;
4495
4496 /*
4497 * clear transition bit for all constituent entries that
4498 * were in the original entry (saved in tmp_entry). Also
4499 * check for waiters.
4500 */
4501 while ((entry != vm_map_to_entry(map)) &&
4502 (entry->vme_start < tmp_entry.vme_end)) {
4503 assert(entry->in_transition);
4504 entry->in_transition = FALSE;
4505 if (entry->needs_wakeup) {
4506 entry->needs_wakeup = FALSE;
4507 need_wakeup = TRUE;
4508 }
4509 entry = entry->vme_next;
4510 }
4511 }
4512
4513 /*
4514 * We might have fragmented the address space when we wired this
4515 * range of addresses. Attempt to re-coalesce these VM map entries
4516 * with their neighbors now that they're no longer wired.
4517 * Under some circumstances, address space fragmentation can
4518 * prevent VM object shadow chain collapsing, which can cause
4519 * swap space leaks.
4520 */
4521 vm_map_simplify_range(map, start, end);
4522
4523 vm_map_unlock(map);
4524 /*
4525 * wake up anybody waiting on entries that we have unwired.
4526 */
4527 if (need_wakeup)
4528 vm_map_entry_wakeup(map);
4529 return(KERN_SUCCESS);
4530
4531 }
4532
4533 kern_return_t
4534 vm_map_unwire(
4535 register vm_map_t map,
4536 register vm_map_offset_t start,
4537 register vm_map_offset_t end,
4538 boolean_t user_wire)
4539 {
4540 return vm_map_unwire_nested(map, start, end,
4541 user_wire, (pmap_t)NULL, 0);
4542 }
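/*
 * Illustrative sketch only (kept out of the build with #if 0): the
 * user-wire flavor of the calls above, as a vslock()-style helper might
 * use them.  With user_wire == TRUE the waits in vm_map_wire_nested()
 * are THREAD_ABORTSAFE, so the caller must expect an error return; per
 * the error path in vm_map_wire_nested(), any partially wired prefix has
 * already been undone.  The helper name is hypothetical.
 */
#if 0
static kern_return_t
example_user_wire(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	kern_return_t		kr;

	kr = vm_map_wire(map, start, end, VM_PROT_READ, TRUE);
	if (kr != KERN_SUCCESS) {
		/* interrupted or bad range: nothing is left wired by us */
		return kr;
	}

	/* ... access the wired range ... */

	return vm_map_unwire(map, start, end, TRUE);
}
#endif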
4543
4544
4545 /*
4546 * vm_map_entry_delete: [ internal use only ]
4547 *
4548 * Deallocate the given entry from the target map.
4549 */
4550 static void
4551 vm_map_entry_delete(
4552 register vm_map_t map,
4553 register vm_map_entry_t entry)
4554 {
4555 register vm_map_offset_t s, e;
4556 register vm_object_t object;
4557 register vm_map_t submap;
4558
4559 s = entry->vme_start;
4560 e = entry->vme_end;
4561 assert(page_aligned(s));
4562 assert(page_aligned(e));
4563 assert(entry->wired_count == 0);
4564 assert(entry->user_wired_count == 0);
4565 assert(!entry->permanent);
4566
4567 if (entry->is_sub_map) {
4568 object = NULL;
4569 submap = entry->object.sub_map;
4570 } else {
4571 submap = NULL;
4572 object = entry->object.vm_object;
4573 }
4574
4575 vm_map_store_entry_unlink(map, entry);
4576 map->size -= e - s;
4577
4578 vm_map_entry_dispose(map, entry);
4579
4580 vm_map_unlock(map);
4581 /*
4582 * Deallocate the object only after removing all
4583 * pmap entries pointing to its pages.
4584 */
4585 if (submap)
4586 vm_map_deallocate(submap);
4587 else
4588 vm_object_deallocate(object);
4589
4590 }
4591
4592 void
4593 vm_map_submap_pmap_clean(
4594 vm_map_t map,
4595 vm_map_offset_t start,
4596 vm_map_offset_t end,
4597 vm_map_t sub_map,
4598 vm_map_offset_t offset)
4599 {
4600 vm_map_offset_t submap_start;
4601 vm_map_offset_t submap_end;
4602 vm_map_size_t remove_size;
4603 vm_map_entry_t entry;
4604
4605 submap_end = offset + (end - start);
4606 submap_start = offset;
4607
4608 vm_map_lock_read(sub_map);
4609 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4610
4611 remove_size = (entry->vme_end - entry->vme_start);
4612 if(offset > entry->vme_start)
4613 remove_size -= offset - entry->vme_start;
4614
4615
4616 if(submap_end < entry->vme_end) {
4617 remove_size -=
4618 entry->vme_end - submap_end;
4619 }
4620 if(entry->is_sub_map) {
4621 vm_map_submap_pmap_clean(
4622 sub_map,
4623 start,
4624 start + remove_size,
4625 entry->object.sub_map,
4626 entry->offset);
4627 } else {
4628
4629 if((map->mapped) && (map->ref_count)
4630 && (entry->object.vm_object != NULL)) {
4631 vm_object_pmap_protect(
4632 entry->object.vm_object,
4633 entry->offset+(offset-entry->vme_start),
4634 remove_size,
4635 PMAP_NULL,
4636 entry->vme_start,
4637 VM_PROT_NONE);
4638 } else {
4639 pmap_remove(map->pmap,
4640 (addr64_t)start,
4641 (addr64_t)(start + remove_size));
4642 }
4643 }
4644 }
4645
4646 entry = entry->vme_next;
4647
4648 while((entry != vm_map_to_entry(sub_map))
4649 && (entry->vme_start < submap_end)) {
4650 remove_size = (entry->vme_end - entry->vme_start);
4651 if(submap_end < entry->vme_end) {
4652 remove_size -= entry->vme_end - submap_end;
4653 }
4654 if(entry->is_sub_map) {
4655 vm_map_submap_pmap_clean(
4656 sub_map,
4657 (start + entry->vme_start) - offset,
4658 ((start + entry->vme_start) - offset) + remove_size,
4659 entry->object.sub_map,
4660 entry->offset);
4661 } else {
4662 if((map->mapped) && (map->ref_count)
4663 && (entry->object.vm_object != NULL)) {
4664 vm_object_pmap_protect(
4665 entry->object.vm_object,
4666 entry->offset,
4667 remove_size,
4668 PMAP_NULL,
4669 entry->vme_start,
4670 VM_PROT_NONE);
4671 } else {
4672 pmap_remove(map->pmap,
4673 (addr64_t)((start + entry->vme_start)
4674 - offset),
4675 (addr64_t)(((start + entry->vme_start)
4676 - offset) + remove_size));
4677 }
4678 }
4679 entry = entry->vme_next;
4680 }
4681 vm_map_unlock_read(sub_map);
4682 return;
4683 }
4684
4685 /*
4686 * vm_map_delete: [ internal use only ]
4687 *
4688 * Deallocates the given address range from the target map.
4689 * Removes all user wirings. Unwires one kernel wiring if
4690 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4691 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4692 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4693 *
4694 * This routine is called with map locked and leaves map locked.
4695 */
4696 static kern_return_t
4697 vm_map_delete(
4698 vm_map_t map,
4699 vm_map_offset_t start,
4700 vm_map_offset_t end,
4701 int flags,
4702 vm_map_t zap_map)
4703 {
4704 vm_map_entry_t entry, next;
4705 struct vm_map_entry *first_entry, tmp_entry;
4706 register vm_map_offset_t s;
4707 register vm_object_t object;
4708 boolean_t need_wakeup;
4709 unsigned int last_timestamp = ~0; /* unlikely value */
4710 int interruptible;
4711
4712 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4713 THREAD_ABORTSAFE : THREAD_UNINT;
4714
4715 /*
4716 * All our DMA I/O operations in IOKit are currently done by
4717 * wiring through the map entries of the task requesting the I/O.
4718 * Because of this, we must always wait for kernel wirings
4719 * to go away on the entries before deleting them.
4720 *
4721 * Any caller who wants to actually remove a kernel wiring
4722 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4723 * properly remove one wiring instead of blasting through
4724 * them all.
4725 */
4726 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4727
4728 while(1) {
4729 /*
4730 * Find the start of the region, and clip it
4731 */
4732 if (vm_map_lookup_entry(map, start, &first_entry)) {
4733 entry = first_entry;
4734 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4735 start = SUPERPAGE_ROUND_DOWN(start);
4736 continue;
4737 }
4738 if (start == entry->vme_start) {
4739 /*
4740 * No need to clip. We don't want to cause
4741 * any unnecessary unnesting in this case...
4742 */
4743 } else {
4744 vm_map_clip_start(map, entry, start);
4745 }
4746
4747 /*
4748 * Fix the lookup hint now, rather than each
4749 * time through the loop.
4750 */
4751 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4752 } else {
4753 entry = first_entry->vme_next;
4754 }
4755 break;
4756 }
4757 if (entry->superpage_size)
4758 end = SUPERPAGE_ROUND_UP(end);
4759
4760 need_wakeup = FALSE;
4761 /*
4762 * Step through all entries in this region
4763 */
4764 s = entry->vme_start;
4765 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4766 /*
4767 * At this point, we have deleted all the memory entries
4768 * between "start" and "s". We still need to delete
4769 * all memory entries between "s" and "end".
4770 * While we were blocked and the map was unlocked, some
4771 * new memory entries could have been re-allocated between
4772 * "start" and "s" and we don't want to mess with those.
4773 * Some of those entries could even have been re-assembled
4774 * with an entry after "s" (in vm_map_simplify_entry()), so
4775 * we may have to vm_map_clip_start() again.
4776 */
4777
4778 if (entry->vme_start >= s) {
4779 /*
4780 * This entry starts on or after "s"
4781 * so no need to clip its start.
4782 */
4783 } else {
4784 /*
4785 * This entry has been re-assembled by a
4786 * vm_map_simplify_entry(). We need to
4787 * re-clip its start.
4788 */
4789 vm_map_clip_start(map, entry, s);
4790 }
4791 if (entry->vme_end <= end) {
4792 /*
4793 * This entry is going away completely, so no need
4794 * to clip and possibly cause an unnecessary unnesting.
4795 */
4796 } else {
4797 vm_map_clip_end(map, entry, end);
4798 }
4799
4800 if (entry->permanent) {
4801 panic("attempt to remove permanent VM map entry "
4802 "%p [0x%llx:0x%llx]\n",
4803 entry, (uint64_t) s, (uint64_t) end);
4804 }
4805
4806
4807 if (entry->in_transition) {
4808 wait_result_t wait_result;
4809
4810 /*
4811 * Another thread is wiring/unwiring this entry.
4812 * Let the other thread know we are waiting.
4813 */
4814 assert(s == entry->vme_start);
4815 entry->needs_wakeup = TRUE;
4816
4817 /*
4818 * wake up anybody waiting on entries that we have
4819 * already unwired/deleted.
4820 */
4821 if (need_wakeup) {
4822 vm_map_entry_wakeup(map);
4823 need_wakeup = FALSE;
4824 }
4825
4826 wait_result = vm_map_entry_wait(map, interruptible);
4827
4828 if (interruptible &&
4829 wait_result == THREAD_INTERRUPTED) {
4830 /*
4831 * We do not clear the needs_wakeup flag,
4832 * since we cannot tell if we were the only one.
4833 */
4834 vm_map_unlock(map);
4835 return KERN_ABORTED;
4836 }
4837
4838 /*
4839 * The entry could have been clipped or it
4840 * may not exist anymore. Look it up again.
4841 */
4842 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4843 assert((map != kernel_map) &&
4844 (!entry->is_sub_map));
4845 /*
4846 * User: use the next entry
4847 */
4848 entry = first_entry->vme_next;
4849 s = entry->vme_start;
4850 } else {
4851 entry = first_entry;
4852 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4853 }
4854 last_timestamp = map->timestamp;
4855 continue;
4856 } /* end in_transition */
4857
4858 if (entry->wired_count) {
4859 boolean_t user_wire;
4860
4861 user_wire = entry->user_wired_count > 0;
4862
4863 /*
4864 * Remove a kernel wiring if requested
4865 */
4866 if (flags & VM_MAP_REMOVE_KUNWIRE) {
4867 entry->wired_count--;
4868 }
4869
4870 /*
4871 * Remove all user wirings for proper accounting
4872 */
4873 if (entry->user_wired_count > 0) {
4874 while (entry->user_wired_count)
4875 subtract_wire_counts(map, entry, user_wire);
4876 }
4877
4878 if (entry->wired_count != 0) {
4879 assert(map != kernel_map);
4880 /*
4881 * Cannot continue. The typical case is when
4882 * a user thread has physical I/O pending
4883 * on this page. Either wait for the
4884 * kernel wiring to go away or return an
4885 * error.
4886 */
4887 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4888 wait_result_t wait_result;
4889
4890 assert(s == entry->vme_start);
4891 entry->needs_wakeup = TRUE;
4892 wait_result = vm_map_entry_wait(map,
4893 interruptible);
4894
4895 if (interruptible &&
4896 wait_result == THREAD_INTERRUPTED) {
4897 /*
4898 * We do not clear the
4899 * needs_wakeup flag, since we
4900 * cannot tell if we were the
4901 * only one.
4902 */
4903 vm_map_unlock(map);
4904 return KERN_ABORTED;
4905 }
4906
4907 /*
4908 * The entry could have been clipped or
4909 * it may not exist anymore. Look it
4910 * up again.
4911 */
4912 if (!vm_map_lookup_entry(map, s,
4913 &first_entry)) {
4914 assert(map != kernel_map);
4915 /*
4916 * User: use the next entry
4917 */
4918 entry = first_entry->vme_next;
4919 s = entry->vme_start;
4920 } else {
4921 entry = first_entry;
4922 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4923 }
4924 last_timestamp = map->timestamp;
4925 continue;
4926 }
4927 else {
4928 return KERN_FAILURE;
4929 }
4930 }
4931
4932 entry->in_transition = TRUE;
4933 /*
4934 * copy current entry. see comment in vm_map_wire()
4935 */
4936 tmp_entry = *entry;
4937 assert(s == entry->vme_start);
4938
4939 /*
4940 * We can unlock the map now. The in_transition
4941 * state guarantees the existence of the entry.
4942 */
4943 vm_map_unlock(map);
4944
4945 if (tmp_entry.is_sub_map) {
4946 vm_map_t sub_map;
4947 vm_map_offset_t sub_start, sub_end;
4948 pmap_t pmap;
4949 vm_map_offset_t pmap_addr;
4950
4951
4952 sub_map = tmp_entry.object.sub_map;
4953 sub_start = tmp_entry.offset;
4954 sub_end = sub_start + (tmp_entry.vme_end -
4955 tmp_entry.vme_start);
4956 if (tmp_entry.use_pmap) {
4957 pmap = sub_map->pmap;
4958 pmap_addr = tmp_entry.vme_start;
4959 } else {
4960 pmap = map->pmap;
4961 pmap_addr = tmp_entry.vme_start;
4962 }
4963 (void) vm_map_unwire_nested(sub_map,
4964 sub_start, sub_end,
4965 user_wire,
4966 pmap, pmap_addr);
4967 } else {
4968
4969 vm_fault_unwire(map, &tmp_entry,
4970 tmp_entry.object.vm_object == kernel_object,
4971 map->pmap, tmp_entry.vme_start);
4972 }
4973
4974 vm_map_lock(map);
4975
4976 if (last_timestamp+1 != map->timestamp) {
4977 /*
4978 * Find the entry again. It could have
4979 * been clipped after we unlocked the map.
4980 */
4981 if (!vm_map_lookup_entry(map, s, &first_entry)){
4982 assert((map != kernel_map) &&
4983 (!entry->is_sub_map));
4984 first_entry = first_entry->vme_next;
4985 s = first_entry->vme_start;
4986 } else {
4987 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4988 }
4989 } else {
4990 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4991 first_entry = entry;
4992 }
4993
4994 last_timestamp = map->timestamp;
4995
4996 entry = first_entry;
4997 while ((entry != vm_map_to_entry(map)) &&
4998 (entry->vme_start < tmp_entry.vme_end)) {
4999 assert(entry->in_transition);
5000 entry->in_transition = FALSE;
5001 if (entry->needs_wakeup) {
5002 entry->needs_wakeup = FALSE;
5003 need_wakeup = TRUE;
5004 }
5005 entry = entry->vme_next;
5006 }
5007 /*
5008 * We have unwired the entry(s). Go back and
5009 * delete them.
5010 */
5011 entry = first_entry;
5012 continue;
5013 }
5014
5015 /* entry is unwired */
5016 assert(entry->wired_count == 0);
5017 assert(entry->user_wired_count == 0);
5018
5019 assert(s == entry->vme_start);
5020
5021 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5022 /*
5023 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5024 * vm_map_delete(), some map entries might have been
5025 * transferred to a "zap_map", which doesn't have a
5026 * pmap. The original pmap has already been flushed
5027 * in the vm_map_delete() call targeting the original
5028 * map, but when we get to destroying the "zap_map",
5029 * we don't have any pmap to flush, so let's just skip
5030 * all this.
5031 */
5032 } else if (entry->is_sub_map) {
5033 if (entry->use_pmap) {
5034 #ifndef NO_NESTED_PMAP
5035 pmap_unnest(map->pmap,
5036 (addr64_t)entry->vme_start,
5037 entry->vme_end - entry->vme_start);
5038 #endif /* NO_NESTED_PMAP */
5039 if ((map->mapped) && (map->ref_count)) {
5040 /* clean up parent map/maps */
5041 vm_map_submap_pmap_clean(
5042 map, entry->vme_start,
5043 entry->vme_end,
5044 entry->object.sub_map,
5045 entry->offset);
5046 }
5047 } else {
5048 vm_map_submap_pmap_clean(
5049 map, entry->vme_start, entry->vme_end,
5050 entry->object.sub_map,
5051 entry->offset);
5052 }
5053 } else if (entry->object.vm_object != kernel_object) {
5054 object = entry->object.vm_object;
5055 if((map->mapped) && (map->ref_count)) {
5056 vm_object_pmap_protect(
5057 object, entry->offset,
5058 entry->vme_end - entry->vme_start,
5059 PMAP_NULL,
5060 entry->vme_start,
5061 VM_PROT_NONE);
5062 } else {
5063 pmap_remove(map->pmap,
5064 (addr64_t)entry->vme_start,
5065 (addr64_t)entry->vme_end);
5066 }
5067 }
5068
5069 /*
5070 * All pmap mappings for this map entry must have been
5071 * cleared by now.
5072 */
5073 assert(vm_map_pmap_is_empty(map,
5074 entry->vme_start,
5075 entry->vme_end));
5076
5077 next = entry->vme_next;
5078 s = next->vme_start;
5079 last_timestamp = map->timestamp;
5080
5081 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5082 zap_map != VM_MAP_NULL) {
5083 vm_map_size_t entry_size;
5084 /*
5085 * The caller wants to save the affected VM map entries
5086 * into the "zap_map". The caller will take care of
5087 * these entries.
5088 */
5089 /* unlink the entry from "map" ... */
5090 vm_map_store_entry_unlink(map, entry);
5091 /* ... and add it to the end of the "zap_map" */
5092 vm_map_store_entry_link(zap_map,
5093 vm_map_last_entry(zap_map),
5094 entry);
5095 entry_size = entry->vme_end - entry->vme_start;
5096 map->size -= entry_size;
5097 zap_map->size += entry_size;
5098 /* we didn't unlock the map, so no timestamp increase */
5099 last_timestamp--;
5100 } else {
5101 vm_map_entry_delete(map, entry);
5102 /* vm_map_entry_delete unlocks the map */
5103 vm_map_lock(map);
5104 }
5105
5106 entry = next;
5107
5108 if(entry == vm_map_to_entry(map)) {
5109 break;
5110 }
5111 if (last_timestamp+1 != map->timestamp) {
5112 /*
5113 * We are responsible for deleting everything
5114 * in the given range. If someone has interfered,
5115 * we pick up where we left off. Back-fills should
5116 * be all right for anyone except map_delete, and
5117 * we have to assume that the task has been fully
5118 * disabled before we get here.
5119 */
5120 if (!vm_map_lookup_entry(map, s, &entry)){
5121 entry = entry->vme_next;
5122 s = entry->vme_start;
5123 } else {
5124 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5125 }
5126 /*
5127 * Others can not only allocate behind us; we can
5128 * also see entries coalesce while we don't hold the map lock.
5129 */
5130 if(entry == vm_map_to_entry(map)) {
5131 break;
5132 }
5133 }
5134 last_timestamp = map->timestamp;
5135 }
5136
5137 if (map->wait_for_space)
5138 thread_wakeup((event_t) map);
5139 /*
5140 * wake up anybody waiting on entries that we have already deleted.
5141 */
5142 if (need_wakeup)
5143 vm_map_entry_wakeup(map);
5144
5145 return KERN_SUCCESS;
5146 }
5147
5148 /*
5149 * vm_map_remove:
5150 *
5151 * Remove the given address range from the target map.
5152 * This is the exported form of vm_map_delete.
5153 */
5154 kern_return_t
5155 vm_map_remove(
5156 register vm_map_t map,
5157 register vm_map_offset_t start,
5158 register vm_map_offset_t end,
5159 register boolean_t flags)
5160 {
5161 register kern_return_t result;
5162
5163 vm_map_lock(map);
5164 VM_MAP_RANGE_CHECK(map, start, end);
5165 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5166 vm_map_unlock(map);
5167
5168 return(result);
5169 }
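/*
 * Illustrative sketch only (kept out of the build with #if 0): a caller
 * that had taken a single kernel wiring on a range tears it down through
 * vm_map_remove().  Per the vm_map_delete() comment above, user wirings
 * are always removed, but one kernel wiring is dropped only when
 * VM_MAP_REMOVE_KUNWIRE is passed.  The wrapper name is hypothetical.
 */
#if 0
static kern_return_t
example_remove_kernel_wired_range(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	/* drop our one kernel wiring as part of the removal */
	return vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
}
#endif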
5170
5171
5172 /*
5173 * Routine: vm_map_copy_discard
5174 *
5175 * Description:
5176 * Dispose of a map copy object (returned by
5177 * vm_map_copyin).
5178 */
5179 void
5180 vm_map_copy_discard(
5181 vm_map_copy_t copy)
5182 {
5183 if (copy == VM_MAP_COPY_NULL)
5184 return;
5185
5186 switch (copy->type) {
5187 case VM_MAP_COPY_ENTRY_LIST:
5188 while (vm_map_copy_first_entry(copy) !=
5189 vm_map_copy_to_entry(copy)) {
5190 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5191
5192 vm_map_copy_entry_unlink(copy, entry);
5193 vm_object_deallocate(entry->object.vm_object);
5194 vm_map_copy_entry_dispose(copy, entry);
5195 }
5196 break;
5197 case VM_MAP_COPY_OBJECT:
5198 vm_object_deallocate(copy->cpy_object);
5199 break;
5200 case VM_MAP_COPY_KERNEL_BUFFER:
5201
5202 /*
5203 * The vm_map_copy_t and possibly the data buffer were
5204 * allocated by a single call to kalloc(), i.e. the
5205 * vm_map_copy_t was not allocated out of the zone.
5206 */
5207 kfree(copy, copy->cpy_kalloc_size);
5208 return;
5209 }
5210 zfree(vm_map_copy_zone, copy);
5211 }
5212
5213 /*
5214 * Routine: vm_map_copy_copy
5215 *
5216 * Description:
5217 * Move the information in a map copy object to
5218 * a new map copy object, leaving the old one
5219 * empty.
5220 *
5221 * This is used by kernel routines that need
5222 * to look at out-of-line data (in copyin form)
5223 * before deciding whether to return SUCCESS.
5224 * If the routine returns FAILURE, the original
5225 * copy object will be deallocated; therefore,
5226 * these routines must make a copy of the copy
5227 * object and leave the original empty so that
5228 * deallocation will not fail.
5229 */
5230 vm_map_copy_t
5231 vm_map_copy_copy(
5232 vm_map_copy_t copy)
5233 {
5234 vm_map_copy_t new_copy;
5235
5236 if (copy == VM_MAP_COPY_NULL)
5237 return VM_MAP_COPY_NULL;
5238
5239 /*
5240 * Allocate a new copy object, and copy the information
5241 * from the old one into it.
5242 */
5243
5244 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5245 *new_copy = *copy;
5246
5247 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5248 /*
5249 * The links in the entry chain must be
5250 * changed to point to the new copy object.
5251 */
5252 vm_map_copy_first_entry(copy)->vme_prev
5253 = vm_map_copy_to_entry(new_copy);
5254 vm_map_copy_last_entry(copy)->vme_next
5255 = vm_map_copy_to_entry(new_copy);
5256 }
5257
5258 /*
5259 * Change the old copy object into one that contains
5260 * nothing to be deallocated.
5261 */
5262 copy->type = VM_MAP_COPY_OBJECT;
5263 copy->cpy_object = VM_OBJECT_NULL;
5264
5265 /*
5266 * Return the new object.
5267 */
5268 return new_copy;
5269 }
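/*
 * Illustrative sketch only (kept out of the build with #if 0): the pattern
 * described above.  A routine that may still fail after receiving
 * out-of-line data works on a private copy of the copy object, so the
 * original -- now empty -- can still be deallocated safely by its caller
 * on the failure path.  The helper name is hypothetical.
 */
#if 0
static kern_return_t
example_try_consume_copy(
	vm_map_t		dst_map,
	vm_map_offset_t		dst_addr,
	vm_map_copy_t		copy)	/* caller discards this on failure */
{
	vm_map_copy_t		new_copy;
	kern_return_t		kr;

	/* take over the entries; "copy" is left empty but still valid */
	new_copy = vm_map_copy_copy(copy);

	/* consumed on success; on failure we clean up our private copy */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, new_copy, FALSE);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(new_copy);
	return kr;
}
#endif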
5270
5271 static kern_return_t
5272 vm_map_overwrite_submap_recurse(
5273 vm_map_t dst_map,
5274 vm_map_offset_t dst_addr,
5275 vm_map_size_t dst_size)
5276 {
5277 vm_map_offset_t dst_end;
5278 vm_map_entry_t tmp_entry;
5279 vm_map_entry_t entry;
5280 kern_return_t result;
5281 boolean_t encountered_sub_map = FALSE;
5282
5283
5284
5285 /*
5286 * Verify that the destination is all writeable
5287 * initially. We have to trunc the destination
5288 * address and round the copy size or we'll end up
5289 * splitting entries in strange ways.
5290 */
5291
5292 dst_end = vm_map_round_page(dst_addr + dst_size);
5293 vm_map_lock(dst_map);
5294
5295 start_pass_1:
5296 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5297 vm_map_unlock(dst_map);
5298 return(KERN_INVALID_ADDRESS);
5299 }
5300
5301 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5302 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5303
5304 for (entry = tmp_entry;;) {
5305 vm_map_entry_t next;
5306
5307 next = entry->vme_next;
5308 while(entry->is_sub_map) {
5309 vm_map_offset_t sub_start;
5310 vm_map_offset_t sub_end;
5311 vm_map_offset_t local_end;
5312
5313 if (entry->in_transition) {
5314 /*
5315 * Say that we are waiting, and wait for entry.
5316 */
5317 entry->needs_wakeup = TRUE;
5318 vm_map_entry_wait(dst_map, THREAD_UNINT);
5319
5320 goto start_pass_1;
5321 }
5322
5323 encountered_sub_map = TRUE;
5324 sub_start = entry->offset;
5325
5326 if(entry->vme_end < dst_end)
5327 sub_end = entry->vme_end;
5328 else
5329 sub_end = dst_end;
5330 sub_end -= entry->vme_start;
5331 sub_end += entry->offset;
5332 local_end = entry->vme_end;
5333 vm_map_unlock(dst_map);
5334
5335 result = vm_map_overwrite_submap_recurse(
5336 entry->object.sub_map,
5337 sub_start,
5338 sub_end - sub_start);
5339
5340 if(result != KERN_SUCCESS)
5341 return result;
5342 if (dst_end <= entry->vme_end)
5343 return KERN_SUCCESS;
5344 vm_map_lock(dst_map);
5345 if(!vm_map_lookup_entry(dst_map, local_end,
5346 &tmp_entry)) {
5347 vm_map_unlock(dst_map);
5348 return(KERN_INVALID_ADDRESS);
5349 }
5350 entry = tmp_entry;
5351 next = entry->vme_next;
5352 }
5353
5354 if ( ! (entry->protection & VM_PROT_WRITE)) {
5355 vm_map_unlock(dst_map);
5356 return(KERN_PROTECTION_FAILURE);
5357 }
5358
5359 /*
5360 * If the entry is in transition, we must wait
5361 * for it to exit that state. Anything could happen
5362 * when we unlock the map, so start over.
5363 */
5364 if (entry->in_transition) {
5365
5366 /*
5367 * Say that we are waiting, and wait for entry.
5368 */
5369 entry->needs_wakeup = TRUE;
5370 vm_map_entry_wait(dst_map, THREAD_UNINT);
5371
5372 goto start_pass_1;
5373 }
5374
5375 /*
5376 * our range is contained completely within this map entry
5377 */
5378 if (dst_end <= entry->vme_end) {
5379 vm_map_unlock(dst_map);
5380 return KERN_SUCCESS;
5381 }
5382 /*
5383 * check that range specified is contiguous region
5384 */
5385 if ((next == vm_map_to_entry(dst_map)) ||
5386 (next->vme_start != entry->vme_end)) {
5387 vm_map_unlock(dst_map);
5388 return(KERN_INVALID_ADDRESS);
5389 }
5390
5391 /*
5392 * Check for permanent objects in the destination.
5393 */
5394 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5395 ((!entry->object.vm_object->internal) ||
5396 (entry->object.vm_object->true_share))) {
5397 if(encountered_sub_map) {
5398 vm_map_unlock(dst_map);
5399 return(KERN_FAILURE);
5400 }
5401 }
5402
5403
5404 entry = next;
5405 }/* for */
5406 vm_map_unlock(dst_map);
5407 return(KERN_SUCCESS);
5408 }
5409
5410 /*
5411 * Routine: vm_map_copy_overwrite
5412 *
5413 * Description:
5414 * Copy the memory described by the map copy
5415 * object (copy; returned by vm_map_copyin) onto
5416 * the specified destination region (dst_map, dst_addr).
5417 * The destination must be writeable.
5418 *
5419 * Unlike vm_map_copyout, this routine actually
5420 * writes over previously-mapped memory. If the
5421 * previous mapping was to a permanent (user-supplied)
5422 * memory object, it is preserved.
5423 *
5424 * The attributes (protection and inheritance) of the
5425 * destination region are preserved.
5426 *
5427 * If successful, consumes the copy object.
5428 * Otherwise, the caller is responsible for it.
5429 *
5430 * Implementation notes:
5431 * To overwrite aligned temporary virtual memory, it is
5432 * sufficient to remove the previous mapping and insert
5433 * the new copy. This replacement is done either on
5434 * the whole region (if no permanent virtual memory
5435 * objects are embedded in the destination region) or
5436 * in individual map entries.
5437 *
5438 * To overwrite permanent virtual memory, it is necessary
5439 * to copy each page, as the external memory management
5440 * interface currently does not provide any optimizations.
5441 *
5442 * Unaligned memory also has to be copied. It is possible
5443 * to use 'vm_trickery' to copy the aligned data. This is
5444 * not done yet, but would not be hard to implement.
5445 *
5446 * Once a page of permanent memory has been overwritten,
5447 * it is impossible to interrupt this function; otherwise,
5448 * the call would be neither atomic nor location-independent.
5449 * The kernel-state portion of a user thread must be
5450 * interruptible.
5451 *
5452 * It may be expensive to forward all requests that might
5453 * overwrite permanent memory (vm_write, vm_copy) to
5454 * uninterruptible kernel threads. This routine may be
5455 * called by interruptible threads; however, success is
5456 * not guaranteed -- if the request cannot be performed
5457 * atomically and interruptibly, an error indication is
5458 * returned.
5459 */
5460
5461 static kern_return_t
5462 vm_map_copy_overwrite_nested(
5463 vm_map_t dst_map,
5464 vm_map_address_t dst_addr,
5465 vm_map_copy_t copy,
5466 boolean_t interruptible,
5467 pmap_t pmap,
5468 boolean_t discard_on_success)
5469 {
5470 vm_map_offset_t dst_end;
5471 vm_map_entry_t tmp_entry;
5472 vm_map_entry_t entry;
5473 kern_return_t kr;
5474 boolean_t aligned = TRUE;
5475 boolean_t contains_permanent_objects = FALSE;
5476 boolean_t encountered_sub_map = FALSE;
5477 vm_map_offset_t base_addr;
5478 vm_map_size_t copy_size;
5479 vm_map_size_t total_size;
5480
5481
5482 /*
5483 * Check for null copy object.
5484 */
5485
5486 if (copy == VM_MAP_COPY_NULL)
5487 return(KERN_SUCCESS);
5488
5489 /*
5490 * Check for special kernel buffer allocated
5491 * by new_ipc_kmsg_copyin.
5492 */
5493
5494 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5495 return(vm_map_copyout_kernel_buffer(
5496 dst_map, &dst_addr,
5497 copy, TRUE));
5498 }
5499
5500 /*
5501 * Only works for entry lists at the moment. Will
5502 * support page lists later.
5503 */
5504
5505 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5506
5507 if (copy->size == 0) {
5508 if (discard_on_success)
5509 vm_map_copy_discard(copy);
5510 return(KERN_SUCCESS);
5511 }
5512
5513 /*
5514 * Verify that the destination is all writeable
5515 * initially. We have to trunc the destination
5516 * address and round the copy size or we'll end up
5517 * splitting entries in strange ways.
5518 */
5519
5520 if (!page_aligned(copy->size) ||
5521 !page_aligned (copy->offset) ||
5522 !page_aligned (dst_addr))
5523 {
5524 aligned = FALSE;
5525 dst_end = vm_map_round_page(dst_addr + copy->size);
5526 } else {
5527 dst_end = dst_addr + copy->size;
5528 }
5529
5530 vm_map_lock(dst_map);
5531
5532 /* LP64todo - remove this check when vm_map_commpage64()
5533 * no longer has to stuff in a map_entry for the commpage
5534 * above the map's max_offset.
5535 */
5536 if (dst_addr >= dst_map->max_offset) {
5537 vm_map_unlock(dst_map);
5538 return(KERN_INVALID_ADDRESS);
5539 }
5540
5541 start_pass_1:
5542 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5543 vm_map_unlock(dst_map);
5544 return(KERN_INVALID_ADDRESS);
5545 }
5546 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5547 for (entry = tmp_entry;;) {
5548 vm_map_entry_t next = entry->vme_next;
5549
5550 while(entry->is_sub_map) {
5551 vm_map_offset_t sub_start;
5552 vm_map_offset_t sub_end;
5553 vm_map_offset_t local_end;
5554
5555 if (entry->in_transition) {
5556
5557 /*
5558 * Say that we are waiting, and wait for entry.
5559 */
5560 entry->needs_wakeup = TRUE;
5561 vm_map_entry_wait(dst_map, THREAD_UNINT);
5562
5563 goto start_pass_1;
5564 }
5565
5566 local_end = entry->vme_end;
5567 if (!(entry->needs_copy)) {
5568 /* if needs_copy we are a COW submap */
5569 /* in such a case we just replace so */
5570 /* there is no need for the follow- */
5571 /* ing check. */
5572 encountered_sub_map = TRUE;
5573 sub_start = entry->offset;
5574
5575 if(entry->vme_end < dst_end)
5576 sub_end = entry->vme_end;
5577 else
5578 sub_end = dst_end;
5579 sub_end -= entry->vme_start;
5580 sub_end += entry->offset;
5581 vm_map_unlock(dst_map);
5582
5583 kr = vm_map_overwrite_submap_recurse(
5584 entry->object.sub_map,
5585 sub_start,
5586 sub_end - sub_start);
5587 if(kr != KERN_SUCCESS)
5588 return kr;
5589 vm_map_lock(dst_map);
5590 }
5591
5592 if (dst_end <= entry->vme_end)
5593 goto start_overwrite;
5594 if(!vm_map_lookup_entry(dst_map, local_end,
5595 &entry)) {
5596 vm_map_unlock(dst_map);
5597 return(KERN_INVALID_ADDRESS);
5598 }
5599 next = entry->vme_next;
5600 }
5601
5602 if ( ! (entry->protection & VM_PROT_WRITE)) {
5603 vm_map_unlock(dst_map);
5604 return(KERN_PROTECTION_FAILURE);
5605 }
5606
5607 /*
5608 * If the entry is in transition, we must wait
5609 * for it to exit that state. Anything could happen
5610 * when we unlock the map, so start over.
5611 */
5612 if (entry->in_transition) {
5613
5614 /*
5615 * Say that we are waiting, and wait for entry.
5616 */
5617 entry->needs_wakeup = TRUE;
5618 vm_map_entry_wait(dst_map, THREAD_UNINT);
5619
5620 goto start_pass_1;
5621 }
5622
5623 /*
5624 * our range is contained completely within this map entry
5625 */
5626 if (dst_end <= entry->vme_end)
5627 break;
5628 /*
5629 * check that range specified is contiguous region
5630 */
5631 if ((next == vm_map_to_entry(dst_map)) ||
5632 (next->vme_start != entry->vme_end)) {
5633 vm_map_unlock(dst_map);
5634 return(KERN_INVALID_ADDRESS);
5635 }
5636
5637
5638 /*
5639 * Check for permanent objects in the destination.
5640 */
5641 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5642 ((!entry->object.vm_object->internal) ||
5643 (entry->object.vm_object->true_share))) {
5644 contains_permanent_objects = TRUE;
5645 }
5646
5647 entry = next;
5648 }/* for */
5649
5650 start_overwrite:
5651 /*
5652 * If there are permanent objects in the destination, then
5653 * the copy cannot be interrupted.
5654 */
5655
5656 if (interruptible && contains_permanent_objects) {
5657 vm_map_unlock(dst_map);
5658 return(KERN_FAILURE); /* XXX */
5659 }
5660
5661 /*
5662 *
5663 * Make a second pass, overwriting the data
5664 * At the beginning of each loop iteration,
5665 * the next entry to be overwritten is "tmp_entry"
5666 * (initially, the value returned from the lookup above),
5667 * and the starting address expected in that entry
5668 * is "start".
5669 */
5670
5671 total_size = copy->size;
5672 if(encountered_sub_map) {
5673 copy_size = 0;
5674 /* re-calculate tmp_entry since we've had the map */
5675 /* unlocked */
5676 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5677 vm_map_unlock(dst_map);
5678 return(KERN_INVALID_ADDRESS);
5679 }
5680 } else {
5681 copy_size = copy->size;
5682 }
5683
5684 base_addr = dst_addr;
5685 while(TRUE) {
5686 /* deconstruct the copy object and do in parts */
5687 /* only in the sub_map, interruptible case */
5688 vm_map_entry_t copy_entry;
5689 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5690 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5691 int nentries;
5692 int remaining_entries = 0;
5693 vm_map_offset_t new_offset = 0;
5694
5695 for (entry = tmp_entry; copy_size == 0;) {
5696 vm_map_entry_t next;
5697
5698 next = entry->vme_next;
5699
5700 /* tmp_entry and the base address are moved along */
5701 /* each time we encounter a sub-map. Otherwise */
5702 /* entry can outpace tmp_entry, and copy_size */
5703 /* may reflect the distance between them. */
5704 /* If the current entry is found to be in transition, */
5705 /* we will start over at the beginning or at the last */
5706 /* encounter of a submap, as dictated by base_addr, */
5707 /* and we will zero copy_size accordingly. */
5708 if (entry->in_transition) {
5709 /*
5710 * Say that we are waiting, and wait for entry.
5711 */
5712 entry->needs_wakeup = TRUE;
5713 vm_map_entry_wait(dst_map, THREAD_UNINT);
5714
5715 if(!vm_map_lookup_entry(dst_map, base_addr,
5716 &tmp_entry)) {
5717 vm_map_unlock(dst_map);
5718 return(KERN_INVALID_ADDRESS);
5719 }
5720 copy_size = 0;
5721 entry = tmp_entry;
5722 continue;
5723 }
5724 if(entry->is_sub_map) {
5725 vm_map_offset_t sub_start;
5726 vm_map_offset_t sub_end;
5727 vm_map_offset_t local_end;
5728
5729 if (entry->needs_copy) {
5730 /* if this is a COW submap */
5731 /* just back the range with an */
5732 /* anonymous entry */
5733 if(entry->vme_end < dst_end)
5734 sub_end = entry->vme_end;
5735 else
5736 sub_end = dst_end;
5737 if(entry->vme_start < base_addr)
5738 sub_start = base_addr;
5739 else
5740 sub_start = entry->vme_start;
5741 vm_map_clip_end(
5742 dst_map, entry, sub_end);
5743 vm_map_clip_start(
5744 dst_map, entry, sub_start);
5745 assert(!entry->use_pmap);
5746 entry->is_sub_map = FALSE;
5747 vm_map_deallocate(
5748 entry->object.sub_map);
5749 entry->object.sub_map = NULL;
5750 entry->is_shared = FALSE;
5751 entry->needs_copy = FALSE;
5752 entry->offset = 0;
5753 /*
5754 * XXX FBDP
5755 * We should propagate the protections
5756 * of the submap entry here instead
5757 * of forcing them to VM_PROT_ALL...
5758 * Or better yet, we should inherit
5759 * the protection of the copy_entry.
5760 */
5761 entry->protection = VM_PROT_ALL;
5762 entry->max_protection = VM_PROT_ALL;
5763 entry->wired_count = 0;
5764 entry->user_wired_count = 0;
5765 if(entry->inheritance
5766 == VM_INHERIT_SHARE)
5767 entry->inheritance = VM_INHERIT_COPY;
5768 continue;
5769 }
5770 /* first take care of any non-sub_map */
5771 /* entries to send */
5772 if(base_addr < entry->vme_start) {
5773 /* stuff to send */
5774 copy_size =
5775 entry->vme_start - base_addr;
5776 break;
5777 }
5778 sub_start = entry->offset;
5779
5780 if(entry->vme_end < dst_end)
5781 sub_end = entry->vme_end;
5782 else
5783 sub_end = dst_end;
5784 sub_end -= entry->vme_start;
5785 sub_end += entry->offset;
5786 local_end = entry->vme_end;
5787 vm_map_unlock(dst_map);
5788 copy_size = sub_end - sub_start;
5789
5790 /* adjust the copy object */
5791 if (total_size > copy_size) {
5792 vm_map_size_t local_size = 0;
5793 vm_map_size_t entry_size;
5794
5795 nentries = 1;
5796 new_offset = copy->offset;
5797 copy_entry = vm_map_copy_first_entry(copy);
5798 while(copy_entry !=
5799 vm_map_copy_to_entry(copy)){
5800 entry_size = copy_entry->vme_end -
5801 copy_entry->vme_start;
5802 if((local_size < copy_size) &&
5803 ((local_size + entry_size)
5804 >= copy_size)) {
5805 vm_map_copy_clip_end(copy,
5806 copy_entry,
5807 copy_entry->vme_start +
5808 (copy_size - local_size));
5809 entry_size = copy_entry->vme_end -
5810 copy_entry->vme_start;
5811 local_size += entry_size;
5812 new_offset += entry_size;
5813 }
5814 if(local_size >= copy_size) {
5815 next_copy = copy_entry->vme_next;
5816 copy_entry->vme_next =
5817 vm_map_copy_to_entry(copy);
5818 previous_prev =
5819 copy->cpy_hdr.links.prev;
5820 copy->cpy_hdr.links.prev = copy_entry;
5821 copy->size = copy_size;
5822 remaining_entries =
5823 copy->cpy_hdr.nentries;
5824 remaining_entries -= nentries;
5825 copy->cpy_hdr.nentries = nentries;
5826 break;
5827 } else {
5828 local_size += entry_size;
5829 new_offset += entry_size;
5830 nentries++;
5831 }
5832 copy_entry = copy_entry->vme_next;
5833 }
5834 }
5835
5836 if((entry->use_pmap) && (pmap == NULL)) {
5837 kr = vm_map_copy_overwrite_nested(
5838 entry->object.sub_map,
5839 sub_start,
5840 copy,
5841 interruptible,
5842 entry->object.sub_map->pmap,
5843 TRUE);
5844 } else if (pmap != NULL) {
5845 kr = vm_map_copy_overwrite_nested(
5846 entry->object.sub_map,
5847 sub_start,
5848 copy,
5849 interruptible, pmap,
5850 TRUE);
5851 } else {
5852 kr = vm_map_copy_overwrite_nested(
5853 entry->object.sub_map,
5854 sub_start,
5855 copy,
5856 interruptible,
5857 dst_map->pmap,
5858 TRUE);
5859 }
5860 if(kr != KERN_SUCCESS) {
5861 if(next_copy != NULL) {
5862 copy->cpy_hdr.nentries +=
5863 remaining_entries;
5864 copy->cpy_hdr.links.prev->vme_next =
5865 next_copy;
5866 copy->cpy_hdr.links.prev
5867 = previous_prev;
5868 copy->size = total_size;
5869 }
5870 return kr;
5871 }
5872 if (dst_end <= local_end) {
5873 return(KERN_SUCCESS);
5874 }
5875 /* otherwise copy no longer exists, it was */
5876 /* destroyed after successful copy_overwrite */
5877 copy = (vm_map_copy_t)
5878 zalloc(vm_map_copy_zone);
5879 vm_map_copy_first_entry(copy) =
5880 vm_map_copy_last_entry(copy) =
5881 vm_map_copy_to_entry(copy);
5882 copy->type = VM_MAP_COPY_ENTRY_LIST;
5883 copy->offset = new_offset;
5884
5885 /*
5886 * XXX FBDP
5887 * this does not seem to deal with
5888 * the VM map store (red-black tree)
5889 */
5890
5891 total_size -= copy_size;
5892 copy_size = 0;
5893 /* put back remainder of copy in container */
5894 if(next_copy != NULL) {
5895 copy->cpy_hdr.nentries = remaining_entries;
5896 copy->cpy_hdr.links.next = next_copy;
5897 copy->cpy_hdr.links.prev = previous_prev;
5898 copy->size = total_size;
5899 next_copy->vme_prev =
5900 vm_map_copy_to_entry(copy);
5901 next_copy = NULL;
5902 }
5903 base_addr = local_end;
5904 vm_map_lock(dst_map);
5905 if(!vm_map_lookup_entry(dst_map,
5906 local_end, &tmp_entry)) {
5907 vm_map_unlock(dst_map);
5908 return(KERN_INVALID_ADDRESS);
5909 }
5910 entry = tmp_entry;
5911 continue;
5912 }
5913 if (dst_end <= entry->vme_end) {
5914 copy_size = dst_end - base_addr;
5915 break;
5916 }
5917
5918 if ((next == vm_map_to_entry(dst_map)) ||
5919 (next->vme_start != entry->vme_end)) {
5920 vm_map_unlock(dst_map);
5921 return(KERN_INVALID_ADDRESS);
5922 }
5923
5924 entry = next;
5925 }/* for */
5926
5927 next_copy = NULL;
5928 nentries = 1;
5929
5930 /* adjust the copy object */
5931 if (total_size > copy_size) {
5932 vm_map_size_t local_size = 0;
5933 vm_map_size_t entry_size;
5934
5935 new_offset = copy->offset;
5936 copy_entry = vm_map_copy_first_entry(copy);
5937 while(copy_entry != vm_map_copy_to_entry(copy)) {
5938 entry_size = copy_entry->vme_end -
5939 copy_entry->vme_start;
5940 if((local_size < copy_size) &&
5941 ((local_size + entry_size)
5942 >= copy_size)) {
5943 vm_map_copy_clip_end(copy, copy_entry,
5944 copy_entry->vme_start +
5945 (copy_size - local_size));
5946 entry_size = copy_entry->vme_end -
5947 copy_entry->vme_start;
5948 local_size += entry_size;
5949 new_offset += entry_size;
5950 }
5951 if(local_size >= copy_size) {
5952 next_copy = copy_entry->vme_next;
5953 copy_entry->vme_next =
5954 vm_map_copy_to_entry(copy);
5955 previous_prev =
5956 copy->cpy_hdr.links.prev;
5957 copy->cpy_hdr.links.prev = copy_entry;
5958 copy->size = copy_size;
5959 remaining_entries =
5960 copy->cpy_hdr.nentries;
5961 remaining_entries -= nentries;
5962 copy->cpy_hdr.nentries = nentries;
5963 break;
5964 } else {
5965 local_size += entry_size;
5966 new_offset += entry_size;
5967 nentries++;
5968 }
5969 copy_entry = copy_entry->vme_next;
5970 }
5971 }
5972
5973 if (aligned) {
5974 pmap_t local_pmap;
5975
5976 if(pmap)
5977 local_pmap = pmap;
5978 else
5979 local_pmap = dst_map->pmap;
5980
5981 if ((kr = vm_map_copy_overwrite_aligned(
5982 dst_map, tmp_entry, copy,
5983 base_addr, local_pmap)) != KERN_SUCCESS) {
5984 if(next_copy != NULL) {
5985 copy->cpy_hdr.nentries +=
5986 remaining_entries;
5987 copy->cpy_hdr.links.prev->vme_next =
5988 next_copy;
5989 copy->cpy_hdr.links.prev =
5990 previous_prev;
5991 copy->size += copy_size;
5992 }
5993 return kr;
5994 }
5995 vm_map_unlock(dst_map);
5996 } else {
5997 /*
5998 * Performance gain:
5999 *
6000 * If the copy and dst address are misaligned but have the same
6001 * offset within the page, we can copy the misaligned parts
6002 * unaligned and copy the rest aligned. If they are
6003 * aligned but the length is unaligned, we simply need to copy
6004 * the end bit unaligned. We'll need to split the misaligned
6005 * bits of the region in this case!
6006 */
6007 /* ALWAYS UNLOCKS THE dst_map MAP */
6008 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
6009 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6010 if(next_copy != NULL) {
6011 copy->cpy_hdr.nentries +=
6012 remaining_entries;
6013 copy->cpy_hdr.links.prev->vme_next =
6014 next_copy;
6015 copy->cpy_hdr.links.prev =
6016 previous_prev;
6017 copy->size += copy_size;
6018 }
6019 return kr;
6020 }
6021 }
6022 total_size -= copy_size;
6023 if(total_size == 0)
6024 break;
6025 base_addr += copy_size;
6026 copy_size = 0;
6027 copy->offset = new_offset;
6028 if(next_copy != NULL) {
6029 copy->cpy_hdr.nentries = remaining_entries;
6030 copy->cpy_hdr.links.next = next_copy;
6031 copy->cpy_hdr.links.prev = previous_prev;
6032 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6033 copy->size = total_size;
6034 }
6035 vm_map_lock(dst_map);
6036 while(TRUE) {
6037 if (!vm_map_lookup_entry(dst_map,
6038 base_addr, &tmp_entry)) {
6039 vm_map_unlock(dst_map);
6040 return(KERN_INVALID_ADDRESS);
6041 }
6042 if (tmp_entry->in_transition) {
6043 entry->needs_wakeup = TRUE;
6044 vm_map_entry_wait(dst_map, THREAD_UNINT);
6045 } else {
6046 break;
6047 }
6048 }
6049 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6050
6051 entry = tmp_entry;
6052 } /* while */
6053
6054 /*
6055 * Throw away the vm_map_copy object
6056 */
6057 if (discard_on_success)
6058 vm_map_copy_discard(copy);
6059
6060 return(KERN_SUCCESS);
6061 }/* vm_map_copy_overwrite_nested */
6062
6063 kern_return_t
6064 vm_map_copy_overwrite(
6065 vm_map_t dst_map,
6066 vm_map_offset_t dst_addr,
6067 vm_map_copy_t copy,
6068 boolean_t interruptible)
6069 {
6070 vm_map_size_t head_size, tail_size;
6071 vm_map_copy_t head_copy, tail_copy;
6072 vm_map_offset_t head_addr, tail_addr;
6073 vm_map_entry_t entry;
6074 kern_return_t kr;
6075
6076 head_size = 0;
6077 tail_size = 0;
6078 head_copy = NULL;
6079 tail_copy = NULL;
6080 head_addr = 0;
6081 tail_addr = 0;
6082
6083 if (interruptible ||
6084 copy == VM_MAP_COPY_NULL ||
6085 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6086 /*
6087 * We can't split the "copy" map if we're interruptible
6088 * or if we don't have a "copy" map...
6089 */
6090 blunt_copy:
6091 return vm_map_copy_overwrite_nested(dst_map,
6092 dst_addr,
6093 copy,
6094 interruptible,
6095 (pmap_t) NULL,
6096 TRUE);
6097 }
6098
6099 if (copy->size < 3 * PAGE_SIZE) {
6100 /*
6101 * Too small to bother with optimizing...
6102 */
6103 goto blunt_copy;
6104 }
6105
6106 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6107 /*
6108 * Incompatible mis-alignment of source and destination...
6109 */
6110 goto blunt_copy;
6111 }
6112
6113 /*
6114 * Proper alignment or identical mis-alignment at the beginning.
6115 * Let's try and do a small unaligned copy first (if needed)
6116 * and then an aligned copy for the rest.
6117 */
6118 if (!page_aligned(dst_addr)) {
6119 head_addr = dst_addr;
6120 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6121 }
6122 if (!page_aligned(copy->offset + copy->size)) {
6123 /*
6124 * Mis-alignment at the end.
6125 * Do an aligned copy up to the last page and
6126 * then an unaligned copy for the remaining bytes.
6127 */
6128 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6129 tail_addr = dst_addr + copy->size - tail_size;
6130 }
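	/*
	 * Worked example (sketch, assuming 4KB pages): with
	 * copy->offset = 0x1e00, copy->size = 0x4300 and a dst_addr that
	 * shares the same page offset (0xe00):
	 *	head_size = 0x1000 - 0xe00            = 0x200
	 *	tail_size = (0x1e00 + 0x4300) & 0xfff = 0x100
	 * which leaves 0x4000 (four whole pages) for the aligned middle copy.
	 */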
6131
6132 if (head_size + tail_size == copy->size) {
6133 /*
6134 * It's all unaligned, no optimization possible...
6135 */
6136 goto blunt_copy;
6137 }
6138
6139 /*
6140 * Can't optimize if there are any submaps in the
6141 * destination due to the way we free the "copy" map
6142 * progressively in vm_map_copy_overwrite_nested()
6143 * in that case.
6144 */
6145 vm_map_lock_read(dst_map);
6146 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6147 vm_map_unlock_read(dst_map);
6148 goto blunt_copy;
6149 }
6150 for (;
6151 (entry != vm_map_copy_to_entry(copy) &&
6152 entry->vme_start < dst_addr + copy->size);
6153 entry = entry->vme_next) {
6154 if (entry->is_sub_map) {
6155 vm_map_unlock_read(dst_map);
6156 goto blunt_copy;
6157 }
6158 }
6159 vm_map_unlock_read(dst_map);
6160
6161 if (head_size) {
6162 /*
6163 * Unaligned copy of the first "head_size" bytes, to reach
6164 * a page boundary.
6165 */
6166
6167 /*
6168 * Extract "head_copy" out of "copy".
6169 */
6170 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6171 vm_map_copy_first_entry(head_copy) =
6172 vm_map_copy_to_entry(head_copy);
6173 vm_map_copy_last_entry(head_copy) =
6174 vm_map_copy_to_entry(head_copy);
6175 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6176 head_copy->cpy_hdr.nentries = 0;
6177 head_copy->cpy_hdr.entries_pageable =
6178 copy->cpy_hdr.entries_pageable;
6179 vm_map_store_init(&head_copy->cpy_hdr);
6180
6181 head_copy->offset = copy->offset;
6182 head_copy->size = head_size;
6183
6184 copy->offset += head_size;
6185 copy->size -= head_size;
6186
6187 entry = vm_map_copy_first_entry(copy);
6188 vm_map_copy_clip_end(copy, entry, copy->offset);
6189 vm_map_copy_entry_unlink(copy, entry);
6190 vm_map_copy_entry_link(head_copy,
6191 vm_map_copy_to_entry(head_copy),
6192 entry);
6193
6194 /*
6195 * Do the unaligned copy.
6196 */
6197 kr = vm_map_copy_overwrite_nested(dst_map,
6198 head_addr,
6199 head_copy,
6200 interruptible,
6201 (pmap_t) NULL,
6202 FALSE);
6203 if (kr != KERN_SUCCESS)
6204 goto done;
6205 }
6206
6207 if (tail_size) {
6208 /*
6209 * Extract "tail_copy" out of "copy".
6210 */
6211 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6212 vm_map_copy_first_entry(tail_copy) =
6213 vm_map_copy_to_entry(tail_copy);
6214 vm_map_copy_last_entry(tail_copy) =
6215 vm_map_copy_to_entry(tail_copy);
6216 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6217 tail_copy->cpy_hdr.nentries = 0;
6218 tail_copy->cpy_hdr.entries_pageable =
6219 copy->cpy_hdr.entries_pageable;
6220 vm_map_store_init(&tail_copy->cpy_hdr);
6221
6222 tail_copy->offset = copy->offset + copy->size - tail_size;
6223 tail_copy->size = tail_size;
6224
6225 copy->size -= tail_size;
6226
6227 entry = vm_map_copy_last_entry(copy);
6228 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6229 entry = vm_map_copy_last_entry(copy);
6230 vm_map_copy_entry_unlink(copy, entry);
6231 vm_map_copy_entry_link(tail_copy,
6232 vm_map_copy_last_entry(tail_copy),
6233 entry);
6234 }
6235
6236 /*
6237 * Copy most (or possibly all) of the data.
6238 */
6239 kr = vm_map_copy_overwrite_nested(dst_map,
6240 dst_addr + head_size,
6241 copy,
6242 interruptible,
6243 (pmap_t) NULL,
6244 FALSE);
6245 if (kr != KERN_SUCCESS) {
6246 goto done;
6247 }
6248
6249 if (tail_size) {
6250 kr = vm_map_copy_overwrite_nested(dst_map,
6251 tail_addr,
6252 tail_copy,
6253 interruptible,
6254 (pmap_t) NULL,
6255 FALSE);
6256 }
6257
6258 done:
6259 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6260 if (kr == KERN_SUCCESS) {
6261 /*
6262 * Discard all the copy maps.
6263 */
6264 if (head_copy) {
6265 vm_map_copy_discard(head_copy);
6266 head_copy = NULL;
6267 }
6268 vm_map_copy_discard(copy);
6269 if (tail_copy) {
6270 vm_map_copy_discard(tail_copy);
6271 tail_copy = NULL;
6272 }
6273 } else {
6274 /*
6275 * Re-assemble the original copy map.
6276 */
6277 if (head_copy) {
6278 entry = vm_map_copy_first_entry(head_copy);
6279 vm_map_copy_entry_unlink(head_copy, entry);
6280 vm_map_copy_entry_link(copy,
6281 vm_map_copy_to_entry(copy),
6282 entry);
6283 copy->offset -= head_size;
6284 copy->size += head_size;
6285 vm_map_copy_discard(head_copy);
6286 head_copy = NULL;
6287 }
6288 if (tail_copy) {
6289 entry = vm_map_copy_last_entry(tail_copy);
6290 vm_map_copy_entry_unlink(tail_copy, entry);
6291 vm_map_copy_entry_link(copy,
6292 vm_map_copy_last_entry(copy),
6293 entry);
6294 copy->size += tail_size;
6295 vm_map_copy_discard(tail_copy);
6296 tail_copy = NULL;
6297 }
6298 }
6299 return kr;
6300 }
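/*
 * Example (sketch): a hypothetical caller overwriting an already-mapped
 * destination region with the contents of a vm_map_copy_t via the
 * exported vm_map_copy_overwrite() above.  The function name and error
 * policy below are illustrative only and do not appear elsewhere in xnu.
 */
#if 0	/* illustrative sketch, not part of this file */
static kern_return_t
example_overwrite_region(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy)
{
	kern_return_t	kr;

	/* non-interruptible overwrite; the copy is consumed on success */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * On failure the caller still owns "copy"; drop it
		 * here since this sketch no longer needs the data.
		 */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif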
6301
6302
6303 /*
6304 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6305 *
6306 * Description:
6307 * Physically copy unaligned data
6308 *
6309 * Implementation:
6310 * Unaligned parts of pages have to be physically copied. We use
6311 * a modified form of vm_fault_copy (which understands non-aligned
6312 * page offsets and sizes) to do the copy. We attempt to copy as
6313 * much memory in one go as possible; however, vm_fault_copy copies
6314 * within one memory object, so we have to find the smallest of "amount left",
6315 * "source object data size" and "target object data size". With
6316 * unaligned data we don't need to split regions, therefore the source
6317 * (copy) object should be one map entry, the target range may be split
6318 * over multiple map entries however. In any event we are pessimistic
6319 * about these assumptions.
6320 *
6321 * Assumptions:
6322 * dst_map is locked on entry and is returned locked on success,
6323 * unlocked on error.
6324 */
6325
6326 static kern_return_t
6327 vm_map_copy_overwrite_unaligned(
6328 vm_map_t dst_map,
6329 vm_map_entry_t entry,
6330 vm_map_copy_t copy,
6331 vm_map_offset_t start)
6332 {
6333 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6334 vm_map_version_t version;
6335 vm_object_t dst_object;
6336 vm_object_offset_t dst_offset;
6337 vm_object_offset_t src_offset;
6338 vm_object_offset_t entry_offset;
6339 vm_map_offset_t entry_end;
6340 vm_map_size_t src_size,
6341 dst_size,
6342 copy_size,
6343 amount_left;
6344 kern_return_t kr = KERN_SUCCESS;
6345
6346 vm_map_lock_write_to_read(dst_map);
6347
6348 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6349 amount_left = copy->size;
6350 /*
6351 * unaligned, so we never clipped this entry; we need the offset into
6352 * the vm_object, not just the data.
6353 */
6354 while (amount_left > 0) {
6355
6356 if (entry == vm_map_to_entry(dst_map)) {
6357 vm_map_unlock_read(dst_map);
6358 return KERN_INVALID_ADDRESS;
6359 }
6360
6361 /* "start" must be within the current map entry */
6362 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6363
6364 dst_offset = start - entry->vme_start;
6365
6366 dst_size = entry->vme_end - start;
6367
6368 src_size = copy_entry->vme_end -
6369 (copy_entry->vme_start + src_offset);
6370
6371 if (dst_size < src_size) {
6372 /*
6373 * we can only copy dst_size bytes before
6374 * we have to get the next destination entry
6375 */
6376 copy_size = dst_size;
6377 } else {
6378 /*
6379 * we can only copy src_size bytes before
6380 * we have to get the next source copy entry
6381 */
6382 copy_size = src_size;
6383 }
6384
6385 if (copy_size > amount_left) {
6386 copy_size = amount_left;
6387 }
6388 /*
6389 * Entry needs copy: create a shadow object for the
6390 * copy-on-write region.
6391 */
6392 if (entry->needs_copy &&
6393 ((entry->protection & VM_PROT_WRITE) != 0))
6394 {
6395 if (vm_map_lock_read_to_write(dst_map)) {
6396 vm_map_lock_read(dst_map);
6397 goto RetryLookup;
6398 }
6399 vm_object_shadow(&entry->object.vm_object,
6400 &entry->offset,
6401 (vm_map_size_t)(entry->vme_end
6402 - entry->vme_start));
6403 entry->needs_copy = FALSE;
6404 vm_map_lock_write_to_read(dst_map);
6405 }
6406 dst_object = entry->object.vm_object;
6407 /*
6408 * unlike with the virtual (aligned) copy, we're going
6409 * to fault on it, therefore we need a target object.
6410 */
6411 if (dst_object == VM_OBJECT_NULL) {
6412 if (vm_map_lock_read_to_write(dst_map)) {
6413 vm_map_lock_read(dst_map);
6414 goto RetryLookup;
6415 }
6416 dst_object = vm_object_allocate((vm_map_size_t)
6417 entry->vme_end - entry->vme_start);
6418 entry->object.vm_object = dst_object;
6419 entry->offset = 0;
6420 vm_map_lock_write_to_read(dst_map);
6421 }
6422 /*
6423 * Take an object reference and unlock map. The "entry" may
6424 * disappear or change when the map is unlocked.
6425 */
6426 vm_object_reference(dst_object);
6427 version.main_timestamp = dst_map->timestamp;
6428 entry_offset = entry->offset;
6429 entry_end = entry->vme_end;
6430 vm_map_unlock_read(dst_map);
6431 /*
6432 * Copy as much as possible in one pass
6433 */
6434 kr = vm_fault_copy(
6435 copy_entry->object.vm_object,
6436 copy_entry->offset + src_offset,
6437 &copy_size,
6438 dst_object,
6439 entry_offset + dst_offset,
6440 dst_map,
6441 &version,
6442 THREAD_UNINT );
6443
6444 start += copy_size;
6445 src_offset += copy_size;
6446 amount_left -= copy_size;
6447 /*
6448 * Release the object reference
6449 */
6450 vm_object_deallocate(dst_object);
6451 /*
6452 * If a hard error occurred, return it now
6453 */
6454 if (kr != KERN_SUCCESS)
6455 return kr;
6456
6457 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6458 || amount_left == 0)
6459 {
6460 /*
6461 * all done with this copy entry, dispose.
6462 */
6463 vm_map_copy_entry_unlink(copy, copy_entry);
6464 vm_object_deallocate(copy_entry->object.vm_object);
6465 vm_map_copy_entry_dispose(copy, copy_entry);
6466
6467 if ((copy_entry = vm_map_copy_first_entry(copy))
6468 == vm_map_copy_to_entry(copy) && amount_left) {
6469 /*
6470 * not finished copying but run out of source
6471 */
6472 return KERN_INVALID_ADDRESS;
6473 }
6474 src_offset = 0;
6475 }
6476
6477 if (amount_left == 0)
6478 return KERN_SUCCESS;
6479
6480 vm_map_lock_read(dst_map);
6481 if (version.main_timestamp == dst_map->timestamp) {
6482 if (start == entry_end) {
6483 /*
6484 * destination region is split. Use the version
6485 * information to avoid a lookup in the normal
6486 * case.
6487 */
6488 entry = entry->vme_next;
6489 /*
6490 * should be contiguous. Fail if we encounter
6491 * a hole in the destination.
6492 */
6493 if (start != entry->vme_start) {
6494 vm_map_unlock_read(dst_map);
6495 return KERN_INVALID_ADDRESS ;
6496 }
6497 }
6498 } else {
6499 /*
6500 * Map version check failed.
6501 * we must lookup the entry because somebody
6502 * might have changed the map behind our backs.
6503 */
6504 RetryLookup:
6505 if (!vm_map_lookup_entry(dst_map, start, &entry))
6506 {
6507 vm_map_unlock_read(dst_map);
6508 return KERN_INVALID_ADDRESS ;
6509 }
6510 }
6511 }/* while */
6512
6513 return KERN_SUCCESS;
6514 }/* vm_map_copy_overwrite_unaligned */
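/*
 * Example (sketch): each pass of the loop above transfers the smallest
 * of the three limits before refreshing whichever one was exhausted,
 * i.e. conceptually
 *
 *	copy_size = dst_size < src_size ? dst_size : src_size;
 *	if (copy_size > amount_left)
 *		copy_size = amount_left;
 *
 * where dst_size is the space left in the destination entry, src_size
 * the data left in the current source copy entry, and amount_left the
 * bytes remaining overall.
 */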
6515
6516 /*
6517 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6518 *
6519 * Description:
6520 * Does all the vm_trickery possible for whole pages.
6521 *
6522 * Implementation:
6523 *
6524 * If there are no permanent objects in the destination,
6525 * and the source and destination map entry zones match,
6526 * and the destination map entry is not shared,
6527 * then the map entries can be deleted and replaced
6528 * with those from the copy. The following code is the
6529 * basic idea of what to do, but there are lots of annoying
6530 * little details about getting protection and inheritance
6531 * right. Should add protection, inheritance, and sharing checks
6532 * to the above pass and make sure that no wiring is involved.
6533 */
6534
6535 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
6536 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
6537 int vm_map_copy_overwrite_aligned_src_large = 0;
6538
6539 static kern_return_t
6540 vm_map_copy_overwrite_aligned(
6541 vm_map_t dst_map,
6542 vm_map_entry_t tmp_entry,
6543 vm_map_copy_t copy,
6544 vm_map_offset_t start,
6545 __unused pmap_t pmap)
6546 {
6547 vm_object_t object;
6548 vm_map_entry_t copy_entry;
6549 vm_map_size_t copy_size;
6550 vm_map_size_t size;
6551 vm_map_entry_t entry;
6552
6553 while ((copy_entry = vm_map_copy_first_entry(copy))
6554 != vm_map_copy_to_entry(copy))
6555 {
6556 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6557
6558 entry = tmp_entry;
6559 assert(!entry->use_pmap); /* unnested when clipped earlier */
6560 if (entry == vm_map_to_entry(dst_map)) {
6561 vm_map_unlock(dst_map);
6562 return KERN_INVALID_ADDRESS;
6563 }
6564 size = (entry->vme_end - entry->vme_start);
6565 /*
6566 * Make sure that no holes popped up in the
6567 * address map, and that the protection is
6568 * still valid, in case the map was unlocked
6569 * earlier.
6570 */
6571
6572 if ((entry->vme_start != start) || ((entry->is_sub_map)
6573 && !entry->needs_copy)) {
6574 vm_map_unlock(dst_map);
6575 return(KERN_INVALID_ADDRESS);
6576 }
6577 assert(entry != vm_map_to_entry(dst_map));
6578
6579 /*
6580 * Check protection again
6581 */
6582
6583 if ( ! (entry->protection & VM_PROT_WRITE)) {
6584 vm_map_unlock(dst_map);
6585 return(KERN_PROTECTION_FAILURE);
6586 }
6587
6588 /*
6589 * Adjust to source size first
6590 */
6591
6592 if (copy_size < size) {
6593 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6594 size = copy_size;
6595 }
6596
6597 /*
6598 * Adjust to destination size
6599 */
6600
6601 if (size < copy_size) {
6602 vm_map_copy_clip_end(copy, copy_entry,
6603 copy_entry->vme_start + size);
6604 copy_size = size;
6605 }
6606
6607 assert((entry->vme_end - entry->vme_start) == size);
6608 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6609 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6610
6611 /*
6612 * If the destination contains temporary unshared memory,
6613 * we can perform the copy by throwing it away and
6614 * installing the source data.
6615 */
6616
6617 object = entry->object.vm_object;
6618 if ((!entry->is_shared &&
6619 ((object == VM_OBJECT_NULL) ||
6620 (object->internal && !object->true_share))) ||
6621 entry->needs_copy) {
6622 vm_object_t old_object = entry->object.vm_object;
6623 vm_object_offset_t old_offset = entry->offset;
6624 vm_object_offset_t offset;
6625
6626 /*
6627 * Ensure that the source and destination aren't
6628 * identical
6629 */
6630 if (old_object == copy_entry->object.vm_object &&
6631 old_offset == copy_entry->offset) {
6632 vm_map_copy_entry_unlink(copy, copy_entry);
6633 vm_map_copy_entry_dispose(copy, copy_entry);
6634
6635 if (old_object != VM_OBJECT_NULL)
6636 vm_object_deallocate(old_object);
6637
6638 start = tmp_entry->vme_end;
6639 tmp_entry = tmp_entry->vme_next;
6640 continue;
6641 }
6642
6643 #if !CONFIG_EMBEDDED
6644 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
6645 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
6646 if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
6647 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
6648 copy_size <= __TRADEOFF1_COPY_SIZE) {
6649 /*
6650 * Virtual vs. Physical copy tradeoff #1.
6651 *
6652 * Copying only a few pages out of a large
6653 * object: do a physical copy instead of
6654 * a virtual copy, to avoid possibly keeping
6655 * the entire large object alive because of
6656 * those few copy-on-write pages.
6657 */
6658 vm_map_copy_overwrite_aligned_src_large++;
6659 goto slow_copy;
6660 }
6661 #endif /* !CONFIG_EMBEDDED */
6662
6663 if (entry->alias >= VM_MEMORY_MALLOC &&
6664 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
6665 vm_object_t new_object, new_shadow;
6666
6667 /*
6668 * We're about to map something over a mapping
6669 * established by malloc()...
6670 */
6671 new_object = copy_entry->object.vm_object;
6672 if (new_object != VM_OBJECT_NULL) {
6673 vm_object_lock_shared(new_object);
6674 }
6675 while (new_object != VM_OBJECT_NULL &&
6676 #if !CONFIG_EMBEDDED
6677 !new_object->true_share &&
6678 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6679 #endif /* !CONFIG_EMBEDDED */
6680 new_object->internal) {
6681 new_shadow = new_object->shadow;
6682 if (new_shadow == VM_OBJECT_NULL) {
6683 break;
6684 }
6685 vm_object_lock_shared(new_shadow);
6686 vm_object_unlock(new_object);
6687 new_object = new_shadow;
6688 }
6689 if (new_object != VM_OBJECT_NULL) {
6690 if (!new_object->internal) {
6691 /*
6692 * The new mapping is backed
6693 * by an external object. We
6694 * don't want malloc'ed memory
6695 * to be replaced with such a
6696 * non-anonymous mapping, so
6697 * let's go off the optimized
6698 * path...
6699 */
6700 vm_map_copy_overwrite_aligned_src_not_internal++;
6701 vm_object_unlock(new_object);
6702 goto slow_copy;
6703 }
6704 #if !CONFIG_EMBEDDED
6705 if (new_object->true_share ||
6706 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
6707 /*
6708 * Same if there's a "true_share"
6709 * object in the shadow chain, or
6710 * an object with a non-default
6711 * (SYMMETRIC) copy strategy.
6712 */
6713 vm_map_copy_overwrite_aligned_src_not_symmetric++;
6714 vm_object_unlock(new_object);
6715 goto slow_copy;
6716 }
6717 #endif /* !CONFIG_EMBEDDED */
6718 vm_object_unlock(new_object);
6719 }
6720 /*
6721 * The new mapping is still backed by
6722 * anonymous (internal) memory, so it's
6723 * OK to substitute it for the original
6724 * malloc() mapping.
6725 */
6726 }
6727
6728 if (old_object != VM_OBJECT_NULL) {
6729 if(entry->is_sub_map) {
6730 if(entry->use_pmap) {
6731 #ifndef NO_NESTED_PMAP
6732 pmap_unnest(dst_map->pmap,
6733 (addr64_t)entry->vme_start,
6734 entry->vme_end - entry->vme_start);
6735 #endif /* NO_NESTED_PMAP */
6736 if(dst_map->mapped) {
6737 /* clean up parent */
6738 /* map/maps */
6739 vm_map_submap_pmap_clean(
6740 dst_map, entry->vme_start,
6741 entry->vme_end,
6742 entry->object.sub_map,
6743 entry->offset);
6744 }
6745 } else {
6746 vm_map_submap_pmap_clean(
6747 dst_map, entry->vme_start,
6748 entry->vme_end,
6749 entry->object.sub_map,
6750 entry->offset);
6751 }
6752 vm_map_deallocate(
6753 entry->object.sub_map);
6754 } else {
6755 if(dst_map->mapped) {
6756 vm_object_pmap_protect(
6757 entry->object.vm_object,
6758 entry->offset,
6759 entry->vme_end
6760 - entry->vme_start,
6761 PMAP_NULL,
6762 entry->vme_start,
6763 VM_PROT_NONE);
6764 } else {
6765 pmap_remove(dst_map->pmap,
6766 (addr64_t)(entry->vme_start),
6767 (addr64_t)(entry->vme_end));
6768 }
6769 vm_object_deallocate(old_object);
6770 }
6771 }
6772
6773 entry->is_sub_map = FALSE;
6774 entry->object = copy_entry->object;
6775 object = entry->object.vm_object;
6776 entry->needs_copy = copy_entry->needs_copy;
6777 entry->wired_count = 0;
6778 entry->user_wired_count = 0;
6779 offset = entry->offset = copy_entry->offset;
6780
6781 vm_map_copy_entry_unlink(copy, copy_entry);
6782 vm_map_copy_entry_dispose(copy, copy_entry);
6783
6784 /*
6785 * we could try to push pages into the pmap at this point, BUT
6786 * this optimization only saved on average 2 us per page if ALL
6787 * the pages in the source were currently mapped
6788 * and ALL the pages in the dest were touched; if fewer than
6789 * 2/3 of the pages were touched, this optimization actually cost more cycles.
6790 * It also puts a lot of pressure on the pmap layer with regard to mapping structures.
6791 */
6792
6793 /*
6794 * Set up for the next iteration. The map
6795 * has not been unlocked, so the next
6796 * address should be at the end of this
6797 * entry, and the next map entry should be
6798 * the one following it.
6799 */
6800
6801 start = tmp_entry->vme_end;
6802 tmp_entry = tmp_entry->vme_next;
6803 } else {
6804 vm_map_version_t version;
6805 vm_object_t dst_object;
6806 vm_object_offset_t dst_offset;
6807 kern_return_t r;
6808
6809 slow_copy:
6810 if (entry->needs_copy) {
6811 vm_object_shadow(&entry->object.vm_object,
6812 &entry->offset,
6813 (entry->vme_end -
6814 entry->vme_start));
6815 entry->needs_copy = FALSE;
6816 }
6817
6818 dst_object = entry->object.vm_object;
6819 dst_offset = entry->offset;
6820
6821 /*
6822 * Take an object reference, and record
6823 * the map version information so that the
6824 * map can be safely unlocked.
6825 */
6826
6827 if (dst_object == VM_OBJECT_NULL) {
6828 /*
6829 * We would usually have just taken the
6830 * optimized path above if the destination
6831 * object has not been allocated yet. But we
6832 * now disable that optimization if the copy
6833 * entry's object is not backed by anonymous
6834 * memory to avoid replacing malloc'ed
6835 * (i.e. re-usable) anonymous memory with a
6836 * not-so-anonymous mapping.
6837 * So we have to handle this case here and
6838 * allocate a new VM object for this map entry.
6839 */
6840 dst_object = vm_object_allocate(
6841 entry->vme_end - entry->vme_start);
6842 dst_offset = 0;
6843 entry->object.vm_object = dst_object;
6844 entry->offset = dst_offset;
6845
6846 }
6847
6848 vm_object_reference(dst_object);
6849
6850 /* account for unlock bumping up timestamp */
6851 version.main_timestamp = dst_map->timestamp + 1;
6852
6853 vm_map_unlock(dst_map);
6854
6855 /*
6856 * Copy as much as possible in one pass
6857 */
6858
6859 copy_size = size;
6860 r = vm_fault_copy(
6861 copy_entry->object.vm_object,
6862 copy_entry->offset,
6863 &copy_size,
6864 dst_object,
6865 dst_offset,
6866 dst_map,
6867 &version,
6868 THREAD_UNINT );
6869
6870 /*
6871 * Release the object reference
6872 */
6873
6874 vm_object_deallocate(dst_object);
6875
6876 /*
6877 * If a hard error occurred, return it now
6878 */
6879
6880 if (r != KERN_SUCCESS)
6881 return(r);
6882
6883 if (copy_size != 0) {
6884 /*
6885 * Dispose of the copied region
6886 */
6887
6888 vm_map_copy_clip_end(copy, copy_entry,
6889 copy_entry->vme_start + copy_size);
6890 vm_map_copy_entry_unlink(copy, copy_entry);
6891 vm_object_deallocate(copy_entry->object.vm_object);
6892 vm_map_copy_entry_dispose(copy, copy_entry);
6893 }
6894
6895 /*
6896 * Pick up in the destination map where we left off.
6897 *
6898 * Use the version information to avoid a lookup
6899 * in the normal case.
6900 */
6901
6902 start += copy_size;
6903 vm_map_lock(dst_map);
6904 if (version.main_timestamp == dst_map->timestamp &&
6905 copy_size != 0) {
6906 /* We can safely use saved tmp_entry value */
6907
6908 vm_map_clip_end(dst_map, tmp_entry, start);
6909 tmp_entry = tmp_entry->vme_next;
6910 } else {
6911 /* Must do lookup of tmp_entry */
6912
6913 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6914 vm_map_unlock(dst_map);
6915 return(KERN_INVALID_ADDRESS);
6916 }
6917 vm_map_clip_start(dst_map, tmp_entry, start);
6918 }
6919 }
6920 }/* while */
6921
6922 return(KERN_SUCCESS);
6923 }/* vm_map_copy_overwrite_aligned */
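/*
 * Example (sketch): the fast path above replaces the destination
 * entry's backing object outright only when the destination memory is
 * temporary and unshared.  Expressed as a hypothetical predicate (this
 * helper does not exist in xnu):
 */
#if 0	/* illustrative sketch, not part of this file */
static boolean_t
example_dst_entry_replaceable(
	vm_map_entry_t	entry)
{
	vm_object_t object = entry->object.vm_object;

	/*
	 * Either there is no backing object yet, or it is anonymous
	 * (internal) and not marked "true_share"; an entry still
	 * flagged needs_copy also qualifies.
	 */
	return ((!entry->is_shared &&
		 ((object == VM_OBJECT_NULL) ||
		  (object->internal && !object->true_share))) ||
		entry->needs_copy);
}
#endif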
6924
6925 /*
6926 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6927 *
6928 * Description:
6929 * Copy in data to a kernel buffer from space in the
6930 * source map. The original space may be optionally
6931 * deallocated.
6932 *
6933 * If successful, returns a new copy object.
6934 */
6935 static kern_return_t
6936 vm_map_copyin_kernel_buffer(
6937 vm_map_t src_map,
6938 vm_map_offset_t src_addr,
6939 vm_map_size_t len,
6940 boolean_t src_destroy,
6941 vm_map_copy_t *copy_result)
6942 {
6943 kern_return_t kr;
6944 vm_map_copy_t copy;
6945 vm_size_t kalloc_size;
6946
6947 if ((vm_size_t) len != len) {
6948 /* "len" is too big and doesn't fit in a "vm_size_t" */
6949 return KERN_RESOURCE_SHORTAGE;
6950 }
6951 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6952 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6953
6954 copy = (vm_map_copy_t) kalloc(kalloc_size);
6955 if (copy == VM_MAP_COPY_NULL) {
6956 return KERN_RESOURCE_SHORTAGE;
6957 }
6958 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6959 copy->size = len;
6960 copy->offset = 0;
6961 copy->cpy_kdata = (void *) (copy + 1);
6962 copy->cpy_kalloc_size = kalloc_size;
6963
6964 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
6965 if (kr != KERN_SUCCESS) {
6966 kfree(copy, kalloc_size);
6967 return kr;
6968 }
6969 if (src_destroy) {
6970 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6971 vm_map_round_page(src_addr + len),
6972 VM_MAP_REMOVE_INTERRUPTIBLE |
6973 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6974 (src_map == kernel_map) ?
6975 VM_MAP_REMOVE_KUNWIRE : 0);
6976 }
6977 *copy_result = copy;
6978 return KERN_SUCCESS;
6979 }
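/*
 * Layout sketch: the kernel-buffer copy is a single kalloc()ed block
 * with the data placed immediately after the vm_map_copy header:
 *
 *	+------------------------+ <- copy
 *	| struct vm_map_copy     |
 *	+------------------------+ <- copy->cpy_kdata == (void *)(copy + 1)
 *	| len bytes of data      |
 *	+------------------------+
 *
 * so kalloc_size == sizeof(struct vm_map_copy) + len, as computed above.
 */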
6980
6981 /*
6982 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6983 *
6984 * Description:
6985 * Copy out data from a kernel buffer into space in the
6986 * destination map. The space may be optionally dynamically
6987 * allocated.
6988 *
6989 * If successful, consumes the copy object.
6990 * Otherwise, the caller is responsible for it.
6991 */
6992 static int vm_map_copyout_kernel_buffer_failures = 0;
6993 static kern_return_t
6994 vm_map_copyout_kernel_buffer(
6995 vm_map_t map,
6996 vm_map_address_t *addr, /* IN/OUT */
6997 vm_map_copy_t copy,
6998 boolean_t overwrite)
6999 {
7000 kern_return_t kr = KERN_SUCCESS;
7001 thread_t thread = current_thread();
7002
7003 if (!overwrite) {
7004
7005 /*
7006 * Allocate space in the target map for the data
7007 */
7008 *addr = 0;
7009 kr = vm_map_enter(map,
7010 addr,
7011 vm_map_round_page(copy->size),
7012 (vm_map_offset_t) 0,
7013 VM_FLAGS_ANYWHERE,
7014 VM_OBJECT_NULL,
7015 (vm_object_offset_t) 0,
7016 FALSE,
7017 VM_PROT_DEFAULT,
7018 VM_PROT_ALL,
7019 VM_INHERIT_DEFAULT);
7020 if (kr != KERN_SUCCESS)
7021 return kr;
7022 }
7023
7024 /*
7025 * Copyout the data from the kernel buffer to the target map.
7026 */
7027 if (thread->map == map) {
7028
7029 /*
7030 * If the target map is the current map, just do
7031 * the copy.
7032 */
7033 assert((vm_size_t) copy->size == copy->size);
7034 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7035 kr = KERN_INVALID_ADDRESS;
7036 }
7037 }
7038 else {
7039 vm_map_t oldmap;
7040
7041 /*
7042 * If the target map is another map, assume the
7043 * target's address space identity for the duration
7044 * of the copy.
7045 */
7046 vm_map_reference(map);
7047 oldmap = vm_map_switch(map);
7048
7049 assert((vm_size_t) copy->size == copy->size);
7050 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7051 vm_map_copyout_kernel_buffer_failures++;
7052 kr = KERN_INVALID_ADDRESS;
7053 }
7054
7055 (void) vm_map_switch(oldmap);
7056 vm_map_deallocate(map);
7057 }
7058
7059 if (kr != KERN_SUCCESS) {
7060 /* the copy failed, clean up */
7061 if (!overwrite) {
7062 /*
7063 * Deallocate the space we allocated in the target map.
7064 */
7065 (void) vm_map_remove(map,
7066 vm_map_trunc_page(*addr),
7067 vm_map_round_page(*addr +
7068 vm_map_round_page(copy->size)),
7069 VM_MAP_NO_FLAGS);
7070 *addr = 0;
7071 }
7072 } else {
7073 /* copy was successful, discard the copy structure */
7074 kfree(copy, copy->cpy_kalloc_size);
7075 }
7076
7077 return kr;
7078 }
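/*
 * Example (sketch): the address-space switch idiom used above when the
 * copyout target is not the current map: temporarily assume the target
 * map's identity, perform the copyout, then switch back.  The helper
 * name and its "kbuf"/"uaddr"/"len" parameters are illustrative only.
 */
#if 0	/* illustrative sketch, not part of this file */
static kern_return_t
example_copyout_to_map(
	vm_map_t		map,
	void			*kbuf,
	vm_map_address_t	uaddr,
	vm_size_t		len)
{
	vm_map_t	oldmap;
	kern_return_t	kr = KERN_SUCCESS;

	vm_map_reference(map);		/* keep the map alive across the switch */
	oldmap = vm_map_switch(map);

	if (copyout(kbuf, uaddr, len))
		kr = KERN_INVALID_ADDRESS;

	(void) vm_map_switch(oldmap);	/* restore the original address space */
	vm_map_deallocate(map);
	return kr;
}
#endif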
7079
7080 /*
7081 * Macro: vm_map_copy_insert
7082 *
7083 * Description:
7084 * Link a copy chain ("copy") into a map at the
7085 * specified location (after "where").
7086 * Side effects:
7087 * The copy chain is destroyed.
7088 * Warning:
7089 * The arguments are evaluated multiple times.
7090 */
7091 #define vm_map_copy_insert(map, where, copy) \
7092 MACRO_BEGIN \
7093 vm_map_store_copy_insert(map, where, copy); \
7094 zfree(vm_map_copy_zone, copy); \
7095 MACRO_END
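/*
 * Example (sketch): because the macro arguments are expanded more than
 * once, callers should pass plain variables, as in the use later in
 * this file
 *
 *	vm_map_copy_insert(dst_map, last, copy);
 *
 * and never an expression with side effects, e.g.
 *
 *	vm_map_copy_insert(dst_map, last, fetch_next_copy());
 *
 * (fetch_next_copy() is a made-up name, shown only to illustrate the
 * hazard of double evaluation.)
 */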
7096
7097 /*
7098 * Routine: vm_map_copyout
7099 *
7100 * Description:
7101 * Copy out a copy chain ("copy") into newly-allocated
7102 * space in the destination map.
7103 *
7104 * If successful, consumes the copy object.
7105 * Otherwise, the caller is responsible for it.
7106 */
7107 kern_return_t
7108 vm_map_copyout(
7109 vm_map_t dst_map,
7110 vm_map_address_t *dst_addr, /* OUT */
7111 vm_map_copy_t copy)
7112 {
7113 vm_map_size_t size;
7114 vm_map_size_t adjustment;
7115 vm_map_offset_t start;
7116 vm_object_offset_t vm_copy_start;
7117 vm_map_entry_t last;
7118 register
7119 vm_map_entry_t entry;
7120
7121 /*
7122 * Check for null copy object.
7123 */
7124
7125 if (copy == VM_MAP_COPY_NULL) {
7126 *dst_addr = 0;
7127 return(KERN_SUCCESS);
7128 }
7129
7130 /*
7131 * Check for special copy object, created
7132 * by vm_map_copyin_object.
7133 */
7134
7135 if (copy->type == VM_MAP_COPY_OBJECT) {
7136 vm_object_t object = copy->cpy_object;
7137 kern_return_t kr;
7138 vm_object_offset_t offset;
7139
7140 offset = vm_object_trunc_page(copy->offset);
7141 size = vm_map_round_page(copy->size +
7142 (vm_map_size_t)(copy->offset - offset));
7143 *dst_addr = 0;
7144 kr = vm_map_enter(dst_map, dst_addr, size,
7145 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7146 object, offset, FALSE,
7147 VM_PROT_DEFAULT, VM_PROT_ALL,
7148 VM_INHERIT_DEFAULT);
7149 if (kr != KERN_SUCCESS)
7150 return(kr);
7151 /* Account for non-page-aligned copy object */
7152 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7153 zfree(vm_map_copy_zone, copy);
7154 return(KERN_SUCCESS);
7155 }
7156
7157 /*
7158 * Check for special kernel buffer allocated
7159 * by new_ipc_kmsg_copyin.
7160 */
7161
7162 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7163 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7164 copy, FALSE));
7165 }
7166
7167 /*
7168 * Find space for the data
7169 */
7170
7171 vm_copy_start = vm_object_trunc_page(copy->offset);
7172 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7173 - vm_copy_start;
7174
7175 StartAgain: ;
7176
7177 vm_map_lock(dst_map);
7178 if( dst_map->disable_vmentry_reuse == TRUE) {
7179 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7180 last = entry;
7181 } else {
7182 assert(first_free_is_valid(dst_map));
7183 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7184 vm_map_min(dst_map) : last->vme_end;
7185 }
7186
7187 while (TRUE) {
7188 vm_map_entry_t next = last->vme_next;
7189 vm_map_offset_t end = start + size;
7190
7191 if ((end > dst_map->max_offset) || (end < start)) {
7192 if (dst_map->wait_for_space) {
7193 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7194 assert_wait((event_t) dst_map,
7195 THREAD_INTERRUPTIBLE);
7196 vm_map_unlock(dst_map);
7197 thread_block(THREAD_CONTINUE_NULL);
7198 goto StartAgain;
7199 }
7200 }
7201 vm_map_unlock(dst_map);
7202 return(KERN_NO_SPACE);
7203 }
7204
7205 if ((next == vm_map_to_entry(dst_map)) ||
7206 (next->vme_start >= end))
7207 break;
7208
7209 last = next;
7210 start = last->vme_end;
7211 }
7212
7213 /*
7214 * Since we're going to just drop the map
7215 * entries from the copy into the destination
7216 * map, they must come from the same pool.
7217 */
7218
7219 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7220 /*
7221 * Mismatches occur when dealing with the default
7222 * pager.
7223 */
7224 zone_t old_zone;
7225 vm_map_entry_t next, new;
7226
7227 /*
7228 * Find the zone that the copies were allocated from
7229 */
7230 old_zone = (copy->cpy_hdr.entries_pageable)
7231 ? vm_map_entry_zone
7232 : vm_map_kentry_zone;
7233 entry = vm_map_copy_first_entry(copy);
7234
7235 /*
7236 * Reinitialize the copy so that vm_map_copy_entry_link
7237 * will work.
7238 */
7239 vm_map_store_copy_reset(copy, entry);
7240 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7241
7242 /*
7243 * Copy each entry.
7244 */
7245 while (entry != vm_map_copy_to_entry(copy)) {
7246 new = vm_map_copy_entry_create(copy);
7247 vm_map_entry_copy_full(new, entry);
7248 new->use_pmap = FALSE; /* clr address space specifics */
7249 vm_map_copy_entry_link(copy,
7250 vm_map_copy_last_entry(copy),
7251 new);
7252 next = entry->vme_next;
7253 zfree(old_zone, entry);
7254 entry = next;
7255 }
7256 }
7257
7258 /*
7259 * Adjust the addresses in the copy chain, and
7260 * reset the region attributes.
7261 */
7262
7263 adjustment = start - vm_copy_start;
7264 for (entry = vm_map_copy_first_entry(copy);
7265 entry != vm_map_copy_to_entry(copy);
7266 entry = entry->vme_next) {
7267 entry->vme_start += adjustment;
7268 entry->vme_end += adjustment;
7269
7270 entry->inheritance = VM_INHERIT_DEFAULT;
7271 entry->protection = VM_PROT_DEFAULT;
7272 entry->max_protection = VM_PROT_ALL;
7273 entry->behavior = VM_BEHAVIOR_DEFAULT;
7274
7275 /*
7276 * If the entry is now wired,
7277 * map the pages into the destination map.
7278 */
7279 if (entry->wired_count != 0) {
7280 register vm_map_offset_t va;
7281 vm_object_offset_t offset;
7282 register vm_object_t object;
7283 vm_prot_t prot;
7284 int type_of_fault;
7285
7286 object = entry->object.vm_object;
7287 offset = entry->offset;
7288 va = entry->vme_start;
7289
7290 pmap_pageable(dst_map->pmap,
7291 entry->vme_start,
7292 entry->vme_end,
7293 TRUE);
7294
7295 while (va < entry->vme_end) {
7296 register vm_page_t m;
7297
7298 /*
7299 * Look up the page in the object.
7300 * Assert that the page will be found in the
7301 * top object:
7302 * either
7303 * the object was newly created by
7304 * vm_object_copy_slowly, and has
7305 * copies of all of the pages from
7306 * the source object
7307 * or
7308 * the object was moved from the old
7309 * map entry; because the old map
7310 * entry was wired, all of the pages
7311 * were in the top-level object.
7312 * (XXX not true if we wire pages for
7313 * reading)
7314 */
7315 vm_object_lock(object);
7316
7317 m = vm_page_lookup(object, offset);
7318 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7319 m->absent)
7320 panic("vm_map_copyout: wiring %p", m);
7321
7322 /*
7323 * ENCRYPTED SWAP:
7324 * The page is assumed to be wired here, so it
7325 * shouldn't be encrypted. Otherwise, we
7326 * couldn't enter it in the page table, since
7327 * we don't want the user to see the encrypted
7328 * data.
7329 */
7330 ASSERT_PAGE_DECRYPTED(m);
7331
7332 prot = entry->protection;
7333
7334 if (override_nx(dst_map, entry->alias) && prot)
7335 prot |= VM_PROT_EXECUTE;
7336
7337 type_of_fault = DBG_CACHE_HIT_FAULT;
7338
7339 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7340 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
7341 &type_of_fault);
7342
7343 vm_object_unlock(object);
7344
7345 offset += PAGE_SIZE_64;
7346 va += PAGE_SIZE;
7347 }
7348 }
7349 }
7350
7351 /*
7352 * Correct the page alignment for the result
7353 */
7354
7355 *dst_addr = start + (copy->offset - vm_copy_start);
7356
7357 /*
7358 * Update the hints and the map size
7359 */
7360
7361 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7362
7363 dst_map->size += size;
7364
7365 /*
7366 * Link in the copy
7367 */
7368
7369 vm_map_copy_insert(dst_map, last, copy);
7370
7371 vm_map_unlock(dst_map);
7372
7373 /*
7374 * XXX If wiring_required, call vm_map_pageable
7375 */
7376
7377 return(KERN_SUCCESS);
7378 }
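/*
 * Worked example (sketch, assuming 4KB pages): for a copy with
 * copy->offset = 0x3e00 and copy->size = 0x2400:
 *
 *	vm_copy_start = trunc_page(0x3e00)                   = 0x3000
 *	size          = round_page(0x3e00 + 0x2400) - 0x3000 = 0x4000
 *
 * If the allocated range starts at 0x10000, the returned address is
 * *dst_addr = 0x10000 + (0x3e00 - 0x3000) = 0x10e00, which preserves
 * the copy's offset within its first page.
 */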
7379
7380 /*
7381 * Routine: vm_map_copyin
7382 *
7383 * Description:
7384 * see vm_map_copyin_common. Exported via Unsupported.exports.
7385 *
7386 */
7387
7388 #undef vm_map_copyin
7389
7390 kern_return_t
7391 vm_map_copyin(
7392 vm_map_t src_map,
7393 vm_map_address_t src_addr,
7394 vm_map_size_t len,
7395 boolean_t src_destroy,
7396 vm_map_copy_t *copy_result) /* OUT */
7397 {
7398 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7399 FALSE, copy_result, FALSE));
7400 }
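/*
 * Example (sketch): a hypothetical helper that copies "len" bytes at
 * "src_addr" in "src_map" into freshly allocated space in "dst_map",
 * using vm_map_copyin() above and vm_map_copyout() defined earlier in
 * this file.  The helper name is illustrative only.
 */
#if 0	/* illustrative sketch, not part of this file */
static kern_return_t
example_transfer_region(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* capture the source region; FALSE leaves the source mapping intact */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* place it at a kernel-chosen address in the destination map */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* copyout did not consume the copy; release it ourselves */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif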
7401
7402 /*
7403 * Routine: vm_map_copyin_common
7404 *
7405 * Description:
7406 * Copy the specified region (src_addr, len) from the
7407 * source address space (src_map), possibly removing
7408 * the region from the source address space (src_destroy).
7409 *
7410 * Returns:
7411 * A vm_map_copy_t object (copy_result), suitable for
7412 * insertion into another address space (using vm_map_copyout),
7413 * copying over another address space region (using
7414 * vm_map_copy_overwrite). If the copy is unused, it
7415 * should be destroyed (using vm_map_copy_discard).
7416 *
7417 * In/out conditions:
7418 * The source map should not be locked on entry.
7419 */
7420
7421 typedef struct submap_map {
7422 vm_map_t parent_map;
7423 vm_map_offset_t base_start;
7424 vm_map_offset_t base_end;
7425 vm_map_size_t base_len;
7426 struct submap_map *next;
7427 } submap_map_t;
7428
7429 kern_return_t
7430 vm_map_copyin_common(
7431 vm_map_t src_map,
7432 vm_map_address_t src_addr,
7433 vm_map_size_t len,
7434 boolean_t src_destroy,
7435 __unused boolean_t src_volatile,
7436 vm_map_copy_t *copy_result, /* OUT */
7437 boolean_t use_maxprot)
7438 {
7439 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7440 * in multi-level lookup, this
7441 * entry contains the actual
7442 * vm_object/offset.
7443 */
7444 register
7445 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7446
7447 vm_map_offset_t src_start; /* Start of current entry --
7448 * where copy is taking place now
7449 */
7450 vm_map_offset_t src_end; /* End of entire region to be
7451 * copied */
7452 vm_map_offset_t src_base;
7453 vm_map_t base_map = src_map;
7454 boolean_t map_share=FALSE;
7455 submap_map_t *parent_maps = NULL;
7456
7457 register
7458 vm_map_copy_t copy; /* Resulting copy */
7459 vm_map_address_t copy_addr;
7460
7461 /*
7462 * Check for copies of zero bytes.
7463 */
7464
7465 if (len == 0) {
7466 *copy_result = VM_MAP_COPY_NULL;
7467 return(KERN_SUCCESS);
7468 }
7469
7470 /*
7471 * Check that the end address doesn't overflow
7472 */
7473 src_end = src_addr + len;
7474 if (src_end < src_addr)
7475 return KERN_INVALID_ADDRESS;
7476
7477 /*
7478 * If the copy is sufficiently small, use a kernel buffer instead
7479 * of making a virtual copy. The theory being that the cost of
7480 * setting up VM (and taking C-O-W faults) dominates the copy costs
7481 * for small regions.
7482 */
7483 if ((len < msg_ool_size_small) && !use_maxprot)
7484 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7485 src_destroy, copy_result);
7486
7487 /*
7488 * Compute (page aligned) start and end of region
7489 */
7490 src_start = vm_map_trunc_page(src_addr);
7491 src_end = vm_map_round_page(src_end);
7492
7493 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7494
7495 /*
7496 * Allocate a header element for the list.
7497 *
7498 * Use the start and end in the header to
7499 * remember the endpoints prior to rounding.
7500 */
7501
7502 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7503 vm_map_copy_first_entry(copy) =
7504 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7505 copy->type = VM_MAP_COPY_ENTRY_LIST;
7506 copy->cpy_hdr.nentries = 0;
7507 copy->cpy_hdr.entries_pageable = TRUE;
7508
7509 vm_map_store_init( &(copy->cpy_hdr) );
7510
7511 copy->offset = src_addr;
7512 copy->size = len;
7513
7514 new_entry = vm_map_copy_entry_create(copy);
7515
7516 #define RETURN(x) \
7517 MACRO_BEGIN \
7518 vm_map_unlock(src_map); \
7519 if(src_map != base_map) \
7520 vm_map_deallocate(src_map); \
7521 if (new_entry != VM_MAP_ENTRY_NULL) \
7522 vm_map_copy_entry_dispose(copy,new_entry); \
7523 vm_map_copy_discard(copy); \
7524 { \
7525 submap_map_t *_ptr; \
7526 \
7527 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7528 parent_maps=parent_maps->next; \
7529 if (_ptr->parent_map != base_map) \
7530 vm_map_deallocate(_ptr->parent_map); \
7531 kfree(_ptr, sizeof(submap_map_t)); \
7532 } \
7533 } \
7534 MACRO_RETURN(x); \
7535 MACRO_END
7536
7537 /*
7538 * Find the beginning of the region.
7539 */
7540
7541 vm_map_lock(src_map);
7542
7543 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7544 RETURN(KERN_INVALID_ADDRESS);
7545 if(!tmp_entry->is_sub_map) {
7546 vm_map_clip_start(src_map, tmp_entry, src_start);
7547 }
7548 /* set for later submap fix-up */
7549 copy_addr = src_start;
7550
7551 /*
7552 * Go through entries until we get to the end.
7553 */
7554
7555 while (TRUE) {
7556 register
7557 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7558 vm_map_size_t src_size; /* Size of source
7559 * map entry (in both
7560 * maps)
7561 */
7562
7563 register
7564 vm_object_t src_object; /* Object to copy */
7565 vm_object_offset_t src_offset;
7566
7567 boolean_t src_needs_copy; /* Should source map
7568 * be made read-only
7569 * for copy-on-write?
7570 */
7571
7572 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7573
7574 boolean_t was_wired; /* Was source wired? */
7575 vm_map_version_t version; /* Version before locks
7576 * dropped to make copy
7577 */
7578 kern_return_t result; /* Return value from
7579 * copy_strategically.
7580 */
7581 while(tmp_entry->is_sub_map) {
7582 vm_map_size_t submap_len;
7583 submap_map_t *ptr;
7584
7585 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7586 ptr->next = parent_maps;
7587 parent_maps = ptr;
7588 ptr->parent_map = src_map;
7589 ptr->base_start = src_start;
7590 ptr->base_end = src_end;
7591 submap_len = tmp_entry->vme_end - src_start;
7592 if(submap_len > (src_end-src_start))
7593 submap_len = src_end-src_start;
7594 ptr->base_len = submap_len;
7595
7596 src_start -= tmp_entry->vme_start;
7597 src_start += tmp_entry->offset;
7598 src_end = src_start + submap_len;
7599 src_map = tmp_entry->object.sub_map;
7600 vm_map_lock(src_map);
7601 /* keep an outstanding reference for all maps in */
7602 /* the parents tree except the base map */
7603 vm_map_reference(src_map);
7604 vm_map_unlock(ptr->parent_map);
7605 if (!vm_map_lookup_entry(
7606 src_map, src_start, &tmp_entry))
7607 RETURN(KERN_INVALID_ADDRESS);
7608 map_share = TRUE;
7609 if(!tmp_entry->is_sub_map)
7610 vm_map_clip_start(src_map, tmp_entry, src_start);
7611 src_entry = tmp_entry;
7612 }
7613 /* we are now in the lowest level submap... */
7614
7615 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7616 (tmp_entry->object.vm_object->phys_contiguous)) {
7617 /* This is not supported for now. In future */
7618 /* we will need to detect the phys_contig */
7619 /* condition and then upgrade copy_slowly */
7620 /* to do physical copy from the device mem */
7621 /* based object. We can piggy-back off of */
7622 /* the was_wired boolean to set up the */
7623 /* proper handling */
7624 RETURN(KERN_PROTECTION_FAILURE);
7625 }
7626 /*
7627 * Create a new address map entry to hold the result.
7628 * Fill in the fields from the appropriate source entries.
7629 * We must unlock the source map to do this if we need
7630 * to allocate a map entry.
7631 */
7632 if (new_entry == VM_MAP_ENTRY_NULL) {
7633 version.main_timestamp = src_map->timestamp;
7634 vm_map_unlock(src_map);
7635
7636 new_entry = vm_map_copy_entry_create(copy);
7637
7638 vm_map_lock(src_map);
7639 if ((version.main_timestamp + 1) != src_map->timestamp) {
7640 if (!vm_map_lookup_entry(src_map, src_start,
7641 &tmp_entry)) {
7642 RETURN(KERN_INVALID_ADDRESS);
7643 }
7644 if (!tmp_entry->is_sub_map)
7645 vm_map_clip_start(src_map, tmp_entry, src_start);
7646 continue; /* restart w/ new tmp_entry */
7647 }
7648 }
7649
7650 /*
7651 * Verify that the region can be read.
7652 */
7653 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7654 !use_maxprot) ||
7655 (src_entry->max_protection & VM_PROT_READ) == 0)
7656 RETURN(KERN_PROTECTION_FAILURE);
7657
7658 /*
7659 * Clip against the endpoints of the entire region.
7660 */
7661
7662 vm_map_clip_end(src_map, src_entry, src_end);
7663
7664 src_size = src_entry->vme_end - src_start;
7665 src_object = src_entry->object.vm_object;
7666 src_offset = src_entry->offset;
7667 was_wired = (src_entry->wired_count != 0);
7668
7669 vm_map_entry_copy(new_entry, src_entry);
7670 new_entry->use_pmap = FALSE; /* clr address space specifics */
7671
7672 /*
7673 * Attempt non-blocking copy-on-write optimizations.
7674 */
7675
7676 if (src_destroy &&
7677 (src_object == VM_OBJECT_NULL ||
7678 (src_object->internal && !src_object->true_share
7679 && !map_share))) {
7680 /*
7681 * If we are destroying the source, and the object
7682 * is internal, we can move the object reference
7683 * from the source to the copy. The copy is
7684 * copy-on-write only if the source is.
7685 * We make another reference to the object, because
7686 * destroying the source entry will deallocate it.
7687 */
7688 vm_object_reference(src_object);
7689
7690 /*
7691 * Copy is always unwired. vm_map_copy_entry
7692 * set its wired count to zero.
7693 */
7694
7695 goto CopySuccessful;
7696 }
7697
7698
7699 RestartCopy:
7700 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7701 src_object, new_entry, new_entry->object.vm_object,
7702 was_wired, 0);
7703 if ((src_object == VM_OBJECT_NULL ||
7704 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7705 vm_object_copy_quickly(
7706 &new_entry->object.vm_object,
7707 src_offset,
7708 src_size,
7709 &src_needs_copy,
7710 &new_entry_needs_copy)) {
7711
7712 new_entry->needs_copy = new_entry_needs_copy;
7713
7714 /*
7715 * Handle copy-on-write obligations
7716 */
7717
7718 if (src_needs_copy && !tmp_entry->needs_copy) {
7719 vm_prot_t prot;
7720
7721 prot = src_entry->protection & ~VM_PROT_WRITE;
7722
7723 if (override_nx(src_map, src_entry->alias) && prot)
7724 prot |= VM_PROT_EXECUTE;
7725
7726 vm_object_pmap_protect(
7727 src_object,
7728 src_offset,
7729 src_size,
7730 (src_entry->is_shared ?
7731 PMAP_NULL
7732 : src_map->pmap),
7733 src_entry->vme_start,
7734 prot);
7735
7736 tmp_entry->needs_copy = TRUE;
7737 }
7738
7739 /*
7740 * The map has never been unlocked, so it's safe
7741 * to move to the next entry rather than doing
7742 * another lookup.
7743 */
7744
7745 goto CopySuccessful;
7746 }
7747
7748 /*
7749 * Take an object reference, so that we may
7750 * release the map lock(s).
7751 */
7752
7753 assert(src_object != VM_OBJECT_NULL);
7754 vm_object_reference(src_object);
7755
7756 /*
7757 * Record the timestamp for later verification.
7758 * Unlock the map.
7759 */
7760
7761 version.main_timestamp = src_map->timestamp;
7762 vm_map_unlock(src_map); /* Increments timestamp once! */
7763
7764 /*
7765 * Perform the copy
7766 */
7767
7768 if (was_wired) {
7769 CopySlowly:
7770 vm_object_lock(src_object);
7771 result = vm_object_copy_slowly(
7772 src_object,
7773 src_offset,
7774 src_size,
7775 THREAD_UNINT,
7776 &new_entry->object.vm_object);
7777 new_entry->offset = 0;
7778 new_entry->needs_copy = FALSE;
7779
7780 }
7781 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7782 (tmp_entry->is_shared || map_share)) {
7783 vm_object_t new_object;
7784
7785 vm_object_lock_shared(src_object);
7786 new_object = vm_object_copy_delayed(
7787 src_object,
7788 src_offset,
7789 src_size,
7790 TRUE);
7791 if (new_object == VM_OBJECT_NULL)
7792 goto CopySlowly;
7793
7794 new_entry->object.vm_object = new_object;
7795 new_entry->needs_copy = TRUE;
7796 result = KERN_SUCCESS;
7797
7798 } else {
7799 result = vm_object_copy_strategically(src_object,
7800 src_offset,
7801 src_size,
7802 &new_entry->object.vm_object,
7803 &new_entry->offset,
7804 &new_entry_needs_copy);
7805
7806 new_entry->needs_copy = new_entry_needs_copy;
7807 }
7808
7809 if (result != KERN_SUCCESS &&
7810 result != KERN_MEMORY_RESTART_COPY) {
7811 vm_map_lock(src_map);
7812 RETURN(result);
7813 }
7814
7815 /*
7816 * Throw away the extra reference
7817 */
7818
7819 vm_object_deallocate(src_object);
7820
7821 /*
7822 * Verify that the map has not substantially
7823 * changed while the copy was being made.
7824 */
7825
7826 vm_map_lock(src_map);
7827
7828 if ((version.main_timestamp + 1) == src_map->timestamp)
7829 goto VerificationSuccessful;
7830
7831 /*
7832 * Simple version comparison failed.
7833 *
7834 * Retry the lookup and verify that the
7835 * same object/offset are still present.
7836 *
7837 * [Note: a memory manager that colludes with
7838 * the calling task can detect that we have
7839 * cheated. While the map was unlocked, the
7840 * mapping could have been changed and restored.]
7841 */
7842
7843 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7844 RETURN(KERN_INVALID_ADDRESS);
7845 }
7846
7847 src_entry = tmp_entry;
7848 vm_map_clip_start(src_map, src_entry, src_start);
7849
7850 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7851 !use_maxprot) ||
7852 ((src_entry->max_protection & VM_PROT_READ) == 0))
7853 goto VerificationFailed;
7854
7855 if (src_entry->vme_end < new_entry->vme_end)
7856 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7857
7858 if ((src_entry->object.vm_object != src_object) ||
7859 (src_entry->offset != src_offset) ) {
7860
7861 /*
7862 * Verification failed.
7863 *
7864 * Start over with this top-level entry.
7865 */
7866
7867 VerificationFailed: ;
7868
7869 vm_object_deallocate(new_entry->object.vm_object);
7870 tmp_entry = src_entry;
7871 continue;
7872 }
7873
7874 /*
7875 * Verification succeeded.
7876 */
7877
7878 VerificationSuccessful: ;
7879
7880 if (result == KERN_MEMORY_RESTART_COPY)
7881 goto RestartCopy;
7882
7883 /*
7884 * Copy succeeded.
7885 */
7886
7887 CopySuccessful: ;
7888
7889 /*
7890 * Link in the new copy entry.
7891 */
7892
7893 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7894 new_entry);
7895
7896 /*
7897 * Determine whether the entire region
7898 * has been copied.
7899 */
7900 src_base = src_start;
7901 src_start = new_entry->vme_end;
7902 new_entry = VM_MAP_ENTRY_NULL;
7903 while ((src_start >= src_end) && (src_end != 0)) {
7904 if (src_map != base_map) {
7905 submap_map_t *ptr;
7906
7907 ptr = parent_maps;
7908 assert(ptr != NULL);
7909 parent_maps = parent_maps->next;
7910
7911 /* fix up the damage we did in that submap */
7912 vm_map_simplify_range(src_map,
7913 src_base,
7914 src_end);
7915
7916 vm_map_unlock(src_map);
7917 vm_map_deallocate(src_map);
7918 vm_map_lock(ptr->parent_map);
7919 src_map = ptr->parent_map;
7920 src_base = ptr->base_start;
7921 src_start = ptr->base_start + ptr->base_len;
7922 src_end = ptr->base_end;
7923 if ((src_end > src_start) &&
7924 !vm_map_lookup_entry(
7925 src_map, src_start, &tmp_entry))
7926 RETURN(KERN_INVALID_ADDRESS);
7927 kfree(ptr, sizeof(submap_map_t));
7928 if(parent_maps == NULL)
7929 map_share = FALSE;
7930 src_entry = tmp_entry->vme_prev;
7931 } else
7932 break;
7933 }
7934 if ((src_start >= src_end) && (src_end != 0))
7935 break;
7936
7937 /*
7938 * Verify that there are no gaps in the region
7939 */
7940
7941 tmp_entry = src_entry->vme_next;
7942 if ((tmp_entry->vme_start != src_start) ||
7943 (tmp_entry == vm_map_to_entry(src_map)))
7944 RETURN(KERN_INVALID_ADDRESS);
7945 }
7946
7947 /*
7948 * If the source should be destroyed, do it now, since the
7949 * copy was successful.
7950 */
7951 if (src_destroy) {
7952 (void) vm_map_delete(src_map,
7953 vm_map_trunc_page(src_addr),
7954 src_end,
7955 (src_map == kernel_map) ?
7956 VM_MAP_REMOVE_KUNWIRE :
7957 VM_MAP_NO_FLAGS,
7958 VM_MAP_NULL);
7959 } else {
7960 /* fix up the damage we did in the base map */
7961 vm_map_simplify_range(src_map,
7962 vm_map_trunc_page(src_addr),
7963 vm_map_round_page(src_end));
7964 }
7965
7966 vm_map_unlock(src_map);
7967
7968 /* Fix-up start and end points in copy. This is necessary */
7969 /* when the various entries in the copy object were picked */
7970 /* up from different sub-maps */
7971
7972 tmp_entry = vm_map_copy_first_entry(copy);
7973 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7974 tmp_entry->vme_end = copy_addr +
7975 (tmp_entry->vme_end - tmp_entry->vme_start);
7976 tmp_entry->vme_start = copy_addr;
7977 assert(tmp_entry->vme_start < tmp_entry->vme_end);
7978 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7979 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7980 }
7981
7982 *copy_result = copy;
7983 return(KERN_SUCCESS);
7984
7985 #undef RETURN
7986 }
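/*
 * Example (sketch): depending on the size of the request, callers of
 * vm_map_copyin_common() receive different copy types (a kernel buffer
 * for small, non-maxprot copies, an entry list otherwise) and may want
 * to distinguish them.  The helper name below is hypothetical.
 */
#if 0	/* illustrative sketch, not part of this file */
static const char *
example_copy_kind(
	vm_map_copy_t	copy)
{
	if (copy == VM_MAP_COPY_NULL)
		return "null (zero-length copyin)";
	switch (copy->type) {
	case VM_MAP_COPY_KERNEL_BUFFER:
		/* len < msg_ool_size_small and !use_maxprot */
		return "kernel buffer";
	case VM_MAP_COPY_ENTRY_LIST:
		return "entry list";
	case VM_MAP_COPY_OBJECT:
		return "object";
	default:
		return "unknown";
	}
}
#endif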
7987
7988 /*
7989 * vm_map_copyin_object:
7990 *
7991 * Create a copy object from an object.
7992 * Our caller donates an object reference.
7993 */
7994
7995 kern_return_t
7996 vm_map_copyin_object(
7997 vm_object_t object,
7998 vm_object_offset_t offset, /* offset of region in object */
7999 vm_object_size_t size, /* size of region in object */
8000 vm_map_copy_t *copy_result) /* OUT */
8001 {
8002 vm_map_copy_t copy; /* Resulting copy */
8003
8004 /*
8005 * We drop the object into a special copy object
8006 * that contains the object directly.
8007 */
8008
8009 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8010 copy->type = VM_MAP_COPY_OBJECT;
8011 copy->cpy_object = object;
8012 copy->offset = offset;
8013 copy->size = size;
8014
8015 *copy_result = copy;
8016 return(KERN_SUCCESS);
8017 }
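/*
 * Example (sketch): wrapping a freshly allocated VM object in a copy
 * object and mapping it into a destination map.  vm_map_copyin_object()
 * takes over the object reference donated by the caller, and
 * vm_map_copyout() handles the resulting VM_MAP_COPY_OBJECT copy type
 * directly (see above).  The helper name is hypothetical.
 */
#if 0	/* illustrative sketch, not part of this file */
static kern_return_t
example_map_new_object(
	vm_map_t		dst_map,
	vm_object_size_t	size,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_object_t	object;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	object = vm_object_allocate((vm_map_size_t) size);

	/* donates our reference on "object" to the copy */
	kr = vm_map_copyin_object(object, 0, size, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif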
8018
8019 static void
8020 vm_map_fork_share(
8021 vm_map_t old_map,
8022 vm_map_entry_t old_entry,
8023 vm_map_t new_map)
8024 {
8025 vm_object_t object;
8026 vm_map_entry_t new_entry;
8027
8028 /*
8029 * New sharing code. New map entry
8030 * references original object. Internal
8031 * objects use asynchronous copy algorithm for
8032 * future copies. First make sure we have
8033 * the right object. If we need a shadow,
8034 * or someone else already has one, then
8035 * make a new shadow and share it.
8036 */
8037
8038 object = old_entry->object.vm_object;
8039 if (old_entry->is_sub_map) {
8040 assert(old_entry->wired_count == 0);
8041 #ifndef NO_NESTED_PMAP
8042 if(old_entry->use_pmap) {
8043 kern_return_t result;
8044
8045 result = pmap_nest(new_map->pmap,
8046 (old_entry->object.sub_map)->pmap,
8047 (addr64_t)old_entry->vme_start,
8048 (addr64_t)old_entry->vme_start,
8049 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
8050 if(result)
8051 panic("vm_map_fork_share: pmap_nest failed!");
8052 }
8053 #endif /* NO_NESTED_PMAP */
8054 } else if (object == VM_OBJECT_NULL) {
8055 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
8056 old_entry->vme_start));
8057 old_entry->offset = 0;
8058 old_entry->object.vm_object = object;
8059 assert(!old_entry->needs_copy);
8060 } else if (object->copy_strategy !=
8061 MEMORY_OBJECT_COPY_SYMMETRIC) {
8062
8063 /*
8064 * We are already using an asymmetric
8065 * copy, and therefore we already have
8066 * the right object.
8067 */
8068
8069 assert(! old_entry->needs_copy);
8070 }
8071 else if (old_entry->needs_copy || /* case 1 */
8072 object->shadowed || /* case 2 */
8073 (!object->true_share && /* case 3 */
8074 !old_entry->is_shared &&
8075 (object->vo_size >
8076 (vm_map_size_t)(old_entry->vme_end -
8077 old_entry->vme_start)))) {
8078
8079 /*
8080 * We need to create a shadow.
8081 * There are three cases here.
8082 * In the first case, we need to
8083 * complete a deferred symmetrical
8084 * copy that we participated in.
8085 * In the second and third cases,
8086 * we need to create the shadow so
8087 * that changes that we make to the
8088 * object do not interfere with
8089 * any symmetrical copies which
8090 * have occurred (case 2) or which
8091 * might occur (case 3).
8092 *
8093 * The first case is when we had
8094 * deferred shadow object creation
8095 * via the entry->needs_copy mechanism.
8096 * This mechanism only works when
8097 * only one entry points to the source
8098 * object, and we are about to create
8099 * a second entry pointing to the
8100 * same object. The problem is that
8101 * there is no way of mapping from
8102 * an object to the entries pointing
8103 * to it. (Deferred shadow creation
8104 * works with one entry because it occurs
8105 * at fault time, and we walk from the
8106 * entry to the object when handling
8107 * the fault.)
8108 *
8109 * The second case is when the object
8110 * to be shared has already been copied
8111 * with a symmetric copy, but we point
8112 * directly to the object without
8113 * needs_copy set in our entry. (This
8114 * can happen because different ranges
8115 * of an object can be pointed to by
8116 * different entries. In particular,
8117 * a single entry pointing to an object
8118 * can be split by a call to vm_inherit,
8119 * which, combined with task_create, can
8120 * result in the different entries
8121 * having different needs_copy values.)
8122 * The shadowed flag in the object allows
8123 * us to detect this case. The problem
8124 * with this case is that if this object
8125 * has or will have shadows, then we
8126 * must not perform an asymmetric copy
8127 * of this object, since such a copy
8128 * allows the object to be changed, which
8129 * will break the previous symmetrical
8130 * copies (which rely upon the object
8131 * not changing). In a sense, the shadowed
8132 * flag says "don't change this object".
8133 * We fix this by creating a shadow
8134 * object for this object, and sharing
8135 * that. This works because we are free
8136 * to change the shadow object (and thus
8137 * to use an asymmetric copy strategy);
8138 * this is also semantically correct,
8139 * since this object is temporary, and
8140 * therefore a copy of the object is
8141 * as good as the object itself. (This
8142 * is not true for permanent objects,
8143 * since the pager needs to see changes,
8144 * which won't happen if the changes
8145 * are made to a copy.)
8146 *
8147 * The third case is when the object
8148 * to be shared has parts sticking
8149 * outside of the entry we're working
8150 * with, and thus may in the future
8151 * be subject to a symmetrical copy.
8152 * (This is a preemptive version of
8153 * case 2.)
8154 */
8155 vm_object_shadow(&old_entry->object.vm_object,
8156 &old_entry->offset,
8157 (vm_map_size_t) (old_entry->vme_end -
8158 old_entry->vme_start));
8159
8160 /*
8161 * If we're making a shadow for other than
8162 * copy on write reasons, then we have
8163 * to remove write permission.
8164 */
8165
8166 if (!old_entry->needs_copy &&
8167 (old_entry->protection & VM_PROT_WRITE)) {
8168 vm_prot_t prot;
8169
8170 prot = old_entry->protection & ~VM_PROT_WRITE;
8171
8172 if (override_nx(old_map, old_entry->alias) && prot)
8173 prot |= VM_PROT_EXECUTE;
8174
8175 if (old_map->mapped) {
8176 vm_object_pmap_protect(
8177 old_entry->object.vm_object,
8178 old_entry->offset,
8179 (old_entry->vme_end -
8180 old_entry->vme_start),
8181 PMAP_NULL,
8182 old_entry->vme_start,
8183 prot);
8184 } else {
8185 pmap_protect(old_map->pmap,
8186 old_entry->vme_start,
8187 old_entry->vme_end,
8188 prot);
8189 }
8190 }
8191
8192 old_entry->needs_copy = FALSE;
8193 object = old_entry->object.vm_object;
8194 }
8195
8196
8197 /*
8198 * If object was using a symmetric copy strategy,
8199 * change its copy strategy to the default
8200 * asymmetric copy strategy, which is copy_delay
8201 * in the non-norma case and copy_call in the
8202 * norma case. Bump the reference count for the
8203 * new entry.
8204 */
8205
8206 if(old_entry->is_sub_map) {
8207 vm_map_lock(old_entry->object.sub_map);
8208 vm_map_reference(old_entry->object.sub_map);
8209 vm_map_unlock(old_entry->object.sub_map);
8210 } else {
8211 vm_object_lock(object);
8212 vm_object_reference_locked(object);
8213 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8214 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8215 }
8216 vm_object_unlock(object);
8217 }
8218
8219 /*
8220 * Clone the entry, using object ref from above.
8221 * Mark both entries as shared.
8222 */
8223
8224 new_entry = vm_map_entry_create(new_map);
8225 vm_map_entry_copy(new_entry, old_entry);
8226 old_entry->is_shared = TRUE;
8227 new_entry->is_shared = TRUE;
8228
8229 /*
8230 * Insert the entry into the new map -- we
8231 * know we're inserting at the end of the new
8232 * map.
8233 */
8234
8235 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8236
8237 /*
8238 * Update the physical map
8239 */
8240
8241 if (old_entry->is_sub_map) {
8242 /* Bill Angell pmap support goes here */
8243 } else {
8244 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8245 old_entry->vme_end - old_entry->vme_start,
8246 old_entry->vme_start);
8247 }
8248 }
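
/*
 * Worked example for "case 3" in the comment above: suppose a 1 MB
 * internal object is mapped by a single 256 KB entry inherited with
 * VM_INHERIT_SHARE.  The object is neither true_share nor is_shared,
 * but vo_size (1 MB) exceeds the entry's size (256 KB), so the parts
 * of the object outside this entry could still become the target of a
 * future symmetric copy.  vm_map_fork_share() therefore shadows the
 * object before sharing it, and the shadow is what both the parent and
 * the child end up mapping.
 */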
8249
8250 static boolean_t
8251 vm_map_fork_copy(
8252 vm_map_t old_map,
8253 vm_map_entry_t *old_entry_p,
8254 vm_map_t new_map)
8255 {
8256 vm_map_entry_t old_entry = *old_entry_p;
8257 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8258 vm_map_offset_t start = old_entry->vme_start;
8259 vm_map_copy_t copy;
8260 vm_map_entry_t last = vm_map_last_entry(new_map);
8261
8262 vm_map_unlock(old_map);
8263 /*
8264 * Use maxprot version of copyin because we
8265 * care about whether this memory can ever
8266 * be accessed, not just whether it's accessible
8267 * right now.
8268 */
8269 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8270 != KERN_SUCCESS) {
8271 /*
8272 * The map might have changed while it
8273 * was unlocked, check it again. Skip
8274 * any blank space or permanently
8275 * unreadable region.
8276 */
8277 vm_map_lock(old_map);
8278 if (!vm_map_lookup_entry(old_map, start, &last) ||
8279 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8280 last = last->vme_next;
8281 }
8282 *old_entry_p = last;
8283
8284 /*
8285 * XXX For some error returns, want to
8286 * XXX skip to the next element. Note
8287 * that INVALID_ADDRESS and
8288 * PROTECTION_FAILURE are handled above.
8289 */
8290
8291 return FALSE;
8292 }
8293
8294 /*
8295 * Insert the copy into the new map
8296 */
8297
8298 vm_map_copy_insert(new_map, last, copy);
8299
8300 /*
8301 * Pick up the traversal at the end of
8302 * the copied region.
8303 */
8304
8305 vm_map_lock(old_map);
8306 start += entry_size;
8307 if (! vm_map_lookup_entry(old_map, start, &last)) {
8308 last = last->vme_next;
8309 } else {
8310 if (last->vme_start == start) {
8311 /*
8312 * No need to clip here and we don't
8313 * want to cause any unnecessary
8314 * unnesting...
8315 */
8316 } else {
8317 vm_map_clip_start(old_map, last, start);
8318 }
8319 }
8320 *old_entry_p = last;
8321
8322 return TRUE;
8323 }
8324
8325 /*
8326 * vm_map_fork:
8327 *
8328 * Create and return a new map based on the old
8329 * map, according to the inheritance values on the
8330 * regions in that map.
8331 *
8332 * The source map must not be locked.
8333 */
8334 vm_map_t
8335 vm_map_fork(
8336 vm_map_t old_map)
8337 {
8338 pmap_t new_pmap;
8339 vm_map_t new_map;
8340 vm_map_entry_t old_entry;
8341 vm_map_size_t new_size = 0, entry_size;
8342 vm_map_entry_t new_entry;
8343 boolean_t src_needs_copy;
8344 boolean_t new_entry_needs_copy;
8345
8346 new_pmap = pmap_create((vm_map_size_t) 0,
8347 #if defined(__i386__) || defined(__x86_64__)
8348 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8349 #else
8350 0
8351 #endif
8352 );
8353 #if defined(__i386__)
8354 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8355 pmap_set_4GB_pagezero(new_pmap);
8356 #endif
8357
8358 vm_map_reference_swap(old_map);
8359 vm_map_lock(old_map);
8360
8361 new_map = vm_map_create(new_pmap,
8362 old_map->min_offset,
8363 old_map->max_offset,
8364 old_map->hdr.entries_pageable);
8365 for (
8366 old_entry = vm_map_first_entry(old_map);
8367 old_entry != vm_map_to_entry(old_map);
8368 ) {
8369
8370 entry_size = old_entry->vme_end - old_entry->vme_start;
8371
8372 switch (old_entry->inheritance) {
8373 case VM_INHERIT_NONE:
8374 break;
8375
8376 case VM_INHERIT_SHARE:
8377 vm_map_fork_share(old_map, old_entry, new_map);
8378 new_size += entry_size;
8379 break;
8380
8381 case VM_INHERIT_COPY:
8382
8383 /*
8384 * Inline the copy_quickly case;
8385 * upon failure, fall back on call
8386 * to vm_map_fork_copy.
8387 */
8388
8389 if(old_entry->is_sub_map)
8390 break;
8391 if ((old_entry->wired_count != 0) ||
8392 ((old_entry->object.vm_object != NULL) &&
8393 (old_entry->object.vm_object->true_share))) {
8394 goto slow_vm_map_fork_copy;
8395 }
8396
8397 new_entry = vm_map_entry_create(new_map);
8398 vm_map_entry_copy(new_entry, old_entry);
8399 /* clear address space specifics */
8400 new_entry->use_pmap = FALSE;
8401
8402 if (! vm_object_copy_quickly(
8403 &new_entry->object.vm_object,
8404 old_entry->offset,
8405 (old_entry->vme_end -
8406 old_entry->vme_start),
8407 &src_needs_copy,
8408 &new_entry_needs_copy)) {
8409 vm_map_entry_dispose(new_map, new_entry);
8410 goto slow_vm_map_fork_copy;
8411 }
8412
8413 /*
8414 * Handle copy-on-write obligations
8415 */
8416
8417 if (src_needs_copy && !old_entry->needs_copy) {
8418 vm_prot_t prot;
8419
8420 prot = old_entry->protection & ~VM_PROT_WRITE;
8421
8422 if (override_nx(old_map, old_entry->alias) && prot)
8423 prot |= VM_PROT_EXECUTE;
8424
8425 vm_object_pmap_protect(
8426 old_entry->object.vm_object,
8427 old_entry->offset,
8428 (old_entry->vme_end -
8429 old_entry->vme_start),
8430 ((old_entry->is_shared
8431 || old_map->mapped)
8432 ? PMAP_NULL :
8433 old_map->pmap),
8434 old_entry->vme_start,
8435 prot);
8436
8437 old_entry->needs_copy = TRUE;
8438 }
8439 new_entry->needs_copy = new_entry_needs_copy;
8440
8441 /*
8442 * Insert the entry at the end
8443 * of the map.
8444 */
8445
8446 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8447 new_entry);
8448 new_size += entry_size;
8449 break;
8450
8451 slow_vm_map_fork_copy:
8452 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8453 new_size += entry_size;
8454 }
8455 continue;
8456 }
8457 old_entry = old_entry->vme_next;
8458 }
8459
8460 new_map->size = new_size;
8461 vm_map_unlock(old_map);
8462 vm_map_deallocate(old_map);
8463
8464 return(new_map);
8465 }
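
/*
 * Hedged usage sketch (hypothetical task pointers, not compiled): a
 * fork-style task duplication hands the parent's map to vm_map_fork()
 * and installs the result in the child.
 */
#if 0	/* example only */
	vm_map_t	child_map;

	child_map = vm_map_fork(parent_task->map);	/* takes its own refs */
	child_task->map = child_map;
#endif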
8466
8467 /*
8468 * vm_map_exec:
8469 *
8470 * Setup the "new_map" with the proper execution environment according
8471 * to the type of executable (platform, 64bit, chroot environment).
8472 * Map the comm page and shared region, etc...
8473 */
8474 kern_return_t
8475 vm_map_exec(
8476 vm_map_t new_map,
8477 task_t task,
8478 void *fsroot,
8479 cpu_type_t cpu)
8480 {
8481 SHARED_REGION_TRACE_DEBUG(
8482 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8483 current_task(), new_map, task, fsroot, cpu));
8484 (void) vm_commpage_enter(new_map, task);
8485 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8486 SHARED_REGION_TRACE_DEBUG(
8487 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8488 current_task(), new_map, task, fsroot, cpu));
8489 return KERN_SUCCESS;
8490 }
8491
8492 /*
8493 * vm_map_lookup_locked:
8494 *
8495 * Finds the VM object, offset, and
8496 * protection for a given virtual address in the
8497 * specified map, assuming a page fault of the
8498 * type specified.
8499 *
8500 * Returns the (object, offset, protection) for
8501 * this address, whether it is wired down, and whether
8502 * this map has the only reference to the data in question.
8503 * In order to later verify this lookup, a "version"
8504 * is returned.
8505 *
8506 * The map MUST be locked by the caller and WILL be
8507 * locked on exit. In order to guarantee the
8508 * existence of the returned object, it is returned
8509 * locked.
8510 *
8511 * If a lookup is requested with "write protection"
8512 * specified, the map may be changed to perform virtual
8513 * copying operations, although the data referenced will
8514 * remain the same.
8515 */
8516 kern_return_t
8517 vm_map_lookup_locked(
8518 vm_map_t *var_map, /* IN/OUT */
8519 vm_map_offset_t vaddr,
8520 vm_prot_t fault_type,
8521 int object_lock_type,
8522 vm_map_version_t *out_version, /* OUT */
8523 vm_object_t *object, /* OUT */
8524 vm_object_offset_t *offset, /* OUT */
8525 vm_prot_t *out_prot, /* OUT */
8526 boolean_t *wired, /* OUT */
8527 vm_object_fault_info_t fault_info, /* OUT */
8528 vm_map_t *real_map)
8529 {
8530 vm_map_entry_t entry;
8531 register vm_map_t map = *var_map;
8532 vm_map_t old_map = *var_map;
8533 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8534 vm_map_offset_t cow_parent_vaddr = 0;
8535 vm_map_offset_t old_start = 0;
8536 vm_map_offset_t old_end = 0;
8537 register vm_prot_t prot;
8538 boolean_t mask_protections;
8539 vm_prot_t original_fault_type;
8540
8541 /*
8542 * VM_PROT_MASK means that the caller wants us to use "fault_type"
8543 * as a mask against the mapping's actual protections, not as an
8544 * absolute value.
8545 */
8546 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8547 fault_type &= ~VM_PROT_IS_MASK;
8548 original_fault_type = fault_type;
8549
8550 *real_map = map;
8551
8552 RetryLookup:
8553 fault_type = original_fault_type;
8554
8555 /*
8556 * If the map has an interesting hint, try it before calling
8557 * full blown lookup routine.
8558 */
8559 entry = map->hint;
8560
8561 if ((entry == vm_map_to_entry(map)) ||
8562 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8563 vm_map_entry_t tmp_entry;
8564
8565 /*
8566 * Entry was either not a valid hint, or the vaddr
8567 * was not contained in the entry, so do a full lookup.
8568 */
8569 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8570 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8571 vm_map_unlock(cow_sub_map_parent);
8572 if((*real_map != map)
8573 && (*real_map != cow_sub_map_parent))
8574 vm_map_unlock(*real_map);
8575 return KERN_INVALID_ADDRESS;
8576 }
8577
8578 entry = tmp_entry;
8579 }
8580 if(map == old_map) {
8581 old_start = entry->vme_start;
8582 old_end = entry->vme_end;
8583 }
8584
8585 /*
8586 * Handle submaps. Drop lock on upper map, submap is
8587 * returned locked.
8588 */
8589
8590 submap_recurse:
8591 if (entry->is_sub_map) {
8592 vm_map_offset_t local_vaddr;
8593 vm_map_offset_t end_delta;
8594 vm_map_offset_t start_delta;
8595 vm_map_entry_t submap_entry;
8596 boolean_t mapped_needs_copy=FALSE;
8597
8598 local_vaddr = vaddr;
8599
8600 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8601 /* if real_map equals map we unlock below */
8602 if ((*real_map != map) &&
8603 (*real_map != cow_sub_map_parent))
8604 vm_map_unlock(*real_map);
8605 *real_map = entry->object.sub_map;
8606 }
8607
8608 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8609 if (!mapped_needs_copy) {
8610 if (vm_map_lock_read_to_write(map)) {
8611 vm_map_lock_read(map);
8612 /* XXX FBDP: entry still valid ? */
8613 if(*real_map == entry->object.sub_map)
8614 *real_map = map;
8615 goto RetryLookup;
8616 }
8617 vm_map_lock_read(entry->object.sub_map);
8618 cow_sub_map_parent = map;
8619 /* reset base to map before cow object */
8620 /* this is the map which will accept */
8621 /* the new cow object */
8622 old_start = entry->vme_start;
8623 old_end = entry->vme_end;
8624 cow_parent_vaddr = vaddr;
8625 mapped_needs_copy = TRUE;
8626 } else {
8627 vm_map_lock_read(entry->object.sub_map);
8628 if((cow_sub_map_parent != map) &&
8629 (*real_map != map))
8630 vm_map_unlock(map);
8631 }
8632 } else {
8633 vm_map_lock_read(entry->object.sub_map);
8634 /* leave map locked if it is a target */
8635 /* cow sub_map above otherwise, just */
8636 /* follow the maps down to the object */
8637 /* here we unlock knowing we are not */
8638 /* revisiting the map. */
8639 if((*real_map != map) && (map != cow_sub_map_parent))
8640 vm_map_unlock_read(map);
8641 }
8642
8643 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8644 *var_map = map = entry->object.sub_map;
8645
8646 /* calculate the offset in the submap for vaddr */
8647 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8648
8649 RetrySubMap:
8650 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8651 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8652 vm_map_unlock(cow_sub_map_parent);
8653 }
8654 if((*real_map != map)
8655 && (*real_map != cow_sub_map_parent)) {
8656 vm_map_unlock(*real_map);
8657 }
8658 *real_map = map;
8659 return KERN_INVALID_ADDRESS;
8660 }
8661
8662 /* find the attenuated shadow of the underlying object */
8663 /* on our target map */
8664
8665 /* in plain English: the submap object may extend beyond the */
8666 /* region mapped by the entry, or may only fill a portion */
8667 /* of it. For our purposes, we only care if the object */
8668 /* doesn't fill. In this case the area which will */
8669 /* ultimately be clipped in the top map will only need */
8670 /* to be as big as the portion of the underlying entry */
8671 /* which is mapped */
8672 start_delta = submap_entry->vme_start > entry->offset ?
8673 submap_entry->vme_start - entry->offset : 0;
8674
8675 end_delta =
8676 (entry->offset + start_delta + (old_end - old_start)) <=
8677 submap_entry->vme_end ?
8678 0 : (entry->offset +
8679 (old_end - old_start))
8680 - submap_entry->vme_end;
8681
8682 old_start += start_delta;
8683 old_end -= end_delta;
8684
8685 if(submap_entry->is_sub_map) {
8686 entry = submap_entry;
8687 vaddr = local_vaddr;
8688 goto submap_recurse;
8689 }
8690
8691 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8692
8693 vm_object_t sub_object, copy_object;
8694 vm_object_offset_t copy_offset;
8695 vm_map_offset_t local_start;
8696 vm_map_offset_t local_end;
8697 boolean_t copied_slowly = FALSE;
8698
8699 if (vm_map_lock_read_to_write(map)) {
8700 vm_map_lock_read(map);
8701 old_start -= start_delta;
8702 old_end += end_delta;
8703 goto RetrySubMap;
8704 }
8705
8706
8707 sub_object = submap_entry->object.vm_object;
8708 if (sub_object == VM_OBJECT_NULL) {
8709 sub_object =
8710 vm_object_allocate(
8711 (vm_map_size_t)
8712 (submap_entry->vme_end -
8713 submap_entry->vme_start));
8714 submap_entry->object.vm_object = sub_object;
8715 submap_entry->offset = 0;
8716 }
8717 local_start = local_vaddr -
8718 (cow_parent_vaddr - old_start);
8719 local_end = local_vaddr +
8720 (old_end - cow_parent_vaddr);
8721 vm_map_clip_start(map, submap_entry, local_start);
8722 vm_map_clip_end(map, submap_entry, local_end);
8723 /* unnesting was done in vm_map_clip_start/end() */
8724 assert(!submap_entry->use_pmap);
8725
8726 /* This is the COW case, let's connect */
8727 /* an entry in our space to the underlying */
8728 /* object in the submap, bypassing the */
8729 /* submap. */
8730
8731
8732 if(submap_entry->wired_count != 0 ||
8733 (sub_object->copy_strategy ==
8734 MEMORY_OBJECT_COPY_NONE)) {
8735 vm_object_lock(sub_object);
8736 vm_object_copy_slowly(sub_object,
8737 submap_entry->offset,
8738 (submap_entry->vme_end -
8739 submap_entry->vme_start),
8740 FALSE,
8741 &copy_object);
8742 copied_slowly = TRUE;
8743 } else {
8744
8745 /* set up shadow object */
8746 copy_object = sub_object;
8747 vm_object_reference(copy_object);
8748 sub_object->shadowed = TRUE;
8749 submap_entry->needs_copy = TRUE;
8750
8751 prot = submap_entry->protection & ~VM_PROT_WRITE;
8752
8753 if (override_nx(map, submap_entry->alias) && prot)
8754 prot |= VM_PROT_EXECUTE;
8755
8756 vm_object_pmap_protect(
8757 sub_object,
8758 submap_entry->offset,
8759 submap_entry->vme_end -
8760 submap_entry->vme_start,
8761 (submap_entry->is_shared
8762 || map->mapped) ?
8763 PMAP_NULL : map->pmap,
8764 submap_entry->vme_start,
8765 prot);
8766 }
8767
8768 /*
8769 * Adjust the fault offset to the submap entry.
8770 */
8771 copy_offset = (local_vaddr -
8772 submap_entry->vme_start +
8773 submap_entry->offset);
8774
8775 /* This works differently from the */
8776 /* normal submap case. We go back */
8777 /* to the parent of the cow map and */
8778 /* clip out the target portion of */
8779 /* the sub_map, substituting the */
8780 /* new copy object. */
8781
8782 vm_map_unlock(map);
8783 local_start = old_start;
8784 local_end = old_end;
8785 map = cow_sub_map_parent;
8786 *var_map = cow_sub_map_parent;
8787 vaddr = cow_parent_vaddr;
8788 cow_sub_map_parent = NULL;
8789
8790 if(!vm_map_lookup_entry(map,
8791 vaddr, &entry)) {
8792 vm_object_deallocate(
8793 copy_object);
8794 vm_map_lock_write_to_read(map);
8795 return KERN_INVALID_ADDRESS;
8796 }
8797
8798 /* clip out the portion of space */
8799 /* mapped by the sub map which */
8800 /* corresponds to the underlying */
8801 /* object */
8802
8803 /*
8804 * Clip (and unnest) the smallest nested chunk
8805 * possible around the faulting address...
8806 */
8807 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8808 local_end = local_start + pmap_nesting_size_min;
8809 /*
8810 * ... but don't go beyond the "old_start" to "old_end"
8811 * range, to avoid spanning over another VM region
8812 * with a possibly different VM object and/or offset.
8813 */
8814 if (local_start < old_start) {
8815 local_start = old_start;
8816 }
8817 if (local_end > old_end) {
8818 local_end = old_end;
8819 }
8820 /*
8821 * Adjust copy_offset to the start of the range.
8822 */
8823 copy_offset -= (vaddr - local_start);
8824
8825 vm_map_clip_start(map, entry, local_start);
8826 vm_map_clip_end(map, entry, local_end);
8827 /* unnesting was done in vm_map_clip_start/end() */
8828 assert(!entry->use_pmap);
8829
8830 /* substitute copy object for */
8831 /* shared map entry */
8832 vm_map_deallocate(entry->object.sub_map);
8833 entry->is_sub_map = FALSE;
8834 entry->object.vm_object = copy_object;
8835
8836 /* propagate the submap entry's protections */
8837 entry->protection |= submap_entry->protection;
8838 entry->max_protection |= submap_entry->max_protection;
8839
8840 if(copied_slowly) {
8841 entry->offset = local_start - old_start;
8842 entry->needs_copy = FALSE;
8843 entry->is_shared = FALSE;
8844 } else {
8845 entry->offset = copy_offset;
8846 entry->needs_copy = TRUE;
8847 if(entry->inheritance == VM_INHERIT_SHARE)
8848 entry->inheritance = VM_INHERIT_COPY;
8849 if (map != old_map)
8850 entry->is_shared = TRUE;
8851 }
8852 if(entry->inheritance == VM_INHERIT_SHARE)
8853 entry->inheritance = VM_INHERIT_COPY;
8854
8855 vm_map_lock_write_to_read(map);
8856 } else {
8857 if((cow_sub_map_parent)
8858 && (cow_sub_map_parent != *real_map)
8859 && (cow_sub_map_parent != map)) {
8860 vm_map_unlock(cow_sub_map_parent);
8861 }
8862 entry = submap_entry;
8863 vaddr = local_vaddr;
8864 }
8865 }
8866
8867 /*
8868 * Check whether this task is allowed to have
8869 * this page.
8870 */
8871
8872 prot = entry->protection;
8873
8874 if (override_nx(map, entry->alias) && prot) {
8875 /*
8876 * HACK -- if not a stack, then allow execution
8877 */
8878 prot |= VM_PROT_EXECUTE;
8879 }
8880
8881 if (mask_protections) {
8882 fault_type &= prot;
8883 if (fault_type == VM_PROT_NONE) {
8884 goto protection_failure;
8885 }
8886 }
8887 if ((fault_type & (prot)) != fault_type) {
8888 protection_failure:
8889 if (*real_map != map) {
8890 vm_map_unlock(*real_map);
8891 }
8892 *real_map = map;
8893
8894 if ((fault_type & VM_PROT_EXECUTE) && prot)
8895 log_stack_execution_failure((addr64_t)vaddr, prot);
8896
8897 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8898 return KERN_PROTECTION_FAILURE;
8899 }
8900
8901 /*
8902 * If this page is not pageable, we have to get
8903 * it for all possible accesses.
8904 */
8905
8906 *wired = (entry->wired_count != 0);
8907 if (*wired)
8908 fault_type = prot;
8909
8910 /*
8911 * If the entry was copy-on-write, we either ...
8912 */
8913
8914 if (entry->needs_copy) {
8915 /*
8916 * If we want to write the page, we may as well
8917 * handle that now since we've got the map locked.
8918 *
8919 * If we don't need to write the page, we just
8920 * demote the permissions allowed.
8921 */
8922
8923 if ((fault_type & VM_PROT_WRITE) || *wired) {
8924 /*
8925 * Make a new object, and place it in the
8926 * object chain. Note that no new references
8927 * have appeared -- one just moved from the
8928 * map to the new object.
8929 */
8930
8931 if (vm_map_lock_read_to_write(map)) {
8932 vm_map_lock_read(map);
8933 goto RetryLookup;
8934 }
8935 vm_object_shadow(&entry->object.vm_object,
8936 &entry->offset,
8937 (vm_map_size_t) (entry->vme_end -
8938 entry->vme_start));
8939
8940 entry->object.vm_object->shadowed = TRUE;
8941 entry->needs_copy = FALSE;
8942 vm_map_lock_write_to_read(map);
8943 }
8944 else {
8945 /*
8946 * We're attempting to read a copy-on-write
8947 * page -- don't allow writes.
8948 */
8949
8950 prot &= (~VM_PROT_WRITE);
8951 }
8952 }
8953
8954 /*
8955 * Create an object if necessary.
8956 */
8957 if (entry->object.vm_object == VM_OBJECT_NULL) {
8958
8959 if (vm_map_lock_read_to_write(map)) {
8960 vm_map_lock_read(map);
8961 goto RetryLookup;
8962 }
8963
8964 entry->object.vm_object = vm_object_allocate(
8965 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8966 entry->offset = 0;
8967 vm_map_lock_write_to_read(map);
8968 }
8969
8970 /*
8971 * Return the object/offset from this entry. If the entry
8972 * was copy-on-write or empty, it has been fixed up. Also
8973 * return the protection.
8974 */
8975
8976 *offset = (vaddr - entry->vme_start) + entry->offset;
8977 *object = entry->object.vm_object;
8978 *out_prot = prot;
8979
8980 if (fault_info) {
8981 fault_info->interruptible = THREAD_UNINT; /* for now... */
8982 /* ... the caller will change "interruptible" if needed */
8983 fault_info->cluster_size = 0;
8984 fault_info->user_tag = entry->alias;
8985 fault_info->behavior = entry->behavior;
8986 fault_info->lo_offset = entry->offset;
8987 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8988 fault_info->no_cache = entry->no_cache;
8989 fault_info->stealth = FALSE;
8990 fault_info->io_sync = FALSE;
8991 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
8992 fault_info->mark_zf_absent = FALSE;
8993 }
8994
8995 /*
8996 * Lock the object to prevent it from disappearing
8997 */
8998 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8999 vm_object_lock(*object);
9000 else
9001 vm_object_lock_shared(*object);
9002
9003 /*
9004 * Save the version number
9005 */
9006
9007 out_version->main_timestamp = map->timestamp;
9008
9009 return KERN_SUCCESS;
9010 }
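
/*
 * Hedged usage sketch (hypothetical "example_" wrapper, not compiled):
 * the canonical consumer of vm_map_lookup_locked() is the page-fault
 * path, which looks up the object/offset under a read lock, records
 * the map version, and later re-validates with vm_map_verify().  The
 * fault handling itself is omitted here.
 */
#if 0	/* example only */
static kern_return_t
example_lookup(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL, &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	/* object is returned locked; "map" and "real_map" are still locked */
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);
	/* ... fault the page in, then re-check with vm_map_verify(map, &version) ... */
	return KERN_SUCCESS;
}
#endif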
9011
9012
9013 /*
9014 * vm_map_verify:
9015 *
9016 * Verifies that the map in question has not changed
9017 * since the given version. If successful, the map
9018 * will not change until vm_map_verify_done() is called.
9019 */
9020 boolean_t
9021 vm_map_verify(
9022 register vm_map_t map,
9023 register vm_map_version_t *version) /* REF */
9024 {
9025 boolean_t result;
9026
9027 vm_map_lock_read(map);
9028 result = (map->timestamp == version->main_timestamp);
9029
9030 if (!result)
9031 vm_map_unlock_read(map);
9032
9033 return(result);
9034 }
9035
9036 /*
9037 * vm_map_verify_done:
9038 *
9039 * Releases locks acquired by a vm_map_verify.
9040 *
9041 * This is now a macro in vm/vm_map.h. It does a
9042 * vm_map_unlock_read on the map.
9043 */
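
/*
 * Hedged usage fragment (assumes "map" and "version" saved by an
 * earlier vm_map_lookup_locked() call, not compiled): a successful
 * vm_map_verify() leaves the map read-locked until
 * vm_map_verify_done() releases it.
 */
#if 0	/* example only */
	if (vm_map_verify(map, &version)) {
		/* map unchanged: the cached lookup results are still good */
		/* ... use them ... */
		vm_map_verify_done(map, &version);	/* vm_map_unlock_read */
	} else {
		/* map changed while unlocked: redo the lookup */
	}
#endif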
9044
9045
9046 /*
9047 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9048 * Goes away after regular vm_region_recurse function migrates to
9049 * 64 bits
9050 * vm_region_recurse: A form of vm_region which follows the
9051 * submaps in a target map
9052 *
9053 */
9054
9055 kern_return_t
9056 vm_map_region_recurse_64(
9057 vm_map_t map,
9058 vm_map_offset_t *address, /* IN/OUT */
9059 vm_map_size_t *size, /* OUT */
9060 natural_t *nesting_depth, /* IN/OUT */
9061 vm_region_submap_info_64_t submap_info, /* IN/OUT */
9062 mach_msg_type_number_t *count) /* IN/OUT */
9063 {
9064 vm_region_extended_info_data_t extended;
9065 vm_map_entry_t tmp_entry;
9066 vm_map_offset_t user_address;
9067 unsigned int user_max_depth;
9068
9069 /*
9070 * "curr_entry" is the VM map entry preceding or including the
9071 * address we're looking for.
9072 * "curr_map" is the map or sub-map containing "curr_entry".
9073 * "curr_address" is the equivalent of the top map's "user_address"
9074 * in the current map.
9075 * "curr_offset" is the cumulated offset of "curr_map" in the
9076 * target task's address space.
9077 * "curr_depth" is the depth of "curr_map" in the chain of
9078 * sub-maps.
9079 *
9080 * "curr_max_below" and "curr_max_above" limit the range (around
9081 * "curr_address") we should take into account in the current (sub)map.
9082 * They limit the range to what's visible through the map entries
9083 * we've traversed from the top map to the current map.
9084 *
9085 */
9086 vm_map_entry_t curr_entry;
9087 vm_map_address_t curr_address;
9088 vm_map_offset_t curr_offset;
9089 vm_map_t curr_map;
9090 unsigned int curr_depth;
9091 vm_map_offset_t curr_max_below, curr_max_above;
9092 vm_map_offset_t curr_skip;
9093
9094 /*
9095 * "next_" is the same as "curr_" but for the VM region immediately
9096 * after the address we're looking for. We need to keep track of this
9097 * too because we want to return info about that region if the
9098 * address we're looking for is not mapped.
9099 */
9100 vm_map_entry_t next_entry;
9101 vm_map_offset_t next_offset;
9102 vm_map_offset_t next_address;
9103 vm_map_t next_map;
9104 unsigned int next_depth;
9105 vm_map_offset_t next_max_below, next_max_above;
9106 vm_map_offset_t next_skip;
9107
9108 boolean_t look_for_pages;
9109 vm_region_submap_short_info_64_t short_info;
9110
9111 if (map == VM_MAP_NULL) {
9112 /* no address space to work on */
9113 return KERN_INVALID_ARGUMENT;
9114 }
9115
9116 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
9117 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9118 /*
9119 * "info" structure is not big enough and
9120 * would overflow
9121 */
9122 return KERN_INVALID_ARGUMENT;
9123 } else {
9124 look_for_pages = FALSE;
9125 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9126 short_info = (vm_region_submap_short_info_64_t) submap_info;
9127 submap_info = NULL;
9128 }
9129 } else {
9130 look_for_pages = TRUE;
9131 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
9132 short_info = NULL;
9133 }
9134
9135
9136 user_address = *address;
9137 user_max_depth = *nesting_depth;
9138
9139 curr_entry = NULL;
9140 curr_map = map;
9141 curr_address = user_address;
9142 curr_offset = 0;
9143 curr_skip = 0;
9144 curr_depth = 0;
9145 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9146 curr_max_below = curr_address;
9147
9148 next_entry = NULL;
9149 next_map = NULL;
9150 next_address = 0;
9151 next_offset = 0;
9152 next_skip = 0;
9153 next_depth = 0;
9154 next_max_above = (vm_map_offset_t) -1;
9155 next_max_below = (vm_map_offset_t) -1;
9156
9157 if (not_in_kdp) {
9158 vm_map_lock_read(curr_map);
9159 }
9160
9161 for (;;) {
9162 if (vm_map_lookup_entry(curr_map,
9163 curr_address,
9164 &tmp_entry)) {
9165 /* tmp_entry contains the address we're looking for */
9166 curr_entry = tmp_entry;
9167 } else {
9168 vm_map_offset_t skip;
9169 /*
9170 * The address is not mapped. "tmp_entry" is the
9171 * map entry preceding the address. We want the next
9172 * one, if it exists.
9173 */
9174 curr_entry = tmp_entry->vme_next;
9175
9176 if (curr_entry == vm_map_to_entry(curr_map) ||
9177 (curr_entry->vme_start >=
9178 curr_address + curr_max_above)) {
9179 /* no next entry at this level: stop looking */
9180 if (not_in_kdp) {
9181 vm_map_unlock_read(curr_map);
9182 }
9183 curr_entry = NULL;
9184 curr_map = NULL;
9185 curr_offset = 0;
9186 curr_depth = 0;
9187 curr_max_above = 0;
9188 curr_max_below = 0;
9189 break;
9190 }
9191
9192 /* adjust current address and offset */
9193 skip = curr_entry->vme_start - curr_address;
9194 curr_address = curr_entry->vme_start;
9195 curr_skip = skip;
9196 curr_offset += skip;
9197 curr_max_above -= skip;
9198 curr_max_below = 0;
9199 }
9200
9201 /*
9202 * Is the next entry at this level closer to the address (or
9203 * deeper in the submap chain) than the one we had
9204 * so far ?
9205 */
9206 tmp_entry = curr_entry->vme_next;
9207 if (tmp_entry == vm_map_to_entry(curr_map)) {
9208 /* no next entry at this level */
9209 } else if (tmp_entry->vme_start >=
9210 curr_address + curr_max_above) {
9211 /*
9212 * tmp_entry is beyond the scope of what we mapped of
9213 * this submap in the upper level: ignore it.
9214 */
9215 } else if ((next_entry == NULL) ||
9216 (tmp_entry->vme_start + curr_offset <=
9217 next_entry->vme_start + next_offset)) {
9218 /*
9219 * We didn't have a "next_entry" or this one is
9220 * closer to the address we're looking for:
9221 * use this "tmp_entry" as the new "next_entry".
9222 */
9223 if (next_entry != NULL) {
9224 /* unlock the last "next_map" */
9225 if (next_map != curr_map && not_in_kdp) {
9226 vm_map_unlock_read(next_map);
9227 }
9228 }
9229 next_entry = tmp_entry;
9230 next_map = curr_map;
9231 next_depth = curr_depth;
9232 next_address = next_entry->vme_start;
9233 next_skip = curr_skip;
9234 next_offset = curr_offset;
9235 next_offset += (next_address - curr_address);
9236 next_max_above = MIN(next_max_above, curr_max_above);
9237 next_max_above = MIN(next_max_above,
9238 next_entry->vme_end - next_address);
9239 next_max_below = MIN(next_max_below, curr_max_below);
9240 next_max_below = MIN(next_max_below,
9241 next_address - next_entry->vme_start);
9242 }
9243
9244 /*
9245 * "curr_max_{above,below}" allow us to keep track of the
9246 * portion of the submap that is actually mapped at this level:
9247 * the rest of that submap is irrelevant to us, since it's not
9248 * mapped here.
9249 * The relevant portion of the map starts at
9250 * "curr_entry->offset" up to the size of "curr_entry".
9251 */
9252 curr_max_above = MIN(curr_max_above,
9253 curr_entry->vme_end - curr_address);
9254 curr_max_below = MIN(curr_max_below,
9255 curr_address - curr_entry->vme_start);
9256
9257 if (!curr_entry->is_sub_map ||
9258 curr_depth >= user_max_depth) {
9259 /*
9260 * We hit a leaf map or we reached the maximum depth
9261 * we could, so stop looking. Keep the current map
9262 * locked.
9263 */
9264 break;
9265 }
9266
9267 /*
9268 * Get down to the next submap level.
9269 */
9270
9271 /*
9272 * Lock the next level and unlock the current level,
9273 * unless we need to keep it locked to access the "next_entry"
9274 * later.
9275 */
9276 if (not_in_kdp) {
9277 vm_map_lock_read(curr_entry->object.sub_map);
9278 }
9279 if (curr_map == next_map) {
9280 /* keep "next_map" locked in case we need it */
9281 } else {
9282 /* release this map */
9283 if (not_in_kdp)
9284 vm_map_unlock_read(curr_map);
9285 }
9286
9287 /*
9288 * Adjust the offset. "curr_entry" maps the submap
9289 * at relative address "curr_entry->vme_start" in the
9290 * curr_map but skips the first "curr_entry->offset"
9291 * bytes of the submap.
9292 * "curr_offset" always represents the offset of a virtual
9293 * address in the curr_map relative to the absolute address
9294 * space (i.e. the top-level VM map).
9295 */
9296 curr_offset +=
9297 (curr_entry->offset - curr_entry->vme_start);
9298 curr_address = user_address + curr_offset;
9299 /* switch to the submap */
9300 curr_map = curr_entry->object.sub_map;
9301 curr_depth++;
9302 curr_entry = NULL;
9303 }
9304
9305 if (curr_entry == NULL) {
9306 /* no VM region contains the address... */
9307 if (next_entry == NULL) {
9308 /* ... and no VM region follows it either */
9309 return KERN_INVALID_ADDRESS;
9310 }
9311 /* ... gather info about the next VM region */
9312 curr_entry = next_entry;
9313 curr_map = next_map; /* still locked ... */
9314 curr_address = next_address;
9315 curr_skip = next_skip;
9316 curr_offset = next_offset;
9317 curr_depth = next_depth;
9318 curr_max_above = next_max_above;
9319 curr_max_below = next_max_below;
9320 if (curr_map == map) {
9321 user_address = curr_address;
9322 }
9323 } else {
9324 /* we won't need "next_entry" after all */
9325 if (next_entry != NULL) {
9326 /* release "next_map" */
9327 if (next_map != curr_map && not_in_kdp) {
9328 vm_map_unlock_read(next_map);
9329 }
9330 }
9331 }
9332 next_entry = NULL;
9333 next_map = NULL;
9334 next_offset = 0;
9335 next_skip = 0;
9336 next_depth = 0;
9337 next_max_below = -1;
9338 next_max_above = -1;
9339
9340 *nesting_depth = curr_depth;
9341 *size = curr_max_above + curr_max_below;
9342 *address = user_address + curr_skip - curr_max_below;
9343
9344 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9345 // so probably should be a real 32b ID vs. ptr.
9346 // Current users just check for equality
9347 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9348
9349 if (look_for_pages) {
9350 submap_info->user_tag = curr_entry->alias;
9351 submap_info->offset = curr_entry->offset;
9352 submap_info->protection = curr_entry->protection;
9353 submap_info->inheritance = curr_entry->inheritance;
9354 submap_info->max_protection = curr_entry->max_protection;
9355 submap_info->behavior = curr_entry->behavior;
9356 submap_info->user_wired_count = curr_entry->user_wired_count;
9357 submap_info->is_submap = curr_entry->is_sub_map;
9358 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9359 } else {
9360 short_info->user_tag = curr_entry->alias;
9361 short_info->offset = curr_entry->offset;
9362 short_info->protection = curr_entry->protection;
9363 short_info->inheritance = curr_entry->inheritance;
9364 short_info->max_protection = curr_entry->max_protection;
9365 short_info->behavior = curr_entry->behavior;
9366 short_info->user_wired_count = curr_entry->user_wired_count;
9367 short_info->is_submap = curr_entry->is_sub_map;
9368 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9369 }
9370
9371 extended.pages_resident = 0;
9372 extended.pages_swapped_out = 0;
9373 extended.pages_shared_now_private = 0;
9374 extended.pages_dirtied = 0;
9375 extended.external_pager = 0;
9376 extended.shadow_depth = 0;
9377
9378 if (not_in_kdp) {
9379 if (!curr_entry->is_sub_map) {
9380 vm_map_offset_t range_start, range_end;
9381 range_start = MAX((curr_address - curr_max_below),
9382 curr_entry->vme_start);
9383 range_end = MIN((curr_address + curr_max_above),
9384 curr_entry->vme_end);
9385 vm_map_region_walk(curr_map,
9386 range_start,
9387 curr_entry,
9388 (curr_entry->offset +
9389 (range_start -
9390 curr_entry->vme_start)),
9391 range_end - range_start,
9392 &extended,
9393 look_for_pages);
9394 if (extended.external_pager &&
9395 extended.ref_count == 2 &&
9396 extended.share_mode == SM_SHARED) {
9397 extended.share_mode = SM_PRIVATE;
9398 }
9399 } else {
9400 if (curr_entry->use_pmap) {
9401 extended.share_mode = SM_TRUESHARED;
9402 } else {
9403 extended.share_mode = SM_PRIVATE;
9404 }
9405 extended.ref_count =
9406 curr_entry->object.sub_map->ref_count;
9407 }
9408 }
9409
9410 if (look_for_pages) {
9411 submap_info->pages_resident = extended.pages_resident;
9412 submap_info->pages_swapped_out = extended.pages_swapped_out;
9413 submap_info->pages_shared_now_private =
9414 extended.pages_shared_now_private;
9415 submap_info->pages_dirtied = extended.pages_dirtied;
9416 submap_info->external_pager = extended.external_pager;
9417 submap_info->shadow_depth = extended.shadow_depth;
9418 submap_info->share_mode = extended.share_mode;
9419 submap_info->ref_count = extended.ref_count;
9420 } else {
9421 short_info->external_pager = extended.external_pager;
9422 short_info->shadow_depth = extended.shadow_depth;
9423 short_info->share_mode = extended.share_mode;
9424 short_info->ref_count = extended.ref_count;
9425 }
9426
9427 if (not_in_kdp) {
9428 vm_map_unlock_read(curr_map);
9429 }
9430
9431 return KERN_SUCCESS;
9432 }
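
/*
 * Hedged usage sketch (hypothetical "example_" wrapper, not compiled):
 * walking an address space region by region, descending into nested
 * submaps.  The depth passed in is the maximum submap depth the caller
 * is willing to follow; the value returned is the depth reached.
 */
#if 0	/* example only */
static void
example_dump_regions(
	vm_map_t	map)
{
	vm_map_offset_t			address = 0;
	vm_map_size_t			size;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		depth = 99;	/* arbitrary generous submap depth */
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (vm_map_region_recurse_64(map, &address, &size, &depth,
					     &info, &count) != KERN_SUCCESS)
			break;
		/* ... report [address, address + size), info.protection, ... */
		address += size;
	}
}
#endif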
9433
9434 /*
9435 * vm_region:
9436 *
9437 * User call to obtain information about a region in
9438 * a task's address map. Several flavors are supported:
9439 * basic info (32- and 64-bit), extended info and top info.
9440 *
9441 * XXX The reserved and behavior fields cannot be filled
9442 * in until the vm merge from the IK is completed, and
9443 * vm_reserve is implemented.
9444 */
9445
9446 kern_return_t
9447 vm_map_region(
9448 vm_map_t map,
9449 vm_map_offset_t *address, /* IN/OUT */
9450 vm_map_size_t *size, /* OUT */
9451 vm_region_flavor_t flavor, /* IN */
9452 vm_region_info_t info, /* OUT */
9453 mach_msg_type_number_t *count, /* IN/OUT */
9454 mach_port_t *object_name) /* OUT */
9455 {
9456 vm_map_entry_t tmp_entry;
9457 vm_map_entry_t entry;
9458 vm_map_offset_t start;
9459
9460 if (map == VM_MAP_NULL)
9461 return(KERN_INVALID_ARGUMENT);
9462
9463 switch (flavor) {
9464
9465 case VM_REGION_BASIC_INFO:
9466 /* legacy for old 32-bit objects info */
9467 {
9468 vm_region_basic_info_t basic;
9469
9470 if (*count < VM_REGION_BASIC_INFO_COUNT)
9471 return(KERN_INVALID_ARGUMENT);
9472
9473 basic = (vm_region_basic_info_t) info;
9474 *count = VM_REGION_BASIC_INFO_COUNT;
9475
9476 vm_map_lock_read(map);
9477
9478 start = *address;
9479 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9480 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9481 vm_map_unlock_read(map);
9482 return(KERN_INVALID_ADDRESS);
9483 }
9484 } else {
9485 entry = tmp_entry;
9486 }
9487
9488 start = entry->vme_start;
9489
9490 basic->offset = (uint32_t)entry->offset;
9491 basic->protection = entry->protection;
9492 basic->inheritance = entry->inheritance;
9493 basic->max_protection = entry->max_protection;
9494 basic->behavior = entry->behavior;
9495 basic->user_wired_count = entry->user_wired_count;
9496 basic->reserved = entry->is_sub_map;
9497 *address = start;
9498 *size = (entry->vme_end - start);
9499
9500 if (object_name) *object_name = IP_NULL;
9501 if (entry->is_sub_map) {
9502 basic->shared = FALSE;
9503 } else {
9504 basic->shared = entry->is_shared;
9505 }
9506
9507 vm_map_unlock_read(map);
9508 return(KERN_SUCCESS);
9509 }
9510
9511 case VM_REGION_BASIC_INFO_64:
9512 {
9513 vm_region_basic_info_64_t basic;
9514
9515 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9516 return(KERN_INVALID_ARGUMENT);
9517
9518 basic = (vm_region_basic_info_64_t) info;
9519 *count = VM_REGION_BASIC_INFO_COUNT_64;
9520
9521 vm_map_lock_read(map);
9522
9523 start = *address;
9524 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9525 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9526 vm_map_unlock_read(map);
9527 return(KERN_INVALID_ADDRESS);
9528 }
9529 } else {
9530 entry = tmp_entry;
9531 }
9532
9533 start = entry->vme_start;
9534
9535 basic->offset = entry->offset;
9536 basic->protection = entry->protection;
9537 basic->inheritance = entry->inheritance;
9538 basic->max_protection = entry->max_protection;
9539 basic->behavior = entry->behavior;
9540 basic->user_wired_count = entry->user_wired_count;
9541 basic->reserved = entry->is_sub_map;
9542 *address = start;
9543 *size = (entry->vme_end - start);
9544
9545 if (object_name) *object_name = IP_NULL;
9546 if (entry->is_sub_map) {
9547 basic->shared = FALSE;
9548 } else {
9549 basic->shared = entry->is_shared;
9550 }
9551
9552 vm_map_unlock_read(map);
9553 return(KERN_SUCCESS);
9554 }
9555 case VM_REGION_EXTENDED_INFO:
9556 {
9557 vm_region_extended_info_t extended;
9558
9559 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9560 return(KERN_INVALID_ARGUMENT);
9561
9562 extended = (vm_region_extended_info_t) info;
9563 *count = VM_REGION_EXTENDED_INFO_COUNT;
9564
9565 vm_map_lock_read(map);
9566
9567 start = *address;
9568 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9569 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9570 vm_map_unlock_read(map);
9571 return(KERN_INVALID_ADDRESS);
9572 }
9573 } else {
9574 entry = tmp_entry;
9575 }
9576 start = entry->vme_start;
9577
9578 extended->protection = entry->protection;
9579 extended->user_tag = entry->alias;
9580 extended->pages_resident = 0;
9581 extended->pages_swapped_out = 0;
9582 extended->pages_shared_now_private = 0;
9583 extended->pages_dirtied = 0;
9584 extended->external_pager = 0;
9585 extended->shadow_depth = 0;
9586
9587 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9588
9589 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9590 extended->share_mode = SM_PRIVATE;
9591
9592 if (object_name)
9593 *object_name = IP_NULL;
9594 *address = start;
9595 *size = (entry->vme_end - start);
9596
9597 vm_map_unlock_read(map);
9598 return(KERN_SUCCESS);
9599 }
9600 case VM_REGION_TOP_INFO:
9601 {
9602 vm_region_top_info_t top;
9603
9604 if (*count < VM_REGION_TOP_INFO_COUNT)
9605 return(KERN_INVALID_ARGUMENT);
9606
9607 top = (vm_region_top_info_t) info;
9608 *count = VM_REGION_TOP_INFO_COUNT;
9609
9610 vm_map_lock_read(map);
9611
9612 start = *address;
9613 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9614 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9615 vm_map_unlock_read(map);
9616 return(KERN_INVALID_ADDRESS);
9617 }
9618 } else {
9619 entry = tmp_entry;
9620
9621 }
9622 start = entry->vme_start;
9623
9624 top->private_pages_resident = 0;
9625 top->shared_pages_resident = 0;
9626
9627 vm_map_region_top_walk(entry, top);
9628
9629 if (object_name)
9630 *object_name = IP_NULL;
9631 *address = start;
9632 *size = (entry->vme_end - start);
9633
9634 vm_map_unlock_read(map);
9635 return(KERN_SUCCESS);
9636 }
9637 default:
9638 return(KERN_INVALID_ARGUMENT);
9639 }
9640 }
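
/*
 * Hedged usage sketch (hypothetical "example_" wrapper and arguments,
 * not compiled): querying the 64-bit basic info for the region
 * containing, or immediately following, an address.
 */
#if 0	/* example only */
static kern_return_t
example_region_basic_info(
	vm_map_t	map,
	vm_map_offset_t	some_address)
{
	vm_map_offset_t			address = some_address;
	vm_map_size_t			size;
	vm_region_basic_info_data_64_t	basic;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name;

	/* on success, [address, address + size) and "basic" describe the region */
	return vm_map_region(map, &address, &size, VM_REGION_BASIC_INFO_64,
			     (vm_region_info_t) &basic, &count, &object_name);
}
#endif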
9641
9642 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9643 MIN((entry_size), \
9644 ((obj)->all_reusable ? \
9645 (obj)->wired_page_count : \
9646 (obj)->resident_page_count - (obj)->reusable_page_count))
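
/*
 * Worked example for the macro above: an object with 100 resident pages
 * of which 30 are reusable, seen through an entry spanning 50 pages,
 * reports MIN(50, 100 - 30) = 50 resident pages; through an 80-page
 * entry it reports MIN(80, 70) = 70.  For an all_reusable object only
 * the wired pages are counted.
 */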
9647
9648 void
9649 vm_map_region_top_walk(
9650 vm_map_entry_t entry,
9651 vm_region_top_info_t top)
9652 {
9653
9654 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9655 top->share_mode = SM_EMPTY;
9656 top->ref_count = 0;
9657 top->obj_id = 0;
9658 return;
9659 }
9660
9661 {
9662 struct vm_object *obj, *tmp_obj;
9663 int ref_count;
9664 uint32_t entry_size;
9665
9666 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9667
9668 obj = entry->object.vm_object;
9669
9670 vm_object_lock(obj);
9671
9672 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9673 ref_count--;
9674
9675 assert(obj->reusable_page_count <= obj->resident_page_count);
9676 if (obj->shadow) {
9677 if (ref_count == 1)
9678 top->private_pages_resident =
9679 OBJ_RESIDENT_COUNT(obj, entry_size);
9680 else
9681 top->shared_pages_resident =
9682 OBJ_RESIDENT_COUNT(obj, entry_size);
9683 top->ref_count = ref_count;
9684 top->share_mode = SM_COW;
9685
9686 while ((tmp_obj = obj->shadow)) {
9687 vm_object_lock(tmp_obj);
9688 vm_object_unlock(obj);
9689 obj = tmp_obj;
9690
9691 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9692 ref_count--;
9693
9694 assert(obj->reusable_page_count <= obj->resident_page_count);
9695 top->shared_pages_resident +=
9696 OBJ_RESIDENT_COUNT(obj, entry_size);
9697 top->ref_count += ref_count - 1;
9698 }
9699 } else {
9700 if (entry->superpage_size) {
9701 top->share_mode = SM_LARGE_PAGE;
9702 top->shared_pages_resident = 0;
9703 top->private_pages_resident = entry_size;
9704 } else if (entry->needs_copy) {
9705 top->share_mode = SM_COW;
9706 top->shared_pages_resident =
9707 OBJ_RESIDENT_COUNT(obj, entry_size);
9708 } else {
9709 if (ref_count == 1 ||
9710 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9711 top->share_mode = SM_PRIVATE;
9712 top->private_pages_resident =
9713 OBJ_RESIDENT_COUNT(obj,
9714 entry_size);
9715 } else {
9716 top->share_mode = SM_SHARED;
9717 top->shared_pages_resident =
9718 OBJ_RESIDENT_COUNT(obj,
9719 entry_size);
9720 }
9721 }
9722 top->ref_count = ref_count;
9723 }
9724 /* XXX K64: obj_id will be truncated */
9725 top->obj_id = (unsigned int) (uintptr_t)obj;
9726
9727 vm_object_unlock(obj);
9728 }
9729 }
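
/*
 * Quick reference, matching the logic above: an entry with no object
 * (or a submap) reports SM_EMPTY; a superpage mapping reports
 * SM_LARGE_PAGE; an object with a shadow chain, or a needs_copy entry,
 * reports SM_COW; otherwise a ref_count of 1 (or 2 when the object is
 * backed by an untrusted external pager) means SM_PRIVATE, and anything
 * else SM_SHARED.
 */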
9730
9731 void
9732 vm_map_region_walk(
9733 vm_map_t map,
9734 vm_map_offset_t va,
9735 vm_map_entry_t entry,
9736 vm_object_offset_t offset,
9737 vm_object_size_t range,
9738 vm_region_extended_info_t extended,
9739 boolean_t look_for_pages)
9740 {
9741 register struct vm_object *obj, *tmp_obj;
9742 register vm_map_offset_t last_offset;
9743 register int i;
9744 register int ref_count;
9745 struct vm_object *shadow_object;
9746 int shadow_depth;
9747
9748 if ((entry->object.vm_object == 0) ||
9749 (entry->is_sub_map) ||
9750 (entry->object.vm_object->phys_contiguous &&
9751 !entry->superpage_size)) {
9752 extended->share_mode = SM_EMPTY;
9753 extended->ref_count = 0;
9754 return;
9755 }
9756
9757 if (entry->superpage_size) {
9758 extended->shadow_depth = 0;
9759 extended->share_mode = SM_LARGE_PAGE;
9760 extended->ref_count = 1;
9761 extended->external_pager = 0;
9762 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9763 extended->shadow_depth = 0;
9764 return;
9765 }
9766
9767 {
9768 obj = entry->object.vm_object;
9769
9770 vm_object_lock(obj);
9771
9772 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9773 ref_count--;
9774
9775 if (look_for_pages) {
9776 for (last_offset = offset + range;
9777 offset < last_offset;
9778 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9779 vm_map_region_look_for_page(map, va, obj,
9780 offset, ref_count,
9781 0, extended);
9782 } else {
9783 shadow_object = obj->shadow;
9784 shadow_depth = 0;
9785
9786 if ( !(obj->pager_trusted) && !(obj->internal))
9787 extended->external_pager = 1;
9788
9789 if (shadow_object != VM_OBJECT_NULL) {
9790 vm_object_lock(shadow_object);
9791 for (;
9792 shadow_object != VM_OBJECT_NULL;
9793 shadow_depth++) {
9794 vm_object_t next_shadow;
9795
9796 if ( !(shadow_object->pager_trusted) &&
9797 !(shadow_object->internal))
9798 extended->external_pager = 1;
9799
9800 next_shadow = shadow_object->shadow;
9801 if (next_shadow) {
9802 vm_object_lock(next_shadow);
9803 }
9804 vm_object_unlock(shadow_object);
9805 shadow_object = next_shadow;
9806 }
9807 }
9808 extended->shadow_depth = shadow_depth;
9809 }
9810
9811 if (extended->shadow_depth || entry->needs_copy)
9812 extended->share_mode = SM_COW;
9813 else {
9814 if (ref_count == 1)
9815 extended->share_mode = SM_PRIVATE;
9816 else {
9817 if (obj->true_share)
9818 extended->share_mode = SM_TRUESHARED;
9819 else
9820 extended->share_mode = SM_SHARED;
9821 }
9822 }
9823 extended->ref_count = ref_count - extended->shadow_depth;
9824
9825 for (i = 0; i < extended->shadow_depth; i++) {
9826 if ((tmp_obj = obj->shadow) == 0)
9827 break;
9828 vm_object_lock(tmp_obj);
9829 vm_object_unlock(obj);
9830
9831 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9832 ref_count--;
9833
9834 extended->ref_count += ref_count;
9835 obj = tmp_obj;
9836 }
9837 vm_object_unlock(obj);
9838
9839 if (extended->share_mode == SM_SHARED) {
9840 register vm_map_entry_t cur;
9841 register vm_map_entry_t last;
9842 int my_refs;
9843
9844 obj = entry->object.vm_object;
9845 last = vm_map_to_entry(map);
9846 my_refs = 0;
9847
9848 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9849 ref_count--;
9850 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9851 my_refs += vm_map_region_count_obj_refs(cur, obj);
9852
9853 if (my_refs == ref_count)
9854 extended->share_mode = SM_PRIVATE_ALIASED;
9855 else if (my_refs > 1)
9856 extended->share_mode = SM_SHARED_ALIASED;
9857 }
9858 }
9859 }
9860
9861
9862 /* object is locked on entry and locked on return */
9863
9864
9865 static void
9866 vm_map_region_look_for_page(
9867 __unused vm_map_t map,
9868 __unused vm_map_offset_t va,
9869 vm_object_t object,
9870 vm_object_offset_t offset,
9871 int max_refcnt,
9872 int depth,
9873 vm_region_extended_info_t extended)
9874 {
9875 register vm_page_t p;
9876 register vm_object_t shadow;
9877 register int ref_count;
9878 vm_object_t caller_object;
9879 #if MACH_PAGEMAP
9880 kern_return_t kr;
9881 #endif
9882 shadow = object->shadow;
9883 caller_object = object;
9884
9885
9886 while (TRUE) {
9887
9888 if ( !(object->pager_trusted) && !(object->internal))
9889 extended->external_pager = 1;
9890
9891 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9892 if (shadow && (max_refcnt == 1))
9893 extended->pages_shared_now_private++;
9894
9895 if (!p->fictitious &&
9896 (p->dirty || pmap_is_modified(p->phys_page)))
9897 extended->pages_dirtied++;
9898
9899 extended->pages_resident++;
9900
9901 if(object != caller_object)
9902 vm_object_unlock(object);
9903
9904 return;
9905 }
9906 #if MACH_PAGEMAP
9907 if (object->existence_map) {
9908 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9909
9910 extended->pages_swapped_out++;
9911
9912 if(object != caller_object)
9913 vm_object_unlock(object);
9914
9915 return;
9916 }
9917 } else if (object->internal &&
9918 object->alive &&
9919 !object->terminating &&
9920 object->pager_ready) {
9921
9922 memory_object_t pager;
9923
9924 vm_object_paging_begin(object);
9925 pager = object->pager;
9926 vm_object_unlock(object);
9927
9928 kr = memory_object_data_request(
9929 pager,
9930 offset + object->paging_offset,
9931 0, /* just poke the pager */
9932 VM_PROT_READ,
9933 NULL);
9934
9935 vm_object_lock(object);
9936 vm_object_paging_end(object);
9937
9938 if (kr == KERN_SUCCESS) {
9939 /* the pager has that page */
9940 extended->pages_swapped_out++;
9941 if (object != caller_object)
9942 vm_object_unlock(object);
9943 return;
9944 }
9945 }
9946 #endif /* MACH_PAGEMAP */
9947
9948 if (shadow) {
9949 vm_object_lock(shadow);
9950
9951 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9952 ref_count--;
9953
9954 if (++depth > extended->shadow_depth)
9955 extended->shadow_depth = depth;
9956
9957 if (ref_count > max_refcnt)
9958 max_refcnt = ref_count;
9959
9960 if(object != caller_object)
9961 vm_object_unlock(object);
9962
9963 offset = offset + object->vo_shadow_offset;
9964 object = shadow;
9965 shadow = object->shadow;
9966 continue;
9967 }
9968 if(object != caller_object)
9969 vm_object_unlock(object);
9970 break;
9971 }
9972 }
9973
9974 static int
9975 vm_map_region_count_obj_refs(
9976 vm_map_entry_t entry,
9977 vm_object_t object)
9978 {
9979 register int ref_count;
9980 register vm_object_t chk_obj;
9981 register vm_object_t tmp_obj;
9982
9983 if (entry->object.vm_object == 0)
9984 return(0);
9985
9986 if (entry->is_sub_map)
9987 return(0);
9988 else {
9989 ref_count = 0;
9990
9991 chk_obj = entry->object.vm_object;
9992 vm_object_lock(chk_obj);
9993
9994 while (chk_obj) {
9995 if (chk_obj == object)
9996 ref_count++;
9997 tmp_obj = chk_obj->shadow;
9998 if (tmp_obj)
9999 vm_object_lock(tmp_obj);
10000 vm_object_unlock(chk_obj);
10001
10002 chk_obj = tmp_obj;
10003 }
10004 }
10005 return(ref_count);
10006 }
10007
10008
10009 /*
10010 * Routine: vm_map_simplify
10011 *
10012 * Description:
10013 * Attempt to simplify the map representation in
10014 * the vicinity of the given starting address.
10015 * Note:
10016 * This routine is intended primarily to keep the
10017 * kernel maps more compact -- they generally don't
10018 * benefit from the "expand a map entry" technology
10019 * at allocation time because the adjacent entry
10020 * is often wired down.
10021 */
10022 void
10023 vm_map_simplify_entry(
10024 vm_map_t map,
10025 vm_map_entry_t this_entry)
10026 {
10027 vm_map_entry_t prev_entry;
10028
10029 counter(c_vm_map_simplify_entry_called++);
10030
10031 prev_entry = this_entry->vme_prev;
10032
10033 if ((this_entry != vm_map_to_entry(map)) &&
10034 (prev_entry != vm_map_to_entry(map)) &&
10035
10036 (prev_entry->vme_end == this_entry->vme_start) &&
10037
10038 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
10039
10040 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10041 ((prev_entry->offset + (prev_entry->vme_end -
10042 prev_entry->vme_start))
10043 == this_entry->offset) &&
10044
10045 (prev_entry->inheritance == this_entry->inheritance) &&
10046 (prev_entry->protection == this_entry->protection) &&
10047 (prev_entry->max_protection == this_entry->max_protection) &&
10048 (prev_entry->behavior == this_entry->behavior) &&
10049 (prev_entry->alias == this_entry->alias) &&
10050 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
10051 (prev_entry->no_cache == this_entry->no_cache) &&
10052 (prev_entry->wired_count == this_entry->wired_count) &&
10053 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
10054
10055 (prev_entry->needs_copy == this_entry->needs_copy) &&
10056 (prev_entry->permanent == this_entry->permanent) &&
10057
10058 (prev_entry->use_pmap == FALSE) &&
10059 (this_entry->use_pmap == FALSE) &&
10060 (prev_entry->in_transition == FALSE) &&
10061 (this_entry->in_transition == FALSE) &&
10062 (prev_entry->needs_wakeup == FALSE) &&
10063 (this_entry->needs_wakeup == FALSE) &&
10064 (prev_entry->is_shared == FALSE) &&
10065 (this_entry->is_shared == FALSE)
10066 ) {
10067 _vm_map_store_entry_unlink(&map->hdr, prev_entry);
10068 assert(prev_entry->vme_start < this_entry->vme_end);
10069 this_entry->vme_start = prev_entry->vme_start;
10070 this_entry->offset = prev_entry->offset;
10071 if (prev_entry->is_sub_map) {
10072 vm_map_deallocate(prev_entry->object.sub_map);
10073 } else {
10074 vm_object_deallocate(prev_entry->object.vm_object);
10075 }
10076 vm_map_entry_dispose(map, prev_entry);
10077 SAVE_HINT_MAP_WRITE(map, this_entry);
10078 counter(c_vm_map_simplified++);
10079 }
10080 }
10081
10082 void
10083 vm_map_simplify(
10084 vm_map_t map,
10085 vm_map_offset_t start)
10086 {
10087 vm_map_entry_t this_entry;
10088
10089 vm_map_lock(map);
10090 if (vm_map_lookup_entry(map, start, &this_entry)) {
10091 vm_map_simplify_entry(map, this_entry);
10092 vm_map_simplify_entry(map, this_entry->vme_next);
10093 }
10094 counter(c_vm_map_simplify_called++);
10095 vm_map_unlock(map);
10096 }
10097
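/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * caller (the "example_" name is made up) that has just finished carving up
 * a range might ask the map to re-coalesce the entries around both ends of
 * that range.  vm_map_simplify() takes and drops the map lock itself, so the
 * map must not already be locked here.
 */
static void
example_recoalesce_range(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        vm_map_simplify(map, start);
        vm_map_simplify(map, end);
}
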
10098 static void
10099 vm_map_simplify_range(
10100 vm_map_t map,
10101 vm_map_offset_t start,
10102 vm_map_offset_t end)
10103 {
10104 vm_map_entry_t entry;
10105
10106 /*
10107 * The map should be locked (for "write") by the caller.
10108 */
10109
10110 if (start >= end) {
10111 /* invalid address range */
10112 return;
10113 }
10114
10115 start = vm_map_trunc_page(start);
10116 end = vm_map_round_page(end);
10117
10118 if (!vm_map_lookup_entry(map, start, &entry)) {
10119 /* "start" is not mapped and "entry" ends before "start" */
10120 if (entry == vm_map_to_entry(map)) {
10121 /* start with first entry in the map */
10122 entry = vm_map_first_entry(map);
10123 } else {
10124 /* start with next entry */
10125 entry = entry->vme_next;
10126 }
10127 }
10128
10129 while (entry != vm_map_to_entry(map) &&
10130 entry->vme_start <= end) {
10131 /* try and coalesce "entry" with its previous entry */
10132 vm_map_simplify_entry(map, entry);
10133 entry = entry->vme_next;
10134 }
10135 }
10136
10137
10138 /*
10139 * Routine: vm_map_machine_attribute
10140 * Purpose:
10141 * Provide machine-specific attributes to mappings,
10142 * such as cacheability, etc., for machines that provide
10143 * them. NUMA architectures and machines with big/strange
10144 * caches will use this.
10145 * Note:
10146 * Responsibilities for locking and checking are handled here;
10147 * everything else is handled in the pmap module. If any non-volatile
10148 * information must be kept, the pmap module should handle
10149 * it itself. [This assumes that attributes do not
10150 * need to be inherited, which seems ok to me]
10151 */
10152 kern_return_t
10153 vm_map_machine_attribute(
10154 vm_map_t map,
10155 vm_map_offset_t start,
10156 vm_map_offset_t end,
10157 vm_machine_attribute_t attribute,
10158 vm_machine_attribute_val_t* value) /* IN/OUT */
10159 {
10160 kern_return_t ret;
10161 vm_map_size_t sync_size;
10162 vm_map_entry_t entry;
10163
10164 if (start < vm_map_min(map) || end > vm_map_max(map))
10165 return KERN_INVALID_ADDRESS;
10166
10167 /* Figure how much memory we need to flush (in page increments) */
10168 sync_size = end - start;
10169
10170 vm_map_lock(map);
10171
10172 if (attribute != MATTR_CACHE) {
10173 /* If we don't have to find physical addresses, we */
10174 /* don't have to do an explicit traversal here. */
10175 ret = pmap_attribute(map->pmap, start, end-start,
10176 attribute, value);
10177 vm_map_unlock(map);
10178 return ret;
10179 }
10180
10181 ret = KERN_SUCCESS; /* Assume it all worked */
10182
10183 while(sync_size) {
10184 if (vm_map_lookup_entry(map, start, &entry)) {
10185 vm_map_size_t sub_size;
10186 if((entry->vme_end - start) > sync_size) {
10187 sub_size = sync_size;
10188 sync_size = 0;
10189 } else {
10190 sub_size = entry->vme_end - start;
10191 sync_size -= sub_size;
10192 }
10193 if(entry->is_sub_map) {
10194 vm_map_offset_t sub_start;
10195 vm_map_offset_t sub_end;
10196
10197 sub_start = (start - entry->vme_start)
10198 + entry->offset;
10199 sub_end = sub_start + sub_size;
10200 vm_map_machine_attribute(
10201 entry->object.sub_map,
10202 sub_start,
10203 sub_end,
10204 attribute, value);
10205 } else {
10206 if(entry->object.vm_object) {
10207 vm_page_t m;
10208 vm_object_t object;
10209 vm_object_t base_object;
10210 vm_object_t last_object;
10211 vm_object_offset_t offset;
10212 vm_object_offset_t base_offset;
10213 vm_map_size_t range;
10214 range = sub_size;
10215 offset = (start - entry->vme_start)
10216 + entry->offset;
10217 base_offset = offset;
10218 object = entry->object.vm_object;
10219 base_object = object;
10220 last_object = NULL;
10221
10222 vm_object_lock(object);
10223
10224 while (range) {
10225 m = vm_page_lookup(
10226 object, offset);
10227
10228 if (m && !m->fictitious) {
10229 ret =
10230 pmap_attribute_cache_sync(
10231 m->phys_page,
10232 PAGE_SIZE,
10233 attribute, value);
10234
10235 } else if (object->shadow) {
10236 offset = offset + object->vo_shadow_offset;
10237 last_object = object;
10238 object = object->shadow;
10239 vm_object_lock(last_object->shadow);
10240 vm_object_unlock(last_object);
10241 continue;
10242 }
10243 range -= PAGE_SIZE;
10244
10245 if (base_object != object) {
10246 vm_object_unlock(object);
10247 vm_object_lock(base_object);
10248 object = base_object;
10249 }
10250 /* Bump to the next page */
10251 base_offset += PAGE_SIZE;
10252 offset = base_offset;
10253 }
10254 vm_object_unlock(object);
10255 }
10256 }
10257 start += sub_size;
10258 } else {
10259 vm_map_unlock(map);
10260 return KERN_FAILURE;
10261 }
10262
10263 }
10264
10265 vm_map_unlock(map);
10266
10267 return ret;
10268 }
10269
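/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * driver-style caller (the "example_" name is made up) could flush the CPU
 * cache for a mapped range roughly as follows; MATTR_CACHE with
 * MATTR_VAL_CACHE_FLUSH makes the routine above walk the range page by page.
 */
static kern_return_t
example_flush_range_cache(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_size_t   size)
{
        vm_machine_attribute_val_t      value = MATTR_VAL_CACHE_FLUSH;

        return vm_map_machine_attribute(map, start, start + size,
                                        MATTR_CACHE, &value);
}
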
10270 /*
10271 * vm_map_behavior_set:
10272 *
10273 * Sets the paging reference behavior of the specified address
10274 * range in the target map. Paging reference behavior affects
10275 * how pagein operations resulting from faults on the map will be
10276 * clustered.
10277 */
10278 kern_return_t
10279 vm_map_behavior_set(
10280 vm_map_t map,
10281 vm_map_offset_t start,
10282 vm_map_offset_t end,
10283 vm_behavior_t new_behavior)
10284 {
10285 register vm_map_entry_t entry;
10286 vm_map_entry_t temp_entry;
10287
10288 XPR(XPR_VM_MAP,
10289 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10290 map, start, end, new_behavior, 0);
10291
10292 if (start > end ||
10293 start < vm_map_min(map) ||
10294 end > vm_map_max(map)) {
10295 return KERN_NO_SPACE;
10296 }
10297
10298 switch (new_behavior) {
10299
10300 /*
10301 * This first block of behaviors all set a persistent state on the specified
10302 * memory range. All we have to do here is to record the desired behavior
10303 * in the vm_map_entry_t's.
10304 */
10305
10306 case VM_BEHAVIOR_DEFAULT:
10307 case VM_BEHAVIOR_RANDOM:
10308 case VM_BEHAVIOR_SEQUENTIAL:
10309 case VM_BEHAVIOR_RSEQNTL:
10310 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10311 vm_map_lock(map);
10312
10313 /*
10314 * The entire address range must be valid for the map.
10315 * Note that vm_map_range_check() does a
10316 * vm_map_lookup_entry() internally and returns the
10317 * entry containing the start of the address range if
10318 * the entire range is valid.
10319 */
10320 if (vm_map_range_check(map, start, end, &temp_entry)) {
10321 entry = temp_entry;
10322 vm_map_clip_start(map, entry, start);
10323 }
10324 else {
10325 vm_map_unlock(map);
10326 return(KERN_INVALID_ADDRESS);
10327 }
10328
10329 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10330 vm_map_clip_end(map, entry, end);
10331 assert(!entry->use_pmap);
10332
10333 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10334 entry->zero_wired_pages = TRUE;
10335 } else {
10336 entry->behavior = new_behavior;
10337 }
10338 entry = entry->vme_next;
10339 }
10340
10341 vm_map_unlock(map);
10342 break;
10343
10344 /*
10345 * The rest of these are different from the above in that they cause
10346 * an immediate action to take place as opposed to setting a behavior that
10347 * affects future actions.
10348 */
10349
10350 case VM_BEHAVIOR_WILLNEED:
10351 return vm_map_willneed(map, start, end);
10352
10353 case VM_BEHAVIOR_DONTNEED:
10354 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10355
10356 case VM_BEHAVIOR_FREE:
10357 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10358
10359 case VM_BEHAVIOR_REUSABLE:
10360 return vm_map_reusable_pages(map, start, end);
10361
10362 case VM_BEHAVIOR_REUSE:
10363 return vm_map_reuse_pages(map, start, end);
10364
10365 case VM_BEHAVIOR_CAN_REUSE:
10366 return vm_map_can_reuse(map, start, end);
10367
10368 default:
10369 return(KERN_INVALID_ARGUMENT);
10370 }
10371
10372 return(KERN_SUCCESS);
10373 }
10374
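/*
 * Illustrative sketch, not part of the original XNU source: the first group
 * of behaviors above records persistent per-entry state while the later ones
 * act immediately.  A hypothetical caller (the "example_" name is made up)
 * might combine the two styles like this.
 */
static kern_return_t
example_prefetch_then_stream(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        kern_return_t   kr;

        /* immediate action: issue the read-ahead now */
        kr = vm_map_behavior_set(map, start, end, VM_BEHAVIOR_WILLNEED);
        if (kr != KERN_SUCCESS)
                return kr;

        /* persistent state: favor sequential clustering on future faults */
        return vm_map_behavior_set(map, start, end, VM_BEHAVIOR_SEQUENTIAL);
}
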
10375
10376 /*
10377 * Internals for the madvise(MADV_WILLNEED) system call.
10378 *
10379 * The present implementation is to do a read-ahead if the mapping corresponds
10380 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10381 * and basically ignore the "advice" (which we are always free to do).
10382 */
10383
10384
10385 static kern_return_t
10386 vm_map_willneed(
10387 vm_map_t map,
10388 vm_map_offset_t start,
10389 vm_map_offset_t end
10390 )
10391 {
10392 vm_map_entry_t entry;
10393 vm_object_t object;
10394 memory_object_t pager;
10395 struct vm_object_fault_info fault_info;
10396 kern_return_t kr;
10397 vm_object_size_t len;
10398 vm_object_offset_t offset;
10399
10400 /*
10401 * Fill in static values in fault_info. Several fields get ignored by the code
10402 * we call, but we'll fill them in anyway since uninitialized fields are bad
10403 * when it comes to future backwards compatibility.
10404 */
10405
10406 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10407 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10408 fault_info.no_cache = FALSE; /* ignored value */
10409 fault_info.stealth = TRUE;
10410 fault_info.io_sync = FALSE;
10411 fault_info.cs_bypass = FALSE;
10412 fault_info.mark_zf_absent = FALSE;
10413
10414 /*
10415 * The MADV_WILLNEED operation doesn't require any changes to the
10416 * vm_map_entry_t's, so the read lock is sufficient.
10417 */
10418
10419 vm_map_lock_read(map);
10420
10421 /*
10422 * The madvise semantics require that the address range be fully
10423 * allocated with no holes. Otherwise, we're required to return
10424 * an error.
10425 */
10426
10427 if (! vm_map_range_check(map, start, end, &entry)) {
10428 vm_map_unlock_read(map);
10429 return KERN_INVALID_ADDRESS;
10430 }
10431
10432 /*
10433 * Examine each vm_map_entry_t in the range.
10434 */
10435 for (; entry != vm_map_to_entry(map) && start < end; ) {
10436
10437 /*
10438 * The first time through, the start address could be anywhere
10439 * within the vm_map_entry we found. So adjust the offset to
10440 * correspond. After that, the offset will always be zero to
10441 * correspond to the beginning of the current vm_map_entry.
10442 */
10443 offset = (start - entry->vme_start) + entry->offset;
10444
10445 /*
10446 * Set the length so we don't go beyond the end of the
10447 * map_entry or beyond the end of the range we were given.
10448 * This range could also span multiple map entries, all of which
10449 * map different files, so make sure we only do the right amount
10450 * of I/O for each object. Note that it's possible for there
10451 * to be multiple map entries all referring to the same object
10452 * but with different page permissions, but it's not worth
10453 * trying to optimize that case.
10454 */
10455 len = MIN(entry->vme_end - start, end - start);
10456
10457 if ((vm_size_t) len != len) {
10458 /* 32-bit overflow */
10459 len = (vm_size_t) (0 - PAGE_SIZE);
10460 }
10461 fault_info.cluster_size = (vm_size_t) len;
10462 fault_info.lo_offset = offset;
10463 fault_info.hi_offset = offset + len;
10464 fault_info.user_tag = entry->alias;
10465
10466 /*
10467 * If there's no read permission to this mapping, then just
10468 * skip it.
10469 */
10470 if ((entry->protection & VM_PROT_READ) == 0) {
10471 entry = entry->vme_next;
10472 start = entry->vme_start;
10473 continue;
10474 }
10475
10476 /*
10477 * Find the file object backing this map entry. If there is
10478 * none, then we simply ignore the "will need" advice for this
10479 * entry and go on to the next one.
10480 */
10481 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10482 entry = entry->vme_next;
10483 start = entry->vme_start;
10484 continue;
10485 }
10486
10487 /*
10488 * The data_request() could take a long time, so let's
10489 * release the map lock to avoid blocking other threads.
10490 */
10491 vm_map_unlock_read(map);
10492
10493 vm_object_paging_begin(object);
10494 pager = object->pager;
10495 vm_object_unlock(object);
10496
10497 /*
10498 * Get the data from the object asynchronously.
10499 *
10500 * Note that memory_object_data_request() places limits on the
10501 * amount of I/O it will do. Regardless of the len we
10502 * specified, it won't do more than MAX_UPL_TRANSFER and it
10503 * silently truncates the len to that size. This isn't
10504 * necessarily bad since madvise shouldn't really be used to
10505 * page in unlimited amounts of data. Other Unix variants
10506 * limit the willneed case as well. If this turns out to be an
10507 * issue for developers, then we can always adjust the policy
10508 * here and still be backwards compatible since this is all
10509 * just "advice".
10510 */
10511 kr = memory_object_data_request(
10512 pager,
10513 offset + object->paging_offset,
10514 0, /* ignored */
10515 VM_PROT_READ,
10516 (memory_object_fault_info_t)&fault_info);
10517
10518 vm_object_lock(object);
10519 vm_object_paging_end(object);
10520 vm_object_unlock(object);
10521
10522 /*
10523 * If we couldn't do the I/O for some reason, just give up on
10524 * the madvise. We still return success to the user since
10525 * madvise isn't supposed to fail when the advice can't be
10526 * taken.
10527 */
10528 if (kr != KERN_SUCCESS) {
10529 return KERN_SUCCESS;
10530 }
10531
10532 start += len;
10533 if (start >= end) {
10534 /* done */
10535 return KERN_SUCCESS;
10536 }
10537
10538 /* look up next entry */
10539 vm_map_lock_read(map);
10540 if (! vm_map_lookup_entry(map, start, &entry)) {
10541 /*
10542 * There's a new hole in the address range.
10543 */
10544 vm_map_unlock_read(map);
10545 return KERN_INVALID_ADDRESS;
10546 }
10547 }
10548
10549 vm_map_unlock_read(map);
10550 return KERN_SUCCESS;
10551 }
10552
10553 static boolean_t
10554 vm_map_entry_is_reusable(
10555 vm_map_entry_t entry)
10556 {
10557 vm_object_t object;
10558
10559 if (entry->is_shared ||
10560 entry->is_sub_map ||
10561 entry->in_transition ||
10562 entry->protection != VM_PROT_DEFAULT ||
10563 entry->max_protection != VM_PROT_ALL ||
10564 entry->inheritance != VM_INHERIT_DEFAULT ||
10565 entry->no_cache ||
10566 entry->permanent ||
10567 entry->superpage_size != 0 ||
10568 entry->zero_wired_pages ||
10569 entry->wired_count != 0 ||
10570 entry->user_wired_count != 0) {
10571 return FALSE;
10572 }
10573
10574 object = entry->object.vm_object;
10575 if (object == VM_OBJECT_NULL) {
10576 return TRUE;
10577 }
10578 if (object->ref_count == 1 &&
10579 object->wired_page_count == 0 &&
10580 object->copy == VM_OBJECT_NULL &&
10581 object->shadow == VM_OBJECT_NULL &&
10582 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10583 object->internal &&
10584 !object->true_share &&
10585 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10586 !object->code_signed) {
10587 return TRUE;
10588 }
10589 return FALSE;
10590
10591
10592 }
10593
10594 static kern_return_t
10595 vm_map_reuse_pages(
10596 vm_map_t map,
10597 vm_map_offset_t start,
10598 vm_map_offset_t end)
10599 {
10600 vm_map_entry_t entry;
10601 vm_object_t object;
10602 vm_object_offset_t start_offset, end_offset;
10603
10604 /*
10605 * The MADV_REUSE operation doesn't require any changes to the
10606 * vm_map_entry_t's, so the read lock is sufficient.
10607 */
10608
10609 vm_map_lock_read(map);
10610
10611 /*
10612 * The madvise semantics require that the address range be fully
10613 * allocated with no holes. Otherwise, we're required to return
10614 * an error.
10615 */
10616
10617 if (!vm_map_range_check(map, start, end, &entry)) {
10618 vm_map_unlock_read(map);
10619 vm_page_stats_reusable.reuse_pages_failure++;
10620 return KERN_INVALID_ADDRESS;
10621 }
10622
10623 /*
10624 * Examine each vm_map_entry_t in the range.
10625 */
10626 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10627 entry = entry->vme_next) {
10628 /*
10629 * Sanity check on the VM map entry.
10630 */
10631 if (! vm_map_entry_is_reusable(entry)) {
10632 vm_map_unlock_read(map);
10633 vm_page_stats_reusable.reuse_pages_failure++;
10634 return KERN_INVALID_ADDRESS;
10635 }
10636
10637 /*
10638 * The first time through, the start address could be anywhere
10639 * within the vm_map_entry we found. So adjust the offset to
10640 * correspond.
10641 */
10642 if (entry->vme_start < start) {
10643 start_offset = start - entry->vme_start;
10644 } else {
10645 start_offset = 0;
10646 }
10647 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10648 start_offset += entry->offset;
10649 end_offset += entry->offset;
10650
10651 object = entry->object.vm_object;
10652 if (object != VM_OBJECT_NULL) {
10653 vm_object_lock(object);
10654 vm_object_reuse_pages(object, start_offset, end_offset,
10655 TRUE);
10656 vm_object_unlock(object);
10657 }
10658
10659 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10660 /*
10661 * XXX
10662 * We do not hold the VM map exclusively here.
10663 * The "alias" field is not that critical, so it's
10664 * safe to update it here, as long as it is the only
10665 * one that can be modified while holding the VM map
10666 * "shared".
10667 */
10668 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10669 }
10670 }
10671
10672 vm_map_unlock_read(map);
10673 vm_page_stats_reusable.reuse_pages_success++;
10674 return KERN_SUCCESS;
10675 }
10676
10677
10678 static kern_return_t
10679 vm_map_reusable_pages(
10680 vm_map_t map,
10681 vm_map_offset_t start,
10682 vm_map_offset_t end)
10683 {
10684 vm_map_entry_t entry;
10685 vm_object_t object;
10686 vm_object_offset_t start_offset, end_offset;
10687
10688 /*
10689 * The MADV_REUSABLE operation doesn't require any changes to the
10690 * vm_map_entry_t's, so the read lock is sufficient.
10691 */
10692
10693 vm_map_lock_read(map);
10694
10695 /*
10696 * The madvise semantics require that the address range be fully
10697 * allocated with no holes. Otherwise, we're required to return
10698 * an error.
10699 */
10700
10701 if (!vm_map_range_check(map, start, end, &entry)) {
10702 vm_map_unlock_read(map);
10703 vm_page_stats_reusable.reusable_pages_failure++;
10704 return KERN_INVALID_ADDRESS;
10705 }
10706
10707 /*
10708 * Examine each vm_map_entry_t in the range.
10709 */
10710 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10711 entry = entry->vme_next) {
10712 int kill_pages = 0;
10713
10714 /*
10715 * Sanity check on the VM map entry.
10716 */
10717 if (! vm_map_entry_is_reusable(entry)) {
10718 vm_map_unlock_read(map);
10719 vm_page_stats_reusable.reusable_pages_failure++;
10720 return KERN_INVALID_ADDRESS;
10721 }
10722
10723 /*
10724 * The first time through, the start address could be anywhere
10725 * within the vm_map_entry we found. So adjust the offset to
10726 * correspond.
10727 */
10728 if (entry->vme_start < start) {
10729 start_offset = start - entry->vme_start;
10730 } else {
10731 start_offset = 0;
10732 }
10733 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10734 start_offset += entry->offset;
10735 end_offset += entry->offset;
10736
10737 object = entry->object.vm_object;
10738 if (object == VM_OBJECT_NULL)
10739 continue;
10740
10741
10742 vm_object_lock(object);
10743 if (object->ref_count == 1 && !object->shadow)
10744 kill_pages = 1;
10745 else
10746 kill_pages = -1;
10747 if (kill_pages != -1) {
10748 vm_object_deactivate_pages(object,
10749 start_offset,
10750 end_offset - start_offset,
10751 kill_pages,
10752 TRUE /*reusable_pages*/);
10753 } else {
10754 vm_page_stats_reusable.reusable_pages_shared++;
10755 }
10756 vm_object_unlock(object);
10757
10758 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10759 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10760 /*
10761 * XXX
10762 * We do not hold the VM map exclusively here.
10763 * The "alias" field is not that critical, so it's
10764 * safe to update it here, as long as it is the only
10765 * one that can be modified while holding the VM map
10766 * "shared".
10767 */
10768 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10769 }
10770 }
10771
10772 vm_map_unlock_read(map);
10773 vm_page_stats_reusable.reusable_pages_success++;
10774 return KERN_SUCCESS;
10775 }
10776
10777
10778 static kern_return_t
10779 vm_map_can_reuse(
10780 vm_map_t map,
10781 vm_map_offset_t start,
10782 vm_map_offset_t end)
10783 {
10784 vm_map_entry_t entry;
10785
10786 /*
10787 * The MADV_CAN_REUSE operation doesn't require any changes to the
10788 * vm_map_entry_t's, so the read lock is sufficient.
10789 */
10790
10791 vm_map_lock_read(map);
10792
10793 /*
10794 * The madvise semantics require that the address range be fully
10795 * allocated with no holes. Otherwise, we're required to return
10796 * an error.
10797 */
10798
10799 if (!vm_map_range_check(map, start, end, &entry)) {
10800 vm_map_unlock_read(map);
10801 vm_page_stats_reusable.can_reuse_failure++;
10802 return KERN_INVALID_ADDRESS;
10803 }
10804
10805 /*
10806 * Examine each vm_map_entry_t in the range.
10807 */
10808 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10809 entry = entry->vme_next) {
10810 /*
10811 * Sanity check on the VM map entry.
10812 */
10813 if (! vm_map_entry_is_reusable(entry)) {
10814 vm_map_unlock_read(map);
10815 vm_page_stats_reusable.can_reuse_failure++;
10816 return KERN_INVALID_ADDRESS;
10817 }
10818 }
10819
10820 vm_map_unlock_read(map);
10821 vm_page_stats_reusable.can_reuse_success++;
10822 return KERN_SUCCESS;
10823 }
10824
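/*
 * Illustrative sketch, not part of the original XNU source: the three static
 * helpers above are reached through vm_map_behavior_set().  A hypothetical
 * malloc-style client (the "example_" name is made up) might mark an idle
 * region reusable and later reclaim it like this.
 */
static kern_return_t
example_recycle_region(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        kern_return_t   kr;

        /* the contents are no longer needed; let the VM reclaim the pages */
        kr = vm_map_behavior_set(map, start, end, VM_BEHAVIOR_REUSABLE);
        if (kr != KERN_SUCCESS)
                return kr;

        /* ... later, just before the region is handed out again ... */
        return vm_map_behavior_set(map, start, end, VM_BEHAVIOR_REUSE);
}
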
10825
10826
10827 #include <mach_kdb.h>
10828 #if MACH_KDB
10829 #include <ddb/db_output.h>
10830 #include <vm/vm_print.h>
10831
10832 #define printf db_printf
10833
10834 /*
10835 * Forward declarations for internal functions.
10836 */
10837 extern void vm_map_links_print(
10838 struct vm_map_links *links);
10839
10840 extern void vm_map_header_print(
10841 struct vm_map_header *header);
10842
10843 extern void vm_map_entry_print(
10844 vm_map_entry_t entry);
10845
10846 extern void vm_follow_entry(
10847 vm_map_entry_t entry);
10848
10849 extern void vm_follow_map(
10850 vm_map_t map);
10851
10852 /*
10853 * vm_map_links_print: [ debug ]
10854 */
10855 void
10856 vm_map_links_print(
10857 struct vm_map_links *links)
10858 {
10859 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10860 links->prev,
10861 links->next,
10862 (unsigned long long)links->start,
10863 (unsigned long long)links->end);
10864 }
10865
10866 /*
10867 * vm_map_header_print: [ debug ]
10868 */
10869 void
10870 vm_map_header_print(
10871 struct vm_map_header *header)
10872 {
10873 vm_map_links_print(&header->links);
10874 iprintf("nentries = %08X, %sentries_pageable\n",
10875 header->nentries,
10876 (header->entries_pageable ? "" : "!"));
10877 }
10878
10879 /*
10880 * vm_follow_entry: [ debug ]
10881 */
10882 void
10883 vm_follow_entry(
10884 vm_map_entry_t entry)
10885 {
10886 int shadows;
10887
10888 iprintf("map entry %08X\n", entry);
10889
10890 db_indent += 2;
10891
10892 shadows = vm_follow_object(entry->object.vm_object);
10893 iprintf("Total objects : %d\n",shadows);
10894
10895 db_indent -= 2;
10896 }
10897
10898 /*
10899 * vm_map_entry_print: [ debug ]
10900 */
10901 void
10902 vm_map_entry_print(
10903 register vm_map_entry_t entry)
10904 {
10905 static const char *inheritance_name[4] =
10906 { "share", "copy", "none", "?"};
10907 static const char *behavior_name[4] =
10908 { "dflt", "rand", "seqtl", "rseqntl" };
10909
10910 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10911
10912 db_indent += 2;
10913
10914 vm_map_links_print(&entry->links);
10915
10916 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10917 (unsigned long long)entry->vme_start,
10918 (unsigned long long)entry->vme_end,
10919 entry->protection,
10920 entry->max_protection,
10921 inheritance_name[(entry->inheritance & 0x3)]);
10922
10923 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10924 behavior_name[(entry->behavior & 0x3)],
10925 entry->wired_count,
10926 entry->user_wired_count);
10927 iprintf("%sin_transition, %sneeds_wakeup\n",
10928 (entry->in_transition ? "" : "!"),
10929 (entry->needs_wakeup ? "" : "!"));
10930
10931 if (entry->is_sub_map) {
10932 iprintf("submap = %08X - offset = %016llX\n",
10933 entry->object.sub_map,
10934 (unsigned long long)entry->offset);
10935 } else {
10936 iprintf("object = %08X offset = %016llX - ",
10937 entry->object.vm_object,
10938 (unsigned long long)entry->offset);
10939 printf("%sis_shared, %sneeds_copy\n",
10940 (entry->is_shared ? "" : "!"),
10941 (entry->needs_copy ? "" : "!"));
10942 }
10943
10944 db_indent -= 2;
10945 }
10946
10947 /*
10948 * vm_follow_map: [ debug ]
10949 */
10950 void
10951 vm_follow_map(
10952 vm_map_t map)
10953 {
10954 register vm_map_entry_t entry;
10955
10956 iprintf("task map %08X\n", map);
10957
10958 db_indent += 2;
10959
10960 for (entry = vm_map_first_entry(map);
10961 entry && entry != vm_map_to_entry(map);
10962 entry = entry->vme_next) {
10963 vm_follow_entry(entry);
10964 }
10965
10966 db_indent -= 2;
10967 }
10968
10969 /*
10970 * vm_map_print: [ debug ]
10971 */
10972 void
10973 vm_map_print(
10974 db_addr_t inmap)
10975 {
10976 register vm_map_entry_t entry;
10977 vm_map_t map;
10978 #if TASK_SWAPPER
10979 char *swstate;
10980 #endif /* TASK_SWAPPER */
10981
10982 map = (vm_map_t)(long)
10983 inmap; /* Make sure we have the right type */
10984
10985 iprintf("task map %08X\n", map);
10986
10987 db_indent += 2;
10988
10989 vm_map_header_print(&map->hdr);
10990
10991 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
10992 map->pmap,
10993 map->size,
10994 map->ref_count,
10995 map->hint,
10996 map->first_free);
10997
10998 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
10999 (map->wait_for_space ? "" : "!"),
11000 (map->wiring_required ? "" : "!"),
11001 map->timestamp);
11002
11003 #if TASK_SWAPPER
11004 switch (map->sw_state) {
11005 case MAP_SW_IN:
11006 swstate = "SW_IN";
11007 break;
11008 case MAP_SW_OUT:
11009 swstate = "SW_OUT";
11010 break;
11011 default:
11012 swstate = "????";
11013 break;
11014 }
11015 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
11016 #endif /* TASK_SWAPPER */
11017
11018 for (entry = vm_map_first_entry(map);
11019 entry && entry != vm_map_to_entry(map);
11020 entry = entry->vme_next) {
11021 vm_map_entry_print(entry);
11022 }
11023
11024 db_indent -= 2;
11025 }
11026
11027 /*
11028 * Routine: vm_map_copy_print
11029 * Purpose:
11030 * Pretty-print a copy object for ddb.
11031 */
11032
11033 void
11034 vm_map_copy_print(
11035 db_addr_t incopy)
11036 {
11037 vm_map_copy_t copy;
11038 vm_map_entry_t entry;
11039
11040 copy = (vm_map_copy_t)(long)
11041 incopy; /* Make sure we have the right type */
11042
11043 printf("copy object 0x%x\n", copy);
11044
11045 db_indent += 2;
11046
11047 iprintf("type=%d", copy->type);
11048 switch (copy->type) {
11049 case VM_MAP_COPY_ENTRY_LIST:
11050 printf("[entry_list]");
11051 break;
11052
11053 case VM_MAP_COPY_OBJECT:
11054 printf("[object]");
11055 break;
11056
11057 case VM_MAP_COPY_KERNEL_BUFFER:
11058 printf("[kernel_buffer]");
11059 break;
11060
11061 default:
11062 printf("[bad type]");
11063 break;
11064 }
11065 printf(", offset=0x%llx", (unsigned long long)copy->offset);
11066 printf(", size=0x%x\n", copy->size);
11067
11068 switch (copy->type) {
11069 case VM_MAP_COPY_ENTRY_LIST:
11070 vm_map_header_print(&copy->cpy_hdr);
11071 for (entry = vm_map_copy_first_entry(copy);
11072 entry && entry != vm_map_copy_to_entry(copy);
11073 entry = entry->vme_next) {
11074 vm_map_entry_print(entry);
11075 }
11076 break;
11077
11078 case VM_MAP_COPY_OBJECT:
11079 iprintf("object=0x%x\n", copy->cpy_object);
11080 break;
11081
11082 case VM_MAP_COPY_KERNEL_BUFFER:
11083 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
11084 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
11085 break;
11086
11087 }
11088
11089 db_indent -=2;
11090 }
11091
11092 /*
11093 * db_vm_map_total_size(map) [ debug ]
11094 *
11095 * return the total virtual size (in bytes) of the map
11096 */
11097 vm_map_size_t
11098 db_vm_map_total_size(
11099 db_addr_t inmap)
11100 {
11101 vm_map_entry_t entry;
11102 vm_map_size_t total;
11103 vm_map_t map;
11104
11105 map = (vm_map_t)(long)
11106 inmap; /* Make sure we have the right type */
11107
11108 total = 0;
11109 for (entry = vm_map_first_entry(map);
11110 entry != vm_map_to_entry(map);
11111 entry = entry->vme_next) {
11112 total += entry->vme_end - entry->vme_start;
11113 }
11114
11115 return total;
11116 }
11117
11118 #endif /* MACH_KDB */
11119
11120 /*
11121 * Routine: vm_map_entry_insert
11122 *
11123 * Description: This routine inserts a new vm_map_entry in a locked map.
11124 */
11125 vm_map_entry_t
11126 vm_map_entry_insert(
11127 vm_map_t map,
11128 vm_map_entry_t insp_entry,
11129 vm_map_offset_t start,
11130 vm_map_offset_t end,
11131 vm_object_t object,
11132 vm_object_offset_t offset,
11133 boolean_t needs_copy,
11134 boolean_t is_shared,
11135 boolean_t in_transition,
11136 vm_prot_t cur_protection,
11137 vm_prot_t max_protection,
11138 vm_behavior_t behavior,
11139 vm_inherit_t inheritance,
11140 unsigned wired_count,
11141 boolean_t no_cache,
11142 boolean_t permanent,
11143 unsigned int superpage_size)
11144 {
11145 vm_map_entry_t new_entry;
11146
11147 assert(insp_entry != (vm_map_entry_t)0);
11148
11149 new_entry = vm_map_entry_create(map);
11150
11151 new_entry->vme_start = start;
11152 new_entry->vme_end = end;
11153 assert(page_aligned(new_entry->vme_start));
11154 assert(page_aligned(new_entry->vme_end));
11155 assert(new_entry->vme_start < new_entry->vme_end);
11156
11157 new_entry->object.vm_object = object;
11158 new_entry->offset = offset;
11159 new_entry->is_shared = is_shared;
11160 new_entry->is_sub_map = FALSE;
11161 new_entry->needs_copy = needs_copy;
11162 new_entry->in_transition = in_transition;
11163 new_entry->needs_wakeup = FALSE;
11164 new_entry->inheritance = inheritance;
11165 new_entry->protection = cur_protection;
11166 new_entry->max_protection = max_protection;
11167 new_entry->behavior = behavior;
11168 new_entry->wired_count = wired_count;
11169 new_entry->user_wired_count = 0;
11170 new_entry->use_pmap = FALSE;
11171 new_entry->alias = 0;
11172 new_entry->zero_wired_pages = FALSE;
11173 new_entry->no_cache = no_cache;
11174 new_entry->permanent = permanent;
11175 new_entry->superpage_size = superpage_size;
11176 new_entry->used_for_jit = FALSE;
11177
11178 /*
11179 * Insert the new entry into the list.
11180 */
11181
11182 vm_map_store_entry_link(map, insp_entry, new_entry);
11183 map->size += end - start;
11184
11185 /*
11186 * Update the free space hint and the lookup hint.
11187 */
11188
11189 SAVE_HINT_MAP_WRITE(map, new_entry);
11190 return new_entry;
11191 }
11192
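/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * caller (the "example_" name is made up) holding the map write-locked could
 * insert a fresh anonymous entry after "insp_entry" like this.  "start" and
 * "end" must be page aligned, with start < end.
 */
static vm_map_entry_t
example_insert_anonymous_entry(
        vm_map_t        map,
        vm_map_entry_t  insp_entry,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        return vm_map_entry_insert(map, insp_entry, start, end,
                                   VM_OBJECT_NULL,      /* object */
                                   0,                   /* offset */
                                   FALSE,               /* needs_copy */
                                   FALSE,               /* is_shared */
                                   FALSE,               /* in_transition */
                                   VM_PROT_DEFAULT, VM_PROT_ALL,
                                   VM_BEHAVIOR_DEFAULT, VM_INHERIT_DEFAULT,
                                   0,                   /* wired_count */
                                   FALSE,               /* no_cache */
                                   FALSE,               /* permanent */
                                   0);                  /* superpage_size */
}
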
11193 /*
11194 * Routine: vm_map_remap_extract
11195 *
11196 * Description: This routine returns a vm_map_entry list from a map.
11197 */
11198 static kern_return_t
11199 vm_map_remap_extract(
11200 vm_map_t map,
11201 vm_map_offset_t addr,
11202 vm_map_size_t size,
11203 boolean_t copy,
11204 struct vm_map_header *map_header,
11205 vm_prot_t *cur_protection,
11206 vm_prot_t *max_protection,
11207 /* What, no behavior? */
11208 vm_inherit_t inheritance,
11209 boolean_t pageable)
11210 {
11211 kern_return_t result;
11212 vm_map_size_t mapped_size;
11213 vm_map_size_t tmp_size;
11214 vm_map_entry_t src_entry; /* result of last map lookup */
11215 vm_map_entry_t new_entry;
11216 vm_object_offset_t offset;
11217 vm_map_offset_t map_address;
11218 vm_map_offset_t src_start; /* start of entry to map */
11219 vm_map_offset_t src_end; /* end of region to be mapped */
11220 vm_object_t object;
11221 vm_map_version_t version;
11222 boolean_t src_needs_copy;
11223 boolean_t new_entry_needs_copy;
11224
11225 assert(map != VM_MAP_NULL);
11226 assert(size != 0 && size == vm_map_round_page(size));
11227 assert(inheritance == VM_INHERIT_NONE ||
11228 inheritance == VM_INHERIT_COPY ||
11229 inheritance == VM_INHERIT_SHARE);
11230
11231 /*
11232 * Compute start and end of region.
11233 */
11234 src_start = vm_map_trunc_page(addr);
11235 src_end = vm_map_round_page(src_start + size);
11236
11237 /*
11238 * Initialize map_header.
11239 */
11240 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11241 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11242 map_header->nentries = 0;
11243 map_header->entries_pageable = pageable;
11244
11245 vm_map_store_init( map_header );
11246
11247 *cur_protection = VM_PROT_ALL;
11248 *max_protection = VM_PROT_ALL;
11249
11250 map_address = 0;
11251 mapped_size = 0;
11252 result = KERN_SUCCESS;
11253
11254 /*
11255 * The specified source virtual space might correspond to
11256 * multiple map entries, so we need to loop over them.
11257 */
11258 vm_map_lock(map);
11259 while (mapped_size != size) {
11260 vm_map_size_t entry_size;
11261
11262 /*
11263 * Find the beginning of the region.
11264 */
11265 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11266 result = KERN_INVALID_ADDRESS;
11267 break;
11268 }
11269
11270 if (src_start < src_entry->vme_start ||
11271 (mapped_size && src_start != src_entry->vme_start)) {
11272 result = KERN_INVALID_ADDRESS;
11273 break;
11274 }
11275
11276 tmp_size = size - mapped_size;
11277 if (src_end > src_entry->vme_end)
11278 tmp_size -= (src_end - src_entry->vme_end);
11279
11280 entry_size = (vm_map_size_t)(src_entry->vme_end -
11281 src_entry->vme_start);
11282
11283 if(src_entry->is_sub_map) {
11284 vm_map_reference(src_entry->object.sub_map);
11285 object = VM_OBJECT_NULL;
11286 } else {
11287 object = src_entry->object.vm_object;
11288
11289 if (object == VM_OBJECT_NULL) {
11290 object = vm_object_allocate(entry_size);
11291 src_entry->offset = 0;
11292 src_entry->object.vm_object = object;
11293 } else if (object->copy_strategy !=
11294 MEMORY_OBJECT_COPY_SYMMETRIC) {
11295 /*
11296 * We are already using an asymmetric
11297 * copy, and therefore we already have
11298 * the right object.
11299 */
11300 assert(!src_entry->needs_copy);
11301 } else if (src_entry->needs_copy || object->shadowed ||
11302 (object->internal && !object->true_share &&
11303 !src_entry->is_shared &&
11304 object->vo_size > entry_size)) {
11305
11306 vm_object_shadow(&src_entry->object.vm_object,
11307 &src_entry->offset,
11308 entry_size);
11309
11310 if (!src_entry->needs_copy &&
11311 (src_entry->protection & VM_PROT_WRITE)) {
11312 vm_prot_t prot;
11313
11314 prot = src_entry->protection & ~VM_PROT_WRITE;
11315
11316 if (override_nx(map, src_entry->alias) && prot)
11317 prot |= VM_PROT_EXECUTE;
11318
11319 if(map->mapped) {
11320 vm_object_pmap_protect(
11321 src_entry->object.vm_object,
11322 src_entry->offset,
11323 entry_size,
11324 PMAP_NULL,
11325 src_entry->vme_start,
11326 prot);
11327 } else {
11328 pmap_protect(vm_map_pmap(map),
11329 src_entry->vme_start,
11330 src_entry->vme_end,
11331 prot);
11332 }
11333 }
11334
11335 object = src_entry->object.vm_object;
11336 src_entry->needs_copy = FALSE;
11337 }
11338
11339
11340 vm_object_lock(object);
11341 vm_object_reference_locked(object); /* object ref. for new entry */
11342 if (object->copy_strategy ==
11343 MEMORY_OBJECT_COPY_SYMMETRIC) {
11344 object->copy_strategy =
11345 MEMORY_OBJECT_COPY_DELAY;
11346 }
11347 vm_object_unlock(object);
11348 }
11349
11350 offset = src_entry->offset + (src_start - src_entry->vme_start);
11351
11352 new_entry = _vm_map_entry_create(map_header);
11353 vm_map_entry_copy(new_entry, src_entry);
11354 new_entry->use_pmap = FALSE; /* clr address space specifics */
11355
11356 new_entry->vme_start = map_address;
11357 new_entry->vme_end = map_address + tmp_size;
11358 assert(new_entry->vme_start < new_entry->vme_end);
11359 new_entry->inheritance = inheritance;
11360 new_entry->offset = offset;
11361
11362 /*
11363 * The new region has to be copied now if required.
11364 */
11365 RestartCopy:
11366 if (!copy) {
11367 src_entry->is_shared = TRUE;
11368 new_entry->is_shared = TRUE;
11369 if (!(new_entry->is_sub_map))
11370 new_entry->needs_copy = FALSE;
11371
11372 } else if (src_entry->is_sub_map) {
11373 /* make this a COW sub_map if not already */
11374 new_entry->needs_copy = TRUE;
11375 object = VM_OBJECT_NULL;
11376 } else if (src_entry->wired_count == 0 &&
11377 vm_object_copy_quickly(&new_entry->object.vm_object,
11378 new_entry->offset,
11379 (new_entry->vme_end -
11380 new_entry->vme_start),
11381 &src_needs_copy,
11382 &new_entry_needs_copy)) {
11383
11384 new_entry->needs_copy = new_entry_needs_copy;
11385 new_entry->is_shared = FALSE;
11386
11387 /*
11388 * Handle copy_on_write semantics.
11389 */
11390 if (src_needs_copy && !src_entry->needs_copy) {
11391 vm_prot_t prot;
11392
11393 prot = src_entry->protection & ~VM_PROT_WRITE;
11394
11395 if (override_nx(map, src_entry->alias) && prot)
11396 prot |= VM_PROT_EXECUTE;
11397
11398 vm_object_pmap_protect(object,
11399 offset,
11400 entry_size,
11401 ((src_entry->is_shared
11402 || map->mapped) ?
11403 PMAP_NULL : map->pmap),
11404 src_entry->vme_start,
11405 prot);
11406
11407 src_entry->needs_copy = TRUE;
11408 }
11409 /*
11410 * Throw away the old object reference of the new entry.
11411 */
11412 vm_object_deallocate(object);
11413
11414 } else {
11415 new_entry->is_shared = FALSE;
11416
11417 /*
11418 * The map can be safely unlocked since we
11419 * already hold a reference on the object.
11420 *
11421 * Record the timestamp of the map for later
11422 * verification, and unlock the map.
11423 */
11424 version.main_timestamp = map->timestamp;
11425 vm_map_unlock(map); /* Increments timestamp once! */
11426
11427 /*
11428 * Perform the copy.
11429 */
11430 if (src_entry->wired_count > 0) {
11431 vm_object_lock(object);
11432 result = vm_object_copy_slowly(
11433 object,
11434 offset,
11435 entry_size,
11436 THREAD_UNINT,
11437 &new_entry->object.vm_object);
11438
11439 new_entry->offset = 0;
11440 new_entry->needs_copy = FALSE;
11441 } else {
11442 result = vm_object_copy_strategically(
11443 object,
11444 offset,
11445 entry_size,
11446 &new_entry->object.vm_object,
11447 &new_entry->offset,
11448 &new_entry_needs_copy);
11449
11450 new_entry->needs_copy = new_entry_needs_copy;
11451 }
11452
11453 /*
11454 * Throw away the old object reference of the new entry.
11455 */
11456 vm_object_deallocate(object);
11457
11458 if (result != KERN_SUCCESS &&
11459 result != KERN_MEMORY_RESTART_COPY) {
11460 _vm_map_entry_dispose(map_header, new_entry);
11461 break;
11462 }
11463
11464 /*
11465 * Verify that the map has not substantially
11466 * changed while the copy was being made.
11467 */
11468
11469 vm_map_lock(map);
11470 if (version.main_timestamp + 1 != map->timestamp) {
11471 /*
11472 * Simple version comparison failed.
11473 *
11474 * Retry the lookup and verify that the
11475 * same object/offset are still present.
11476 */
11477 vm_object_deallocate(new_entry->
11478 object.vm_object);
11479 _vm_map_entry_dispose(map_header, new_entry);
11480 if (result == KERN_MEMORY_RESTART_COPY)
11481 result = KERN_SUCCESS;
11482 continue;
11483 }
11484
11485 if (result == KERN_MEMORY_RESTART_COPY) {
11486 vm_object_reference(object);
11487 goto RestartCopy;
11488 }
11489 }
11490
11491 _vm_map_store_entry_link(map_header,
11492 map_header->links.prev, new_entry);
11493
11494 /* Protections for submap mapping are irrelevant here */
11495 if( !src_entry->is_sub_map ) {
11496 *cur_protection &= src_entry->protection;
11497 *max_protection &= src_entry->max_protection;
11498 }
11499 map_address += tmp_size;
11500 mapped_size += tmp_size;
11501 src_start += tmp_size;
11502
11503 } /* end while */
11504
11505 vm_map_unlock(map);
11506 if (result != KERN_SUCCESS) {
11507 /*
11508 * Free all allocated elements.
11509 */
11510 for (src_entry = map_header->links.next;
11511 src_entry != (struct vm_map_entry *)&map_header->links;
11512 src_entry = new_entry) {
11513 new_entry = src_entry->vme_next;
11514 _vm_map_store_entry_unlink(map_header, src_entry);
11515 vm_object_deallocate(src_entry->object.vm_object);
11516 _vm_map_entry_dispose(map_header, src_entry);
11517 }
11518 }
11519 return result;
11520 }
11521
11522 /*
11523 * Routine: vm_map_remap
11524 *
11525 * Map a portion of a task's address space.
11526 * The mapped region must not overlap more than
11527 * one vm memory object. Protections and
11528 * inheritance attributes remain the same
11529 * as in the original task and are out parameters.
11530 * Source and target tasks can be identical.
11531 * Other attributes are the same as for vm_map().
11532 */
11533 kern_return_t
11534 vm_map_remap(
11535 vm_map_t target_map,
11536 vm_map_address_t *address,
11537 vm_map_size_t size,
11538 vm_map_offset_t mask,
11539 int flags,
11540 vm_map_t src_map,
11541 vm_map_offset_t memory_address,
11542 boolean_t copy,
11543 vm_prot_t *cur_protection,
11544 vm_prot_t *max_protection,
11545 vm_inherit_t inheritance)
11546 {
11547 kern_return_t result;
11548 vm_map_entry_t entry;
11549 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11550 vm_map_entry_t new_entry;
11551 struct vm_map_header map_header;
11552
11553 if (target_map == VM_MAP_NULL)
11554 return KERN_INVALID_ARGUMENT;
11555
11556 switch (inheritance) {
11557 case VM_INHERIT_NONE:
11558 case VM_INHERIT_COPY:
11559 case VM_INHERIT_SHARE:
11560 if (size != 0 && src_map != VM_MAP_NULL)
11561 break;
11562 /*FALL THRU*/
11563 default:
11564 return KERN_INVALID_ARGUMENT;
11565 }
11566
11567 size = vm_map_round_page(size);
11568
11569 result = vm_map_remap_extract(src_map, memory_address,
11570 size, copy, &map_header,
11571 cur_protection,
11572 max_protection,
11573 inheritance,
11574 target_map->hdr.
11575 entries_pageable);
11576
11577 if (result != KERN_SUCCESS) {
11578 return result;
11579 }
11580
11581 /*
11582 * Allocate/check a range of free virtual address
11583 * space for the target
11584 */
11585 *address = vm_map_trunc_page(*address);
11586 vm_map_lock(target_map);
11587 result = vm_map_remap_range_allocate(target_map, address, size,
11588 mask, flags, &insp_entry);
11589
11590 for (entry = map_header.links.next;
11591 entry != (struct vm_map_entry *)&map_header.links;
11592 entry = new_entry) {
11593 new_entry = entry->vme_next;
11594 _vm_map_store_entry_unlink(&map_header, entry);
11595 if (result == KERN_SUCCESS) {
11596 entry->vme_start += *address;
11597 entry->vme_end += *address;
11598 vm_map_store_entry_link(target_map, insp_entry, entry);
11599 insp_entry = entry;
11600 } else {
11601 if (!entry->is_sub_map) {
11602 vm_object_deallocate(entry->object.vm_object);
11603 } else {
11604 vm_map_deallocate(entry->object.sub_map);
11605 }
11606 _vm_map_entry_dispose(&map_header, entry);
11607 }
11608 }
11609
11610 if( target_map->disable_vmentry_reuse == TRUE) {
11611 if( target_map->highest_entry_end < insp_entry->vme_end ){
11612 target_map->highest_entry_end = insp_entry->vme_end;
11613 }
11614 }
11615
11616 if (result == KERN_SUCCESS) {
11617 target_map->size += size;
11618 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11619 }
11620 vm_map_unlock(target_map);
11621
11622 if (result == KERN_SUCCESS && target_map->wiring_required)
11623 result = vm_map_wire(target_map, *address,
11624 *address + size, *cur_protection, TRUE);
11625 return result;
11626 }
11627
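/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * caller (the "example_" name is made up) could map "size" bytes starting at
 * "src_addr" in "src_map" into "dst_map" at a kernel-chosen address, sharing
 * rather than copying the underlying memory.
 */
static kern_return_t
example_share_region(
        vm_map_t                dst_map,
        vm_map_t                src_map,
        vm_map_offset_t         src_addr,
        vm_map_size_t           size,
        vm_map_address_t        *dst_addr)      /* OUT */
{
        vm_prot_t       cur_prot;
        vm_prot_t       max_prot;

        *dst_addr = 0;
        return vm_map_remap(dst_map, dst_addr, size,
                            0,                  /* mask */
                            VM_FLAGS_ANYWHERE,
                            src_map, src_addr,
                            FALSE,              /* copy: share instead */
                            &cur_prot, &max_prot,
                            VM_INHERIT_SHARE);
}
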
11628 /*
11629 * Routine: vm_map_remap_range_allocate
11630 *
11631 * Description:
11632 * Allocate a range in the specified virtual address map.
11633 * Returns the address and the map entry just before the allocated
11634 * range.
11635 *
11636 * Map must be locked.
11637 */
11638
11639 static kern_return_t
11640 vm_map_remap_range_allocate(
11641 vm_map_t map,
11642 vm_map_address_t *address, /* IN/OUT */
11643 vm_map_size_t size,
11644 vm_map_offset_t mask,
11645 int flags,
11646 vm_map_entry_t *map_entry) /* OUT */
11647 {
11648 vm_map_entry_t entry;
11649 vm_map_offset_t start;
11650 vm_map_offset_t end;
11651 kern_return_t kr;
11652
11653 StartAgain: ;
11654
11655 start = *address;
11656
11657 if (flags & VM_FLAGS_ANYWHERE)
11658 {
11659 /*
11660 * Calculate the first possible address.
11661 */
11662
11663 if (start < map->min_offset)
11664 start = map->min_offset;
11665 if (start > map->max_offset)
11666 return(KERN_NO_SPACE);
11667
11668 /*
11669 * Look for the first possible address;
11670 * if there's already something at this
11671 * address, we have to start after it.
11672 */
11673
11674 if( map->disable_vmentry_reuse == TRUE) {
11675 VM_MAP_HIGHEST_ENTRY(map, entry, start);
11676 } else {
11677 assert(first_free_is_valid(map));
11678 if (start == map->min_offset) {
11679 if ((entry = map->first_free) != vm_map_to_entry(map))
11680 start = entry->vme_end;
11681 } else {
11682 vm_map_entry_t tmp_entry;
11683 if (vm_map_lookup_entry(map, start, &tmp_entry))
11684 start = tmp_entry->vme_end;
11685 entry = tmp_entry;
11686 }
11687 }
11688
11689 /*
11690 * In any case, the "entry" always precedes
11691 * the proposed new region throughout the
11692 * loop:
11693 */
11694
11695 while (TRUE) {
11696 register vm_map_entry_t next;
11697
11698 /*
11699 * Find the end of the proposed new region.
11700 * Be sure we didn't go beyond the end, or
11701 * wrap around the address.
11702 */
11703
11704 end = ((start + mask) & ~mask);
11705 if (end < start)
11706 return(KERN_NO_SPACE);
11707 start = end;
11708 end += size;
11709
11710 if ((end > map->max_offset) || (end < start)) {
11711 if (map->wait_for_space) {
11712 if (size <= (map->max_offset -
11713 map->min_offset)) {
11714 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11715 vm_map_unlock(map);
11716 thread_block(THREAD_CONTINUE_NULL);
11717 vm_map_lock(map);
11718 goto StartAgain;
11719 }
11720 }
11721
11722 return(KERN_NO_SPACE);
11723 }
11724
11725 /*
11726 * If there are no more entries, we must win.
11727 */
11728
11729 next = entry->vme_next;
11730 if (next == vm_map_to_entry(map))
11731 break;
11732
11733 /*
11734 * If there is another entry, it must be
11735 * after the end of the potential new region.
11736 */
11737
11738 if (next->vme_start >= end)
11739 break;
11740
11741 /*
11742 * Didn't fit -- move to the next entry.
11743 */
11744
11745 entry = next;
11746 start = entry->vme_end;
11747 }
11748 *address = start;
11749 } else {
11750 vm_map_entry_t temp_entry;
11751
11752 /*
11753 * Verify that:
11754 * the address doesn't itself violate
11755 * the mask requirement.
11756 */
11757
11758 if ((start & mask) != 0)
11759 return(KERN_NO_SPACE);
11760
11761
11762 /*
11763 * ... the address is within bounds
11764 */
11765
11766 end = start + size;
11767
11768 if ((start < map->min_offset) ||
11769 (end > map->max_offset) ||
11770 (start >= end)) {
11771 return(KERN_INVALID_ADDRESS);
11772 }
11773
11774 /*
11775 * If we're asked to overwrite whatever was mapped in that
11776 * range, first deallocate that range.
11777 */
11778 if (flags & VM_FLAGS_OVERWRITE) {
11779 vm_map_t zap_map;
11780
11781 /*
11782 * We use a "zap_map" to avoid having to unlock
11783 * the "map" in vm_map_delete(), which would compromise
11784 * the atomicity of the "deallocate" and then "remap"
11785 * combination.
11786 */
11787 zap_map = vm_map_create(PMAP_NULL,
11788 start,
11789 end - start,
11790 map->hdr.entries_pageable);
11791 if (zap_map == VM_MAP_NULL) {
11792 return KERN_RESOURCE_SHORTAGE;
11793 }
11794
11795 kr = vm_map_delete(map, start, end,
11796 VM_MAP_REMOVE_SAVE_ENTRIES,
11797 zap_map);
11798 if (kr == KERN_SUCCESS) {
11799 vm_map_destroy(zap_map,
11800 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11801 zap_map = VM_MAP_NULL;
11802 }
11803 }
11804
11805 /*
11806 * ... the starting address isn't allocated
11807 */
11808
11809 if (vm_map_lookup_entry(map, start, &temp_entry))
11810 return(KERN_NO_SPACE);
11811
11812 entry = temp_entry;
11813
11814 /*
11815 * ... the next region doesn't overlap the
11816 * end point.
11817 */
11818
11819 if ((entry->vme_next != vm_map_to_entry(map)) &&
11820 (entry->vme_next->vme_start < end))
11821 return(KERN_NO_SPACE);
11822 }
11823 *map_entry = entry;
11824 return(KERN_SUCCESS);
11825 }
11826
11827 /*
11828 * vm_map_switch:
11829 *
11830 * Set the address map for the current thread to the specified map
11831 */
11832
11833 vm_map_t
11834 vm_map_switch(
11835 vm_map_t map)
11836 {
11837 int mycpu;
11838 thread_t thread = current_thread();
11839 vm_map_t oldmap = thread->map;
11840
11841 mp_disable_preemption();
11842 mycpu = cpu_number();
11843
11844 /*
11845 * Deactivate the current map and activate the requested map
11846 */
11847 PMAP_SWITCH_USER(thread, map, mycpu);
11848
11849 mp_enable_preemption();
11850 return(oldmap);
11851 }
11852
11853
11854 /*
11855 * Routine: vm_map_write_user
11856 *
11857 * Description:
11858 * Copy out data from a kernel space into space in the
11859 * destination map. The space must already exist in the
11860 * destination map.
11861 * NOTE: This routine should only be called by threads
11862 * which can block on a page fault, i.e. kernel-mode user
11863 * threads.
11864 *
11865 */
11866 kern_return_t
11867 vm_map_write_user(
11868 vm_map_t map,
11869 void *src_p,
11870 vm_map_address_t dst_addr,
11871 vm_size_t size)
11872 {
11873 kern_return_t kr = KERN_SUCCESS;
11874
11875 if(current_map() == map) {
11876 if (copyout(src_p, dst_addr, size)) {
11877 kr = KERN_INVALID_ADDRESS;
11878 }
11879 } else {
11880 vm_map_t oldmap;
11881
11882 /* take on the identity of the target map while doing */
11883 /* the transfer */
11884
11885 vm_map_reference(map);
11886 oldmap = vm_map_switch(map);
11887 if (copyout(src_p, dst_addr, size)) {
11888 kr = KERN_INVALID_ADDRESS;
11889 }
11890 vm_map_switch(oldmap);
11891 vm_map_deallocate(map);
11892 }
11893 return kr;
11894 }
11895
11896 /*
11897 * Routine: vm_map_read_user
11898 *
11899 * Description:
11900 * Copy in data from a user space source map into the
11901 * kernel map. The space must already exist in the
11902 * kernel map.
11903 * NOTE: This routine should only be called by threads
11904 * which can block on a page fault, i.e. kernel-mode user
11905 * threads.
11906 *
11907 */
11908 kern_return_t
11909 vm_map_read_user(
11910 vm_map_t map,
11911 vm_map_address_t src_addr,
11912 void *dst_p,
11913 vm_size_t size)
11914 {
11915 kern_return_t kr = KERN_SUCCESS;
11916
11917 if(current_map() == map) {
11918 if (copyin(src_addr, dst_p, size)) {
11919 kr = KERN_INVALID_ADDRESS;
11920 }
11921 } else {
11922 vm_map_t oldmap;
11923
11924 /* take on the identity of the target map while doing */
11925 /* the transfer */
11926
11927 vm_map_reference(map);
11928 oldmap = vm_map_switch(map);
11929 if (copyin(src_addr, dst_p, size)) {
11930 kr = KERN_INVALID_ADDRESS;
11931 }
11932 vm_map_switch(oldmap);
11933 vm_map_deallocate(map);
11934 }
11935 return kr;
11936 }
11937
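/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * round trip (the "example_" name is made up) through the two routines
 * above, writing a kernel value into a user mapping and reading it back.
 * The calling thread must be able to block on a page fault.
 */
static kern_return_t
example_user_round_trip(
        vm_map_t                user_map,
        vm_map_address_t        user_addr)
{
        int             out_value = 42;
        int             in_value = 0;
        kern_return_t   kr;

        kr = vm_map_write_user(user_map, &out_value, user_addr,
                               sizeof (out_value));
        if (kr != KERN_SUCCESS)
                return kr;

        return vm_map_read_user(user_map, user_addr, &in_value,
                                sizeof (in_value));
}
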
11938
11939 /*
11940 * vm_map_check_protection:
11941 *
11942 * Assert that the target map allows the specified
11943 * privilege on the entire address region given.
11944 * The entire region must be allocated.
11945 */
11946 boolean_t
11947 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11948 vm_map_offset_t end, vm_prot_t protection)
11949 {
11950 vm_map_entry_t entry;
11951 vm_map_entry_t tmp_entry;
11952
11953 vm_map_lock(map);
11954
11955 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11956 {
11957 vm_map_unlock(map);
11958 return (FALSE);
11959 }
11960
11961 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11962 vm_map_unlock(map);
11963 return(FALSE);
11964 }
11965
11966 entry = tmp_entry;
11967
11968 while (start < end) {
11969 if (entry == vm_map_to_entry(map)) {
11970 vm_map_unlock(map);
11971 return(FALSE);
11972 }
11973
11974 /*
11975 * No holes allowed!
11976 */
11977
11978 if (start < entry->vme_start) {
11979 vm_map_unlock(map);
11980 return(FALSE);
11981 }
11982
11983 /*
11984 * Check protection associated with entry.
11985 */
11986
11987 if ((entry->protection & protection) != protection) {
11988 vm_map_unlock(map);
11989 return(FALSE);
11990 }
11991
11992 /* go to next entry */
11993
11994 start = entry->vme_end;
11995 entry = entry->vme_next;
11996 }
11997 vm_map_unlock(map);
11998 return(TRUE);
11999 }
12000
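/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * caller (the "example_" name is made up) verifying that an entire,
 * page-rounded range is readable and writable before operating on it.
 */
static kern_return_t
example_verify_read_write(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        if (!vm_map_check_protection(map,
                                     vm_map_trunc_page(start),
                                     vm_map_round_page(end),
                                     VM_PROT_READ | VM_PROT_WRITE))
                return KERN_PROTECTION_FAILURE;

        return KERN_SUCCESS;
}
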
12001 kern_return_t
12002 vm_map_purgable_control(
12003 vm_map_t map,
12004 vm_map_offset_t address,
12005 vm_purgable_t control,
12006 int *state)
12007 {
12008 vm_map_entry_t entry;
12009 vm_object_t object;
12010 kern_return_t kr;
12011
12012 /*
12013 * Vet all the input parameters and current type and state of the
12014 * underlying object. Return with an error if anything is amiss.
12015 */
12016 if (map == VM_MAP_NULL)
12017 return(KERN_INVALID_ARGUMENT);
12018
12019 if (control != VM_PURGABLE_SET_STATE &&
12020 control != VM_PURGABLE_GET_STATE &&
12021 control != VM_PURGABLE_PURGE_ALL)
12022 return(KERN_INVALID_ARGUMENT);
12023
12024 if (control == VM_PURGABLE_PURGE_ALL) {
12025 vm_purgeable_object_purge_all();
12026 return KERN_SUCCESS;
12027 }
12028
12029 if (control == VM_PURGABLE_SET_STATE &&
12030 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
12031 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
12032 return(KERN_INVALID_ARGUMENT);
12033
12034 vm_map_lock_read(map);
12035
12036 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
12037
12038 /*
12039 * Must pass a valid non-submap address.
12040 */
12041 vm_map_unlock_read(map);
12042 return(KERN_INVALID_ADDRESS);
12043 }
12044
12045 if ((entry->protection & VM_PROT_WRITE) == 0) {
12046 /*
12047 * Can't apply purgable controls to something you can't write.
12048 */
12049 vm_map_unlock_read(map);
12050 return(KERN_PROTECTION_FAILURE);
12051 }
12052
12053 object = entry->object.vm_object;
12054 if (object == VM_OBJECT_NULL) {
12055 /*
12056 * Object must already be present or it can't be purgable.
12057 */
12058 vm_map_unlock_read(map);
12059 return KERN_INVALID_ARGUMENT;
12060 }
12061
12062 vm_object_lock(object);
12063
12064 if (entry->offset != 0 ||
12065 entry->vme_end - entry->vme_start != object->vo_size) {
12066 /*
12067 * Can only apply purgable controls to the whole (existing)
12068 * object at once.
12069 */
12070 vm_map_unlock_read(map);
12071 vm_object_unlock(object);
12072 return KERN_INVALID_ARGUMENT;
12073 }
12074
12075 vm_map_unlock_read(map);
12076
12077 kr = vm_object_purgable_control(object, control, state);
12078
12079 vm_object_unlock(object);
12080
12081 return kr;
12082 }
12083
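/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * cache owner (the "example_" name is made up) marking the purgeable object
 * mapped at "addr" volatile so the VM may discard it under memory pressure,
 * and later checking whether it was emptied.
 */
static kern_return_t
example_make_volatile(
        vm_map_t        map,
        vm_map_offset_t addr,
        boolean_t       *was_purged)    /* OUT */
{
        int             state;
        kern_return_t   kr;

        state = VM_PURGABLE_VOLATILE;
        kr = vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, &state);
        if (kr != KERN_SUCCESS)
                return kr;

        /* ... some time later ... */
        kr = vm_map_purgable_control(map, addr, VM_PURGABLE_GET_STATE, &state);
        if (kr == KERN_SUCCESS)
                *was_purged = (state == VM_PURGABLE_EMPTY);
        return kr;
}
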
12084 kern_return_t
12085 vm_map_page_query_internal(
12086 vm_map_t target_map,
12087 vm_map_offset_t offset,
12088 int *disposition,
12089 int *ref_count)
12090 {
12091 kern_return_t kr;
12092 vm_page_info_basic_data_t info;
12093 mach_msg_type_number_t count;
12094
12095 count = VM_PAGE_INFO_BASIC_COUNT;
12096 kr = vm_map_page_info(target_map,
12097 offset,
12098 VM_PAGE_INFO_BASIC,
12099 (vm_page_info_t) &info,
12100 &count);
12101 if (kr == KERN_SUCCESS) {
12102 *disposition = info.disposition;
12103 *ref_count = info.ref_count;
12104 } else {
12105 *disposition = 0;
12106 *ref_count = 0;
12107 }
12108
12109 return kr;
12110 }
12111
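/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * helper (the "example_" name is made up) that reports whether the page
 * backing "addr" is currently resident, using the wrapper above.
 */
static boolean_t
example_page_is_resident(
        vm_map_t        map,
        vm_map_offset_t addr)
{
        int     disposition = 0;
        int     ref_count = 0;

        if (vm_map_page_query_internal(map, vm_map_trunc_page(addr),
                                       &disposition, &ref_count) != KERN_SUCCESS)
                return FALSE;

        return (disposition & VM_PAGE_QUERY_PAGE_PRESENT) ? TRUE : FALSE;
}
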
12112 kern_return_t
12113 vm_map_page_info(
12114 vm_map_t map,
12115 vm_map_offset_t offset,
12116 vm_page_info_flavor_t flavor,
12117 vm_page_info_t info,
12118 mach_msg_type_number_t *count)
12119 {
12120 vm_map_entry_t map_entry;
12121 vm_object_t object;
12122 vm_page_t m;
12123 kern_return_t kr;
12124 kern_return_t retval = KERN_SUCCESS;
12125 boolean_t top_object;
12126 int disposition;
12127 int ref_count;
12128 vm_object_id_t object_id;
12129 vm_page_info_basic_t basic_info;
12130 int depth;
12131 vm_map_offset_t offset_in_page;
12132
12133 switch (flavor) {
12134 case VM_PAGE_INFO_BASIC:
12135 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12136 /*
12137 * The "vm_page_info_basic_data" structure was not
12138 * properly padded, so allow the size to be off by
12139 * one to maintain backwards binary compatibility...
12140 */
12141 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12142 return KERN_INVALID_ARGUMENT;
12143 }
12144 break;
12145 default:
12146 return KERN_INVALID_ARGUMENT;
12147 }
12148
12149 disposition = 0;
12150 ref_count = 0;
12151 object_id = 0;
12152 top_object = TRUE;
12153 depth = 0;
12154
12155 retval = KERN_SUCCESS;
12156 offset_in_page = offset & PAGE_MASK;
12157 offset = vm_map_trunc_page(offset);
12158
12159 vm_map_lock_read(map);
12160
12161 /*
12162 * First, find the map entry covering "offset", going down
12163 * submaps if necessary.
12164 */
12165 for (;;) {
12166 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12167 vm_map_unlock_read(map);
12168 return KERN_INVALID_ADDRESS;
12169 }
12170 /* compute offset from this map entry's start */
12171 offset -= map_entry->vme_start;
12172 /* compute offset into this map entry's object (or submap) */
12173 offset += map_entry->offset;
12174
12175 if (map_entry->is_sub_map) {
12176 vm_map_t sub_map;
12177
12178 sub_map = map_entry->object.sub_map;
12179 vm_map_lock_read(sub_map);
12180 vm_map_unlock_read(map);
12181
12182 map = sub_map;
12183
12184 ref_count = MAX(ref_count, map->ref_count);
12185 continue;
12186 }
12187 break;
12188 }
12189
12190 object = map_entry->object.vm_object;
12191 if (object == VM_OBJECT_NULL) {
12192 /* no object -> no page */
12193 vm_map_unlock_read(map);
12194 goto done;
12195 }
12196
12197 vm_object_lock(object);
12198 vm_map_unlock_read(map);
12199
12200 /*
12201 * Go down the VM object shadow chain until we find the page
12202 * we're looking for.
12203 */
12204 for (;;) {
12205 ref_count = MAX(ref_count, object->ref_count);
12206
12207 m = vm_page_lookup(object, offset);
12208
12209 if (m != VM_PAGE_NULL) {
12210 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12211 break;
12212 } else {
12213 #if MACH_PAGEMAP
12214 if (object->existence_map) {
12215 if (vm_external_state_get(object->existence_map,
12216 offset) ==
12217 VM_EXTERNAL_STATE_EXISTS) {
12218 /*
12219 * this page has been paged out
12220 */
12221 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12222 break;
12223 }
12224 } else
12225 #endif
12226 {
12227 if (object->internal &&
12228 object->alive &&
12229 !object->terminating &&
12230 object->pager_ready) {
12231
12232 memory_object_t pager;
12233
12234 vm_object_paging_begin(object);
12235 pager = object->pager;
12236 vm_object_unlock(object);
12237
12238 /*
12239 * Ask the default pager if
12240 * it has this page.
12241 */
12242 kr = memory_object_data_request(
12243 pager,
12244 offset + object->paging_offset,
12245 0, /* just poke the pager */
12246 VM_PROT_READ,
12247 NULL);
12248
12249 vm_object_lock(object);
12250 vm_object_paging_end(object);
12251
12252 if (kr == KERN_SUCCESS) {
12253 /* the default pager has it */
12254 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12255 break;
12256 }
12257 }
12258 }
12259
12260 if (object->shadow != VM_OBJECT_NULL) {
12261 vm_object_t shadow;
12262
12263 offset += object->vo_shadow_offset;
12264 shadow = object->shadow;
12265
12266 vm_object_lock(shadow);
12267 vm_object_unlock(object);
12268
12269 object = shadow;
12270 top_object = FALSE;
12271 depth++;
12272 } else {
12273 // if (!object->internal)
12274 // break;
12275 // retval = KERN_FAILURE;
12276 // goto done_with_object;
12277 break;
12278 }
12279 }
12280 }
12281 /* The ref_count is not strictly accurate: it measures the number */
12282 /* of entities holding a ref on the object, and they may not be */
12283 /* mapping the object or the section holding the target page. */
12284 /* It is still a ballpark number and, though an over-count, it */
12285 /* picks up the copy-on-write cases. */
12286
12287 /* We could also get a picture of page sharing from pmap_attributes */
12288 /* but this would undercount, as only faulted-in mappings would */
12289 /* show up. */
12290
12291 if (top_object == TRUE && object->shadow)
12292 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12293
12294 if (! object->internal)
12295 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12296
12297 if (m == VM_PAGE_NULL)
12298 goto done_with_object;
12299
12300 if (m->fictitious) {
12301 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12302 goto done_with_object;
12303 }
12304 if (m->dirty || pmap_is_modified(m->phys_page))
12305 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12306
12307 if (m->reference || pmap_is_referenced(m->phys_page))
12308 disposition |= VM_PAGE_QUERY_PAGE_REF;
12309
12310 if (m->speculative)
12311 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12312
12313 if (m->cs_validated)
12314 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12315 if (m->cs_tainted)
12316 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12317
12318 done_with_object:
12319 vm_object_unlock(object);
12320 done:
12321
12322 switch (flavor) {
12323 case VM_PAGE_INFO_BASIC:
12324 basic_info = (vm_page_info_basic_t) info;
12325 basic_info->disposition = disposition;
12326 basic_info->ref_count = ref_count;
12327 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12328 basic_info->offset =
12329 (memory_object_offset_t) offset + offset_in_page;
12330 basic_info->depth = depth;
12331 break;
12332 }
12333
12334 return retval;
12335 }
12336
12337 /*
12338 * vm_map_msync
12339 *
12340 * Synchronises the specified memory range with its backing store
12341 * image by either flushing or cleaning the contents to the appropriate
12342 * memory manager, engaging in a memory object synchronize dialog with
12343 * that manager. The client doesn't return until the manager issues
12344 * an m_o_s_completed message. MIG magically converts the user task
12345 * parameter to the task's address map.
12346 *
12347 * interpretation of sync_flags
12348 * VM_SYNC_INVALIDATE - discard pages, only return precious
12349 * pages to manager.
12350 *
12351 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12352 * - discard pages, write dirty or precious
12353 * pages back to memory manager.
12354 *
12355 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12356 * - write dirty or precious pages back to
12357 * the memory manager.
12358 *
12359 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
12360 * is a hole in the region, and we would
12361 * have returned KERN_SUCCESS, return
12362 * KERN_INVALID_ADDRESS instead.
12363 *
12364 * NOTE
12365 * The memory object attributes have not yet been implemented; this
12366 * function will have to deal with the invalidate attribute.
12367 *
12368 * RETURNS
12369 * KERN_INVALID_TASK Bad task parameter
12370 * KERN_INVALID_ARGUMENT both sync and async were specified.
12371 * KERN_SUCCESS The usual.
12372 * KERN_INVALID_ADDRESS There was a hole in the region.
12373 */
12374
12375 kern_return_t
12376 vm_map_msync(
12377 vm_map_t map,
12378 vm_map_address_t address,
12379 vm_map_size_t size,
12380 vm_sync_t sync_flags)
12381 {
12382 msync_req_t msr;
12383 msync_req_t new_msr;
12384 queue_chain_t req_q; /* queue of requests for this msync */
12385 vm_map_entry_t entry;
12386 vm_map_size_t amount_left;
12387 vm_object_offset_t offset;
12388 boolean_t do_sync_req;
12389 boolean_t had_hole = FALSE;
12390 memory_object_t pager;
12391
12392 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12393 (sync_flags & VM_SYNC_SYNCHRONOUS))
12394 return(KERN_INVALID_ARGUMENT);
12395
12396 /*
12397 * align address and size on page boundaries
12398 */
12399 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12400 address = vm_map_trunc_page(address);
12401
12402 if (map == VM_MAP_NULL)
12403 return(KERN_INVALID_TASK);
12404
12405 if (size == 0)
12406 return(KERN_SUCCESS);
12407
12408 queue_init(&req_q);
12409 amount_left = size;
12410
12411 while (amount_left > 0) {
12412 vm_object_size_t flush_size;
12413 vm_object_t object;
12414
12415 vm_map_lock(map);
12416 if (!vm_map_lookup_entry(map,
12417 vm_map_trunc_page(address), &entry)) {
12418
12419 vm_map_size_t skip;
12420
12421 /*
12422 * hole in the address map.
12423 */
12424 had_hole = TRUE;
12425
12426 /*
12427 * Check for empty map.
12428 */
12429 if (entry == vm_map_to_entry(map) &&
12430 entry->vme_next == entry) {
12431 vm_map_unlock(map);
12432 break;
12433 }
12434 /*
12435 * Check that we don't wrap and that
12436 * we have at least one real map entry.
12437 */
12438 if ((map->hdr.nentries == 0) ||
12439 (entry->vme_next->vme_start < address)) {
12440 vm_map_unlock(map);
12441 break;
12442 }
12443 /*
12444 * Move up to the next entry if needed
12445 */
12446 skip = (entry->vme_next->vme_start - address);
12447 if (skip >= amount_left)
12448 amount_left = 0;
12449 else
12450 amount_left -= skip;
12451 address = entry->vme_next->vme_start;
12452 vm_map_unlock(map);
12453 continue;
12454 }
12455
12456 offset = address - entry->vme_start;
12457
12458 /*
12459 * do we have more to flush than is contained in this
12460 * entry ?
12461 */
12462 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12463 flush_size = entry->vme_end -
12464 (entry->vme_start + offset);
12465 } else {
12466 flush_size = amount_left;
12467 }
12468 amount_left -= flush_size;
12469 address += flush_size;
12470
12471 if (entry->is_sub_map == TRUE) {
12472 vm_map_t local_map;
12473 vm_map_offset_t local_offset;
12474
12475 local_map = entry->object.sub_map;
12476 local_offset = entry->offset;
12477 vm_map_unlock(map);
12478 if (vm_map_msync(
12479 local_map,
12480 local_offset,
12481 flush_size,
12482 sync_flags) == KERN_INVALID_ADDRESS) {
12483 had_hole = TRUE;
12484 }
12485 continue;
12486 }
12487 object = entry->object.vm_object;
12488
12489 /*
12490 * We can't sync this object if the object has not been
12491 * created yet
12492 */
12493 if (object == VM_OBJECT_NULL) {
12494 vm_map_unlock(map);
12495 continue;
12496 }
12497 offset += entry->offset;
12498
12499 vm_object_lock(object);
12500
12501 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12502 int kill_pages = 0;
12503 boolean_t reusable_pages = FALSE;
12504
12505 if (sync_flags & VM_SYNC_KILLPAGES) {
12506 if (object->ref_count == 1 && !object->shadow)
12507 kill_pages = 1;
12508 else
12509 kill_pages = -1;
12510 }
12511 if (kill_pages != -1)
12512 vm_object_deactivate_pages(object, offset,
12513 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12514 vm_object_unlock(object);
12515 vm_map_unlock(map);
12516 continue;
12517 }
12518 /*
12519 * We can't sync this object if there isn't a pager.
12520 * Don't bother to sync internal objects, since there can't
12521 * be any "permanent" storage for these objects anyway.
12522 */
12523 if ((object->pager == MEMORY_OBJECT_NULL) ||
12524 (object->internal) || (object->private)) {
12525 vm_object_unlock(object);
12526 vm_map_unlock(map);
12527 continue;
12528 }
12529 /*
12530 * keep reference on the object until syncing is done
12531 */
12532 vm_object_reference_locked(object);
12533 vm_object_unlock(object);
12534
12535 vm_map_unlock(map);
12536
12537 do_sync_req = vm_object_sync(object,
12538 offset,
12539 flush_size,
12540 sync_flags & VM_SYNC_INVALIDATE,
12541 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12542 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12543 sync_flags & VM_SYNC_SYNCHRONOUS);
12544 /*
12545 * only send an m_o_s if we returned pages or if the entry
12546 * is writable (i.e. dirty pages may have already been sent back)
12547 */
12548 if (!do_sync_req) {
12549 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12550 /*
12551 * clear out the clustering and read-ahead hints
12552 */
12553 vm_object_lock(object);
12554
12555 object->pages_created = 0;
12556 object->pages_used = 0;
12557 object->sequential = 0;
12558 object->last_alloc = 0;
12559
12560 vm_object_unlock(object);
12561 }
12562 vm_object_deallocate(object);
12563 continue;
12564 }
12565 msync_req_alloc(new_msr);
12566
12567 vm_object_lock(object);
12568 offset += object->paging_offset;
12569
12570 new_msr->offset = offset;
12571 new_msr->length = flush_size;
12572 new_msr->object = object;
12573 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12574 re_iterate:
12575
12576 /*
12577 * We can't sync this object if there isn't a pager. The
12578 * pager can disappear anytime we're not holding the object
12579 * lock. So this has to be checked anytime we goto re_iterate.
12580 */
12581
12582 pager = object->pager;
12583
12584 if (pager == MEMORY_OBJECT_NULL) {
12585 vm_object_unlock(object);
12586 vm_object_deallocate(object);
12587 continue;
12588 }
12589
12590 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12591 /*
12592 * need to check for overlapping entry, if found, wait
12593 * on overlapping msr to be done, then reiterate
12594 */
12595 msr_lock(msr);
12596 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12597 ((offset >= msr->offset &&
12598 offset < (msr->offset + msr->length)) ||
12599 (msr->offset >= offset &&
12600 msr->offset < (offset + flush_size))))
12601 {
12602 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12603 msr_unlock(msr);
12604 vm_object_unlock(object);
12605 thread_block(THREAD_CONTINUE_NULL);
12606 vm_object_lock(object);
12607 goto re_iterate;
12608 }
12609 msr_unlock(msr);
12610 }/* queue_iterate */
12611
12612 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12613
12614 vm_object_paging_begin(object);
12615 vm_object_unlock(object);
12616
12617 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12618
12619 (void) memory_object_synchronize(
12620 pager,
12621 offset,
12622 flush_size,
12623 sync_flags & ~VM_SYNC_CONTIGUOUS);
12624
12625 vm_object_lock(object);
12626 vm_object_paging_end(object);
12627 vm_object_unlock(object);
12628 }/* while */
12629
12630 /*
12631 * wait for memory_object_synchronize_completed messages from pager(s)
12632 */
12633
12634 while (!queue_empty(&req_q)) {
12635 msr = (msync_req_t)queue_first(&req_q);
12636 msr_lock(msr);
12637 while(msr->flag != VM_MSYNC_DONE) {
12638 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12639 msr_unlock(msr);
12640 thread_block(THREAD_CONTINUE_NULL);
12641 msr_lock(msr);
12642 }/* while */
12643 queue_remove(&req_q, msr, msync_req_t, req_q);
12644 msr_unlock(msr);
12645 vm_object_deallocate(msr->object);
12646 msync_req_free(msr);
12647 }/* while */
12648
12649 /* for proper msync() behaviour */
12650 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12651 return(KERN_INVALID_ADDRESS);
12652
12653 return(KERN_SUCCESS);
12654 }/* vm_msync */
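
/*
 * Illustrative sketch, not part of the original sources: a synchronous
 * flush of a range using the sync_flags described above, treating any
 * hole in the region as an error.  The helper name is hypothetical.
 */
static __unused kern_return_t
vm_map_msync_flush_example(
	vm_map_t		map,
	vm_map_address_t	address,
	vm_map_size_t		size)
{
	/*
	 * Write dirty or precious pages back to the memory manager and
	 * wait for completion; KERN_INVALID_ADDRESS is returned if the
	 * range contains a hole.
	 */
	return vm_map_msync(map, address, size,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}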
12655
12656 /*
12657 * Routine: convert_port_entry_to_map
12658 * Purpose:
12659 * Convert from a port specifying an entry or a task
12660 * to a map. Doesn't consume the port ref; produces a map ref,
12661 * which may be null. Unlike convert_port_to_map, the
12662 * port may be backed by a task or a named entry.
12663 * Conditions:
12664 * Nothing locked.
12665 */
12666
12667
12668 vm_map_t
12669 convert_port_entry_to_map(
12670 ipc_port_t port)
12671 {
12672 vm_map_t map;
12673 vm_named_entry_t named_entry;
12674 uint32_t try_failed_count = 0;
12675
12676 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12677 while(TRUE) {
12678 ip_lock(port);
12679 if(ip_active(port) && (ip_kotype(port)
12680 == IKOT_NAMED_ENTRY)) {
12681 named_entry =
12682 (vm_named_entry_t)port->ip_kobject;
12683 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12684 ip_unlock(port);
12685
12686 try_failed_count++;
12687 mutex_pause(try_failed_count);
12688 continue;
12689 }
12690 named_entry->ref_count++;
12691 lck_mtx_unlock(&(named_entry)->Lock);
12692 ip_unlock(port);
12693 if ((named_entry->is_sub_map) &&
12694 (named_entry->protection
12695 & VM_PROT_WRITE)) {
12696 map = named_entry->backing.map;
12697 } else {
12698 mach_destroy_memory_entry(port);
12699 return VM_MAP_NULL;
12700 }
12701 vm_map_reference_swap(map);
12702 mach_destroy_memory_entry(port);
12703 break;
12704 }
12705 else
12706 return VM_MAP_NULL;
12707 }
12708 }
12709 else
12710 map = convert_port_to_map(port);
12711
12712 return map;
12713 }
12714
12715 /*
12716 * Routine: convert_port_entry_to_object
12717 * Purpose:
12718 * Convert from a port specifying a named entry to an
12719 * object. Doesn't consume the port ref; produces an object ref,
12720 * which may be null.
12721 * Conditions:
12722 * Nothing locked.
12723 */
12724
12725
12726 vm_object_t
12727 convert_port_entry_to_object(
12728 ipc_port_t port)
12729 {
12730 vm_object_t object;
12731 vm_named_entry_t named_entry;
12732 uint32_t try_failed_count = 0;
12733
12734 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12735 while(TRUE) {
12736 ip_lock(port);
12737 if(ip_active(port) && (ip_kotype(port)
12738 == IKOT_NAMED_ENTRY)) {
12739 named_entry =
12740 (vm_named_entry_t)port->ip_kobject;
12741 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12742 ip_unlock(port);
12743
12744 try_failed_count++;
12745 mutex_pause(try_failed_count);
12746 continue;
12747 }
12748 named_entry->ref_count++;
12749 lck_mtx_unlock(&(named_entry)->Lock);
12750 ip_unlock(port);
12751 if ((!named_entry->is_sub_map) &&
12752 (!named_entry->is_pager) &&
12753 (named_entry->protection
12754 & VM_PROT_WRITE)) {
12755 object = named_entry->backing.object;
12756 } else {
12757 mach_destroy_memory_entry(port);
12758 return (vm_object_t)NULL;
12759 }
12760 vm_object_reference(named_entry->backing.object);
12761 mach_destroy_memory_entry(port);
12762 break;
12763 }
12764 else
12765 return (vm_object_t)NULL;
12766 }
12767 } else {
12768 return (vm_object_t)NULL;
12769 }
12770
12771 return object;
12772 }
12773
12774 /*
12775 * Export routines to other components for the things we access locally through
12776 * macros.
12777 */
12778 #undef current_map
12779 vm_map_t
12780 current_map(void)
12781 {
12782 return (current_map_fast());
12783 }
12784
12785 /*
12786 * vm_map_reference:
12787 *
12788 * Most code internal to the osfmk will go through a
12789 * macro defining this. This is always here for the
12790 * use of other kernel components.
12791 */
12792 #undef vm_map_reference
12793 void
12794 vm_map_reference(
12795 register vm_map_t map)
12796 {
12797 if (map == VM_MAP_NULL)
12798 return;
12799
12800 lck_mtx_lock(&map->s_lock);
12801 #if TASK_SWAPPER
12802 assert(map->res_count > 0);
12803 assert(map->ref_count >= map->res_count);
12804 map->res_count++;
12805 #endif
12806 map->ref_count++;
12807 lck_mtx_unlock(&map->s_lock);
12808 }
12809
12810 /*
12811 * vm_map_deallocate:
12812 *
12813 * Removes a reference from the specified map,
12814 * destroying it if no references remain.
12815 * The map should not be locked.
12816 */
12817 void
12818 vm_map_deallocate(
12819 register vm_map_t map)
12820 {
12821 unsigned int ref;
12822
12823 if (map == VM_MAP_NULL)
12824 return;
12825
12826 lck_mtx_lock(&map->s_lock);
12827 ref = --map->ref_count;
12828 if (ref > 0) {
12829 vm_map_res_deallocate(map);
12830 lck_mtx_unlock(&map->s_lock);
12831 return;
12832 }
12833 assert(map->ref_count == 0);
12834 lck_mtx_unlock(&map->s_lock);
12835
12836 #if TASK_SWAPPER
12837 /*
12838 * The map residence count isn't decremented here because
12839 * the vm_map_delete below will traverse the entire map,
12840 * deleting entries, and the residence counts on objects
12841 * and sharing maps will go away then.
12842 */
12843 #endif
12844
12845 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12846 }
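
/*
 * Illustrative sketch, not part of the original sources: the expected
 * pairing of vm_map_reference() and vm_map_deallocate() when another
 * kernel component needs to keep a map alive across some operation.
 * The helper and its operate_on_map() callback are hypothetical.
 */
static __unused void
vm_map_hold_example(
	vm_map_t	map,
	void		(*operate_on_map)(vm_map_t))
{
	if (map == VM_MAP_NULL)
		return;

	vm_map_reference(map);		/* take an extra reference */
	operate_on_map(map);		/* caller-supplied work */
	vm_map_deallocate(map);		/* drop it; may destroy the map */
}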
12847
12848
12849 void
12850 vm_map_disable_NX(vm_map_t map)
12851 {
12852 if (map == NULL)
12853 return;
12854 if (map->pmap == NULL)
12855 return;
12856
12857 pmap_disable_NX(map->pmap);
12858 }
12859
12860 void
12861 vm_map_disallow_data_exec(vm_map_t map)
12862 {
12863 if (map == NULL)
12864 return;
12865
12866 map->map_disallow_data_exec = TRUE;
12867 }
12868
12869 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12870 * more descriptive.
12871 */
12872 void
12873 vm_map_set_32bit(vm_map_t map)
12874 {
12875 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12876 }
12877
12878
12879 void
12880 vm_map_set_64bit(vm_map_t map)
12881 {
12882 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12883 }
12884
12885 vm_map_offset_t
12886 vm_compute_max_offset(unsigned is64)
12887 {
12888 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12889 }
12890
12891 boolean_t
12892 vm_map_is_64bit(
12893 vm_map_t map)
12894 {
12895 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12896 }
12897
12898 boolean_t
12899 vm_map_has_4GB_pagezero(
12900 vm_map_t map)
12901 {
12902 /*
12903 * XXX FBDP
12904 * We should lock the VM map (for read) here but we can get away
12905 * with it for now because there can't really be any race condition:
12906 * the VM map's min_offset is changed only when the VM map is created
12907 * and when the zero page is established (when the binary gets loaded),
12908 * and this routine gets called only when the task terminates and the
12909 * VM map is being torn down, and when a new map is created via
12910 * load_machfile()/execve().
12911 */
12912 return (map->min_offset >= 0x100000000ULL);
12913 }
12914
12915 void
12916 vm_map_set_4GB_pagezero(vm_map_t map)
12917 {
12918 #if defined(__i386__)
12919 pmap_set_4GB_pagezero(map->pmap);
12920 #else
12921 #pragma unused(map)
12922 #endif
12923
12924 }
12925
12926 void
12927 vm_map_clear_4GB_pagezero(vm_map_t map)
12928 {
12929 #if defined(__i386__)
12930 pmap_clear_4GB_pagezero(map->pmap);
12931 #else
12932 #pragma unused(map)
12933 #endif
12934 }
12935
12936 /*
12937 * Raise a VM map's minimum offset.
12938 * To strictly enforce "page zero" reservation.
12939 */
12940 kern_return_t
12941 vm_map_raise_min_offset(
12942 vm_map_t map,
12943 vm_map_offset_t new_min_offset)
12944 {
12945 vm_map_entry_t first_entry;
12946
12947 new_min_offset = vm_map_round_page(new_min_offset);
12948
12949 vm_map_lock(map);
12950
12951 if (new_min_offset < map->min_offset) {
12952 /*
12953 * Can't move min_offset backwards, as that would expose
12954 * a part of the address space that was previously, and for
12955 * possibly good reasons, inaccessible.
12956 */
12957 vm_map_unlock(map);
12958 return KERN_INVALID_ADDRESS;
12959 }
12960
12961 first_entry = vm_map_first_entry(map);
12962 if (first_entry != vm_map_to_entry(map) &&
12963 first_entry->vme_start < new_min_offset) {
12964 /*
12965 * Some memory was already allocated below the new
12966 * minimum offset. It's too late to change it now...
12967 */
12968 vm_map_unlock(map);
12969 return KERN_NO_SPACE;
12970 }
12971
12972 map->min_offset = new_min_offset;
12973
12974 vm_map_unlock(map);
12975
12976 return KERN_SUCCESS;
12977 }
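
/*
 * Illustrative sketch, not part of the original sources: reserving a
 * 4GB "page zero" by raising the map's minimum offset to the same
 * boundary tested by vm_map_has_4GB_pagezero() above.  The helper name
 * is hypothetical.
 */
static __unused kern_return_t
vm_map_reserve_4GB_pagezero_example(
	vm_map_t	map)
{
	return vm_map_raise_min_offset(map,
				       (vm_map_offset_t)0x100000000ULL);
}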
12978
12979 /*
12980 * Set the limit on the maximum amount of user wired memory allowed for this map.
12981 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12982 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
12983 * don't have to reach over to the BSD data structures.
12984 */
12985
12986 void
12987 vm_map_set_user_wire_limit(vm_map_t map,
12988 vm_size_t limit)
12989 {
12990 map->user_wire_limit = limit;
12991 }
12992
12993
12994 void vm_map_switch_protect(vm_map_t map,
12995 boolean_t val)
12996 {
12997 vm_map_lock(map);
12998 map->switch_protect=val;
12999 vm_map_unlock(map);
13000 }
13001
13002 /* Add (generate) code signature for memory range */
13003 #if CONFIG_DYNAMIC_CODE_SIGNING
13004 kern_return_t vm_map_sign(vm_map_t map,
13005 vm_map_offset_t start,
13006 vm_map_offset_t end)
13007 {
13008 vm_map_entry_t entry;
13009 vm_page_t m;
13010 vm_object_t object;
13011
13012 /*
13013 * Vet all the input parameters and current type and state of the
13014 * underlying object. Return with an error if anything is amiss.
13015 */
13016 if (map == VM_MAP_NULL)
13017 return(KERN_INVALID_ARGUMENT);
13018
13019 vm_map_lock_read(map);
13020
13021 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
13022 /*
13023 * Must pass a valid non-submap address.
13024 */
13025 vm_map_unlock_read(map);
13026 return(KERN_INVALID_ADDRESS);
13027 }
13028
13029 if((entry->vme_start > start) || (entry->vme_end < end)) {
13030 /*
13031 * Map entry doesn't cover the requested range. Not handling
13032 * this situation currently.
13033 */
13034 vm_map_unlock_read(map);
13035 return(KERN_INVALID_ARGUMENT);
13036 }
13037
13038 object = entry->object.vm_object;
13039 if (object == VM_OBJECT_NULL) {
13040 /*
13041 * Object must already be present or we can't sign.
13042 */
13043 vm_map_unlock_read(map);
13044 return KERN_INVALID_ARGUMENT;
13045 }
13046
13047 vm_object_lock(object);
13048 vm_map_unlock_read(map);
13049
13050 while(start < end) {
13051 uint32_t refmod;
13052
13053 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
13054 if (m==VM_PAGE_NULL) {
13055 /* should we try to fault a page here? we can probably
13056 * demand it exists and is locked for this request */
13057 vm_object_unlock(object);
13058 return KERN_FAILURE;
13059 }
13060 /* deal with special page status */
13061 if (m->busy ||
13062 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
13063 vm_object_unlock(object);
13064 return KERN_FAILURE;
13065 }
13066
13067 /* Page is OK... now "validate" it */
13068 /* This is the place where we'll call out to create a code
13069 * directory, later */
13070 m->cs_validated = TRUE;
13071
13072 /* The page is now "clean" for codesigning purposes. That means
13073 * we don't consider it as modified (wpmapped) anymore. But
13074 * we'll disconnect the page so we note any future modification
13075 * attempts. */
13076 m->wpmapped = FALSE;
13077 refmod = pmap_disconnect(m->phys_page);
13078
13079 /* Pull the dirty status from the pmap, since we cleared the
13080 * wpmapped bit */
13081 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
13082 m->dirty = TRUE;
13083 }
13084
13085 /* On to the next page */
13086 start += PAGE_SIZE;
13087 }
13088 vm_object_unlock(object);
13089
13090 return KERN_SUCCESS;
13091 }
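
/*
 * Illustrative sketch, not part of the original sources: signing one
 * page worth of dynamically generated code with vm_map_sign().  The
 * helper name is hypothetical; the page must already be resident and
 * covered by a single non-submap entry, as checked above.
 */
static __unused kern_return_t
vm_map_sign_one_page_example(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	vm_map_offset_t	start, end;

	start = vm_map_trunc_page(addr);
	end = start + PAGE_SIZE;

	return vm_map_sign(map, start, end);
}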
13092 #endif
13093
13094 #if CONFIG_FREEZE
13095
13096 kern_return_t vm_map_freeze_walk(
13097 vm_map_t map,
13098 unsigned int *purgeable_count,
13099 unsigned int *wired_count,
13100 unsigned int *clean_count,
13101 unsigned int *dirty_count,
13102 boolean_t *has_shared)
13103 {
13104 vm_map_entry_t entry;
13105
13106 vm_map_lock_read(map);
13107
13108 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13109 *has_shared = FALSE;
13110
13111 for (entry = vm_map_first_entry(map);
13112 entry != vm_map_to_entry(map);
13113 entry = entry->vme_next) {
13114 unsigned int purgeable, clean, dirty, wired;
13115 boolean_t shared;
13116
13117 if ((entry->object.vm_object == 0) ||
13118 (entry->is_sub_map) ||
13119 (entry->object.vm_object->phys_contiguous)) {
13120 continue;
13121 }
13122
13123 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
13124
13125 *purgeable_count += purgeable;
13126 *wired_count += wired;
13127 *clean_count += clean;
13128 *dirty_count += dirty;
13129
13130 if (shared) {
13131 *has_shared = TRUE;
13132 }
13133 }
13134
13135 vm_map_unlock_read(map);
13136
13137 return KERN_SUCCESS;
13138 }
13139
13140 kern_return_t vm_map_freeze(
13141 vm_map_t map,
13142 unsigned int *purgeable_count,
13143 unsigned int *wired_count,
13144 unsigned int *clean_count,
13145 unsigned int *dirty_count,
13146 boolean_t *has_shared)
13147 {
13148 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13149 vm_object_t compact_object = VM_OBJECT_NULL;
13150 vm_object_offset_t offset = 0x0;
13151 kern_return_t kr = KERN_SUCCESS;
13152 void *default_freezer_toc = NULL;
13153 boolean_t cleanup = FALSE;
13154
13155 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13156 *has_shared = FALSE;
13157
13158 /* Create our compact object */
13159 compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
13160 if (!compact_object) {
13161 kr = KERN_FAILURE;
13162 goto done;
13163 }
13164
13165 default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
13166 if (!default_freezer_toc) {
13167 kr = KERN_FAILURE;
13168 goto done;
13169 }
13170
13171 /*
13172 * We need the exclusive lock here so that we can
13173 * block any page faults or lookups while we are
13174 * in the middle of freezing this vm map.
13175 */
13176 vm_map_lock(map);
13177
13178 if (map->default_freezer_toc != NULL){
13179 /*
13180 * This map has already been frozen.
13181 */
13182 cleanup = TRUE;
13183 kr = KERN_SUCCESS;
13184 goto done;
13185 }
13186
13187 /* Get a mapping in place for the freezing about to commence */
13188 map->default_freezer_toc = default_freezer_toc;
13189
13190 vm_object_lock(compact_object);
13191
13192 for (entry2 = vm_map_first_entry(map);
13193 entry2 != vm_map_to_entry(map);
13194 entry2 = entry2->vme_next) {
13195
13196 vm_object_t src_object = entry2->object.vm_object;
13197
13198 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13199 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13200 unsigned int purgeable, clean, dirty, wired;
13201 boolean_t shared;
13202
13203 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
13204 src_object, compact_object, &default_freezer_toc, &offset);
13205
13206 *purgeable_count += purgeable;
13207 *wired_count += wired;
13208 *clean_count += clean;
13209 *dirty_count += dirty;
13210
13211 if (shared) {
13212 *has_shared = TRUE;
13213 }
13214 }
13215 }
13216
13217 vm_object_unlock(compact_object);
13218
13219 /* Finally, throw out the pages to swap */
13220 vm_object_pageout(compact_object);
13221
13222 done:
13223 vm_map_unlock(map);
13224
13225 /* Unwind if there was a failure */
13226 if ((cleanup) || (KERN_SUCCESS != kr)) {
13227 if (default_freezer_toc){
13228 default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
13229 }
13230 if (compact_object){
13231 vm_object_deallocate(compact_object);
13232 }
13233 }
13234
13235 return kr;
13236 }
13237
13238 __private_extern__ vm_object_t default_freezer_get_compact_vm_object( void** );
13239
13240 void
13241 vm_map_thaw(
13242 vm_map_t map)
13243 {
13244 void **default_freezer_toc;
13245 vm_object_t compact_object;
13246
13247 vm_map_lock(map);
13248
13249 if (map->default_freezer_toc == NULL){
13250 /*
13251 * This map is not in a frozen state.
13252 */
13253 goto out;
13254 }
13255
13256 default_freezer_toc = &(map->default_freezer_toc);
13257
13258 compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
13259
13260 /* Bring the pages back in */
13261 vm_object_pagein(compact_object);
13262
13263 /* Shift pages back to their original objects */
13264 vm_object_unpack(compact_object, default_freezer_toc);
13265
13266 vm_object_deallocate(compact_object);
13267
13268 map->default_freezer_toc = NULL;
13269
13270 out:
13271 vm_map_unlock(map);
13272 }
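
/*
 * Illustrative sketch, not part of the original sources: a possible
 * freeze/thaw sequence using the routines above.  The helper name and
 * the choice to walk the map first are hypothetical.
 */
static __unused kern_return_t
vm_map_freeze_example(
	vm_map_t	map)
{
	unsigned int	purgeable, wired, clean, dirty;
	boolean_t	shared;
	kern_return_t	kr;

	/* estimate what freezing would find, without modifying the map */
	kr = vm_map_freeze_walk(map, &purgeable, &wired,
				&clean, &dirty, &shared);
	if (kr != KERN_SUCCESS)
		return kr;

	/* pack eligible pages into the compact object and page them out */
	kr = vm_map_freeze(map, &purgeable, &wired,
			   &clean, &dirty, &shared);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ...later, bring everything back before the task runs again */
	vm_map_thaw(map);

	return KERN_SUCCESS;
}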
13273 #endif
13274
13275 #if !CONFIG_EMBEDDED
13276 /*
13277 * vm_map_entry_should_cow_for_true_share:
13278 *
13279 * Determines if the map entry should be clipped and set up for copy-on-write
13280 * to avoid applying "true_share" to a large VM object when only a subset is
13281 * targeted.
13282 *
13283 * For now, we target only the map entries created for the Objective C
13284 * Garbage Collector, which initially have the following properties:
13285 * - alias == VM_MEMORY_MALLOC
13286 * - wired_count == 0
13287 * - !needs_copy
13288 * and a VM object with:
13289 * - internal
13290 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
13291 * - !true_share
13292 * - vo_size == ANON_CHUNK_SIZE
13293 */
13294 boolean_t
13295 vm_map_entry_should_cow_for_true_share(
13296 vm_map_entry_t entry)
13297 {
13298 vm_object_t object;
13299
13300 if (entry->is_sub_map) {
13301 /* entry does not point at a VM object */
13302 return FALSE;
13303 }
13304
13305 if (entry->needs_copy) {
13306 /* already set for copy_on_write: done! */
13307 return FALSE;
13308 }
13309
13310 if (entry->alias != VM_MEMORY_MALLOC) {
13311 /* not tagged as an Objective C Garbage Collector entry */
13312 return FALSE;
13313 }
13314
13315 if (entry->wired_count) {
13316 /* wired: can't change the map entry... */
13317 return FALSE;
13318 }
13319
13320 object = entry->object.vm_object;
13321
13322 if (object == VM_OBJECT_NULL) {
13323 /* no object yet... */
13324 return FALSE;
13325 }
13326
13327 if (!object->internal) {
13328 /* not an internal object */
13329 return FALSE;
13330 }
13331
13332 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13333 /* not the default copy strategy */
13334 return FALSE;
13335 }
13336
13337 if (object->true_share) {
13338 /* already true_share: too late to avoid it */
13339 return FALSE;
13340 }
13341
13342 if (object->vo_size != ANON_CHUNK_SIZE) {
13343 /* not an object created for the ObjC Garbage Collector */
13344 return FALSE;
13345 }
13346
13347 /*
13348 * All the criteria match: we have a large object being targeted for "true_share".
13349 * To limit the adverse side-effects linked with "true_share", tell the caller to
13350 * try and avoid setting up the entire object for "true_share" by clipping the
13351 * targeted range and setting it up for copy-on-write.
13352 */
13353 return TRUE;
13354 }
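
/*
 * Illustrative sketch, not part of the original sources: how a caller
 * that already holds the map write-locked might act on the check above,
 * clipping the entry to the targeted range so that only that range is
 * set up for copy-on-write.  The helper name is hypothetical and the
 * sketch is only an outline: a complete implementation would also have
 * to write-protect any existing physical mappings of the clipped range.
 */
static __unused void
vm_map_entry_cow_for_true_share_example(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (!vm_map_entry_should_cow_for_true_share(entry))
		return;

	/* restrict the entry to the pages actually being targeted */
	vm_map_clip_start(map, entry, vm_map_trunc_page(start));
	vm_map_clip_end(map, entry, vm_map_round_page(end));

	/* defer the actual copy to the first write fault */
	entry->needs_copy = TRUE;
}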
13355 #endif /* !CONFIG_EMBEDDED */